In [1]:
import torch
import joblib
import pickle
import numpy as np
from sklearn.metrics import classification_report, roc_auc_score, confusion_matrix
import matplotlib.pyplot as plt

In [2]:
from qwen_asr import Qwen3ASRModel

  from .autonotebook import tqdm as notebook_tqdm


## Load ASR Model

In [3]:
# Load the pretrained "Qwen/Qwen3-ASR-0.6B" speech-recognition checkpoint.
# The weights are requested in float32 and the model is placed on the CPU.
asr_model = Qwen3ASRModel.from_pretrained(
    "Qwen/Qwen3-ASR-0.6B",
    dtype=torch.float32,
    device_map="cpu",
)

The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


## Transcribe Audio

In [4]:
def transcribe_audio(audio_path, language="English"):
    """Transcribe one audio file to text using the global ``asr_model``.

    Parameters
    ----------
    audio_path : str
        Path of the audio file; forwarded to ``asr_model.transcribe`` as
        the ``audio`` argument.
    language : str, optional
        Language hint forwarded to the ASR model. Defaults to ``"English"``,
        the value that used to be hard-coded, so existing calls behave the
        same.

    Returns
    -------
    str
        The ``text`` attribute of the first transcription result.
    """
    result = asr_model.transcribe(
        audio=audio_path,
        language=language,
    )

    # The call returns an indexable collection of results; a single input
    # is passed here, so only the first entry is read.
    return result[0].text

## Load TF-IDF + XGBoost Model

In [5]:
# Load the serialized text classifier. predict_xgb() calls predict_proba on raw
# strings, so this is presumably a full TF-IDF + XGBoost pipeline — TODO confirm.
# NOTE(review): joblib.load unpickles arbitrary objects; only load trusted files.
xgb_model = joblib.load("tf-idf_xgb.pkl")

In [6]:
def predict_xgb(text, threshold=0.5):
    """Classify a transcript with the global ``xgb_model``.

    Parameters
    ----------
    text : str
        Raw text; it is wrapped in a one-element list before being passed to
        ``xgb_model.predict_proba``.
    threshold : float, optional
        Decision cut-off. The prediction is 1 only when the probability is
        strictly greater than this value. Defaults to 0.5, the value that was
        previously hard-coded; exposed to mirror ``predict_gru``.

    Returns
    -------
    tuple
        ``(pred, prob)`` where ``pred`` is 0 or 1 and ``prob`` is the
        probability reported for class index 1.
    """
    # Column 1 of predict_proba is the class-1 probability; [0] picks the
    # single row that corresponds to ``text``.
    prob = xgb_model.predict_proba([text])[:, 1][0]
    pred = 1 if prob > threshold else 0
    return pred, prob

## Load GRU

In [7]:
# Load the vocabulary used by the GRU pipeline; encode() looks tokens up in it
# with vocab.get(token, 1), so it is expected to be a token -> id mapping.
# NOTE(review): pickle.load can execute arbitrary code; only load trusted files.
with open("models/gru_vocab.pkl", "rb") as f:
    vocab = pickle.load(f)

print("Vocab loaded")

Vocab loaded


In [8]:
import torch.nn as nn

class GRUModel(nn.Module):
    """Single-layer GRU binary classifier over sequences of token ids.

    Pipeline: embedding -> GRU -> linear head on the final hidden state.
    The output is one raw logit per sequence (apply ``torch.sigmoid`` to get
    a probability).

    Parameters
    ----------
    vocab_size : int
        Number of rows in the embedding table.
    embed_dim : int
        Size of each token embedding.
    hidden_dim : int
        Size of the GRU hidden state.
    """

    def __init__(self, vocab_size, embed_dim, hidden_dim):
        super().__init__()

        # Index 0 is reserved for padding.
        self.embedding = nn.Embedding(vocab_size, embed_dim, padding_idx=0)
        # No ``dropout`` argument: nn.GRU only applies dropout *between*
        # stacked layers, so on this single-layer GRU it had no effect and
        # only produced a UserWarning. Parameter names/shapes are unchanged,
        # so existing checkpoints still load.
        self.gru = nn.GRU(embed_dim, hidden_dim, batch_first=True)
        self.fc = nn.Linear(hidden_dim, 1)

    def forward(self, x):
        """Return one logit per input sequence.

        ``x`` is an integer tensor of token ids shaped (batch, seq_len);
        the result has shape (batch,).
        """
        x = self.embedding(x)
        _, h = self.gru(x)
        # h[-1] is the last layer's final hidden state: (batch, hidden_dim).
        out = self.fc(h[-1])
        # Squeeze only the trailing size-1 feature dim. A bare squeeze() would
        # also drop the batch dim when batch == 1 and return a 0-d tensor.
        return out.squeeze(-1)


In [9]:
# Rebuild the network (embed_dim=128, hidden_dim=64); these sizes and the
# vocabulary size must match the checkpoint or load_state_dict will raise.
gru_model = GRUModel(len(vocab), 128, 64)
# map_location="cpu" remaps tensors saved from a GPU so the checkpoint also
# loads on CPU-only machines (the rest of this notebook runs on CPU).
gru_model.load_state_dict(torch.load("models/gru_models.pth", map_location="cpu"))
# Switch to evaluation mode before running inference.
gru_model.eval()

print("GRU loaded")

GRU loaded


  super().__init__("GRU", *args, **kwargs)


In [10]:
import re

# Fixed sequence length for the GRU input: encode() keeps at most this many
# token ids and pad() right-pads shorter sequences with 0 up to this length.
# NOTE(review): presumably must equal the length used at training time — confirm.
MAX_LEN = 50

def clean_text(text):
    """Normalise raw text before tokenisation.

    Lower-cases the input, then blanks out (in this order) URLs starting with
    ``http``, runs of digits, and every character that is neither a word
    character nor whitespace. Finally collapses whitespace runs to a single
    space and trims both ends.
    """
    cleaned = text.lower()
    # Order matters: URLs are removed first, while their punctuation is intact.
    for pattern in (r"http\S+", r"\d+", r"[^\w\s]"):
        cleaned = re.sub(pattern, " ", cleaned)
    return re.sub(r"\s+", " ", cleaned).strip()

def tokenize(text):
    """Split text into lower-cased word tokens.

    A token is any maximal run of word characters delimited by word
    boundaries; everything else is discarded.
    """
    lowered = text.lower()
    return [match.group(0) for match in re.finditer(r'\b\w+\b', lowered)]

def encode(text):
    """Convert raw text into a list of at most ``MAX_LEN`` vocabulary ids.

    The text is cleaned and tokenised, each token is looked up in the global
    ``vocab``, and tokens missing from it are mapped to id 1.
    """
    words = tokenize(clean_text(text))
    # Only the first MAX_LEN tokens are kept; unknown tokens fall back to 1.
    return [vocab.get(word, 1) for word in words[:MAX_LEN]]

def pad(seq):
    """Right-pad a list of ids with zeros up to ``MAX_LEN`` entries.

    A list that already has ``MAX_LEN`` or more entries is returned with no
    padding added (it is not truncated here).
    """
    missing = max(MAX_LEN - len(seq), 0)
    filler = [0] * missing
    return seq + filler

In [11]:
def predict_gru(text, threshold=0.4):
    """Classify a transcript with the global ``gru_model``.

    The text is encoded and padded, wrapped into a batch of one, and run
    through the model with gradient tracking disabled. The model output is
    passed through a sigmoid to obtain a probability.

    Returns ``(pred, prob)``: ``pred`` is 1 when ``prob`` is strictly greater
    than ``threshold`` (default 0.4), otherwise 0.
    """
    token_ids = pad(encode(text))
    # Wrapping in a list adds the leading batch dimension.
    batch = torch.tensor([token_ids])

    with torch.no_grad():
        logit = gru_model(batch)

    prob = torch.sigmoid(logit).item()
    pred = int(prob > threshold)
    return pred, prob

In [12]:
# Sample 2: transcribe the recording, then score the transcript with both classifiers.
# NOTE(review): absolute, user-specific Windows path — this cell will not run on
# another machine as-is.
# NOTE(review): this cell is repeated below for the other samples with only the
# path changed; a helper taking `audio_path` would remove the duplication.
audio_path = r"C:\Users\winon\Documents\Bootcamp\ai-spam-call-detection\dataset\sample_call2.mp3"

transcript = transcribe_audio(audio_path)
print("Transcript:", transcript)

# Each predictor returns (pred, prob). XGB cuts at 0.5; GRU uses its default threshold of 0.4.
xgb_pred, xgb_prob = predict_xgb(transcript)

gru_pred, gru_prob = predict_gru(transcript)

# In the saved output for this sample the two models disagree (XGB 1, GRU 0).
print("\n=== RESULT ===")
print("XGB = Pred:", xgb_pred, "Prob:", xgb_prob)
print("GRU = Pred:", gru_pred, "Prob:", gru_prob)


Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.


Transcript: Hello, I am an artificial intelligent notification bot. The purpose of this call is to make you aware that as a U.S. resident, you are now able to take advantage of alternative federal student loan repayment options and hardship programs. These programs are only for individuals who have less than one hundred and sixty thousand dollars in federal student loan debt. Having debt obligations may cause a hardship when added to your overall monthly bills. Using our automated approval technology, you are now able to obtain enrollment information based on your current situation.

=== RESULT ===
XGB = Pred: 1 Prob: 0.9945802
GRU = Pred: 0 Prob: 0.039121754467487335


In [13]:
# Sample 1: same transcribe -> XGB -> GRU flow as the other sample cells.
# NOTE(review): absolute, user-specific Windows path — not portable as-is.
audio_path = r"C:\Users\winon\Documents\Bootcamp\ai-spam-call-detection\dataset\sample_call.mp3"

transcript = transcribe_audio(audio_path)
print("Transcript:", transcript)

# Each predictor returns (pred, prob) for the transcript.
xgb_pred, xgb_prob = predict_xgb(transcript)

gru_pred, gru_prob = predict_gru(transcript)

# In the saved output for this sample the two models disagree (XGB 1, GRU 0).
print("\n=== RESULT ===")
print("XGB = Pred:", xgb_pred, "Prob:", xgb_prob)
print("GRU = Pred:", gru_pred, "Prob:", gru_prob)


Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.


Transcript: Hi, this is Daniel from Amazon Customer Service. We have seen a recent order number AMZ0987 of iPhone 11 Pro on your account, which is billed on your card attached to your Amazon account. The amount charged is $1,499. We notice some suspicious activity on your account, so we have put a hold to this transaction. Please press 1 now, and to report please press 2. Thank you.

=== RESULT ===
XGB = Pred: 1 Prob: 0.9995266
GRU = Pred: 0 Prob: 0.11535235494375229


In [14]:
# Sample 3: same transcribe -> XGB -> GRU flow as the other sample cells.
# NOTE(review): absolute, user-specific Windows path — not portable as-is.
audio_path = r"C:\Users\winon\Documents\Bootcamp\ai-spam-call-detection\dataset\sample_call3.wav"

transcript = transcribe_audio(audio_path)
print("Transcript:", transcript)

# Each predictor returns (pred, prob) for the transcript.
xgb_pred, xgb_prob = predict_xgb(transcript)

gru_pred, gru_prob = predict_gru(transcript)

# In the saved output for this sample both models predict 0.
print("\n=== RESULT ===")
print("XGB = Pred:", xgb_pred, "Prob:", xgb_prob)
print("GRU = Pred:", gru_pred, "Prob:", gru_prob)


Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.


Transcript: Hello. Hi, Grandpa. How are you?

=== RESULT ===
XGB = Pred: 0 Prob: 0.028521826
GRU = Pred: 0 Prob: 0.07341259717941284


In [15]:
# Sample 4: same transcribe -> XGB -> GRU flow as the other sample cells.
# NOTE(review): absolute, user-specific Windows path — not portable as-is.
audio_path = r"C:\Users\winon\Documents\Bootcamp\ai-spam-call-detection\dataset\sample_call4.wav"

transcript = transcribe_audio(audio_path)
print("Transcript:", transcript)

# Each predictor returns (pred, prob) for the transcript.
xgb_pred, xgb_prob = predict_xgb(transcript)

gru_pred, gru_prob = predict_gru(transcript)

# In the saved output for this sample both models predict 1.
print("\n=== RESULT ===")
print("XGB = Pred:", xgb_pred, "Prob:", xgb_prob)
print("GRU = Pred:", gru_pred, "Prob:", gru_prob)

Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.


Transcript: Or would like to stop the disjoin? Please press one to start your claim. Otherwise, press two to speak with the billing department.

=== RESULT ===
XGB = Pred: 1 Prob: 0.98128635
GRU = Pred: 1 Prob: 0.9555901288986206


In [16]:
# Sample 5: same transcribe -> XGB -> GRU flow as the other sample cells.
# NOTE(review): absolute, user-specific Windows path — not portable as-is.
audio_path = r"C:\Users\winon\Documents\Bootcamp\ai-spam-call-detection\dataset\sample_call5.wav"

transcript = transcribe_audio(audio_path)
print("Transcript:", transcript)

# Each predictor returns (pred, prob) for the transcript.
xgb_pred, xgb_prob = predict_xgb(transcript)

gru_pred, gru_prob = predict_gru(transcript)

# In the saved output for this sample the two models disagree (XGB 1, GRU 0).
print("\n=== RESULT ===")
print("XGB = Pred:", xgb_pred, "Prob:", xgb_prob)
print("GRU = Pred:", gru_pred, "Prob:", gru_prob)


Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.


Transcript: Hi, this is Emma with the Processing Center. I was giving you a quick call because I am reviewing your student loan profile, and as you know, there have been some pretty big changes to the federal student loan programs recently. When you have a moment, give me a call back. I would like to discuss your possible options with you while the programs are still available. It's urgent that you return my call prior to when payments resume. If you could please call me at 866-758-1276 to complete your application and finalize your enrollment as soon as possible. Again, that's 866-758-1276. Please have your reference number ready. Your reference number is SL367. I look forward to hearing from you soon, and I hope you have a great day.

=== RESULT ===
XGB = Pred: 1 Prob: 0.8566606
GRU = Pred: 0 Prob: 0.22885000705718994
