Install Dependencies

In [1]:
# Install Whisper + Torch
!pip install -q openai-whisper jiwer
!pip install -q torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu121


Imports + Mount Google Drive

In [2]:
from google.colab import drive
# Mount Google Drive so the model directory stored there is reachable from this
# runtime. force_remount=True asks Colab to redo the mount even if one exists.
DRIVE_MOUNT_POINT = "/content/drive"
drive.mount(DRIVE_MOUNT_POINT, force_remount=True)


Mounted at /content/drive


Upload Audio File

In [3]:
from google.colab import files

# Open Colab's upload dialog. The returned mapping is keyed by uploaded filename.
uploaded = files.upload()

# If the dialog is cancelled or nothing is selected the mapping is empty; fail
# here with a clear message instead of an IndexError on the lookup below.
if not uploaded:
    raise RuntimeError("No file was uploaded. Re-run this cell and choose an audio file.")

# Only the first uploaded file is analysed by the cells that follow.
audio_file = next(iter(uploaded))
print("Uploaded file:", audio_file)


Saving fraud_call_030.wav to fraud_call_030.wav
Uploaded file: fraud_call_030.wav


Load ASR & NLP Models

In [4]:
# Sanity check: list the MARBERT model directory on the mounted Drive so missing
# files are noticed before the loading cell runs.
!ls /content/drive/MyDrive/SeniorProject/output_of_NLP/models/MARBERT_final


config.json	   special_tokens_map.json  tokenizer.json     vocab.txt
model.safetensors  tokenizer_config.json    training_args.bin


In [5]:
import whisper
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification

# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Whisper "large-v3" checkpoint, moved to the selected device.
asr_model = whisper.load_model("large-v3")
asr_model = asr_model.to(device)

# Directory on Google Drive that holds the MARBERT classifier files.
MODEL_PATH = "/content/drive/MyDrive/SeniorProject/output_of_NLP/models/MARBERT_final"

# local_files_only=True: read the tokenizer and weights from MODEL_PATH only,
# without trying to download anything.
hf_load_options = {"local_files_only": True}
tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH, **hf_load_options)
nlp_model = AutoModelForSequenceClassification.from_pretrained(MODEL_PATH, **hf_load_options)
nlp_model = nlp_model.to(device)

# Class-name <-> class-index mapping used to read the classifier output.
# NOTE(review): hard-coded here; presumably matches the order used at training
# time -- confirm against the model's config.json.
label2id = {"fraud": 0, "safe": 1}
id2label = {index: name for name, index in label2id.items()}

print("System ready. Call analysis service is active.")


System ready. Call analysis service is active.


Run ASR

In [6]:
print("Analyzing call...")

# Decoding settings for Whisper: Arabic audio, temperature 0 with a beam of 5,
# and fp16 switched off.
transcribe_options = {
    "language": "ar",
    "temperature": 0,
    "beam_size": 5,
    "fp16": False,
}
result = asr_model.transcribe(audio_file, **transcribe_options)

# Keep only the transcript text, without surrounding whitespace.
call_text = result["text"].strip()

print("\nCall transcript:")
print(call_text)


Analyzing call...

Call transcript:
ألو، السلام عليكم معاك خدمة العملاء من البنك فيه مشكلة كبيرة بخصوص حسابك ولازم نسوي تأكيد معلومات بسرعة عليكم السلام، وش السالفة؟ وشلون مشكلة؟ نلاحظ محاولات دخول غير مصرح فيها لضمان الأمان، نحتاج منك رقم الهوية والبطاقة البنكية بسرعة أها، طيب، بس هل هو ضروري الحين؟ ما في وقت أقدر أروح للفرع؟ لازم الحين، السالفة خطيرة ولازم نوقف الاختراق قبل ما يضرر حسابك طيب، وش تحتاج بالضبط؟ أحتاج رقم الهوية وتاريخ الميلاد عشان نبدأ هذا الإجراء يحميك بس، ما يطلبون هذه المعلومات بالتليفون عادة إيه هدري، لكن هذه حالة طارئة ومؤقتة نبي نثبت أمان حسابك في أسرع وقت الله يستر، بكلم البنك مباشرة بعد هالمكالمة


Fraud Detection and Risk Explanation Module

In [7]:
# Encode the transcript as one fixed-length sequence of 256 tokens: shorter
# text is padded, and anything beyond 256 tokens is truncated (not classified).
tokenizer_options = {
    "padding": "max_length",
    "truncation": True,
    "max_length": 256,
    "return_tensors": "pt",
}
encoded_call = tokenizer(call_text, **tokenizer_options)

# Move every input tensor to the device the classifier lives on.
enc = {name: tensor.to(device) for name, tensor in encoded_call.items()}

# Forward pass without gradient tracking; softmax turns the logits into
# class probabilities, and [0] selects the single call in the batch.
with torch.no_grad():
    logits = nlp_model(**enc).logits
    class_probs = torch.softmax(logits, dim=-1)
    probs = class_probs.cpu().numpy()[0]

# Per-class probabilities and the winning label.
fraud_index = label2id["fraud"]
safe_index = label2id["safe"]
p_fraud = float(probs[fraud_index])
p_safe = float(probs[safe_index])
label = id2label[int(probs.argmax())]

# Headline shown to the user, driven by the predicted label.
print("Fraudulent Call Warning" if label == "fraud" else "Call marked as safe")

# Bucket the fraud probability: above 0.85 is High, above 0.6 is Medium,
# everything else is Low.
risk_level = (
    "High" if p_fraud > 0.85
    else "Medium" if p_fraud > 0.6
    else "Low"
)

# Fraud probability as a percentage with one decimal place.
risk_percentage = round(p_fraud * 100, 1)


# Keywords used to explain fraud type
# Maps a fraud-category name to the phrases associated with it. The detection
# loop that follows checks every phrase with a substring test (`in`) against the
# call transcript, so short entries also match inside longer words.
# Notes for maintainers:
#   - "سحب" is listed under both "bank_credentials" and "lottery_prize", so a
#     single occurrence produces a hit in both categories.
#   - Some phrases are listed in two spellings (e.g. "سيتم ايقاف" / "سيتم إيقاف").
#   - Category names are looked up in REASON_TEXT later in this cell; a category
#     without an entry there falls back to a generic reason string.
FRAUD_LEXICON = {
    "phishing": [
        "تعطيل","تم إيقاف","حسابك","سيتم إغلاق","توثيق","التحقق","رمز","رمز التحقق",
        "إعادة تفعيل","تحديث","تحديث البيانات","رسالة","فتح الحساب","تسجيل الدخول",
        "نؤكد هويتك","مرفوض","رفض النظام"
    ],
    "bank_credentials": [
        "بطاقة","cvv","رقم البطاقة","رقم سري","الرقم السري","كلمة السر","تحويل","سحب","ايداع",
        "رصيد","حوالة","فاتورة","تفاصيل الحساب","حساب بنكي","بيانات البنك"
    ],
    "identity_theft": [
        "رقم الهوية","بطاقة الأحوال","رقم الإقامة","رقم السجل","إثبات","اثبات الهوية",
        "نسخة الهوية","ارسل هويتك"
    ],
    "customer_service": [
        "موظف البنك","خدمة العملاء","الدعم الفني","موظف حكومي","إدارة البنك","نحتاج بياناتك",
        "تصحيح البيانات","لدينا مشكلة","خطأ في النظام"
    ],
    "investment": [
        "عوائد","عوائد مضمونة","نسبة","استثمار","استثمر","أرباح","ربح سريع","دخل إضافي",
        "مبلغ بسيط","بدون خسارة","مكسب","عرض خاص","فرصة ذهبية","مضمون","80%"
    ],
    "lottery_prize": [
        "ربحت","جائزة","مبروك","سحب","فوز","هدية","مبلغ مالي","سحب الجوائز","الرقم الفائز"
    ],
    "threat": [
        "سيتم ايقاف","سيتم إيقاف","سيتم حظر","غرامة","بلاغ","مخالفة","تهديد","شرطة"
    ],
}

# Find suspicious words in the call
import re

# Arabic short-vowel marks (U+064B-U+0652), the dagger alef (U+0670) and the
# tatweel/kashida (U+0640): none of them change which word is written.
_AR_OPTIONAL_MARKS = re.compile("[\u064B-\u0652\u0670\u0640]")
# Alef with madda / hamza above / hamza below / wasla, all folded to bare alef.
_AR_ALEF_VARIANTS = re.compile("[\u0622\u0623\u0625\u0671]")


def _normalize_for_matching(text):
    """Return `text` in a canonical form for keyword comparison.

    Removes optional Arabic marks, folds alef variants to bare alef and
    case-folds Latin letters, so spelling variants of the same phrase compare
    equal (e.g. "CVV" vs "cvv", or a word written with or without hamza).
    """
    text = _AR_OPTIONAL_MARKS.sub("", text)
    text = _AR_ALEF_VARIANTS.sub("\u0627", text)
    return text.casefold()


_normalized_call = _normalize_for_matching(call_text)

# category -> lexicon phrases found in the transcript (original lexicon spelling).
detected = {}
for category, words in FRAUD_LEXICON.items():
    hits = []
    seen_forms = set()
    for w in words:
        normalized_word = _normalize_for_matching(w)
        # The lexicon lists some phrases in two spellings; after normalisation
        # they are identical, so count such a phrase only once per category.
        if not normalized_word or normalized_word in seen_forms:
            continue
        seen_forms.add(normalized_word)
        if normalized_word in _normalized_call:
            hits.append(w)
    if hits:
        detected[category] = hits


import pandas as pd
import ipywidgets as widgets
from IPython.display import display

# Extra details are offered only for calls classified as fraud.
if label != "fraud":
    print("No additional details: call is safe.")
else:
    # Human-readable explanation for each lexicon category.
    REASON_TEXT = {
        "phishing": "OTP / account verification request",
        "bank_credentials": "bank card or account credentials",
        "identity_theft": "ID number or identity information",
        "customer_service": "fake customer service interaction",
        "investment": "suspicious investment promises",
        "lottery_prize": "fake lottery or prize announcement",
        "threat": "account blocking or legal threat"
    }

    # The category with the most keyword hits names the fraud type. On a tie,
    # max() keeps the first such category in `detected`'s iteration order.
    if not detected:
        reason_phrase = "general suspicious language"
    else:
        main_category = max(detected, key=lambda name: len(detected[name]))
        reason_phrase = REASON_TEXT.get(main_category, "suspicious language in the call")

    # One-row summary table for this call.
    summary_row = {
        "call_id": 0,
        "Fraud_Risk_Score": round(p_fraud, 2),
        "Risk_Level": risk_level,
        "Reason": reason_phrase,
    }
    df = pd.DataFrame([summary_row])

    def color_risk(val):
        """Return the CSS background for a Risk_Level cell value.

        "High" is red, "Medium" is yellow, and any other value is green.
        """
        backgrounds = {
            "High": "background-color: #ffb3b3;",
            "Medium": "background-color: #ffe9a8;",
        }
        return backgrounds.get(val, "background-color: #b8f5ba;")

    styled_df = df.style.map(color_risk, subset=["Risk_Level"])

    # The table stays hidden until the user presses the button.
    button = widgets.Button(description="More details")
    output = widgets.Output()

    def on_click(_button):
        """Button callback: clear the output area and render the styled table."""
        with output:
            output.clear_output()
            display(styled_df)

    display(button, output)
    button.on_click(on_click)




Button(description='More details', style=ButtonStyle())

Output()