<a href="https://colab.research.google.com/github/lameessa/nabaha/blob/main/Model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Nabaha | نباهة

In [6]:
# Install all dependencies
!apt-get install -y ffmpeg
!pip install -q git+https://github.com/openai/whisper.git
!pip install -q fastapi uvicorn sentence-transformers joblib pyngrok scikit-learn pydantic requests

# Imports
import uvicorn
from fastapi import FastAPI, File, UploadFile
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel
import joblib
import numpy as np
from sentence_transformers import SentenceTransformer
from pyngrok import ngrok
import nest_asyncio
import threading
import tempfile
import whisper
import requests

# Load models
encoder = SentenceTransformer("asafaya/bert-base-arabic")
clf = joblib.load("vishing_classifier.pkl")
whisper_model = whisper.load_model("large")

# Prediction logic for text
def process_text_message(text):
    features = encoder.encode([text])
    preds = clf.predict(features)[0]
    probs = clf.predict_proba(features)
    labels = ['is_urgent', 'used_threat', 'good_offers', 'request_money_transfer',
              'request_personal_info', 'request_banking_info', 'request_passwords',
              'request_code']
    predicted_labels = [labels[i] for i, val in enumerate(preds) if val == 1]
    confidence = float(np.max(probs[int(np.argmax(preds))]))
    return {
        "text": text,
        "prediction": ", ".join(predicted_labels) or "Normal Call",
        "confidence": round(confidence * 100, 2)
    }

# FastAPI app
app = FastAPI()

# Enable CORS
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Text route
class TextInput(BaseModel):
    message: str

@app.post("/analyze-text")
async def analyze_text(data: TextInput):
    return process_text_message(data.message)

# Audio route
@app.post("/analyze-audio")
async def analyze_audio(file: UploadFile = File(...)):
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp:
        tmp.write(await file.read())
        tmp_path = tmp.name

    # Transcribe using Whisper
    result = whisper_model.transcribe(tmp_path, language="ar")
    transcribed = result["text"]

    # Classify
    output = process_text_message(transcribed)
    output["transcribed_text"] = transcribed
    return output

# Start server
def run():
    uvicorn.run(app, host="0.0.0.0", port=8000)

# Setup ngrok
ngrok.set_auth_token("30NIZIEVqq6WXulqPboiYJAIT0O_zcbk3TQ8SaBSivpL5wgM")  # Replace with your token
public_url = ngrok.connect(8000)
print(f"Public URL is ready: {public_url}")
print(f"Text API: {public_url}/analyze-text")
print(f"Audio API: {public_url}/analyze-audio")

# Run server in background
nest_asyncio.apply()
threading.Thread(target=run).start()


Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
ffmpeg is already the newest version (7:4.4.2-0ubuntu0.22.04.1).
0 upgraded, 0 newly installed, 0 to remove and 35 not upgraded.
  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone


100%|█████████████████████████████████████| 2.88G/2.88G [00:51<00:00, 59.4MiB/s]


🚀 Public URL is ready: NgrokTunnel: "https://c0fef0dd53ca.ngrok-free.app" -> "http://localhost:8000"
🔤 Text API: NgrokTunnel: "https://c0fef0dd53ca.ngrok-free.app" -> "http://localhost:8000"/analyze-text
🎧 Audio API: NgrokTunnel: "https://c0fef0dd53ca.ngrok-free.app" -> "http://localhost:8000"/analyze-audio
