<a href="https://colab.research.google.com/github/kushalshah0/colab_tools/blob/main/ai_generated_phishing_email_detection_FastAPI.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [122]:
#@title Mount Google Drive
# Mount Google Drive at /content/drive. The model artifacts used by later cells
# are read from /content/drive/MyDrive/Detect_AI_Phishing_Project/<sample>.
from google.colab import drive
drive.mount("/content/drive")

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [123]:
#@title Install Dependencies
!pip install fastapi uvicorn pyngrok transformers torch tensorflow pickle-mixin



In [124]:
#@title Verify model paths
import os

SAMPLE_NAME = "sample1"

BASE_PATH = f"/content/drive/MyDrive/Detect_AI_Phishing_Project/{SAMPLE_NAME}"

# Artifact label -> absolute location under BASE_PATH.
paths = {
    label: f"{BASE_PATH}/{relative_path}"
    for label, relative_path in (
        ("LSTM", "lstm_model.pt"),
        ("GRU", "gru_model.pt"),
        ("BERT", "bert/final_model"),
        ("Tokenizer", "rnn_tokenizer.pkl"),
    )
}

# Report one line per artifact: check mark when it exists, cross when it is missing.
for label, location in paths.items():
    status = "✅" if os.path.exists(location) else "❌"
    print(label, status, location)


LSTM ✅ /content/drive/MyDrive/Detect_AI_Phishing_Project/sample1/lstm_model.pt
GRU ✅ /content/drive/MyDrive/Detect_AI_Phishing_Project/sample1/gru_model.pt
BERT ✅ /content/drive/MyDrive/Detect_AI_Phishing_Project/sample1/bert/final_model
Tokenizer ✅ /content/drive/MyDrive/Detect_AI_Phishing_Project/sample1/rnn_tokenizer.pkl


In [125]:
#@title Create FastAPI project structure
import os

API_DIR = "/content/api"
os.makedirs(API_DIR, exist_ok=True)

# Create (or truncate to zero length) each module file of the API package.
files = ["main.py", "models.py", "schemas.py"]
for module_name in files:
    module_path = os.path.join(API_DIR, module_name)
    # Opening in "w" mode is enough to create/empty the file.
    with open(module_path, "w"):
        pass

print("FastAPI files created:", files)

FastAPI files created: ['main.py', 'models.py', 'schemas.py']


In [126]:
#@title Write schemas.py
%%writefile /content/api/schemas.py
from pydantic import BaseModel

class EmailRequest(BaseModel):
    """Request body of POST /predict: the email text and the model to score it with."""
    # Raw email text to classify.
    text: str
    # Model selector; main.py lower-cases it and accepts "bert", "lstm" or "gru".
    model: str  # bert | lstm | gru

class PredictionResponse(BaseModel):
    """Response body of POST /predict."""
    # Lower-cased name of the model that produced the prediction.
    model: str
    # Predicted label: "Phishing" or "Legitimate".
    prediction: str
    # Probability of the predicted label, rounded to 4 decimals by the endpoint.
    confidence: float

Overwriting /content/api/schemas.py


In [127]:
#@title Write models.py
%%writefile /content/api/models.py
import torch
import torch.nn as nn
import pickle
import numpy as np
from transformers import BertTokenizer, BertForSequenceClassification

SAMPLE_NAME = "sample1"
BASE_PATH = f"/content/drive/MyDrive/Detect_AI_Phishing_Project/{SAMPLE_NAME}"
DEVICE = torch.device("cpu")

# Define LSTM Model Architecture (assuming a basic setup)
class LSTMModel(nn.Module):
    """Embedding -> single-layer LSTM -> linear head.

    The attribute names (`embedding`, `lstm`, `fc`) are the prefixes of the
    state_dict keys, so they have to stay in sync with the saved checkpoint.

    Args:
        vocab_size: number of rows in the embedding table.
        embedding_dim: size of each token embedding.
        hidden_dim: size of the LSTM hidden state.
        output_dim: number of raw (pre-activation) outputs per sequence.
    """

    def __init__(self, vocab_size, embedding_dim, hidden_dim, output_dim):
        super().__init__()
        self.embedding = nn.Embedding(
            num_embeddings=vocab_size, embedding_dim=embedding_dim
        )
        self.lstm = nn.LSTM(
            input_size=embedding_dim, hidden_size=hidden_dim, batch_first=True
        )
        self.fc = nn.Linear(in_features=hidden_dim, out_features=output_dim)

    def forward(self, text):
        """Map a (batch, seq_len) tensor of token ids to (batch, output_dim) logits."""
        token_vectors = self.embedding(text)
        _step_outputs, (final_hidden, _final_cell) = self.lstm(token_vectors)
        # final_hidden carries a leading layer axis of size 1; drop it before the head.
        sequence_summary = final_hidden.squeeze(0)
        return self.fc(sequence_summary)

# Define GRU Model Architecture (assuming a basic setup)
class GRUModel(nn.Module):
    """Embedding -> single-layer GRU -> linear head.

    The attribute names (`embedding`, `gru`, `fc`) are the prefixes of the
    state_dict keys, so they have to stay in sync with the saved checkpoint.

    Args:
        vocab_size: number of rows in the embedding table.
        embedding_dim: size of each token embedding.
        hidden_dim: size of the GRU hidden state.
        output_dim: number of raw (pre-activation) outputs per sequence.
    """

    def __init__(self, vocab_size, embedding_dim, hidden_dim, output_dim):
        super().__init__()
        self.embedding = nn.Embedding(
            num_embeddings=vocab_size, embedding_dim=embedding_dim
        )
        self.gru = nn.GRU(
            input_size=embedding_dim, hidden_size=hidden_dim, batch_first=True
        )
        self.fc = nn.Linear(in_features=hidden_dim, out_features=output_dim)

    def forward(self, text):
        """Map a (batch, seq_len) tensor of token ids to (batch, output_dim) logits."""
        token_vectors = self.embedding(text)
        _step_outputs, final_hidden = self.gru(token_vectors)
        # final_hidden carries a leading layer axis of size 1; drop it before the head.
        sequence_summary = final_hidden.squeeze(0)
        return self.fc(sequence_summary)

# LOAD TOKENIZER
# SECURITY: pickle.load executes whatever code the file embeds. Only point this at
# a tokenizer file you produced yourself, never at an untrusted upload.
with open(f"{BASE_PATH}/rnn_tokenizer.pkl", "rb") as f:
    rnn_tokenizer = pickle.load(f)

# Fixed sequence length fed to the RNN models (see preprocess_rnn).
MAX_LEN = 200
# Corrected model parameters based on checkpoint (from error message)
MODEL_VOCAB_SIZE = 20000    # As indicated by embedding.weight shape in error
EMBEDDING_DIM = 128         # As indicated by embedding.weight shape in error
HIDDEN_DIM = 128            # As indicated by lstm.weight_ih_l0 shape in error
OUTPUT_DIM = 1              # Binary classification

# LOAD LSTM
# weights_only=True restricts unpickling to tensors and plain containers, which is
# all a state_dict needs, so a tampered checkpoint cannot run arbitrary code.
lstm_model = LSTMModel(MODEL_VOCAB_SIZE, EMBEDDING_DIM, HIDDEN_DIM, OUTPUT_DIM).to(DEVICE)
lstm_model.load_state_dict(
    torch.load(f"{BASE_PATH}/lstm_model.pt", map_location=DEVICE, weights_only=True)
)
lstm_model.eval()  # inference mode

# LOAD GRU
gru_model = GRUModel(MODEL_VOCAB_SIZE, EMBEDDING_DIM, HIDDEN_DIM, OUTPUT_DIM).to(DEVICE)
gru_model.load_state_dict(
    torch.load(f"{BASE_PATH}/gru_model.pt", map_location=DEVICE, weights_only=True)
)
gru_model.eval()  # inference mode

# LOAD BERT
# Tokenizer and fine-tuned classifier are both read from the same saved directory.
bert_tokenizer = BertTokenizer.from_pretrained(
    f"{BASE_PATH}/bert/final_model"
)
bert_model = BertForSequenceClassification.from_pretrained(
    f"{BASE_PATH}/bert/final_model"
).to(DEVICE)
bert_model.eval()  # inference mode

# HELPERS
def preprocess_rnn(text):
    """Turn one email into the fixed-length id tensor the RNN models consume.

    The text is tokenized with `rnn_tokenizer`, ids that do not fit the embedding
    table (>= MODEL_VOCAB_SIZE) are replaced by 0, and the sequence is truncated
    to MAX_LEN ids and right-padded with 0.

    NOTE(review): zeros are appended after the tokens (post-padding). Keras'
    pad_sequences pads at the front by default - confirm this matches the way
    the sequences were padded during training.

    Returns:
        torch.LongTensor of shape (1, MAX_LEN).
    """
    raw_ids = rnn_tokenizer.texts_to_sequences([text])[0]

    # Keep at most MAX_LEN ids and zero out the ones outside the embedding table.
    kept_ids = [
        token_id if token_id < MODEL_VOCAB_SIZE else 0
        for token_id in raw_ids[:MAX_LEN]
    ]

    padded = np.zeros((1, MAX_LEN))
    padded[0, :len(kept_ids)] = kept_ids
    return torch.tensor(padded, dtype=torch.long)

def predict_rnn(model, text):
    """Classify `text` with one of the RNN models.

    Args:
        model: callable returning a single logit for the preprocessed batch
            (lstm_model or gru_model).
        text: raw email text.

    Returns:
        (label, confidence): label is "Phishing" when the sigmoid of the logit
        is >= 0.5, otherwise "Legitimate"; confidence is the probability of the
        returned label.
    """
    batch = preprocess_rnn(text)
    with torch.no_grad():
        logit = model(batch)
    phishing_prob = torch.sigmoid(logit).item()

    if phishing_prob >= 0.5:
        return "Phishing", phishing_prob
    return "Legitimate", 1 - phishing_prob

def predict_bert(text):
    """Classify `text` with the fine-tuned BERT model.

    The text is tokenized (truncated to at most 512 tokens), scored, and the
    softmax over the logits picks the class: index 1 maps to "Phishing", any
    other index to "Legitimate".

    Returns:
        (label, confidence): confidence is the softmax probability of the
        winning class.
    """
    encoded = bert_tokenizer(
        text,
        return_tensors="pt",
        truncation=True,
        padding=True,
        max_length=512,
    ).to(DEVICE)

    with torch.no_grad():
        logits = bert_model(**encoded).logits
        class_probs = torch.softmax(logits, dim=1)
        confidence, predicted_class = class_probs.max(dim=1)

    if predicted_class.item() == 1:
        label = "Phishing"
    else:
        label = "Legitimate"
    return label, confidence.item()

Overwriting /content/api/models.py


In [128]:
#@title Write main.py
%%writefile /content/api/main.py
from fastapi import FastAPI, HTTPException
from schemas import EmailRequest, PredictionResponse
from models import predict_rnn, predict_bert, lstm_model, gru_model

# ASGI application object; the notebook serves it with `uvicorn main:app`.
app = FastAPI(
    title="AI-Generated Phishing Detection API",
    version="1.0"
)

@app.post("/predict", response_model=PredictionResponse)
def predict(request: EmailRequest):
    """Score an email with the requested model.

    The model name is matched case-insensitively against "bert", "lstm" and
    "gru"; any other value is rejected with HTTP 400.

    Returns:
        PredictionResponse with the lower-cased model name, the predicted label
        and the confidence rounded to 4 decimals.
    """
    email_text = request.text
    model_name = request.model.lower()

    # Model name -> callable that returns (label, confidence) for an email.
    predictors = {
        "bert": predict_bert,
        "lstm": lambda email: predict_rnn(lstm_model, email),
        "gru": lambda email: predict_rnn(gru_model, email),
    }

    run_model = predictors.get(model_name)
    if run_model is None:
        raise HTTPException(
            status_code=400,
            detail="Invalid model. Choose from: bert, lstm, gru"
        )

    label, confidence = run_model(email_text)
    return PredictionResponse(
        model=model_name,
        prediction=label,
        confidence=round(confidence, 4)
    )

Overwriting /content/api/main.py


In [141]:
#@title Run FastAPI
import subprocess
import time

# Kill any processes running on port 8000 or any uvicorn process
!pkill -f uvicorn || true
!fuser -k 8000/tcp || true

%cd /content/api

# Start uvicorn in the background using nohup
!nohup uvicorn main:app --host 0.0.0.0 --port 8000 > nohup.out 2>&1 &

print("FastAPI server starting in the background...")
time.sleep(5) # Give it some time to start
print("FastAPI server should be running.")

^C
/content/api
FastAPI server starting in the background...
FastAPI server should be running.


In [142]:
#@title Display FastAPI Server Logs
# nohup.out is a relative path: this relies on the working directory still being
# /content/api, which the "Run FastAPI" cell sets with %cd before starting uvicorn.
!cat nohup.out

2026-01-20 18:07:31.955610: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2026-01-20 18:07:31.960874: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2026-01-20 18:07:31.975648: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1768932452.002328   27993 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1768932452.010904   27993 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1768932452.029848   27993 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linkin

In [143]:
#@title Test API Locally (with model selector)
import requests

API_URL = "http://127.0.0.1:8000/predict"
# Upper bound for one request; without it a stuck server would block the cell forever.
REQUEST_TIMEOUT_SECONDS = 60

email_text = "Dear Customer,   Your bank account has been temporarily suspended due to suspicious activity.  Please click the link below to verify your account immediately:  http://verify-bank-login.com" #@param {type:"string"}
model_selector = "bert" #@param ["bert", "lstm", "gru"]

payload = {
    "text": email_text,
    "model": model_selector
}

response = requests.post(API_URL, json=payload, timeout=REQUEST_TIMEOUT_SECONDS)

print("Status Code:", response.status_code)
try:
    print("Response:", response.json())
except ValueError:
    # The body was not JSON (e.g. a proxy or server error page); show it as-is.
    print("Response (not JSON):", response.text)

Status Code: 200
Response: {'model': 'bert', 'prediction': 'Phishing', 'confidence': 0.9971}


In [144]:
#@title Install Cloudflare Tunnel
!wget https://github.com/cloudflare/cloudflared/releases/latest/download/cloudflared-linux-amd64.deb
!dpkg -i cloudflared-linux-amd64.deb
!cloudflared --version

--2026-01-20 18:08:10--  https://github.com/cloudflare/cloudflared/releases/latest/download/cloudflared-linux-amd64.deb
Resolving github.com (github.com)... 140.82.116.4
Connecting to github.com (github.com)|140.82.116.4|:443... connected.
HTTP request sent, awaiting response... 302 Found
Location: https://github.com/cloudflare/cloudflared/releases/download/2026.1.1/cloudflared-linux-amd64.deb [following]
--2026-01-20 18:08:10--  https://github.com/cloudflare/cloudflared/releases/download/2026.1.1/cloudflared-linux-amd64.deb
Reusing existing connection to github.com:443.
HTTP request sent, awaiting response... 302 Found
Location: https://release-assets.githubusercontent.com/github-production-release-asset/106867604/12aae843-3db0-4414-9b56-e2a442db3e76?sp=r&sv=2018-11-09&sr=b&spr=https&se=2026-01-20T18%3A53%3A46Z&rscd=attachment%3B+filename%3Dcloudflared-linux-amd64.deb&rsct=application%2Foctet-stream&skoid=96c2d410-5711-43a1-aedd-ab1947aa7ab0&sktid=398a6654-997b-47e9-b12b-9515b896b4de&

In [145]:
#@title Start Cloudflare Tunnel
import time
import re
import os

# Kill any existing cloudflared process to ensure a clean start
!pkill -f cloudflared || true

# Start cloudflared in the background using nohup
!nohup cloudflared tunnel --url http://127.0.0.1:8000 > cloudflared.out 2>&1 &

print("Cloudflare tunnel starting in the background...")
time.sleep(5) # Give it some time to start the process

tunnel_url = None
start_time = time.time()
timeout = 60 # seconds

while not tunnel_url and (time.time() - start_time < timeout):
    if os.path.exists('cloudflared.out'):
        with open('cloudflared.out', 'r') as f:
            output = f.read()
            # Regex to find the URL, assuming it's in the format https://<subdomain>.trycloudflare.com
            match = re.search(r'https://[a-zA-Z0-9-]+\.trycloudflare\.com', output)
            if match:
                tunnel_url = match.group(0)
                break
    time.sleep(2) # Check every 2 seconds

if tunnel_url:
    print(f"Cloudflare tunnel is running. Public URL: {tunnel_url}")
    # Update the CLOUDFLARE_TUNNEL_URL variable for future use
    global CLOUDFLARE_TUNNEL_URL
    CLOUDFLARE_TUNNEL_URL = tunnel_url + '/predict'
else:
    print("Could not find Cloudflare tunnel URL within the timeout period.")
    print("Last few lines of cloudflared.out:")
    if os.path.exists('cloudflared.out'):
        with open('cloudflared.out', 'r') as f:
            print(f.readlines()[-10:])
    else:
        print("cloudflared.out not found.")


^C
Cloudflare tunnel starting in the background...
Cloudflare tunnel is running. Public URL: https://analyst-stewart-cloth-subscribe.trycloudflare.com
