In [14]:
!pip install gradio




In [15]:
import pandas as pd
import re
import string
from imblearn.over_sampling import SMOTE
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report

try:
    from google.colab import files
    uploaded = files.upload()
    filename = next(iter(uploaded))
    df = pd.read_csv(filename, encoding="ISO-8859-1")
except ImportError:
    df = pd.read_csv("spam.csv", encoding="ISO-8859-1")

df = df.iloc[:, [0, 1]]
df.columns = ['label', 'message']
df["label"] = df["label"].map({"spam": 1, "ham": 0})  # Convert labels to 0 & 1

def preprocess_text(text):
    text = text.lower()  # Convert to lowercase
    text = re.sub(r'\d+', ' NUMERIC ', text)  # Replace numbers with 'NUMERIC'
    text = text.translate(str.maketrans("", "", string.punctuation))  # Remove punctuation
    text = re.sub(r'\b(?:http|www)\S+\b', " URL ", text)  # Replace links with 'URL'

    phishing_words = [
        "bank", "paypal", "verify", "suspended", "click", "win", "account", "urgent",
        "free", "gift", "prize", "password", "reset", "confirm", "login", "update",
        "security", "suspicious", "fraud", "locked", "warning", "hacked", "chat",
        "private", "leaked", "guaranteed", "money", "cash", "loan", "credit",
        "funds", "investment", "deposit", "lottery", "bet", "transfer", "offer",
        "exclusive", "urgent action", "congratulations", "limited time", "winner",
        "bonus", "payout", "jackpot", "risk-free", "profit", "double your money",
        "safe", "verify identity", "verify now", "act now", "confirm your details",
        "chit fund", "fake investment", "scam", "crypto", "NFT giveaway"
    ]

    for word in phishing_words:
        text = re.sub(r'\b' + word + r'\b', f" {word.upper()} ", text)

    return text

df["clean_message"] = df["message"].apply(preprocess_text)

vectorizer = CountVectorizer(ngram_range=(1, 3), min_df=1)
X = vectorizer.fit_transform(df["clean_message"])
y = df["label"]

smote = SMOTE(random_state=42)
X_resampled, y_resampled = smote.fit_resample(X, y)

X_train, X_test, y_train, y_test = train_test_split(X_resampled, y_resampled, test_size=0.2, random_state=42)

model = RandomForestClassifier(n_estimators=200, random_state=42)
model.fit(X_train, y_train)

y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(" Accuracy:", accuracy)
print("\n Classification Report:\n", classification_report(y_test, y_pred))

def predict_spam(text):
    text = preprocess_text(text)

    spam_triggers = [
        "MONEY", "CASH", "LOAN", "FUNDS", "INVESTMENT", "LOTTERY", "BET", "TRANSFER",
        "PROFIT", "DOUBLE YOUR MONEY", "PAYOUT", "SAFE", "CRYPTO", "NFT GIVEAWAY",
        "SCAM", "GUARANTEED", "BANK", "VERIFY", "SUSPENDED", "CLICK", "URGENT",
        "CONGRATULATIONS", "FREE", "WARNING", "HACKED", "CHAT", "PRIVATE", "LEAKED",
        "GIFT", "PASSWORD", "SECURITY", "RISK-FREE", "LIMITED TIME", "WINNER"
    ]

    if any(word in text.upper() for word in spam_triggers):
        return " 100% Spam (Rule-Based Detection)"

    text_vectorized = vectorizer.transform([text])
    prediction = model.predict(text_vectorized)[0]

    return " Spam" if prediction == 1 else " Not Spam"

test_messages = [

    "Congratulations! You won a free iPhone. Click here to claim.",
    "Your PayPal account has been suspended due to suspicious activity. Verify now.",
    "Urgent: Your bank account has been locked. Click here to verify your information.",
    "Hey, let's catch up tomorrow for lunch!",
    "Congratulations! You've won a $500 Amazon gift card! Claim it now.",
    "Urgent! Your bank account is about to be locked. Login now to confirm your identity.",
    "Guaranteed returns! Invest $100 today and earn $1000 in just a week! Limited spots available.",
    "Hey gorgeous, I saw your profile and I think we’re meant to be. Let’s chat on this private app.",
    "Get a personal loan of $10,000 at 0% interest for 3 months! Apply now!",
    "Earn $500 daily by just working 2 hours a day! Click here to start!",
    "You have been selected for an exclusive crypto investment opportunity! Double your money in a week!",
    "URGENT: Your Netflix subscription will be suspended unless you verify your payment method now!",
    "We are offering a $10,000 payout for participating in this research study!",
    "Final notice! Your credit card has been compromised. Act now to prevent fraud!",
    "Congratulations! You are our lucky winner. Claim your exclusive payout now.",
]

for msg in test_messages:
    print(f" Message: {msg}\n Prediction: {predict_spam(msg)}\n")

Saving spam.csv to spam (2).csv
 Accuracy: 0.9642487046632124

 Classification Report:
               precision    recall  f1-score   support

           0       0.98      0.95      0.96       985
           1       0.95      0.98      0.96       945

    accuracy                           0.96      1930
   macro avg       0.96      0.96      0.96      1930
weighted avg       0.97      0.96      0.96      1930

 Message: Congratulations! You won a free iPhone. Click here to claim.
 Prediction:  100% Spam (Rule-Based Detection)

 Message: Your PayPal account has been suspended due to suspicious activity. Verify now.
 Prediction:  100% Spam (Rule-Based Detection)

 Message: Urgent: Your bank account has been locked. Click here to verify your information.
 Prediction:  100% Spam (Rule-Based Detection)

 Message: Hey, let's catch up tomorrow for lunch!
 Prediction:  Not Spam

 Message: Congratulations! You've won a $500 Amazon gift card! Claim it now.
 Prediction:  100% Spam (Rule-Based De

In [12]:
!pip install gradio joblib scikit-learn





In [13]:
import gradio as gr
import joblib
import pickle
from sklearn.feature_extraction.text import CountVectorizer

# Load your trained model and vectorizer
model = joblib.load("spam_model.pkl")
vectorizer = pickle.load(open("vectorizer.pkl", "rb"))

# Define text preprocessing
def preprocess_text(text):
    text = text.lower()
    text = re.sub(r'\d+', ' NUMERIC ', text)
    text = text.translate(str.maketrans("", "", string.punctuation))
    text = re.sub(r'\b(?:http|www)\S+\b', " URL ", text)
    return text

# Function to predict spam
def predict_spam(message):
    message = preprocess_text(message)
    message_vectorized = vectorizer.transform([message])
    prediction = model.predict(message_vectorized)[0]
    return " Spam" if prediction == 1 else " Not Spam"

# Launch Gradio app
gr.Interface(fn=predict_spam, inputs="text", outputs="text").launch(share=True)


Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://9650d9bac69f5fb9e2.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)




In [None]:
print("https://82a0950aca321a1776.gradio.live", gr.Interface(fn=predict_spam, inputs="text", outputs="text").launch(share=True))


Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://1e6e03ebf50d4fe674.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


https://82a0950aca321a1776.gradio.live 
