<a href="https://colab.research.google.com/github/priyam197/codsoft/blob/main/spam_msg_detection.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

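# Spam Message Detection

Trains Naive Bayes, Logistic Regression and linear SVM classifiers on TF-IDF features of the messages in `spam.csv`, compares them on a held-out test split, and saves a model and vectorizer for a Streamlit app.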

In [None]:
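# Install required libraries (uncomment on a fresh environment).
# joblib and streamlit are listed because later cells call joblib.dump and `streamlit run`.
# %pip install -q scikit-learn pandas matplotlib seaborn joblib streamlit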

In [None]:
# Imports
import joblib
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import (
    accuracy_score, classification_report, confusion_matrix, ConfusionMatrixDisplay
)
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.preprocessing import LabelEncoder
from sklearn.svm import LinearSVC


In [None]:
# Load Dataset
# Only the label (v1) and message text (v2) columns of spam.csv are kept.
df = pd.read_csv("spam.csv", encoding='latin1')[['v1', 'v2']]
df.columns = ['label', 'message']

# Encode Labels: ham -> 0, spam -> 1
# LabelEncoder numbers the sorted class names, so verify the classes are exactly
# ham/spam before later cells rely on 0 meaning "Ham" and 1 meaning "Spam".
label_encoder = LabelEncoder()
df['label'] = label_encoder.fit_transform(df['label'])
assert list(label_encoder.classes_) == ['ham', 'spam'], (
    f"unexpected label values: {list(label_encoder.classes_)}"
)

# Train-Test Split
# stratify keeps the ham/spam proportion the same in the train and test sets,
# so the held-out metrics are not skewed by how the random split happened to fall.
X_train, X_test, y_train, y_test = train_test_split(
    df['message'],
    df['label'],
    test_size=0.2,
    random_state=42,
    stratify=df['label'],
)


In [None]:
# Turn the raw message text into TF-IDF feature matrices.
# English stop words are removed, and max_df=0.9 is passed as the upper
# document-frequency cut-off for terms.
vectorizer = TfidfVectorizer(max_df=0.9, stop_words="english")

# The vectorizer is fitted on the training messages only; the test messages
# are transformed with that already-fitted vectorizer.
X_train_tfidf = vectorizer.fit_transform(X_train)
X_test_tfidf = vectorizer.transform(X_test)


In [None]:
# Define Models
# All candidates are trained on the same TF-IDF features.
models = {
    "Naive Bayes": MultinomialNB(),
    "Logistic Regression": LogisticRegression(max_iter=1000),
    # LinearSVC uses a random number generator while fitting; seeding it makes
    # the reported metrics reproducible from one run to the next.
    "Support Vector Machine": LinearSVC(random_state=42),
}

# Train & Evaluate
# results maps model name -> fitted model, test accuracy, per-class report
# (as a dict) and the test-set predictions.
results = {}
for name, model in models.items():
    model.fit(X_train_tfidf, y_train)
    y_pred = model.predict(X_test_tfidf)
    results[name] = {
        "model": model,
        "accuracy": accuracy_score(y_test, y_pred),
        "report": classification_report(
            y_test, y_pred, target_names=["Ham", "Spam"], output_dict=True
        ),
        "y_pred": y_pred,
    }

# Display Summary Table
# One row per model; every metric is shown as a percentage rounded to 2 decimals.
summary = [
    {
        "Model": name,
        "Accuracy": round(metrics["accuracy"] * 100, 2),
        "Spam Precision": round(metrics["report"]["Spam"]["precision"] * 100, 2),
        "Spam Recall": round(metrics["report"]["Spam"]["recall"] * 100, 2),
        "Spam F1-Score": round(metrics["report"]["Spam"]["f1-score"] * 100, 2),
    }
    for name, metrics in results.items()
]

summary_df = pd.DataFrame(summary)
summary_df


In [None]:
# Plot confusion matrices in a row, one panel per trained model
n_models = len(results)
colors = ['Purples', 'Greens', 'Oranges']

# Size the figure from the number of models instead of assuming three.
# squeeze=False keeps `axes` a 2-D array for any count (including a single
# model), so it can always be flattened and iterated.
fig, axes = plt.subplots(1, n_models, figsize=(6 * n_models, 5), squeeze=False)

for idx, (ax, (name, metrics)) in enumerate(zip(axes.ravel(), results.items())):
    cm = confusion_matrix(y_test, metrics["y_pred"])
    disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=["Ham", "Spam"])
    # Colormaps are reused cyclically if there are more models than colormaps.
    disp.plot(ax=ax, cmap=colors[idx % len(colors)], values_format='d', colorbar=False)
    ax.set_title(f"{name}")
    ax.grid(False)

plt.suptitle("Confusion Matrices of Classifiers", fontsize=16)
plt.tight_layout()
plt.show()


In [None]:
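# %%writefile app.py
# NOTE: this cell is a commented-out stub, so running it does not create app.py.
# The `streamlit run app.py` cell further down needs app.py to already exist in
# the working directory.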

In [None]:
# Save after training
# Pick the classifier to persist explicitly by name. Relying on the leftover
# loop variable from the training cell would silently save whichever model
# happened to be trained last. "Support Vector Machine" is the last entry of
# `models`, so this is the same fitted object that was saved before.
MODEL_TO_SAVE = "Support Vector Machine"

joblib.dump(results[MODEL_TO_SAVE]["model"], "spam_model.pkl")
# The fitted vectorizer is saved too, so new messages can be transformed the
# same way as the training data.
joblib.dump(vectorizer, "tfidf_vectorizer.pkl")


In [None]:
# Fetch ipv4.icanhazip.com quietly (-q) and write the response to stdout (-O -).
# NOTE(review): the response is presumably this machine's public IPv4 address,
# wanted as the password for the localtunnel page opened below — confirm.
! wget -q -O - ipv4.icanhazip.com

In [None]:
# !npm install localtunnel

In [None]:
# Start the Streamlit app from app.py in the background (trailing `&`) with its
# output redirected to /content/logs.txt, so the cell returns immediately.
# NOTE(review): app.py must already exist in the working directory; the
# `%%writefile app.py` cell in this notebook is commented out.
!streamlit run app.py &>/content/logs.txt &

In [106]:
# Open a localtunnel to local port 8501; the command prints the public URL.
# NOTE(review): 8501 is presumably the port the Streamlit app listens on — confirm.
# The recorded output shows this run ended with a manual interrupt (^C).
!npx localtunnel --port 8501

[1G[0K⠙[1G[0Kyour url is: https://floppy-lions-stick.loca.lt
^C
