<a href="https://colab.research.google.com/github/leviii008/CMP7239/blob/main/atif_2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split, GridSearchCV, cross_val_score
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, roc_curve
import joblib
import os

# Load dataset
file_path = "/content/Spam Detection .csv"
df = pd.read_csv(file_path)

# Rename label column
df.rename(columns={df.columns[-1]: 'label'}, inplace=True)

# Normalize features
scaler = MinMaxScaler()
features = df.drop('label', axis=1)
features_scaled = scaler.fit_transform(features)
X = pd.DataFrame(features_scaled, columns=features.columns)
y = df['label']

# Split data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, stratify=y, random_state=42)

# Initialize models
nb_model = GaussianNB()
rf_model = RandomForestClassifier(n_estimators=100, random_state=42)
svm_model = SVC(kernel='linear', probability=True)

# Train models
nb_model.fit(X_train, y_train)
rf_model.fit(X_train, y_train)
svm_model.fit(X_train, y_train)

# Predict
y_pred_nb = nb_model.predict(X_test)
y_pred_rf = rf_model.predict(X_test)
y_pred_svm = svm_model.predict(X_test)

# Evaluate
metrics = {
    "Model": ["Naive Bayes", "Random Forest", "SVM"],
    "Accuracy": [
        accuracy_score(y_test, y_pred_nb),
        accuracy_score(y_test, y_pred_rf),
        accuracy_score(y_test, y_pred_svm),
    ],
    "Precision": [
        precision_score(y_test, y_pred_nb),
        precision_score(y_test, y_pred_rf),
        precision_score(y_test, y_pred_svm),
    ],
    "Recall": [
        recall_score(y_test, y_pred_nb),
        recall_score(y_test, y_pred_rf),
        recall_score(y_test, y_pred_svm),
    ],
    "F1 Score": [
        f1_score(y_test, y_pred_nb),
        f1_score(y_test, y_pred_rf),
        f1_score(y_test, y_pred_svm),
    ],
    "ROC AUC": [
        roc_auc_score(y_test, nb_model.predict_proba(X_test)[:, 1]),
        roc_auc_score(y_test, rf_model.predict_proba(X_test)[:, 1]),
        roc_auc_score(y_test, svm_model.predict_proba(X_test)[:, 1]),
    ]
}

# Display results
metrics_df = pd.DataFrame(metrics)
print("\n=== Model Performance Metrics ===")
print(metrics_df)

# Save model files
model_dir = "/content/"
os.makedirs(model_dir, exist_ok=True)
joblib.dump(nb_model, os.path.join(model_dir, "naive_bayes_model.joblib"))
joblib.dump(rf_model, os.path.join(model_dir, "random_forest_model.joblib"))
joblib.dump(svm_model, os.path.join(model_dir, "svm_model.joblib"))



=== Model Performance Metrics ===
           Model  Accuracy  Precision    Recall  F1 Score   ROC AUC
0    Naive Bayes  0.819696   0.700136  0.948529  0.805621  0.915675
1  Random Forest  0.955829   0.958254  0.928309  0.943044  0.985931
2            SVM  0.899348   0.910751  0.825368  0.865959  0.956431


['/content/svm_model.joblib']

In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive
