In [1]:

# SMS Spam Detection using Ensemble Models (NB + SVM + RF)

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.svm import LinearSVC
from sklearn.ensemble import RandomForestClassifier, VotingClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
import matplotlib.pyplot as plt
import seaborn as sns

# Load Dataset
df = pd.read_csv('siri.csv')
df['label'] = df['label'].map({'ham': 0, 'spam': 1})

# Split data into training and testing sets (80/20 split)
X_train, X_test, y_train, y_test = train_test_split(df['message'], df['label'], test_size=0.2, random_state=42)

# Vectorization
vectorizer = TfidfVectorizer(max_features=5000, ngram_range=(1,2), stop_words='english')
X_train_vec = vectorizer.fit_transform(X_train)
X_test_vec = vectorizer.transform(X_test)

# Initialize models
nb = MultinomialNB(alpha=0.1)
svm = LinearSVC()
rf = RandomForestClassifier(n_estimators=200, random_state=42)

# Create an ensemble of models
ensemble = VotingClassifier(estimators=[('nb', nb), ('svm', svm), ('rf', rf)], voting='soft')

# Train the models
nb.fit(X_train_vec, y_train)
svm.fit(X_train_vec, y_train)
rf.fit(X_train_vec, y_train)
ensemble.fit(X_train_vec, y_train)

# Evaluate models on test set
models = [nb, svm, rf, ensemble]
model_names = ['Naive Bayes', 'SVM (LinearSVC)', 'Random Forest', 'Ensemble (NB+SVM+RF)']
accuracies = []

for model in models:
    y_pred = model.predict(X_test_vec)
    acc = accuracy_score(y_test, y_pred)
    accuracies.append(acc)

# Plotting the Test Accuracy for all models
plt.figure(figsize=(10, 6))
plt.bar(model_names, accuracies, color='skyblue')
plt.title("Test Accuracy on Different Models")
plt.xlabel("Model")
plt.ylabel("Accuracy")
for i in range(len(model_names)):
    plt.text(i, accuracies[i] + 0.02, f'{accuracies[i]:.4f}', ha='center', va='bottom')
plt.xticks(rotation=45, ha='right')
plt.show()


AttributeError: 'LinearSVC' object has no attribute 'predict_proba'