###Ensemble Combination for SMS Spam Classification

Import Libraries

In [1]:
!pip install ucimlrepo

import pandas as pd
import numpy as np
import os

from ucimlrepo import fetch_ucirepo

from sklearn.model_selection import StratifiedKFold
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.pipeline import Pipeline
from sklearn.metrics import (
    precision_score, recall_score, f1_score,
    roc_auc_score, confusion_matrix
)

from sklearn.naive_bayes import MultinomialNB
from sklearn.linear_model import LogisticRegression
from sklearn.svm import LinearSVC
from sklearn.ensemble import VotingClassifier, StackingClassifier, AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.base import clone

Collecting ucimlrepo
  Downloading ucimlrepo-0.0.7-py3-none-any.whl.metadata (5.5 kB)
Downloading ucimlrepo-0.0.7-py3-none-any.whl (8.0 kB)
Installing collected packages: ucimlrepo
Successfully installed ucimlrepo-0.0.7


Upload Zip File

In [3]:
from google.colab import files

uploaded = files.upload()

for fn in uploaded.keys():
  print(f'User uploaded file "{fn}" with length {len(uploaded[fn])} bytes')

Saving sms+spam+collection.zip to sms+spam+collection.zip
User uploaded file "sms+spam+collection.zip" with length 203415 bytes


Unzip File

In [4]:
!unzip 'sms+spam+collection.zip'

Archive:  sms+spam+collection.zip
  inflating: SMSSpamCollection       
  inflating: readme                  


Encode Labels

In [6]:
df = pd.read_csv('SMSSpamCollection', sep='\t', names=['label', 'message'])
le = LabelEncoder()
y = le.fit_transform(df["label"])   # spam=1, ham=0
X = df["message"]

Define Models

In [7]:

nb = MultinomialNB()
lr = LogisticRegression(max_iter=2000)
svm = LinearSVC()

voting_hard = VotingClassifier(
    estimators=[('nb', nb), ('lr', lr), ('svm', svm)],
    voting='hard'
)

voting_soft = VotingClassifier(
    estimators=[('nb', nb), ('lr', lr)],
    voting='soft'
)

stacking = StackingClassifier(
    estimators=[('nb', nb), ('lr', lr)],
    final_estimator=LogisticRegression()
)

stump = DecisionTreeClassifier(max_depth=1)

adaboost = AdaBoostClassifier(
    estimator=stump,
    n_estimators=100,
    learning_rate=1.0
)

models = {
    "NaiveBayes": nb,
    "LogisticRegression": lr,
    "LinearSVM": svm,
    "VotingHard": voting_hard,
    "VotingSoft": voting_soft,
    "Stacking": stacking,
    "AdaBoost_Stumps": adaboost
}

K-Fold Evaluation

In [8]:
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

results = []

for name, model in models.items():

    precision_scores = []
    recall_scores = []
    f1_scores = []
    roc_scores = []

    all_true = []
    all_pred = []
    all_prob = []

    for train_idx, test_idx in skf.split(X, y):

        X_train, X_test = X.iloc[train_idx], X.iloc[test_idx]
        y_train, y_test = y[train_idx], y[test_idx]

        pipeline = Pipeline([
            ("tfidf", TfidfVectorizer(stop_words="english")),
            ("model", clone(model))
        ])

        pipeline.fit(X_train, y_train)
        y_pred = pipeline.predict(X_test)

        if hasattr(pipeline.named_steps["model"], "predict_proba"):
            y_prob = pipeline.predict_proba(X_test)[:, 1]
        else:
            y_prob = y_pred

        precision_scores.append(precision_score(y_test, y_pred))
        recall_scores.append(recall_score(y_test, y_pred))
        f1_scores.append(f1_score(y_test, y_pred))
        roc_scores.append(roc_auc_score(y_test, y_prob))

        all_true.extend(y_test)
        all_pred.extend(y_pred)
        all_prob.extend(y_prob)

    cm = confusion_matrix(all_true, all_pred)

    results.append({
        "Model": name,
        "Precision_mean": np.mean(precision_scores),
        "Recall_mean": np.mean(recall_scores),
        "F1_mean": np.mean(f1_scores),
        "ROC_AUC_mean": np.mean(roc_scores)
    })

    print("\n==============================")
    print(name)
    print("Confusion Matrix:")
    print(cm)
    print("Precision:", np.mean(precision_scores))
    print("Recall:", np.mean(recall_scores))
    print("F1:", np.mean(f1_scores))
    print("ROC-AUC:", np.mean(roc_scores))



NaiveBayes
Confusion Matrix:
[[4824    1]
 [ 163  584]]
Precision: 0.9982905982905983
Recall: 0.7817539149888143
F1: 0.8767103149701094
ROC-AUC: 0.9878375908474458

LogisticRegression
Confusion Matrix:
[[4817    8]
 [ 205  542]]
Precision: 0.9855103088600383
Recall: 0.725565995525727
F1: 0.8356855749679848
ROC-AUC: 0.9909214081209214

LinearSVM
Confusion Matrix:
[[4814   11]
 [  72  675]]
Precision: 0.9841641185736382
Recall: 0.903606263982103
F1: 0.9419213710396976
ROC-AUC: 0.9506632356179944

VotingHard
Confusion Matrix:
[[4821    4]
 [ 128  619]]
Precision: 0.9935864773394277
Recall: 0.8286353467561522
F1: 0.9036315449256624
ROC-AUC: 0.9139031656060554

VotingSoft
Confusion Matrix:
[[4822    3]
 [ 172  575]]
Precision: 0.9947818126335062
Recall: 0.7697449664429531
F1: 0.8678541983686092
ROC-AUC: 0.9914424453176618

Stacking
Confusion Matrix:
[[4814   11]
 [  89  658]]
Precision: 0.9835395898410845
Recall: 0.8808501118568233
F1: 0.929270599932547
ROC-AUC: 0.9911847364699609

AdaBoos

Save File

In [9]:
results_df = pd.DataFrame(results)
results_df.to_csv("comparison.csv", index=False)

print("\nSaved: comparison.csv")



Saved: comparison.csv


Train Model on full data

In [10]:

best_model_name = results_df.sort_values(
    by="F1_mean", ascending=False
).iloc[0]["Model"]

best_model = models[best_model_name]

final_pipeline = Pipeline([
    ("tfidf", TfidfVectorizer(stop_words="english")),
    ("model", clone(best_model))
])

final_pipeline.fit(X, y)

final_pred = final_pipeline.predict(X)

if hasattr(final_pipeline.named_steps["model"], "predict_proba"):
    final_prob = final_pipeline.predict_proba(X)[:, 1]
else:
    final_prob = final_pred

prediction_df = pd.DataFrame({
    "MessageId": range(len(X)),
    "Actual": y,
    "Predicted": final_pred,
    "Probability": final_prob
})

prediction_df.to_csv("final_model_predictions.csv", index=False)

print("Best Model:", best_model_name)
print("Saved: final_model_predictions.csv")

Best Model: LinearSVM
Saved: final_model_predictions.csv


#Conclusion
For SMS spam detection using TF-IDF features, ensemble methods outperform individual classifiers because they combine complementary strengths. Soft Voting improves performance by averaging prediction probabilities, while Stacking further enhances generalization by training a meta-learner to optimally combine base models. AdaBoost with decision stumps improves weak learners sequentially but may not fully exploit high-dimensional text features. Therefore, Stacking is generally the best combining strategy due to higher F1-score and ROC-AUC with stable cross-validation performance.
