In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import classification_report, accuracy_score, confusion_matrix
from sklearn.feature_extraction.text import TfidfVectorizer

# Load dataset (example: assumes 'message' and 'label' columns)
data = pd.read_csv('/content/spam.csv')  # 'message' = text, 'label' = 1 or 0

# Extract features and labels
X_text = data['Message']
y = data['Category']

# Convert text to TF-IDF features
vectorizer = TfidfVectorizer(stop_words='english', max_features=5000)
X = vectorizer.fit_transform(X_text)

# Split dataset
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Create weak learner
base_estimator = DecisionTreeClassifier(max_depth=1)

# Create AdaBoost classifier
model = AdaBoostClassifier(estimator=base_estimator, n_estimators=50, learning_rate=1.0)

# Train
model.fit(X_train, y_train)

# Predict
y_pred = model.predict(X_test)

# Evaluate
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))
print("Classification Report:\n", classification_report(y_test, y_pred))

Accuracy: 0.9076233183856502
Confusion Matrix:
 [[962   4]
 [ 99  50]]
Classification Report:
               precision    recall  f1-score   support

         ham       0.91      1.00      0.95       966
        spam       0.93      0.34      0.49       149

    accuracy                           0.91      1115
   macro avg       0.92      0.67      0.72      1115
weighted avg       0.91      0.91      0.89      1115

