In [None]:
# Import necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from sklearn.preprocessing import StandardScaler

# Step 1: Read the SpamBase data
# Point `url` at a local path or mirror if the UCI location below is unavailable
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/spambase/spambase.data"
# The file is read without a header row, so supply names: 57 features, then the label
columns = [*(f"feature_{i}" for i in range(57)), "label"]
data = pd.read_csv(url, names=columns, header=None)

# Step 2: Separate the target from the predictors
y = data["label"]               # Final column: the label (spam or not spam)
X = data.drop(columns="label")  # Every remaining column is a feature

# Step 3: Hold out 30% of the rows for testing (seeded via random_state)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

# Step 4: Standardize the features
# The scaler is fitted on the training split only; the same fitted scaler
# is then applied to both the training and the test split.
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Step 5: Fit logistic regression on the scaled training data
model = LogisticRegression(
    solver='saga',
    max_iter=5000,
    random_state=42,
).fit(X_train_scaled, y_train)

# Step 6: Predict labels for the held-out rows
y_pred = model.predict(X_test_scaled)

# Step 7: Score the predictions against the true test labels
# Compute every metric up front, then print them in order.
accuracy = accuracy_score(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)
class_report = classification_report(
    y_test,
    y_pred,
    target_names=["Not Spam", "Spam"],
)

# Accuracy, shown to two decimal places
print(f"Accuracy: {accuracy:.2f}")

# Confusion matrix
print("\nConfusion Matrix:")
print(conf_matrix)

# Classification report
print("\nClassification Report:")
print(class_report)


Accuracy: 0.92

Confusion Matrix:
[[768  36]
 [ 71 506]]

Classification Report:
              precision    recall  f1-score   support

    Not Spam       0.92      0.96      0.93       804
        Spam       0.93      0.88      0.90       577

    accuracy                           0.92      1381
   macro avg       0.92      0.92      0.92      1381
weighted avg       0.92      0.92      0.92      1381

