Perceptron Implementation for Spam Classification Using Keras/TensorFlow


Name: Jagtap Mahesh
Reg No.: 24MCS1O17


Step 1: Import Libraries

In [None]:
import io
import zipfile

import numpy as np
import pandas as pd
import requests
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.layers import Dense, Input
from tensorflow.keras.models import Sequential
from tensorflow.keras.utils import set_random_seed


Step 2: Load and Preprocess the Dataset


In [None]:
# Download the ZIP file directly from the URL.
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/00228/smsspamcollection.zip"
# A timeout stops the cell from hanging indefinitely on a stalled connection.
response = requests.get(url, timeout=30)
# Surface HTTP failures here; otherwise an error page would only show up
# below as an opaque zipfile.BadZipFile.
response.raise_for_status()

# Extract the ZIP file in memory (nothing is written to disk).
with zipfile.ZipFile(io.BytesIO(response.content)) as z:
    # List files in the ZIP archive
    z.printdir()

    # Read the tab-separated 'SMSSpamCollection' member into a pandas dataframe;
    # the file has no header row, so column names are supplied here.
    with z.open('SMSSpamCollection') as f:
        data = pd.read_csv(f, sep='\t', header=None, names=['Label', 'Message'])

# Display first few rows of the dataset (labels are still text at this point)
print(data.head())

# Convert text labels (ham/spam) into integer codes; `encoder` is kept so the
# codes can be mapped back to names via `encoder.classes_`.
encoder = LabelEncoder()
data['Label'] = encoder.fit_transform(data['Label'])

# Features and target
X = data['Message']
y = data['Label']

# Split the data into training and testing sets (80/20, fixed seed)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Convert text messages into numerical features using CountVectorizer.
# The vocabulary is fitted on the training messages only and then applied to
# the test messages, so no test-set information leaks into the features.
vectorizer = CountVectorizer(stop_words='english', max_features=3000)
X_train = vectorizer.fit_transform(X_train).toarray()
X_test = vectorizer.transform(X_test).toarray()

# Model definition, training and evaluation are intentionally not repeated
# here: Steps 3-5 below perform them, and running them in this cell as well
# only trained a throw-away model that Step 3 immediately replaced.

File Name                                             Modified             Size
SMSSpamCollection                              2011-03-15 22:36:02       477907
readme                                         2011-04-18 14:53:56         5868
  Label                                            Message
0   ham  Go until jurong point, crazy.. Available only ...
1   ham                      Ok lar... Joking wif u oni...
2  spam  Free entry in 2 a wkly comp to win FA Cup fina...
3   ham  U dun say so early hor... U c already then say...
4   ham  Nah I don't think he goes to usf, he lives aro...


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/10
[1m446/446[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.8302 - loss: 0.5836
Epoch 2/10
[1m446/446[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.8621 - loss: 0.3978
Epoch 3/10
[1m446/446[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.8626 - loss: 0.3446
Epoch 4/10
[1m446/446[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.8747 - loss: 0.3033
Epoch 5/10
[1m446/446[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.8817 - loss: 0.2849
Epoch 6/10
[1m446/446[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.8881 - loss: 0.2637
Epoch 7/10
[1m446/446[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.8940 - loss: 0.2536
Epoch 8/10
[1m446/446[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.9004 - loss: 0.2384
Epoch 9/10
[1m446/446[0m [32m━━━━━━━━

Step 3: Define the Perceptron Model



In [None]:
# Seed Python, NumPy and the Keras backend so weight initialisation and batch
# shuffling are repeatable across Restart & Run All (42 matches the split seed).
set_random_seed(42)

# Build the Perceptron model: a single sigmoid unit over the bag-of-words
# features. The input width is declared with an explicit Input layer; passing
# `input_dim` to Dense is deprecated and emits a Keras UserWarning.
n_features = X_train.shape[1]
model = Sequential([
    Input(shape=(n_features,)),
    Dense(1, activation='sigmoid'),  # Single neuron with sigmoid for binary classification
])


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Step 4: Compile and Train the Model



In [None]:
# Compile the model: plain SGD minimising binary cross-entropy, reporting accuracy.
model.compile(optimizer='sgd', loss='binary_crossentropy', metrics=['accuracy'])

# Training hyper-parameters, named so they are easy to find and tune.
EPOCHS = 10
BATCH_SIZE = 10

# Train the model. The returned History object is bound to a name so the
# per-epoch metrics remain available afterwards and the cell no longer ends
# by echoing the object's repr as its output.
history = model.fit(X_train, y_train, epochs=EPOCHS, batch_size=BATCH_SIZE, verbose=1)


Epoch 1/10
[1m446/446[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.8251 - loss: 0.5844
Epoch 2/10
[1m446/446[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.8642 - loss: 0.3993
Epoch 3/10
[1m446/446[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.8612 - loss: 0.3486
Epoch 4/10
[1m446/446[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.8737 - loss: 0.3067
Epoch 5/10
[1m446/446[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.8714 - loss: 0.2955
Epoch 6/10
[1m446/446[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.8845 - loss: 0.2736
Epoch 7/10
[1m446/446[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.8952 - loss: 0.2504
Epoch 8/10
[1m446/446[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.9040 - loss: 0.2407
Epoch 9/10
[1m446/446[0m [32m━━━━━━━━

<keras.src.callbacks.history.History at 0x7e3539e616f0>

Step 5: Evaluate the Model


In [None]:
# Score the held-out messages, then threshold the sigmoid outputs at 0.5
# to turn them into hard 0/1 labels.
test_scores = model.predict(X_test)
y_pred = (test_scores > 0.5).astype(int)

# Fraction of test messages whose predicted label matches the true label.
accuracy = accuracy_score(y_test, y_pred)
print(f"Test Accuracy: {accuracy * 100:.2f}%")


[1m35/35[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step
Test Accuracy: 92.02%


In [None]:
# Recover the original class names from the fitted LabelEncoder:
# `encoder.classes_[i]` is the text label that was encoded as integer i.
class_names = [str(name) for name in encoder.classes_]
class_ids = list(range(len(class_names)))

# Confusion Matrix (rows = true class, columns = predicted class).
# Passing `labels` pins the row/column order even if a class is never predicted.
cm = confusion_matrix(y_test, y_pred, labels=class_ids)
print("Confusion Matrix:")
print(pd.DataFrame(
    cm,
    index=[f"true {name}" for name in class_names],
    columns=[f"pred {name}" for name in class_names],
))

# Classification Report (Precision, Recall, F1-Score), shown per named class
# so it is clear which row is the spam class rather than a bare 0/1.
report = classification_report(
    y_test, y_pred, labels=class_ids, target_names=class_names
)
print("Classification Report:")
print(report)

Confusion Matrix:
[[966   0]
 [ 89  60]]
Classification Report:
              precision    recall  f1-score   support

           0       0.92      1.00      0.96       966
           1       1.00      0.40      0.57       149

    accuracy                           0.92      1115
   macro avg       0.96      0.70      0.77      1115
weighted avg       0.93      0.92      0.90      1115

