#### In this exercise we will perform operations with perceptrons

In [15]:
# Perceptron using numpy only
import numpy as np

class Perceptron:
    """Single-unit perceptron with a step activation, built on NumPy only.

    Attributes:
        weights: 1-D float array holding one weight per input feature,
            initialised to zeros.
        bias: scalar offset added to the weighted sum, initialised to 0.
    """

    def __init__(self, input_size):
        """Create a perceptron that expects ``input_size`` features per sample."""
        self.weights = np.zeros(input_size)
        self.bias = 0

    def predict(self, inputs):
        """Classify a single sample.

        Args:
            inputs: 1-D array-like with one value per input feature.

        Returns:
            1 when the weighted sum plus bias is strictly positive, else 0.
        """
        summation = np.dot(inputs, self.weights) + self.bias
        return 1 if summation > 0 else 0

    def train(self, inputs, labels, epochs):
        """Fit weights and bias with the perceptron update rule.

        For every sample the prediction error ``label - prediction`` is
        added to the bias and, scaled by the sample, to the weights.

        Args:
            inputs: iterable of samples; each sample may be an ndarray or a
                plain Python sequence of numbers.
            labels: iterable of 0/1 targets, paired with ``inputs`` by position.
            epochs: number of full passes over the data.
        """
        for _ in range(epochs):
            for x, y in zip(inputs, labels):
                # Coerce to an ndarray so `error * x` is element-wise scaling.
                # On a plain list `*` means repetition, which produced a
                # wrongly sized update and a broadcasting error.
                x = np.asarray(x)
                error = y - self.predict(x)
                self.weights += error * x
                self.bias += error


if __name__ == "__main__":
    # Truth table for logical OR: the target is 0 only when both inputs are 0.
    inputs = np.array([[1, 0], [0, 1], [0, 0], [1, 1]])
    labels = np.array([1, 1, 0, 1])

    # Two input features, trained for 100 passes over the four samples.
    perceptron = Perceptron(2)
    perceptron.train(inputs, labels, 100)

    print("Weights: ", perceptron.weights)
    print("Bias: ", perceptron.bias)

    # Query the two cases where exactly one input is active.
    for probe in ([1, 0], [0, 1]):
        print("Prediction: ", perceptron.predict(np.array(probe)))

Weights:  [1. 1.]
Bias:  0
Prediction:  1
Prediction:  1


### Activity: Spam or Not Spam

In [16]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.optim as optim


# Read the spam dataset, decoding the CSV explicitly as ISO-8859-1.
df = pd.read_csv("datasets/spam.csv", encoding="ISO-8859-1")

# Preview the first five rows (head() defaults to n=5).
df.head()


Unnamed: 0,v1,v2,Unnamed: 2,Unnamed: 3,Unnamed: 4
0,ham,"Go until jurong point, crazy.. Available only ...",,,
1,ham,Ok lar... Joking wif u oni...,,,
2,spam,Free entry in 2 a wkly comp to win FA Cup fina...,,,
3,ham,U dun say so early hor... U c already then say...,,,
4,ham,"Nah I don't think he goes to usf, he lives aro...",,,


In [17]:
# Column summary plus dimensions. info() prints its report, and the recorded
# result shows None in its slot, so the cell value is the pair (None, shape).
df.info(), df.shape # 5570 rows, 5 columns in the recorded output

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5570 entries, 0 to 5569
Data columns (total 5 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   v1          5570 non-null   object
 1   v2          5570 non-null   object
 2   Unnamed: 2  50 non-null     object
 3   Unnamed: 3  12 non-null     object
 4   Unnamed: 4  6 non-null      object
dtypes: object(5)
memory usage: 217.7+ KB


(None, (5570, 5))

In [18]:
import torch
import numpy as np
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split
import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics import accuracy_score



# Bag-of-words features: every message in "v2" becomes a dense row of token
# counts. The target is 1 where "v1" equals "spam" and 0 otherwise.
vectorizer = CountVectorizer()
X = vectorizer.fit_transform(df["v2"]).toarray()
y = (df["v1"] == "spam").astype(int)

# Hold out 20% of the rows for evaluation; random_state pins the shuffle.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.2,
)

# Float32 tensors for the feature matrices.
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)

# The label splits are pandas Series, so they go through .to_numpy() before
# torch.tensor (the original author notes this conversion avoids an error).
y_train_tensor = torch.tensor(y_train.to_numpy(), dtype=torch.float32)
y_test_tensor = torch.tensor(y_test.to_numpy(), dtype=torch.float32)

# Define the perceptron model
class Perceptron(nn.Module):
    """One linear layer followed by a sigmoid, producing one value per sample."""

    def __init__(self, input_dim):
        """Build the single ``input_dim -> 1`` linear layer."""
        super().__init__()
        self.fc = nn.Linear(input_dim, 1)

    def forward(self, x):
        """Return ``sigmoid(fc(x))``; the last dimension of the result is 1."""
        logits = self.fc(x)
        return torch.sigmoid(logits)

# Seed torch's global RNG before the layer is created: nn.Linear draws its
# initial weights at random, so without a seed the losses and accuracy
# differ on every run. The value matches the train/test split's seed.
torch.manual_seed(42)

# Instantiate the model with one input per feature column
input_dim = X_train.shape[1]
model = Perceptron(input_dim)

# Binary cross-entropy on the model's sigmoid outputs, optimised with plain SGD
criterion = nn.BCELoss()
optimizer = optim.SGD(model.parameters(), lr=0.1)

# Define the training loop
def train_model(model, criterion, optimizer, X_train, y_train, num_epochs=100):
    """Optimise ``model`` on the whole training set once per epoch.

    Args:
        model: callable module mapping ``X_train`` to predictions.
        criterion: loss taking ``(predictions, targets)``.
        optimizer: optimizer already bound to the model's parameters.
        X_train: feature tensor passed to the model in a single batch.
        y_train: target tensor; viewed as a column (``-1, 1``) for the loss.
        num_epochs: number of update steps to run.

    Returns:
        None. The loss is printed after every 10th epoch.
    """
    for step in range(1, num_epochs + 1):
        # Drop gradients accumulated by the previous step
        optimizer.zero_grad()

        # Forward pass over the full batch
        predictions = model(X_train)
        batch_loss = criterion(predictions, y_train.view(-1, 1))

        # Backward pass and parameter update
        batch_loss.backward()
        optimizer.step()

        # Progress report on every 10th epoch
        if step % 10 == 0:
            print(f"Epoch [{step}/{num_epochs}], Loss: {batch_loss.item()}")

# Fit the model on the training tensors (train_model prints the loss as it goes)
train_model(model, criterion, optimizer, X_train_tensor, y_train_tensor)

# Score the held-out split; no gradients are needed for inference
with torch.no_grad():
    # Threshold the sigmoid outputs at 0.5 to get hard 0/1 labels
    y_pred = (model(X_test_tensor) >= 0.5).squeeze().int()

accuracy = accuracy_score(y_test, y_pred.numpy())
print("Accuracy:", accuracy)


Epoch [10/100], Loss: 0.5468505024909973
Epoch [20/100], Loss: 0.46934714913368225
Epoch [30/100], Loss: 0.4245634973049164
Epoch [40/100], Loss: 0.39370197057724
Epoch [50/100], Loss: 0.3699180781841278
Epoch [60/100], Loss: 0.350299209356308
Epoch [70/100], Loss: 0.3334517776966095
Epoch [80/100], Loss: 0.3186367452144623
Epoch [90/100], Loss: 0.305420845746994
Epoch [100/100], Loss: 0.29352283477783203
Accuracy: 0.8725314183123878


In [19]:
# Make predictions using new data
def predict_spam(text, model, vectorizer):
    """Label one message as "spam" or "ham".

    Args:
        text: the raw message string.
        model: callable returning a single score for a one-row float32 batch.
        vectorizer: object whose ``transform([text]).toarray()`` yields the
            feature row for the message.

    Returns:
        "spam" when the model's score is at least 0.5, otherwise "ham".
    """
    features = vectorizer.transform([text]).toarray()
    batch = torch.tensor(features, dtype=torch.float32)

    # Inference only, so skip gradient tracking
    with torch.no_grad():
        is_spam = (model(batch) >= 0.5).int().item() == 1

    if is_spam:
        return "spam"
    return "ham"

In [20]:
# Try the classifier on an obviously promotional message.
# NOTE: the recorded run labels it 'ham'.
predict_spam(
    text="Congratulations! You've won a $1,000 Walmart gift card. Go to bit.ly/123456 to claim now.",
    model=model,
    vectorizer=vectorizer,
)

'ham'

In [21]:
# Persist the model's state dict (its parameters) to disk
torch.save(obj=model.state_dict(), f='perceptron_model.pth')

In [22]:
# Load the model
# Rebuild the architecture, then restore the saved parameters into it.
# weights_only=True limits unpickling to tensors and plain containers, which
# is all a state dict holds, so a tampered checkpoint cannot execute code.
model = Perceptron(input_dim)
model.load_state_dict(torch.load('perceptron_model.pth', weights_only=True))

<All keys matched successfully>