# Classification using ```pytorch``` - Neural Network Classifier

We first begin by loading in all of the same packages as we did in notebook [4a - Logistic Regression](classroom_04a.ipynb).

What we'll see is the code is actually _exactly_ the same throughout this notebook as it was in the previous example. The only difference is the way that define the model class using ```pytorch```.

In [2]:
# system tools
import os

# pytorch
import torch
import torch.nn as nn

# data processing
import pandas as pd
import numpy as np

# huggingface datasets
from datasets import load_dataset

# scikit learn tools
from sklearn.metrics import classification_report
from sklearn.feature_extraction.text import CountVectorizer

# plotting tools
import matplotlib.pyplot as plt


__Creating a model class__



In [7]:




# Define model
class Model(nn.Module):
    def __init__(n_input_features=10):            # default input features, can be overridden
        super().__init__()                              # inherit from parent class
        
        self.linear1 = nn.Linear(n_input_features, 30)    # one linear layer with single output
        self.linear2 = nn.Linear(30, 30)
        self.linear3 = nn.Linear(30, 1)

    def forward(self, x):                               # how should one forward pass look?
        x = self.linear1(x)                              # pass the data through the linear layer
        x = torch.sigmoid(x) 
        x = self.linear2(x)
        x = torch.sigmoid(x) 
        x = self.linear3(x)
        y_pred = torch.sigmoid(x)                     # squash the outputs through sigmoid layer
        return y_pred                                   # return predictions



## Load data


In [4]:
# load the sst2 dataset
dataset = load_dataset("glue", "sst2")
# select the train split
data = dataset["train"]
X = data["sentence"]
y = data["label"]

Found cached dataset glue (/home/coder/.cache/huggingface/datasets/glue/sst2/1.0.0/dacbe3125aa31d7f70367a07a8a9e72a5a0bfeb5fc42e75c9db75b96da6053ad)


  0%|          | 0/3 [00:00<?, ?it/s]


## Create document representations


In [5]:
# vectorizer
vectorizer = CountVectorizer()
# vectorized training data
X_train_vect = vectorizer.fit_transform(X_train)
# only transform val and test
X_val_vect = vect.transform(X_val)
X_test_vect = vect.transform(X_test)
# to tensors
X_vect = torch.tensor(X_vect.toarray(), dtype=torch.float) # should this be X_train? And then create one for test and val
y = torch.tensor(list(y), dtype=torch.float)
y = y.view(y.shape[0], 1)


## Initialize parameters


In [8]:
# initialize model
n_samples, n_features = X_vect.shape
model = Model(n_input_features=n_features)

# define loss and optimizer
criterion = nn.BCELoss()
optimizer = torch.optim.AdamW(model.parameters())

TypeError: __init__() got multiple values for argument 'n_input_features'

## Train


In [None]:
# train
def train(num_epochs): # epochs should be 100
    best_accuracy = 0.0
    print("[INFO:] Training classifier...")
    loss_history = []
    
    for epoch in range(1, num_epochs+1):
        running_trian_loss = 0.0
        running_accuracy = 0.0
        running_vall_loss = 0.0
        total = 0

        # Training loop
        for data in train:
            inputs, outputs = data
            optimizer.zero_grad()
            predicted_outputs = model(inputs)
            train_loss = loss_fn(predicted_outputs, outputs)
            train_loss.backward() 
            optimizer.step()
            running_train_loss + train_loss.item()

        train_loss_value = running_train_loss/len(train) # what the data is called

        # validation loop
        with torch.no_grad():
            model.eval()
            for data in val: # What the data is called
                inputs, outputs = data
                predicted_outputs = data
                predicted_outputs = model (inputs)
                val_loss = loss_fn(predicted_outputs, outputs)

                 # The label with the highest value will be our prediction 
               _, predicted = torch.max(predicted_outputs, 1) 
               running_vall_loss += val_loss.item()  
               total += outputs.size(0) 
               running_accuracy += (predicted == outputs).sum().item() 

        # Calculate validation loss value 
        val_loss_value = running_vall_loss/len(validate_loader) 
                
        # Calculate accuracy as the number of correct predictions in the validation batch divided by the total number of predictions done.  
        accuracy = (100 * running_accuracy / total)     
 
        # Save the model if the accuracy is the best 
        if accuracy > best_accuracy: 
            saveModel() 
            best_accuracy = accuracy 
         
        # Print the statistics of the epoch 
        print('Completed training batch', epoch, 'Training Loss is: %.4f' %train_loss_value, 'Validation Loss is: %.4f' %val_loss_value, 'Accuracy is %d %%' % (accuracy)) 
            
   

## Evaluate


In [None]:
# Plot
predicted = model(X_vect).detach().numpy()
print(classification_report(y, 
                            np.where(predicted > 0.5, 1, 0),
                            target_names = ["Negative", "Positive"]))

## Plot loss history

In [None]:
loss_H = [val.item() for val in loss_history]
fig, ax = plt.subplots()
ax.plot(loss_H)