## PyTorch MLP example problem

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import numpy as np


# Load the Iris dataset that ships with scikit-learn.
iris = load_iris()
# x: feature matrix with 4 columns - sepal length, sepal width, petal length and petal width.
# y: integer class label per sample - 0 = setosa, 1 = versicolor, 2 = virginica.
x, y = iris.data, iris.target

# Hold out 20% of the samples for testing; the fixed random_state keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=42,
)

# Standardize every feature to mean 0 / variance 1. The scaler is fitted on the
# training split only and then re-used on the test split, so no test-set
# statistics leak into training.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

print(X_train.shape)

# Convert the NumPy arrays to PyTorch tensors: float32 for the features and
# long (int64) for the class indices, which is what nn.CrossEntropyLoss expects.
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.long)
y_test_tensor = torch.tensor(y_test, dtype=torch.long)


'''
- batching = breaks the dataset into small portions (batches) and runs each training step on one batch instead of on the entire dataset.
- shuffling = presents the samples in a different order each epoch, which helps the model generalize better.
- the dataset is small, so neither is strictly necessary here; we do both anyway to demonstrate batching and shuffling across epochs.
'''
class IrisDataset(Dataset):
  """Map-style dataset that pairs each feature row with its label.

  Args:
    data: Sized, indexable collection of feature rows (e.g. a 2-D tensor).
    labels: Sized, indexable collection holding one label per row of ``data``.

  Raises:
    ValueError: If ``data`` and ``labels`` do not have the same length.
  """

  def __init__(self, data, labels):
    # Fail fast on a mismatch; otherwise the error would only surface later as
    # an out-of-range index or as labels that are silently never used.
    if len(data) != len(labels):
      raise ValueError(
          f"data and labels must have the same length, "
          f"got {len(data)} and {len(labels)}"
      )
    self.data = data
    self.labels = labels

  def __len__(self):
    """Return the number of samples in the dataset."""
    return len(self.data)

  def __getitem__(self, idx):
    """Return the ``(features, label)`` pair stored at position ``idx``."""
    return self.data[idx], self.labels[idx]


# Wrap the train / test tensors so a DataLoader can index them sample by sample.
train_dataset = IrisDataset(data=X_train_tensor, labels=y_train_tensor)
test_dataset = IrisDataset(data=X_test_tensor, labels=y_test_tensor)

# Mini-batches of 10 samples. Only the training loader shuffles; the test loader
# keeps a fixed order.
train_loader = DataLoader(dataset=train_dataset, batch_size=10, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=10, shuffle=False)




class mlp(nn.Module):
  """Fully connected feed-forward network (multi-layer perceptron).

  Each hidden layer is an ``nn.Linear`` (weighted sum of its inputs plus a
  bias) followed by a ReLU activation, which introduces the non-linearity.
  The final ``nn.Linear`` maps to ``output_size`` raw scores (logits); no
  activation is applied to them.

  Args:
    input_size: Number of input features.
    hidden_sizes: Iterable with the number of neurons of each hidden layer,
      in order. Any number of hidden layers is supported; an empty iterable
      yields a single linear layer from input to output.
    output_size: Number of output values (one per class for classification).
  """

  def __init__(self, input_size, hidden_sizes, output_size):
    # nn.Module.__init__ must run before sub-modules are assigned, otherwise
    # the layers (and their parameters) cannot be registered on this module.
    super().__init__()

    # Sizes of everything that feeds a Linear layer: the input, then each hidden layer.
    layer_sizes = [input_size, *hidden_sizes]

    modules = []
    for in_features, out_features in zip(layer_sizes, layer_sizes[1:]):
      modules.append(nn.Linear(in_features, out_features))
      modules.append(nn.ReLU())
    # Output layer: last hidden layer (or the input, if there is none) -> output.
    modules.append(nn.Linear(layer_sizes[-1], output_size))

    self.layers = nn.Sequential(*modules)

  # nn.Module does not provide a usable forward(); every model has to define
  # its own. Calling model(x) dispatches to this method.
  def forward(self, x):
    """Run ``x`` through all layers and return the output scores."""
    return self.layers(x)


# --- instantiating the model ---
input_size = X_train.shape[-1]  # one input per feature column
hidden_sizes = [64, 32]  # neurons in the first and second hidden layer respectively
output_size = 3  # one output per class: setosa, versicolor and virginica

model = mlp(
    input_size=input_size,
    hidden_sizes=hidden_sizes,
    output_size=output_size,
)


# --- define loss function(cross entropy because it is a classification problem) and optimizer ---
loss_function = nn.CrossEntropyLoss()

# model.parameters() yields every trainable weight and bias of the network.
# lr is the learning rate: a small value converges more slowly but more stably,
# making it less likely to overshoot good weights and biases.
# Adam is a widely used general-purpose optimizer.
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)


# --- model training ---

epochs = 50
for epoch in range(epochs):
  model.train()  # make sure the model is in training mode for this epoch
  running_loss = 0.0  # sum of the per-batch losses seen in this epoch

  # One optimization step per mini-batch: forward pass, loss calculation,
  # back-propagation and parameter update.
  for inputs, labels in train_loader:
    optimizer.zero_grad()  # clear the gradients left over from the previous batch
    outputs = model(inputs)  # forward propagation - internally calls the forward method
    loss = loss_function(outputs, labels)  # cross-entropy between the scores and the true classes
    loss.backward()  # back-propagation: compute the gradient of the loss w.r.t. every parameter
    optimizer.step()  # update the weights and biases using those gradients

    running_loss += loss.item()

  # Report the mean loss per batch for this epoch.
  print(f"Epoch {epoch + 1}/{epochs}, Loss: {running_loss / len(train_loader):.4f}")


# --- model evaluation ---
# Runs once, after training has finished. Only the final accuracy is reported,
# so scoring the test set inside every epoch would be wasted work.

model.eval()
correct = 0
total = 0

with torch.no_grad():  # no gradients are needed for evaluation; skipping them saves memory and time
  for inputs, labels in test_loader:
    outputs = model(inputs)
    # outputs holds one score per class for every sample; the index of the
    # largest score along dimension 1 is the predicted class.
    _, predicted = torch.max(outputs, 1)
    total += labels.size(0)
    correct += (predicted == labels).sum().item()

accuracy = correct / total
print(f"Test Accuracy: {accuracy:.2%}")


(120, 4)
Epoch 1/50, Loss: 1.0597
Epoch 2/50, Loss: 0.8780
Epoch 3/50, Loss: 0.7375
Epoch 4/50, Loss: 0.6303
Epoch 5/50, Loss: 0.5500
Epoch 6/50, Loss: 0.4798
Epoch 7/50, Loss: 0.4185
Epoch 8/50, Loss: 0.3645
Epoch 9/50, Loss: 0.3183
Epoch 10/50, Loss: 0.2835
Epoch 11/50, Loss: 0.2490
Epoch 12/50, Loss: 0.2213
Epoch 13/50, Loss: 0.1968
Epoch 14/50, Loss: 0.1760
Epoch 15/50, Loss: 0.1593
Epoch 16/50, Loss: 0.1441
Epoch 17/50, Loss: 0.1319
Epoch 18/50, Loss: 0.1209
Epoch 19/50, Loss: 0.1128
Epoch 20/50, Loss: 0.1080
Epoch 21/50, Loss: 0.0997
Epoch 22/50, Loss: 0.0943
Epoch 23/50, Loss: 0.0900
Epoch 24/50, Loss: 0.0870
Epoch 25/50, Loss: 0.0865
Epoch 26/50, Loss: 0.0838
Epoch 27/50, Loss: 0.0767
Epoch 28/50, Loss: 0.0763
Epoch 29/50, Loss: 0.0721
Epoch 30/50, Loss: 0.0724
Epoch 31/50, Loss: 0.0697
Epoch 32/50, Loss: 0.0660
Epoch 33/50, Loss: 0.0658
Epoch 34/50, Loss: 0.0645
Epoch 35/50, Loss: 0.0625
Epoch 36/50, Loss: 0.0639
Epoch 37/50, Loss: 0.0622
Epoch 38/50, Loss: 0.0611
Epoch 39/50,

## Predictions

In [None]:
def predict(input_data):
    """Classify a single flower and return its class name.

    The sample is scaled with the module-level ``scaler``, passed through the
    module-level ``model`` and the index of the highest score is looked up in
    ``iris.target_names``. The raw scores and the predicted index are printed
    along the way.

    Args:
        input_data: One sample as a flat sequence of feature values, in the
            same column order the scaler was fitted on.

    Returns:
        The name of the predicted class.
    """
    # scaler.transform expects a 2-D input, hence the wrapping list (a batch of one).
    scaled = scaler.transform([input_data])
    features = torch.tensor(scaled, dtype=torch.float32)

    model.eval()
    with torch.no_grad():
        outputs = model(features)
        # Index of the largest score along dimension 1 = predicted class.
        predicted = torch.max(outputs, 1).indices

    print(outputs)
    print(predicted)

    # Translate the class index into its name.
    return iris.target_names[predicted.item()]

# Run the classifier on a few hand-written samples
# (sepal length, sepal width, petal length, petal width).
sample_inputs = [
    [5.3, 2.3, 3.3, 1],
    [6.3, 1.3, 4.3, 2],
    [4.3, 1.3, 2.3, 3],
    [2.3, 0.3, 4.3, 2.3],
]
for sample_input in sample_inputs:
  predicted_class = predict(sample_input)
  print(f"Predicted class: {predicted_class}")

tensor([[-2.8686,  3.9124, -3.5819]])
tensor([1])
Predicted class: versicolor
tensor([[-11.8294,  -0.6093,   3.7085]])
tensor([2])
Predicted class: virginica
tensor([[-10.6116,  -4.2005,   6.1600]])
tensor([2])
Predicted class: virginica
tensor([[-14.0184,  -6.8266,   9.1874]])
tensor([2])
Predicted class: virginica
