In [None]:
#Import statements
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset

#Now we load scikit-learn datasets and functionalities
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

#Seed PyTorch's global random number generator so repeated runs start from the same state
SEED = 42
torch.manual_seed(SEED)

#Pick the compute device: CUDA when PyTorch reports it as available, CPU otherwise
use_cuda = torch.cuda.is_available()
device = torch.device("cuda") if use_cuda else torch.device("cpu")
print(f"Using device: {device}")

Using device: cuda


In [None]:
#Load the Iris dataset that ships with scikit-learn and pull out features / labels
iris = load_iris()
X, y = iris.data, iris.target

#Show the first sample's feature row and its label, separated by a divider line
print(X[0], '***********', y[0], sep='\n')

[5.1 3.5 1.4 0.2]
***********
0


In [None]:
#Hold out 20% of the samples for testing; the fixed random_state keeps the split repeatable
X_train, X_test, y_t, y_te = train_test_split(X, y, test_size=0.2, random_state=42)

#Feature scaling: fit on the training rows, then apply the same fitted scaler to the test rows
scaler = StandardScaler()
X_t = scaler.fit_transform(X_train)
X_te = scaler.transform(X_test)

#Conversion from numpy arrays to PyTorch tensors
#Features become float32 (they take part in gradient computations);
#labels become long (int64) because they are used as class indices
X_t, X_te = (torch.tensor(arr, dtype=torch.float32) for arr in (X_t, X_te))
y_t, y_te = (torch.tensor(arr, dtype=torch.long) for arr in (y_t, y_te))

In [None]:
#TensorDataset pairs each feature row with its label; DataLoader then serves those pairs in mini-batches
#Note: TensorDataset suits data that fits in RAM; a custom Dataset is the usual choice for
#large collections (e.g. 50k medical images) that must be loaded lazily

batch = 16 #Number of samples per mini-batch

train_dataset = TensorDataset(X_t, y_t)
test_dataset = TensorDataset(X_te, y_te)

#Shuffle only the training data; the test order is left untouched
train_loader = DataLoader(dataset=train_dataset, batch_size=batch, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=batch, shuffle=False)

In [None]:
#Define the model
#Note! nn.Module is more than a parent class with a few helper methods: it registers parameters
#and sub-modules so autograd and the optimizer can find them.
#That bookkeeping is set up by nn.Module's own __init__, hence the super().__init__() call below.

class IrisModel(nn.Module):
  """Fully connected classifier: 4 input features -> 32 -> 16 -> 3 class logits."""

  def __init__(self):
    """Create the layers (the 'what' of the network)."""
    super().__init__()
    self.fc1 = nn.Linear(in_features=4, out_features=32) #4 input features feeding 32 neurons
    self.relu1 = nn.ReLU()
    self.fc2 = nn.Linear(in_features=32, out_features=16) #input size must match fc1's output size
    self.relu2 = nn.ReLU()
    self.fc3 = nn.Linear(in_features=16, out_features=3) #one logit per class

  def forward(self, x):
    """Run the data through the layers (the 'how') and return the raw class logits."""
    hidden = self.relu1(self.fc1(x))
    hidden = self.relu2(self.fc2(hidden))
    return self.fc3(hidden)

In [None]:
#Build the network, then move its parameters to the selected device (GPU/CPU)
model = IrisModel()
model = model.to(device)

#Loss function: cross-entropy on the raw logits
criteria = nn.CrossEntropyLoss()

#Optimizer: Adam over all of the model's parameters
optimizer = optim.Adam(params=model.parameters(), lr=0.01)

In [None]:
#Training loop: every epoch makes one full pass over the mini-batches in train_loader
epochs = 100

for epoch in range(epochs):
  model.train() #Set model to training mode
  total_loss = 0 #Sum of the batch losses for this epoch, reported once the epoch is finished

  for features, labels in train_loader: #DataLoader hands back one (features, labels) mini-batch per iteration

    #Move tensors to the configured device
    features, labels = features.to(device), labels.to(device)

    #backward() adds into each parameter's .grad rather than overwriting it,
    #so the gradients left over from the previous batch must be cleared first
    optimizer.zero_grad()

    outputs = model(features) #Calculate the outputs for the current batch (forward pass)

    loss = criteria(outputs, labels) #Compute the loss for this batch

    loss.backward() #Backpropagation step

    optimizer.step() #Update the weights

    total_loss += loss.item() #Add this batch's loss

  #Report once per epoch (outside the batch loop) so each line shows the finished epoch's
  #summed loss instead of a running partial sum after every batch
  print(f"Epoch [{epoch+1}/{epochs}], Loss: {total_loss:.4f}")


Epoch [1/100], Loss: 1.1081
Epoch [1/100], Loss: 2.1554
Epoch [1/100], Loss: 3.1583
Epoch [1/100], Loss: 4.0441
Epoch [1/100], Loss: 4.8994
Epoch [1/100], Loss: 5.8020
Epoch [1/100], Loss: 6.5192
Epoch [1/100], Loss: 7.1100
Epoch [2/100], Loss: 0.8048
Epoch [2/100], Loss: 1.3643
Epoch [2/100], Loss: 1.8320
Epoch [2/100], Loss: 2.2840
Epoch [2/100], Loss: 2.8510
Epoch [2/100], Loss: 3.2603
Epoch [2/100], Loss: 3.8055
Epoch [2/100], Loss: 4.1613
Epoch [3/100], Loss: 0.4756
Epoch [3/100], Loss: 0.9779
Epoch [3/100], Loss: 1.2264
Epoch [3/100], Loss: 1.5047
Epoch [3/100], Loss: 1.7961
Epoch [3/100], Loss: 2.0987
Epoch [3/100], Loss: 2.4521
Epoch [3/100], Loss: 2.7225
Epoch [4/100], Loss: 0.2061
Epoch [4/100], Loss: 0.3930
Epoch [4/100], Loss: 0.7097
Epoch [4/100], Loss: 0.9124
Epoch [4/100], Loss: 1.2074
Epoch [4/100], Loss: 1.2996
Epoch [4/100], Loss: 1.5142
Epoch [4/100], Loss: 1.7036
Epoch [5/100], Loss: 0.1168
Epoch [5/100], Loss: 0.2064
Epoch [5/100], Loss: 0.6072
Epoch [5/100], Loss:

In [None]:
#Evaluate on the held-out test set
model.eval() #Switch the model to evaluation mode

correct = 0 #Running count of correct predictions
total = 0 #Running count of samples seen

with torch.no_grad(): #No gradient tracking is needed for evaluation
  for features, labels in test_loader:
    #Move the batch to the configured device
    features = features.to(device)
    labels = labels.to(device)

    outputs = model(features) #Forward pass
    predicted = outputs.argmax(dim=1) #Index of the largest logit = predicted class
    correct += torch.eq(predicted, labels).sum().item() #Matches in this batch
    total += labels.shape[0] #Samples in this batch

#Fraction of test samples classified correctly
accuracy = correct/total
print(f"Test accuracy: {accuracy:.4f}")

Test accuracy: 1.0000
