**Neural Network**

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_iris
from torch.utils.data import DataLoader, TensorDataset

In [2]:
# Fetch the Iris dataset bundled with scikit-learn and separate it into
# the feature matrix (X) and the class-label vector (y)
iris = load_iris()
X, y = iris.data, iris.target

In [3]:
# Display the dimensions of the feature matrix and of the label vector
X.shape, y.shape

((150, 4), (150,))

In [4]:
# Hold out 20% of the samples for testing; the fixed random_state makes the
# partition identical on every run
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=1,
)

In [5]:
# Display the shapes of the train/test feature and label arrays produced by the split
X_train.shape, X_test.shape, y_train.shape, y_test.shape

((120, 4), (30, 4), (120,), (30,))

In [6]:
# Inspect the first training sample: its feature vector and its class label
X_train[0], y_train[0]

(array([6.1, 3. , 4.6, 1.4]), np.int64(1))

In [7]:
# Convert NumPy arrays to PyTorch tensors.
# torch.tensor with an explicit dtype is used instead of the legacy
# type-specific constructors (torch.FloatTensor / torch.LongTensor):
#   - features become float32, the dtype nn.Linear parameters use by default
#   - labels become int64 class indices, as CrossEntropyLoss expects
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.long)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test, dtype=torch.long)

In [8]:
# Inspect the first training sample after conversion: feature tensor and label tensor
X_train_tensor[0], y_train_tensor[0]

(tensor([6.1000, 3.0000, 4.6000, 1.4000]), tensor(1))

In [9]:
# Compare the container types before (NumPy) and after (PyTorch) the conversion
type(X_train), type(y_train), type(X_train_tensor), type(y_train_tensor)

(numpy.ndarray, numpy.ndarray, torch.Tensor, torch.Tensor)

We wrap these feature and label tensors in Datasets and DataLoaders.

A Dataset provides an abstraction for handling features and labels together.<br/>
A DataLoader makes the Dataset iterable and also allows batching and shuffling of the data it contains.

In [10]:
# Number of (X, y) pairs passed through the network per parameter update
batch_size = 5

# Pair each feature tensor with its label tensor
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
test_dataset = TensorDataset(X_test_tensor, y_test_tensor)

# Training batches are reshuffled every epoch so each update sees a different mix of samples
train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
# Evaluation only counts correct predictions, so sample order is irrelevant;
# keeping it fixed makes the test pass deterministic
test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)

In [11]:

# Display the test dataset object (its default repr shows only the type and memory address)
test_dataset

<torch.utils.data.dataset.TensorDataset at 0x7f2549271010>

In [12]:
class NeuralNetwork(nn.Module):
    """Fully connected classifier with a single ReLU hidden layer.

    Args:
        input_size: Number of input features per sample.
        hidden_size: Number of units in the hidden layer.
        num_classes: Number of output scores produced per sample.
    """

    def __init__(self, input_size, hidden_size, num_classes):
        super().__init__()
        # Layers are registered in the order they are applied: linear -> ReLU -> linear
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        """Return the output of the final linear layer for ``x``.

        No softmax is applied here; the scores are returned unnormalised.
        """
        hidden = self.relu(self.fc1(x))
        return self.fc2(hidden)


In [13]:
# Hyperparameters
input_size = 4  # Number of features in the Iris dataset
hidden_size = 6  # Number of units in the hidden layer
num_classes = 3  # Iris dataset has 3 classes (setosa, versicolor, virginica)
learning_rate = 0.001  # Step size the optimizer uses for each parameter update
num_epochs = 1_000  # Number of full passes over the training set
seed = 1  # Same value as the random_state used for the train/test split

# Seed PyTorch's global RNG before any weights are created so that the weight
# initialisation and the DataLoader shuffle order repeat on Restart & Run All
torch.manual_seed(seed)

# Create the neural network
model = NeuralNetwork(input_size, hidden_size, num_classes)
model

NeuralNetwork(
  (fc1): Linear(in_features=4, out_features=6, bias=True)
  (relu): ReLU()
  (fc2): Linear(in_features=6, out_features=3, bias=True)
)

In [14]:
# Loss function
criterion = nn.CrossEntropyLoss()
criterion

CrossEntropyLoss()

In [15]:
# Optimizer
optimizer = optim.Adam(model.parameters(), lr=learning_rate)
optimizer

Adam (
Parameter Group 0
    amsgrad: False
    betas: (0.9, 0.999)
    capturable: False
    decoupled_weight_decay: False
    differentiable: False
    eps: 1e-08
    foreach: None
    fused: None
    lr: 0.001
    maximize: False
    weight_decay: 0
)

In [16]:
# Training the model
for epoch in range(num_epochs):
    for batch_X, batch_y in train_loader:
        # Forward pass
        outputs = model(batch_X)
        loss = criterion(outputs, batch_y)

        # Backward and optimize
        optimizer.zero_grad() # Clear gradients
        loss.backward() # Compute gradients
        optimizer.step() # Update parameters of Neural Network

    if (epoch + 1) % 100 == 0:
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')
        print(batch_X.shape, batch_y.shape, loss)


Epoch [100/1000], Loss: 0.1933
torch.Size([5, 4]) torch.Size([5]) tensor(0.1933, grad_fn=<NllLossBackward0>)
Epoch [200/1000], Loss: 0.0087
torch.Size([5, 4]) torch.Size([5]) tensor(0.0087, grad_fn=<NllLossBackward0>)
Epoch [300/1000], Loss: 0.0071
torch.Size([5, 4]) torch.Size([5]) tensor(0.0071, grad_fn=<NllLossBackward0>)
Epoch [400/1000], Loss: 0.0280
torch.Size([5, 4]) torch.Size([5]) tensor(0.0280, grad_fn=<NllLossBackward0>)
Epoch [500/1000], Loss: 0.0059
torch.Size([5, 4]) torch.Size([5]) tensor(0.0059, grad_fn=<NllLossBackward0>)
Epoch [600/1000], Loss: 0.2826
torch.Size([5, 4]) torch.Size([5]) tensor(0.2826, grad_fn=<NllLossBackward0>)
Epoch [700/1000], Loss: 0.0007
torch.Size([5, 4]) torch.Size([5]) tensor(0.0007, grad_fn=<NllLossBackward0>)
Epoch [800/1000], Loss: 0.0022
torch.Size([5, 4]) torch.Size([5]) tensor(0.0022, grad_fn=<NllLossBackward0>)
Epoch [900/1000], Loss: 0.0075
torch.Size([5, 4]) torch.Size([5]) tensor(0.0075, grad_fn=<NllLossBackward0>)
Epoch [1000/1000], 

In [17]:
# Testing the model
with torch.no_grad():
    model.eval()
    correct = 0
    total = 0
    for batch_X, batch_y in test_loader:
        outputs = model(batch_X) # batch_size x 3
        _, predicted = torch.max(outputs, 1) # Get the maximum of 3 values for every sample in batch.
        total += batch_y.size(0)
        correct += (predicted == batch_y).sum().item()

    accuracy = correct / total
    print(f'Test Accuracy: {accuracy:.2f}')

Test Accuracy: 1.00
