<a href="https://colab.research.google.com/github/monishm-2004/ML-from-Scratch/blob/main/first_NN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:

import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_iris
from torch.utils.data import DataLoader, TensorDataset

In [8]:
dataset = load_iris()
X = dataset.data
y = dataset.target
print(type(X))
print(X.shape, y.shape)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

<class 'numpy.ndarray'>
(150, 4) (150,)


In [6]:

X_train.shape, X_test.shape, y_train.shape, y_test.shape

((120, 4), (30, 4), (120,), (30,))

In [17]:
X_train_tensor =torch.FloatTensor(X_train)
y_train_tensor = torch.LongTensor(y_train)
X_test_tensor = torch.FloatTensor(X_test)
y_test_tensor = torch.LongTensor(y_test)
#print(X_Train_Tensor)
print(type(y_train_tensor))
print(X_test_tensor.shape)
print(y_test_tensor.shape)

<class 'torch.Tensor'>
torch.Size([30, 4])
torch.Size([30])


In [18]:

batch_size = 5 # passing in 5 X, y pairs through the network at a time for 1 parameter update of network
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
test_dataset = TensorDataset(X_test_tensor, y_test_tensor)
train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=True)


Defining your NN architecture

In [23]:
class NeuralNetwork(nn.Module):
  def __init__(self,input_size,hidden_size,num_classes):
    super().__init__()
    self.fc1 = nn.Linear(input_size, hidden_size)
    self.relu = nn.ReLU()
    self.fc2 = nn.Linear(hidden_size, num_classes)

  def forward(self,X):
    out = self.fc1(X)
    out = self.relu(out)
    out = self.fc2(out)
    return out

In [24]:
# Hyperparameters
input_size = 4  # Number of features in the Iris dataset
hidden_size = 6
num_classes = 3  # Iris dataset has 3 classes (setosa, versicolor, virginica)
learning_rate = 0.001 # How large should the jump in gradient be
num_epochs = 1_000 # Number of iterations over the dataset during the training process

# Create the neural network
model = NeuralNetwork(input_size, hidden_size, num_classes)
model

NeuralNetwork(
  (fc1): Linear(in_features=4, out_features=6, bias=True)
  (relu): ReLU()
  (fc2): Linear(in_features=6, out_features=3, bias=True)
)

In [25]:

# Loss function
criterion = nn.CrossEntropyLoss()

# Optimizer
optimizer = optim.Adam(model.parameters(), lr=learning_rate)
optimizer

Adam (
Parameter Group 0
    amsgrad: False
    betas: (0.9, 0.999)
    capturable: False
    decoupled_weight_decay: False
    differentiable: False
    eps: 1e-08
    foreach: None
    fused: None
    lr: 0.001
    maximize: False
    weight_decay: 0
)

In [26]:

# Training the model
for epoch in range(num_epochs):
    for batch_X, batch_y in train_loader:
        # Forward pass
        outputs = model(batch_X)
        loss = criterion(outputs, batch_y)

        # Backward and optimize
        optimizer.zero_grad() # Clear gradients
        loss.backward() # Compute gradients
        optimizer.step() # Update parameters of Neural Network

    if (epoch + 1) % 100 == 0:
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')
        print(batch_X.shape, batch_y.shape, loss)


Epoch [100/1000], Loss: 0.2168
torch.Size([5, 4]) torch.Size([5]) tensor(0.2168, grad_fn=<NllLossBackward0>)
Epoch [200/1000], Loss: 0.0119
torch.Size([5, 4]) torch.Size([5]) tensor(0.0119, grad_fn=<NllLossBackward0>)
Epoch [300/1000], Loss: 0.0733
torch.Size([5, 4]) torch.Size([5]) tensor(0.0733, grad_fn=<NllLossBackward0>)
Epoch [400/1000], Loss: 0.1098
torch.Size([5, 4]) torch.Size([5]) tensor(0.1098, grad_fn=<NllLossBackward0>)
Epoch [500/1000], Loss: 0.0844
torch.Size([5, 4]) torch.Size([5]) tensor(0.0844, grad_fn=<NllLossBackward0>)
Epoch [600/1000], Loss: 0.0961
torch.Size([5, 4]) torch.Size([5]) tensor(0.0961, grad_fn=<NllLossBackward0>)
Epoch [700/1000], Loss: 0.0118
torch.Size([5, 4]) torch.Size([5]) tensor(0.0118, grad_fn=<NllLossBackward0>)
Epoch [800/1000], Loss: 0.0754
torch.Size([5, 4]) torch.Size([5]) tensor(0.0754, grad_fn=<NllLossBackward0>)
Epoch [900/1000], Loss: 0.4900
torch.Size([5, 4]) torch.Size([5]) tensor(0.4900, grad_fn=<NllLossBackward0>)
Epoch [1000/1000], 

In [27]:
# Testing the model
with torch.no_grad():
    model.eval()
    correct = 0
    total = 0
    for batch_X, batch_y in test_loader:
        outputs = model(batch_X) # batch_size x 3
        _, predicted = torch.max(outputs, 1) # Get the maximum of 3 values for every sample in batch.
        total += batch_y.size(0)
        correct += (predicted == batch_y).sum().item()

    accuracy = correct / total
    print(f'Test Accuracy: {accuracy:.2f}')

Test Accuracy: 1.00
