<a href="https://colab.research.google.com/github/rickygrosvenor-pramanick/learn-ml/blob/main/pytorch/nn_workflow_practice.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Create Dataset
from sklearn.datasets import make_circles

# Generation settings, named so they are easy to find and tweak.
N_SAMPLES = 1000      # total number of points to generate
NOISE = 0.7           # per the scikit-learn docs: std-dev of the Gaussian noise added to the points
FACTOR = 0.8          # per the scikit-learn docs: scale of the inner circle relative to the outer one
RANDOM_STATE = 42     # fixed seed so the same points are generated on every run

# NOTE(review): NOISE=0.7 is large compared with the gap implied by FACTOR=0.8,
# which likely makes the two classes overlap heavily — confirm this is intended.
X, y = make_circles(
    n_samples=N_SAMPLES,
    noise=NOISE,
    factor=FACTOR,
    random_state=RANDOM_STATE,
)

In [2]:
# Convert to Tensors
import torch

# X and y are rebound to tensors here, so the cell must tolerate being run again.
# torch.as_tensor accepts both NumPy arrays and tensors (torch.from_numpy raises a
# TypeError when handed a tensor), and dtype=torch.float yields the same float32
# result the original .type(torch.float) conversion produced.
X = torch.as_tensor(X, dtype=torch.float)
y = torch.as_tensor(y, dtype=torch.float)

In [3]:
# Train-Test Split
from sklearn.model_selection import train_test_split

# Hold out 20% of the samples for testing; the fixed random_state keeps the
# split identical from run to run.
splits = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)
X_train, X_test, y_train, y_test = splits

In [4]:
# Device-agnostic setup: use the GPU when CUDA is available, otherwise the CPU
import torch
from torch import nn

if torch.cuda.is_available():
  device = "cuda"
else:
  device = "cpu"

In [5]:
# Investigate Data Format
# Each row of X holds 2 numbers (the model's input), and each entry of y is 1 number (the label to predict)
X_train[:5], y_train[:5]

(tensor([[ 1.3037,  0.1872],
         [-0.3370, -0.6710],
         [-1.4545,  0.4610],
         [-0.7890, -0.2558],
         [-0.0992,  1.1932]]),
 tensor([1., 0., 0., 0., 1.]))

In [6]:
# Create Model Class
class CircleModelV1(nn.Module):
  """Small fully-connected network: 2 input features -> 8 -> 3 -> 1 output value.

  The three linear layers are applied back to back with no activation function
  in between, so the composed mapping is still affine in the input.
  """

  def __init__(self):
    super().__init__()
    # Attribute names are part of the state_dict keys, so they are kept stable.
    self.layer_1 = nn.Linear(2, 8)
    self.layer_2 = nn.Linear(8, 3)
    self.layer_3 = nn.Linear(3, 1)

  def forward(self, x):
    """Pass x through layer_1, layer_2 and layer_3 in order and return the result."""
    hidden = self.layer_1(x)
    hidden = self.layer_2(hidden)
    return self.layer_3(hidden)

In [7]:
# Create Model Instance
# Build the network first, then move its parameters onto the selected device.
model1 = CircleModelV1()
model1 = model1.to(device)
model1

CircleModelV1(
  (layer_1): Linear(in_features=2, out_features=8, bias=True)
  (layer_2): Linear(in_features=8, out_features=3, bias=True)
  (layer_3): Linear(in_features=3, out_features=1, bias=True)
)

In [15]:
# Initial Predictions without Training
import math  # NOTE(review): not referenced anywhere in this cell

# Move the inputs to the model's device before the forward pass.
test_inputs = X_test.to(device)

# inference_mode: gradients are not needed just to look at the outputs.
with torch.inference_mode():
  untrained_preds = model1(test_inputs)

# First five raw model outputs next to the first five true labels
untrained_preds[:5], y_test[:5]

(tensor([[-0.1718],
         [-0.4396],
         [-0.1408],
         [-0.2943],
         [-0.1577]]),
 tensor([1., 0., 1., 0., 1.]))

In [9]:
# Create a loss function and an optimiser

# For Regression, you may want MSE or MAE
# For Classification, Binary Cross Entropy or Categorical Cross Entropy

# The sigmoid *module* lives in torch.nn; the top-level torch namespace only
# provides the function torch.sigmoid, so torch.Sigmoid() would raise AttributeError.
sigmoid = nn.Sigmoid()

loss_fn = nn.BCELoss() # requires inputs to have gone through the sigmoid activation function prior to input

# loss_fn = nn.BCEWithLogitsLoss() # has sigmoid activation function built in
# This is somewhat similar to
# nn.Sequential(
#     nn.Sigmoid(),
#     nn.BCELoss()
# )
# nn.BCEWithLogitsLoss() is more numerically stable than plain nn.Sigmoid -> nn.BCELoss,
# but we'll do it the other way here to get a better sense of how the pieces work.

# Plain stochastic gradient descent over all of model1's parameters
optimiser = torch.optim.SGD(params=model1.parameters(), lr=0.1)


In [34]:
# Inspect the model's current parameters (each layer's weight and bias) —
# these are the tensors getting updated by SGD
model1.state_dict()

OrderedDict([('layer_1.weight',
              tensor([[ 0.6559,  0.6475],
                      [-0.2728, -0.0245],
                      [-0.6433, -0.2504],
                      [-0.2940, -0.4931],
                      [ 0.6968, -0.6576],
                      [-0.2427,  0.5973],
                      [-0.6657, -0.5961],
                      [-0.4022,  0.4858]])),
             ('layer_1.bias',
              tensor([ 0.1872,  0.3923, -0.1614, -0.6161, -0.1182,  0.0104, -0.1730, -0.6483])),
             ('layer_2.weight',
              tensor([[ 0.2580, -0.1306,  0.3463, -0.1854, -0.0450, -0.1453, -0.0555,  0.0951],
                      [ 0.1459, -0.2665,  0.2170,  0.2830, -0.0563, -0.3383,  0.2932,  0.3381],
                      [-0.0817, -0.1348,  0.3110,  0.1524,  0.1587, -0.1425, -0.1264,  0.2304]])),
             ('layer_2.bias', tensor([-0.3515, -0.1798,  0.0155])),
             ('layer_3.weight', tensor([[-0.1521, -0.5621, -0.0147]])),
             ('layer_3.bias', tensor([-

In [12]:
# Element-wise equality demo: compare once, then view the result three ways
# (count as a tensor, count as a plain Python number, and the boolean mask itself)
tensor1 = torch.tensor([1, 2, 3])
tensor2 = torch.tensor([1, 9, 3])
matches = torch.eq(tensor1, tensor2)  # True wherever the two tensors agree
matches.sum(), matches.sum().item(), matches

(tensor(2), 2, tensor([ True, False,  True]))

In [14]:
# Create an Accuracy Function - https://www.youtube.com/watch?v=RYFViaaJxE8&ab_channel=MathsResource as a Percentage
def accuracy_fn(y_true, y_pred):
  """Return the percentage (0-100) of predictions that equal the true labels.

  Args:
    y_true: tensor of ground-truth labels.
    y_pred: tensor of predicted labels holding the same number of elements.

  Returns:
    Accuracy as a float percentage; 0.0 when there are no elements to compare.

  Raises:
    ValueError: if the two tensors hold a different number of elements.
  """
  # Flatten both sides so that e.g. shape (N, 1) vs (N,) is compared element by
  # element instead of being broadcast to an (N, N) grid by torch.eq.
  y_true = y_true.reshape(-1)
  y_pred = y_pred.reshape(-1)

  total = y_pred.numel()
  if y_true.numel() != total:
    raise ValueError(
        f"y_true has {y_true.numel()} elements but y_pred has {total}"
    )

  # Nothing to score: avoid dividing by zero.
  if total == 0:
    return 0.0

  correct = torch.eq(y_true, y_pred).sum().item()
  return (correct / total) * 100

In [17]:
# From raw model outputs to class labels (no training happens in this cell)
#
# Logits are the raw outputs of our model, i.e. what the forward() method of the
# model class returns. The conversion chain is:
#   logits -> prediction probabilities -> prediction labels
# An activation function turns logits into prediction probabilities:
# generally sigmoid for binary classification and softmax for multiclass classification.

# Step 1: logits straight from the model
y_logits = model1(X_test.to(device))

# Step 2: sigmoid turns each logit into a prediction probability
y_pred_probs = y_logits.sigmoid()

# Step 3: rounding each probability gives the classification category (prediction label)
y_preds = y_pred_probs.round()
y_preds[:5], y_test[:5]

(tensor([[0.],
         [0.],
         [0.],
         [0.],
         [0.]], grad_fn=<SliceBackward0>),
 tensor([1., 0., 1., 0., 1.]))

In [19]:
# As seen from above, y_preds has an extra redundant dimension:
# each prediction sits in its own one-element row, whereas y_test is printed as a flat 1-D tensor
y_preds.size()

torch.Size([200, 1])

In [22]:
# So we squeeze the Tensor: squeeze() drops the size-1 dimension,
# giving predictions the same flat layout as y_test
y_preds.squeeze()[:5], y_test[:5]

(tensor([0., 0., 0., 0., 0.], grad_fn=<SliceBackward0>),
 tensor([1., 0., 1., 0., 1.]))

In [24]:
# Building a Training and Testing Loop
# Every epoch performs one update on the whole training set, then scores the test set.
torch.manual_seed(42)

epochs = 1000

# Move every split onto the target device once, before the loop starts
X_train, y_train = X_train.to(device), y_train.to(device)
X_test, y_test = X_test.to(device), y_test.to(device)

for epoch in range(epochs):
  ### Training
  model1.train()

  # 1. Forward pass: raw logits -> probabilities -> rounded 0/1 labels
  y_logits = model1(X_train).squeeze()
  train_probs = torch.sigmoid(y_logits)
  y_preds = torch.round(train_probs)

  # 2. Loss/accuracy - the loss function is fed probabilities, not logits
  #    https://pytorch.org/docs/stable/generated/torch.nn.BCELoss.html
  loss = loss_fn(train_probs, y_train)
  accuracy = accuracy_fn(y_true=y_train, y_pred=y_preds)

  # 3. Reset the gradients left over from the previous step
  optimiser.zero_grad()

  # 4. Backpropagate the loss
  loss.backward()

  # 5. Update the parameters
  optimiser.step()

  ### Testing
  model1.eval()
  with torch.inference_mode():
    # 1. Forward pass on the held-out data
    test_logits = model1(X_test).squeeze()
    test_probs = torch.sigmoid(test_logits)
    test_pred = torch.round(test_probs)

    # 2. Same metrics as for training, computed on the test split
    test_loss = loss_fn(test_probs, y_test)
    test_accuracy = accuracy_fn(y_true=y_test, y_pred=test_pred)

    # Report progress on every 10th epoch (0, 10, 20, ...)
    if epoch % 10 == 0:
      print(f"Epoch: {epoch} | Loss: {loss:.5f}, Accuracy: {accuracy:.2f}% | Test loss: {test_loss:.5f}, Test acc: {test_accuracy:.2f}%")



Epoch: 0 | Loss: 0.69860, Accuracy: 49.38% | Test loss: 0.69912, Test acc: 51.50%
Epoch: 10 | Loss: 0.69639, Accuracy: 49.75% | Test loss: 0.69718, Test acc: 49.50%
Epoch: 20 | Loss: 0.69552, Accuracy: 49.00% | Test loss: 0.69627, Test acc: 51.50%
Epoch: 30 | Loss: 0.69501, Accuracy: 49.12% | Test loss: 0.69565, Test acc: 51.00%
Epoch: 40 | Loss: 0.69464, Accuracy: 49.75% | Test loss: 0.69519, Test acc: 51.00%
Epoch: 50 | Loss: 0.69436, Accuracy: 50.00% | Test loss: 0.69483, Test acc: 52.50%
Epoch: 60 | Loss: 0.69414, Accuracy: 50.25% | Test loss: 0.69455, Test acc: 52.50%
Epoch: 70 | Loss: 0.69397, Accuracy: 50.00% | Test loss: 0.69433, Test acc: 52.50%
Epoch: 80 | Loss: 0.69382, Accuracy: 50.12% | Test loss: 0.69416, Test acc: 52.50%
Epoch: 90 | Loss: 0.69370, Accuracy: 50.00% | Test loss: 0.69402, Test acc: 52.50%
Epoch: 100 | Loss: 0.69360, Accuracy: 50.00% | Test loss: 0.69392, Test acc: 52.50%
Epoch: 110 | Loss: 0.69352, Accuracy: 49.88% | Test loss: 0.69384, Test acc: 52.50%
Epo