<a href="https://colab.research.google.com/github/rickygrosvenor-pramanick/learn-ml/blob/main/pytorch/nn_workflow_practice.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
# Create Dataset
from sklearn.datasets import make_circles

X, y = make_circles(n_samples=1000,
                    noise=0.7,
                    factor=0.8,
                    random_state=42)

In [3]:
# Convert to Tensors
import torch

X = torch.from_numpy(X).type(torch.float)
y = torch.from_numpy(y).type(torch.float)

In [4]:
# Train-Test Split
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [5]:
# Agnostic Code
import torch
from torch import nn

device = "cuda" if torch.cuda.is_available() else "cpu"

In [6]:
# Investigate Data Format
# X gives 2 numbers as input, y returns 1 number as output
X_train[:5], y_train[:5]

(tensor([[ 1.3037,  0.1872],
         [-0.3370, -0.6710],
         [-1.4545,  0.4610],
         [-0.7890, -0.2558],
         [-0.0992,  1.1932]]),
 tensor([1., 0., 0., 0., 1.]))

In [7]:
# Create Model Class
class CircleModelV1(nn.Module):
  def __init__(self):
    super().__init__()
    self.layer_1 = nn.Linear(in_features=2, out_features=8)
    self.layer_2 = nn.Linear(in_features=8, out_features=3)
    self.layer_3 = nn.Linear(in_features=3, out_features=1)

  def forward(self, x):
    return self.layer_3(self.layer_2(self.layer_1(x)))

In [8]:
# Create Model Instance
model1 = CircleModelV1().to(device)
model1

CircleModelV1(
  (layer_1): Linear(in_features=2, out_features=8, bias=True)
  (layer_2): Linear(in_features=8, out_features=3, bias=True)
  (layer_3): Linear(in_features=3, out_features=1, bias=True)
)

In [31]:
# Initial Predictions without Training
import math
with torch.inference_mode():
  untrained_preds = model1(X_test.to(device))
  untrained_preds_to_int = untrained_preds.round()
untrained_preds[:5], untrained_preds_to_int[:5], y_test[:5]

(tensor([[ 0.3045],
         [ 0.5390],
         [ 0.2294],
         [ 0.5790],
         [-0.0679]]),
 tensor([[0.],
         [1.],
         [0.],
         [1.],
         [-0.]]),
 tensor([1., 0., 1., 0., 1.]))

In [32]:
# Create a loss function

# For Regression, you may want MSE or MAE
# For Classification, Binary Cross Entropy or Categorical Cross Entropy

loss_fn = nn.BCELoss() # requires inputs to have gone through the sigmoid activation function prior to input

# loss_fn = nn.BCEWithLogitsLoss() # has sigmoid activation function built in
# This is somewhat similar to
# nn.Sequential(
#     nn.Sigmoid(),
#     nn.BCELoss()
# )
# nn.BCEWithLogitsLoss() is more numerically stable than plain nn.Sigmoid->nn.BCELoss but we'll do it the other way
# to get a sense of the work better

optimiser = torch.optim.SGD(params=model1.parameters(), lr=0.1)


In [34]:
# parameters getting updated by SGD
model1.state_dict()

OrderedDict([('layer_1.weight',
              tensor([[ 0.6559,  0.6475],
                      [-0.2728, -0.0245],
                      [-0.6433, -0.2504],
                      [-0.2940, -0.4931],
                      [ 0.6968, -0.6576],
                      [-0.2427,  0.5973],
                      [-0.6657, -0.5961],
                      [-0.4022,  0.4858]])),
             ('layer_1.bias',
              tensor([ 0.1872,  0.3923, -0.1614, -0.6161, -0.1182,  0.0104, -0.1730, -0.6483])),
             ('layer_2.weight',
              tensor([[ 0.2580, -0.1306,  0.3463, -0.1854, -0.0450, -0.1453, -0.0555,  0.0951],
                      [ 0.1459, -0.2665,  0.2170,  0.2830, -0.0563, -0.3383,  0.2932,  0.3381],
                      [-0.0817, -0.1348,  0.3110,  0.1524,  0.1587, -0.1425, -0.1264,  0.2304]])),
             ('layer_2.bias', tensor([-0.3515, -0.1798,  0.0155])),
             ('layer_3.weight', tensor([[-0.1521, -0.5621, -0.0147]])),
             ('layer_3.bias', tensor([-