<a href="https://colab.research.google.com/github/swethag04/ml-projects/blob/main/nlp/pytorch_tutorial.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [53]:
import torch

In [54]:
torch.__version__

'2.1.0+cu121'

In [55]:
# check if NVIDIA gpu is available
torch.cuda.is_available()

False

In [56]:
# check if the Mac has an Apple silicon chip (MPS backend) to accelerate PyTorch code execution
print(torch.backends.mps.is_available())

False


PyTorch tensors are data containers for array-like structures.

*   A scalar is a 0 dimensional tensor
*   A vector is a 1-dimensional tensor
*   A matrix is a 2-dimensional tensor



In [57]:
# Build integer tensors of increasing rank from plain Python literals.

# rank 0: a bare scalar
tensor0d = torch.tensor(1)

# rank 1: three elements
tensor1d = torch.tensor([1, 2, 3])

# rank 2: 2 rows x 3 columns
tensor2d = torch.tensor([
    [1, 2, 3],
    [4, 5, 6],
])

# rank 3: one outer entry holding a 4 x 2 block, i.e. shape (1, 4, 2)
tensor3d = torch.tensor([
    [[1, 2], [3, 4], [5, 6], [7, 8]],
])

In [58]:
print(tensor1d)

tensor([1, 2, 3])


In [59]:
print(tensor1d.dtype)

torch.int64


In [60]:
floatvec =  torch.tensor([1.0,2.0,3.0])
print(floatvec.dtype)

torch.float32


In [61]:
# changing 64 bit int tensor to 32 bit float tensor
floatvec = tensor1d.to(torch.float32)
print(floatvec.dtype)

torch.float32


In [62]:
print(tensor1d.shape)

torch.Size([3])


In [63]:
print(tensor2d.shape)

torch.Size([2, 3])


In [64]:
# reshape tensor
print(tensor2d.reshape(3,2))

tensor([[1, 2],
        [3, 4],
        [5, 6]])


In [65]:
print(tensor2d)

tensor([[1, 2, 3],
        [4, 5, 6]])


In [66]:
# another way to reshape tensor
print(tensor2d.view(3,2))

tensor([[1, 2],
        [3, 4],
        [5, 6]])


In [67]:
# Transposing a tensor
print(tensor2d.T)

tensor([[1, 4],
        [2, 5],
        [3, 6]])


In [68]:
# Multiply 2 matrices
print(tensor2d.matmul(tensor2d.T))

tensor([[14, 32],
        [32, 77]])


In [69]:
# multiply using @ operator
print(tensor2d @ tensor2d.T)

tensor([[14, 32],
        [32, 77]])


In [70]:
# Forward pass of a single-feature logistic regression
import torch.nn.functional as F

# Model pieces: one input feature, its weight, and a zero bias unit
x1 = torch.tensor([1.1])
w1 = torch.tensor([2.2])
b = torch.tensor([0.0])

# Ground-truth label for this one example
y = torch.tensor([1.0])

# Net input: weighted feature plus bias
z = w1 * x1 + b

# Activation: the sigmoid turns the net input into the predicted probability
a = torch.sigmoid(z)

# Binary cross-entropy between the predicted probability and the true label
loss = F.binary_cross_entropy(input=a, target=y)

print(a)
print(loss)


tensor([0.9183])
tensor(0.0852)


In [71]:
# Computing gradients via autograd
from torch.autograd import grad

# Fixed data: the input feature and its true label
x1 = torch.tensor([1.1])
y = torch.tensor([1.0])

# Trainable parameters: requires_grad=True asks autograd to track them
w1 = torch.tensor([2.2], requires_grad=True)
b = torch.tensor([0.0], requires_grad=True)

# Forward pass: net input -> sigmoid -> binary cross-entropy
z = w1 * x1 + b
a = torch.sigmoid(z)
loss = F.binary_cross_entropy(input=a, target=y)

# retain_graph=True keeps the computation graph alive after each call, so the
# same loss can be differentiated more than once.
grad_L_w1 = grad(outputs=loss, inputs=w1, retain_graph=True)
grad_L_b = grad(outputs=loss, inputs=b, retain_graph=True)

# grad() returns a tuple with one gradient tensor per requested input
print(grad_L_w1)
print(grad_L_b)

(tensor([-0.0898]),)
(tensor([-0.0817]),)


In [72]:
# Instead of calling grad() manually for each parameter, a single backward()
# call on the loss fills in the .grad attribute of w1 and b.
# NOTE(review): backward() is called here without retain_graph, so re-running
# only this cell (without re-running the cell that builds `loss`) presumably
# fails or accumulates into .grad — confirm before relying on re-execution.
loss.backward()
print(w1.grad)
print(b.grad)

tensor([-0.0898])
tensor([-0.0817])


In [73]:
# Multilayer perceptron
class NeuralNetwork(torch.nn.Module):
  """Multilayer perceptron with two hidden layers (30 and 20 units).

  Args:
    num_inputs: number of input features per example.
    num_outputs: number of output units (one logit per unit).
  """

  def __init__(self, num_inputs, num_outputs):
    super().__init__()

    # Layers are created in network order, input side first.
    hidden_1 = [torch.nn.Linear(num_inputs, 30), torch.nn.ReLU()]
    hidden_2 = [torch.nn.Linear(30, 20), torch.nn.ReLU()]
    output = [torch.nn.Linear(20, num_outputs)]

    self.layers = torch.nn.Sequential(*hidden_1, *hidden_2, *output)

  def forward(self, x):
    """Run x through the layer stack and return the raw logits."""
    # No activation follows the last Linear layer, so these values are logits.
    return self.layers(x)

In [74]:
model = NeuralNetwork(50,3)
print(model)

NeuralNetwork(
  (layers): Sequential(
    (0): Linear(in_features=50, out_features=30, bias=True)
    (1): ReLU()
    (2): Linear(in_features=30, out_features=20, bias=True)
    (3): ReLU()
    (4): Linear(in_features=20, out_features=3, bias=True)
  )
)


In [75]:
num_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
print("Total number of trainable model params: ", num_params)

Total number of trainable model params:  2213


In [76]:
print(model.layers[0].weight)

Parameter containing:
tensor([[-0.0281,  0.1284,  0.1009,  ..., -0.0914, -0.1354,  0.1046],
        [ 0.1062, -0.0360,  0.0624,  ...,  0.0697, -0.0964,  0.0765],
        [-0.0045,  0.0584, -0.0003,  ..., -0.1287, -0.0371, -0.1072],
        ...,
        [-0.1057,  0.1387, -0.1316,  ...,  0.0019, -0.0163,  0.0589],
        [-0.1053, -0.0406, -0.1302,  ...,  0.0139, -0.0647, -0.0738],
        [-0.0511,  0.0880,  0.0709,  ...,  0.1037,  0.0555,  0.0037]],
       requires_grad=True)


In [77]:
print(model.layers[0].weight.shape)

torch.Size([30, 50])


In [78]:
model.layers[0].bias

Parameter containing:
tensor([ 0.0560, -0.0131,  0.1141,  0.0025,  0.0788,  0.1260,  0.0459,  0.0039,
        -0.0099, -0.1152, -0.1105,  0.0472,  0.0131,  0.0366, -0.1277,  0.0792,
        -0.0146,  0.0551, -0.0780,  0.0295,  0.0728, -0.1057,  0.0126, -0.0490,
         0.1161, -0.0296, -0.0827, -0.0906, -0.0129,  0.0642],
       requires_grad=True)

In [79]:
print(model.layers[0].bias.shape)

torch.Size([30])


In [80]:
torch.manual_seed(123)
model = NeuralNetwork(50,3)
print(model.layers[0].weight)

Parameter containing:
tensor([[-0.0577,  0.0047, -0.0702,  ...,  0.0222,  0.1260,  0.0865],
        [ 0.0502,  0.0307,  0.0333,  ...,  0.0951,  0.1134, -0.0297],
        [ 0.1077, -0.1108,  0.0122,  ...,  0.0108, -0.1049, -0.1063],
        ...,
        [-0.0787,  0.1259,  0.0803,  ...,  0.1218,  0.1303, -0.1351],
        [ 0.1359,  0.0175, -0.0673,  ...,  0.0674,  0.0676,  0.1058],
        [ 0.0790,  0.1343, -0.0293,  ...,  0.0344, -0.0971, -0.0509]],
       requires_grad=True)


In [81]:
torch.manual_seed(123)
X = torch.rand((1,50))
out = model(X)
print(out)

tensor([[-0.1262,  0.1080, -0.1792]], grad_fn=<AddmmBackward0>)


In [82]:
with torch.no_grad():
  out = model(X)
print(out)

tensor([[-0.1262,  0.1080, -0.1792]])


In [83]:
with torch.no_grad():
  out = torch.softmax(model(X), dim=1)
print(out)

tensor([[0.3113, 0.3934, 0.2952]])


In [84]:
# Creating a small toy dataset: two features per example, binary class labels

# Training split: five examples (three of class 0, two of class 1)
X_train = torch.tensor([
    [-1.2, 3.1],
    [0.9, 2.9],
    [-0.5, 2.6],
    [2.3, -1.1],
    [2.7, -1.5],
])
y_train = torch.tensor([0, 0, 0, 1, 1])

# Test split: one example per class
X_test = torch.tensor([
    [-0.8, 2.8],
    [2.6, -1.6],
])
y_test = torch.tensor([0, 1])

In [85]:
# Defining a custom Dataset class
from torch.utils.data import Dataset

class ToyDataset(Dataset):
  """Map-style dataset pairing each feature row with its label.

  Args:
    X: feature container, indexed by example position.
    y: label container; its first dimension gives the dataset size.
  """

  def __init__(self, X, y):
    self.features, self.labels = X, y

  def __getitem__(self, index):
    """Return the (features, label) pair stored at position index."""
    return self.features[index], self.labels[index]

  def __len__(self):
    """Return the number of examples, read from the labels' first dimension."""
    return self.labels.shape[0]

train_ds = ToyDataset(X_train, y_train)
test_ds = ToyDataset(X_test, y_test)

In [86]:
print(len(train_ds))

5


In [87]:
# Instantiating the data loaders
from torch.utils.data import DataLoader

# Seed the global RNG so the shuffled training order is reproducible
torch.manual_seed(123)

# Training batches: two examples each, drawn in shuffled order
train_loader = DataLoader(train_ds, batch_size=2, shuffle=True, num_workers=0)

# Test batches: two examples each, kept in dataset order
test_loader = DataLoader(test_ds, batch_size=2, shuffle=False, num_workers=0)

In [88]:
for idx, (x,y) in enumerate(train_loader):
  print(f"Batch {idx+1}: ", x, y)

Batch 1:  tensor([[ 2.3000, -1.1000],
        [ 0.9000,  2.9000]]) tensor([1, 0])
Batch 2:  tensor([[-1.2000,  3.1000],
        [-0.5000,  2.6000]]) tensor([0, 0])
Batch 3:  tensor([[ 2.7000, -1.5000]]) tensor([1])


In [89]:
# A training loader that drops the last batch when it is incomplete
train_loader = DataLoader(train_ds, batch_size=2, shuffle=True, num_workers=0, drop_last=True)

# Print every batch the loader yields, numbered from 1
for idx, (x, y) in enumerate(train_loader):
  print(f"Batch {idx+1}:", x, y)

Batch 1: tensor([[-1.2000,  3.1000],
        [-0.5000,  2.6000]]) tensor([0, 0])
Batch 2: tensor([[ 2.3000, -1.1000],
        [ 0.9000,  2.9000]]) tensor([1, 0])


In [90]:
#### Neural network training in PyTorch

import torch.nn.functional as F

# Seed before building the model so the initial weights are reproducible
torch.manual_seed(123)
model = NeuralNetwork(num_inputs=2, num_outputs=2)
optimizer = torch.optim.SGD(model.parameters(), lr=0.5)

num_epochs = 3

for epoch in range(num_epochs):
  # Training mode must stay on for every batch of the epoch
  model.train()
  for batch_idx, (features, labels) in enumerate(train_loader):
    logits = model(features)
    # cross_entropy is given the raw logits and the integer class labels
    loss = F.cross_entropy(logits, labels)

    # set the gradients from previous round to zero to prevent unintended gradient accumulation
    optimizer.zero_grad()
    # compute the gradients of the loss wrt the model parameters
    loss.backward()
    # optimizer uses the gradient to update the model parameters
    optimizer.step()

    # logging
    print(f"Epoch: {epoch+1:03d}/{num_epochs:03d}"
          f" | Batch {batch_idx:03d}/{len(train_loader):03d}"
          f" | Train Loss: {loss:.2f}")

  # Switch to evaluation mode only after all batches of the epoch are done.
  # Doing this inside the batch loop would put the model in evaluation mode
  # for every batch after the first one in each epoch.
  model.eval()

Epoch: 001/003 | Batch 000/002 | Train Loss: 0.75
Epoch: 001/003 | Batch 001/002 | Train Loss: 0.65
Epoch: 002/003 | Batch 000/002 | Train Loss: 0.39
Epoch: 002/003 | Batch 001/002 | Train Loss: 0.25
Epoch: 003/003 | Batch 000/002 | Train Loss: 0.02
Epoch: 003/003 | Batch 001/002 | Train Loss: 0.00


In [91]:
model.eval()
with torch.no_grad():
  outputs = model(X_train)
print(outputs)

tensor([[ 3.2347, -4.7798],
        [ 2.3164, -3.7002],
        [ 2.5392, -3.8612],
        [-1.5550,  1.5517],
        [-1.8102,  1.8239]])


In [92]:
# to obtain class membership probabilities
torch.set_printoptions(sci_mode=False)
probas = torch.softmax(outputs, dim=1)
print(probas)

tensor([[    0.9997,     0.0003],
        [    0.9976,     0.0024],
        [    0.9983,     0.0017],
        [    0.0428,     0.9572],
        [    0.0257,     0.9743]])


In [93]:
predictions = torch.argmax(outputs, dim=1)
print(predictions)

tensor([0, 0, 0, 1, 1])


In [94]:
predictions == y_train

tensor([True, True, True, True, True])

In [95]:
torch.sum(predictions == y_train)

tensor(5)

In [96]:
# compute prediction accuracy
def compute_accuracy(model, dataloader):
  """Return the fraction of examples in dataloader that model classifies correctly.

  Args:
    model: module mapping a batch of features to logits, classes along dim 1.
    dataloader: iterable yielding (features, labels) batches.

  Returns:
    The accuracy as a Python float, or NaN when dataloader yields no examples.

  Note:
    The model is switched to evaluation mode and left in that mode.
  """
  model = model.eval()
  correct = 0
  total_examples = 0

  # Gradients are never needed for evaluation, so tracking is disabled for the whole loop.
  with torch.no_grad():
    for features, labels in dataloader:
      logits = model(features)
      # The predicted class is the index of the largest logit in each row
      predictions = torch.argmax(logits, dim=1)
      compare = labels == predictions
      correct += torch.sum(compare)
      total_examples += len(compare)

  # Guard the division: accuracy is undefined when nothing was evaluated.
  if total_examples == 0:
    return float("nan")
  return (correct / total_examples).item()

In [97]:
print(compute_accuracy(model, train_loader))


1.0


In [98]:
print(compute_accuracy(model, test_loader))


1.0


In [99]:
torch.save(model.state_dict(), "model.pth")

In [100]:
# Rebuild the architecture first: the saved file holds only the state_dict
model = NeuralNetwork(2, 2)
# weights_only=True restricts unpickling to tensors and plain containers,
# which is all a state_dict needs, instead of arbitrary pickled objects.
model.load_state_dict(torch.load("model.pth", weights_only=True))

<All keys matched successfully>