### Pytorch importing & version checking

In [1]:
import torch

In [2]:
torch.__version__

'2.6.0+cu124'

In [3]:
torch.cuda.is_available()

True

### Understanding Tensors

In [4]:
# Rank 0: a scalar tensor built from a plain Python int.
tensor0d = torch.tensor(1)

# Rank 1: a vector built from a flat list.
tensor1d = torch.tensor([1, 2, 3])

# Rank 2: a matrix built from a list of rows.
tensor2d = torch.tensor([
    [1, 2],
    [3, 4],
])

# Rank 3: two 2x2 matrices stacked along a new leading dimension.
tensor3d = torch.tensor([
    [[1, 2], [3, 4]],
    [[5, 6], [7, 8]],
])



### tensor data types

In [5]:
print(tensor0d.dtype) # pytorch adopts the default 64-bit integer data type

torch.int64


In [6]:
tensor1dvec=torch.tensor([1.0,2.0,3.0]) ## pytorch adopts the default 32-bit precision if we create tensors from python floats
print(tensor1dvec.dtype)

## This choice is primarily due to the balance between precision and computational efficiency

torch.float32


**Moreover, it is possible to change the precision using a tensor's .to method**

In [7]:
floatvec=tensor1d.to(torch.float32)
print(floatvec.dtype)

torch.float32


**common PyTorch tensor operation**

In [8]:
tensor2d=torch.tensor([[1,2,3],[4,5,6]])

In [9]:
print(tensor2d.shape)

torch.Size([2, 3])


**To reshape the tensor into a 3×2 tensor, we can use the .reshape method**

In [10]:
print(tensor2d.reshape(3,2))

tensor([[1, 2],
        [3, 4],
        [5, 6]])


**However, there is one more command for reshaping a tensor: .view**

In [11]:
print(tensor2d.view(3,2))


tensor([[1, 2],
        [3, 4],
        [5, 6]])


**Transpose using .T**

In [12]:
print(tensor2d.T)

tensor([[1, 4],
        [2, 5],
        [3, 6]])


**tensor multiplication**

In [13]:
print(tensor2d.matmul(tensor2d.T))

tensor([[14, 32],
        [32, 77]])


**we can also adopt the @ operator**

In [14]:
print(tensor2d@tensor2d.T)

tensor([[14, 32],
        [32, 77]])


### Computational Graph
- **PyTorch's automatic differentiation engine is known as autograd**
- **A computational graph is a directed graph that allows us to express and visualize mathematical expressions**

### A logistic regression forward pass

In [15]:
import torch.nn.functional as F  # conventional short alias that keeps call sites compact

y = torch.tensor([1.0])    # true label
x1 = torch.tensor([1.1])   # input feature
w1 = torch.tensor([2.2])   # weight parameter
b = torch.tensor([0.0])    # bias unit

z = x1 * w1 + b            # net input
a = z.sigmoid()            # activation, used as the model output
loss = F.binary_cross_entropy(input=a, target=y)
print(loss)

tensor(0.0852)


**PyTorch builds a computational graph in the background, and we can use it to calculate gradients of a loss function with respect to the model parameters (here w1 and b) to train the model**

In [17]:
import torch.nn.functional as F
from torch.autograd import grad

# Same toy example as the forward pass, but the weight and bias now track gradients.
y = torch.tensor([1.0])                         # true label
x1 = torch.tensor([1.1])                        # input feature
w1 = torch.tensor([2.2], requires_grad=True)    # weight parameter
b = torch.tensor([0.0], requires_grad=True)     # bias unit

z = x1 * w1 + b                                 # net input
a = torch.sigmoid(z)                            # activation and output
loss = F.binary_cross_entropy(a, y)

# retain_graph=True keeps the graph so that loss can be differentiated more than once.
grad_l_w1 = grad(outputs=loss, inputs=w1, retain_graph=True)
grad_l_b = grad(outputs=loss, inputs=b, retain_graph=True)
print(grad_l_w1)
print(grad_l_b)


(tensor([-0.0898]),)
(tensor([-0.0817]),)


**another automated way to get gradients**

In [18]:
loss.backward()
print(w1.grad)
print(b.grad)

tensor([-0.0898])
tensor([-0.0817])


### Implementing Multilayer neural networks

- **When implementing a neural network in PyTorch, we can subclass the torch.nn.Module class to define our own custom network architecture**
- **Within this subclass, we define the network layers in the `__init__` constructor and specify how the layers interact in the forward method**

### Multilayer perceptron with two hidden layers

In [22]:
class NeuralNetwork(torch.nn.Module):
  """Multilayer perceptron with two hidden layers of 30 and 20 units.

  The input and output widths are constructor arguments, so the same class can
  be reused for datasets with different numbers of features and classes.
  """

  def __init__(self,num_inputs,num_outputs):
    super().__init__()
    # Each Linear takes (input nodes, output nodes); the output width of one
    # layer has to equal the input width of the next.
    hidden_1 = 30
    hidden_2 = 20
    stack = [
        torch.nn.Linear(num_inputs, hidden_1),    # first hidden layer
        torch.nn.ReLU(),                          # non-linearity between layers
        torch.nn.Linear(hidden_1, hidden_2),      # second hidden layer
        torch.nn.ReLU(),
        torch.nn.Linear(hidden_2, num_outputs),   # output layer
    ]
    self.layers = torch.nn.Sequential(*stack)

  def forward(self,x):
    # The outputs of the last layer are the logits; no activation is applied to them here.
    return self.layers(x)


In [23]:
model=NeuralNetwork(50,3)

In [24]:
print(model)

NeuralNetwork(
  (layers): Sequential(
    (0): Linear(in_features=50, out_features=30, bias=True)
    (1): ReLU()
    (2): Linear(in_features=30, out_features=20, bias=True)
    (3): ReLU()
    (4): Linear(in_features=20, out_features=3, bias=True)
  )
)


- **The Sequential class makes our life easier when we have a series of layers to execute in a specific order**


In [27]:
num_params=sum(p.numel() for p in model.parameters() if p.requires_grad)
print("The total number of trainable model parameters:",num_params)

The total number of trainable model parameters: 2213


- **Each parameter for which requires_grad=True counts as a trainable parameter**
- **In the case of our neural network model with the preceding two hidden layers, these trainable parameters are contained in the torch.nn.Linear layers. A linear layer multiplies the inputs with a weight matrix and adds a bias vector. This is sometimes referred to as a *feedforward* or fully connected layer**

In [28]:
print(model.layers[0].weight)

Parameter containing:
tensor([[-0.0298, -0.0778,  0.0777,  ..., -0.0287,  0.0443,  0.0369],
        [ 0.1116, -0.1088,  0.0920,  ...,  0.0024,  0.0763, -0.0471],
        [ 0.1391, -0.1166,  0.1143,  ...,  0.1185, -0.0386,  0.1252],
        ...,
        [ 0.1286,  0.1003,  0.0460,  ...,  0.0189, -0.0264, -0.0798],
        [-0.1259, -0.1143, -0.0968,  ..., -0.1045, -0.1215,  0.0523],
        [-0.0150,  0.1344,  0.0165,  ..., -0.0302, -0.1395,  0.0287]],
       requires_grad=True)


In [29]:
print(model.layers[0].weight.shape)

torch.Size([30, 50])


**The default setting in torch.nn.Linear is requires_grad=True**

In [30]:
torch.manual_seed(123)
X=torch.rand((1,50))
out=model(X)
print(out)

tensor([[-0.0425, -0.2723,  0.0181]], grad_fn=<AddmmBackward0>)


- **In the preceding code, we generated a single random training example x as a toy input and fed it to the model, returning three scores. When we call model(X), it will automatically execute the forward pass of the model.**
- **The forward pass refers to calculating output tensors from input tensors. This involves passing the input data through all the neural network layers, starting from input layer, through hidden layers, and finally to the output layer.**
- **The output tensor also includes a grad_fn value, `<AddmmBackward0>`. Addmm stands for a matrix multiplication (mm) followed by an addition (Add).**
- **If we want to use network for just predictions without training or backpropagation then it is best practice to use torch.no_grad() context manager. This tells PyTorch no need to keep track of gradients which can result in significant savings in memory and computation**

In [31]:
with torch.no_grad():
  out=model(X)
print(out)

tensor([[-0.0425, -0.2723,  0.0181]])


- **In PyTorch, models typically return the outputs of the last layer (the logits) without passing them to a nonlinear activation function**
- **To compute class-membership probabilities, we have to call the softmax function (sigmoid for the binary case)**

In [32]:
with torch.no_grad():
  out=torch.softmax(model(X),dim=1)
print(out)

tensor([[0.3500, 0.2782, 0.3719]])


- **The values can now be interpreted as class-membership probabilities that sum up to 1. The values are roughly equal for this random input, which is expected for a randomly initialized model without training.**

### Data Loaders

- **PyTorch implements a Dataset and a DataLoader class. The Dataset class is used to instantiate objects that define how each data record is loaded. The DataLoader handles how the data is shuffled and assembled into batches**

In [34]:
## Toy dataset: five training records and two test records, each with 2 features and a 0/1 label
X_train = torch.tensor([
    [-1.2, 3.1],
    [-0.9, 1.5],
    [-0.3, 2.3],
    [2.3, -1.1],
    [2.7, -1.5],
])
y_train = torch.tensor([0, 0, 0, 1, 1])

# NOTE(review): the second test record has a positive second feature, whereas both
# class-1 training records have a negative one — confirm that 1.5 (not -1.5) is intended.
X_test = torch.tensor([
    [-0.8, 2.1],
    [2.6, 1.5],
])
y_test = torch.tensor([0, 1])

In [36]:
## Defining a custom Dataset class
from torch.utils.data import Dataset

class ToyDataset(Dataset):
  """Dataset that pairs each feature record with its label by position."""

  def __init__(self,X,y):
    # Store the data that __getitem__ and __len__ read later.
    self.features = X
    self.labels = y

  def __getitem__(self,index):
    # Retrieve exactly one data record together with the corresponding label.
    return self.features[index], self.labels[index]

  def __len__(self):
    # The total length of the dataset is the size of the labels' first dimension.
    label_shape = self.labels.shape
    return label_shape[0]

train_ds=ToyDataset(X_train,y_train)
test_ds=ToyDataset(X_test,y_test)

- **The purpose of this ToyDataset class is to provide the dataset objects from which a PyTorch DataLoader is instantiated**
- **In the `__init__` method, we set up the attributes that we want to access in the `__getitem__` and `__len__` methods.**

In [37]:
## Instantiating data loaders
from torch.utils.data import DataLoader
torch.manual_seed(123)

# Training batches are shuffled; evaluation batches keep the dataset order.
train_loader = DataLoader(
  train_ds,        # the ToyDataset instance created earlier serves as input to the dataloader
  batch_size=2,
  shuffle=True,
  num_workers=0,   # the number of background processes
)
test_loader = DataLoader(
  test_ds,
  batch_size=2,
  shuffle=False,
  num_workers=0,
)



In [38]:
for idx, (X,y) in enumerate(train_loader):
  print(F"Batch {idx+1}:",X,y)

Batch 1: tensor([[ 2.3000, -1.1000],
        [-0.9000,  1.5000]]) tensor([1, 0])
Batch 2: tensor([[-1.2000,  3.1000],
        [-0.3000,  2.3000]]) tensor([0, 0])
Batch 3: tensor([[ 2.7000, -1.5000]]) tensor([1])


- **From the preceding output, the train_loader iterates over the training dataset, visiting each training example exactly once. This is known as a training epoch.**
- **As we have 5 training examples and batch_size=2, we get three batches; the last one contains only a single example because 5 is not evenly divisible by 2**
- **In practice, having a substantially smaller batch as the last batch in a training epoch can disturb the convergence during training. To prevent this, set drop_last=True, which will drop the last (incomplete) batch in each epoch**

In [39]:
# Training loader that discards the final batch when it is smaller than batch_size
train_loader = DataLoader(
  train_ds,         # the ToyDataset instance created earlier serves as input to the dataloader
  batch_size=2,
  shuffle=True,
  drop_last=True,
  num_workers=0,    # the number of background processes
)

In [40]:
for idx, (X,y) in enumerate(train_loader):
  print(F"Batch {idx+1}:",X,y)

Batch 1: tensor([[-1.2000,  3.1000],
        [-0.3000,  2.3000]]) tensor([0, 0])
Batch 2: tensor([[ 2.3000, -1.1000],
        [-0.9000,  1.5000]]) tensor([1, 0])


- **The num_workers parameter of PyTorch's DataLoader is crucial for parallelizing data loading and preprocessing**
- **When num_workers is set to 0, the data loading is done in the main process and not in separate worker processes**
- **Setting this to 0 for larger networks can lead to significant slowdowns during model training on a GPU.**


### A typical training loop

In [44]:
import torch.nn.functional as F

torch.manual_seed(123)
model = NeuralNetwork(num_inputs=2, num_outputs=2)  # the dataset has 2 features and 2 classes
optimizer = torch.optim.SGD(model.parameters(), lr=0.5)

num_epochs = 3
for epoch in range(num_epochs):
  model.train()

  for batch_idx, (features, labels) in enumerate(train_loader):
    # Reset the gradients from the previous round to prevent unintended accumulation.
    optimizer.zero_grad()

    # Forward pass and loss for the current batch.
    logits = model(features)
    loss = F.cross_entropy(logits, labels)

    # Backward pass; the optimizer then uses the gradients to update the model parameters.
    loss.backward()
    optimizer.step()

    ### LOGGING
    progress = (
        f"Epoch:{epoch+1:03d}/{num_epochs:03d}"
        f" | Batch{batch_idx:03d}/{len(train_loader):03d}"
        f" | Train Loss:{loss:.2f}"
    )
    print(progress)
model.eval()

Epoch:001/003 | Batch000/002 | Train Loss:0.78
Epoch:001/003 | Batch001/002 | Train Loss:0.63
Epoch:002/003 | Batch000/002 | Train Loss:0.49
Epoch:002/003 | Batch001/002 | Train Loss:0.27
Epoch:003/003 | Batch000/002 | Train Loss:0.04
Epoch:003/003 | Batch001/002 | Train Loss:0.01


NeuralNetwork(
  (layers): Sequential(
    (0): Linear(in_features=2, out_features=30, bias=True)
    (1): ReLU()
    (2): Linear(in_features=30, out_features=20, bias=True)
    (3): ReLU()
    (4): Linear(in_features=20, out_features=2, bias=True)
  )
)

**As we can see loss almost reaches 0 after 3 epochs , a sign that the model converged on the training set**.

In [45]:
num_params=sum(p.numel() for p in model.parameters() if p.requires_grad)
print("Total number of trainable model parameters:",num_params)

Total number of trainable model parameters: 752


In [46]:
# Predictions
model.eval()
with torch.no_grad():
  outputs=model(X_train)
print(outputs)

tensor([[ 3.2642, -4.6650],
        [ 1.8481, -2.6799],
        [ 2.1177, -3.1727],
        [-1.3868,  1.3825],
        [-1.6208,  1.6361]])


In [47]:
#To obtain the class membership probabilities , apply PyTorch's softmax function
torch.set_printoptions(sci_mode=False)
probas=torch.softmax(outputs,dim=1)
print(probas)

tensor([[    0.9996,     0.0004],
        [    0.9893,     0.0107],
        [    0.9950,     0.0050],
        [    0.0590,     0.9410],
        [    0.0371,     0.9629]])


In [48]:
# Convert the class-membership probabilities into class-label predictions using argmax
predictions = probas.argmax(dim=1)
print(predictions)

tensor([0, 0, 0, 1, 1])


### Prediction accuracy

In [51]:
from functools import total_ordering
def compute_accuracy(model,dataloader):
  """Return the fraction of examples in ``dataloader`` that ``model`` classifies correctly.

  Args:
    model: Module that maps a batch of features to per-class logits. It is
      switched to evaluation mode via ``model.eval()`` and left in that mode.
    dataloader: Iterable yielding ``(features, labels)`` batches.

  Returns:
    The accuracy as a Python float between 0 and 1.

  Raises:
    ValueError: If ``dataloader`` yields no examples, since the accuracy is
      undefined in that case.
  """
  model = model.eval()
  correct = 0          # exact integer count of correct predictions
  total_examples = 0

  # No gradients are needed for evaluation, so tracking is disabled for the whole loop.
  with torch.no_grad():
    for features, labels in dataloader:
      logits = model(features)
      predictions = torch.argmax(logits, dim=1)
      compare = labels == predictions
      correct += int(torch.sum(compare))
      total_examples += len(compare)

  if total_examples == 0:
    raise ValueError("compute_accuracy received a dataloader with no examples")
  return correct / total_examples

In [52]:
print(compute_accuracy(model,train_loader))

1.0


In [53]:
print(compute_accuracy(model,test_loader))

0.5


### Saving and Loading models

In [54]:
torch.save(model.state_dict(),"model.pth")

**The model's state_dict is a Python dictionary object that maps each layer in the model to its trainable parameters (weights and biases)**

In [55]:
# Rebuild the same architecture (2 inputs, 2 outputs), then restore the saved parameters into it.
model=NeuralNetwork(2,2)
# weights_only=True is passed explicitly so that safe loading of the checkpoint
# does not depend on the default of the installed PyTorch version.
model.load_state_dict(torch.load("model.pth",weights_only=True))

<All keys matched successfully>