In [9]:
import os

import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from torchvision import datasets
from torchvision.io import read_image
from torchvision.transforms import ToTensor, Lambda
# Prefer the GPU when PyTorch can see one; otherwise everything runs on the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
print(device)

cpu


- Basics - backward, requires_grad_, Multivariate, reshape, view, min, max, argmax, argmin, class, cat, stack, squeeze, unsqueeze, np.expand_dims
- medium - changing dtype of tensors, handle boolean, any, all, detach, to numpy, from numpy, dim = -1, splitting train test, dataset, transform, transform compose, transform normalize with training data, convolve (numpy, torch)
- Advanced - relu, leakyrelu, variable lr - cosine, batchnorm, dropout, CNN, RNN, LSTM, customdataset, number of trainable weights, freezing some layers in CNN


- ==`main`
- `Pandas` - sep, header ignore, datetime, index set, etc.

### Basic Torch - Numpy primer

In [47]:
# ----- Creating tensors -----

# from a flat Python list
a = [1, 2, 4, 5]
print(torch.tensor(a))

# from a nested Python list (one inner list per row)
a2 = [[1, 2, 4, 5], [2, 4, 5, 6]]
tns_a2 = torch.tensor(a2)
print(tns_a2)

# from a NumPy array
a3 = np.array(a2)
print(a3)
print(torch.from_numpy(a3))

# and back: tensor -> NumPy array
print(torch.rand(2, 3).numpy())

# new tensors that borrow the shape of an existing tensor
print(torch.ones_like(tns_a2))
# tns_a2 holds integers, so a float dtype is requested explicitly for the random values
print(torch.rand_like(tns_a2, dtype=torch.float))

# new tensors from an explicit shape tuple
shape = (2, 3)
for _factory in (torch.zeros, torch.ones, torch.rand):
    print(_factory(shape))

# the shape may be given either as separate ints or as a single tuple
print(torch.ones(3, 3), '\n', torch.ones((3, 3)))

# ----- Joining tensors -----
tensor1 = torch.rand(3, 4)
tensor2 = torch.rand(3, 4)
_pair = [tensor1, tensor2]

# cat: glue along an EXISTING axis (that axis grows, the rank stays the same)
cat_0, cat_1 = (torch.cat(_pair, dim=axis) for axis in (0, 1))
print(cat_0.shape, cat_1.shape)

# stack: insert a NEW axis of length 2 at position `dim` (rank grows by one)
stack_0, stack_1, stack_2, stack__1 = (
    torch.stack(_pair, dim=axis) for axis in (0, 1, 2, -1)
)
print(stack_0.shape, stack_1.shape, stack_2.shape, stack__1.shape)

print('\n----------------------- Arithmetic Operations -----------------------')
a = torch.rand(3, 4)
b = a.t()  # transpose: (4, 3)

# matrix multiplication: function form ...
c = torch.matmul(a, b)
print(c.shape)
d = torch.matmul(b, a)
print(d.shape)
## ... or the @ operator
e = a @ b
print(e.shape)

# element-wise multiplication keeps the operand shape
f = torch.mul(a, a)
print(f.shape)

# sums: over everything, down the rows (dim=0), across the columns (dim=1)
print(torch.sum(a))
print(torch.sum(a, dim=0))
print(torch.sum(a, dim=1))

# squeeze drops every axis of length 1.
# A (3, 4) tensor has no such axis, so its shape is unchanged ...
a = torch.randn(3, 4)
b = torch.squeeze(a)
print(a.shape, b.shape)

# ... while (64, 1, 28, 28) loses its singleton axis and becomes (64, 28, 28).
a = torch.randn(64, 1, 28, 28)
b = torch.squeeze(a)
print(a.shape, b.shape)


tensor([1, 2, 4, 5])
tensor([[1, 2, 4, 5],
        [2, 4, 5, 6]])
[[1 2 4 5]
 [2 4 5 6]]
tensor([[1, 2, 4, 5],
        [2, 4, 5, 6]], dtype=torch.int32)
[[0.861423   0.64127576 0.5306234 ]
 [0.40121812 0.538812   0.4806022 ]]
tensor([[1, 1, 1, 1],
        [1, 1, 1, 1]])
tensor([[0.6963, 0.6340, 0.4430, 0.3859],
        [0.0474, 0.5547, 0.1294, 0.8793]])
tensor([[0., 0., 0.],
        [0., 0., 0.]])
tensor([[1., 1., 1.],
        [1., 1., 1.]])
tensor([[0.1546, 0.0309, 0.1632],
        [0.1995, 0.7144, 0.1431]])
tensor([[1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.]]) 
 tensor([[1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.]])
torch.Size([6, 4]) torch.Size([3, 8])
torch.Size([2, 3, 4]) torch.Size([3, 2, 4]) torch.Size([3, 4, 2]) torch.Size([3, 4, 2])

----------------------- Arithmetic Operations -----------------------
torch.Size([3, 3])
torch.Size([4, 4])
torch.Size([3, 3])
torch.Size([3, 4])
tensor(6.0451)
tensor([1.7931, 0.3510, 1.7563, 2.1447])
tensor([1.5683, 1.88

#### Primer on Autograd

In [71]:
def _show_params():
    """Print the current weight matrix and bias vector."""
    print('w: ', w)
    print('b: ', b)


def _show_grads():
    """Print the gradients currently stored on w and b (None until backward runs)."""
    print('w.grad: ', w.grad)
    print('b.grad: ', b.grad)


# Leaf tensors that autograd tracks
w = torch.randn(5, 3, requires_grad=True)
b = torch.randn(3, requires_grad=True)
_show_params()

# Fixed input and target; nothing has been back-propagated yet, so .grad is None
x = torch.ones(5)
y = torch.zeros(3)
_show_grads()

# Forward pass: a single linear layer followed by mean squared error
z = x @ w + b

loss_fn = nn.MSELoss()
loss = loss_fn(z, y)
print('loss: ', loss)

# The forward pass alone changes neither the parameters nor their .grad
_show_params()
_show_grads()

# Backward pass fills in .grad on every leaf that requires grad
loss.backward()

_show_grads()

w:  tensor([[ 0.1376,  0.5324,  0.3588],
        [-0.2198, -1.8693, -0.2780],
        [-0.5774,  0.1756, -1.3968],
        [ 0.3604,  0.7431, -0.4732],
        [-0.9053, -0.7781, -0.0913]], requires_grad=True)
b:  tensor([-1.6597, -0.1103,  0.5868], requires_grad=True)
w.grad:  None
b.grad:  None
loss:  tensor(3.8616, grad_fn=<MseLossBackward0>)
w:  tensor([[ 0.1376,  0.5324,  0.3588],
        [-0.2198, -1.8693, -0.2780],
        [-0.5774,  0.1756, -1.3968],
        [ 0.3604,  0.7431, -0.4732],
        [-0.9053, -0.7781, -0.0913]], requires_grad=True)
b:  tensor([-1.6597, -0.1103,  0.5868], requires_grad=True)
w.grad:  None
b.grad:  None
w.grad:  tensor([[-1.9095, -0.8712, -0.8624],
        [-1.9095, -0.8712, -0.8624],
        [-1.9095, -0.8712, -0.8624],
        [-1.9095, -0.8712, -0.8624],
        [-1.9095, -0.8712, -0.8624]])
b.grad:  tensor([-1.9095, -0.8712, -0.8624])


In [8]:
# y = 4x^2 + 3  =>  dy/dx = 8x, which is 32 at x = 4.

# Variant 1: build the tensor first, then switch gradient tracking on in place
x = torch.tensor([4.])
x.requires_grad_(True)
y = 4 * x.pow(2) + 3
y.backward()
print(x.grad)

# or

# Variant 2: request gradient tracking at construction time
x = torch.tensor([4.], requires_grad=True)
y = 4 * x.pow(2) + 3
y.backward()
print(x.grad)


# how can you freeze some selected layers of a model in Pytorch


tensor([32.])
tensor([32.])


#### High level Deep Learning Primer

In [None]:
# Template: `Dataset(...)` stands in for a concrete map-style dataset that yields
# (x, y) tuples -- the base `Dataset` class itself accepts no arguments, so replace
# both lines before running, e.g. with
#   datasets.FashionMNIST(root='data', train=True, download=True, transform=ToTensor())
# The model defined below expects 28x28 inputs and 10 classes.
train_ds = Dataset(..., transform = ToTensor())  # list of tuples(x,y)
test_ds = Dataset(..., transform = ToTensor())

bs = 64  # batch size shared by both loaders

train_loader = DataLoader(train_ds, batch_size= bs)
test_loader = DataLoader(test_ds, batch_size= bs)

# Peek at one batch to check shapes: iterate and stop after the first batch ...
for X,y in test_loader:
    print(X.shape, y.shape)
    break
## or
# ... or pull a single batch straight from an iterator
X, y = next(iter(train_loader))
print(X.shape, y.shape)

class NeuralNetwork(nn.Module):
    """Fully connected classifier: 28*28 inputs -> 512 -> 64 -> 10 logits."""

    def __init__(self):
        super().__init__()
        # Collapses every axis after the batch axis into one feature vector
        self.flatten = nn.Flatten()
        layers = [
            nn.Linear(28 * 28, 512),
            nn.ReLU(),
            nn.Linear(512, 64),
            nn.ReLU(),
            nn.Linear(64, 10),
        ]
        self.linear_relu_stack = nn.Sequential(*layers)

    def forward(self, x):
        """Return raw (un-normalised) class scores for a batch of inputs."""
        return self.linear_relu_stack(self.flatten(x))

# Build the network, move it to the selected device, and show its layer layout
model = NeuralNetwork()
model = model.to(device)
print(model)

# CrossEntropyLoss takes raw logits as predictions and class indices as ground truth
loss_fn = nn.CrossEntropyLoss()
# Plain SGD over every parameter of the model
optimizer = optim.SGD(params=model.parameters(), lr=1e-3)

def train(dataloader, model, loss_fn, optimizer):
    """Run one training epoch and print the epoch's mean loss and accuracy.

    Args:
        dataloader: sized iterable of ``(X, y)`` batches that also exposes
            ``.dataset`` (e.g. a ``DataLoader``); ``y`` is compared against
            ``pred.argmax(1)``, so it holds class indices.
        model: ``nn.Module`` whose output has the class scores along dim 1.
        loss_fn: callable ``loss_fn(pred, y)`` returning a scalar tensor
            (assumed to be a per-batch mean, as ``nn.CrossEntropyLoss`` is by default).
        optimizer: optimizer built over ``model``'s parameters.

    Returns:
        None -- the metrics are printed, not returned.
    """
    num_batches = len(dataloader)          # denominator for the mean of per-batch losses
    num_samples = len(dataloader.dataset)  # denominator for accuracy

    # Send each batch to wherever the model lives instead of relying on a
    # notebook-level `device` variable.
    first_param = next(model.parameters(), None)
    model_device = first_param.device if first_param is not None else torch.device('cpu')

    model.train()
    train_loss, correct = 0, 0
    for b, (X, y) in enumerate(dataloader):
        X, y = X.to(model_device), y.to(model_device)
        # get predictions
        pred = model(X)

        # compute loss and accuracy
        loss = loss_fn(pred, y)
        train_loss += loss.item()
        # number of correctly classified samples in this batch
        correct += (pred.argmax(1) == y).type(torch.float).sum().item()

        # backprop
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()

    # getting final metrics for epoch
    # Loss is averaged over batches, accuracy over samples; dividing the correct
    # count by the batch count would inflate accuracy by roughly the batch size.
    # max(..., 1) keeps an empty loader from raising ZeroDivisionError.
    train_loss /= max(num_batches, 1)
    correct /= max(num_samples, 1)
    print(f'train loss: {train_loss}, train accuracy: {correct*100}')

def test(dataloader, model, loss_fn):
    size = len(dataloader)
    model.eval()
    test_loss, corr = 0, 0
    with torch.no_grad():
        for X,y in dataloader:
            X, y = X.to(device), y.to(device)
            pred = model(X)
            loss = loss_fn(pred, y)
            test_loss += loss.item()
            correct = (pred.argmax(1) == y).type(torch.float).sum().item()
    test_loss /= size
    correct /= size
    print(test_loss, correct*100)

## We didn't return anything from training loop and test loops

# Alternate one training pass and one evaluation pass per epoch
epochs = 5
for epoch in range(epochs):
    # 1-based epoch banner
    banner = f'epoch: {epoch+1}\n----------------------------------------'
    print(banner)
    train(dataloader=train_loader, model=model, loss_fn=loss_fn, optimizer=optimizer)
    test(dataloader=test_loader, model=model, loss_fn=loss_fn)

# Save and Load model - Method-1 (weights only; the model class must be instantiated before loading)
## save model
torch.save(model.state_dict(), 'model.pth')
print('Model saved!')

## load model
model = NeuralNetwork().to(device)
# map_location lets a checkpoint written on a GPU machine load on a CPU-only one
model.load_state_dict(torch.load('model.pth', map_location=device))

# Save and Load model - Method-2 (In this we don't need to instantiate the model while loading, unlike method-1)
## save model
# A separate file name keeps the state_dict checkpoint from Method-1 intact.
torch.save(model, 'model_full.pth')
## load model
# The whole module is pickled, so unpickling runs arbitrary code: only load files
# you trust. `weights_only=False` (available from PyTorch 1.13) is needed because
# newer releases default to weights_only=True and refuse to unpickle a full module.
model = torch.load('model_full.pth', map_location=device, weights_only=False)


# predictions
classes = []  # fill with the human-readable class names, ordered by class index


def _class_name(index):
    """Return the name for a class index, or the index itself while `classes` is unfilled."""
    index = int(index)
    return classes[index] if index < len(classes) else str(index)


model.eval()
x, y = test_ds[0]  # one (sample, label) pair
x = x.to(device)
with torch.no_grad():  # inference only, no gradients needed
    # unsqueeze(0) adds the batch axis the model expects; the result is (1, num_classes).
    # These are raw logits, not probabilities -- argmax is the same either way.
    pred_probab_dist = model(x.unsqueeze(0))
# argmax over the class axis (dim 1); over dim 0 it would return one index per class
pred_label = pred_probab_dist.argmax(1).item()  # plain int, so it can index a list
pred_label_name = _class_name(pred_label)
print(f'Actual- {_class_name(y)}, Prediction - {pred_label_name}')

        

#### writing a custom `Dataset`

In [None]:
# High level overview
class CustomImageDataset(Dataset):
    """Outline of the three methods a custom map-style ``Dataset`` implements.

    This is a template, not a working dataset: ``__init__`` is a stub, and
    ``__len__`` / ``__getitem__`` refer to names that the stub does not set up.
    """

    def __init__(self, datasources, transform=None, target_transform = None):
        # Index / load the data sources and remember the optional transforms here.
        ...

    def __len__(self):
        # Number of samples in the dataset
        return len(self.img_labels)

    def __getitem__(self, idx):
        # Load sample `idx`, apply the transforms, and return the (image, label) pair.
        # `image` and `label` are placeholders until that loading code is written.
        return image, label

# sample-1: Dataset for image classification for images in a directory and label details in a .csv file
class CustomImageDataset(Dataset):
    """Image-classification dataset: image files in a directory, labels in a CSV file.

    The CSV is read with ``pd.read_csv``; column 0 holds the image file name
    (relative to ``img_dir``) and column 1 holds the label.

    Args:
        annotations_file: path to the CSV file with file names and labels.
        img_dir: directory that contains the image files.
        transform: optional callable applied to each loaded image.
        target_transform: optional callable applied to each label.
    """

    def __init__(self, annotations_file, img_dir, transform = None, target_transform = None):
        self.img_labels = pd.read_csv(annotations_file)
        self.img_dir = img_dir
        self.transform = transform
        self.target_transform = target_transform

    def __len__(self):
        """Return the number of annotated samples (rows of the CSV)."""
        return len(self.img_labels)

    def __getitem__(self, idx):
        """Return the ``(image, label)`` pair at position ``idx``, transforms applied."""
        im_pth = os.path.join(self.img_dir, self.img_labels.iloc[idx, 0])
        image = read_image(im_pth)
        label = self.img_labels.iloc[idx, 1]
        if self.transform is not None:
            image = self.transform(image)
        if self.target_transform is not None:
            # The label goes through target_transform, not the image transform.
            label = self.target_transform(label)
        return image, label

# transform
transform = ToTensor()


def _one_hot(y):
    """Turn class index ``y`` into a length-10 float vector with a 1 at position ``y``."""
    encoded = torch.zeros(10, dtype = torch.float)
    return encoded.scatter_(dim=0, index=torch.tensor(y), value=1)


target_transform = Lambda(_one_hot) # we don't need it
# target_transform can be ignored -- don't use it. Keep the targets as class indices
# rather than one-hot vectors, and use CrossEntropyLoss with the model's logits and
# the class indices as ground truth.




In [None]:
# FC for binary classification
class NeuralNet(nn.Module):
    """Fully connected net for binary classification: 28*28 -> 512 -> 32 -> 2 logits."""

    def __init__(self):
        super().__init__()
        self.flatten = nn.Flatten()
        # Layer widths from input to output; the final 2 gives one logit per class
        widths = (28 * 28, 512, 32, 2)
        modules = []
        for index, (fan_in, fan_out) in enumerate(zip(widths[:-1], widths[1:])):
            if index > 0:
                modules.append(nn.ReLU())  # non-linearity between consecutive Linear layers
            modules.append(nn.Linear(fan_in, fan_out))
        self.linear_relu_stack = nn.Sequential(*modules)

    def forward(self, x):
        """Flatten each sample and return its two class logits."""
        flat = self.flatten(x)
        return self.linear_relu_stack(flat)

        

# FC for multiclass classification
class NeuralNet(nn.Module):
    """Fully connected net for 10-way classification: 28*28 -> 512 -> 32 -> 10 logits."""

    def __init__(self):
        super().__init__()
        self.flatten = nn.Flatten()
        input_layer = nn.Linear(28 * 28, 512)
        hidden_layer = nn.Linear(512, 32)
        output_layer = nn.Linear(32, 10)  # one logit per class
        self.linear_relu_stack = nn.Sequential(
            input_layer, nn.ReLU(),
            hidden_layer, nn.ReLU(),
            output_layer,
        )

    def forward(self, x):
        """Flatten each sample and return its ten class logits."""
        return self.linear_relu_stack(self.flatten(x))

# case-1 loss is CrossEntropyLoss
loss = nn.CrossEntropyLoss()
for b, (X,y) in enumerate(dataloader):
    # `dtype` is an attribute, not a method, so `y.dtype(torch.long)` raises TypeError.
    # Cast with .to(): CrossEntropyLoss needs class-index targets as long, not float.
    X, y = X.to(device), y.to(device, dtype=torch.long)
    logits = model(X)
    # The loss is fed the raw logits; softmax below is only for reading probabilities.
    batch_loss = loss(logits, y)
    probabs = nn.Softmax(dim = 1)(logits)
    pred_label = probabs.argmax(1)

# ANN for regression with a single target (dependent) variable

class NeuralNet(nn.Module):
    """Fully connected regressor: 28*28 -> 512 -> 32 -> 1 predicted value."""

    def __init__(self):
        super().__init__()
        self.flatten = nn.Flatten()
        layer_plan = (
            nn.Linear(28 * 28, 512),
            nn.ReLU(),
            nn.Linear(512, 32),
            nn.ReLU(),
            nn.Linear(32, 1),  # single output unit: one predicted value per sample
        )
        stack = nn.Sequential()
        # Register the layers under the names "0".."4", in order
        for position, layer in enumerate(layer_plan):
            stack.add_module(str(position), layer)
        self.linear_relu_stack = stack

    def forward(self, x):
        """Flatten each sample and return one predicted value per sample."""
        features = self.flatten(x)
        prediction = self.linear_relu_stack(features)
        return prediction


# FC for regression with 2 or more target (dependent) variables
        # - There are several ways to do it:
        # - 1. a single network with n neurons in the output layer
        # - 2. n completely independent, separate networks
        # - 3. a shared base with n separate heads (only the last few layers differ)
        

# CNN for binary classification

# CNN for multiclass classification



# RNN for binary classification

# RNN for multiclass classification

# RNN for Regression 

In [None]:

# Prebuilt-CNN for binary classification

# Prebuilt-CNN for multiclass classification

# class imbalance

# lr scheduler


#### working of loss functions

In [67]:
# 1. CrossEntropyLoss -  GT as class indices # Pred as logits
_n_samples, _n_classes = 32, 10

loss = nn.CrossEntropyLoss()
# random scores standing in for model logits, one row per sample
logits = torch.randn(_n_samples, _n_classes)
# random integer class indices drawn from [0, _n_classes)
gt = torch.empty(_n_samples, dtype=torch.long).random_(_n_classes)
print(gt)
loss(logits, gt)

tensor([4, 9, 9, 8, 9, 4, 2, 8, 0, 0, 8, 5, 3, 6, 9, 3, 1, 4, 3, 3, 5, 6, 4, 8,
        4, 4, 8, 5, 3, 0, 9, 7])


tensor(2.4427)

In [68]:
# Softmax over the class axis: each row becomes a probability distribution
logits.softmax(dim=1)

tensor([[0.3209, 0.0211, 0.0347, 0.1563, 0.0221, 0.1124, 0.1148, 0.2059, 0.0030,
         0.0089],
        [0.0292, 0.2299, 0.0497, 0.1478, 0.0716, 0.1460, 0.1361, 0.0801, 0.0152,
         0.0945],
        [0.2013, 0.0754, 0.2527, 0.0430, 0.0993, 0.1244, 0.0180, 0.1191, 0.0445,
         0.0225],
        [0.2758, 0.2679, 0.0712, 0.0696, 0.0071, 0.1234, 0.0409, 0.0088, 0.0497,
         0.0856],
        [0.0119, 0.0872, 0.0566, 0.0623, 0.0607, 0.2982, 0.0809, 0.0963, 0.1792,
         0.0666],
        [0.0626, 0.0527, 0.0171, 0.0458, 0.1325, 0.0415, 0.0316, 0.0607, 0.0787,
         0.4768],
        [0.0382, 0.0817, 0.1429, 0.0442, 0.0474, 0.3767, 0.0660, 0.1314, 0.0300,
         0.0415],
        [0.0846, 0.2117, 0.0170, 0.0404, 0.0749, 0.0110, 0.1018, 0.0585, 0.3610,
         0.0391],
        [0.2916, 0.2613, 0.1002, 0.0517, 0.0256, 0.0706, 0.0392, 0.0490, 0.0756,
         0.0351],
        [0.1848, 0.1889, 0.1394, 0.0589, 0.0787, 0.0136, 0.0249, 0.2205, 0.0436,
         0.0467],
        [0

In [29]:

class NeuralNet(nn.Module):
    """Single-output fully connected net (28*28 -> 512 -> 32 -> 1) used in the freezing demo."""

    def __init__(self):
        super().__init__()
        self.flatten = nn.Flatten()
        blocks = []
        blocks.append(nn.Linear(28 * 28, 512))
        blocks.append(nn.ReLU())
        blocks.append(nn.Linear(512, 32))
        blocks.append(nn.ReLU())
        blocks.append(nn.Linear(32, 1))  # predicts a single value
        self.linear_relu_stack = nn.Sequential(*blocks)

    def forward(self, x):
        """Flatten the input batch and run it through the layer stack."""
        out = self.flatten(x)
        out = self.linear_relu_stack(out)
        return out

## how to freeze weights of certain layers?
def _trainable_param_count(net):
    """Count the scalar parameters of ``net`` that still require gradients."""
    total = 0
    for tensor in net.parameters():
        if tensor.requires_grad:
            total += tensor.numel()
    return total


model = NeuralNet()

# Nothing is frozen yet, so the trainable count equals the total parameter count
num_trainable_params = _trainable_param_count(model)
print('total params: ', num_trainable_params)

# Freeze the first linear layer: turn gradient tracking off for its weight and bias
first_linear = model.linear_relu_stack[0]
first_linear.requires_grad_(False)

# Find the number of trainable parameters
num_trainable_params = _trainable_param_count(model)
print(f'Number of trainable parameters: {num_trainable_params}')

total params:  418369
Number of trainable parameters: 16449


In [30]:
# Print the size of every parameter tensor and add up the ones that are still trainable
count = 0
for p in model.parameters():
    n_elements = p.numel()
    print(n_elements)
    if p.requires_grad:
        count += n_elements

count

401408
512
16384
32
32
1


16449

##### using LR scheduler

In [None]:
# Model, loss function and optimizer are set up as before
# NOTE(review): the most recently defined NeuralNet ends in nn.Linear(32, 1), while
# CrossEntropyLoss and the argmax(1) accuracy below expect one logit per class --
# confirm which NeuralNet variant is meant to be used here.
model = NeuralNet()
loss_fn = nn.CrossEntropyLoss()  # ground truth as class indices, predictions as logits
optimizer = optim.SGD(params=model.parameters(), lr=1e-3)
# Multiply the learning rate by `gamma` every `step_size` scheduler steps; tune both as needed
scheduler = optim.lr_scheduler.StepLR(optimizer=optimizer, step_size=10, gamma=0.1)

if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
model.to(device)

def train(dataloader, model, loss_fn, optimizer, scheduler):
    """Run one training epoch, advance the LR scheduler once, and print the metrics.

    Args:
        dataloader: sized iterable of ``(X, y)`` batches that also exposes
            ``.dataset`` (e.g. a ``DataLoader``); ``y`` holds class indices.
        model: ``nn.Module`` whose output has the class scores along dim 1.
        loss_fn: callable ``loss_fn(pred, y)`` returning a scalar tensor
            (assumed to be a per-batch mean, as ``nn.CrossEntropyLoss`` is by default).
        optimizer: optimizer built over ``model``'s parameters.
        scheduler: learning-rate scheduler; ``step()`` is called once per epoch.

    Returns:
        None -- the metrics are printed, not returned.
    """
    num_batches = len(dataloader)          # denominator for the mean of per-batch losses
    num_samples = len(dataloader.dataset)  # denominator for accuracy

    # Send each batch to wherever the model lives instead of relying on a
    # notebook-level `device` variable.
    first_param = next(model.parameters(), None)
    model_device = first_param.device if first_param is not None else torch.device('cpu')

    model.train()
    train_loss, correct = 0, 0
    for b, (X, y) in enumerate(dataloader):
        X, y = X.to(model_device), y.to(model_device)

        # Get predictions
        pred = model(X)

        # Compute loss and accuracy
        loss = loss_fn(pred, y)
        train_loss += loss.item()
        correct += (pred.argmax(1) == y).type(torch.float).sum().item()

        # Backpropagation
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()

    # Step the scheduler once per epoch, after the optimizer updates
    scheduler.step()

    # Getting final metrics for epoch
    # train_loss is a sum of per-batch means, so it is averaged over batches;
    # dividing it by the sample count would shrink it by roughly the batch size.
    # max(..., 1) keeps an empty loader from raising ZeroDivisionError.
    train_loss /= max(num_batches, 1)
    correct /= max(num_samples, 1)
    print(f'Train loss: {train_loss}, Train accuracy: {correct * 100}%')

# Example usage:
# Assume `train_dataloader` is your DataLoader for the training dataset
# `num_epochs` was never defined; 30 spans several periods of the StepLR above
# (step_size=10), so the learning-rate decay actually takes effect. Adjust as needed.
num_epochs = 30
for epoch in range(num_epochs):
    train(train_dataloader, model, loss_fn, optimizer, scheduler)


#### custom loss functions

In [34]:
def my_mse_loss1(output, target):
    """Mean squared error between ``output`` and ``target``, written as a plain function."""
    squared_error = (output - target).pow(2)
    return squared_error.mean()


# Tiny linear model with random inputs and targets: 3 samples, 2 features, 1 output
model = nn.Linear(in_features=2, out_features=1)
x = torch.randn(3, 2)
target = torch.randn(3, 1)
output = model(x)

print('target: ', target,'\noutput', output)

# The function-style custom loss is differentiable like any built-in loss
loss = my_mse_loss1(output=output, target=target)
print('loss:', loss)
loss.backward()
# backward() has populated the gradient of the loss w.r.t. the layer's weight
print(model.weight.grad)

target:  tensor([[ 0.3408],
        [-2.3544],
        [-0.3219]]) 
output tensor([[-0.1327],
        [-0.5044],
        [-0.7863]], grad_fn=<AddmmBackward0>)
loss: tensor(1.2875, grad_fn=<MeanBackward0>)
tensor([[ 1.3618, -1.1399]])


In [35]:
class my_mse_loss2(nn.Module):
    """Mean squared error written as an ``nn.Module``.

    The most recent ``output`` and ``target`` are kept on the instance as
    ``self.output`` and ``self.target``.
    """

    def forward(self, output, target):
        """Store both tensors on the instance and return their mean squared difference."""
        self.output, self.target = output, target
        residual = self.output - self.target
        return residual.pow(2).mean()

In [37]:
def my_mse_loss1(output, target):
    """Function-style mean squared error (not called in this cell, which uses my_mse_loss2)."""
    return torch.square(output - target).mean()


# Same experiment as with the function-style loss, now using the nn.Module-style loss
model = nn.Linear(in_features=2, out_features=1)
x = torch.randn(3, 2)
target = torch.randn(3, 1)
output = model(x)

print('target: ', target,'\noutput', output)

# Instantiate the loss module once, then call it like any built-in criterion
loss_fn = my_mse_loss2()
loss = loss_fn(output, target)
print('loss:', loss)
loss.backward()
# backward() has populated the gradient of the loss w.r.t. the layer's weight
print(model.weight.grad)

target:  tensor([[-0.6287],
        [-0.6630],
        [ 1.3269]]) 
output tensor([[-0.0800],
        [ 0.2978],
        [ 0.3465]], grad_fn=<AddmmBackward0>)
loss: tensor(0.7285, grad_fn=<MeanBackward0>)
tensor([[0.3061, 0.5611]])
