<a href="https://colab.research.google.com/github/thebhulawat/DPO/blob/main/MNIST_Lora_from_Scratch.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import torch
import torchvision.datasets as datasets
import torchvision.transforms as transforms
import torch.nn as nn
import matplotlib.pyplot as plt
from tqdm import tqdm

In [None]:
# Seed PyTorch's global RNG so weight initialisation and data shuffling repeat
# across runs; binding the return value to `_` keeps the cell from echoing it.
_ = torch.manual_seed(42)

In [None]:
# Preprocessing shared by both splits: convert PIL images to tensors, then
# normalise with (0.1307, 0.3081), the commonly quoted MNIST mean / std.
transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))])

# Training split, downloaded to ./data on first use and served in shuffled mini-batches of 10.
mnist_trainset = datasets.MNIST(root='./data', train=True, download=True, transform=transform)
train_loader = torch.utils.data.DataLoader(mnist_trainset, batch_size = 10, shuffle = True)

# Test split. `transform` must be a callable applied to every image, so pass the
# same pipeline used for training: a bare boolean here would raise
# "TypeError: 'bool' object is not callable" as soon as a sample is fetched, and
# the evaluation images would not match the training preprocessing.
mnist_testset = datasets.MNIST(root='./data', train = False, download = True, transform = transform)
test_loader = torch.utils.data.DataLoader(mnist_testset, batch_size = 10, shuffle=True)

# Use the first CUDA GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")


In [None]:
class Net(nn.Module):
  """Fully connected classifier: 28*28 inputs -> two ReLU hidden layers -> 10 outputs.

  Args:
    hidden_size_1: Width of the first hidden layer.
    hidden_size_2: Width of the second hidden layer.
  """

  def __init__(self, hidden_size_1 = 1000, hidden_size_2 = 2000):
    super().__init__()
    flat_pixels = 28 * 28
    n_outputs = 10
    # Layers are registered in input-to-output order under the names
    # linear1 / linear2 / linear3, which become the parameter-name prefixes.
    self.linear1 = nn.Linear(flat_pixels, hidden_size_1)
    self.linear2 = nn.Linear(hidden_size_1, hidden_size_2)
    self.linear3 = nn.Linear(hidden_size_2, n_outputs)
    self.relu = nn.ReLU()

  def forward(self, img):
    """Flatten `img` to rows of 28*28 values and return the raw 10-way outputs.

    No softmax is applied; the final linear layer's output is returned as is.
    """
    activations = img.view(-1, 28 * 28)
    # Both hidden layers are followed by ReLU; the output layer is not.
    for hidden_layer in (self.linear1, self.linear2):
      activations = self.relu(hidden_layer(activations))
    return self.linear3(activations)

# Build the classifier with its default hidden sizes and move it to `device`.
net = Net().to(device)

In [None]:
def train(train_loader, net, epochs = 5, total_iterations_limit = None):
  """Optimise `net` on `train_loader` with Adam (lr=0.001) and cross-entropy loss.

  Args:
    train_loader: Iterable yielding `(inputs, labels)` batches.
    net: Model to train; its parameters are updated in place.
    epochs: Number of passes over `train_loader`.
    total_iterations_limit: Optional cap on the total number of batches
      processed across all epochs. When the cap is reached the function
      returns immediately. It is also used as the progress-bar total.

  Returns:
    None. Batches are moved to the module-level `device` before the forward pass.
  """
  criterion = nn.CrossEntropyLoss()
  optimizer = torch.optim.Adam(net.parameters(), lr=0.001)
  limited = total_iterations_limit is not None
  batches_seen = 0

  for epoch in range(epochs):
    net.train()
    running_loss = 0
    progress = tqdm(train_loader, desc = f'Epoch {epoch + 1}')
    if limited:
      progress.total = total_iterations_limit

    # `step` counts batches within the current epoch, starting at 1, so it can
    # be used directly as the divisor for the running mean loss.
    for step, (x, y) in enumerate(progress, start=1):
      batches_seen += 1
      x, y = x.to(device), y.to(device)

      optimizer.zero_grad()
      loss = criterion(net(x.view(-1, 28*28)), y)

      running_loss += loss.item()
      progress.set_postfix(loss = running_loss / step)

      loss.backward()
      optimizer.step()

      if limited and batches_seen >= total_iterations_limit:
        return

# Run a single pass over the training loader with no iteration cap.
train(train_loader, net, epochs = 1)


Epoch 1: 100%|██████████| 6000/6000 [00:45<00:00, 132.42it/s, loss=0.238]


In [None]:
# Keep a detached copy of every parameter, keyed by its name, so later cells can
# compare against the weights as they stand at this point.
original_weights = {
  name: param.clone().detach()
  for name, param in net.named_parameters()
}

# Preview the leading two rows / entries of up to the first 10 stored tensors.
for name in list(original_weights)[:10]:
  weight = original_weights[name]
  print(f"Layer: {name}, Weight Sample: {weight[:2]}")

Layer: linear1.weight, Weight Sample: tensor([[0.0465, 0.0313, 0.0355,  ..., 0.0257, 0.0197, 0.0605],
        [0.0298, 0.0224, 0.0726,  ..., 0.0790, 0.0660, 0.0112]],
       device='cuda:0')
Layer: linear1.bias, Weight Sample: tensor([-0.0423, -0.0587], device='cuda:0')
Layer: linear2.weight, Weight Sample: tensor([[-0.0276, -0.0600, -0.0258,  ..., -0.0189,  0.0199, -0.0524],
        [ 0.0065, -0.0807,  0.0214,  ...,  0.0205, -0.0339,  0.0217]],
       device='cuda:0')
Layer: linear2.bias, Weight Sample: tensor([-0.0371,  0.0244], device='cuda:0')
Layer: linear3.weight, Weight Sample: tensor([[-0.1084,  0.0051, -0.0878,  ..., -0.0393,  0.0234, -0.0323],
        [ 0.0933,  0.0217,  0.0515,  ...,  0.0249, -0.0814, -0.0472]],
       device='cuda:0')
Layer: linear3.bias, Weight Sample: tensor([-0.0137, -0.0638], device='cuda:0')


In [None]:
def test():
  correct = 0
  total = 0
  wrong_counts = [0 for in range(10)]

  with torch.no_grad():
    for data in tqdm(test_laoder, desc= "trainin")