In [None]:
!pip uninstall -y torch
!pip install torch torchvision


Found existing installation: torch 2.5.1+cu124
Uninstalling torch-2.5.1+cu124:
  Successfully uninstalled torch-2.5.1+cu124
Collecting torch
  Downloading torch-2.6.0-cp311-cp311-manylinux1_x86_64.whl.metadata (28 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuf

In [None]:
import torch
from torchvision import transforms
import matplotlib.pyplot as plt
from torchvision.datasets import CIFAR10
from torch.utils.data import DataLoader


In [None]:
# Use the GPU when one is visible to torch, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Preprocessing shared by every dataset below: tensor conversion followed by
# per-channel normalisation with fixed mean / std constants.
transformation = transforms.Compose([
  transforms.ToTensor(),
  transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
])

# Training split with augmentation (random padded crop + horizontal flip)
# applied before the shared preprocessing.
train_dataset = CIFAR10(
  root='./data',
  train=True,
  download=True,
  transform=transforms.Compose([
    transforms.RandomCrop(size=32, padding=4),
    transforms.RandomHorizontalFlip(),
    transformation,
  ]),
)

# Same training split without augmentation, and the held-out test split.
train_dataset_fast = CIFAR10(
  root='./data',
  train=True,
  download=True,
  transform=transformation,
)
test_dataset = CIFAR10(
  root='./data',
  train=False,
  download=True,
  transform=transformation,
)

# Loaders: small shuffled batches for training, larger shuffled batches of the
# un-augmented training data, and the test data in a fixed order.
train_loader = DataLoader(
  train_dataset, batch_size=100, shuffle=True, num_workers=2
)
train_loader_fast = DataLoader(
  train_dataset_fast, batch_size=1000, shuffle=True, num_workers=2
)
test_loader = DataLoader(
  test_dataset, batch_size=10000, shuffle=False, num_workers=2
)

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data/cifar-10-python.tar.gz


100%|██████████| 170M/170M [00:03<00:00, 43.4MB/s]


Extracting ./data/cifar-10-python.tar.gz to ./data
Files already downloaded and verified
Files already downloaded and verified


In [None]:
import torch
import torch.nn as nn
from torch.optim.lr_scheduler import StepLR
class ConvNet(nn.Module):
    """Small convolutional classifier that also carries its own training objects.

    The network is two (conv -> ReLU -> max-pool) stages followed by a
    two-layer fully connected head with 10 outputs. Besides the layers, each
    instance owns an SGD optimizer, a cross-entropy loss and a StepLR
    scheduler, exposed as ``optimizer``, ``loss`` and ``scheduler``.
    """

    def __init__(self):
        super().__init__()

        # Two 5x5 convolutions, each followed by a 2x2 max-pool.
        feature_layers = [
            nn.Conv2d(3, 5, 5),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),
            nn.Conv2d(5, 5, 5),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),
        ]
        # 125 input features = 5 channels x 5 x 5, which is what the feature
        # stack above produces for a 3x32x32 input.
        classifier_layers = [
            nn.Flatten(),
            nn.Linear(125, 30),
            nn.ReLU(),
            nn.Linear(30, 10),
        ]
        self.model = nn.Sequential(*feature_layers, *classifier_layers)

        self.loss = nn.CrossEntropyLoss()
        self.optimizer = torch.optim.SGD(
            self.model.parameters(), lr=0.01, momentum=0.9
        )
        # Multiplies the learning rate by 0.1 every 4 scheduler steps.
        self.scheduler = StepLR(self.optimizer, step_size=4, gamma=0.1)

    def forward(self, x):
        """Return the 10 class logits for the input batch ``x``."""
        return self.model(x)



In [None]:
def plot_results(epochs, acc, lss):
  """Plot per-epoch loss and accuracy on one figure with two y-axes.

  Args:
    epochs: number of epochs; the x-axis runs from 1 to ``epochs``.
    acc: accuracy values, one per epoch (drawn on the right axis).
    lss: loss values, one per epoch (drawn on the left axis).
  """
  epoch_axis = range(1, epochs + 1)

  fig, ax1 = plt.subplots()

  # Left axis: loss. Drawn explicitly on ax1 rather than on pyplot's
  # "current axes" so the curve cannot end up on the wrong axis.
  ax1.plot(epoch_axis, lss, marker='o', color='tab:blue', label='Loss')
  ax1.set_xlabel('Epoch')
  ax1.set_ylabel('Loss', color='tab:blue')
  ax1.tick_params(axis='y', labelcolor='tab:blue')

  # Right axis: accuracy, sharing the same x-axis. The label carries no unit
  # because the values are plotted exactly as passed in.
  ax2 = ax1.twinx()
  ax2.set_ylabel('Accuracy', color='tab:orange')
  ax2.plot(epoch_axis, acc, marker='x', color='tab:orange', label='Accuracy')
  ax2.tick_params(axis='y', labelcolor='tab:orange')

  # Set the title before tight_layout() so the layout pass accounts for it.
  ax1.set_title('Epoch vs Loss and Accuracy')
  fig.tight_layout()

  plt.show()


In [None]:
def train_model(model, epochs, trainloader, testloader, device):
  """Train ``model`` and evaluate it on ``testloader`` after every epoch.

  Args:
    model: module exposing ``optimizer`` and ``loss`` attributes; both are
      used as-is for the parameter update and the loss computation.
    epochs: number of passes over ``trainloader``.
    trainloader: iterable yielding ``(inputs, targets)`` training batches.
    testloader: iterable yielding ``(inputs, targets)`` evaluation batches.
    device: device the model and every batch are moved to.

  Returns:
    ``(losses, accuracies)``: two lists of Python floats with one entry per
    epoch. Each loss is the sum of the per-batch evaluation losses (the same
    number that is printed) and each accuracy is the fraction of evaluation
    samples that were classified correctly (``nan`` if there were none).
  """
  losses = []
  accuracies = []
  model.to(device)

  for epoch in range(epochs):
    # ---- training pass ----
    train_total_loss = 0.0
    train_right, train_total = 0, 0

    model.train()
    print("STARTING LOADER" + "="*70)
    for X_batch, y_batch in trainloader:
      X_batch, y_batch = X_batch.to(device), y_batch.to(device)

      model.optimizer.zero_grad()
      outputs = model(X_batch)
      loss = model.loss(outputs, y_batch)
      loss.backward()
      model.optimizer.step()

      # .item() turns the statistics into plain Python numbers so the running
      # totals do not stay attached to the autograd graph of every batch.
      train_total_loss += loss.item()
      train_right += (outputs.argmax(dim=1) == y_batch).sum().item()
      train_total += len(y_batch)
    # The learning-rate scheduler is deliberately not stepped here, as before:
    #model.scheduler.step()
    train_accuracy = train_right / train_total if train_total else float('nan')
    print(f"Epoch {epoch+1}/{epochs}, Train Loss: {train_total_loss}, Train Accuracy: {train_accuracy}")

    # ---- evaluation pass ----
    total_loss = 0.0
    right, total = 0, 0

    model.eval()
    with torch.no_grad():
      print("STARTING TEST LOADER")
      for X_batch_test, y_batch_test in testloader:
        X_batch_test = X_batch_test.to(device)
        y_batch_test = y_batch_test.to(device)

        outputs_test = model(X_batch_test)
        total_loss += model.loss(outputs_test, y_batch_test).item()
        right += (outputs_test.argmax(dim=1) == y_batch_test).sum().item()
        total += len(y_batch_test)

    test_accuracy = right / total if total else float('nan')

    # Record the loss accumulated over ALL evaluation batches, so the history
    # matches the printed value instead of holding only the last batch's loss.
    losses.append(total_loss)
    accuracies.append(test_accuracy)

    print(f"Epoch {epoch+1}/{epochs}, Test Loss: {total_loss}, Test Accuracy: {test_accuracy}")

  plot_results(epochs, accuracies, losses)
  return losses, accuracies


In [None]:
def train_modeltwo(model, epochs, trainloader, testloader, device):
  """Train ``model`` for ``epochs`` passes over ``trainloader`` and return it.

  No evaluation is performed; only the training loss and accuracy of each
  epoch are printed.

  Args:
    model: module exposing ``optimizer`` and ``loss`` attributes; both are
      used as-is for the parameter update and the loss computation.
    epochs: number of passes over ``trainloader``.
    trainloader: iterable yielding ``(inputs, targets)`` training batches.
    testloader: accepted but not used by this function.
    device: device the model and every batch are moved to.

  Returns:
    The same ``model`` object, after training.
  """
  model.to(device)

  for epoch in range(epochs):
    train_total_loss = 0.0
    train_right, train_total = 0, 0

    model.train()
    print("STARTING LOADER" + "="*70)
    for X_batch, y_batch in trainloader:
      X_batch, y_batch = X_batch.to(device), y_batch.to(device)

      model.optimizer.zero_grad()
      outputs = model(X_batch)
      loss = model.loss(outputs, y_batch)
      loss.backward()
      model.optimizer.step()

      # .item() turns the statistics into plain Python numbers so the running
      # totals do not stay attached to the autograd graph of every batch.
      train_total_loss += loss.item()
      train_right += (outputs.argmax(dim=1) == y_batch).sum().item()
      train_total += len(y_batch)
    # The learning-rate scheduler is deliberately not stepped here, as before:
    #model.scheduler.step()

    # An empty loader yields nan instead of raising ZeroDivisionError.
    train_accuracy = train_right / train_total if train_total else float('nan')
    print(f"Epoch {epoch+1}/{epochs}, Train Loss: {train_total_loss}, Train Accuracy: {train_accuracy}")

  return model


In [None]:
# Build a fresh network and train it for two epochs. train_modeltwo returns
# the model object it was given, so new_md refers to the same object as md.
md = ConvNet()
new_md = train_modeltwo(
    model=md,
    epochs=2,
    trainloader=train_loader,
    testloader=test_loader,
    device=device,
)

Epoch 1/2, Train Loss: 950.27001953125, Train Accuracy: 0.28459998965263367
Epoch 2/2, Train Loss: 820.862060546875, Train Accuracy: 0.3854199945926666


In [None]:

new_md.eval()

# Take exactly one batch from the loader. next(iter(...)) fails loudly on an
# empty loader, whereas a for/break loop would silently leave `loss` and `g`
# holding whatever a previous run stored in them.
X_fast, y_fast = next(iter(train_loader_fast))
loss = new_md.loss(new_md(X_fast.to(device)), y_fast.to(device))

# create_graph=True keeps the gradient differentiable so that second
# derivatives (Hessian rows) can be taken from it afterwards.
g = torch.autograd.grad(loss, new_md.parameters(), create_graph=True)

In [None]:
# Summarise the per-parameter gradients by shape instead of dumping every
# value (and its autograd graph) into the cell output.
[tuple(grad.shape) for grad in g]

In [None]:
# for grad in g:
#   flt = grad.flatten()
#   for flt_item in flt:
#     hes = torch.autograd.grad(flt_item, new_md.parameters(), retain_graph=True)
#     print(f"hessian shape: {hes}")

# Join all per-parameter gradients end to end into a single 1-D vector.
flattened_g = torch.cat(tuple(grad.view(-1) for grad in g))
print(flattened_g.shape)

torch.Size([5100])


In [None]:
# Build the dense Hessian one row at a time: differentiating entry i of the
# flattened gradient with respect to every parameter gives row i.
hessian = []

# The loop variable is deliberately NOT called `g`: that name holds the
# gradient tuple used to build `flattened_g`, and overwriting it would break
# any re-run of the flattening cell.
for grad_entry in flattened_g:
        # retain_graph=True because the same gradient graph is differentiated
        # again on every iteration.
        hessian_row = torch.autograd.grad(grad_entry, new_md.parameters(), retain_graph=True)
        hessian.append(torch.cat([hr.reshape(-1) for hr in hessian_row]))

hessian_matrix = torch.stack(hessian)

In [None]:

# The Hessian of a scalar loss is symmetric, so use the symmetric solver: it
# returns real eigenvalues in ascending order and skips the eigenvectors,
# which are not used. Averaging with the transpose removes any round-off
# asymmetry left by the row-by-row construction.
eigenvalues = torch.linalg.eigvalsh((hessian_matrix + hessian_matrix.T) / 2)

In [None]:
# Keep only the real part and order the values ascending, so the smallest
# eigenvalues sit at the front and the largest at the back.
eigenvalues = torch.sort(eigenvalues.real).values

In [None]:
# Display the sorted spectrum as a graph-free CPU tensor.
eigenvalues.detach().cpu()

tensor([-0.9763, -0.9377, -0.8863,  ..., 25.2600, 32.3954, 50.5742])

In [None]:
# The values are sorted ascending, so the last five are the largest.
print(f"five largest eigenvalues: {eigenvalues[-5:].cpu().detach().tolist()}")

five largest eignevalues: [14.031505584716797, 17.89615821838379, 25.25998878479004, 32.3953857421875, 50.57416915893555]


In [None]:
# The values are sorted ascending, so the first five are the smallest.
print(f"five smallest eigenvalues: {eigenvalues[:5].cpu().detach().tolist()}")

five smallests eignevalues: [-0.9762833714485168, -0.9376521110534668, -0.886317789554596, -0.8202689290046692, -0.7870007753372192]


In [None]:
def power_method(model, loader, iterations=50, beta=0.9, lss = None):
    """Estimate the top Hessian eigenvalue of the loss on one batch.

    A momentum-damped power iteration is run on Hessian-vector products, which
    are obtained by double back-propagation; the Hessian itself is never
    materialised.

    Args:
        model: module whose trainable parameters define the Hessian. It is put
            into eval mode. When ``lss`` is None it must expose a ``loss``
            attribute.
        loader: iterable of ``(inputs, targets)`` batches; only the first
            batch is used.
        iterations: number of power-iteration steps (at least 1).
        beta: weight of the previous direction in the update
            ``v <- beta * v + (1 - beta) * Hv / ||Hv||``.
        lss: optional loss callable ``lss(outputs, targets)``; defaults to
            ``model.loss``.

    Returns:
        float: the Rayleigh quotient ``v . (H v)`` of the unit direction ``v``
        that entered the final iteration.

    Raises:
        ValueError: if ``iterations`` is smaller than 1.
    """
    if iterations < 1:
        raise ValueError("iterations must be at least 1")

    model.eval()
    params = [p for p in model.parameters() if p.requires_grad]
    # Follow the model's own device rather than a notebook-level global, so
    # the batch and the direction vector always live where the weights are.
    param_device = params[0].device

    images, labels = next(iter(loader))
    images, labels = images.to(param_device), labels.to(param_device)

    # The model (in eval mode) and the batch are fixed, so the loss and its
    # differentiable gradient are computed once and reused by every iteration.
    criterion = lss if lss is not None else model.loss
    loss = criterion(model(images), labels)
    grads = torch.autograd.grad(loss, params, create_graph=True)
    grads = torch.cat([grad.reshape(-1) for grad in grads])

    v = torch.randn(grads.numel()).to(param_device)  # Random unit start vector
    v /= torch.norm(v)

    for i in range(iterations):
        # Hessian-vector product: d(grads . v)/d(params) = H v.
        u = torch.autograd.grad(grads @ v, params, retain_graph=True)
        u = torch.cat([ui.reshape(-1) for ui in u])

        # Rayleigh quotient of the current unit vector. It is taken BEFORE v
        # is updated so that v and u = H v belong to the same direction.
        eigenvalue = (v @ u).item()

        v = beta * v + (1 - beta) * u / torch.norm(u)
        v /= torch.norm(v)

    return eigenvalue

# Power-iteration estimate for the trained ConvNet, using the default
# iteration count and damping of power_method.
largest_eigenvalue = power_method(model=new_md, loader=train_loader_fast)
print("Largest Eigenvalue from Power Method:", largest_eigenvalue)


Largest Eigenvalue from Power Method: 47.05564880371094


In [None]:
from torchvision.models import resnet50
import torch.optim as optim


def _train_epochs(model, loader, optimizer, criterion, num_epochs):
    """Run ``num_epochs`` full training passes of ``model`` over ``loader``."""
    # power_method() switches the model to eval mode, so training mode is
    # restored here before any update is made.
    model.train()
    for _epoch in range(num_epochs):
        for images, labels in loader:
            images, labels = images.to(device), labels.to(device)
            optimizer.zero_grad()
            loss = criterion(model(images), labels)
            loss.backward()
            optimizer.step()


# CIFAR-10 has 10 classes; without num_classes torchvision builds ResNet-50
# with a 1000-way output layer.
resnet_model = resnet50(num_classes=10).to(device)
optimizer = optim.SGD(resnet_model.parameters(), lr=0.01, momentum=0.9)

criterion = nn.CrossEntropyLoss()

# Largest eigenvalue at initialisation
before_iter = power_method(resnet_model, train_loader_fast, lss = criterion)
print("Eigenvalue Before Training:", before_iter)

# Train for the first 10 epochs
_train_epochs(resnet_model, train_loader, optimizer, criterion, num_epochs=10)

# Compute largest eigenvalue after 10 epochs
after_10 = power_method(resnet_model, train_loader_fast, lss = criterion)
print("Eigenvalue After 10 Epochs:", after_10)

# Continue training for 90 more epochs (total 100)
_train_epochs(resnet_model, train_loader, optimizer, criterion, num_epochs=90)

# Compute largest eigenvalue after 100 epochs
after_100 = power_method(resnet_model, train_loader_fast, lss = criterion)
print("Eigenvalue After 100 Epochs:", after_100)

Eigenvalue Before Training: 5312.294921875
Eigenvalue After 10 Epochs: 151.6533203125
Eigenvalue After 100 Epochs: 254.95614624023438
