In [65]:
#!pip install pyhessian
#!pip install pytorchcv

import torch
import torchvision
import torchvision.transforms as transforms
import matplotlib.pyplot as plt

from pyhessian import hessian
import numpy as np

from pytorchcv.model_provider import get_model as ptcv_get_model # model



In [53]:
# 예제를 위한 간단한 CNN 모델 정의
class SimpleCNN(torch.nn.Module):
    def __init__(self):
        super(SimpleCNN, self).__init__()
        self.conv1 = torch.nn.Conv2d(3, 16, 3, 1, padding=1)
        self.conv2 = torch.nn.Conv2d(16, 32, 3, 1, padding=1)
        self.fc1 = torch.nn.Linear(32 * 8 * 8, 128)
        self.fc2 = torch.nn.Linear(128, 10)

    def forward(self, x):
        x = torch.nn.functional.relu(self.conv1(x))
        x = torch.nn.functional.max_pool2d(x, 2, 2)
        x = torch.nn.functional.relu(self.conv2(x))
        x = torch.nn.functional.max_pool2d(x, 2, 2)
        x = x.view(-1, 32 * 8 * 8)
        x = torch.nn.functional.relu(self.fc1(x))
        x = self.fc2(x)
        return x

# CIFAR-10 데이터셋 불러오기
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

trainset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=32, shuffle=True, num_workers=2)

Files already downloaded and verified


In [58]:
# 모델 생성 및 학습
model = SimpleCNN()
model = model.cuda()
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)

for epoch in range(2):  # 학습 횟수는 임의로 설정
    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        inputs, labels = data
        inputs, labels = inputs.cuda(), labels.cuda()
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
        if i % 200 == 199:    # print every 200 mini-batches
            print('[%d, %5d] loss: %.3f' %
                  (epoch + 1, i + 1, running_loss / 200))

[1,   200] loss: 2.289
[1,   400] loss: 4.460
[1,   600] loss: 6.453
[1,   800] loss: 8.330
[1,  1000] loss: 10.165
[1,  1200] loss: 11.951
[1,  1400] loss: 13.701
[2,   200] loss: 1.642
[2,   400] loss: 3.271
[2,   600] loss: 4.826
[2,   800] loss: 6.364
[2,  1000] loss: 7.881
[2,  1200] loss: 9.360
[2,  1400] loss: 10.800


In [59]:
it = iter(trainloader)
first = next(it)
second = next(it)

hessian_comp = hessian(model, criterion, data=first, cuda=True)
top_eigenvalues, top_eigenvector = hessian_comp.eigenvalues()

In [60]:
# This is a simple function, that will allow us to perturb the model paramters and get the result
def get_params(model_orig,  model_perb, direction, alpha):
    for m_orig, m_perb, d in zip(model_orig.parameters(), model_perb.parameters(), direction):
        m_perb.data = m_orig.data + alpha * d
    return model_perb

In [66]:
# lambda is a small scalar that we use to perturb the model parameters along the eigenvectors 
lams = np.linspace(-0.5, 0.5, 21).astype(np.float32)

loss_list = []

# create a copy of the model
model_perb = ptcv_get_model("resnet20_cifar10", pretrained=True)
model_perb.eval()
model_perb = model_perb.cuda()

for lam in lams:
    model_perb = get_params(model, model_perb, top_eigenvector[0], lam)
    loss_list.append(criterion(model_perb(inputs), targets).item())

plt.plot(lams, loss_list)
plt.ylabel('Loss')
plt.xlabel('Perturbation')
plt.title('Loss landscape perturbed based on top Hessian eigenvector')

Downloading C:\Users\JM\.torch\models\resnet20_cifar10-0597-9b0024ac.pth.zip from https://github.com/osmr/imgclsmob/releases/download/v0.0.163/resnet20_cifar10-0597-9b0024ac.pth.zip...


RuntimeError: bias should contain 16 elements not 4608