In [1]:
from matplotlib import pyplot as plt
import matplotlib.colors as colors
%matplotlib inline

import torch
from dqn_arch.model import *
import torchvision
from torchvision import datasets, transforms
import numpy as np
import torch.nn as nn
import copy
from itertools import product

device = 'cuda'

# CIFAR10

In [7]:
transform_train = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
])

randomed_transform_train = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
])

transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
])

trainset = torchvision.datasets.CIFAR10(root='~/data/cifar10', train=True, download=True, transform=transform_train)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=100, shuffle=True, num_workers=2)


testset = torchvision.datasets.CIFAR10(root='~/data/cifar10', train=True, download=True, transform=transform_test)
testloader = torch.utils.data.DataLoader(testset, batch_size=1000, shuffle=False, num_workers=2)


Files already downloaded and verified
Files already downloaded and verified


In [6]:
testloader = torch.utils.data.DataLoader(
    torchvision.datasets.MNIST('/home/leesy714/data/mnist/', train=True, download=True,
                    transform=transforms.Compose([
                        transforms.ToTensor(),
                        transforms.Normalize((0.1307,), (0.3081,))
                    ])),batch_size=4096, shuffle=True)
trainloader = torch.utils.data.DataLoader(
    torchvision.datasets.MNIST('/home/leesy714/data/mnist/', train=True, download=True,
                    transform=transforms.Compose([
                        transforms.ToTensor(),
                        transforms.Normalize((0.1307,), (0.3081,))
                    ])),batch_size=128, shuffle=True)

In [8]:
def test(net, loader):
    net.eval()
    correct = 0
    total =  0
    test_loss = []
    with torch.no_grad():
        for batch_idx, (inputs, targets) in enumerate(loader):
            inputs, targets = inputs.to(device), targets.to(device)
            outputs = net(inputs)
            loss = loss_function(outputs,targets)
            test_loss.append(loss.item())
            _, predicted = outputs.max(1)
            total += targets.size(0)
            correct += predicted.eq(targets).sum().item()
            
            
    acc = 100.*correct/total
    test_loss_mean = np.mean(test_loss)
    test_loss_std = np.std(test_loss)
    return test_loss_mean, test_loss_std, acc

In [9]:
loss_function = nn.CrossEntropyLoss()

In [10]:
def eval_plot_2d_landscape(net, nettype, loader, basis, r=1.0, scale=11,title='Title'):
    b1,b2 = basis
    x = np.linspace(-r, r, scale)
    y = np.linspace(-r, r,scale)
    nx,ny = len(x), len(y)
    loss_map = np.zeros((nx,ny))
    acc_map = np.zeros((nx,ny))
    std_map = np.zeros((nx,ny))

    tnet = nettype
    tnet.load_state_dict(net.state_dict())
    origin = tnet.get_weight_vector()    

    for i,j in product(range(nx),range(ny)):
        vec = origin + b1 * x[i] + b2 * y[j]
        tnet.set_weight_vector(vec)
        loss, std, acc = test(tnet, loader)
        loss_map[i, j] = loss
        acc_map[i, j] = acc
        std_map[i, j] = std
        print('\r',i, j, x[i], y[j], acc, loss,end='')
    print()
    X,Y = np.meshgrid(x,y)

    
    fig, axes = plt.subplots(1,2, figsize=(15,10))

    cs = axes[0].contour(X,Y,loss_map, levels=np.logspace(np.log(loss_map.min()), np.log(loss_map.max()),15,base=np.e))
    plt.clabel(cs, inline=1, fontsize=10)
    axes[0].set_title('Loss')
    cs = axes[1].contour(X,Y,std_map, levels=np.logspace(np.log(std_map.min()), np.log(std_map.max()),15,base=np.e))
    plt.clabel(cs, inline=1, fontsize=10)
    axes[1].set_title('Std')
    fig.suptitle(title)

    return loss_map, acc_map, std_map

In [11]:
net = Model(in_size=(3,32,32))
#net = Model(in_size=(1,28,28))
#checkpoint = torch.load('./dqn_arch/checkpoint/data_MNIST_optim_sgd_lr_0.01_wd_0.0005_batch-size_128_seed_2018/best.t7')
checkpoint = torch.load('./dqn_arch/checkpoint/data_CIFAR10_optim_sgd_lr_0.01_wd_0.0005_batch-size_128_seed_2018/best.t7')
net.load_state_dict(checkpoint['net'])    
net.to(device)

Model(
  (conv1): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), bias=False)
  (conv2): Conv2d(32, 64, kernel_size=(3, 3), stride=(2, 2), bias=False)
  (conv3): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), bias=False)
  (dense): Linear(in_features=1600, out_features=512, bias=False)
  (out): Linear(in_features=512, out_features=10, bias=True)
)

In [12]:
#net = Model(in_size=(3,32,32))
init_net = Model(in_size=(3,32,32))

init_net.to(device)

Model(
  (conv1): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), bias=False)
  (conv2): Conv2d(32, 64, kernel_size=(3, 3), stride=(2, 2), bias=False)
  (conv3): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), bias=False)
  (dense): Linear(in_features=1600, out_features=512, bias=False)
  (out): Linear(in_features=512, out_features=10, bias=True)
)

In [13]:
zero_vector = np.zeros(net.get_weight_vector().shape)
zero_vector.shape


(880490,)

In [14]:
print('Train loss mean: {:.6f} std: {:.6f} acc:{:.2f}'.format(*test(init_net, trainloader)))
#print('Fixed Train loss mean: {:.6f} std: {:.6f} acc:{:.2f}'.format(*test(net, fixed_trainloader)))
print('Test loss mean: {:.6f} std: {:.6f} acc:{:.2f}'.format(*test(init_net, testloader)))

Train loss mean: 2.303066 std: 0.003631 acc:9.98
Test loss mean: 2.303061 std: 0.000993 acc:10.14


In [15]:
print('Train loss mean: {:.6f} std: {:.6f} acc:{:.2f}'.format(*test(net, trainloader)))
#print('Fixed Train loss mean: {:.6f} std: {:.6f} acc:{:.2f}'.format(*test(net, fixed_trainloader)))
print('Test loss mean: {:.6f} std: {:.6f} acc:{:.2f}'.format(*test(net, testloader)))

Train loss mean: 0.375391 std: 0.078059 acc:87.00
Test loss mean: 0.293495 std: 0.016658 acc:90.32


In [16]:
bb = net.get_weight_vector()- init_net.get_weight_vector()
while True:
    r = np.random.normal(loc = zero_vector, scale=1)
    r = r / np.linalg.norm(r)
    if np.dot(bb,r) <=1e-100000000:
        break
r=r*np.linalg.norm(bb)/2
b1 = bb / 2 + r
print(np.linalg.norm(bb))
b2 = bb-b1


#print(init_net.get_weight_vector())

tnet = copy.deepcopy(net)
tnet.set_weight_vector(net.get_weight_vector())
origin = tnet.get_weight_vector()   
#print(origin+b1+b2)
b1_norm, b2_norm = np.linalg.norm(b1), np.linalg.norm(b2)
print(b1_norm, b2_norm)
b1=b1/b1_norm 
b2 =b2/b2_norm

28.324614
20.024901370377798 20.032206688347795


In [None]:
rx = b1_norm
ry = b2_norm
scale=21
x = np.linspace(-rx * 0.5, rx*2, scale)
y = np.linspace(-ry * 0.5, ry*2, scale)

nx,ny = len(x), len(y)
loss_map = np.zeros((nx,ny))
acc_map = np.zeros((nx,ny))

tnet = copy.deepcopy(net)
#tnet.set_weight_vector(net.get_weight_vector())
origin = init_net.get_weight_vector()

for i,j in product(range(nx),range(ny)):
    vec = origin + b1 * x[i] + b2 * y[j]
    tnet.set_weight_vector(vec)
    loss, std, acc = test(tnet, testloader)
    loss_map[i, j] = loss
    acc_map[i, j] = acc
    print('\r{} {}    {:.2f} {:.2f}    {:.2f}  {:.4f}'.format(i, j, x[i], y[j], acc, loss),end='')
print()
X,Y = np.meshgrid(x,y)

    
plt.figure(figsize=(10,10))
cs = plt.contour(X,Y,loss_map, levels=np.logspace(np.log(loss_map.min()), np.log(loss_map.max()),20,base=np.e))
plt.clabel(cs, inline=1, fontsize=10)



2 1    -5.01 -7.51    10.34  2.411496

In [None]:
def projection(v, b1,b2):
    p1 = np.dot(v-origin,b1)
    p2 = np.dot(v-origin,b2)
    return p1,p2

In [None]:
b2_norm
b2_norm

In [None]:
many_init=[]
for k in range(1000):
    tnet = Model((3,32,32))
    x,y = projection(tnet.get_weight_vector(),b1,b2)
    many_init.append([x,y])
many_init = np.array(many_init).transpose()

In [None]:
many_init.shape

In [None]:
plt.figure(figsize=(10,10))
levels = np.concatenate((np.linspace(0.4,1.6,7),np.logspace(np.log(1.8), np.log(10),5, base=np.e)))
levels = np.logspace(np.log2(0.25), np.log2(8), 13, base=2)
#levels = np.linspace(0.2,8.0,40)

cs = plt.contour(X,Y,loss_map, levels=levels,cmap='plasma')
plt.clabel(cs, inline=1, fontsize=10)
plt.plot(many_init[0], many_init[1],'co',markersize=5, alpha=0.5)
plt.plot(b1_norm, b2_norm,'ro',markersize=5, alpha=0.5)

In [None]:
import pickle
pickle

In [None]:
dist_from_init=np.linalg.norm(net.get_weight_vector() - init_net.get_weight_vector())
dist_from_init

In [None]:
def plot_2d_landscape(loss_map, std_map,r=1.0, title='Title',):
    scale = loss_map.shape[0]
    x = np.linspace(-r, r, scale)
    y = np.linspace(-r, r,scale)
    X,Y = np.meshgrid(x,y)

    
    fig, axes = plt.subplots(1,2, figsize=(15,10))
    

    cs = axes[0].contour(X,Y,loss_map, levels=np.logspace(np.log(loss_map.min()), np.log(loss_map.max()),15,base=np.e))
    plt.clabel(cs, inline=1, fontsize=10)
    axes[0].set_title('Loss')
    cs = axes[1].contour(X,Y,std_map, levels=np.logspace(np.log(std_map.min()), np.log(std_map.max()),15,base=np.e))
    plt.clabel(cs, inline=1, fontsize=10)
    axes[1].set_title('Std')
    fig.suptitle(title)


In [None]:
 mnist_sgd_1_11 = eval_plot_2d_landscape(net, Model(in_size=(1,28,28)), trainloader, (b1,b2), r=dist_from_init, scale=11, title='Title')

In [None]:
cifar10_sgd_1_101 = eval_plot_2d_landscape(net, Model(in_size=(3,32,32)), fixed_trainloader, (b1,b2), r=dist_from_origin, scale=11, title='Title')

In [None]:
import pickle
pickle.dump((cifar10_sgd_1_11,cifar10_sgd_1_101),open('cifar10_11_51','wb'))