In [None]:
%matplotlib inline
%load_ext autoreload
%autoreload 2
import torch
import torch.nn.functional as F
import torch.nn as nn
import torchvision.transforms as transforms

from torch.optim import Optimizer
from torch.optim import SGD
import mass
from torch.utils.data import DataLoader
from torchvision.datasets import MNIST
import matplotlib.pyplot as plt
# Reproducibility: a single seed shared by NumPy and PyTorch, plus cuDNN
# settings that favour deterministic kernels over auto-tuned ones.
seed = 7
import numpy as np
np.random.seed(seed)
torch.manual_seed(seed)
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True

from skorch import NeuralNetClassifier
from sklearn.model_selection import GridSearchCV, ShuffleSplit
import numpy as np

In [None]:
# MNIST dataset: ToTensor converts each image to a float tensor and the
# Lambda flattens it to a 1-D vector (x.view(-1)) so it can be fed straight
# into a fully connected network.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Lambda(lambda x: x.view(-1))
])

# Both splits are downloaded into the current directory if not already there.
trainset = MNIST(".", train=True, download=True, transform=transform)
testset = MNIST(".", train=False, download=True, transform=transform)

# create data loaders
batch_size = 64
trainloader = DataLoader(trainset, batch_size=batch_size, shuffle=True)
# Shuffling only matters for training; a fixed order keeps evaluation
# reproducible from run to run.
testloader = DataLoader(testset, batch_size=batch_size, shuffle=False)

In [None]:
class FCN(nn.Module):
    """Fully connected classifier built from four linear layers.

    Args:
        input_size: Number of features per input sample.
        hidden_size: Width shared by the three hidden layers.
        num_classes: Number of output units.

    ``forward`` expects a 2-D ``(batch, input_size)`` tensor (the softmax is
    taken over ``dim=1``). In training mode it returns the un-normalised
    scores of the last layer; in eval mode it returns softmax probabilities.
    """

    def __init__(self, input_size=784, hidden_size=100, num_classes=10):
        super().__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.fc2 = nn.Linear(hidden_size, hidden_size)
        self.fc3 = nn.Linear(hidden_size, hidden_size)
        self.fc4 = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        # NOTE(review): there is no non-linearity between fc1 and fc2, so the
        # two layers compose into a single linear map -- confirm this is
        # intended before adding an activation here.
        out = self.fc1(x)

        # F.dropout defaults to training=True; the flag must be forwarded
        # explicitly, otherwise units keep being dropped after model.eval()
        # and inference becomes random.
        out = self.fc2(out)
        out = F.relu(out)
        out = F.dropout(out, 0.5, training=self.training)

        out = self.fc3(out)
        out = F.relu(out)
        out = F.dropout(out, 0.5, training=self.training)

        # NOTE(review): ReLU + dropout are also applied to the class scores
        # that go into the loss; kept as in the original experiment, but this
        # is unusual for an output layer -- confirm it is intended.
        out = self.fc4(out)
        out = F.relu(out)
        out = F.dropout(out, 0.5, training=self.training)

        # Probabilities are only produced for inference; during training the
        # raw scores are returned for the loss function.
        if not self.training:
            out = F.softmax(out, dim=1)
        return out

In [None]:
def fit(model_instance, loss_fn, optim, data_loader, n_iter = 100):
    """Train ``model_instance`` for ``n_iter`` epochs and record the loss.

    Args:
        model_instance: ``nn.Module`` to train; it is switched to training
            mode at the start of every epoch.
        loss_fn: Callable ``loss_fn(outputs, labels)`` returning a scalar
            tensor.
        optim: Optimizer already bound to the model's parameters.
        data_loader: Iterable yielding ``(inputs, labels)`` batches.
        n_iter: Number of epochs (full passes over ``data_loader``).

    Returns:
        A 1-D tensor of length ``n_iter``; entry ``i`` is the sum of the batch
        losses seen during epoch ``i`` (a sum, not a mean).
    """
    # Take the device from the model itself rather than from a notebook-level
    # `device` global, so the function does not depend on which cells ran
    # before it. A model without parameters leaves the batches where they are.
    first_param = next(model_instance.parameters(), None)
    target_device = first_param.device if first_param is not None else None

    train_loss = torch.zeros(n_iter)

    for epoch in range(n_iter):
        model_instance.train()
        running_loss = 0.0
        for inputs, labels in data_loader:
            if target_device is not None:
                inputs = inputs.to(target_device)
                labels = labels.to(target_device)

            optim.zero_grad()
            outputs = model_instance(inputs)
            loss = loss_fn(outputs, labels)
            loss.backward()
            optim.step()

            running_loss += loss.item()
        print("Epoch %d, loss %4.2f" % (epoch, running_loss))
        train_loss[epoch] = running_loss

    print('**** Finished Training ****')
    return train_loss

In [None]:
# SGD
torch.cuda.empty_cache()
# Use the first GPU when one is present; otherwise fall back to the CPU so
# the cell still runs on machines without CUDA.
device = "cuda:0" if torch.cuda.is_available() else "cpu"

# Fresh model so this run does not start from another optimizer's weights.
model = FCN().to(device)

loss_function = nn.CrossEntropyLoss()
sgd = SGD(model.parameters(), lr = 0.01)
train_loss_sgd = fit(model_instance = model, loss_fn = loss_function, optim = sgd, data_loader=trainloader)

In [None]:
# Nesterov
torch.cuda.empty_cache()
# Use the first GPU when one is present; otherwise fall back to the CPU so
# the cell still runs on machines without CUDA.
device = "cuda:0" if torch.cuda.is_available() else "cpu"

# Fresh model so this run does not start from another optimizer's weights.
model = FCN().to(device)

loss_function = nn.CrossEntropyLoss()
# NOTE(review): weight_decay=1 is a very strong L2 penalty -- confirm it is
# the intended setting for this comparison.
sgd_nesterov = SGD(model.parameters(), lr = 0.001,momentum=0.9, nesterov=True, weight_decay=1)
train_loss_nesterov = fit(model_instance = model, loss_fn = loss_function, optim = sgd_nesterov, data_loader=trainloader)
# Persist the Nesterov curve itself (the SGD curve was being written to this
# file by mistake).
torch.save(train_loss_nesterov, "./train_loss_nes.mnist")

In [None]:
# Mass
torch.cuda.empty_cache()
# Use the first GPU when one is present; otherwise fall back to the CPU so
# the cell still runs on machines without CUDA.
device = "cuda:0" if torch.cuda.is_available() else "cpu"

# Fresh model so this run does not start from another optimizer's weights.
model = FCN().to(device)

loss_function = nn.CrossEntropyLoss()
# The optimizer gets its own name: binding it to `mass` would overwrite the
# imported `mass` module, and re-running this cell would then fail because
# an optimizer instance has no `Mass` attribute.
mass_optimizer = mass.Mass(model.parameters(), lr = 0.01, alpha = 0.05, kappa_t = 12)
train_loss_mass = fit(model, loss_function, mass_optimizer, trainloader)

In [None]:
# Persist the MaSS curve itself (the SGD curve was being written to this file
# by mistake).
torch.save(train_loss_mass, "./train_loss_mas.mnist")

In [None]:
# Compare the per-epoch training loss of the three optimizers on a log10
# scale. Each curve holds the summed batch losses of one epoch.
fig, ax = plt.subplots()
ax.plot(torch.log10(train_loss_sgd), c = 'red', label = 'sgd')
ax.plot(torch.log10(train_loss_nesterov), c = 'blue', label = 'nesterov')
ax.plot(torch.log10(train_loss_mass), c = 'green', label = 'mass')
# Label the axes so the figure can be read without the surrounding code.
ax.set_xlabel('epoch')
ax.set_ylabel('log10(summed batch loss per epoch)')
ax.set_title('Training loss by optimizer')
ax.legend();

In [None]:
# Hyper-parameter grid for plain SGD: the learning rate is set on the skorch
# net, and the weight decay is routed to the optimizer through the
# `optimizer__` prefix.
parameters_sgd = dict(
    lr=np.arange(0.01, 0.3, 0.05),
    optimizer__weight_decay=np.arange(0,5,1),
)

#cv_split = ShuffleSplit(n_splits = 10, test_size = .3, train_size = .7, random_state = 0 )

# skorch wrapper that exposes the FCN module through the scikit-learn
# estimator API so it can be passed to GridSearchCV.
net = NeuralNetClassifier(
    module=FCN,
    criterion=nn.CrossEntropyLoss,
    optimizer=SGD,
    max_epochs=5,
    batch_size=64,
    device='cuda:0',
)

In [None]:

gs = GridSearchCV(net, parameters_sgd, cv=3, scoring='accuracy')

# `trainset.data` holds the raw pixel values and bypasses `transform`, so the
# scaling that ToTensor applies for the data loaders is reproduced here
# (divide by 255). Otherwise the grid search would tune hyper-parameters on
# inputs 255x larger than the ones the training loops see. The sample count
# comes from the dataset instead of a hard-coded 60000.
grid_inputs = trainset.data.float().div(255).view(len(trainset), -1)
gs.fit(grid_inputs, trainset.targets)
print(gs.best_score_, gs.best_params_)