In [1]:
from __future__ import print_function

import argparse
import os

import matplotlib.pyplot as plt
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.autograd import Variable
from torchvision import datasets, transforms

# ---- Hyper-parameters -------------------------------------------------------
batch_size = 32          # mini-batch size for the training loader
test_batch_size = 1000   # mini-batch size for the test loader
epochs = 10
lr = 0.01                # SGD learning rate
momentum = 0.5           # SGD momentum
no_cuda = False          # set to True to force CPU even if a GPU is present
seed = 1
log_interval = 10        # record the training loss every `log_interval` batches

# Use the GPU only when it is both allowed and available.
cuda = False if no_cuda else torch.cuda.is_available()

# Seed the CPU generator, and the CUDA generator when running on GPU.
torch.manual_seed(seed)
if cuda:
    torch.cuda.manual_seed(seed)

# Extra DataLoader options are only passed when running on GPU.
kwargs = {}
if cuda:
    kwargs = {'num_workers': 1, 'pin_memory': True}

# Both splits go through the same preprocessing: convert to tensor, then
# normalise with the constants (0.1307,) / (0.3081,).
_mnist_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,)),
])

# Only the training split requests a download.
_train_set = datasets.MNIST('../data', train=True, download=True,
                            transform=_mnist_transform)
_test_set = datasets.MNIST('../data', train=False,
                           transform=_mnist_transform)

train_loader = torch.utils.data.DataLoader(
    _train_set, batch_size=batch_size, shuffle=True, **kwargs)
test_loader = torch.utils.data.DataLoader(
    _test_set, batch_size=test_batch_size, shuffle=True, **kwargs)


class MLPNetModified(nn.Module):
    """Fully connected 784 -> 500 -> 256 -> 10 network with pluggable activations.

    Each of the three linear layers is followed by a caller-supplied
    activation, which lets different activation combinations be compared
    with an otherwise identical architecture.

    Args:
        f1: callable applied to the output of ``fc1``.
        f2: callable applied to the output of ``fc2``.
        f3: callable applied to the output of ``fc3`` (i.e. to the 10 class
            scores, *before* the log-softmax).
    """

    def __init__(self, f1, f2, f3):
        super(MLPNetModified, self).__init__()
        self.f1 = f1
        self.f2 = f2
        self.f3 = f3
        self.fc1 = nn.Linear(28 * 28, 500)
        self.fc2 = nn.Linear(500, 256)
        self.fc3 = nn.Linear(256, 10)

    def forward(self, x):
        """Return per-class log-probabilities of shape ``(N, 10)``.

        The input is flattened to ``(N, 784)``, so any tensor whose element
        count is a multiple of 784 is accepted.
        """
        x = x.view(-1, 28 * 28)
        x = self.f1(self.fc1(x))
        x = self.f2(self.fc2(x))
        x = self.f3(self.fc3(x))
        # The class dimension is given explicitly: after the flatten above the
        # tensor is always 2-D, and relying on the implicit default is
        # deprecated and emits a warning.
        return F.log_softmax(x, dim=1)

    def name(self):
        """Return the short identifier of this architecture."""
        return 'mlpnet'

# Module-level result accumulators; each starts out as its own empty list.
# `plots_test_accuracy` is read by the summary loop further down, which
# treats every entry as [underscore_separated_run_name, sequence_of_values].
# `epoch_model_parameters` is not read anywhere in the code visible here.
(plots_test_loss,
 plots_train_loss,
 plots_test_accuracy,
 epoch_model_parameters) = ([] for _ in range(4))

def _activation_name(fn):
    """Return a short label for an activation callable."""
    # Plain functions and built-ins both carry __name__; parsing str(fn)
    # instead yields "method" for built-ins such as torch.tanh.  Callables
    # without __name__ (e.g. module instances) fall back to their type name.
    return getattr(fn, '__name__', type(fn).__name__)


def solve(f1, f2, f3):
    """Train and evaluate one ``MLPNetModified`` with the given activations.

    Reads the module-level ``train_loader``, ``test_loader``, ``cuda``,
    ``lr``, ``momentum``, ``log_interval`` and ``epochs``.  After the last
    epoch the per-run histories are appended to the module-level
    ``plots_train_loss``, ``plots_test_loss`` and ``plots_test_accuracy``
    lists as ``[layers, history]`` entries, which is the shape the summary
    loop later in the file indexes into.

    Requires PyTorch >= 0.4 (``Tensor.item()`` and ``torch.no_grad()``).

    Args:
        f1, f2, f3: activation callables applied after the first, second and
            third linear layer respectively.

    Returns:
        A ``(train_min_values, layers)`` tuple.  ``train_min_values`` holds
        one ``[min(fc1.weight), min(fc2.weight), min(fc3.weight)]`` list of
        Python floats per epoch; ``layers`` is the label ``"<f1>_<f2>_<f3>"``.
    """
    layers = "_".join(_activation_name(f) for f in (f1, f2, f3))
    print(layers)

    model = MLPNetModified(f1, f2, f3)
    if cuda:
        # The batches are moved to the GPU below, so the parameters must live
        # there too; otherwise the first forward pass fails on a device
        # mismatch.
        model.cuda()
    optimizer = optim.SGD(model.parameters(), lr=lr, momentum=momentum)

    train_loss = []        # loss sampled every `log_interval` batches
    test_losses = []       # average test loss, one entry per epoch
    test_accuracy = []     # test accuracy in percent, one entry per epoch
    train_min_values = []  # smallest weight of each layer, one row per epoch
    weight_keys = ('fc1.weight', 'fc2.weight', 'fc3.weight')

    def train(epoch):
        """Run one pass over the training set and record the weight minima."""
        model.train()
        loss_to_print = 0
        for batch_idx, (data, target) in enumerate(train_loader):
            if cuda:
                data, target = data.cuda(), target.cuda()
            optimizer.zero_grad()
            loss = F.nll_loss(model(data), target)
            loss.backward()
            optimizer.step()
            # .item() yields a plain float and drops the autograd graph.
            loss_to_print = loss.item()
            if batch_idx % log_interval == 0:
                train_loss.append(loss_to_print)
        # Only the loss of the final batch of the epoch is printed.
        print(epoch, loss_to_print)
        params = model.state_dict()
        train_min_values.append([params[key].min().item() for key in weight_keys])

    def test(epoch):
        """Evaluate on the test set and record average loss and accuracy."""
        model.eval()
        test_loss = 0
        correct = 0
        with torch.no_grad():  # evaluation needs no gradient bookkeeping
            for data, target in test_loader:
                if cuda:
                    data, target = data.cuda(), target.cuda()
                output = model(data)
                # Sum (not mean) per batch so the division below gives the
                # per-sample average over the whole test set.
                test_loss += F.nll_loss(output, target, reduction='sum').item()
                # Index of the max log-probability is the predicted class.
                pred = output.max(1, keepdim=True)[1]
                correct += pred.eq(target.view_as(pred)).sum().item()
        n_samples = len(test_loader.dataset)
        test_loss /= n_samples
        accuracy = 100. * correct / n_samples
        test_losses.append(test_loss)
        test_accuracy.append(accuracy)
        if epoch == epochs:
            print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
                test_loss, correct, n_samples, accuracy))

    for epoch in range(1, epochs + 1):
        train(epoch)
        test(epoch)

    # Publish the histories; without this the module-level lists stay empty
    # and the accuracy summary after the runs has nothing to report.
    plots_train_loss.append([layers, train_loss])
    plots_test_loss.append([layers, test_losses])
    plots_test_accuracy.append([layers, test_accuracy])

    return train_min_values, layers

# Train one network per activation combination.  Each result is the
# (per-epoch weight minima, label) tuple returned by solve().
paramA = solve(F.relu, F.relu, F.relu)
paramB = solve(F.relu, F.sigmoid, F.relu)
paramC = solve(F.relu, F.sigmoid, F.tanh)

# Last recorded test accuracy of every run, keyed by the first three
# underscore-separated parts of the run name, printed in ascending order.
test_accuracy_last = [
    ['_'.join(a[0].split('_')[0:3]), a[1][-1]] for a in plots_test_accuracy
]
test_accuracy_last.sort(key=lambda x: x[1])
for a in test_accuracy_last:
    print(a)

# Coerce every entry to a Python float before building the arrays: depending
# on the PyTorch version the minima may be floats or 0-dim tensors (possibly
# on the GPU), and NumPy cannot reliably build an array from the latter.
arr0 = np.array([[float(v) for v in row] for row in paramA[0]])
arr1 = np.array([[float(v) for v in row] for row in paramB[0]])
arr2 = np.array([[float(v) for v in row] for row in paramC[0]])

# Column 0 holds the minimum weight of the first linear layer (fc1).
plt.figure()
for arr, run in ((arr0, paramA), (arr1, paramB), (arr2, paramC)):
    plt.plot(arr[:, 0], label=run[1])
plt.legend(bbox_to_anchor=(0., 1.02, 1., .102), loc=3,
           ncol=2, mode="expand", borderaxespad=0.)

# savefig does not create missing directories, so make sure the target exists.
out_dir = "min_weights_per_iteration"
if not os.path.isdir(out_dir):
    os.makedirs(out_dir)
plt.savefig(out_dir + "/" + str(epochs) + "_iterations.png")

relu_relu_relu
1 0.13197508454322815
2 0.061797771602869034
3 0.047406766563653946
4 0.04274652153253555
5 0.07503548264503479
6 0.031121335923671722
7 0.0016826121136546135
8 0.04030686616897583
9 0.03174635022878647
10 0.0027607083320617676

Test set: Average loss: 0.0619, Accuracy: 9795/10000 (98%)

relu_sigmoid_relu
1 1.5898154973983765
2 1.1462095975875854
3 1.1316478252410889
4 0.6600117087364197
5 0.6857587695121765
6 0.7237051725387573
7 0.612843930721283
8 0.8079507946968079
9 0.6040196418762207
10 0.8599200248718262

Test set: Average loss: 0.7589, Accuracy: 7825/10000 (78%)

relu_sigmoid_tanh
1 1.034830093383789
2 1.0283654928207397
3 0.9301056265830994
4 0.869178295135498
5 0.9020575284957886
6 0.8179965615272522
7 0.8151505589485168
8 0.8926700949668884
9 0.9116194248199463
10 0.8165391087532043

Test set: Average loss: 0.8809, Accuracy: 9566/10000 (96%)

