In [3]:
# Code influenced by: https://github.com/pytorch/examples/blob/master/mnist/main.py

from __future__ import print_function

import argparse
import os

import matplotlib.pyplot as plt
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.autograd import Variable
from torchvision import datasets, transforms

# ---------------------------------------------------------------------------
# Run configuration
# ---------------------------------------------------------------------------
batch_size = 32          # samples per training mini-batch
test_batch_size = 1000   # samples per evaluation mini-batch
epochs = 10              # number of passes over the training set
lr = 0.01                # SGD learning rate
momentum = 0.5           # SGD momentum
no_cuda = False          # set to True to force CPU execution
seed = 1                 # seed handed to the torch RNGs below
log_interval = 10        # record the training loss every N batches

# Use the GPU only when it is both allowed and actually present.
cuda = False if no_cuda else torch.cuda.is_available()

# Seed the CPU generator, and the CUDA generator when it is in use.
torch.manual_seed(seed)
if cuda:
    torch.cuda.manual_seed(seed)

# Extra DataLoader options are only passed when running on the GPU.
kwargs = {}
if cuda:
    kwargs = {'num_workers': 1, 'pin_memory': True}


def _make_mnist_transform():
    """Build the tensor-conversion + normalisation pipeline for one loader."""
    return transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.1307,), (0.3081,)),
    ])


# Training split: downloaded on first use, reshuffled by the loader.
_train_set = datasets.MNIST('../data', train=True, download=True,
                            transform=_make_mnist_transform())
train_loader = torch.utils.data.DataLoader(
    _train_set, batch_size=batch_size, shuffle=True, **kwargs)

# Test split: read from the same directory (no download flag), also shuffled.
_test_set = datasets.MNIST('../data', train=False,
                           transform=_make_mnist_transform())
test_loader = torch.utils.data.DataLoader(
    _test_set, batch_size=test_batch_size, shuffle=True, **kwargs)

class MLPNetModified(nn.Module):
    """Three-layer fully connected network with a pluggable activation per layer.

    The input is flattened to 28*28 features and passed through three linear
    layers (784 -> 500 -> 256 -> 10). After every linear layer the features
    are split into two halves, the layer's activation is applied to each half
    separately, and the halves are concatenated again.

    Args:
        f1: callable applied to the output of ``fc1``.
        f2: callable applied to the output of ``fc2``.
        f3: callable applied to the output of ``fc3``.
    """

    def __init__(self, f1, f2, f3):
        super(MLPNetModified, self).__init__()
        self.f1 = f1
        self.f2 = f2
        self.f3 = f3
        self.fc1 = nn.Linear(28*28, 500)
        self.fc2 = nn.Linear(500, 256)
        self.fc3 = nn.Linear(256, 10)

    @staticmethod
    def _apply_in_halves(fn, x):
        """Apply ``fn`` to the left and right feature halves of ``x`` and re-join them."""
        half = x.size(1) // 2
        left = fn(x[:, :half])
        right = fn(x[:, half:])
        return torch.cat((left, right), 1)

    @staticmethod
    def _row_min(x):
        """Return the per-row minimum of ``x`` as a NumPy array."""
        # detach() drops the autograd graph and cpu() makes the conversion
        # valid for CUDA tensors as well; numpy() alone raises on both.
        return torch.min(x, 1)[0].detach().cpu().numpy()

    def forward(self, x):
        """Run the network on a batch.

        Args:
            x: tensor that can be viewed as ``(batch, 28*28)``.

        Returns:
            A tuple ``(log_probs, min_values)`` where ``log_probs`` is the
            row-wise log-softmax of the activated ``fc3`` output and
            ``min_values`` is a list of three NumPy arrays, one value per
            sample:

            * ``min_values[0]`` - minimum of the ``fc3`` output after ``f3``;
            * ``min_values[1]`` - minimum of the raw ``fc3`` output;
            * ``min_values[2]`` - minimum of the raw ``fc2`` output.
        """
        x = x.view(-1, 28*28)
        x = self._apply_in_halves(self.f1, self.fc1(x))

        x = self.fc2(x)
        fc2_min = self._row_min(x)  # recorded before f2 is applied
        x = self._apply_in_halves(self.f2, x)

        x = self.fc3(x)
        fc3_min = self._row_min(x)  # recorded before f3 is applied
        x = self._apply_in_halves(self.f3, x)

        # Name the class dimension explicitly; the implicit choice is deprecated.
        return F.log_softmax(x, dim=1), [self._row_min(x), fc3_min, fc2_min]

    def name(self):
        """Return the short identifier of this architecture."""
        return 'mlpnet'

# Module-level accumulators filled by solve(): every call appends one
# [label, values] pair to each of the three lists.
plots_test_loss, plots_train_loss, plots_test_accuracy = [], [], []

def solve(f1, f2, f3):
    """Train and evaluate one ``MLPNetModified`` with the given activations.

    The model is trained with SGD for ``epochs`` epochs on ``train_loader``
    and evaluated on ``test_loader`` after every epoch. The recorded loss and
    accuracy series are appended to the module-level ``plots_train_loss``,
    ``plots_test_loss`` and ``plots_test_accuracy`` lists as
    ``[label, values]`` pairs.

    Uses ``torch.no_grad``, ``Tensor.item`` and the ``reduction`` argument of
    ``nll_loss``, so it needs PyTorch 0.4.1 or newer.

    Args:
        f1: activation applied after the first linear layer.
        f2: activation applied after the second linear layer.
        f3: activation applied after the third linear layer.

    Returns:
        A list of three 1-D NumPy arrays holding, for every test sample seen
        during the final epoch, the minimum values reported by the model
        (in the order the model returns them). ``None`` when ``epochs`` is
        smaller than 1.
    """
    def _fn_name(fn):
        # __name__ also works for built-in callables such as torch.sigmoid,
        # whose str() does not have the function name as its second word.
        return getattr(fn, '__name__', None) or type(fn).__name__

    names = [_fn_name(f1), _fn_name(f2), _fn_name(f3)]
    run_tag = '_'.join(names)
    print(names[0], names[1], names[2])

    model = MLPNetModified(f1, f2, f3)
    if cuda:
        # The batches are moved to the GPU below, so the weights must be too.
        model.cuda()
    optimizer = optim.SGD(model.parameters(), lr=lr, momentum=momentum)
    train_loss = []
    test_losses = []
    test_accuracy = []

    def train(epoch):
        """Run one training epoch and print the loss of its last batch."""
        model.train()
        last_loss = 0
        for batch_idx, (data, target) in enumerate(train_loader):
            if cuda:
                data, target = data.cuda(), target.cuda()
            optimizer.zero_grad()
            output, _ = model(data)
            loss = F.nll_loss(output, target)
            loss.backward()
            optimizer.step()
            last_loss = loss.item()
            if batch_idx % log_interval == 0:
                train_loss.append(last_loss)
        print(epoch, last_loss)

    def test(epoch):
        """Evaluate on the test set; return the min values on the final epoch."""
        model.eval()
        final_epoch = epoch == epochs
        test_loss = 0
        correct = 0
        minvalues_list = [np.empty([1, 0]), np.empty([1, 0]), np.empty([1, 0])]
        # Evaluation needs no gradients; no_grad replaces the removed
        # ``volatile=True`` flag.
        with torch.no_grad():
            for data, target in test_loader:
                if cuda:
                    data, target = data.cuda(), target.cuda()
                output, minvalues = model(data)
                if final_epoch:
                    print(minvalues_list[0].shape)
                    print(minvalues[0].shape)
                    # np.append flattens, so each entry grows into a 1-D array.
                    for i in range(3):
                        minvalues_list[i] = np.append(minvalues_list[i],
                                                      minvalues[i])
                # Sum up the batch loss; it is averaged over the dataset below.
                test_loss += F.nll_loss(output, target,
                                        reduction='sum').item()
                # Index of the max log-probability is the predicted class.
                pred = output.max(1, keepdim=True)[1]
                correct += pred.eq(target.view_as(pred)).sum().item()
        test_loss /= len(test_loader.dataset)
        test_losses.append(test_loss)
        test_accuracy.append(100. * correct / len(test_loader.dataset))
        if not final_epoch:
            return None
        print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
            test_loss, correct, len(test_loader.dataset),
            100. * correct / len(test_loader.dataset)))
        print(len(minvalues_list))
        return minvalues_list

    min_values = None
    for epoch in range(1, epochs + 1):
        train(epoch)
        min_values = test(epoch)

    # Each series gets its own label; previously all three were tagged
    # "test_loss".
    plots_train_loss.append([run_tag + '_train_loss.png', train_loss])
    plots_test_loss.append([run_tag + '_test_loss.png', test_losses])
    plots_test_accuracy.append([run_tag + '_test_accu.png', test_accuracy])
    return min_values
# Train one network per activation triple, in this order; each solve() call
# returns the minimum values it collected during its final test pass.
final_min_valuesA, final_min_valuesB, final_min_valuesC = [
    solve(first, second, third)
    for first, second, third in (
        (F.relu, F.relu, F.relu),
        (F.relu, F.sigmoid, F.relu),
        (F.relu, F.sigmoid, F.tanh),
    )
]



def _save_min_value_plots(runs, out_dir="min_values_plots"):
    """Save per-run and overlaid plots of the recorded minimum values.

    For each of the three entries in a run's ``min_values`` list, one figure
    per run is written to ``out_dir`` and one figure overlaying all runs is
    written to ``out_dir + "/overlappedplots"``.

    Args:
        runs: sequence of ``(label, color, min_values)`` tuples, where
            ``label`` is used in file names and legends, ``color`` is a
            matplotlib colour and ``min_values`` is the three-element list
            returned by ``solve``.
        out_dir: directory receiving the figures; created when missing.
    """
    overlap_dir = out_dir + "/overlappedplots"
    # savefig does not create missing directories, so make sure both exist
    # (creating the nested one creates its parent too).
    if not os.path.isdir(overlap_dir):
        os.makedirs(overlap_dir)

    # Position inside each min_values list -> layer name used in file names.
    layers = ((0, "final"), (1, "middle"), (2, "first"))

    # One figure per (layer, run).
    for index, layer in layers:
        for label, color, min_values in runs:
            fig = plt.figure()
            plt.plot(min_values[index], color=color)
            fig.savefig("{}/{}_minvalues_{}_layer.png".format(
                out_dir, label, layer))

    # One figure per layer with all runs overlaid and a legend above the axes.
    for index, layer in layers:
        fig = plt.figure()
        for label, color, min_values in runs:
            plt.plot(min_values[index], color=color, label=label)
        plt.legend(bbox_to_anchor=(0., 1.02, 1., .102), loc=3,
                   ncol=2, mode="expand", borderaxespad=0.)
        fig.savefig("{}/Minvalues_{}_layer.png".format(overlap_dir, layer))


_save_min_value_plots([
    ("relu_relu_relu", "r", final_min_valuesA),
    ("relu_sigmoid_relu", "g", final_min_valuesB),
    ("relu_sigmoid_tanh", "b", final_min_valuesC),
])


relu relu relu
1 0.13197508454322815
2 0.061797771602869034
3 0.047406766563653946
4 0.04274652153253555
5 0.07503548264503479
6 0.031121335923671722
7 0.0016826121136546135
8 0.04030686616897583
9 0.03174635022878647
10 0.0027607083320617676
(1, 0)
(1000,)
(1000,)
(1000,)
(2000,)
(1000,)
(3000,)
(1000,)
(4000,)
(1000,)
(5000,)
(1000,)
(6000,)
(1000,)
(7000,)
(1000,)
(8000,)
(1000,)
(9000,)
(1000,)

Test set: Average loss: 0.0619, Accuracy: 9795/10000 (98%)

3
relu sigmoid relu
1 1.5898154973983765
2 1.1462095975875854
3 1.1316478252410889
4 0.6600117087364197
5 0.6857587695121765
6 0.7237051725387573
7 0.612843930721283
8 0.8079507946968079
9 0.6040196418762207
10 0.8599200248718262
(1, 0)
(1000,)
(1000,)
(1000,)
(2000,)
(1000,)
(3000,)
(1000,)
(4000,)
(1000,)
(5000,)
(1000,)
(6000,)
(1000,)
(7000,)
(1000,)
(8000,)
(1000,)
(9000,)
(1000,)

Test set: Average loss: 0.7589, Accuracy: 7825/10000 (78%)

3
relu sigmoid tanh
1 1.034830093383789
2 1.0283654928207397
3 0.9301056265830994
4 0.8