In [2]:
import torch
import numpy as np
import pandas as pd
from torchvision import datasets
import torchvision.transforms as transforms
from torch.utils.data.sampler import SubsetRandomSampler
import time

In [1]:
class Dataset(object):
    """An abstract class representing a Dataset.
    All other datasets should subclass it. All subclasses should override
    ``__len__``, that provides the size of the dataset, and ``__getitem__``,
    supporting integer indexing in range from 0 to len(self) exclusive.
    """

    def __getitem__(self, index):
        raise NotImplementedError

    def __len__(self):
        raise NotImplementedError

    def __add__(self, other):
        return ConcatDataset([self, other])

In [3]:
class TrainMNIST(Dataset):
    
    def __init__(self, file_path, transform=None):
        self.data = pd.read_csv(file_path)
        self.transform = transform
        
    def __len__(self):
        return len(self.data)
    
    def __getitem__(self, index):
        # load image as ndarray type (Height * Width * Channels)
        # be carefull for converting dtype to np.uint8 [Unsigned integer (0 to 255)]
        # in this example, we use ToTensor(), so we define the numpy array like (H, W, C)
        image = self.data.iloc[index, 1:].values.astype(np.uint8).reshape((28, 28, 1))
        label = self.data.iloc[index, 0]
        
        if self.transform is not None:
            image = self.transform(image)
            
        return image, label

In [4]:
# number of subprocesses to use for data loading
num_workers = 0
# how many samples per batch to load
batch_size = 20
# percentage of training set to use as validation
valid_size = 0.2

In [5]:
train_data0 = TrainMNIST("./out0.csv", transform=transforms.ToTensor())
train_data1 = TrainMNIST("./out1.csv", transform=transforms.ToTensor())
train_data2 = TrainMNIST("./out2.csv", transform=transforms.ToTensor())
train_data3 = TrainMNIST("./out3.csv", transform=transforms.ToTensor())
train_data4 = TrainMNIST("./out4.csv", transform=transforms.ToTensor())
train_data5 = TrainMNIST("./out5.csv", transform=transforms.ToTensor())
train_data6 = TrainMNIST("./out6.csv", transform=transforms.ToTensor())
train_data7 = TrainMNIST("./out7.csv", transform=transforms.ToTensor())
train_data8 = TrainMNIST("./out8.csv", transform=transforms.ToTensor())
train_data9 = TrainMNIST("./out9.csv", transform=transforms.ToTensor())


FileNotFoundError: [Errno 2] No such file or directory: './out0.csv'

In [10]:
td = pd.read_csv("./mnist_test.csv")

In [6]:
# obtain training indices that will be used for validation

num_train = len(train_data0)

indices = list(range(num_train))

np.random.shuffle(indices)

split = int(np.floor(valid_size * num_train))
train_idx, valid_idx = indices[split:], indices[:split]

# define samplers for obtaining training and validation batches
train_sampler = SubsetRandomSampler(train_idx)

# prepare data loaders
train_loader0 = torch.utils.data.DataLoader(train_data0, batch_size=batch_size,
    sampler=train_sampler, num_workers=num_workers,)
train_loader1 = torch.utils.data.DataLoader(train_data1, batch_size=batch_size,
    sampler=train_sampler, num_workers=num_workers,)
train_loader2 = torch.utils.data.DataLoader(train_data2, batch_size=batch_size,
    sampler=train_sampler, num_workers=num_workers,)
train_loader3 = torch.utils.data.DataLoader(train_data3, batch_size=batch_size,
    sampler=train_sampler, num_workers=num_workers,)
train_loader4 = torch.utils.data.DataLoader(train_data4, batch_size=batch_size,
    sampler=train_sampler, num_workers=num_workers,)
train_loader5 = torch.utils.data.DataLoader(train_data5, batch_size=batch_size,
    sampler=train_sampler, num_workers=num_workers,)
train_loader6 = torch.utils.data.DataLoader(train_data6, batch_size=batch_size,
    sampler=train_sampler, num_workers=num_workers,)
train_loader7 = torch.utils.data.DataLoader(train_data7, batch_size=batch_size,
    sampler=train_sampler, num_workers=num_workers,)
train_loader8 = torch.utils.data.DataLoader(train_data8, batch_size=batch_size,
    sampler=train_sampler, num_workers=num_workers,)
train_loader9 = torch.utils.data.DataLoader(train_data9, batch_size=batch_size,
    sampler=train_sampler, num_workers=num_workers,)

In [5]:
import torch.nn as nn
import torch.nn.functional as F
# define NN architecture
class Net(nn.Module):
    def __init__(self):
        super(Net,self).__init__()
        # number of hidden nodes in each layer (512)
        hidden_1 = 512
        hidden_2 = 512
        # linear layer (784 -> hidden_1)
        self.fc1 = nn.Linear(28*28, 512)
        # linear layer (n_hidden -> hidden_2)
        self.fc2 = nn.Linear(512,512)
        # linear layer (n_hidden -> 10)
        self.fc3 = nn.Linear(512,1)
        # dropout layer (p=0.2)
        # dropout prevents overfitting of data
        # self.droput = nn.Dropout(0.2)
        
    def forward(self,x):
        # flatten image input
        x = x.view(-1,28*28)
        # add hidden layer, with relu activation function
        x = F.relu(self.fc1(x))
        # add dropout layer
        #x = self.droput(x)
         # add hidden layer, with relu activation function
        x = F.relu(self.fc2(x))
        # add dropout layer
        #x = self.droput(x)
        # add output layer
        x = self.fc3(x)
        return x

# initialize the NN
model0 = Net()
model1 = Net()
model2 = Net()
model3 = Net()
model4 = Net()
model5 = Net()
model6 = Net()
model7 = Net()
model8 = Net()
model9 = Net()

In [8]:
# specify loss function (categorical cross-entropy)
criterion = nn.BCEWithLogitsLoss()
# specify optimizer (stochastic gradient descent) and learning rate = 0.01
optimizer0 = torch.optim.Adam(model0.parameters(),lr = 0.001)
optimizer1 = torch.optim.Adam(model1.parameters(),lr = 0.001)
optimizer2 = torch.optim.Adam(model2.parameters(),lr = 0.001)
optimizer3 = torch.optim.Adam(model3.parameters(),lr = 0.001)
optimizer4 = torch.optim.Adam(model4.parameters(),lr = 0.001)
optimizer5 = torch.optim.Adam(model5.parameters(),lr = 0.001)
optimizer6 = torch.optim.Adam(model6.parameters(),lr = 0.001)
optimizer7 = torch.optim.Adam(model7.parameters(),lr = 0.001)
optimizer8 = torch.optim.Adam(model8.parameters(),lr = 0.001)
optimizer9 = torch.optim.Adam(model9.parameters(),lr = 0.001)

In [9]:
n_epochs = 100

In [10]:
start = time.time()
for epoch in range(n_epochs):
    # monitor losses
    train_loss = 0
    model0.train() # prep model for training
    for data,label in train_loader0:
        # clear the gradients of all optimized variables
        optimizer0.zero_grad()
        # forward pass: compute predicted outputs by passing inputs to the model
        output = model0(data)
        # calculate the loss
        loss = criterion(output.view(20),label.float())
        # backward pass: compute gradient of the loss with respect to model parameters
        loss.backward()

        # perform a single optimization step (parameter update)
        optimizer0.step()
        # update running training loss
        train_loss += loss.item() * data.size(0)
    print("train_loss: ",train_loss)
    if train_loss < 0.00000001:
        break

torch.save(model0.state_dict(),"1vA0.pt")
        
end = time.time()
print("RUNTIME:%5.2fm"%((end-start)/60))

train_loss:  1314.7392196288947
train_loss:  610.0603990245032
train_loss:  430.3833579944823
train_loss:  316.361058386926
train_loss:  250.99071701696792
train_loss:  211.62202945141857
train_loss:  246.57617576187354
train_loss:  193.1184278549968
train_loss:  185.8167427530442
train_loss:  134.7316269218431
train_loss:  105.48945804632649
train_loss:  110.36889811131776
train_loss:  123.21038194086665
train_loss:  91.10601509707061
train_loss:  82.30077194359922
train_loss:  96.30772745049092
train_loss:  95.9614367080674
train_loss:  4.6233051157789085
train_loss:  7.709361126802907
train_loss:  191.5532350636593
train_loss:  48.431722589753726
train_loss:  87.12018003138957
train_loss:  49.17152283056199
train_loss:  48.76957406116925
train_loss:  32.37987608095246
train_loss:  62.37546276191165
train_loss:  63.515461876134886
train_loss:  67.81451567912399
train_loss:  60.26312602559806
train_loss:  0.2110456260214022
train_loss:  0.002027662737962288
train_loss:  0.000208253696

In [11]:
start = time.time()
for epoch in range(n_epochs):
    # monitor losses
    train_loss = 0
    
    model1.train() # prep model for training
    for data,label in train_loader1:
        # clear the gradients of all optimized variables
        optimizer1.zero_grad()
        # forward pass: compute predicted outputs by passing inputs to the model
        output = model1(data)
        # calculate the loss
        loss = criterion(output.view(20),label.float())
        # backward pass: compute gradient of the loss with respect to model parameters
        loss.backward()

        # perform a single optimization step (parameter update)
        optimizer1.step()
        # update running training loss
        train_loss += loss.item() * data.size(0)
    print("train_loss: ",train_loss)
    if train_loss < 0.00000001:
        break

torch.save(model1.state_dict(),"1vA1.pt")        
end = time.time()
print("RUNTIME:%5.2fm"%((end-start)/60))

train_loss:  1324.0814468786743
train_loss:  618.5068852758673
train_loss:  432.96082455137895
train_loss:  276.88444626471426
train_loss:  249.20798008566246
train_loss:  199.80496474635888
train_loss:  233.94871574398152
train_loss:  166.23898156640814
train_loss:  147.93640385481308
train_loss:  105.87853439520785
train_loss:  136.68003464511787
train_loss:  89.30785263553801
train_loss:  117.91307838687715
train_loss:  75.59077567457086
train_loss:  72.64236838493022
train_loss:  46.03348489868135
train_loss:  124.50901005821692
train_loss:  89.61577445868886
train_loss:  20.377671355777185
train_loss:  61.54369122160929
train_loss:  35.33921032872813
train_loss:  107.78307800639917
train_loss:  35.60230588698455
train_loss:  57.90849529602895
train_loss:  4.470573880341
train_loss:  20.656410433863037
train_loss:  98.619417452984
train_loss:  33.87575136286954
train_loss:  3.3041252553773504
train_loss:  77.96991954430788
train_loss:  96.36282316063519
train_loss:  1.4401126853739

In [12]:
start = time.time()
for epoch in range(n_epochs):
    # monitor losses
    train_loss = 0
    
    model2.train() # prep model for training
    for data,label in train_loader2:
        # clear the gradients of all optimized variables
        optimizer2.zero_grad()
        # forward pass: compute predicted outputs by passing inputs to the model
        output = model2(data)
        # calculate the loss
        loss = criterion(output.view(20),label.float())
        # backward pass: compute gradient of the loss with respect to model parameters
        loss.backward()

        # perform a single optimization step (parameter update)
        optimizer2.step()
        # update running training loss
        train_loss += loss.item() * data.size(0)
    print("train_loss: ",train_loss)
    if train_loss < 0.00000001:
        break

torch.save(model2.state_dict(),"1vA2.pt")        
end = time.time()
print("RUNTIME:%5.2fm"%((end-start)/60))

train_loss:  2214.565202165031
train_loss:  1041.2735111653296
train_loss:  763.8278479066241
train_loss:  567.1449773734082
train_loss:  461.7640383348025
train_loss:  375.4903426338373
train_loss:  286.8904433065578
train_loss:  276.1371469380859
train_loss:  211.65379761684375
train_loss:  174.96892964958943
train_loss:  185.99854462434848
train_loss:  166.50956660105138
train_loss:  135.02556372712633
train_loss:  97.31168298910582
train_loss:  140.61975940653292
train_loss:  117.94085073535035
train_loss:  147.55969898655343
train_loss:  101.08833708715623
train_loss:  155.794673762466
train_loss:  78.64713645942246
train_loss:  84.34357114439592
train_loss:  145.48841222213323
train_loss:  50.91842619001113
train_loss:  123.03991341651306
train_loss:  86.52536339813436
train_loss:  52.49970977452982
train_loss:  37.341656514111946
train_loss:  80.74044173718889
train_loss:  136.32165083946575
train_loss:  148.6356912776132
train_loss:  63.72642457490237
train_loss:  78.2921512432

In [13]:
start = time.time()
for epoch in range(n_epochs):
    # monitor losses
    train_loss = 0
    
    model3.train() # prep model for training
    for data,label in train_loader3:
        # clear the gradients of all optimized variables
        optimizer3.zero_grad()
        # forward pass: compute predicted outputs by passing inputs to the model
        output = model3(data)
        # calculate the loss
        loss = criterion(output.view(20),label.float())
        # backward pass: compute gradient of the loss with respect to model parameters
        loss.backward()

        # perform a single optimization step (parameter update)
        optimizer3.step()
        # update running training loss
        train_loss += loss.item() * data.size(0)
    print("train_loss: ",train_loss)   
    if train_loss < 0.00000001:
        break

torch.save(model3.state_dict(),"1vA3.pt")    
end = time.time()
print("RUNTIME:%5.2fm"%((end-start)/60))

train_loss:  2773.3420979382936
train_loss:  1286.5525435027416
train_loss:  899.9850413524331
train_loss:  686.0126300973786
train_loss:  519.983785110267
train_loss:  425.6022817560597
train_loss:  328.9258818704076
train_loss:  357.2159484785228
train_loss:  254.1295428811111
train_loss:  286.49140966140754
train_loss:  231.0375194748326
train_loss:  195.04065265803635
train_loss:  191.93634735402358
train_loss:  211.65892611974056
train_loss:  171.35809154745658
train_loss:  139.7642382618015
train_loss:  124.84896590260895
train_loss:  142.38407982793214
train_loss:  128.1912815785575
train_loss:  161.88428553013568
train_loss:  196.99769408006807
train_loss:  92.74064181672642
train_loss:  128.6556156130524
train_loss:  98.08603167884613
train_loss:  120.04937260835229
train_loss:  143.57050073844235
train_loss:  115.59011751409741
train_loss:  44.46819749987916
train_loss:  125.12578861667441
train_loss:  167.60058518446286
train_loss:  150.0188234833906
train_loss:  38.77393502

In [14]:
start = time.time()
for epoch in range(n_epochs):
    # monitor losses
    train_loss = 0
    
    model4.train() # prep model for training
    for data,label in train_loader4:
        # clear the gradients of all optimized variables
        optimizer4.zero_grad()
        # forward pass: compute predicted outputs by passing inputs to the model
        output = model4(data)
        # calculate the loss
        loss = criterion(output.view(20),label.float())
        # backward pass: compute gradient of the loss with respect to model parameters
        loss.backward()

        # perform a single optimization step (parameter update)
        optimizer4.step()
        # update running training loss
        train_loss += loss.item() * data.size(0)
    print("train_loss: ",train_loss)    
    if train_loss < 0.00000001:
        break

torch.save(model4.state_dict(),"1vA4.pt")        
end = time.time()
print("RUNTIME:%5.2fm"%((end-start)/60))

train_loss:  2008.4292898032845
train_loss:  878.3777637196215
train_loss:  632.2027687528937
train_loss:  502.546023355595
train_loss:  363.2351045067647
train_loss:  354.9513756382304
train_loss:  284.79236005506914
train_loss:  290.05262752049714
train_loss:  217.8620191996819
train_loss:  160.44941367786956
train_loss:  168.17355896391757
train_loss:  180.58825976646676
train_loss:  116.35070989917975
train_loss:  83.49961657082446
train_loss:  157.43037809682656
train_loss:  171.21341749172055
train_loss:  124.219550277802
train_loss:  107.51467745056917
train_loss:  145.01169321063446
train_loss:  90.1944108218289
train_loss:  85.7808303727102
train_loss:  110.3658047424041
train_loss:  129.1790398024699
train_loss:  48.24255741921102
train_loss:  49.70353438315669
train_loss:  102.5249006564566
train_loss:  49.56824770340555
train_loss:  59.06711677241337
train_loss:  103.62651882834322
train_loss:  176.06913705499477
train_loss:  69.4920701230352
train_loss:  43.100901704341105

In [15]:
start = time.time()
for epoch in range(n_epochs):
    # monitor losses
    train_loss = 0
    
    model5.train() # prep model for training
    for data,label in train_loader5:
        # clear the gradients of all optimized variables
        optimizer5.zero_grad()
        # forward pass: compute predicted outputs by passing inputs to the model
        output = model5(data)
        # calculate the loss
        loss = criterion(output.view(20),label.float())
        # backward pass: compute gradient of the loss with respect to model parameters
        loss.backward()

        # perform a single optimization step (parameter update)
        optimizer5.step()
        # update running training loss
        train_loss += loss.item() * data.size(0)
    if train_loss < 0.00000001:
        break

torch.save(model5.state_dict(),"1vA5.pt")        
end = time.time()
print("RUNTIME:%5.2fm"%((end-start)/60))

RUNTIME:29.54m


In [16]:
start = time.time()
for epoch in range(n_epochs):
    # monitor losses
    train_loss = 0
    
    model6.train() # prep model for training
    for data,label in train_loader6:
        # clear the gradients of all optimized variables
        optimizer6.zero_grad()
        # forward pass: compute predicted outputs by passing inputs to the model
        output = model6(data)
        # calculate the loss
        loss = criterion(output.view(20),label.float())
        # backward pass: compute gradient of the loss with respect to model parameters
        loss.backward()

        # perform a single optimization step (parameter update)
        optimizer6.step()
        # update running training loss
        train_loss += loss.item() * data.size(0)
    print("train_loss: ",train_loss)    
    if train_loss < 0.00000001:
        break

torch.save(model6.state_dict(),"1vA6.pt")        
end = time.time()
print("RUNTIME:%5.2fm"%((end-start)/60))

train_loss:  1515.6080651654975
train_loss:  681.7469765241574
train_loss:  471.2277821481304
train_loss:  364.2931436490879
train_loss:  302.34252340080457
train_loss:  265.92443176296234
train_loss:  221.04416528195668
train_loss:  222.17781515814275
train_loss:  143.2388746300736
train_loss:  134.8261598868095
train_loss:  177.7373786235088
train_loss:  130.41679876102543
train_loss:  128.8532160818957
train_loss:  67.2742446983809
train_loss:  187.2430817025596
train_loss:  96.05383899372765
train_loss:  121.81998770972328
train_loss:  43.348347533833895
train_loss:  152.16781086308663
train_loss:  91.89149874225339
train_loss:  91.3480412809418
train_loss:  38.98400982084237
train_loss:  46.981889890819836
train_loss:  108.09521965270028
train_loss:  79.84005803688041
train_loss:  29.874191692762224
train_loss:  50.16583170374322
train_loss:  134.6050261401988
train_loss:  25.36549236963541
train_loss:  0.16659294294731453
train_loss:  0.011344946861662208
train_loss:  0.001652765

In [17]:
start = time.time()
for epoch in range(n_epochs):
    # monitor losses
    train_loss = 0
    
    model7.train() # prep model for training
    for data,label in train_loader7:
        # clear the gradients of all optimized variables
        optimizer7.zero_grad()
        # forward pass: compute predicted outputs by passing inputs to the model
        output = model7(data)
        # calculate the loss
        loss = criterion(output.view(20),label.float())
        # backward pass: compute gradient of the loss with respect to model parameters
        loss.backward()

        # perform a single optimization step (parameter update)
        optimizer7.step()
        # update running training loss
        train_loss += loss.item() * data.size(0)
    print("train_loss: ",train_loss)    
    if train_loss < 0.00000001:
        break

torch.save(model7.state_dict(),"1vA7.pt")        
end = time.time()
print("RUNTIME:%5.2fm"%((end-start)/60))

train_loss:  2045.9472260611165
train_loss:  1140.1946531244266
train_loss:  763.9229139498855
train_loss:  583.4575274081976
train_loss:  473.0209475095982
train_loss:  345.63680243034423
train_loss:  240.39019542412075
train_loss:  265.25442368502115
train_loss:  260.3951600260342
train_loss:  194.2810157170783
train_loss:  187.92129939907397
train_loss:  196.72461862685515
train_loss:  85.2083297365173
train_loss:  143.68026750016196
train_loss:  169.93631357567187
train_loss:  157.38824072106488
train_loss:  78.9568637870023
train_loss:  87.7604801897294
train_loss:  95.77387693034262
train_loss:  94.58875622983587
train_loss:  116.50925763633956
train_loss:  114.50558172769942
train_loss:  63.05956081203133
train_loss:  89.57804597292599
train_loss:  2.8216156887591914
train_loss:  96.23196705796572
train_loss:  176.1943646727157
train_loss:  76.91794196382938
train_loss:  54.51476283195956
train_loss:  67.4749103687501
train_loss:  73.41587957236953
train_loss:  27.59021113995562

In [18]:
start = time.time()
for epoch in range(n_epochs):
    # monitor losses
    train_loss = 0
    
    model8.train() # prep model for training
    for data,label in train_loader8:
        # clear the gradients of all optimized variables
        optimizer8.zero_grad()
        # forward pass: compute predicted outputs by passing inputs to the model
        output = model8(data)
        # calculate the loss
        loss = criterion(output.view(20),label.float())
        # backward pass: compute gradient of the loss with respect to model parameters
        loss.backward()

        # perform a single optimization step (parameter update)
        optimizer8.step()
        # update running training loss
        train_loss += loss.item() * data.size(0)
    print("train_loss: ",train_loss)   
    if train_loss < 0.00000001:
        break

torch.save(model8.state_dict(),"1vA8.pt")     
end = time.time()
print("RUNTIME:%5.2fm"%((end-start)/60))

train_loss:  2856.367921546407
train_loss:  1284.107532825176
train_loss:  984.042816597908
train_loss:  754.0874921505252
train_loss:  642.4661081995055
train_loss:  521.1657772207349
train_loss:  435.2637877969286
train_loss:  360.898500075672
train_loss:  342.4198613699077
train_loss:  301.71634746240534
train_loss:  322.67883377415137
train_loss:  286.01069675124967
train_loss:  258.5578033704085
train_loss:  204.15176397175603
train_loss:  156.85776829551008
train_loss:  175.95371576720234
train_loss:  181.20546639893504
train_loss:  158.70393841109734
train_loss:  142.77380837889052
train_loss:  156.04330896001815
train_loss:  155.19609142877624
train_loss:  71.10412086229803
train_loss:  154.62796985898473
train_loss:  88.11463379045765
train_loss:  151.83617291736283
train_loss:  86.44220893816313
train_loss:  85.65159146489597
train_loss:  90.51255054809388
train_loss:  130.2062792561705
train_loss:  65.39517926275587
train_loss:  81.36484310245216
train_loss:  65.991382350249

In [10]:
start = time.time()
for epoch in range(n_epochs):
    # monitor losses
    train_loss = 0
    
    model9.train() # prep model for training
    for data,label in train_loader9:
        # clear the gradients of all optimized variables
        optimizer9.zero_grad()
        # forward pass: compute predicted outputs by passing inputs to the model
        output = model9(data)
        # calculate the loss
        loss = criterion(output.view(20),label.float())
        # backward pass: compute gradient of the loss with respect to model parameters
        loss.backward()

        # perform a single optimization step (parameter update)
        optimizer9.step()
        # update running training loss
        train_loss += loss.item() * data.size(0)
    print("train_loss: ",train_loss)    
    if train_loss < 0.00000001:
        break

torch.save(model9.state_dict(),"1vA9.pt")        
end = time.time()
print("RUNTIME:%5.2fm"%((end-start)/60))

train_loss:  2899.3432612545075
train_loss:  1406.832280722083
train_loss:  1047.4885530558004
train_loss:  787.2264593605041
train_loss:  682.4731100198076
train_loss:  544.3151176310641
train_loss:  388.427544208934
train_loss:  397.0303505838763
train_loss:  302.33096697554413
train_loss:  284.4231291227095
train_loss:  257.3880902277075
train_loss:  217.09021209651607
train_loss:  218.48487431448106
train_loss:  180.54216615488096
train_loss:  185.62142455397844
train_loss:  145.13959472558645
train_loss:  158.2806650132885
train_loss:  166.62450722713533
train_loss:  135.00048384879716
train_loss:  122.63598789539589
train_loss:  117.41127871282379
train_loss:  95.43857672082372
train_loss:  126.5289026946835
train_loss:  115.19777845100481
train_loss:  117.22447041140005
train_loss:  98.88039820905642
train_loss:  131.47203053280617
train_loss:  37.03271440053802
train_loss:  151.9444411677788
train_loss:  35.97456648506494
train_loss:  52.53795533337074
train_loss:  128.86612145

In [6]:
models = []
model0.load_state_dict(torch.load('./ova/1vA0.pt'))
model1.load_state_dict(torch.load('./ova/1vA1.pt'))
model2.load_state_dict(torch.load('./ova/1vA2.pt'))
model3.load_state_dict(torch.load('./ova/1vA3.pt'))
model4.load_state_dict(torch.load('./ova/1vA4.pt'))
model5.load_state_dict(torch.load('./ova/1vA5.pt'))
model6.load_state_dict(torch.load('./ova/1vA6.pt'))
model7.load_state_dict(torch.load('./ova/1vA7.pt'))
model8.load_state_dict(torch.load('./ova/1vA8.pt'))
model9.load_state_dict(torch.load('./ova/1vA9.pt'))

models.append(model0)
models.append(model1)
models.append(model2)
models.append(model3)
models.append(model4)
models.append(model5)
models.append(model6)
models.append(model7)
models.append(model8)
models.append(model9)

In [38]:
params = []
fc1weight = []
fc1bias = []
fc2weight = []
fc2bias = []
fc3weight = []
fc3bias = []

params.append(fc1weight)
params.append(fc1bias)
params.append(fc2weight)
params.append(fc2bias)
params.append(fc3weight)
params.append(fc3bias)
print(params)

[[], [], [], [], [], []]


In [39]:
for i in range(10): 
    fc1weight.append(models[i].state_dict()['fc1.weight'])
    fc1bias.append(models[i].state_dict()['fc1.bias'])
    fc2weight.append(models[i].state_dict()['fc2.weight'])
    fc2bias.append(models[i].state_dict()['fc2.bias'])
    fc3weight.append(models[i].state_dict()['fc3.weight'])
    fc3bias.append(models[i].state_dict()['fc3.bias'])

In [40]:
means = []
for i in range(6):
    means.append(sum(params[i])/10)


In [41]:
vars=[]
for i in range(6):
    sqrdiff =  [(x-means[i])**2 for x in params[i]]
    vars.append(sum(sqrdiff))

In [50]:
thresh = 0.5

In [52]:
notmul = []
mult = []
for i in range(6):
    print(torch.numel(vars[i][vars[i]>thresh]))
    mult.append(vars[i]>thresh)
    notmul.append(vars[i] <=thresh)
    mult[i] = mult[i].float()
    notmul[i] = notmul[i].float()

34793
25
8066
130
0
1


In [53]:
print(means[0] * mult[0])

tensor([[-0., -0., 0.,  ..., 0., 0., 0.],
        [-0., -0., 0.,  ..., 0., -0., -0.],
        [-0., 0., 0.,  ..., -0., -0., 0.],
        ...,
        [0., -0., 0.,  ..., 0., 0., 0.],
        [-0., 0., 0.,  ..., -0., -0., -0.],
        [-0., 0., 0.,  ..., -0., -0., 0.]])


In [13]:
test = np.array(td)

x_test , y_test = test[:,1:], test[:,0]
y = torch.from_numpy(y_test)
x = torch.from_numpy(x_test)
print(len(y))
print(len(x))

NameError: name 'td' is not defined

In [12]:
start = time.time()
result_lst = []
for i in range(10000):
    output0 = model0(x[i].float()).item()
    output1 = model1(x[i].float()).item()
    output2 = model2(x[i].float()).item()
    output3 = model3(x[i].float()).item()
    output4 = model4(x[i].float()).item()
    output5 = model5(x[i].float()).item()
    output6 = model6(x[i].float()).item()
    output7 = model7(x[i].float()).item()
    output8 = model8(x[i].float()).item()
    output9 = model9(x[i].float()).item()
    max_val = max(output0,output1,output2,output3,output4,output5,output6,output7,output8,output9)
    if(max_val == output0):
        result_lst.append(0)
    elif(max_val == output1):
        result_lst.append(1)
    elif(max_val == output2):
        result_lst.append(2)
    elif(max_val == output3):
        result_lst.append(3)
    elif(max_val == output4):
        result_lst.append(4)
    elif(max_val == output5):
        result_lst.append(5)
    elif(max_val == output6):
        result_lst.append(6)
    elif(max_val == output7):
        result_lst.append(7)
    elif(max_val == output8):
        result_lst.append(8)
    elif(max_val == output9):
        result_lst.append(9)
    else:
        None

#     print(1/(1+np.exp(-output0)))
#     print(1/(1+np.exp(-output1)))
#     print(1/(1+np.exp(-output2)))
#     print(1/(1+np.exp(-output3)))
#     print(1/(1+np.exp(-output4)))
#     print(1/(1+np.exp(-output5)))
#     print(1/(1+np.exp(-output6)))
#     print(1/(1+np.exp(-output7)))
#     print(1/(1+np.exp(-output8)))
#     print(1/(1+np.exp(-output9)))
#     print(output1)
#     print(output2)
#     print(output3)
#     print(output4)
#     print(output5)
#     print(output6)
#     print(output7)
#     print(output8)
#     print(output9)

    
print(result_lst)

end = time.time()
print("RUNTIME:%5.2fm"%((end-start)/60))

[7, 2, 1, 0, 4, 1, 4, 9, 5, 9, 0, 6, 9, 0, 1, 5, 9, 7, 3, 4, 9, 6, 6, 5, 4, 0, 7, 4, 0, 1, 3, 1, 3, 4, 7, 2, 7, 1, 2, 1, 1, 7, 4, 2, 3, 5, 1, 2, 4, 4, 6, 3, 5, 5, 6, 0, 4, 1, 9, 5, 7, 8, 9, 3, 7, 4, 6, 4, 3, 0, 7, 0, 2, 9, 1, 7, 3, 2, 9, 7, 7, 6, 2, 7, 8, 4, 7, 3, 6, 1, 3, 6, 9, 3, 1, 4, 1, 7, 6, 9, 6, 0, 5, 4, 9, 9, 2, 1, 9, 4, 8, 7, 3, 9, 7, 4, 4, 4, 9, 2, 5, 4, 7, 6, 7, 9, 0, 5, 8, 5, 6, 6, 5, 7, 8, 1, 0, 1, 6, 4, 6, 7, 3, 1, 7, 1, 8, 2, 0, 9, 9, 9, 5, 5, 1, 5, 6, 0, 3, 4, 4, 6, 5, 4, 6, 5, 4, 5, 1, 4, 4, 7, 2, 3, 2, 7, 1, 8, 1, 8, 1, 8, 5, 0, 8, 9, 2, 5, 0, 1, 1, 1, 0, 9, 0, 3, 1, 6, 4, 2, 3, 6, 1, 1, 1, 3, 9, 5, 2, 9, 4, 5, 9, 3, 9, 0, 3, 6, 5, 5, 7, 2, 2, 7, 1, 2, 8, 4, 1, 7, 3, 3, 8, 8, 7, 9, 2, 2, 4, 1, 5, 9, 8, 7, 2, 3, 0, 6, 4, 2, 4, 1, 9, 5, 7, 7, 2, 8, 2, 6, 8, 5, 7, 7, 9, 1, 8, 1, 8, 0, 3, 0, 1, 9, 9, 4, 1, 8, 2, 1, 2, 9, 7, 5, 9, 2, 6, 4, 1, 5, 8, 2, 9, 2, 0, 4, 0, 0, 2, 8, 4, 7, 1, 2, 4, 0, 2, 7, 4, 3, 3, 0, 0, 3, 1, 9, 6, 5, 2, 5, 9, 7, 9, 3, 0, 4, 2, 0, 7, 1, 1, 2, 1, 

In [13]:
match = 0
total = 10000
for i in range(total):
    if(y[i]==result_lst[i]):
            match = match+1
print("Accuracy:",match/total)

Accuracy: 0.9843


In [7]:
# number of subprocesses to use for data loading
num_workers = 0
# how many samples per batch to load
batch_size = 20
# percentage of training set to use as validation
valid_size = 0.2
# convert data to torch.FloatTensor
transform = transforms.ToTensor()
# choose the training and testing datasets
train_data = datasets.MNIST(root = 'data', train = True, download = True, transform = transform)
test_data = datasets.MNIST(root = 'data', train = False, download = True, transform = transform)
# obtain training indices that will be used for validation
num_train = len(train_data)
indices = list(range(num_train))
np.random.shuffle(indices)
split = int(np.floor(valid_size * num_train))
train_index, valid_index = indices[split:], indices[:split]
# define samplers for obtaining training and validation batches
train_sampler = SubsetRandomSampler(train_index)
valid_sampler = SubsetRandomSampler(valid_index)
# prepare data loaders
train_loader = torch.utils.data.DataLoader(train_data, batch_size = batch_size, 
                                           sampler = train_sampler, num_workers = num_workers)
valid_loader = torch.utils.data.DataLoader(train_data, batch_size = batch_size,
                                          sampler = valid_sampler, num_workers = num_workers)
test_loader = torch.utils.data.DataLoader(test_data, batch_size = batch_size,
                                         num_workers = num_workers)

In [55]:
import torch.nn as nn
import torch.nn.functional as F
# define NN architecture
class FixNet(nn.Module):
    def __init__(self):
        super(FixNet,self).__init__()
        # number of hidden nodes in each layer (512)
        hidden_1 = 512
        hidden_2 = 512
        # linear layer (784 -> hidden_1)
        self.fc1 = nn.Linear(28*28, 512)
        # linear layer (n_hidden -> hidden_2)
        self.fc2 = nn.Linear(512,512)
        # linear layer (n_hidden -> 10)
        self.fc3 = nn.Linear(512,10)
        # dropout layer (p=0.2)
        # dropout prevents overfitting of data
        # self.droput = nn.Dropout(0.2)
        
    def forward(self,x):
        # flatten image input
        x = x.view(-1,28*28)
        # add hidden layer, with relu activation function
        x = F.relu(self.fc1(x))
        # add dropout layer
        #x = self.droput(x)
         # add hidden layer, with relu activation function
        x = F.relu(self.fc2(x))
        # add dropout layer
        #x = self.droput(x)
        # add output layer
        x = self.fc3(x)
        return x

In [56]:
newmodel = FixNet()

newmodel.fc1.weight = torch.nn.Parameter(means[0])
newmodel.fc1.bias = torch.nn.Parameter(means[1])
newmodel.fc2.weight = torch.nn.Parameter(means[2])
newmodel.fc2.bias = torch.nn.Parameter(means[3])

In [13]:
# specify loss function (categorical cross-entropy)
criterion = nn.CrossEntropyLoss()
# specify optimizer (stochastic gradient descent) and learning rate = 0.01
optimizer = torch.optim.Adam(newmodel.parameters(),lr = 0.001)

In [58]:
import copy
# number of epochs to train the model
n_epochs = 100
# initialize tracker for minimum validation loss
valid_loss_min = np.Inf  # set initial "min" to infinity
for epoch in range(n_epochs):
    # monitor losses
    train_loss = 0
    valid_loss = 0
    
     
    ###################
    # train the model #
    ###################
    newmodel.train() # prep model for training
    for data,label in train_loader:
        # clear the gradients of all optimized variables
        optimizer.zero_grad()
        # forward pass: compute predicted outputs by passing inputs to the model
        output = newmodel(data)
        # calculate the loss
        loss = criterion(output,label)
        # backward pass: compute gradient of the loss with respect to model parameters
        loss.backward()
        # perform a single optimization step (parameter update)
        prev = []
        prev.append(copy.deepcopy(newmodel.fc1.weight))
        prev.append(copy.deepcopy(newmodel.fc1.bias))
        prev.append(copy.deepcopy(newmodel.fc2.weight))
        prev.append(copy.deepcopy(newmodel.fc2.bias))
        curr = []
        curr.append(copy.deepcopy(newmodel.fc1.weight))
        curr.append(copy.deepcopy(newmodel.fc1.bias))
        curr.append(copy.deepcopy(newmodel.fc2.weight))
        curr.append(copy.deepcopy(newmodel.fc2.bias))
        
        for i in range(4):
            prev[i] = prev[i] * notmul[i]
        
        optimizer.step()

        curr[0] = curr[0]*mult[0]
        curr[1] = curr[1]*mult[1]
        curr[2] = curr[2]*mult[2]
        curr[3] = curr[3]*mult[3]
        
        newmodel.fc1.weight = torch.nn.Parameter(curr[0] + prev[0])
        newmodel.fc1.bias = torch.nn.Parameter(curr[1] + prev[1])
        newmodel.fc2.weight = torch.nn.Parameter(curr[2] + prev[2])
        newmodel.fc2.bias = torch.nn.Parameter(curr[3] + prev[3])
        # update running training loss
        train_loss += loss.item() * data.size(0)
        
        
     ######################    
    # validate the model #
    ######################
    newmodel.eval()  # prep model for evaluation
    for data,label in valid_loader:
        # forward pass: compute predicted outputs by passing inputs to the model
        output = newmodel(data)
        # calculate the loss
        loss = criterion(output,label)
        # update running validation loss 
        valid_loss = loss.item() * data.size(0)
    
    # print training/validation statistics 
    # calculate average loss over an epoch
    train_loss = train_loss / len(train_loader.sampler)
    valid_loss = valid_loss / len(valid_loader.sampler)
    
    print('Epoch: {} \tTraining Loss: {:.6f} \tValidation Loss: {:.6f}'.format(
        epoch+1, 
        train_loss,
        valid_loss
        ))
    
    # save model if validation loss has decreased
    if valid_loss <= valid_loss_min:
        print('Validation loss decreased ({:.6f} --> {:.6f}).  Saving model ...'.format(
        valid_loss_min,
        valid_loss))
        torch.save(newmodel.state_dict(), './fixnet/modelmean.pt')
        valid_loss_min = valid_loss
    if train_loss < 0.00000001:
        break

Epoch: 1 	Training Loss: 1.399738 	Validation Loss: 0.001751
Validation loss decreased (inf --> 0.001751).  Saving model ...
Epoch: 2 	Training Loss: 1.015103 	Validation Loss: 0.001707
Validation loss decreased (0.001751 --> 0.001707).  Saving model ...
Epoch: 3 	Training Loss: 0.894935 	Validation Loss: 0.001492
Validation loss decreased (0.001707 --> 0.001492).  Saving model ...
Epoch: 4 	Training Loss: 0.827743 	Validation Loss: 0.000785
Validation loss decreased (0.001492 --> 0.000785).  Saving model ...
Epoch: 5 	Training Loss: 0.782266 	Validation Loss: 0.000874
Epoch: 6 	Training Loss: 0.749263 	Validation Loss: 0.001082
Epoch: 7 	Training Loss: 0.724209 	Validation Loss: 0.001696
Epoch: 8 	Training Loss: 0.702961 	Validation Loss: 0.001816
Epoch: 9 	Training Loss: 0.685066 	Validation Loss: 0.001849
Epoch: 10 	Training Loss: 0.670683 	Validation Loss: 0.001414
Epoch: 11 	Training Loss: 0.658166 	Validation Loss: 0.001880
Epoch: 12 	Training Loss: 0.646750 	Validation Loss: 0.0

In [60]:
newmodel.load_state_dict(torch.load('./fixnet/modelmean.pt'))

<All keys matched successfully>

In [61]:
# initialize lists to monitor test loss and accuracy
test_loss = 0.0
class_correct = list(0. for i in range(10))
class_total = list(0. for i in range(10))
newmodel.eval() # prep model for evaluation
for data, target in test_loader:
    output = newmodel(data)
    # calculate the loss
    loss = criterion(output, target)
    # update test loss 
    test_loss += loss.item()*data.size(0)
    # convert output probabilities to predicted class
    _, pred = torch.max(output, 1)
    # compare predictions to true label
    correct = np.squeeze(pred.eq(target.data.view_as(pred)))
    # calculate test accuracy for each object class
    for i in range(len(target)):
        label = target.data[i]
        class_correct[label] += correct[i].item()
        class_total[label] += 1
# calculate and print avg test loss
test_loss = test_loss/len(test_loader.sampler)
print('Test Loss: {:.6f}\n'.format(test_loss))
for i in range(10):
    if class_total[i] > 0:
        print('Test Accuracy of %5s: %2d%% (%2d/%2d)' % (
            str(i), 100 * class_correct[i] / class_total[i],
            np.sum(class_correct[i]), np.sum(class_total[i])))
    else:
        print('Test Accuracy of %5s: N/A (no training examples)' % (classes[i]))
print('\nTest Accuracy (Overall): %2d%% (%2d/%2d)' % (
    100. * np.sum(class_correct) / np.sum(class_total),
    np.sum(class_correct), np.sum(class_total)))

Test Loss: 0.489185

Test Accuracy of     0: 95% (933/980)
Test Accuracy of     1: 97% (1112/1135)
Test Accuracy of     2: 87% (899/1032)
Test Accuracy of     3: 82% (836/1010)
Test Accuracy of     4: 72% (710/982)
Test Accuracy of     5: 81% (731/892)
Test Accuracy of     6: 90% (868/958)
Test Accuracy of     7: 87% (898/1028)
Test Accuracy of     8: 72% (702/974)
Test Accuracy of     9: 76% (769/1009)

Test Accuracy (Overall): 84% (8458/10000)


In [8]:
import torch.nn as nn
import torch.nn.functional as F
# define NN architecture
class FixNetMean(nn.Module):
    def __init__(self):
        super(FixNetMean,self).__init__()
        # number of hidden nodes in each layer (512)
        hidden_1 = 512
        hidden_2 = 512
        # linear layer (784 -> hidden_1)
        self.fc1 = nn.Linear(10, 512)
        # linear layer (n_hidden -> hidden_2)
        self.fc2 = nn.Linear(512,512)
        # linear layer (n_hidden -> 10)
        self.fc3 = nn.Linear(512,10)
        # dropout layer (p=0.2)
        # dropout prevents overfitting of data
        # self.droput = nn.Dropout(0.2)
        
    def forward(self,x):
        # flatten image input
        # add hidden layer, with relu activation function
        x = F.relu(self.fc1(x))
        # add dropout layer
        #x = self.droput(x)
         # add hidden layer, with relu activation function
        x = F.relu(self.fc2(x))
        # add dropout layer
        #x = self.droput(x)
        # add output layer
        x = self.fc3(x)
        return x

In [15]:
newmodel = FixNetMean()
# specify loss function (categorical cross-entropy)
criterion = nn.CrossEntropyLoss()
# specify optimizer (stochastic gradient descent) and learning rate = 0.01
optimizer = torch.optim.Adam(newmodel.parameters(),lr = 0.001)
n_epochs = 100
# initialize tracker for minimum validation loss
valid_loss_min = np.Inf  # set initial "min" to infinity
for epoch in range(n_epochs):
    # monitor losses
    train_loss = 0
    valid_loss = 0
    
     
    ###################
    # train the model #
    ###################
    newmodel.train() # prep model for training
    for data,label in train_loader:
        # clear the gradients of all optimized variables
        optimizer.zero_grad()
        # forward pass: compute predicted outputs by passing inputs to the model
        input = []
        for i,model in enumerate(models):            
            input.append(model(data))
        input = torch.cat(input,dim=1)
        output = newmodel(input)
        # calculate the loss
        loss = criterion(output,label)
        # backward pass: compute gradient of the loss with respect to model parameters
        loss.backward()
        # perform a single optimization step (parameter update)
        optimizer.step()
        # update running training loss
        train_loss += loss.item() * data.size(0)
        
        
     ######################    
    # validate the model #
    ######################
    newmodel.eval()  # prep model for evaluation
    for data,label in valid_loader:
        # forward pass: compute predicted outputs by passing inputs to the model
        input = []
        for i,model in enumerate(models):            
            input.append(model(data))
        input = torch.cat(input,dim=1)
        output = newmodel(input)
        # calculate the loss
        loss = criterion(output,label)
        # update running validation loss 
        valid_loss = loss.item() * data.size(0)
    
    # print training/validation statistics 
    # calculate average loss over an epoch
    train_loss = train_loss / len(train_loader.sampler)
    valid_loss = valid_loss / len(valid_loader.sampler)
    
    print('Epoch: {} \tTraining Loss: {:.6f} \tValidation Loss: {:.6f}'.format(
        epoch+1, 
        train_loss,
        valid_loss
        ))
    
    # save model if validation loss has decreased
    if valid_loss <= valid_loss_min:
        print('Validation loss decreased ({:.6f} --> {:.6f}).  Saving model ...'.format(
        valid_loss_min,
        valid_loss))
        torch.save(newmodel.state_dict(), './fixnet/modelmean.pt')
        valid_loss_min = valid_loss
    if train_loss < 0.00000001:
        break

Epoch: 1 	Training Loss: 0.339501 	Validation Loss: 0.000011
Validation loss decreased (inf --> 0.000011).  Saving model ...
Epoch: 2 	Training Loss: 0.096339 	Validation Loss: 0.000000
Validation loss decreased (0.000011 --> 0.000000).  Saving model ...
Epoch: 3 	Training Loss: 0.090018 	Validation Loss: 0.000003
Epoch: 4 	Training Loss: 0.035976 	Validation Loss: 0.000000
Epoch: 5 	Training Loss: 0.028270 	Validation Loss: 0.000000
Epoch: 6 	Training Loss: 0.029549 	Validation Loss: 0.000002
Epoch: 7 	Training Loss: 0.026488 	Validation Loss: 0.000000
Epoch: 8 	Training Loss: 0.027351 	Validation Loss: 0.000000
Epoch: 9 	Training Loss: 0.021588 	Validation Loss: 0.000000
Epoch: 10 	Training Loss: 0.023199 	Validation Loss: 0.000000
Epoch: 11 	Training Loss: 0.022579 	Validation Loss: 0.000000
Epoch: 12 	Training Loss: 0.023297 	Validation Loss: 0.001774
Epoch: 13 	Training Loss: 0.019368 	Validation Loss: 0.000000
Epoch: 14 	Training Loss: 0.019253 	Validation Loss: 0.000000
Epoch: 1

In [10]:
newmodel = FixNetMean()
newmodel.load_state_dict(torch.load("./fixnet/modelmean.pt"))

<All keys matched successfully>

In [14]:
# initialize lists to monitor test loss and accuracy
test_loss = 0.0
class_correct = list(0. for i in range(10))
class_total = list(0. for i in range(10))
newmodel.eval() # prep model for evaluation
for data, target in test_loader:
    input = []
    for i,model in enumerate(models):            
        input.append(model(data))
    input = torch.cat(input,dim=1)
    output = newmodel(input)
    # calculate the loss
    loss = criterion(output, target)
    # update test loss 
    test_loss += loss.item()*data.size(0)
    # convert output probabilities to predicted class
    _, pred = torch.max(output, 1)
    # compare predictions to true label
    correct = np.squeeze(pred.eq(target.data.view_as(pred)))
    # calculate test accuracy for each object class
    for i in range(len(target)):
        label = target.data[i]
        class_correct[label] += correct[i].item()
        class_total[label] += 1
# calculate and print avg test loss
test_loss = test_loss/len(test_loader.sampler)
print('Test Loss: {:.6f}\n'.format(test_loss))
for i in range(10):
    if class_total[i] > 0:
        print('Test Accuracy of %5s: %2d%% (%2d/%2d)' % (
            str(i), 100 * class_correct[i] / class_total[i],
            np.sum(class_correct[i]), np.sum(class_total[i])))
    else:
        print('Test Accuracy of %5s: N/A (no training examples)' % (classes[i]))
print('\nTest Accuracy (Overall): %2d%% (%2d/%2d)' % (
    100. * np.sum(class_correct) / np.sum(class_total),
    np.sum(class_correct), np.sum(class_total)))


Test Loss: 1.664680

Test Accuracy of     0: 98% (965/980)
Test Accuracy of     1: 98% (1123/1135)
Test Accuracy of     2: 97% (1002/1032)
Test Accuracy of     3: 99% (1002/1010)
Test Accuracy of     4: 98% (967/982)
Test Accuracy of     5: 95% (856/892)
Test Accuracy of     6: 96% (920/958)
Test Accuracy of     7: 96% (988/1028)
Test Accuracy of     8: 95% (933/974)
Test Accuracy of     9: 96% (972/1009)

Test Accuracy (Overall): 97% (9728/10000)
