In [None]:
import torch
import torchvision
import numpy as np
import matplotlib.pyplot as plt

import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

In [None]:
# Training hyper-parameters (tune here; every later cell reads these names).
# Number of passes over the training set.
n_epochs = 14
# Mini-batch sizes handed to the train / test DataLoaders.
batch_size_train, batch_size_test = 25, 1000
# SGD optimizer settings.
learning_rate, momentum = 0.01, 0.5
# Print a progress line every `log_interval` training batches.
log_interval = 10

In [None]:
# Reproducibility: switch cuDNN off (presumably to avoid its non-deterministic
# kernels -- TODO confirm) and seed PyTorch's global RNG with a fixed value.
torch.backends.cudnn.enabled = False
random_seed = 3
torch.manual_seed(random_seed)


<torch._C.Generator at 0x7fa020f78a10>

In [None]:
#Load datasets
# Training images are augmented with a random translation of up to 10% of the
# image size along each axis (degrees=0 means no rotation), then converted to
# tensors.
train_loader = torch.utils.data.DataLoader(
  torchvision.datasets.MNIST(root='./data', train=True, download=True,
                             transform=torchvision.transforms.Compose([
                               torchvision.transforms.RandomAffine(degrees=0, translate=(0.1, 0.1)),
                               torchvision.transforms.ToTensor(),
                             ])),
  batch_size=batch_size_train, shuffle=True)

# The test set is evaluated as-is: no random augmentation and no shuffling, so
# that two evaluations of the same weights report the same loss/accuracy.
# (With a random transform on the test images, consecutive test() calls on an
# unchanged model returned different numbers.)
test_loader = torch.utils.data.DataLoader(
  torchvision.datasets.MNIST(root='./data', train=False, download=True,
                             transform=torchvision.transforms.ToTensor()),
  batch_size=batch_size_test, shuffle=False)

In [None]:
class Net(nn.Module):
  """Small CNN classifier: two 5x5 convolutions followed by one linear layer.

  Layers (see ``__init__``):
    * ``conv1``: 1 -> 5 channels, 5x5 kernel
    * ``conv2``: 5 -> 5 channels, 5x5 kernel, followed by ``Dropout2d``
    * ``fc1``:   80 -> 10 linear layer

  ``forward`` flattens the conv features to 80 values per sample, so the
  input spatial size must make the second pooled feature map hold exactly
  80 elements (e.g. 1x28x28 images give 5 x 4 x 4 = 80).
  """

  def __init__(self):
    super().__init__()
    self.conv1 = nn.Conv2d(1, 5, kernel_size=5)
    self.conv2 = nn.Conv2d(5, 5, kernel_size=5)
    self.conv2_drop = nn.Dropout2d()
    self.fc1 = nn.Linear(80, 10)

  # The forward pass needs to be implemented manually.
  def forward(self, x):
    """Return per-class log-probabilities, shape ``(N, 10)``, for input ``x``."""
    # conv -> 2x2 max-pool -> ReLU, twice; dropout is applied to conv2's output.
    x = F.relu(F.max_pool2d(self.conv1(x), 2))
    x = F.relu(F.max_pool2d(self.conv2_drop(self.conv2(x)), 2))
    # Flatten to 80 features per sample for the linear layer.
    x = x.view(-1, 80)
    # NOTE(review): ReLU on the final logits clamps them to >= 0 before the
    # softmax; kept as-is to preserve the trained architecture -- confirm intent.
    x = F.relu(self.fc1(x))
    # x is 2-D here, so the class axis is dim 1. Passing it explicitly avoids
    # the "implicit dimension choice" deprecation warning.
    return F.log_softmax(x, dim=1)

In [None]:
# Build the model and the SGD optimizer that updates its parameters.
network = Net()
optimizer = optim.SGD(network.parameters(), lr=learning_rate, momentum=momentum)
# Print the module itself to get the layer summary (printing the un-called
# `named_parameters` attribute only showed a bound-method repr).
print(network)
# Total number of trainable parameters.
print( sum(p.numel() for p in network.parameters() if p.requires_grad) )
print("Versus How many trainable parameters there are in the model in tiny dnn: 25450")

<bound method Module.named_parameters of Net(
  (conv1): Conv2d(1, 5, kernel_size=(5, 5), stride=(1, 1))
  (conv2): Conv2d(5, 5, kernel_size=(5, 5), stride=(1, 1))
  (conv2_drop): Dropout2d(p=0.5, inplace=False)
  (fc1): Linear(in_features=80, out_features=10, bias=True)
)>
1570
Versus How many trainable parameters there are in the model in tiny dnn: 25450


In [None]:
def _save_checkpoint():
  """Write the current model and optimizer state dicts to disk."""
  torch.save(network.state_dict(), 'model.pth')
  torch.save(optimizer.state_dict(), 'optimizer.pth')


def train(epoch):
  """Train the global ``network`` for one pass over ``train_loader``.

  Uses the global ``optimizer`` and the negative log-likelihood loss. Every
  ``log_interval`` batches a progress line is printed and a checkpoint is
  written; a final checkpoint is written when the epoch ends so the files on
  disk always match the weights left in memory.

  Args:
    epoch: Epoch number; only used in the progress message.
  """
  network.train()
  for batch_idx, (data, target) in enumerate(train_loader):
    optimizer.zero_grad()
    output = network(data)
    loss = F.nll_loss(output, target)
    loss.backward()
    optimizer.step()
    if batch_idx % log_interval == 0:
      print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
        epoch, batch_idx * len(data), len(train_loader.dataset),
        100. * batch_idx / len(train_loader), loss.item()))
      _save_checkpoint()
  # The last logging step is generally not the last batch, so save once more
  # to capture the updates made after it.
  _save_checkpoint()

# Average test loss recorded by each call to test(), in call order.
# Defined here so the notebook runs on a fresh kernel (test() appends to it).
test_losses = []

def test():
  """Evaluate the global ``network`` on ``test_loader`` and print a summary.

  Puts the network in eval mode, sums the negative log-likelihood loss over
  all test samples, counts correct argmax predictions, appends the average
  loss to the global ``test_losses`` list and prints average loss and accuracy.
  """
  network.eval()
  test_loss = 0
  correct = 0
  with torch.no_grad():
    for data, target in test_loader:
      output = network(data)
      # Sum (not mean) per batch so dividing by the dataset size below gives
      # the true per-sample average; `reduction='sum'` replaces the
      # deprecated `size_average=False`.
      test_loss += F.nll_loss(output, target, reduction='sum').item()
      pred = output.argmax(dim=1)
      # .item() keeps `correct` a plain Python int.
      correct += pred.eq(target).sum().item()
  test_loss /= len(test_loader.dataset)
  test_losses.append(test_loss)
  print('\nTest set: Avg. loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(test_loss, correct, len(test_loader.dataset),100. * correct / len(test_loader.dataset)))

In [None]:
# Evaluate once before any training to get a baseline for the untrained
# network, then alternate one training epoch with one evaluation pass.
test()
for epoch in range(1, n_epochs + 1):
  train(epoch)
  test()

  from ipykernel import kernelapp as app



Test set: Avg. loss: 2.2994, Accuracy: 1189/10000 (12%)


Test set: Avg. loss: 0.7300, Accuracy: 7856/10000 (79%)


Test set: Avg. loss: 0.4587, Accuracy: 8616/10000 (86%)


Test set: Avg. loss: 0.3687, Accuracy: 8899/10000 (89%)


Test set: Avg. loss: 0.3309, Accuracy: 9006/10000 (90%)


Test set: Avg. loss: 0.2965, Accuracy: 9134/10000 (91%)


Test set: Avg. loss: 0.2856, Accuracy: 9136/10000 (91%)


Test set: Avg. loss: 0.2625, Accuracy: 9215/10000 (92%)


Test set: Avg. loss: 0.2589, Accuracy: 9211/10000 (92%)


Test set: Avg. loss: 0.2644, Accuracy: 9205/10000 (92%)


Test set: Avg. loss: 0.2566, Accuracy: 9231/10000 (92%)


Test set: Avg. loss: 0.2535, Accuracy: 9235/10000 (92%)


Test set: Avg. loss: 0.2583, Accuracy: 9193/10000 (92%)


Test set: Avg. loss: 0.2475, Accuracy: 9272/10000 (93%)


Test set: Avg. loss: 0.2380, Accuracy: 9227/10000 (92%)



In [None]:
# Evaluate the trained network on the test set once more.
test()

  from ipykernel import kernelapp as app



Test set: Avg. loss: 0.2401, Accuracy: 9269/10000 (93%)



In [None]:
""" Tests of variouse architectures: manual CNN power archi 97% in 5 epochs  :             21840 parameters
2 Conv layers with 8 filters of size 5 : 1 fc : 128 entry of fc : 95% after 7 epochs :     3106 parameters 
2 Conv layers with 6 filters each of size 5: 1 fc : 96 entry of fc : 94% after 14 epochs : 2032 parameters *
1 Conv layer with 6 filters of size 5 : 1fc: 864 :  93% after 12 epochs:                   8806 parameters     The MLP (fully connected) part eats all the memory
2 Conv layers with 5 filters each of size 5 : 1fc 80 : 93% after  14 epochs :              1570 parameters

We can conclude that the general approach on Starknet is to grow a NN in width, not height: computation is cheap, memory is not.

2 Conv layers with 6 filters each of size 7 : 1fc 24 : 91% after 13 epochs :            2320 parameters


"""

' Tests of variouse architectures: manual CNN power archi 97% in 5 epochs  :             21840 parameters\n2 Conv layers with 8 filters of size 5 : 1 fc : 128 entry of fc : 95% after 7 epochs :     3106 parameters \n2 Conv layers with 6 filters each of size 5: 1 fc : 96 entry of fc : 94% after 14 epochs : 2032 parameters\n1 Conv layer with 6 filters of size 5 : 1fc: 864 :  93% after 12 epochs:                   8806 parameters\n'