In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
import numpy as np

## Let's get the data, define the model, and set up the training code

In [2]:
# Download (if needed) both MNIST splits into "./"; transforms.ToTensor() turns each image into a tensor.
mnist_train = datasets.MNIST("./", train=True, transform=transforms.ToTensor(), download=True)
mnist_test = datasets.MNIST("./", train=False, transform=transforms.ToTensor(), download=True)

# Batches of 128; only the training split is shuffled.
train_loader = DataLoader(mnist_train, batch_size=128, shuffle=True)
test_loader = DataLoader(mnist_test, batch_size=128, shuffle=False)

print(f"Training images {len(train_loader.dataset)}, Test images {len(test_loader.dataset)}")

Training images 60000, Test images 10000


In [3]:
class mnist_model(nn.Module):
  """Small MNIST classifier: one strided convolution followed by two linear layers.

  Inputs are expected as (N, 1, 28, 28) images. The 2x2 / stride-2 convolution
  with 5 output channels maps them to (N, 5, 14, 14), which is flattened to
  980 features per image before the linear layers.
  """

  # 5 channels * 14 * 14 positions left after the convolution on a 28x28 image.
  FLAT_FEATURES = 5 * 14 * 14

  def __init__(self):
    super().__init__()
    self.layer1 = nn.Conv2d(1, 5, kernel_size=2, stride=2, padding=0)
    self.layer2 = nn.Linear(self.FLAT_FEATURES, 100, bias=True)
    self.layer3 = nn.Linear(100, 10, bias=True)
    self.act = nn.ReLU()

  def forward(self, x):
    """Return only the final-layer logits (10 values per image)."""
    # Delegate to output() so the two code paths can never drift apart.
    return self.output(x)[-1]

  def output(self, x):
    """Run the network and return the output of every layer.

    Returns:
      A tuple (out1, out2, out3):
        out1 -- ReLU(conv) flattened to 980 features per row,
        out2 -- ReLU(first linear layer), 100 features per row,
        out3 -- raw logits of the last linear layer, 10 per row.
    """
    out1 = self.act(self.layer1(x))
    out1 = out1.view(-1, self.FLAT_FEATURES)
    out2 = self.act(self.layer2(out1))
    out3 = self.layer3(out2)
    return out1, out2, out3

In [4]:
# Hyperparameters.
epochs = 15
lr = 0.1
USE_MSE = False  # True selects nn.MSELoss, False selects nn.CrossEntropyLoss

model = mnist_model()
print(model)

# Plain SGD; the scheduler is given `epochs` as its T_max.
optimizer = optim.SGD(model.parameters(), lr=lr)
lrs = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=epochs)

if USE_MSE:
  criterion = nn.MSELoss()
else:
  criterion = nn.CrossEntropyLoss()

mnist_model(
  (layer1): Conv2d(1, 5, kernel_size=(2, 2), stride=(2, 2))
  (layer2): Linear(in_features=980, out_features=100, bias=True)
  (layer3): Linear(in_features=100, out_features=10, bias=True)
  (act): ReLU()
)


## Training

In [5]:
def get_acc(model, loader):
  """Return the top-1 accuracy of `model` over `loader`, as a percentage.

  Args:
    model: callable mapping a batch of inputs to per-class scores along the
      last dimension. If it is an nn.Module it is put in eval mode for the
      duration of the call and its previous train/eval mode is restored.
    loader: iterable of (img, label) batches.

  Returns:
    100 * correct / total as a Python float.

  Raises:
    ZeroDivisionError: if `loader` yields no samples.
  """
  is_module = isinstance(model, torch.nn.Module)
  was_training = is_module and model.training
  if is_module:
    model.eval()
  correct = 0
  total = 0
  try:
    # Evaluation needs no gradients; skipping the autograd graph saves memory and time.
    with torch.no_grad():
      for img, label in loader:
        correct += torch.sum(torch.argmax(model(img), -1) == label).item()
        total += len(img)
  finally:
    if is_module:
      model.train(was_training)
  return 100*correct/total

In [7]:
# One pass over the training set per epoch, then one scheduler step and an accuracy report.
for e in range(epochs):
  print("lr", optimizer.param_groups[0]["lr"])
  model.train()
  for img, label in train_loader:
    out = model(img)
    # MSELoss needs a float target shaped like the logits, so one-hot encode for it only.
    # CrossEntropyLoss takes the integer class indices directly, which also works on
    # PyTorch versions that do not accept class-probability targets.
    if isinstance(criterion, nn.MSELoss):
      target = nn.functional.one_hot(label, num_classes=10).float()
    else:
      target = label
    optimizer.zero_grad()
    loss = criterion(out, target)
    loss.backward()
    optimizer.step()
  lrs.step()
  print(f"Epoch {e}, training accuracy {get_acc(model, train_loader)}, test accuracy {get_acc(model, test_loader)}")

lr 0.1
Epoch 0, training accuracy 93.02, test accuracy 93.27
lr 0.09890738003669029
Epoch 1, training accuracy 95.65666666666667, test accuracy 95.84
lr 0.09567727288213004
Epoch 2, training accuracy 96.67, test accuracy 96.49
lr 0.09045084971874738
Epoch 3, training accuracy 97.29, test accuracy 97.21
lr 0.08345653031794292
Epoch 4, training accuracy 97.71666666666667, test accuracy 97.21
lr 0.07500000000000001


KeyboardInterrupt: 

## Extract weights

In [None]:
# Snapshot every parameter as (name, NumPy array). detach() replaces the legacy `.data`
# attribute, and copy() keeps the arrays from sharing memory with the live weights
# (Tensor.numpy() on a CPU tensor returns a view of the same storage).
params = [(name, p.detach().cpu().numpy().copy()) for (name, p) in model.named_parameters()]

In [None]:
# One summary line per extracted parameter: the two leading dot-separated name parts and the array shape.
for param_name, weights in params:
  pieces = param_name.split('.')
  print(f"Layer {pieces[0]}, type {pieces[1]}, shape {weights.shape}")

In [None]:
#print(params)

## Visualize hidden activations

In [None]:
# print(model.children())
# out = list(model.children())[0](img.cuda()).data.cpu().numpy()

In [None]:
# import matplotlib.pyplot as plt
# %matplotlib inline

# for _ in range(out.shape[1]):
#   plt.figure(figsize=(1, 1))
#   plt.imshow(out[0, 0], cmap="gray")

In [None]:
# Take a single batch from the training loader. Unlike a `for ...: break` loop, next()
# raises StopIteration on an empty loader instead of silently leaving stale
# `img` / `label` values from the training loop in place.
img, label = next(iter(train_loader))

In [None]:
# Mount Google Drive at /content/drive (google.colab is presumably only importable
# inside a Colab runtime -- TODO confirm before running elsewhere).
from google.colab import drive
drive.mount('/content/drive')
# Export folder for the saved inputs, activations and weights. The trailing slash
# matters: the export cell builds file names as `path + "<name>"`.
path = "/content/drive/My Drive/Colab Notebooks/Falcon Neural Network/Sarda/"

In [None]:
import os


def _save_txt(name, values):
  """Write `values` as space-delimited text to the file `name` inside the export folder `path`."""
  np.savetxt(fname=os.path.join(path, name), delimiter=" ", X=values)


# Run the batch on whatever device the model lives on. The model is never moved to the GPU
# in this notebook, so forcing the input onto CUDA would fail with a device mismatch
# (and would require a GPU at all). One forward pass yields all three layer outputs;
# no_grad() avoids building an autograd graph just to dump numbers.
device = next(model.parameters()).device
with torch.no_grad():
  out1, out2, out3 = model.output(img.to(device).view(-1, 1, 28, 28))

# Inputs are flattened to one 784-value row per image.
_save_txt("input_0", img.view(-1, 784).tolist())
_save_txt("outputlayer1_0", out1.tolist())
_save_txt("outputlayer2_0", out2.tolist())
_save_txt("outputlayer3_0", out3.tolist())

# `params` holds (name, array) pairs in named_parameters() order:
# layer1 weight/bias, layer2 weight/bias, layer3 weight/bias.
# NOTE(review): the conv weight has shape (5, 1, 2, 2); reshape(4, 5) keeps the flat element
# order and only changes where the rows break -- it is not a transpose. Confirm that this is
# the layout the consumer of weight1_0 expects.
_save_txt("weight1_0", params[0][1].reshape(2*2*1, 5).tolist())
_save_txt("bias1_0", params[1][1].tolist())
_save_txt("weight2_0", params[2][1].tolist())
_save_txt("bias2_0", params[3][1].tolist())
_save_txt("weight3_0", params[4][1].tolist())
_save_txt("bias3_0", params[5][1].tolist())