In [2]:
from pathlib import Path
from timeit import default_timer as timer

import lovely_tensors as lt
import matplotlib.pyplot as plt
import neptune
import numpy as np
import torch
import torch.nn as nn
from torch.optim import SGD
from torch.optim import Adam
from torch.optim.lr_scheduler import ReduceLROnPlateau
from torch.optim.lr_scheduler import ExponentialLR
from torch.utils.data import Dataset, DataLoader
from torchvision import datasets
from tqdm import tqdm

from neptune_creds import api_token


# Activate the lovely_tensors monkey patch before any tensors are printed.
lt.monkey_patch()

# Prefer the GPU when PyTorch reports one, otherwise fall back to the CPU.
if torch.cuda.is_available():
  device = "cuda"
else:
  device = "cpu"
print(f"device: {device}")


# Switch between Google Colab and a local installation.
USING_COLAB = False

if USING_COLAB:
  # These imports are only attempted when the Colab flag is set.
  from google.colab import drive
  from google.colab.patches import cv2_imshow
  drive.mount('/content/drive')

device: cpu


Download and load the training data

In [3]:
# MNIST training split stored under data/ (download=True is passed to torchvision);
# keep the raw image tensor and the label tensor as separate names.
train_set = datasets.MNIST(root='data/', train=True, download=True)
train_images, train_targets = train_set.data, train_set.targets

In [4]:
# MNIST test split (train=False) from the same data/ directory;
# again expose images and labels under their own names.
test_set = datasets.MNIST(root='data/', train=False, download=True)
test_images, test_targets = test_set.data, test_set.targets

In [8]:
class MNISTDataset(Dataset):
    """MNIST images as flattened float vectors, paired with their labels.

    Args:
        x: image tensor; it is converted to float, divided by 255 and
           reshaped to (-1, 28*28), i.e. one 784-element row per image.
        y: label tensor, indexed in parallel with ``x``.

    Both tensors are moved to the module-level ``device`` once, at
    construction time, so ``__getitem__`` is a plain index operation instead
    of a per-sample device transfer.
    """

    def __init__(self, x, y):
        # Divide by 255 (maps 8-bit pixel values into [0, 1]) and flatten each
        # image to a vector. No channel axis is inserted first: the reshape to
        # (-1, 784) would discard it again anyway.
        x = x.float() / 255
        x = x.reshape(-1, 28 * 28)
        # One bulk transfer here replaces one transfer per sample per epoch.
        self.x, self.y = x.to(device), y.to(device)

    def __getitem__(self, ix):
        """Return the (image_vector, label) pair at index ``ix`` (already on ``device``)."""
        return self.x[ix], self.y[ix]

    def __len__(self):
        """Number of samples in the dataset."""
        return len(self.x)


def init_weights(m):
  """Initialise a single module in place; intended for ``model.apply(init_weights)``.

  Linear layers (including subclasses of ``nn.Linear``) get weights drawn from
  a normal distribution with mean 0.0 and std 0.1, and a zero bias when a bias
  exists. Every other module type is left untouched.

  Args:
    m: the module handed in by ``nn.Module.apply``.
  """
  # isinstance instead of an exact type comparison so subclasses of nn.Linear
  # are initialised as well.
  if isinstance(m, nn.Linear):
    # nn.init works on the parameter itself without recording the operation
    # in autograd, so there is no need to go through `.data`.
    nn.init.normal_(m.weight, mean=0.0, std=0.1)
    if m.bias is not None:
      nn.init.zeros_(m.bias)
      
      
def train_batch(x, y, model, optimizer, loss_fn):
  """Run one optimisation step on a single batch.

  Puts the model in training mode, computes the loss of ``model(x)`` against
  ``y``, backpropagates, applies the optimizer update and then clears the
  gradients for the next call.

  Returns:
    The batch loss as a Python float.
  """
  model.train()
  step_loss = loss_fn(model(x), y)
  step_loss.backward()
  optimizer.step()
  # Gradients are cleared after the update, so the next batch starts clean.
  optimizer.zero_grad()
  return step_loss.item()


def accuracy(x, y, model):
  """Per-sample correctness of the model's predictions on one batch.

  Switches the model to eval mode, runs a gradient-free forward pass and
  compares the index of the largest output along the last axis with ``y``.

  Returns:
    A list of booleans, one per sample (True where the prediction matches).
  """
  model.eval()
  with torch.no_grad():
    outputs = model(x)
  predicted_classes = outputs.max(-1).indices
  hits = predicted_classes == y
  return hits.cpu().tolist()


def loss(x, y, model, loss_fn):
  """Evaluate ``loss_fn`` on one batch without touching gradients.

  The model is put into eval mode and the forward pass runs under
  ``torch.no_grad()``.

  Returns:
    The loss as a Python float.
  """
  model.eval()
  with torch.no_grad():
    batch_value = loss_fn(model(x), y)
  return batch_value.item()


def get_data(batch_size=32):
    """Build the training and test data loaders from the module-level MNIST tensors.

    Args:
        batch_size: mini-batch size of the training loader.

    Returns:
        (train_dl, test_dl): the training loader reshuffles every epoch; the
        test loader yields the whole test set as a single batch.
    """
    train = MNISTDataset(train_images, train_targets)
    train_dl = DataLoader(train, batch_size=batch_size, shuffle=True)
    test = MNISTDataset(test_images, test_targets)
    # Evaluation does not depend on sample order, so the test loader is not
    # shuffled: this keeps its order fixed and avoids a needless permutation
    # of the test set on every pass.
    test_dl = DataLoader(test, batch_size=len(test_images), shuffle=False)
    return train_dl, test_dl


In [11]:
def get_model():
  """Build the MNIST classifier together with its loss function and optimizer.

  The network is a small fully connected net (784 -> 30 -> 20 -> 10) with tanh
  activations and a final ``LogSoftmax``, so it outputs log-probabilities.

  Returns:
    (model, loss_fn, optimizer):
      model     -- the ``nn.Sequential`` network, moved to ``device``
      loss_fn   -- ``nn.NLLLoss``, the loss that matches log-probability outputs
      optimizer -- Adam with lr=1e-3 and weight_decay=1e-3
  """
  # Alternatives that used to be kept here as commented-out code: a small
  # Conv2d/MaxPool/Dropout network and a deeper fully connected stack
  # (784-600-300-150-50-30-20-10 with tanh, then sigmoid, activations).
  model = nn.Sequential(
    nn.Linear(28 * 28, 30),
    nn.Tanh(),
    nn.Linear(30, 20),
    nn.Tanh(),
    nn.Linear(20, 10),
    nn.LogSoftmax(dim=1),
  ).to(device)
  # The model already ends in LogSoftmax, so the matching criterion is NLLLoss.
  # CrossEntropyLoss applies log-softmax itself and is meant for raw logits;
  # feeding it log-probabilities only repeats that normalisation. The loss
  # values are the same either way, because log-softmax of log-probabilities
  # returns them unchanged.
  loss_fn = nn.NLLLoss()
  optimizer = Adam(model.parameters(), lr=1e-3, weight_decay=1e-3)
  # optimizer = SGD(model.parameters(), lr=1e-2)
  return model, loss_fn, optimizer

def _plot_history(run, epoch_axis, curves, ylabel, neptune_key, file_path):
  """Plot per-epoch curves, upload the figure to Neptune and save it to disk.

  Args:
    run: the Neptune run that receives the figure under ``neptune_key``.
    epoch_axis: x values (epoch indices).
    curves: list of (label, values) pairs plotted against ``epoch_axis``.
    ylabel: label for the y axis.
    neptune_key: Neptune field the figure is uploaded to.
    file_path: where the image file is written.
  """
  fig = plt.figure()
  for label, values in curves:
    plt.plot(epoch_axis, values, label=label)
  plt.xlabel('epoch')
  plt.ylabel(ylabel)
  plt.legend()
  run[neptune_key].upload(fig)
  # Save through the figure object and BEFORE plt.show(): once show() has
  # displayed the figure, a later plt.savefig() may write an empty figure.
  fig.savefig(file_path)
  plt.show()


run = neptune.init_run(
    project="radlfabs/DLO-MNIST-OLD-SCHOOL",
    api_token=api_token,
)

# Everything that uses the run sits inside try/finally so the Neptune run is
# stopped even when training is interrupted (e.g. KeyboardInterrupt) or fails.
try:
  batch_size = 64
  epochs = 150

  train_dl, test_dl = get_data(batch_size)
  model, loss_fn, optimizer = get_model()
  # scheduler = ExponentialLR(optimizer, gamma=0.9)
  scheduler = ReduceLROnPlateau(optimizer, 'min', factor=0.5, patience=2, verbose=True)

  # Record the run configuration.
  run["init_weights"] = "normal"
  run["model"] = str(model)
  run["optimizer"] = str(optimizer)
  run["loss_function"] = str(loss_fn)
  run["dataset"] = "MNIST"
  run["batch_size"] = batch_size
  run["lr_scheduler"] = str(scheduler) if scheduler else None
  run["epochs"] = epochs
  #----------------------------------------------
  # Training >>>
  #
  print('Starting training...')

  model.apply(init_weights)  # set the initial network weights randomly

  # Early stopping: stop once the test loss has failed to improve by more
  # than `epsilon` for `consecutive_epochs_criterion` epochs in a row.
  early_stop = False
  best_loss = float('inf')
  epochs_no_improve = 0
  consecutive_epochs_criterion = 3
  epsilon = 1e-3

  arrPlotX = []
  train_losses, train_accuracies = [], []
  test_losses, test_accuracies = [], []
  for epoch in range(epochs):
    timeBeginEpoch = timer()
    train_epoch_losses, train_epoch_accuracies = [], []

    for x, y in train_dl:
      batch_loss = train_batch(x, y, model, optimizer, loss_fn)
      train_epoch_losses.append(batch_loss)
      # Accuracy is measured on the same batch right after the update;
      # accuracy() switches to eval mode, train_batch() switches back.
      is_correct = accuracy(x, y, model)
      train_epoch_accuracies.extend(is_correct)

    train_epoch_loss = np.array(train_epoch_losses).mean()
    train_epoch_accuracy = np.mean(train_epoch_accuracies)

    # Aggregate over ALL test batches (weighted by batch size) instead of
    # keeping only the values of the last batch, so the metrics stay correct
    # if the test loader ever yields more than one batch.
    val_is_correct, val_batch_losses, val_batch_sizes = [], [], []
    for x, y in test_dl:
      val_is_correct.extend(accuracy(x, y, model))
      val_batch_losses.append(loss(x, y, model, loss_fn))
      val_batch_sizes.append(len(y))
    validation_loss = (
      sum(l * n for l, n in zip(val_batch_losses, val_batch_sizes)) / sum(val_batch_sizes)
    )

    val_epoch_accuracy = np.mean(val_is_correct)
    arrPlotX.append(epoch)
    train_losses.append(train_epoch_loss)
    train_accuracies.append(train_epoch_accuracy)
    test_losses.append(validation_loss)
    test_accuracies.append(val_epoch_accuracy)
    run["train/loss"].log(train_epoch_loss)
    run["train/accuracy"].log(train_epoch_accuracy)
    run["test/loss"].log(validation_loss)
    run["test/accuracy"].log(val_epoch_accuracy)
    timeEndEpoch = timer()
    print( f"epoch: {epoch}  train_acc: {100 * train_epoch_accuracy:.2f}%  test_acc: {100 * val_epoch_accuracy:.2f}%  took {timeEndEpoch-timeBeginEpoch:.1f}s" )
    # ReduceLROnPlateau lowers the learning rate when the test loss stalls.
    scheduler.step(validation_loss)

    # Check for early stopping
    if validation_loss < best_loss - epsilon:
      best_loss = validation_loss
      epochs_no_improve = 0
    else:
      epochs_no_improve += 1
      if epochs_no_improve >= consecutive_epochs_criterion:
        early_stop = True
        print("Early stopping criterion met")
        break

  # On Colab results go to the mounted Drive folder, locally to the working directory.
  results_dir = '/content/drive/My Drive/ColabNotebooks/results/' if USING_COLAB else ''

  if USING_COLAB:
    torch.save(model.state_dict(), results_dir + 'nnMnist_exp01.pt')
  else:
    torch.save(model.state_dict(), 'nnMnist_exp01.pt')
    run["model_file"].upload('nnMnist_exp01.pt')

  _plot_history(
    run, arrPlotX,
    [('train accuracy', train_accuracies), ('test accuracy', test_accuracies)],
    'accuracy', "plot/accuracy", results_dir + 'accuracies_exp0.png',
  )
  _plot_history(
    run, arrPlotX,
    [('train', train_losses), ('test', test_losses)],
    'loss', "plot/loss", results_dir + 'losses_exp0.png',
  )
finally:
  run.stop()

https://app.neptune.ai/radlfabs/DLO-MNIST-OLD-SCHOOL/e/DLOM-47
Starting training...
epoch: 0  train_acc: 85.94%  test_acc: 92.62%  took 5.6s
epoch: 1  train_acc: 93.86%  test_acc: 94.24%  took 5.2s
epoch: 2  train_acc: 95.13%  test_acc: 95.07%  took 5.0s
epoch: 3  train_acc: 95.79%  test_acc: 95.52%  took 5.1s
epoch: 4  train_acc: 96.30%  test_acc: 96.01%  took 7.2s
epoch: 5  train_acc: 96.66%  test_acc: 96.33%  took 6.0s
epoch: 6  train_acc: 96.95%  test_acc: 96.30%  took 5.5s
epoch: 7  train_acc: 97.23%  test_acc: 96.63%  took 5.8s
epoch: 8  train_acc: 97.32%  test_acc: 96.30%  took 5.8s
epoch: 9  train_acc: 97.47%  test_acc: 96.56%  took 7.9s
epoch: 10  train_acc: 97.50%  test_acc: 96.68%  took 6.4s
epoch: 11  train_acc: 97.63%  test_acc: 96.60%  took 5.2s
epoch: 12  train_acc: 97.75%  test_acc: 96.68%  took 6.6s
epoch: 13  train_acc: 97.79%  test_acc: 96.58%  took 6.8s
epoch: 14  train_acc: 97.81%  test_acc: 96.72%  took 6.3s
epoch: 15  train_acc: 97.88%  test_acc: 96.75%  took 6.3

KeyboardInterrupt: 

In [14]:
# List every named parameter of the model together with its tensor shape.
for param_name, tensor in model.named_parameters():
  print(param_name, tensor.shape)

0.weight torch.Size([600, 784])
0.bias torch.Size([600])
2.weight torch.Size([300, 600])
2.bias torch.Size([300])
4.weight torch.Size([150, 300])
4.bias torch.Size([150])
6.weight torch.Size([50, 150])
6.bias torch.Size([50])
8.weight torch.Size([30, 50])
8.bias torch.Size([30])
10.weight torch.Size([20, 30])
10.bias torch.Size([20])
12.weight torch.Size([10, 20])
12.bias torch.Size([10])


In [12]:
# Show the model's layer structure (the cell's last expression is rendered as its output).
model

Sequential(
  (0): Linear(in_features=784, out_features=30, bias=True)
  (1): Tanh()
  (2): Linear(in_features=30, out_features=20, bias=True)
  (3): Tanh()
  (4): Linear(in_features=20, out_features=10, bias=True)
  (5): LogSoftmax(dim=1)
)

In [13]:
# save the network
model_path = Path('models/nnMnist97p.pt')
# torch.save does not create missing parent directories, so make sure
# 'models/' exists before writing the state dict.
model_path.parent.mkdir(parents=True, exist_ok=True)
torch.save(model.state_dict(), model_path)