In [1]:
import torch
# import matplotlib.pyplot as plt

from utility import data_prep, evaluation, common
from models.cnn_mnist import CNN_Network
from models.ann_fixed import FixedFullyConnectedNetwork
from models.ann import FullyConnectedNetwork
from models.model_utility import train_network, test_network, visualize_network
from pprint import pformat

In [6]:
# Experiment configuration: every tunable setting lives in this one dict.
# NOTE(review): 'ann_layer_units' lists two hidden layers while the activation
# and dropout lists each carry three entries; the recorded model printout shows
# only two ReLU/Dropout pairs, so the third entry looks unused -- confirm
# against FullyConnectedNetwork.
params = {
    # --- data and runtime ---
    'data_dir': 'data',
    'device': 'cpu',                    # one of: cpu, cuda
    'epochs': 3,
    'batch_size': 20,                   # alternative value noted by the author: 256
    # optimizer names: sgd, sgd_momentum, adagrad, adam, entropy_per_layer, entropy_per_neuron ...
    'optimizer': 'entropy_per_layer',
    'learning_rate': 0.1,

    # --- network shape ---
    'ann_input_nodes': 28 * 28,         # 784 inputs
    'output_nodes': 10,
    # alternative hidden-layer layouts noted by the author: [64, 32, 16], [128, 64]
    'ann_layer_units': [50, 50],
    'ann_layer_activations': ['relu', 'relu', 'relu'],
    # alternative dropout setting noted by the author: [0.2, 0.2, 0.2]
    'ann_layer_dropout_rates': [0.0, 0.0, 0.0],
}

In [7]:
# Init: echo the configuration, validate the requested device, build the
# torch.device handle used by the training/testing cells, and seed torch.
print('Experiment parameters')
print(pformat(params))

# Validate first, announce second: the "Running on ..." line is only printed
# for a device that has actually passed its check.
assert params['device'] in ['cpu', 'cuda'], (
    "params['device'] must be 'cpu' or 'cuda', got {!r}".format(params['device'])
)
if params['device'] == 'cuda':
    assert torch.cuda.is_available(), (
        "params['device'] is 'cuda' but torch.cuda.is_available() returned False"
    )
    print('Running on GPU (CUDA)')
else:
    # Only 'cpu' can reach this branch because of the membership check above.
    print('Running on CPU')
device = torch.device(params['device'])

# Seed torch's global random number generator so a fresh-kernel re-run starts
# from the same random state. An optional params['seed'] overrides the
# default of 0.
# NOTE(review): this does not seed Python's `random` or NumPy; whether the
# project helpers draw from those is not visible here -- confirm.
torch.manual_seed(params.get('seed', 0))

Experiment parameters
{'ann_input_nodes': 784,
 'ann_layer_activations': ['relu', 'relu', 'relu'],
 'ann_layer_dropout_rates': [0.0, 0.0, 0.0],
 'ann_layer_units': [50, 50],
 'batch_size': 20,
 'data_dir': 'data',
 'device': 'cpu',
 'epochs': 3,
 'learning_rate': 0.1,
 'optimizer': 'entropy_per_layer',
 'output_nodes': 10}
Running on CPU


In [4]:
# Fetch the preprocessed MNIST training and test sets through the project's
# data_prep helper, passing the data directory and batch size from the config.
trainset, testset = data_prep.get_mnist_preprocessed(
    params['data_dir'],
    params['batch_size'],
)

Preprocessing MNIST training set
Preprocessing MNIST test set


In [8]:
# Build the network, show its summary, then train it.
# Alternative models the author left for swapping in on the next line:
#   CNN_Network()
#   FixedFullyConnectedNetwork(input_size=28*28, output_size=10)
# NOTE(review): the variable is named cnn_net although it holds the fully
# connected network; the name is kept because the test cell refers to it.
cnn_net = FullyConnectedNetwork(params)

# The recorded output of this cell shows the layer list and parameter count.
visualize_network(cnn_net, trainset)

# Train on the training set; the returned history is consumed by the
# visualization cell.
history = train_network(
    cnn_net,
    trainset,
    params,
    device,
)

ModuleList(
  (0): Linear(in_features=784, out_features=50, bias=True)
  (1): ReLU()
  (2): Dropout(p=0.0, inplace=False)
  (3): Linear(in_features=50, out_features=50, bias=True)
  (4): ReLU()
  (5): Dropout(p=0.0, inplace=False)
  (6): Linear(in_features=50, out_features=10, bias=True)
)
Number of model parameters =  42310
Training neural network
[epoch: 1/3]	time: 3.2	train_loss: 0.4097
[epoch: 2/3]	time: 3.1	train_loss: 0.2052
[epoch: 3/3]	time: 3.1	train_loss: 0.1692
Finished Training. Time taken: 9.33 sec, 0.16 min


In [9]:
# Test model
# Evaluate the trained network on the test set using the configured device.
# In the recorded run this printed the accuracy over the 10000 test images.
# Kept as a bare trailing expression so any value it returns is still
# displayed by the notebook.
test_network(cnn_net, testset, device)

Accuracy of the network on the 10000 test images: 95 %


In [None]:
# Visualization
# Plot what was recorded in `history`, the value returned by train_network.
evaluation.plot_training_history(history)
# NOTE(review): presumably entropy data is only present in `history` when an
# entropy_* optimizer was used -- confirm before running with other optimizers.
evaluation.plot_entropy_history(history)

Plotting training history
