# Shahir Rahman

In [1]:
import os
import json

import numpy as np
from joblib import Parallel, delayed

import torch
import torchvision
from torchsummary import summary

from project_18408.datasets import *
from project_18408.evaluation import *
from project_18408.experiments import *
from project_18408.utils import *

In [2]:
# Record the library versions this notebook was executed with.
print("PyTorch Version:", torch.__version__)
print("Torchvision Version:", torchvision.__version__)

# Query CUDA availability once, then derive both the device handle and the
# status message from that single answer.
gpu_available = torch.cuda.is_available()
if gpu_available:
    device = torch.device("cuda")
    status_message = "Using the GPU!"
else:
    device = torch.device("cpu")
    status_message = "WARNING: Could not find GPU! Using CPU only"
print(status_message)

PyTorch Version: 1.8.0
Torchvision Version: 0.9.0
Using the GPU!


In [3]:
# JSON file holding the experiment configurations for the MNIST runs.
# It is opened exactly as written, so the path is resolved against the
# notebook's current working directory.
configs_fname = "experiment_configs_mnist.json"

In [4]:
# Read the serialized configuration entries from disk.
with open(configs_fname, 'r') as config_file:
    raw_configs = json.load(config_file)

# Turn every raw entry into an ExperimentConfig object, preserving file order.
configs = list(map(ExperimentConfig.from_dict, raw_configs))

In [5]:
# Sanity check: show the first parsed experiment configuration.
print(configs[0])

{'dataset_config': {'dataset_type': 'img', 'dataset_config': {'img_dataset_type': 'mnist', 'num_train_samples': 10000, 'num_test_samples': 10000, 'new_input_size': None, 'flatten': True, 'augment': False, 'corrupt_frac': 1.0, 'seed': 0}}, 'model_config': {'model_type': 'relu_toy', 'model_config': {'input_dim': 784, 'output_dim': 10, 'layer_dims': [100], 'bias': False, 'seed': None}}, 'training_config': {'optimizer_type': 'sgd_momentum', 'loss_type': 'cross_entropy', 'lr': 0.01, 'num_epochs': 20, 'clip_grad_norm': False, 'weight_decay': 0.0, 'use_lr_schedule': False, 'epoch_lr_decay_steps': None, 'lr_decay_gamma': None}, 'trial_index': 0}


In [6]:
# Directories handed to ExperimentManager: one for datasets, one for experiments.
# NOTE(review): get_rel_pkg_path presumably resolves these relative to the
# project_18408 package root — confirm against its definition.
data_dir = get_rel_pkg_path("dataset/")
experiment_dir = get_rel_pkg_path("experiments/")

In [7]:
# Single manager instance; process_training uses it to register experiments
# and to run their training.
manager = ExperimentManager(data_dir, experiment_dir)

In [8]:
def process_training(config):
    """Register one experiment configuration and run its training.

    Relies on the notebook-level ``manager`` and ``device`` objects.

    Args:
        config: The experiment configuration to register and train.

    Returns:
        The ``(setup, state)`` pair produced by ``manager.run_training``.
        Previously these values were unpacked and then dropped, so callers
        (including a joblib ``Parallel`` run that collects results) only
        ever received ``None``.
    """
    # NOTE(review): exist_ok / completed_ok presumably let a re-run tolerate
    # experiments that were already added or already finished — confirm in
    # ExperimentManager.
    manager.add_experiment(config, exist_ok=True)
    setup, state = manager.run_training(
        config,
        device,
        num_workers=2,  # this is required for joblib
        pin_memory=False,
        completed_ok=True,
    )
    return setup, state

In [None]:
# Train the configurations one after another, in the order they were loaded.
for experiment_config in configs:
    process_training(experiment_config)

  0%|                                                                                          | 0/469 [00:00<?, ?it/s]

Epoch 1/20
----------
Training


Avg. Loss: 2.3042, Total Loss: 2.3001, Loss Parts: [2.3001]: 100%|███████████████████| 469/469 [00:12<00:00, 38.39it/s]
  0%|                                                                                           | 0/79 [00:00<?, ?it/s]

Training Loss: 2.3042
Training Accuracy: 0.0993
Testing


100%|██████████████████████████████████████████████████████████████████████████████████| 79/79 [00:10<00:00,  7.41it/s]
  0%|                                                                                          | 0/469 [00:00<?, ?it/s]

Testing loss 2.3029
Testing accuracy 0.0971

Epoch 2/20
----------
Training


Avg. Loss: 2.3016, Total Loss: 2.3070, Loss Parts: [2.3070]: 100%|███████████████████| 469/469 [00:11<00:00, 40.40it/s]
  0%|                                                                                           | 0/79 [00:00<?, ?it/s]

Training Loss: 2.3016
Training Accuracy: 0.1079
Testing


100%|██████████████████████████████████████████████████████████████████████████████████| 79/79 [00:10<00:00,  7.24it/s]
  0%|                                                                                          | 0/469 [00:00<?, ?it/s]

Testing loss 2.3035
Testing accuracy 0.0992

Epoch 3/20
----------
Training


Avg. Loss: 2.3004, Total Loss: 2.3041, Loss Parts: [2.3041]: 100%|███████████████████| 469/469 [00:11<00:00, 40.69it/s]
  0%|                                                                                           | 0/79 [00:00<?, ?it/s]

Training Loss: 2.3004
Training Accuracy: 0.1124
Testing


100%|██████████████████████████████████████████████████████████████████████████████████| 79/79 [00:09<00:00,  8.05it/s]
  0%|                                                                                          | 0/469 [00:00<?, ?it/s]

Testing loss 2.3038
Testing accuracy 0.1019

Epoch 4/20
----------
Training


In [None]:
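# Disabled alternative to the sequential loop above: run the configurations
# concurrently on 3 joblib threads and collect each call's return value in `out`.
#out = Parallel(n_jobs=3, verbose=100, prefer="threads")(delayed(process_training)(c) for c in configs)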