In [None]:
# Colab runtime only: mount Google Drive so files stored on it become readable.
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

In [None]:
cd "/content/drive/MyDrive/deep_learning_hw/foundations-dl-hw1/"

In [1]:
import torch
import itertools
import pandas as pd

from src.train import train, MultipleTrainResults
from src.cifar10_dataset import trainloader, testloader
from src.models import CNN

from src.utils import init_func__zero_mean_gaussian, get_sgd_optimizer

# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
device

>> Initializing data with parameters: BATCH_SIZE=64, DATA_FRACTION=0.1
Files already downloaded and verified
Files already downloaded and verified


device(type='cpu')

In [10]:
from torch import nn

# Settings of the prototype network built in this cell.
p_dropout = 0
hidden_layer_dim = 784
num_classes = 10

# Convolutional part: two stages of conv -> dropout -> ReLU -> 2x2 max-pool.
conv_stages = [
    nn.Conv2d(3, 64, kernel_size=3),
    nn.Dropout(p=p_dropout),
    nn.ReLU(),
    nn.MaxPool2d(kernel_size=(2,2), stride=2),

    nn.Conv2d(64, 16, kernel_size=3),
    nn.Dropout(p=p_dropout),
    nn.ReLU(),
    nn.MaxPool2d(kernel_size=(2,2), stride=2),
]

# Fully connected part. For a 3x32x32 input the conv stages leave
# 16 channels of 6x6 (32 -> 30 -> 15 -> 13 -> 6), i.e. 576 flattened features.
classifier_head = [
    nn.Flatten(),
    nn.Linear(576, hidden_layer_dim),
    nn.Dropout(p=p_dropout),
    nn.ReLU(),

    nn.Linear(hidden_layer_dim, num_classes),
]

layers = conv_stages + classifier_head
model = nn.Sequential(*layers)

# Shape check: a single random 3x32x32 input should yield one score per class.
x = torch.randn(1, 3, 32, 32)
model(x).shape

torch.Size([1, 10])

In [9]:
def get_optimal_baseline_training_params(hidden_layer_dim: int = 784,
                                         hidden_layers_count: int = 1,
                                         flattened_img_dim: int = 3072):
    """Assemble the keyword arguments for a baseline `train(...)` run.

    A new `CNN` is created on every call, so each returned dict owns its own
    model and an SGD optimizer (lr=0.001, momentum=0.9) bound to that model.

    Args:
        hidden_layer_dim: forwarded to `CNN(hidden_layer_dim=...)`.
        hidden_layers_count: forwarded to `CNN(hidden_layers_count=...)`.
        flattened_img_dim: forwarded to `CNN(flattened_img_dim=...)`.

    Returns:
        dict with the keys `model`, `optimizer`, `init_func`, `trainloader`,
        `testloader` and `num_epochs` (100), ready to be unpacked into `train`.
    """
    cnn = CNN(
        hidden_layer_dim=hidden_layer_dim,
        hidden_layers_count=hidden_layers_count,
        flattened_img_dim=flattened_img_dim,
    )
    sgd = get_sgd_optimizer(cnn, lr=0.001, momentum=0.9)

    return {
        "model": cnn,
        "optimizer": sgd,
        "init_func": init_func__zero_mean_gaussian(std=0.1),
        "trainloader": trainloader,
        "testloader": testloader,
        "num_epochs": 100,
    }

## [0] Run training sanity check

In [None]:
# Short 3-epoch run with a default CNN to confirm the training loop works end to end.
model = CNN()
sanity_init_func = init_func__zero_mean_gaussian(std=1)
sanity_optimizer = get_sgd_optimizer(model, lr=0.001, momentum=0.9)

results = train(
    model=model,
    init_func=sanity_init_func,
    optimizer=sanity_optimizer,
    trainloader=trainloader,
    testloader=testloader,
    num_epochs=3,
    device=device,
)

# Report the epoch index with the highest test accuracy and that accuracy.
best_epoch = results.test_accuracies.argmax()
best_acc = results.test_accuracies.max()
print(f"Best test-set accuracy: {best_acc} in epoch {best_epoch}")

results.report()

## [1] Grid search for baseline HParams

In [None]:
# Exhaustive grid search over learning rate, momentum and init std.
# The per-combination summary is written to ./report/q3-cnn-grid-search.csv.

lrs = [0.001, 0.01, 0.1, 0.2, 0.3]
momentums = [0.0, 0.2, 0.4, 0.6, 0.8, 0.9]
stds = [0.1, 0.5, 1.0]

# One record per (lr, momentum, std) combination with its best train/test accuracy.
grid_search_results = []

for lr, momentum, std in itertools.product(lrs, momentums, stds):
    print(f">> Training with: lr={lr}, momentum={momentum}, std={std} --------------------------")

    # A fresh model is trained for 50 epochs for every combination.
    model = CNN()
    run_init_func = init_func__zero_mean_gaussian(std=std)
    run_optimizer = get_sgd_optimizer(model, lr=lr, momentum=momentum)
    results = train(
        model=model,
        init_func=run_init_func,
        optimizer=run_optimizer,
        trainloader=trainloader,
        testloader=testloader,
        num_epochs=50,
    )
    results.get_accuracies_curve().show()

    record = {
        "lr": lr,
        "momentum": momentum,
        "std": std,
        "best_test_acc": results.test_accuracies.max(),
        "best_train_acc": results.train_accuracies.max(),
    }
    grid_search_results.append(record)

# Save results to CSV
grid_search_frame = pd.DataFrame(grid_search_results)
grid_search_frame.to_csv('./report/q3-cnn-grid-search.csv')


# To inspect the saved grid-search results later:
# pd.read_csv('./report/q3-cnn-grid-search.csv')

#### Optimal baseline hyperparameters found:

In [None]:
def get_optimal_baseline_training_params(hidden_layer_dim: int = 784, 
                                         hidden_layers_count: int = 1,
                                         flattened_img_dim: int = 3072):
  """Build the keyword arguments for a `train(...)` run with the baseline settings.

  The settings are SGD with lr=0.001 and momentum=0.9, a zero-mean Gaussian
  init with std=0.1, and 100 epochs on the default train/test loaders.

  NOTE: this cell previously instantiated `BaselineNN`, a name that is never
  imported or defined in this notebook, so every call raised `NameError`.
  The notebook's `CNN` model is used instead, matching the earlier definition
  of this function that this cell redefines.

  Args:
    hidden_layer_dim: forwarded to `CNN(hidden_layer_dim=...)`.
    hidden_layers_count: forwarded to `CNN(hidden_layers_count=...)`.
    flattened_img_dim: forwarded to `CNN(flattened_img_dim=...)`.

  Returns:
    dict with the keys `model`, `optimizer`, `init_func`, `trainloader`,
    `testloader` and `num_epochs`, suitable for `train(**params)`.
    A new model (and an optimizer bound to it) is created on every call.
  """
  model = CNN(hidden_layer_dim = hidden_layer_dim, 
              hidden_layers_count = hidden_layers_count,
              flattened_img_dim = flattened_img_dim)
  # The optimizer must be created from this exact model instance.
  optimizer = get_sgd_optimizer(model, lr=0.001, momentum=0.9)
  baseline_parameters = dict(
    model=model,
    optimizer=optimizer,
    init_func=init_func__zero_mean_gaussian(std=0.1),
    trainloader=trainloader,
    testloader=testloader,
    num_epochs=100,
  )

  return baseline_parameters

# Train once with the baseline parameters and show the run's report.
baseline_train_kwargs = get_optimal_baseline_training_params()
results = train(**baseline_train_kwargs)

results.report()

## [2] Optimization Impact

In [None]:
from src.utils import get_adam_optimizer

# Compare Adam against the SGD baseline at two learning rates: the baseline's
# own learning rate (read from the SGD optimizer's defaults) and 0.005.
# Alternative candidate list kept from the original cell: [0.0001, 0.001, 0.01, 0.05]
train_params = get_optimal_baseline_training_params()
baseline_lr = train_params['optimizer'].defaults['lr']

for lr in [baseline_lr, 0.005]:
  # Rebuild the parameters (and therefore the model) for every learning rate.
  # The original reused one model instance across iterations, which made the
  # second run depend on train() fully re-initialising an already-trained model.
  train_params = get_optimal_baseline_training_params()

  # Override the optimizer of the model
  adam_opt = get_adam_optimizer(model=train_params['model'], lr=lr)
  train_params.update({'optimizer': adam_opt})

  print(train_params)
  results = train(**train_params)

  results.report()

## [3] Initialization Impact

In [None]:
from src.utils import init_func__xavier

# Baseline configuration with only the weight initializer replaced by the one
# returned from init_func__xavier().
train_params = get_optimal_baseline_training_params()
train_params['init_func'] = init_func__xavier()
print(train_params)

results = train(**train_params)
results.report()

## [4] Regularization

## [5] Preprocessing

In [10]:
from src.cifar10_dataset import trainloader_PCA, testloader_PCA

# Baseline configuration built with flattened_img_dim=500 and fed from the
# PCA data loaders instead of the default ones.
# NOTE(review): 500 presumably matches the PCA loaders' feature dimension — confirm.
train_params = get_optimal_baseline_training_params(flattened_img_dim=500)
train_params['trainloader'] = trainloader_PCA
train_params['testloader'] = testloader_PCA
print(train_params)

results = train(**train_params)
results.report()

## [6] Network Width

In [None]:
# Width sweep: train the baseline with hidden_layer_dim = 2**6, 2**10 and 2**12.
# Results are keyed by the exponent (6, 10, 12), not by the width itself.
# (The original cell also built one extra set of baseline parameters before the
# loop that was overwritten without being used; that dead construction is gone.)
result_data = {}

for dim in [6, 10, 12]:
  train_params = get_optimal_baseline_training_params(hidden_layer_dim = 2**dim)
  print(train_params)
  result_data[dim] = train(**train_params)

In [None]:
import pickle

# Persist the collected training results so they can be reloaded without retraining.
with open("3_6_res.pkl", "wb") as results_file:
  pickle.dump(result_data, results_file)

In [None]:
# Plot the accuracy curve and then the loss curve for all collected runs.
result_comparison = MultipleTrainResults(result_data)
for build_curve in (result_comparison.get_accuracies_curve,
                    result_comparison.get_losses_curve):
  build_curve().show()

None


## [7] Network Depth

In [None]:
# Depth sweep: train a 64-unit-wide network for each hidden-layer count listed.
# Results are keyed by the hidden-layer count.
result_data = {}

for depth in [3]:
  # The original hard-coded hidden_layers_count = 1 and ignored the loop
  # variable, so the run stored under key 3 was really a 1-hidden-layer model.
  # NOTE(review): assumes CNN accepts hidden_layers_count > 1 — confirm in src.models.
  train_params = get_optimal_baseline_training_params(hidden_layer_dim = 64, hidden_layers_count = depth)
  print(train_params)
  result_data[depth] = train(**train_params)

In [None]:
import pickle

# Persist the collected training results so they can be reloaded without retraining.
with open("3_7_res.pkl", "wb") as results_file:
  pickle.dump(result_data, results_file)

In [None]:
# Plot the accuracy curve and then the loss curve for all collected runs.
result_comparison = MultipleTrainResults(result_data)
for build_curve in (result_comparison.get_accuracies_curve,
                    result_comparison.get_losses_curve):
  build_curve().show()