In [1]:
print('..running')
import os
import torch
from torch.utils.data import DataLoader
import torch.optim.lr_scheduler as lr_scheduler
import ssl
# NOTE(review): this swaps the default HTTPS context factory for the unverified
# one, so every HTTPS connection in this process that relies on that default
# skips TLS certificate verification. Presumably a workaround for certificate
# errors while downloading the dataset -- confirm, and prefer fixing the local
# certificate store over disabling verification globally.
ssl._create_default_https_context = ssl._create_unverified_context
from util import cross_entropy_loss_fn
import models
import wandb
from train import evaluation, training 
from data import load_data

# W&B cannot always detect the notebook name on its own; setting it explicitly
# lets it save the notebook code alongside each run.
os.environ['WANDB_NOTEBOOK_NAME'] = 'hyperparameter_optimization.ipynb'

# Train on the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Directory handed to `training` as `result_dir`. makedirs(exist_ok=True)
# replaces the exists()/mkdir() pair: it is safe to re-run, has no
# check-then-create race, and also works if result_dir becomes a nested path.
result_dir = 'models'
os.makedirs(result_dir, exist_ok=True)

# Run name passed to `training`.
name = 'made'  # Change to regularized

# Objective shown for the sweep: the `test_loss` value each run logs at the end.
metric = {'name': 'test_loss', 'goal': 'minimize'}

# Search space. A 'value' entry is held constant for every run; a 'values'
# entry lists the candidates the grid enumerates.
parameters_dict = {
    'D': {'value': 784},
    'M': {'values': [4000, 8000]},
    'lam': {'values': [0.1, 0.5, 1.0]},
    'num_epochs': {'value': 30},
    'lr': {'values': [1e-2, 1e-3, 1e-4]},
    'batch_size': {'values': [64, 128]},
    'n_masks': {'value': 1},
    'max_patience': {'value': 10},
}

# Full sweep definition: exhaustive grid search over the space above.
sweep_config = {
    'method': 'grid',
    'metric': metric,
    'parameters': parameters_dict,
}

# Register the sweep with W&B; the returned id is what the agent attaches to.
sweep_id = wandb.sweep(sweep_config, project="made_hyperparameter_optimization")

def hyperparameter_sweep(config=None):
    """Run a single sweep trial: train a MADE model and log its test loss.

    Opens a W&B run, reads the trial's hyperparameters from ``wandb.config``,
    builds the MNIST data loaders, model, optimizer and scheduler, delegates
    the training loop to ``training`` and finally logs ``test_loss``.

    Relies on the module-level ``name``, ``result_dir`` and ``device``.

    Args:
        config: Optional configuration forwarded to ``wandb.init``. The values
            actually used are read back from ``wandb.config`` and must provide
            ``batch_size``, ``D``, ``M``, ``n_masks``, ``lr``, ``max_patience``,
            ``num_epochs`` and ``lam``.
    """
    with wandb.init(config=config):
        config = wandb.config

        # os.cpu_count() returns None when the count cannot be determined, and
        # DataLoader requires an integer; fall back to loading in the main process.
        num_workers = os.cpu_count() or 0

        train_data, val_data, test_data = load_data('mnist', binarize=True)
        train_loader = DataLoader(train_data, batch_size=config.batch_size,
                                  shuffle=True, num_workers=num_workers)
        val_loader = DataLoader(val_data, batch_size=config.batch_size,
                                shuffle=False, num_workers=num_workers)
        test_loader = DataLoader(test_data, batch_size=config.batch_size,
                                 shuffle=False, num_workers=num_workers)

        model = models.MADE(input_dim=config.D, hidden_dims=[config.M],
                            n_masks=config.n_masks).to(device)

        # Only parameters that require gradients are handed to the optimizer.
        trainable_params = [p for p in model.parameters() if p.requires_grad]
        optimizer = torch.optim.Adam(trainable_params, lr=config.lr)

        # NOTE(review): step_size=100 is larger than the 30 epochs used by the
        # sweep; if `training` steps the scheduler once per epoch the learning
        # rate never decays -- confirm this is intended.
        scheduler = lr_scheduler.StepLR(optimizer, step_size=100, gamma=0.5)

        _ = training(name=name, result_dir=result_dir, max_patience=config.max_patience,
                     num_epochs=config.num_epochs, model=model, loss_fn=cross_entropy_loss_fn,
                     optimizer=optimizer, scheduler=scheduler, training_loader=train_loader,
                     val_loader=val_loader, device=device, lam=config.lam)

        # NOTE(review): this scores `model` in whatever state `training` left
        # it; confirm that is the checkpoint meant to be evaluated rather than
        # one saved under result_dir.
        model.eval()
        test_val = evaluation(test_loader, cross_entropy_loss_fn, model_best=model)

        # NOTE(review): the logged value is scaled by batch_size, which is
        # itself swept. Presumably this undoes a normalisation inside
        # `evaluation` -- confirm, otherwise test_loss is not comparable across
        # runs with different batch sizes.
        wandb.log({"test_loss": test_val * config.batch_size})

# Start an agent on the registered sweep; it calls hyperparameter_sweep once
# for each configuration it receives.
wandb.agent(sweep_id=sweep_id, function=hyperparameter_sweep)

..running


Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Create sweep with ID: 96t4czhe
Sweep URL: https://wandb.ai/rajpal906/made_hyperparameter_optimization/sweeps/96t4czhe


[34m[1mwandb[0m: Agent Starting Run: tl37k74j with config:
[34m[1mwandb[0m: 	D: 784
[34m[1mwandb[0m: 	M: 8000
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	lam: 10
[34m[1mwandb[0m: 	lr: 0.05428654335506235
[34m[1mwandb[0m: 	max_patience: 10
[34m[1mwandb[0m: 	n_masks: 1
[34m[1mwandb[0m: 	num_epochs: 1
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33ms2592586[0m ([33mrajpal906[0m). Use [1m`wandb login --relogin`[0m to force relogin


Epoch: 0, train nll=495.46337890625, val nll=3.4369427286783854
saved!
FINAL LOSS: nll=3.4465126251220704


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁
test_loss,▁
train_loss,▁
val_loss,▁

0,1
epoch,0.0
test_loss,3.44651
train_loss,495.46338
val_loss,3.43694


[34m[1mwandb[0m: Agent Starting Run: i8t0wlty with config:
[34m[1mwandb[0m: 	D: 784
[34m[1mwandb[0m: 	M: 1000
[34m[1mwandb[0m: 	batch_size: 48
[34m[1mwandb[0m: 	lam: 1
[34m[1mwandb[0m: 	lr: 0.003210099737100103
[34m[1mwandb[0m: 	max_patience: 10
[34m[1mwandb[0m: 	n_masks: 1
[34m[1mwandb[0m: 	num_epochs: 1
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Epoch: 0, train nll=207.107666015625, val nll=2.2184019711812337
saved!
FINAL LOSS: nll=2.193197046661377


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁
test_loss,▁
train_loss,▁
val_loss,▁

0,1
epoch,0.0
test_loss,2.1932
train_loss,207.10767
val_loss,2.2184


[34m[1mwandb[0m: Agent Starting Run: 8qviid0t with config:
[34m[1mwandb[0m: 	D: 784
[34m[1mwandb[0m: 	M: 1000
[34m[1mwandb[0m: 	batch_size: 48
[34m[1mwandb[0m: 	lam: 10
[34m[1mwandb[0m: 	lr: 0.060291400727110006
[34m[1mwandb[0m: 	max_patience: 10
[34m[1mwandb[0m: 	n_masks: 1
[34m[1mwandb[0m: 	num_epochs: 1
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Epoch: 0, train nll=478.0517578125, val nll=4.603689666748047
saved!
FINAL LOSS: nll=4.637733697509765


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁
test_loss,▁
train_loss,▁
val_loss,▁

0,1
epoch,0.0
test_loss,4.63773
train_loss,478.05176
val_loss,4.60369


[34m[1mwandb[0m: Agent Starting Run: g06zi212 with config:
[34m[1mwandb[0m: 	D: 784
[34m[1mwandb[0m: 	M: 500
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	lam: 1
[34m[1mwandb[0m: 	lr: 0.009392026795759336
[34m[1mwandb[0m: 	max_patience: 10
[34m[1mwandb[0m: 	n_masks: 1
[34m[1mwandb[0m: 	num_epochs: 1
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Epoch: 0, train nll=216.70138549804688, val nll=3.6436473172505695
saved!
FINAL LOSS: nll=3.6130837928771973


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁
test_loss,▁
train_loss,▁
val_loss,▁

0,1
epoch,0.0
test_loss,3.61308
train_loss,216.70139
val_loss,3.64365


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: wez1s990 with config:
[34m[1mwandb[0m: 	D: 784
[34m[1mwandb[0m: 	M: 8000
[34m[1mwandb[0m: 	batch_size: 136
[34m[1mwandb[0m: 	lam: 1
[34m[1mwandb[0m: 	lr: 0.0637729418742657
[34m[1mwandb[0m: 	max_patience: 10
[34m[1mwandb[0m: 	n_masks: 1
[34m[1mwandb[0m: 	num_epochs: 1
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Epoch: 0, train nll=319.82501220703125, val nll=1.0572994028727214
saved!
FINAL LOSS: nll=1.0306237930297852


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁
test_loss,▁
train_loss,▁
val_loss,▁

0,1
epoch,0.0
test_loss,1.03062
train_loss,319.82501
val_loss,1.0573
