In [1]:
import importlib
import itertools
import os
import shutil
import sys
import time

import torch
import torch.optim as optim
from torchvision import transforms as T
%matplotlib notebook

In [2]:
# Run on the GPU when CUDA is available; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print('Using device:', device)

Using device: cuda


Sets the candidate values for every hyper-parameter and builds all of their combinations.

In [3]:
# Set to True to train on the small overfitting dataset instead of the full one.
OVERFITTING_TRAINING     = False

hyperparameters_filename = 'hyperparameters'
overfit_data_dir_path    = 'data_overfit/nyuv2'
normal_data_dir_path     = 'data/nyuv2'

# Each list holds the candidate values of one hyper-parameter; the grid below
# is the Cartesian product of all of these lists.
IMAGE_SIZES              = [(224,224), (64, 64)] #(448, 448)
TRAIN_TEST_RATIOS        = [0.9]                                   # <-- MUST USE 0.5 FOR OVERFITTING
BATCH_SIZES              = [4]
NUM_WORKERSES            = [4]

BETASES                  = [(0.9, 0.99)]
LRS                      = [0.001]
MOMENTUMS                = [0.9]
WEIGHT_DECAYS            = [0.0005]

STEP_SIZES               = [1000]
GAMMAS                   = [0.1]

NUM_EPOCHSES             = [10]

# The argument order fixes the position of every value inside a combination
# tuple, so it must stay in sync with the code that reads the tuples.
all_combintations = list(itertools.product(
    IMAGE_SIZES, TRAIN_TEST_RATIOS, BATCH_SIZES, NUM_WORKERSES,
    BETASES, LRS, MOMENTUMS, WEIGHT_DECAYS,
    STEP_SIZES, GAMMAS,
    NUM_EPOCHSES,
))

CWD = os.getcwd()
# normpath converts the '/' in the relative dataset paths to the platform
# separator, so the result never mixes '\' and '/' on Windows.
if OVERFITTING_TRAINING:
    DATASET_DIR = os.path.normpath(os.path.join(CWD, overfit_data_dir_path))
else:
    DATASET_DIR = os.path.normpath(os.path.join(CWD, normal_data_dir_path))
print(DATASET_DIR)

C:\Users\tomav\Documents\GitHub\cs236781-project\data/nyuv2


Loops over all the combinations, trains a model for each one, and saves both the model and its hyper-parameters file.

In [4]:
# Train one model per hyper-parameter combination. For every combination the
# values are written to '<hyperparameters_filename>.py', the project modules
# are (re-)imported so they see that file, a model is trained with a dedicated
# checkpoint name, and the hyper-parameters file is moved next to the checkpoint.
current_model_id = 0
for combintation in all_combintations:
    # Positions follow the argument order of the itertools.product call that
    # built all_combintations.
    (IMAGE_SIZE, TRAIN_TEST_RATIO, BATCH_SIZE, NUM_WORKERS,
     BETAS, LR, MOMENTUM, WEIGHT_DECAY,
     STEP_SIZE, GAMMA,
     NUM_EPOCHS) = combintation

    # One path is used for writing, reading back and moving the file, so the
    # three operations can never point at different locations.
    hyperparameters_path = os.path.join(CWD, hyperparameters_filename + '.py')
    with open(hyperparameters_path, "w") as hyperparameters_file:
        print(f"IMAGE_SIZE={IMAGE_SIZE}", file=hyperparameters_file)
        print(f"TRAIN_TEST_RATIO={TRAIN_TEST_RATIO}", file=hyperparameters_file)
        print(f"BATCH_SIZE={BATCH_SIZE}", file=hyperparameters_file)
        print(f"NUM_WORKERS={NUM_WORKERS}", file=hyperparameters_file)
        print(file=hyperparameters_file)
        print(f"BETAS={BETAS}", file=hyperparameters_file)
        print(f"LR={LR}", file=hyperparameters_file)
        print(f"MOMENTUM={MOMENTUM}", file=hyperparameters_file)
        print(f"WEIGHT_DECAY={WEIGHT_DECAY}", file=hyperparameters_file)
        print(file=hyperparameters_file)
        print(f"STEP_SIZE={STEP_SIZE}", file=hyperparameters_file)
        print(f"GAMMA={GAMMA}", file=hyperparameters_file)
        print(file=hyperparameters_file)
        print(f"NUM_EPOCHS={NUM_EPOCHS}", file=hyperparameters_file)

    # Python caches imported modules in sys.modules, so from the second
    # iteration on a plain `from ... import ...` would silently reuse the
    # modules loaded with the FIRST combination's hyper-parameters. Drop the
    # cached entries so the imports below execute again against the new file.
    # NOTE(review): assumes these are the only project modules that read the
    # hyper-parameters file - add any other dependent module to this tuple.
    for stale_module_name in (hyperparameters_filename, 'models', 'data_manager', 'train'):
        sys.modules.pop(stale_module_name, None)
    # The hyper-parameters file was created at runtime; make sure the import
    # system's finders notice it.
    importlib.invalidate_caches()

    # import here, to include the above new hyper-parameters file.
    from models import SpecialFuseNetModel
    from data_manager import rgbd_gradients_dataset, rgbd_gradients_dataloader
    from train import FuseNetTrainer

    print("Current Hyper-Parameters:")
    with open(hyperparameters_path, "r") as hyperparameters_file:
        print(hyperparameters_file.read())

    rgbd_grads_ds = rgbd_gradients_dataset(root=DATASET_DIR, use_transforms=True)

    dl_train,dl_test = rgbd_gradients_dataloader(root=DATASET_DIR, use_transforms=True)

    # _ = plot.rgbd_gradients_dataset_first_n(dataset=rgbd_grads_ds,n=5)
    print(f'Found {len(rgbd_grads_ds)} images in dataset folder.')

    # Per-sample sizes are read from one training batch; [1:] drops the
    # leading dimension (presumably the batch dimension - TODO confirm).
    sample_batch = next(iter(dl_train))
    rgb_size = tuple(sample_batch['rgb'].shape[1:])
    depth_size = tuple(sample_batch['depth'].shape[1:])
    grads_size = tuple(sample_batch['x'].shape[1:])

    # Train
    fusenetmodel = SpecialFuseNetModel(rgb_size=rgb_size,depth_size=depth_size,grads_size=grads_size, device=device)
    trainer = FuseNetTrainer(model=fusenetmodel, device=device)
    checkpoint_folder = 'checkpoints/'
    checkpoint_file_name = 'special_fusenet_' + str(current_model_id)
    checkpoint_file = checkpoint_folder + checkpoint_file_name
    # The hyper-parameters file is moved into this folder after training, so
    # it has to exist; creating it is a no-op when it is already there.
    os.makedirs(os.path.join(CWD, checkpoint_folder), exist_ok=True)
    # Remove a checkpoint left over from a previous run with the same id.
    if os.path.isfile(f'{checkpoint_file}.pt'):
        os.remove(f'{checkpoint_file}.pt')
    res = trainer.fit(dl_train, dl_test, early_stopping=1000, print_every=10, checkpoints=checkpoint_file)

    # Save the current hyper-parameters file next to the current saved model.
    shutil.move(
        hyperparameters_path,
        os.path.join(CWD, checkpoint_folder,
                     hyperparameters_filename + '_' + checkpoint_file_name + '.py'),
    )

    print("-----------------------------------------------------------------")
    current_model_id += 1

Current Hyper-Parameters:
IMAGE_SIZE=(224, 224)
TRAIN_TEST_RATIO=0.9
BATCH_SIZE=4
NUM_WORKERS=4

BETAS=(0.9, 0.99)
LR=0.001
MOMENTUM=0.9
WEIGHT_DECAY=0.0005

STEP_SIZE=1000
GAMMA=0.1

NUM_EPOCHS=10

Found 1449 images in dataset folder.
[I] - default optimizer set: SGD(lr=0.001,momentum=0.9,weight_decay=0.0005)
[I] - default scheduler set: StepSR(step_size=1000,gamma=0.1)
--- EPOCH 1/10 ---
train_batch (0.069):   1%|▌                                                            | 3/327 [00:04<08:06,  1.50s/it]


KeyboardInterrupt: 