In [1]:
import os

# Configure the CUDA caching allocator BEFORE torch is imported, so the option
# is already present in the environment whenever PyTorch first reads it. The
# project imports and logging setup below may import torch or touch CUDA too.
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"

import torch  # noqa: E402  (must follow the allocator configuration above)
from utils import set_up_logging  # noqa: E402
from config import LOGS_PATH  # noqa: E402

set_up_logging(LOGS_PATH)

# Use the first GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")
# Last expression of the cell: displayed as the cell output.
f"Using device {device}"

'Using device cuda:0'

In [2]:
from scipy.stats import loguniform, uniform, randint
from models import MODELS, test_models


# Search space passed to the random hyperparameter search.
# Lists hold discrete candidates; scipy.stats frozen distributions are sampled.
# scipy's uniform(loc, scale) covers [loc, loc + scale]; loguniform(a, b) covers [a, b].
hyperparameters = [
    {
        # --- data / optimisation ---
        "batch_size": [32, 64, 128],
        "edit_count": [12],
        "bin_count": [16],
        "learning_rate": loguniform(a=5e-4, b=5e-3),  # log-spaced in [5e-4, 5e-3]
        "scheduler_gamma": uniform(loc=0.8, scale=0.15),  # [0.8, 0.95]
        "num_epochs": [12],
        # --- activations / regularisation ---
        # NOTE(review): this samples [0.5, 2.0], not [0.5, 1.5] — confirm intended.
        "elu_alpha": uniform(loc=0.5, scale=1.5),
        # NOTE(review): "leaky_relu_slope" and "leaky_relu_alpha" (below) look like
        # they describe the same quantity — confirm which one the models read.
        "leaky_relu_slope": uniform(loc=0, scale=0.03),  # [0, 0.03]
        "dropout_prob": uniform(loc=0, scale=0.1),  # [0, 0.1]
        # --- architecture ---
        "features": [
            [16, 32, 64],
            [32, 64, 128],
            [8, 16, 32],
            [8, 8, 8],
            [16, 16, 16],
        ],
        "kernel_sizes": [[3, 3, 3]],
        # Swap in list(MODELS.keys()) to search over every registered model type.
        "model_type": ["Residual3"],
        # --- boolean switches ---
        "clip_gradients": [True, False],
        "use_instance_norm": [True, False],
        "use_elu": [True, False],
        "leaky_relu_alpha": uniform(loc=0, scale=0.05),  # [0, 0.05]
    }
]

# Run the project's model checks before searching (logs one result per model).
test_models()

2024-06-25 08:59:52,244 - INFO - Testing model Dummy
2024-06-25 08:59:52,249 - INFO - Test passed! Output shape matches input shape.
2024-06-25 08:59:52,249 - INFO - Testing model SimpleCNN
2024-06-25 08:59:52,746 - INFO - Test passed! Output shape matches input shape.
2024-06-25 08:59:52,752 - INFO - Testing model Residual
2024-06-25 08:59:53,853 - INFO - Test passed! Output shape matches input shape.
2024-06-25 08:59:53,917 - INFO - Testing model Residual3
2024-06-25 08:59:55,590 - INFO - Test passed! Output shape matches input shape.


In [3]:
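# Disabled manual run: a single `train` call with fixed hyperparameters, kept for reference.
# NOTE(review): `train` is not imported in the visible cells — import it before uncommenting.
# train(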
#     {
#         "batch_size": 128,
#         "edit_count": 12,
#         "bin_count": 16,
#         "learning_rate": 1e-3,
#         "scheduler_gamma": 0.8,
#         "elu_alpha": 1,
#         "dropout_prob": 0.05,
#         "features": [8, 16, 32],
#         "kernel_sizes": [3, 3, 3],
#         "num_epochs": 12,
#         "model_type": "Residual3",
#         "clip_gradients": True,
#         "use_instance_norm": True,
#         "use_elu": False,
#         "leaky_relu_alpha": 0.01,
#     }
# )

In [4]:
from training import random_hparam_search
from config import RUNS_PATH, TRAIN_DATA, TEST_DATA, MODELS_PATH


# Value handed to the search as `timeout_hours` (presumably its time budget — confirm).
SEARCH_TIMEOUT_HOURS = 8

# Launch the random search over the `hyperparameters` space on the selected device.
# Kept as the cell's last expression so any return value is still displayed.
random_hparam_search(
    hyperparameters=hyperparameters,
    device=device,
    timeout_hours=SEARCH_TIMEOUT_HOURS,
    train_data_paths=TRAIN_DATA,
    test_data_paths=TEST_DATA,
    models_path=MODELS_PATH,
    tensorboard_path=RUNS_PATH,
)

2024-06-25 08:59:55,973 - INFO - Starting run_170 with hparams {
  "batch_size": 64,
  "bin_count": 16,
  "clip_gradients": true,
  "dropout_prob": 0.09784778880383105,
  "edit_count": 12,
  "elu_alpha": 0.5588538605400805,
  "features": [
    8,
    16,
    32
  ],
  "kernel_sizes": [
    3,
    3,
    3
  ],
  "leaky_relu_alpha": 0.012913890161555076,
  "leaky_relu_slope": 0.022615416455484896,
  "learning_rate": 0.002130094098871897,
  "model_type": "Residual3",
  "num_epochs": 12,
  "scheduler_gamma": 0.8142448793722726,
  "use_elu": true,
  "use_instance_norm": false
}
2024-06-25 08:59:55,976 - INFO - Loaded 1000 original images
2024-06-25 08:59:55,979 - INFO - Loaded 1000 original images
Tensor-likes are not close!

Mismatched elements: 442 / 262144 (0.2%)
Greatest absolute difference: 0.00028640031814575195 at index (14, 0, 3, 6, 1) (up to 1e-05 allowed)
Greatest relative difference: 0.03313953488372093 at index (52, 0, 2, 8, 3) (up to 1e-05 allowed)
  _check_trace(
2024-06-25 0