In [1]:
import torch
from model import load_resnet_model
from train_regression_weighted_loss_loaded import train_model
from dataloader import create_dataloader

from utils import sorted_file_paths
import torch.nn as nn
import torch.optim as optim

2024-02-02 14:54:35.921382: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-02-02 14:54:35.921430: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-02-02 14:54:36.143140: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-02-02 14:54:36.635198: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


# Setting up common variables for all experiments

In [2]:
# Hyperparameters shared by every experiment in this notebook.
LR = 0.001
EPOCH = 30
BATCH_SIZE = 64  # previously a literal repeated in both create_dataloader calls
SEED = 42

# Seed PyTorch's random number generator so weight initialisation (and any
# torch-driven shuffling) is repeatable across kernel restarts.
# NOTE(review): if create_dataloader draws randomness from another library,
# that library must be seeded separately - confirm against dataloader.py.
torch.manual_seed(SEED)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
train_files = sorted_file_paths('./data/records/train/')
val_files = sorted_file_paths('./data/records/val/')

# One loader per record file.
# NOTE(review): the validation loaders receive the same `True` flag as the
# training loaders. If that argument enables shuffling or training-only
# behaviour, validation should probably pass False - confirm against the
# create_dataloader signature before changing it.
train_loaders = [create_dataloader(path, True, BATCH_SIZE) for path in train_files]
val_loaders = [create_dataloader(path, True, BATCH_SIZE) for path in val_files]

# Make one full pass over the training loaders to gather every label; the
# DenseWeight-based experiments fit their weighting model on this list.
training_labels = []
for train_loader in train_loaders:
    for _, labels in train_loader:
        training_labels.extend(labels.detach().cpu().numpy())

# Fail fast here instead of deep inside a later experiment when the data
# directory is empty or mistyped.
assert training_labels, "no training labels collected - check ./data/records/train/"

# Experiment 1: L2 loss

In [8]:
# Experiment tag; handed to train_model as its seventh positional argument.
ex = 'ex_1'

# Baseline objective: PyTorch's built-in mean-squared-error (L2) loss.
loss = nn.MSELoss()
# Presumably a ResNet-50 backbone with a single regression output - see
# load_resnet_model in model.py to confirm.
model = load_resnet_model('resnet50', num_classes=1)
optimizer = optim.Adam(model.parameters(), lr=LR)

train_model(
    model, loss, optimizer,
    train_loaders, val_loaders,
    device, ex,
    epochs=EPOCH,
)



# Experiment 2: L3 loss

In [None]:
from weighted_L3_loss import L3Loss

# Experiment tag; handed to train_model as its seventh positional argument.
ex = 'ex_2'

# Same model/optimizer recipe as the other experiments; only the objective
# changes. L3Loss is constructed with its default arguments (defined in
# weighted_L3_loss.py).
loss = L3Loss()
model = load_resnet_model('resnet50', num_classes=1)
optimizer = optim.Adam(model.parameters(), lr=LR)

train_model(
    model, loss, optimizer,
    train_loaders, val_loaders,
    device, ex,
    epochs=EPOCH,
)



KeyboardInterrupt: 

# Experiment 3: Weighted MSE with dense_weight model, alpha = 0.25

In [None]:
from dense_weight import DenseWeight
from weighted_MSE_loss import WeightedMSELoss

In [None]:
# Experiment tag; handed to train_model as its seventh positional argument.
ex = 'ex_3'

# Fit a DenseWeight model on the collected training labels. The constructor
# argument 0.25 is the alpha named in this section's heading.
dense_weight_model = DenseWeight(0.25)
dense_weight_model.fit(training_labels)

# The fitted DenseWeight model is handed to the weighted MSE loss.
loss = WeightedMSELoss(dense_weight_model)
model = load_resnet_model('resnet50', num_classes=1)
optimizer = optim.Adam(model.parameters(), lr=LR)

train_model(
    model, loss, optimizer,
    train_loaders, val_loaders,
    device, ex,
    epochs=EPOCH,
)



MAE for bin 2: 1.5641959
MAE for bin 3: 1.1126256
MAE for bin 4: 1.0491617
MAE for bin 5: 0.9847947
MAE for bin 6: 0.9906604
MAE for bin 1: 60.09752
MAE for bin 2: 50.60432
MAE for bin 3: 54.209522
MAE for bin 4: 58.770176
MAE for bin 5: 61.25448
MAE for bin 6: 61.864296
Epoch 1/1, Train Loss: 2.0621, Train R2: -9.1192, Train weighted MAE: 1.4322,Val Loss: 4109.0274, Val R2: -22487.6166, Val weighted MAE: 58.1779
Model saved as checkpoint_epoch_1.pth
Model saved as final_model.pth
Training completed and final model saved.


# Experiment 4: Weighted MSE with dense_weight model, alpha = 0.5

In [None]:
# Experiment tag; handed to train_model as its seventh positional argument.
ex = 'ex_4'

# Same setup as the alpha = 0.25 experiment, but the DenseWeight model is
# built with 0.5 (the alpha named in this section's heading).
dense_weight_model = DenseWeight(0.5)
dense_weight_model.fit(training_labels)

# The fitted DenseWeight model is handed to the weighted MSE loss.
loss = WeightedMSELoss(dense_weight_model)
model = load_resnet_model('resnet50', num_classes=1)
optimizer = optim.Adam(model.parameters(), lr=LR)

train_model(
    model, loss, optimizer,
    train_loaders, val_loaders,
    device, ex,
    epochs=EPOCH,
)



MAE for bin 2: 1.1206676
MAE for bin 3: 0.9540928
MAE for bin 4: 0.9106966
MAE for bin 5: 0.96642876
MAE for bin 6: 1.1671207
MAE for bin 1: 10.19127
MAE for bin 2: 19.229319
MAE for bin 3: 13.944123
MAE for bin 4: 13.980035
MAE for bin 5: 17.688894
MAE for bin 6: 25.890987
Epoch 1/1, Train Loss: 1.6738, Train R2: -7.6164, Train weighted MAE: 1.0850,Val Loss: 900.6723, Val R2: -5656.7559, Val weighted MAE: 12.1556
Model saved as checkpoint_epoch_1.pth
Model saved as final_model.pth
Training completed and final model saved.


# Experiment 5: Balanced MSE loss

In [None]:
from balanced_MSE_loss import BMCLoss

In [None]:
# Experiment tag; handed to train_model as its seventh positional argument.
ex = 'ex_5'

# Starting value passed to BMCLoss, and the learning rate used for the
# loss's own noise_sigma parameter group below.
init_noise_sigma = 1.0
sigma_lr = 0.001

loss = BMCLoss(init_noise_sigma)
model = load_resnet_model('resnet50', num_classes=1)
optimizer = optim.Adam(model.parameters(), lr=LR)

# Register loss.noise_sigma with the optimizer as a separate parameter group
# so it is optimised alongside the model weights, with its own learning rate.
# The extra 'name' entry is stored on the group as a label.
optimizer.add_param_group({
    'params': loss.noise_sigma,
    'lr': sigma_lr,
    'name': 'noise_sigma',
})

train_model(
    model, loss, optimizer,
    train_loaders, val_loaders,
    device, ex,
    epochs=EPOCH,
)



MAE for bin 2: 1.4273541
MAE for bin 3: 1.1952238
MAE for bin 4: 1.7503417
MAE for bin 5: 1.7749552
MAE for bin 6: 2.318867
MAE for bin 1: 2.951308
MAE for bin 2: 1.9054776
MAE for bin 3: 4.6486855
MAE for bin 4: 4.2353673
MAE for bin 5: 2.5514913
MAE for bin 6: 2.5924032
Epoch 1/1, Train Loss: 8.6341, Train R2: -29.0568, Train weighted MAE: 1.4724,Val Loss: 11.1934, Val R2: -140.1235, Val weighted MAE: 2.7783
Model saved as checkpoint_epoch_1.pth
Model saved as final_model.pth
Training completed and final model saved.
