In [None]:
import os
from os.path import dirname, abspath, join

import sys
import torch
from torch.utils.data import DataLoader
from torchvision import datasets, models, transforms
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

In [None]:
# Make the project root the working directory so the `src` package can be imported.
# The notebook is assumed to be started one level below the root (TODO confirm).
# The guard keeps this cell idempotent: once `src` is visible from the current
# directory, re-running the cell no longer climbs another level up.
if not os.path.isdir(join(os.getcwd(), "src")):
    os.chdir(dirname(os.getcwd()))
os.getcwd()

In [None]:
# Import local libraries
from src.logger import set_logger as sl

# Load transformations
from src.transformations.CC_RRC_RVF_RHF_RA import get_transform as gt_train
from src.transformations.CC_R import get_transform as gt_valid
from src.dataset import FundusDataset
from src.RAMdatset import FundusDatasetRAM
from src.plot_images import plot_images
from src.train_validation import train_validation_phase

In [None]:
# Set-up logger
# Create the logger only when this kernel session does not have one yet, so
# re-running the cell reuses the existing instance instead of building a new one.
try:
    logger
except NameError:
    # Fresh kernel: the name has not been bound yet.
    # Only NameError is handled here; any failure inside sl(...) now surfaces
    # instead of being swallowed and silently retried.
    logger = None

if logger is None:
    logger = sl("info")

# Parameters

In [None]:
# Run configuration: hyper-parameters, input locations and derived names.


def format_lr_tag(value):
    """Return the fractional digits of ``value`` for use in file names.

    ``str(value)[2:]`` only works while ``str`` uses positional notation:
    ``str(1e-05)`` is ``'1e-05'`` and would produce the tag ``'-05'``.
    Fixed-point formatting yields the same tag for the usual rates
    (0.001 -> "001") and keeps working for smaller ones (1e-05 -> "00001").
    Values below 1e-12 are rounded away by the 12-digit format.
    """
    fixed = f"{value:.12f}".rstrip("0")
    # A whole number leaves nothing after the point; fall back to "0",
    # which matches what the positional ``str`` form gave (1.0 -> "0").
    return fixed.split(".", 1)[1] or "0"


epochs        = 100
lr            = 0.001
batch_size    = 512

# os.cpu_count() returns None when the CPU count cannot be determined;
# fall back to 0 so that DataLoader still receives an integer.
num_workers   = os.cpu_count() or 0

is_pretrained = False

im_size       = (300, 300)
is_grayscale  = False

# NOTE(review): absolute, machine-specific data location.
data          = abspath('/mnt/data/ukbb_300x300/')
target_train  = abspath('./data/ukbb_metadata_qc_train.csv')
target_valid  = abspath('./data/ukbb_metadata_qc_validation.csv')

# Run identifier built from batch size and learning rate, e.g. "raw-bs512-lr001"
name = f"raw-bs{batch_size}-lr{format_lr_tag(lr)}"

model_pth     = abspath(f'./output/models/{name}.pth')

# First CUDA device when available, otherwise the CPU
device        = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [None]:
######################################################################################
# Start-up summary: interpreter, hardware, paths and hyper-parameters
log_info = logger.info

# Software versions and CPU count
for report_line in (
    f"System Python version  = {sys.version}",
    f"System Pytorch version = {torch.__version__}",
    f"System usable CPUs     = {os.cpu_count()}",
):
    log_info(report_line)

# Selected device, plus CUDA details when a GPU is visible
log_info(f"System training device = {device}")
if torch.cuda.is_available():
    log_info(f"System CUDA version    = {torch.version.cuda}")
    log_info(f"System CUDA count      = {torch.cuda.device_count()}")
    log_info(f"System CUDA name       = {torch.cuda.get_device_name()}")

# Working directory, input locations and model output path
for report_line in (
    f"Path working directory = {os.getcwd()}",
    f"Path input data        = {data}",
    f"Path to target train   = {target_train}",
    f"Path to target valid   = {target_valid}",
    f"Path model             = {model_pth}",
):
    log_info(report_line)

# Training hyper-parameters
for report_line in (
    f"Model learning rate    = {lr}",
    f"Model batch_size       = {batch_size}",
    f"Model num_workers      = {num_workers}",
    f"Model pretrained       = {is_pretrained}",
):
    log_info(report_line)

# Dataset

In [None]:
# Build one FundusDatasetRAM per split from the same image directory.
# Both use the "sex" target; only the training split is created with augment=True.
logger.info("Loading datasets")
dataset = {
    "train": FundusDatasetRAM(imgs_dir=data, target_file=target_train, target="sex", augment=True),
    "valid": FundusDatasetRAM(imgs_dir=data, target_file=target_valid, target="sex", augment=False),
}

# Number of samples in the train and valid(ation) datasets
size = {split: len(split_data) for split, split_data in dataset.items()}
logger.info(f"Dataset size training   = {size['train']}")
logger.info(f"Dataset size validation = {size['valid']}")
logger.info(f"Dataset labels/classes  = {dataset['train'].classes}")

In [None]:
# One DataLoader per split, sharing batch size and worker count.
# The training loader shuffles and pins memory; the validation loader does neither.
logger.info("Loading dataloaders")
loader_common = dict(batch_size=batch_size, num_workers=num_workers)
dataloader = {
    "train": DataLoader(dataset["train"], shuffle=True, pin_memory=True, **loader_common),
    "valid": DataLoader(dataset["valid"], shuffle=False, pin_memory=False, **loader_common),
}

######################################################################################
# Plot sample images from the validation loader
# (presumably saved under the run name — verify against plot_images)
logger.info("Plot sample images")
plot_images(
    dataloader["valid"],
    dataset["valid"].classes,
    title="Sample Fundus",
    save=name,
)

In [None]:
# Build the network: torchvision Inception v3 created with aux_logits=False
inception_options = {"pretrained": is_pretrained, "aux_logits": False}
model = models.inception_v3(**inception_options)
logger.info(f"Model loading            = {model.__class__.__name__}")

# Input width of the existing final fully-connected layer
n_ftrs = model.fc.in_features
logger.info(f"Model number of features = {n_ftrs}")

# Swap the final layer for a linear head with one output per dataset class
class_count = len(dataset["train"].classes)
model.fc = nn.Linear(n_ftrs, class_count)

######################################################################################
# Move the model to the selected device; when more than one CUDA device is
# visible, wrap it in DataParallel across all of them.
model = model.to(device)
usable_devices = list(range(torch.cuda.device_count()))
if len(usable_devices) > 1:
    model = nn.DataParallel(
        model,
        device_ids=usable_devices,
        output_device=device,
    )
    logger.info(f"Let's use {len(usable_devices)} GPUs!")

######################################################################################
# Loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=lr, betas=(0.9, 0.999))

######################################################################################
# Create the output directory up front: nothing visible in this notebook
# creates it, and a missing directory would otherwise only be discovered when
# the model is written during or after the long training run.
os.makedirs(dirname(model_pth), exist_ok=True)

# Run the train/validation loop.
# `save` is presumably the checkpoint path — verify against train_validation_phase.
results = train_validation_phase(
    model=model,
    dataset=dataset,
    dataloader=dataloader,
    device=device,
    epochs=epochs,
    criterion=criterion,
    optimizer=optimizer,
    save=model_pth,
    logger=logger,
)