# Importing libraries

In [1]:
import os

# Master switch of the notebook: when True the run is tracked on Comet and the
# later cells write the config, the CV splitter and a weights folder to disk.
IS_EXPERIMENT = True

# Bind the name unconditionally so that later cells can pass `experiment`
# around even when tracking is switched off (otherwise they hit a NameError).
experiment = None

if IS_EXPERIMENT:
    # Imported here, in the very first cell, ahead of the other libraries.
    from comet_ml import Experiment

    # The API key is a credential and must not be stored in the notebook.
    # It is read from the COMET_API_KEY environment variable; when the variable
    # is unset None is passed and comet_ml falls back to its own configuration
    # lookup (e.g. its config file).
    # NOTE(review): a key that was previously committed in this cell should be
    # revoked and regenerated.
    experiment = Experiment(
        api_key=os.environ.get("COMET_API_KEY"),
        project_name="pointcloudfcd",
        workspace="mrugnivenko",
    )

COMET INFO: Experiment is live on comet.ml https://www.comet.ml/mrugnivenko/pointcloudfcd/73f3c4f662224adca3fb8d56ff3f8cbe



In [2]:
import os
import time
import json
import uuid
import yaml
import utils
import pickle
import datetime
import numpy as np
from tqdm import tqdm
from sklearn.model_selection import KFold

from utils.train import *

import warnings
warnings.filterwarnings("ignore")

%config Completer.use_jedi = False

# Specifying parameters of model

In [3]:
# Region of the brain the model is trained on:
#   - full:      the whole brain
#   - temple:    the temple part of the brain
#   - nottemple: the whole brain without the temple part
BRAIN_TYPE = 'full'

# MRI modality: 't1' or 't2'
BRAIN_MODALITY = 't2'

# Whether to use smart sampling
SMART_SAMPLING = True

# Config file that selects the type of Local Aggregation Operators
CFG = 'cfgs/brain_pospoolxyz.yaml'

# Index of the GPU to run on (e.g. 0 or 1)
DEVICE = 0
torch.cuda.set_device(f"cuda:{DEVICE}")

# Batch size and crop size for the loaders
BATCH_SIZE = 16
CROP_SIZE = 64

# Whether to use absolute coordinates of points or relative ones
IS_RETURN_ABS_COORDS = True

# Whether to use only meaningful points, i.e. points that are not air
IS_RETURN_PC_WITHOUT_AIR_POINTS = False

# Share of biased coin flips that return False (crop taken inside the FCD region)
COIN_FLIP_THRESHOLD = 0.8

# Loss function: 'BCE' or 'DICE'
LOSS_TYPE = 'BCE'

# Whether to weight the loss according to the quantity of the positive class
WEIGHTED_LOSS = True

# Maximal number of epochs
EPOCHS = 400

# Fine-tune a pretrained model (True) or train from scratch (False)
FINE_TUNE = False

# Path to the pretrained model; only set when fine-tuning
PRETRAINED_MODEL_PATH = '' if FINE_TUNE else None

# Patience for early stopping
PATIENCE = 150

# Date of the experiment
DATE = datetime.date.today().isoformat()

# Unique name of the experiment, also used as the run name on Comet
EXP_NAME = str(uuid.uuid4())
if IS_EXPERIMENT:
    experiment.set_name(EXP_NAME)

# How often to conduct the validation stage
LOGGING_PERIOD = 10

In [4]:
# Base configuration built by `config_seting` from the file referenced by CFG
# (helper comes from the project's utils; presumably it parses the YAML).
config = config_seting(CFG)

# Notebook-level settings that are recorded in the config under their own names.
notebook_settings = {
    'BRAIN_TYPE': BRAIN_TYPE,
    'BRAIN_MODALITY': BRAIN_MODALITY,
    'BATCH_SIZE': BATCH_SIZE,
    'CROP_SIZE': CROP_SIZE,
    'IS_RETURN_ABS_COORDS': IS_RETURN_ABS_COORDS,
    'IS_RETURN_PC_WITHOUT_AIR_POINTS': IS_RETURN_PC_WITHOUT_AIR_POINTS,
    'COIN_FLIP_THRESHOLD': COIN_FLIP_THRESHOLD,
    'LOSS_TYPE': LOSS_TYPE,
    'WEIGHTED_LOSS': WEIGHTED_LOSS,
    'EPOCHS': EPOCHS,
    'FINE_TUNE': FINE_TUNE,
    'PRETRAINED_MODEL_PATH': PRETRAINED_MODEL_PATH,
    'PATIENCE': PATIENCE,
    'DATE': DATE,
    'EXP_NAME': EXP_NAME,
    'LOGGING_PERIOD': LOGGING_PERIOD,
    'SMART_SAMPLING': SMART_SAMPLING,
}

# Item-by-item assignment is kept on purpose: only `config[key] = value` is
# known to be supported by the object returned above.
for setting_name, setting_value in notebook_settings.items():
    config[setting_name] = setting_value

In [5]:
# Path to data: smart sampling reads from a different dataset directory than
# the full-data variant.
path_to_data = (
    f"dataset_ready_to_use/data_only_usefull_areas/{BRAIN_TYPE}"
    if SMART_SAMPLING
    else f"dataset_ready_to_use/data_full/{BRAIN_TYPE}"
)
config['path_to_data'] = path_to_data

# Subjects for training and testing: taken from the saved list when a path is
# given, otherwise every entry of the label directory is used.
path_to_allowed_subjects = 'dataset_ready_to_use/sub_with_all_data.npy'
if not path_to_allowed_subjects:
    allowed_subjects = list(os.listdir("radiologist/preprocessed_data/label"))
else:
    # NOTE(review): allow_pickle=True can execute arbitrary code while loading;
    # only use it on files produced by this project.
    allowed_subjects = np.load(path_to_allowed_subjects, allow_pickle=True).tolist()
config['subjects'] = allowed_subjects

# Features fed to the model; the input dimension is the feature count plus 3
# (presumably the point coordinates - TODO confirm).
FEATURES = ['brains']
config['FEATURES'] = FEATURES
config['input_features_dim'] = len(FEATURES) + 3

# Cross-validation split: 5 shuffled folds with a fixed seed.
kf = KFold(n_splits=5, shuffle=True, random_state=42)

# Normalization dictionaries, initialised to the identity transform
# (mean 0, std 1) for every feature.
MEANS = {feature_name: 0 for feature_name in FEATURES}
STDS = {feature_name: 1 for feature_name in FEATURES}

config['MEANS'] = MEANS
config['STDS'] = STDS

In [6]:
# Saving parameters of experiment: the config as JSON and the cross-validation
# splitter as a pickle, both under experiments/<EXP_NAME>/.
if IS_EXPERIMENT:
    experiment_dir = f'experiments/{EXP_NAME}'
    # exist_ok=True keeps this cell re-runnable within the same kernel session
    # instead of failing with FileExistsError on the second run.
    os.makedirs(experiment_dir, exist_ok=True)
    with open(f'{experiment_dir}/config.json', 'w') as file:
        json.dump(config, file)
    # Context manager guarantees the pickle is flushed and the handle closed;
    # a bare open() passed to pickle.dump leaves the file open.
    with open(f'{experiment_dir}/kfold.pkl', 'wb') as file:
        pickle.dump(kf, file)

# Data loading

In [7]:
# Map every input name to the list of per-subject .nii file paths,
# in the order of `allowed_subjects`.
data_dict = {
    feature: [
        f"{path_to_data}/{BRAIN_MODALITY}_{feature}/{subject}.nii"
        for subject in allowed_subjects
    ]
    for feature in FEATURES
}

# Labels and brain volumes are always present, whatever FEATURES contains.
data_dict.update({
    'labels': [f"{path_to_data}/labels/{subject}.nii" for subject in allowed_subjects],
    'brains': [f"{path_to_data}/{BRAIN_MODALITY}_brains/{subject}.nii" for subject in allowed_subjects],
})

# Training and validation

In [9]:
# Create the folder for this experiment's model weights (presumably filled by
# `train`). exist_ok=True makes the cell safe to re-run in the same session
# instead of raising FileExistsError.
if IS_EXPERIMENT:
    os.makedirs(f'experiments/{EXP_NAME}/weights', exist_ok=True)

In [10]:
# Release cached, currently unused GPU memory held by PyTorch before training starts.
torch.cuda.empty_cache()

In [None]:
# Launch training and validation (`train` comes from utils.train). It receives
# the per-subject file paths, the K-fold splitter, the assembled config, the
# Comet experiment handle and the IS_EXPERIMENT flag.
# Presumably one training run is performed per fold of `kf` - TODO confirm.
train(data_dict, kf, config, experiment, IS_EXPERIMENT)

0it [00:00, ?it/s]

Weights are: [0.021024703979492188, 0.9789752960205078]
MEANS are: {'brains': 287.81573486328125}
STDS are: {'brains': 240.9251251220703}
Train dataset created
Test dataset created
size of train dataset: 64
size of test dataset: 16



  0%|          | 0/400 [00:00<?, ?it/s][A
  0%|          | 1/400 [00:54<6:01:16, 54.33s/it][A
  0%|          | 2/400 [01:13<4:05:01, 36.94s/it][A
  1%|          | 3/400 [01:31<3:21:41, 30.48s/it][A
  1%|          | 4/400 [01:49<3:01:21, 27.48s/it][A
  1%|▏         | 5/400 [02:09<2:50:53, 25.96s/it][A
  2%|▏         | 6/400 [02:28<2:42:59, 24.82s/it][A
  2%|▏         | 7/400 [02:46<2:35:54, 23.80s/it][A
  2%|▏         | 8/400 [03:05<2:31:48, 23.24s/it][A
  2%|▏         | 9/400 [03:24<2:28:06, 22.73s/it][A
  2%|▎         | 10/400 [03:52<2:31:16, 23.27s/it][A
  3%|▎         | 11/400 [04:12<2:28:59, 22.98s/it][A
  3%|▎         | 12/400 [04:31<2:26:10, 22.60s/it][A
  3%|▎         | 13/400 [04:48<2:23:21, 22.23s/it][A
  4%|▎         | 14/400 [05:06<2:20:52, 21.90s/it][A
  4%|▍         | 15/400 [05:25<2:19:05, 21.68s/it][A
  4%|▍         | 16/400 [05:45<2:18:06, 21.58s/it][A
  4%|▍         | 17/400 [06:04<2:16:41, 21.41s/it][A
  4%|▍         | 18/400 [06:22<2:15:26, 21.27s/

Weights are: [0.01922607421875, 0.98077392578125]
MEANS are: {'brains': 283.073974609375}
STDS are: {'brains': 233.25994873046875}
Train dataset created
Test dataset created
size of train dataset: 64
size of test dataset: 16



  0%|          | 1/400 [00:30<3:24:46, 30.79s/it][A
  0%|          | 2/400 [00:49<2:42:51, 24.55s/it][A
  1%|          | 3/400 [01:07<2:29:32, 22.60s/it][A
  1%|          | 4/400 [01:27<2:23:43, 21.78s/it][A
  1%|▏         | 5/400 [01:45<2:18:52, 21.09s/it][A
  2%|▏         | 6/400 [02:06<2:17:54, 21.00s/it][A
  2%|▏         | 7/400 [02:25<2:16:01, 20.77s/it][A
  2%|▏         | 8/400 [02:44<2:14:27, 20.58s/it][A
  2%|▏         | 9/400 [03:04<2:13:40, 20.51s/it][A
  2%|▎         | 10/400 [03:33<2:18:43, 21.34s/it][A
  3%|▎         | 11/400 [03:52<2:17:11, 21.16s/it][A
  3%|▎         | 12/400 [04:11<2:15:17, 20.92s/it][A
  3%|▎         | 13/400 [04:29<2:13:43, 20.73s/it][A
  4%|▎         | 14/400 [04:48<2:12:35, 20.61s/it][A
  4%|▍         | 15/400 [05:08<2:11:59, 20.57s/it][A
  4%|▍         | 16/400 [05:27<2:11:06, 20.49s/it][A
  4%|▍         | 17/400 [05:46<2:09:59, 20.37s/it][A
  4%|▍         | 18/400 [06:05<2:09:11, 20.29s/it][A
  5%|▍         | 19/400 [06:24<2:08: