# Dataloaders

In [27]:
# Importing libraries
import torchio as tio
import glob
import numpy as np
import random
import os
import pickle

from collections import OrderedDict
from pathlib import Path

from tqdm import tqdm
import time

import torchio as tio
from torchio.transforms import (RescaleIntensity,RandomFlip,Compose, HistogramStandardization, CropOrPad, ToCanonical)

from sklearn.metrics import f1_score

from torch.utils.data import DataLoader
import torch
import torch.nn as nn

import matplotlib.pyplot as plt

from Functions_classification_training import UNet_1_layer, UNet_2_layer, Classifier

In [28]:
# Load the nested subject dictionary; it is indexed below as
# subjects_dict[dataset][label] -> list of subject IDs.
with open('../subjects_dict.pkl', 'rb') as f:
    subjects_dict = pickle.load(f)

# Remove CHP1 and ACH1 from the CHIASM controls.
# list.remove raises ValueError if an ID is absent, so a changed pickle is noticed immediately.
for excluded_id in ('CHP1', 'ACH1'):
    subjects_dict['CHIASM']['control'].remove(excluded_id)

In [29]:
# Function used for splitting a list into consecutive, nearly equal groups
def splitter(list_to_be_splitted, number_of_groups):
    """Lazily cut a sequence into `number_of_groups` contiguous slices.

    The first `len(list_to_be_splitted) % number_of_groups` slices get one
    extra element, so slice lengths differ by at most one. Slices may be
    empty when there are more groups than elements.

    Args:
        list_to_be_splitted: any sliceable sequence supporting len().
        number_of_groups: number of slices to produce.

    Returns:
        A generator yielding the slices in order.

    Raises:
        ZeroDivisionError: if `number_of_groups` is 0 (raised at call time).
    """
    base_size, remainder = divmod(len(list_to_be_splitted), number_of_groups)

    def boundary(k):
        # Start offset of slice k: k full-size slices plus one extra element
        # for each of the first `remainder` slices that precede it.
        return k * base_size + min(k, remainder)

    return (list_to_be_splitted[boundary(k):boundary(k + 1)] for k in range(number_of_groups))

In [30]:
# Training loop: trains `model` in place and returns the per-epoch loss / F1 histories
def _run_epoch_on_split(loader, model, criterion, device, optimizer=None):
    """Run one pass over `loader` and return (mean loss per sample, weighted F1).

    When `optimizer` is given the pass is a training pass (gradients enabled,
    one optimizer step per batch); otherwise it is a pure evaluation pass.
    The caller is responsible for calling model.train() / model.eval().

    Each batch is indexed as batch['chiasm']['data'] (inputs) and
    batch['label'] (targets); column 0 of the model output is used as the
    prediction for both the loss and the F1 score.
    """
    training = optimizer is not None

    summed_loss = 0.0
    n_samples = 0
    targets = []
    predictions = []

    for batch in loader:

        data = batch['chiasm']['data'].to(device)
        labels = batch['label']

        with torch.set_grad_enabled(training):

            if training:
                optimizer.zero_grad()

            output = model(data)
            loss = criterion(output[:, 0], labels.to(device).float())

            if training:
                loss.backward()
                optimizer.step()

        # criterion is assumed to return a batch mean (nn.BCELoss default), so
        # weight it by the batch size to get a per-sample average at the end
        summed_loss += loss.item() * len(labels)
        n_samples += len(labels)

        targets += labels.cpu().numpy().tolist()
        # Use the same output column as the loss so that f1_score receives flat 1-D predictions
        predictions += output[:, 0].detach().round().cpu().numpy().tolist()

    mean_loss = summed_loss / n_samples if n_samples else float('nan')

    return mean_loss, f1_score(targets, predictions, average='weighted')


def train_network(n_epochs, dataloaders, model, optimizer, criterion, device, save_path,
                  patience=1000, log_every=500):
    """Train `model` with early stopping and return the loss / F1 histories.

    Per epoch: one training pass over dataloaders['train'], then evaluation
    passes over dataloaders['dev_train'] and dataloaders['dev_test'].
    Whenever the dev_train loss reaches a new minimum, the weights are saved
    to `save_path + 'optimal_weights'` and the epoch number is written to
    `save_path + 'number_epochs.txt'`.

    Args:
        n_epochs: maximum number of epochs.
        dataloaders: mapping with the keys 'train', 'dev_train' and 'dev_test';
            each value is an iterable of batches.
        model: torch module; moved to `device` and trained in place.
        optimizer: optimizer over the trainable parameters of `model`.
        criterion: loss called as criterion(output[:, 0], float_labels).
        device: torch device on which to run.
        save_path: string prefix for the saved files (it is concatenated, so
            pass a trailing separator to save inside a folder).
        patience: stop once this many epochs have passed without a new
            minimum of the dev_train loss.
        log_every: print the losses every `log_every` epochs.

    Returns:
        Six lists with one entry per completed epoch, in this order:
        train loss, dev_train loss, dev_test loss,
        train F1, dev_train F1, dev_test F1.
        Losses are averages per sample; the model itself is not returned.
    """
    track_train_loss = []
    track_dev_train_loss = []
    track_test_loss = []

    track_train_f1 = []
    track_dev_train_f1 = []
    track_test_f1 = []

    valid_loss_min = np.inf  # np.Inf was removed in NumPy 2.0

    # Initialised up front: if the first dev_train loss is NaN nothing is ever
    # saved, and the early-stopping check below must still be well defined
    last_updated_epoch = 0

    model.to(device)

    for epoch in tqdm(range(1, n_epochs+1)):

        # Training
        model.train()
        train_loss, train_f1 = _run_epoch_on_split(dataloaders['train'], model, criterion, device, optimizer)

        # Validation on the dev_train and dev_test datasets
        model.eval()
        dev_train_loss, dev_train_f1 = _run_epoch_on_split(dataloaders['dev_train'], model, criterion, device)
        test_loss, test_f1 = _run_epoch_on_split(dataloaders['dev_test'], model, criterion, device)

        track_train_loss.append(train_loss)
        track_dev_train_loss.append(dev_train_loss)
        track_test_loss.append(test_loss)

        track_train_f1.append(train_f1)
        track_dev_train_f1.append(dev_train_f1)
        track_test_f1.append(test_f1)

        if epoch % log_every == 0:
            print('END OF EPOCH: {} \n Training loss per image: {:.6f}\n Training_dev loss per image: {:.6f}\n Test_dev loss per image: {:.6f}'.format(epoch, train_loss, dev_train_loss, test_loss))

        # Save the model if the dev_train loss reached a new minimum and record the epoch
        if dev_train_loss < valid_loss_min:
            valid_loss_min = dev_train_loss
            torch.save(model.state_dict(), save_path+'optimal_weights')
            last_updated_epoch = epoch

            with open(save_path+'number_epochs.txt', 'w') as f:
                print('Epoch:', str(epoch), file=f)

        # Early stopping
        if (epoch - last_updated_epoch) >= patience:
            break

    # Return the metric histories (the best weights are on disk under save_path)
    return track_train_loss, track_dev_train_loss, track_test_loss, track_train_f1, track_dev_train_f1, track_test_f1

In [31]:
# Dictionary with splits (disabled: the code below is wrapped in a string literal, so it is NOT executed).
# When enabled, it replaces every subjects_dict[dataset][label] list with a list of groups produced by
# splitter(): 9 groups for CHIASM/albinism, 8 groups for every other dataset/label, and pickles the
# result to 'design_kfold.pkl'. The fold-building cell further down opens 'design_kfold.pkl', so this
# code has to be run once before that cell can work on a fresh checkout.
'''
for dataset in subjects_dict.keys():
    for label in subjects_dict[dataset].keys():
        if(dataset=='CHIASM' and label=='albinism'):
            subjects_dict[dataset][label]=list(splitter(subjects_dict[dataset][label],9))
        else:
            subjects_dict[dataset][label]=list(splitter(subjects_dict[dataset][label],8))
            
# Save the dictionary
with open('design_kfold.pkl','wb') as f:
    pickle.dump(subjects_dict,f)
'''

"\nfor dataset in subjects_dict.keys():\n    for label in subjects_dict[dataset].keys():\n        if(dataset=='CHIASM' and label=='albinism'):\n            subjects_dict[dataset][label]=list(splitter(subjects_dict[dataset][label],9))\n        else:\n            subjects_dict[dataset][label]=list(splitter(subjects_dict[dataset][label],8))\n            \n# Save the dictionary\nwith open('design_kfold.pkl','wb') as f:\n    pickle.dump(subjects_dict,f)\n"

In [32]:
# Build the 8 cross-validation folds from 'design_kfold.pkl' and train one classifier per fold.
#
# Intended share of each dataset's groups per split:
#
#              train dev_train dev_test test1 test2
# control        6/8    1/8                     1/8
# UoN            7/8    1/8
# CHIASM                         1/8     6/8    1/8
#
# NOTE(review): the code below puts only CHIASM *albinism* into test2 (CHIASM controls are an
# empty list there), so the CHIASM row of the table holds for albinism only - confirm intent.

groups=['train','dev_train','dev_test','test1','test2']

N_FOLDS = 8
INPUT_ROOT = '../../1_Data/1_Input/'
# Publicly available datasets that contribute controls only
CONTROL_ONLY_DATASETS = ['ABIDE', 'Athletes', 'HCP', 'COBRE', 'Leipzig', 'MCIC']
# Splits whose subjects are aggregated as they are; every other split gets albinism upsampling
EVALUATION_ONLY_GROUPS = ('test2', 'test1', 'dev_test')
LABEL_VALUES = {'control': 0, 'albinism': 1}
BATCH_SIZE = 10
NUM_WORKERS = 8


def _chiasm_path(dataset, subject):
    """Path of the chiasm image of `subject` from `dataset`."""
    return INPUT_ROOT+dataset+'/'+subject+'/chiasm.nii.gz'


def _make_subjects(dataset, label, subject_ids):
    """Create one labelled tio.Subject per ID (control -> 0, albinism -> 1).

    An unknown label raises KeyError instead of silently reusing a stale label value.
    """
    return [tio.Subject(chiasm=tio.Image(_chiasm_path(dataset, subject), type=tio.INTENSITY),
                        label=LABEL_VALUES[label]) for subject in subject_ids]


def _flatten(list_of_groups):
    """Concatenate a list of subject groups into one flat list of subject IDs."""
    return [subject for subject_group in list_of_groups for subject in subject_group]


# Data preprocessing and augmentation - identical for every fold, so built once

# Canonical
canonical = ToCanonical()

# Rescale
rescale = RescaleIntensity((0,1))

# Flip
flip = RandomFlip((0,1,2), flip_probability=0.5, p=0.5)

# Affine transformations
affine = tio.RandomAffine(degrees=5, translation=(2,2,2), center='image')

crop = CropOrPad((24,24,8))

# Try setting CUDA if possible
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Criterion
criterion = nn.BCELoss()

# Hyperparameter grid: every entry of model_parameters is passed positionally to Classifier
model_parameters=[[1,2,2,1,256]]
learning_rates = [0.00005]
n_epochs=8000

# NOTE(review): this directory is created but nothing below writes to it (the outputs go to
# '4_K-fold_separated_classification'); kept so that the on-disk layout does not change.
os.makedirs('../../1_Data/4_K-fold_combined', exist_ok=True)

for i in range(N_FOLDS):

    output_folder='../../1_Data/4_K-fold_separated_classification'+'/'+str(i)
    os.makedirs(output_folder, exist_ok=True)

    # Reload the untouched design for every fold: the pops below consume it
    with open('design_kfold.pkl','rb') as f:
        kfold_design = pickle.load(f)

    # A list of 8 groups has only 7 left once group i has been popped, so in the last fold the
    # second pick wraps around to group 0.
    # NOTE(review): the same wrap-around is applied to the CHIASM lists in dev_test, although
    # index 7 would still be valid there (9 albinism groups; controls not popped before) - confirm.
    next_group = 0 if i == N_FOLDS - 1 else i

    design_kfold_combined={}


    # test2 - i-th group of CHIASM albinism + i-th group of every control-only dataset

    design_kfold_combined['test2']={'CHIASM': {'albinism': kfold_design['CHIASM']['albinism'].pop(i),
                                               'control': []}}

    for dataset in CONTROL_ONLY_DATASETS:
        design_kfold_combined['test2'][dataset]={'control': kfold_design[dataset]['control'].pop(i)}


    # dev_test - one of the remaining CHIASM groups per label

    design_kfold_combined['dev_test']={'CHIASM': {'albinism': kfold_design['CHIASM']['albinism'].pop(next_group),
                                                  'control': kfold_design['CHIASM']['control'].pop(next_group)}}


    # test1 - all CHIASM groups that are left

    design_kfold_combined['test1']={'CHIASM': {'albinism': _flatten(kfold_design['CHIASM']['albinism']),
                                               'control': _flatten(kfold_design['CHIASM']['control'])}}


    # dev_train - i-th group of every UoN label + one of the remaining groups of the control-only datasets

    design_kfold_combined['dev_train']={'UoN': {}}

    for label in kfold_design['UoN'].keys():
        design_kfold_combined['dev_train']['UoN'][label]=kfold_design['UoN'][label].pop(i)

    for dataset in CONTROL_ONLY_DATASETS:
        design_kfold_combined['dev_train'][dataset]={'control': kfold_design[dataset]['control'].pop(next_group)}


    # train - everything that is left, except CHIASM

    design_kfold_combined['train']={}

    for dataset in kfold_design.keys():

        if dataset=='CHIASM':
            continue

        design_kfold_combined['train'][dataset]={label: _flatten(kfold_design[dataset][label])
                                                 for label in kfold_design[dataset].keys()}

    # Save the design
    with open(output_folder+'/kfold_design_'+str(i)+'.pkl','wb') as f:
        pickle.dump(design_kfold_combined, f)

    print(i)

    # Torchio's subjects' dictionary + upsample the albinism group in train and dev_train + add labels
    # (train_network may read dict_kfold_combined_training as a global, so the name must stay)
    dict_kfold_combined_training={}

    for group in design_kfold_combined.keys():

        dict_kfold_combined_training[group]=[]

        upsample = group not in EVALUATION_ONLY_GROUPS

        # The scaling coefficient is needed (and computed) only for the upsampled splits
        scaling_factor = 1

        if upsample:

            num_control = sum(len(labels['control']) for labels in design_kfold_combined[group].values())
            num_albinism = sum(len(labels.get('albinism', [])) for labels in design_kfold_combined[group].values())

            # Never below 1: a truncated ratio of 0 would drop every albinism subject from the split
            if num_albinism:
                scaling_factor = max(1, num_control // num_albinism)

        # Create Torchio's subjects for the listed IDs
        for dataset in design_kfold_combined[group].keys():

            for label in design_kfold_combined[group][dataset].keys():

                repeats = scaling_factor if (upsample and label=='albinism') else 1

                # The subjects are rebuilt for every repetition, so each copy is a distinct object
                for _ in range(repeats):
                    dict_kfold_combined_training[group]+=_make_subjects(dataset, label, design_kfold_combined[group][dataset][label])

    # Histogram standardization (to mitigate cross-site differences) - landmarks are learned on the
    # (not upsampled) training subjects of this fold only
    chiasm_paths=[_chiasm_path(dataset, subject)
                  for dataset, labels in design_kfold_combined['train'].items()
                  for subject_ids in labels.values()
                  for subject in subject_ids]

    # NOTE(review): the landmarks are written with torch.save to a '.npy' file name in the working
    # directory, and every fold overwrites the file of the previous one - confirm this is intended.
    chiasm_landmarks_path = Path('chiasm_landmarks.npy')

    chiasm_landmarks = HistogramStandardization.train(chiasm_paths)
    torch.save(chiasm_landmarks, chiasm_landmarks_path)

    landmarks={'chiasm': chiasm_landmarks}

    standardize = HistogramStandardization(landmarks)

    # Composing transforms - affine and flip serve as data augmentation and are used only for training
    transform_train = Compose([canonical, standardize, rescale, affine, flip, crop])
    transform_dev = Compose([canonical, standardize, rescale, crop])

    datasets_list={group: tio.SubjectsDataset(dict_kfold_combined_training[group],
                                              transform=transform_train if group=='train' else transform_dev)
                   for group in dict_kfold_combined_training.keys()}

    # Create dataloaders
    dataloaders_chiasm={group: DataLoader(dataset=datasets_list[group], batch_size=BATCH_SIZE, shuffle=True, num_workers=NUM_WORKERS)
                        for group in groups}

    folder=output_folder

    for parameters in model_parameters:
        for learning_rate in learning_rates:

            # Initialize the proper model
            classifying_network = Classifier(*parameters[:5])
            classifying_network.freeze_feature_extraction()

            # Optimizer - only over the parameters that are still trainable after the freeze
            optimizer = torch.optim.Adam(params=filter(lambda p: p.requires_grad, classifying_network.parameters()), lr=learning_rate)

            # Create output folder
            data_folder = folder+'/'+'_'.join(str(parameter) for parameter in parameters[:5])+'_'+str(learning_rate)+'/'
            os.makedirs(data_folder, exist_ok=True)

            # Train & save weights
            train_loss, dev_train_loss, test_loss, train_f1, dev_train_f1, test_f1 = train_network(n_epochs, dataloaders_chiasm, classifying_network, optimizer, criterion, device, data_folder)

            # Save losses and F1 scores
            tracks = {'train_loss': train_loss, 'dev_train_loss': dev_train_loss, 'test_loss': test_loss,
                      'train_f1': train_f1, 'dev_train_f1': dev_train_f1, 'test_f1': test_f1}

            for track_name, track in tracks.items():
                with open(data_folder+track_name+'.pkl', 'wb') as f:
                    pickle.dump(track, f)
                

  0%|          | 0/1291 [00:00<?, ?it/s]

0


100%|██████████| 1291/1291 [00:01<00:00, 1024.03it/s]
  6%|▋         | 500/8000 [26:49<6:41:50,  3.21s/it]

END OF EPOCH: 500 
 Training loss per image: 0.008758
 Training_dev loss per image: 0.192539
 Test_dev loss per image: 18.916571


 12%|█▎        | 1000/8000 [53:41<6:15:41,  3.22s/it]

END OF EPOCH: 1000 
 Training loss per image: 0.004262
 Training_dev loss per image: 0.206638
 Test_dev loss per image: 18.849672


 19%|█▉        | 1500/8000 [1:20:33<5:49:51,  3.23s/it]

END OF EPOCH: 1500 
 Training loss per image: 0.001879
 Training_dev loss per image: 0.113077
 Test_dev loss per image: 22.004099


 19%|█▉        | 1514/8000 [1:21:21<5:48:31,  3.22s/it]
  8%|▊         | 105/1294 [00:00<00:01, 1040.12it/s]

1


100%|██████████| 1294/1294 [00:01<00:00, 1030.66it/s]
  6%|▋         | 500/8000 [26:50<6:43:03,  3.22s/it]

END OF EPOCH: 500 
 Training loss per image: 0.003311
 Training_dev loss per image: 0.111000
 Test_dev loss per image: 6.175540


 12%|█▎        | 1000/8000 [53:40<6:16:09,  3.22s/it]

END OF EPOCH: 1000 
 Training loss per image: 0.003742
 Training_dev loss per image: 0.536993
 Test_dev loss per image: 12.326787


 19%|█▉        | 1500/8000 [1:20:30<5:46:34,  3.20s/it]

END OF EPOCH: 1500 
 Training loss per image: 0.002062
 Training_dev loss per image: 0.248639
 Test_dev loss per image: 8.691769


 25%|██▌       | 2000/8000 [1:47:21<5:22:32,  3.23s/it]

END OF EPOCH: 2000 
 Training loss per image: 0.004206
 Training_dev loss per image: 0.253956
 Test_dev loss per image: 10.048646


 31%|███▏      | 2500/8000 [2:14:11<4:55:44,  3.23s/it]

END OF EPOCH: 2500 
 Training loss per image: 0.001320
 Training_dev loss per image: 0.349450
 Test_dev loss per image: 7.232255


 38%|███▊      | 3000/8000 [2:41:01<4:27:55,  3.22s/it]

END OF EPOCH: 3000 
 Training loss per image: 0.001643
 Training_dev loss per image: 0.041114
 Test_dev loss per image: 13.530469


 44%|████▍     | 3500/8000 [3:07:52<4:00:42,  3.21s/it]

END OF EPOCH: 3500 
 Training loss per image: 0.004378
 Training_dev loss per image: 0.052862
 Test_dev loss per image: 8.066790


 47%|████▋     | 3743/8000 [3:20:57<3:48:33,  3.22s/it]
  8%|▊         | 100/1296 [00:00<00:01, 993.02it/s]

2


100%|██████████| 1296/1296 [00:01<00:00, 1037.03it/s]
  6%|▋         | 500/8000 [26:49<6:43:58,  3.23s/it]

END OF EPOCH: 500 
 Training loss per image: 0.013489
 Training_dev loss per image: 0.251512
 Test_dev loss per image: 15.134749


 12%|█▎        | 1000/8000 [53:39<6:15:30,  3.22s/it]

END OF EPOCH: 1000 
 Training loss per image: 0.007180
 Training_dev loss per image: 0.341263
 Test_dev loss per image: 22.279575


 19%|█▉        | 1500/8000 [1:20:28<5:47:09,  3.20s/it]

END OF EPOCH: 1500 
 Training loss per image: 0.004501
 Training_dev loss per image: 1.046805
 Test_dev loss per image: 19.234808


 25%|██▌       | 2000/8000 [1:47:18<5:22:08,  3.22s/it]

END OF EPOCH: 2000 
 Training loss per image: 0.002897
 Training_dev loss per image: 1.386414
 Test_dev loss per image: 25.530207


 31%|███▏      | 2500/8000 [2:14:08<4:54:13,  3.21s/it]

END OF EPOCH: 2500 
 Training loss per image: 0.000645
 Training_dev loss per image: 0.503334
 Test_dev loss per image: 21.465755


 38%|███▊      | 3000/8000 [2:40:57<4:29:27,  3.23s/it]

END OF EPOCH: 3000 
 Training loss per image: 0.002148
 Training_dev loss per image: 0.911098
 Test_dev loss per image: 22.144651


 44%|████▍     | 3500/8000 [3:07:46<4:00:47,  3.21s/it]

END OF EPOCH: 3500 
 Training loss per image: 0.002517
 Training_dev loss per image: 0.171748
 Test_dev loss per image: 23.297192


 50%|█████     | 4000/8000 [3:34:36<3:35:01,  3.23s/it]

END OF EPOCH: 4000 
 Training loss per image: 0.001540
 Training_dev loss per image: 0.784367
 Test_dev loss per image: 24.295202


 52%|█████▏    | 4176/8000 [3:44:08<3:25:15,  3.22s/it]
  8%|▊         | 101/1298 [00:00<00:01, 1006.09it/s]

3


100%|██████████| 1298/1298 [00:01<00:00, 1039.32it/s]
  6%|▋         | 500/8000 [26:51<6:42:41,  3.22s/it]

END OF EPOCH: 500 
 Training loss per image: 0.010880
 Training_dev loss per image: 0.824567
 Test_dev loss per image: 16.368328


 12%|█▎        | 1000/8000 [53:42<6:16:18,  3.23s/it]

END OF EPOCH: 1000 
 Training loss per image: 0.004776
 Training_dev loss per image: 1.123164
 Test_dev loss per image: 14.896252


 19%|█▉        | 1500/8000 [1:20:34<5:48:33,  3.22s/it]

END OF EPOCH: 1500 
 Training loss per image: 0.001491
 Training_dev loss per image: 0.465907
 Test_dev loss per image: 20.348457


 25%|██▌       | 2000/8000 [1:47:27<5:29:00,  3.29s/it]

END OF EPOCH: 2000 
 Training loss per image: 0.006301
 Training_dev loss per image: 0.327723
 Test_dev loss per image: 22.361717


 31%|███▏      | 2500/8000 [2:14:19<4:56:32,  3.23s/it]

END OF EPOCH: 2500 
 Training loss per image: 0.000748
 Training_dev loss per image: 0.212808
 Test_dev loss per image: 24.992769


 38%|███▊      | 3000/8000 [2:41:11<4:30:24,  3.24s/it]

END OF EPOCH: 3000 
 Training loss per image: 0.000425
 Training_dev loss per image: 0.158244
 Test_dev loss per image: 26.355103


 44%|████▍     | 3500/8000 [3:08:03<4:02:42,  3.24s/it]

END OF EPOCH: 3500 
 Training loss per image: 0.003118
 Training_dev loss per image: 0.222422
 Test_dev loss per image: 30.375082


 50%|█████     | 4000/8000 [3:34:56<3:35:07,  3.23s/it]

END OF EPOCH: 4000 
 Training loss per image: 0.000813
 Training_dev loss per image: 0.594470
 Test_dev loss per image: 29.770323


 56%|█████▋    | 4500/8000 [4:01:48<3:07:39,  3.22s/it]

END OF EPOCH: 4500 
 Training loss per image: 0.001084
 Training_dev loss per image: 0.482773
 Test_dev loss per image: 28.167349


 59%|█████▊    | 4690/8000 [4:12:04<2:57:53,  3.22s/it]
  8%|▊         | 104/1301 [00:00<00:01, 1036.91it/s]

4


100%|██████████| 1301/1301 [00:01<00:00, 1029.96it/s]
  6%|▋         | 500/8000 [27:00<6:44:47,  3.24s/it]

END OF EPOCH: 500 
 Training loss per image: 0.006107
 Training_dev loss per image: 0.018819
 Test_dev loss per image: 14.271970


 12%|█▎        | 1000/8000 [54:00<6:18:26,  3.24s/it]

END OF EPOCH: 1000 
 Training loss per image: 0.004161
 Training_dev loss per image: 0.019198
 Test_dev loss per image: 13.903898


 19%|█▉        | 1500/8000 [1:21:01<5:51:24,  3.24s/it]

END OF EPOCH: 1500 
 Training loss per image: 0.006221
 Training_dev loss per image: 0.013789
 Test_dev loss per image: 14.206428


 25%|██▌       | 2000/8000 [1:48:01<5:23:25,  3.23s/it]

END OF EPOCH: 2000 
 Training loss per image: 0.001429
 Training_dev loss per image: 0.006892
 Test_dev loss per image: 18.751816


 31%|███▏      | 2500/8000 [2:15:00<4:56:00,  3.23s/it]

END OF EPOCH: 2500 
 Training loss per image: 0.005223
 Training_dev loss per image: 0.027283
 Test_dev loss per image: 12.186604


 36%|███▌      | 2882/8000 [2:35:41<4:36:29,  3.24s/it]
  8%|▊         | 105/1302 [00:00<00:01, 1040.47it/s]

5


100%|██████████| 1302/1302 [00:01<00:00, 1043.52it/s]
  6%|▋         | 500/8000 [27:01<6:47:59,  3.26s/it]

END OF EPOCH: 500 
 Training loss per image: 0.007307
 Training_dev loss per image: 1.591854
 Test_dev loss per image: 10.450393


 12%|█▎        | 1000/8000 [54:03<6:18:00,  3.24s/it]

END OF EPOCH: 1000 
 Training loss per image: 0.007207
 Training_dev loss per image: 1.871748
 Test_dev loss per image: 12.955434


 13%|█▎        | 1027/8000 [55:34<6:17:17,  3.25s/it]
  0%|          | 0/1302 [00:00<?, ?it/s]

6


100%|██████████| 1302/1302 [00:01<00:00, 1033.74it/s]
  6%|▋         | 500/8000 [27:02<6:44:55,  3.24s/it]

END OF EPOCH: 500 
 Training loss per image: 0.012490
 Training_dev loss per image: 0.007471
 Test_dev loss per image: 7.824724


 12%|█▎        | 1000/8000 [54:03<6:18:47,  3.25s/it]

END OF EPOCH: 1000 
 Training loss per image: 0.004162
 Training_dev loss per image: 0.088134
 Test_dev loss per image: 7.558918


 19%|█▉        | 1500/8000 [1:21:06<5:49:25,  3.23s/it]

END OF EPOCH: 1500 
 Training loss per image: 0.002203
 Training_dev loss per image: 0.322757
 Test_dev loss per image: 12.096725


 25%|██▌       | 2000/8000 [1:48:08<5:24:16,  3.24s/it]

END OF EPOCH: 2000 
 Training loss per image: 0.002819
 Training_dev loss per image: 0.002952
 Test_dev loss per image: 6.177959


 31%|███▏      | 2500/8000 [2:15:10<4:57:44,  3.25s/it]

END OF EPOCH: 2500 
 Training loss per image: 0.004336
 Training_dev loss per image: 0.000243
 Test_dev loss per image: 6.773431


 38%|███▊      | 3000/8000 [2:42:11<4:29:19,  3.23s/it]

END OF EPOCH: 3000 
 Training loss per image: 0.002905
 Training_dev loss per image: 0.000852
 Test_dev loss per image: 9.264759


 44%|████▍     | 3500/8000 [3:09:13<4:02:37,  3.23s/it]

END OF EPOCH: 3500 
 Training loss per image: 0.003073
 Training_dev loss per image: 0.000277
 Test_dev loss per image: 9.921553


 44%|████▍     | 3502/8000 [3:09:23<4:03:15,  3.24s/it]
  0%|          | 0/1297 [00:00<?, ?it/s]

7


100%|██████████| 1297/1297 [00:01<00:00, 1019.15it/s]
  6%|▋         | 500/8000 [26:56<6:44:38,  3.24s/it]

END OF EPOCH: 500 
 Training loss per image: 0.009667
 Training_dev loss per image: 0.003159
 Test_dev loss per image: 10.458948


 12%|█▎        | 1000/8000 [53:51<6:16:23,  3.23s/it]

END OF EPOCH: 1000 
 Training loss per image: 0.006151
 Training_dev loss per image: 0.006159
 Test_dev loss per image: 17.411381


 19%|█▉        | 1500/8000 [1:20:47<5:49:44,  3.23s/it]

END OF EPOCH: 1500 
 Training loss per image: 0.001491
 Training_dev loss per image: 0.005848
 Test_dev loss per image: 20.628996


 25%|██▌       | 2000/8000 [1:47:42<5:22:10,  3.22s/it]

END OF EPOCH: 2000 
 Training loss per image: 0.003652
 Training_dev loss per image: 0.002010
 Test_dev loss per image: 19.351538


 31%|███▏      | 2500/8000 [2:14:38<4:56:15,  3.23s/it]

END OF EPOCH: 2500 
 Training loss per image: 0.000329
 Training_dev loss per image: 0.000982
 Test_dev loss per image: 21.132008


 38%|███▊      | 3000/8000 [2:41:35<4:28:43,  3.22s/it]

END OF EPOCH: 3000 
 Training loss per image: 0.001974
 Training_dev loss per image: 0.012945
 Test_dev loss per image: 21.133736


 39%|███▊      | 3089/8000 [2:46:26<4:24:37,  3.23s/it]
