# Dataloaders

In [1]:
# Importing libraries
import torchio as tio
import glob
import numpy as np
import random
import os

from collections import OrderedDict
from pathlib import Path

from tqdm import tqdm
import time

import torchio as tio
from torchio.transforms import (RescaleIntensity,RandomFlip,Compose, HistogramStandardization, RandomAffine, RandomNoise, ToCanonical)

from torch.utils.data import DataLoader
import torch
import torch.nn as nn

import matplotlib.pyplot as plt

import pickle

import copy

from Networks_Training import UNet_1_layer, UNet_2_layer

In [2]:
# IDs of all participants with albinism, listed cohort by cohort:
# first the 'Nyst*' IDs, then the 'ALB*' IDs.
_albinism_ids_nyst = [
    'Nyst01', 'Nyst02', 'Nyst03', 'Nyst04', 'Nyst05', 'Nyst06', 'Nyst07',
    'Nyst08', 'Nyst09', 'Nyst10', 'Nyst11', 'Nyst12', 'Nyst13', 'Nyst16',
    'Nyst20', 'Nyst21', 'Nyst24', 'Nyst25', 'Nyst31', 'Nyst35', 'Nyst37',
    'Nyst43', 'Nyst45',
]
_albinism_ids_alb = [
    'ALB1', 'ALB2', 'ALB3', 'ALB4', 'ALB5', 'ALB6', 'ALB7', 'ALB8', 'ALB9',
]

# Single flat list, in the same order as before (Nyst cohort first)
ids_albinism = _albinism_ids_nyst + _albinism_ids_alb

In [3]:
# One-time creation of the dictionary listing the participant IDs of every dataset.
# The (randomized) order is fixed throughout the experiment to better control for data poisoning.
# NOTE: the shuffles below are not seeded, so re-running this cell produces a different
# order each time. The order that is actually used is the one stored in
# '../subjects_dict.pkl' (see the commented-out save at the end of this cell).

datasets=['ABIDE','Athletes','HCP','COBRE','Leipzig','UoN','CHIASM','MCIC']

# Datasets holding both controls and participants with albinism;
# all remaining datasets hold controls only.
datasets_with_albinism=['CHIASM','UoN']

subjects_dict={}

for dataset in datasets:

    # A participant is listed only if its folder contains an optic chiasm mask.
    # The directory is scanned once per dataset.
    mask_paths=glob.glob('../../1_Data/1_Input/'+dataset+'/*/mask_optic_chiasm.nii.gz')

    # The participant ID is the name of the folder holding the mask. os.path is used
    # instead of splitting on '/' so the ID is also correct where glob returns
    # platform-specific separators.
    ids=[os.path.basename(os.path.dirname(path)) for path in mask_paths]

    subjects_dict[dataset]={}

    if dataset in datasets_with_albinism:

        # Split by diagnosis using the list of albinism IDs defined earlier in the notebook
        ids_con=[participant for participant in ids if participant not in ids_albinism]
        ids_alb=[participant for participant in ids if participant in ids_albinism]

        random.shuffle(ids_con)
        random.shuffle(ids_alb)

        subjects_dict[dataset]['control']=ids_con
        subjects_dict[dataset]['albinism']=ids_alb

    else:

        random.shuffle(ids)

        subjects_dict[dataset]['control']=ids

#print(subjects_dict)

# Save the dictionary storing all the ids in fixed (beforehand randomized) order
#with open('../subjects_dict.pkl', 'wb') as f:
#    pickle.dump(subjects_dict, f)


In [4]:
# Load the general dictionary (participant IDs per dataset and label, in their
# fixed order) from disk; this rebinds `subjects_dict`.
# NOTE: unpickling can execute arbitrary code - only load files created by this project.
subjects_dict_file = '../subjects_dict.pkl'

with open(subjects_dict_file, 'rb') as pickled_subjects:
    subjects_dict = pickle.load(pickled_subjects)

In [9]:
#total=0
#
#for dataset in subjects_dict.keys():
#    for label in subjects_dict[dataset].keys():
#        print(dataset,label,len(subjects_dict[dataset][label]))
#        
#        total+=len(subjects_dict[dataset][label])
#        
#print('total',total)

In [10]:
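# Assign the participants to train/dev_train/dev_test/test groups for the purpose of AE training.
# NOTE: the code below is wrapped in a string literal, so it is NOT executed; the split that is
# actually used is loaded from 'design_ae_training.pkl' in the next cell.
# Before re-enabling it, replace np.int with int (np.int was removed in NumPy 1.24).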
'''
split_training=[0.0,0.8,0.9,1.0,1.0]
split_testing=[0.0,0.0,0.0,0.15,1.0]

groups=['train','dev_train', 'dev_test', 'test']

design_ae_training={}

for i in range(len(groups)):
    
    if i==3:
        continue
    else:
        design_ae_training[groups[i]]={}

        for dataset in ['ABIDE','Athletes','HCP','COBRE','Leipzig','MCIC']:

            design_ae_training[groups[i]][dataset]={}

            number_participants = len(subjects_dict[dataset]['control'])
            start = np.int(np.floor(number_participants*split_training[i]))
            end = np.int(np.floor(number_participants*split_training[i+1]))

            #print(groups[i],dataset,len(subjects_dict[dataset]['control'][start:end]))
            design_ae_training[groups[i]][dataset]['control']=subjects_dict[dataset]['control'][start:end]
                       
# Do the same for test
for i in [2,3]:
    
    if i==3:
        design_ae_training[groups[i]]={}
    
    for dataset in ['CHIASM','UoN']:
            
        design_ae_training[groups[i]][dataset]={}
        
        for label in ['control','albinism']:
            
            design_ae_training[groups[i]][dataset][label]={}

            number_participants = len(subjects_dict[dataset][label])
            start = np.int(np.floor(number_participants*split_testing[i]))
            end = np.int(np.floor(number_participants*split_testing[i+1]))

            #print(groups[i],dataset,label,len(subjects_dict[dataset][label][start:end]))
            design_ae_training[groups[i]][dataset][label]=subjects_dict[dataset][label][start:end]
'''            
# Check the number of participants in each group

#for a in design_ae_training.keys():
#    print('\n')
#    for b in design_ae_training[a].keys():
#        for c in design_ae_training[a][b].keys():
#            print(a,b,c, len(design_ae_training[a][b][c]))

# Save
#with open('design_ae_training.pkl', 'wb') as f:
#    pickle.dump(design_ae_training, f)


In [11]:
# Load the dictionary describing the AE training design from disk.
# The cells below index it as design_ae_training[group][dataset][label].
# NOTE: unpickling can execute arbitrary code - only load files created by this project.
design_file = 'design_ae_training.pkl'

with open(design_file, 'rb') as pickled_design:
    design_ae_training = pickle.load(pickled_design)

In [12]:
# Create dictionary with data required for creation of dataset and dataloader.
# The deep copy reproduces the nested group -> dataset -> label layout of
# design_ae_training without sharing any objects with it, so the entries can be
# overwritten with tio.Subject lists (done by the loop at the end of this cell)
# while design_ae_training keeps the plain participant IDs.
dict_ae_training=copy.deepcopy(design_ae_training)

'''
# train and dev_train (combined, randomized data)
for group in ['train','dev_train']:
    
    all_subjects=[]
    
    for dataset in design_ae_training[group].keys():
        for label in design_ae_training[group][dataset].keys():
                        
            all_subjects+=[tio.Subject(t1=tio.Image('../../1_Data/1_Input/'+dataset+'/'+subject+'/t1w_1mm_iso_brain.nii.gz', type = tio.INTENSITY),
                                        probs=tio.Image('../../1_Data/1_Input/'+dataset+'/'+subject+'/sampling_distribution.nii.gz', type = tio.INTENSITY)) 
                                        for subject in design_ae_training[group][dataset][label]]

    dict_ae_training[group]=all_subjects
'''           
# Replace every list of participant IDs by a list of tio.Subject objects.
# Each subject carries two images: 't1' (the T1w brain) and 'probs' (the
# sampling distribution).
input_root = '../../1_Data/1_Input/'

for group, datasets_in_group in design_ae_training.items():
    for dataset, labels_in_dataset in datasets_in_group.items():
        for label, subject_ids in labels_in_dataset.items():

            subjects_of_label = []

            for subject in subject_ids:
                # Folder holding all files of this participant
                subject_dir = input_root+dataset+'/'+subject+'/'

                subjects_of_label.append(
                    tio.Subject(t1=tio.Image(subject_dir+'t1w_1mm_iso_brain.nii.gz', type = tio.INTENSITY),
                                probs=tio.Image(subject_dir+'sampling_distribution.nii.gz', type = tio.INTENSITY)))

            dict_ae_training[group][dataset][label] = subjects_of_label

In [13]:
# Histogram standardization (to mitigate cross-site differences).
# The landmarks are trained on the images of every group, dataset and label
# listed in design_ae_training.

# Folder of every participant, in group -> dataset -> label -> subject order
subject_dirs = ['../../1_Data/1_Input/'+dataset+'/'+subject+'/'
                for group in design_ae_training.keys()
                for dataset in design_ae_training[group].keys()
                for label in design_ae_training[group][dataset].keys()
                for subject in design_ae_training[group][dataset][label]]

# Paths of all T1w images and of all sampling distributions
images_paths = [subject_dir+'t1w_1mm_iso_brain.nii.gz' for subject_dir in subject_dirs]
probs_paths = [subject_dir+'sampling_distribution.nii.gz' for subject_dir in subject_dirs]

# Output files for the trained landmarks
images_landmarks_paths = Path('images_landmarks.npy')
probs_landmarks_paths = Path('probs_landmarks.npy')

# Train one set of landmarks per image type (this runs over every listed file)
images_landmarks = HistogramStandardization.train(images_paths)
probs_landmarks = HistogramStandardization.train(probs_paths)

# NOTE(review): the files are written with torch.save, which does not produce
# NumPy's .npy format despite the extension - presumably they are read back with
# torch.load elsewhere; confirm before changing the format.
torch.save(images_landmarks, images_landmarks_paths)
torch.save(probs_landmarks, probs_landmarks_paths)

# Keys match the image names used in the tio.Subject objects ('t1', 'probs')
landmarks = dict(t1=images_landmarks, probs=probs_landmarks)

standardize = HistogramStandardization(landmarks)

100%|██████████| 1742/1742 [09:44<00:00,  2.98it/s]
100%|██████████| 1742/1742 [06:52<00:00,  4.22it/s]


In [14]:
# Transforms

# Rescale intensities to the range [0, 1]
rescale = RescaleIntensity((0,1))

# Flip along any of the three spatial axes (0, 1, 2).
# Per the TorchIO docs: p is the probability that the transform is applied at
# all, flip_probability the probability of flipping each listed axis.
flip = RandomFlip((0,1,2), flip_probability=0.5, p=0.25)

# Composing transforms 
# transform_train: standardization + rescaling + random flip (augmentation)
# transform_dev:   standardization + rescaling only
transform_train = Compose([standardize, rescale, flip]) 
transform_dev = Compose([standardize, rescale]) 

In [15]:
# Create Torchio datasets, one tio.SubjectsDataset per group / dataset / label.
#
# The 'train' group is built with transform_train (standardization + rescaling +
# random flip); every other group is built with transform_dev (no augmentation).
# Previously all groups - including 'train' - received transform_dev, so the
# augmentation defined in transform_train was never applied.

dataset_ae_training = {}

for group in dict_ae_training.keys():

    # Augmentation is applied to the training subjects only
    group_transform = transform_train if group == 'train' else transform_dev

    dataset_ae_training[group]={}

    for dataset in dict_ae_training[group].keys():

        dataset_ae_training[group][dataset]={}

        for label in dict_ae_training[group][dataset].keys():

            dataset_ae_training[group][dataset][label]=tio.SubjectsDataset(dict_ae_training[group][dataset][label], transform=group_transform)
            

In [16]:
# Sampler
# Size of the extracted patches, one value per spatial axis
patch_size = (24,24,8)
# queue_length and samples_per_volume are passed to tio.Queue when the
# dataloaders are defined
queue_length = 200
samples_per_volume = 5

# 'probs' is the name of the sampling-distribution image stored in every
# tio.Subject; the sampler uses it as the probability map for drawing patches
sampler = tio.data.WeightedSampler(patch_size,'probs')

In [17]:
# Merge all per-dataset / per-label datasets of 'train' and of 'dev_train' into
# one dataset per group (dev_test and test remain as they are)
concatenated_datasets = {
    group: torch.utils.data.ConcatDataset(
        [dataset_of_label
         for datasets_by_label in dataset_ae_training[group].values()
         for dataset_of_label in datasets_by_label.values()])
    for group in ['train','dev_train']
}

In [18]:
# Define dataloaders

dataloader = {}

# train & dev_train: identical settings for both groups
for group in ['train','dev_train']:

    # The queue draws patches from the concatenated dataset with `sampler`;
    # subjects and patches are both shuffled
    patches_queue = tio.Queue(concatenated_datasets[group], queue_length, samples_per_volume, sampler,
                              num_workers=6, shuffle_subjects=True, shuffle_patches=True)

    # Loading is parallelised by the queue (num_workers=6 above), hence
    # num_workers=0 for the DataLoader itself
    dataloader[group] = DataLoader(patches_queue, batch_size=20, num_workers=0)

# dev_test and test
'''
for group in ['dev_test','test']:
    dataloader[group]={}
    for dataset in dataset_ae_training[group].keys():
        dataloader[group][dataset]={}
        for label in dataset_ae_training[group][dataset].keys():
            dataloader[group][dataset][label]=DataLoader(tio.Queue(dataset_ae_training[group][dataset][label], queue_length, samples_per_volume, sampler, num_workers=6, shuffle_subjects=True, shuffle_patches=True), batch_size = 25, num_workers=0)
'''

"\nfor group in ['dev_test','test']:\n    dataloader[group]={}\n    for dataset in dataset_ae_training[group].keys():\n        dataloader[group][dataset]={}\n        for label in dataset_ae_training[group][dataset].keys():\n            dataloader[group][dataset][label]=DataLoader(tio.Queue(dataset_ae_training[group][dataset][label], queue_length, samples_per_volume, sampler, num_workers=6, shuffle_subjects=True, shuffle_patches=True), batch_size = 25, num_workers=0)\n"

In [19]:
# Testing
#model = torch.nn.Identity()

#for patches_batch in dataloader['dev_test']['MCIC']['control']:
    #print(patches_batch)
#    inputs = patches_batch['t1'][tio.DATA]  # key 't1' is in subject
#    targets = patches_batch['t1'][tio.DATA]  # key 'brain' is in subject
#    logits = model(inputs)  # model being an instance of torch.nn.Module

In [20]:
#inputs.shape

#fig = plt.figure(figsize=(20, 10))

#for i in range(inputs.shape[0]):
#    plt.subplot(5,8,i+1)
#    plt.imshow(inputs[i,0,:,:,5],cmap='gray');
    
#plt.show()

# Network and parameters

In [21]:
# Run on the GPU when CUDA is available, otherwise fall back to the CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

print(device)

cuda


In [22]:
#print(sum(p.numel() for p in model.parameters() if p.requires_grad))

In [24]:
# Criterion: mean squared error. train_network evaluates it as
# criterion(output, data), i.e. between the network output and its own input.
#criterion = DiceLoss()
criterion = nn.MSELoss()

# Number of epochs (upper bound; train_network may stop earlier)
# NOTE(review): the progress bars stored in the training outputs show 40 total
# epochs, so those outputs were presumably produced with a different value - confirm.
n_epochs = 50

# Training

In [25]:
# Function training the model and returning the per-epoch loss curves
def train_network(n_epochs, dataloaders, model, optimizer, criterion, device, save_path, patience=5):
    """Train `model` to reconstruct its input, with early stopping.

    Every epoch consists of one pass over dataloaders['train'] (with weight
    updates) followed by one pass over dataloaders['dev_train'] (evaluation
    only). Whenever the mean dev_train loss reaches a new minimum, the model
    weights and the epoch number are written to disk.

    Parameters
    ----------
    n_epochs : int
        Maximum number of epochs.
    dataloaders : mapping
        Must provide the keys 'train' and 'dev_train'; each value is an
        iterable of batches with the tensor stored under batch['t1']['data'].
    model : torch.nn.Module
        Network to train; it is moved to `device` and updated in place.
    optimizer : torch.optim.Optimizer
        Optimizer already bound to the parameters of `model`.
    criterion : callable
        Loss called as criterion(output, data); the input is its own target.
    device : torch.device
        Device on which the model and the batches are placed.
    save_path : str
        Prefix to which 'optimal_weights' and 'number_epochs.txt' are appended
        by plain string concatenation (so a folder needs its trailing slash).
    patience : int, optional
        Training stops once this many epochs have passed without a new minimum
        of the dev_train loss. Defaults to 5, the previously hard-coded value.

    Returns
    -------
    (list of float, list of float)
        Mean training loss per batch and mean dev_train loss per batch, one
        entry per completed epoch. The trained model itself is not returned;
        its best weights are stored at save_path+'optimal_weights'.

    Raises
    ------
    ValueError
        If one of the two dataloaders yields no batch.
    """

    track_train_loss = []
    track_dev_train_loss = []

    # float('inf') rather than np.Inf, which was removed in NumPy 2.0
    valid_loss_min = float('inf')

    # Defined before the loop so that the early-stopping check also works when
    # the dev loss never improves (e.g. when it is NaN from the first epoch on)
    last_updated_epoch = 0

    model.to(device)

    for epoch in tqdm(range(1, n_epochs+1)):

        # train
        model.train()

        train_loss = 0.0
        n_train_batches = 0

        for batch in dataloaders['train']:

            data = batch['t1']['data'].to(device)

            optimizer.zero_grad()

            output = model(data)
            loss = criterion(output, data)
            loss.backward()

            optimizer.step()

            train_loss += loss.item()
            n_train_batches += 1

        # dev_train
        model.eval()

        dev_train_loss = 0.0
        n_dev_batches = 0

        with torch.no_grad():

            for batch in dataloaders['dev_train']:

                data = batch['t1']['data'].to(device)

                output = model(data)

                dev_train_loss += criterion(output, data).item()
                n_dev_batches += 1

        if n_train_batches == 0 or n_dev_batches == 0:
            raise ValueError("dataloaders['train'] and dataloaders['dev_train'] must each yield at least one batch")

        mean_train_loss = train_loss/n_train_batches
        mean_dev_train_loss = dev_train_loss/n_dev_batches

        track_train_loss.append(mean_train_loss)
        track_dev_train_loss.append(mean_dev_train_loss)

        # Both reported values are means over batches (message kept unchanged
        # so that logs stay comparable with earlier runs)
        print('END OF EPOCH: {} \tTraining loss per batch: {:.6f}\tTraining_dev loss per image: {:.6f}'.format(epoch, mean_train_loss, mean_dev_train_loss))

        ## Save the model if reached min validation loss.
        ## The mean (not the sum) is compared, so the decision does not depend
        ## on the number of dev batches seen in an epoch.
        if mean_dev_train_loss < valid_loss_min:
            valid_loss_min = mean_dev_train_loss
            torch.save(model.state_dict(),save_path+'optimal_weights')
            last_updated_epoch = epoch

            with open(save_path+'number_epochs.txt','w') as f:
                print('Epoch:', str(epoch), file=f)

        # Early stopping
        if (epoch - last_updated_epoch) >= patience:
            break

    # return the loss curves (the best weights are on disk)
    return track_train_loss, track_dev_train_loss

In [28]:
# Train the 1-layer networks: one full training run per parameter pair

model_parameters=[[4,1],[2,2]]

folder='../../1_Data/2_Trained_AE/'

for parameters in model_parameters:

    print(parameters)

    # Initialize the proper model; both entries of the pair are forwarded,
    # in order, to the network constructor
    unet = UNet_1_layer(1, 1, *parameters)

    # Optimizer
    optimizer = torch.optim.Adam(params=unet.parameters(), lr=0.001)

    # Create output folder, named after the parameter pair
    data_folder = '{}/1_layer_{}_{}/'.format(folder, *parameters)
    os.makedirs(data_folder, exist_ok=True)

    # Train & save weights (the weights are written by train_network itself)
    train_loss, dev_train_loss = train_network(n_epochs, dataloader, unet, optimizer, criterion, device, data_folder)

    # Save both loss curves next to the weights
    for file_name, losses in [('train_loss.pkl', train_loss), ('dev_train_loss.pkl', dev_train_loss)]:
        with open(data_folder+file_name, 'wb') as loss_file:
            pickle.dump(losses, loss_file)


  0%|          | 0/40 [00:00<?, ?it/s]

[4, 1]


  2%|▎         | 1/40 [11:14<7:18:33, 674.71s/it]

END OF EPOCH: 1 	Training loss per batch: 0.013790	Training_dev loss per image: 0.003050


  5%|▌         | 2/40 [21:05<6:51:22, 649.53s/it]

END OF EPOCH: 2 	Training loss per batch: 0.004300	Training_dev loss per image: 0.003866


  8%|▊         | 3/40 [30:54<6:29:25, 631.49s/it]

END OF EPOCH: 3 	Training loss per batch: 0.003892	Training_dev loss per image: 0.003153


 10%|█         | 4/40 [40:46<6:11:43, 619.53s/it]

END OF EPOCH: 4 	Training loss per batch: 0.003634	Training_dev loss per image: 0.003057


 12%|█▎        | 5/40 [50:35<5:56:07, 610.50s/it]

END OF EPOCH: 5 	Training loss per batch: 0.003564	Training_dev loss per image: 0.002573


 15%|█▌        | 6/40 [1:00:56<5:47:34, 613.38s/it]

END OF EPOCH: 6 	Training loss per batch: 0.003535	Training_dev loss per image: 0.002634


 18%|█▊        | 7/40 [1:10:48<5:33:53, 607.08s/it]

END OF EPOCH: 7 	Training loss per batch: 0.003409	Training_dev loss per image: 0.003134


 20%|██        | 8/40 [1:20:36<5:20:43, 601.35s/it]

END OF EPOCH: 8 	Training loss per batch: 0.003276	Training_dev loss per image: 0.002784


 22%|██▎       | 9/40 [1:30:40<5:11:07, 602.19s/it]

END OF EPOCH: 9 	Training loss per batch: 0.003357	Training_dev loss per image: 0.002936


 25%|██▌       | 10/40 [1:40:33<4:59:40, 599.34s/it]

END OF EPOCH: 10 	Training loss per batch: 0.003476	Training_dev loss per image: 0.002488


 28%|██▊       | 11/40 [1:50:25<4:48:36, 597.11s/it]

END OF EPOCH: 11 	Training loss per batch: 0.003385	Training_dev loss per image: 0.003654


 30%|███       | 12/40 [2:00:27<4:39:22, 598.66s/it]

END OF EPOCH: 12 	Training loss per batch: 0.003485	Training_dev loss per image: 0.002210


 32%|███▎      | 13/40 [2:10:18<4:28:24, 596.45s/it]

END OF EPOCH: 13 	Training loss per batch: 0.003245	Training_dev loss per image: 0.003237


 35%|███▌      | 14/40 [2:20:06<4:17:21, 593.89s/it]

END OF EPOCH: 14 	Training loss per batch: 0.003335	Training_dev loss per image: 0.004479


 38%|███▊      | 15/40 [2:29:57<4:07:05, 593.01s/it]

END OF EPOCH: 15 	Training loss per batch: 0.003374	Training_dev loss per image: 0.002609


 40%|████      | 16/40 [2:39:46<3:56:44, 591.85s/it]

END OF EPOCH: 16 	Training loss per batch: 0.003330	Training_dev loss per image: 0.002777


 42%|████▎     | 17/40 [2:49:52<3:48:31, 596.14s/it]

END OF EPOCH: 17 	Training loss per batch: 0.003302	Training_dev loss per image: 0.002187


 45%|████▌     | 18/40 [2:59:56<3:39:23, 598.34s/it]

END OF EPOCH: 18 	Training loss per batch: 0.003280	Training_dev loss per image: 0.002699


 48%|████▊     | 19/40 [3:09:45<3:28:24, 595.45s/it]

END OF EPOCH: 19 	Training loss per batch: 0.003265	Training_dev loss per image: 0.002605


 50%|█████     | 20/40 [3:19:34<3:17:52, 593.63s/it]

END OF EPOCH: 20 	Training loss per batch: 0.003169	Training_dev loss per image: 0.002218


 52%|█████▎    | 21/40 [3:29:25<3:07:45, 592.94s/it]

END OF EPOCH: 21 	Training loss per batch: 0.003237	Training_dev loss per image: 0.002835


 52%|█████▎    | 21/40 [3:39:17<3:18:24, 626.54s/it]

END OF EPOCH: 22 	Training loss per batch: 0.003259	Training_dev loss per image: 0.002294
[2, 2]



  2%|▎         | 1/40 [10:05<6:33:40, 605.64s/it]

END OF EPOCH: 1 	Training loss per batch: 0.036390	Training_dev loss per image: 0.018867


  5%|▌         | 2/40 [19:55<6:20:32, 600.85s/it]

END OF EPOCH: 2 	Training loss per batch: 0.012927	Training_dev loss per image: 0.008364


  8%|▊         | 3/40 [30:00<6:11:21, 602.21s/it]

END OF EPOCH: 3 	Training loss per batch: 0.007601	Training_dev loss per image: 0.005723


 10%|█         | 4/40 [39:51<5:59:15, 598.76s/it]

END OF EPOCH: 4 	Training loss per batch: 0.005850	Training_dev loss per image: 0.004133


 12%|█▎        | 5/40 [49:42<5:47:51, 596.34s/it]

END OF EPOCH: 5 	Training loss per batch: 0.005151	Training_dev loss per image: 0.006470


 15%|█▌        | 6/40 [59:30<5:36:36, 594.02s/it]

END OF EPOCH: 6 	Training loss per batch: 0.004635	Training_dev loss per image: 0.003179


 18%|█▊        | 7/40 [1:09:31<5:27:51, 596.09s/it]

END OF EPOCH: 7 	Training loss per batch: 0.004244	Training_dev loss per image: 0.003128


 20%|██        | 8/40 [1:19:22<5:17:08, 594.64s/it]

END OF EPOCH: 8 	Training loss per batch: 0.003979	Training_dev loss per image: 0.003118


 22%|██▎       | 9/40 [1:29:13<5:06:34, 593.38s/it]

END OF EPOCH: 9 	Training loss per batch: 0.003656	Training_dev loss per image: 0.004680


 25%|██▌       | 10/40 [1:38:59<4:55:39, 591.32s/it]

END OF EPOCH: 10 	Training loss per batch: 0.003549	Training_dev loss per image: 0.002409


 28%|██▊       | 11/40 [1:49:05<4:47:56, 595.76s/it]

END OF EPOCH: 11 	Training loss per batch: 0.003454	Training_dev loss per image: 0.002330


 30%|███       | 12/40 [1:58:58<4:37:31, 594.68s/it]

END OF EPOCH: 12 	Training loss per batch: 0.003336	Training_dev loss per image: 0.002963


 32%|███▎      | 13/40 [2:09:00<4:28:39, 597.00s/it]

END OF EPOCH: 13 	Training loss per batch: 0.003118	Training_dev loss per image: 0.002060


 35%|███▌      | 14/40 [2:18:51<4:17:53, 595.14s/it]

END OF EPOCH: 14 	Training loss per batch: 0.002923	Training_dev loss per image: 0.001906


 38%|███▊      | 15/40 [2:28:38<4:07:00, 592.84s/it]

END OF EPOCH: 15 	Training loss per batch: 0.003065	Training_dev loss per image: 0.002175


 40%|████      | 16/40 [2:38:32<3:57:13, 593.06s/it]

END OF EPOCH: 16 	Training loss per batch: 0.002844	Training_dev loss per image: 0.004322


 42%|████▎     | 17/40 [2:48:21<3:46:54, 591.93s/it]

END OF EPOCH: 17 	Training loss per batch: 0.002958	Training_dev loss per image: 0.001907


 45%|████▌     | 18/40 [2:58:06<3:36:12, 589.65s/it]

END OF EPOCH: 18 	Training loss per batch: 0.003040	Training_dev loss per image: 0.002330


 45%|████▌     | 18/40 [3:08:26<3:50:19, 628.14s/it]

END OF EPOCH: 19 	Training loss per batch: 0.002893	Training_dev loss per image: 0.002293





In [29]:
# Train the 2-layer networks: one full training run per parameter pair
model_parameters=[[8,2],[32,1]]

folder='../../1_Data/2_Trained_AE/'

for parameters in model_parameters:

    print(parameters)

    # Initialize the proper model; both entries of the pair are forwarded,
    # in order, to the network constructor
    unet = UNet_2_layer(1, 1, *parameters)

    # Optimizer
    optimizer = torch.optim.Adam(params=unet.parameters(), lr=0.0025)

    # Create output folder, named after the parameter pair
    data_folder = '{}/2_layer_{}_{}/'.format(folder, *parameters)
    os.makedirs(data_folder, exist_ok=True)

    # Train & save weights (the weights are written by train_network itself)
    train_loss, dev_train_loss = train_network(n_epochs, dataloader, unet, optimizer, criterion, device, data_folder)

    # Save both loss curves next to the weights
    for file_name, losses in [('train_loss.pkl', train_loss), ('dev_train_loss.pkl', dev_train_loss)]:
        with open(data_folder+file_name, 'wb') as loss_file:
            pickle.dump(losses, loss_file)

  0%|          | 0/40 [00:00<?, ?it/s]

[8, 2]


  2%|▎         | 1/40 [09:51<6:24:38, 591.75s/it]

END OF EPOCH: 1 	Training loss per batch: 0.010423	Training_dev loss per image: 0.004013


  5%|▌         | 2/40 [19:43<6:14:50, 591.86s/it]

END OF EPOCH: 2 	Training loss per batch: 0.005744	Training_dev loss per image: 0.005842


  8%|▊         | 3/40 [29:33<6:04:28, 591.05s/it]

END OF EPOCH: 3 	Training loss per batch: 0.005356	Training_dev loss per image: 0.005457


 10%|█         | 4/40 [39:31<5:56:01, 593.36s/it]

END OF EPOCH: 4 	Training loss per batch: 0.004780	Training_dev loss per image: 0.004222


 12%|█▎        | 5/40 [49:37<5:48:21, 597.19s/it]

END OF EPOCH: 5 	Training loss per batch: 0.004324	Training_dev loss per image: 0.004246


 15%|█▌        | 6/40 [59:28<5:37:13, 595.10s/it]

END OF EPOCH: 6 	Training loss per batch: 0.004343	Training_dev loss per image: 0.003693


 18%|█▊        | 7/40 [1:09:14<5:25:55, 592.58s/it]

END OF EPOCH: 7 	Training loss per batch: 0.004385	Training_dev loss per image: 0.003894


 20%|██        | 8/40 [1:19:19<5:18:03, 596.36s/it]

END OF EPOCH: 8 	Training loss per batch: 0.004158	Training_dev loss per image: 0.003161


 22%|██▎       | 9/40 [1:29:08<5:06:57, 594.10s/it]

END OF EPOCH: 9 	Training loss per batch: 0.004174	Training_dev loss per image: 0.003592


 25%|██▌       | 10/40 [1:39:00<4:56:45, 593.51s/it]

END OF EPOCH: 10 	Training loss per batch: 0.003951	Training_dev loss per image: 0.003575


 28%|██▊       | 11/40 [1:49:06<4:48:34, 597.05s/it]

END OF EPOCH: 11 	Training loss per batch: 0.004004	Training_dev loss per image: 0.003293


 30%|███       | 12/40 [1:58:54<4:37:24, 594.44s/it]

END OF EPOCH: 12 	Training loss per batch: 0.003918	Training_dev loss per image: 0.003558


 30%|███       | 12/40 [2:08:45<5:00:26, 643.80s/it]
  0%|          | 0/40 [00:00<?, ?it/s]

END OF EPOCH: 13 	Training loss per batch: 0.003878	Training_dev loss per image: 0.003623
[32, 1]


  2%|▎         | 1/40 [09:50<6:23:52, 590.59s/it]

END OF EPOCH: 1 	Training loss per batch: 0.009584	Training_dev loss per image: 0.004991


  5%|▌         | 2/40 [19:38<6:13:29, 589.72s/it]

END OF EPOCH: 2 	Training loss per batch: 0.005560	Training_dev loss per image: 0.004536


  8%|▊         | 3/40 [29:43<6:06:32, 594.39s/it]

END OF EPOCH: 3 	Training loss per batch: 0.005314	Training_dev loss per image: 0.004722


 10%|█         | 4/40 [39:48<5:58:30, 597.53s/it]

END OF EPOCH: 4 	Training loss per batch: 0.005083	Training_dev loss per image: 0.003851


 12%|█▎        | 5/40 [49:38<5:47:17, 595.35s/it]

END OF EPOCH: 5 	Training loss per batch: 0.004786	Training_dev loss per image: 0.003976


 15%|█▌        | 6/40 [59:30<5:36:42, 594.19s/it]

END OF EPOCH: 6 	Training loss per batch: 0.004568	Training_dev loss per image: 0.003198


 18%|█▊        | 7/40 [1:09:18<5:25:49, 592.42s/it]

END OF EPOCH: 7 	Training loss per batch: 0.004299	Training_dev loss per image: 0.003526


 20%|██        | 8/40 [1:19:07<5:15:21, 591.30s/it]

END OF EPOCH: 8 	Training loss per batch: 0.004015	Training_dev loss per image: 0.003110


 22%|██▎       | 9/40 [1:29:08<5:07:06, 594.39s/it]

END OF EPOCH: 9 	Training loss per batch: 0.004133	Training_dev loss per image: 0.003044


 25%|██▌       | 10/40 [1:38:58<4:56:28, 592.94s/it]

END OF EPOCH: 10 	Training loss per batch: 0.004009	Training_dev loss per image: 0.002720


 28%|██▊       | 11/40 [1:49:04<4:48:26, 596.78s/it]

END OF EPOCH: 11 	Training loss per batch: 0.004132	Training_dev loss per image: 0.002974


 30%|███       | 12/40 [1:58:50<4:37:01, 593.63s/it]

END OF EPOCH: 12 	Training loss per batch: 0.003720	Training_dev loss per image: 0.002962


 32%|███▎      | 13/40 [2:08:41<4:26:46, 592.85s/it]

END OF EPOCH: 13 	Training loss per batch: 0.003866	Training_dev loss per image: 0.005111


 35%|███▌      | 14/40 [2:18:31<4:16:33, 592.04s/it]

END OF EPOCH: 14 	Training loss per batch: 0.003869	Training_dev loss per image: 0.002767


 35%|███▌      | 14/40 [2:28:33<4:35:54, 636.71s/it]
  0%|          | 0/40 [00:00<?, ?it/s]

END OF EPOCH: 15 	Training loss per batch: 0.003727	Training_dev loss per image: 0.003477
[2, 4]


  2%|▎         | 1/40 [09:48<6:22:35, 588.60s/it]

END OF EPOCH: 1 	Training loss per batch: 0.019419	Training_dev loss per image: 0.008078


  5%|▌         | 2/40 [19:39<6:13:07, 589.15s/it]

END OF EPOCH: 2 	Training loss per batch: 0.005747	Training_dev loss per image: 0.004269


  8%|▊         | 3/40 [29:29<6:03:31, 589.50s/it]

END OF EPOCH: 3 	Training loss per batch: 0.005235	Training_dev loss per image: 0.004489


 10%|█         | 4/40 [39:34<5:56:32, 594.25s/it]

END OF EPOCH: 4 	Training loss per batch: 0.004689	Training_dev loss per image: 0.005386


 12%|█▎        | 5/40 [49:23<5:45:45, 592.71s/it]

END OF EPOCH: 5 	Training loss per batch: 0.004577	Training_dev loss per image: 0.003312


 15%|█▌        | 6/40 [59:30<5:38:17, 596.99s/it]

END OF EPOCH: 6 	Training loss per batch: 0.004503	Training_dev loss per image: 0.004508


 18%|█▊        | 7/40 [1:09:19<5:27:01, 594.58s/it]

END OF EPOCH: 7 	Training loss per batch: 0.004393	Training_dev loss per image: 0.003641


 20%|██        | 8/40 [1:19:10<5:16:30, 593.46s/it]

END OF EPOCH: 8 	Training loss per batch: 0.004349	Training_dev loss per image: 0.003514


 22%|██▎       | 9/40 [1:29:00<5:06:05, 592.44s/it]

END OF EPOCH: 9 	Training loss per batch: 0.004284	Training_dev loss per image: 0.003294


 25%|██▌       | 10/40 [1:38:50<4:55:45, 591.53s/it]

END OF EPOCH: 10 	Training loss per batch: 0.004364	Training_dev loss per image: 0.003213


 28%|██▊       | 11/40 [1:48:33<4:44:39, 588.96s/it]

END OF EPOCH: 11 	Training loss per batch: 0.004061	Training_dev loss per image: 0.003355


 30%|███       | 12/40 [1:58:53<4:39:18, 598.51s/it]

END OF EPOCH: 12 	Training loss per batch: 0.004174	Training_dev loss per image: 0.003942


 32%|███▎      | 13/40 [2:08:45<4:28:23, 596.43s/it]

END OF EPOCH: 13 	Training loss per batch: 0.004101	Training_dev loss per image: 0.003117


 35%|███▌      | 14/40 [2:18:33<4:17:23, 593.99s/it]

END OF EPOCH: 14 	Training loss per batch: 0.004000	Training_dev loss per image: 0.003048


 38%|███▊      | 15/40 [2:28:24<4:07:04, 592.99s/it]

END OF EPOCH: 15 	Training loss per batch: 0.004015	Training_dev loss per image: 0.002812


 40%|████      | 16/40 [2:38:13<3:56:44, 591.84s/it]

END OF EPOCH: 16 	Training loss per batch: 0.003853	Training_dev loss per image: 0.003151


 42%|████▎     | 17/40 [2:48:17<3:48:14, 595.40s/it]

END OF EPOCH: 17 	Training loss per batch: 0.003905	Training_dev loss per image: 0.003214


 45%|████▌     | 18/40 [2:58:06<3:37:40, 593.64s/it]

END OF EPOCH: 18 	Training loss per batch: 0.003879	Training_dev loss per image: 0.003089


 48%|████▊     | 19/40 [3:07:53<3:27:03, 591.59s/it]

END OF EPOCH: 19 	Training loss per batch: 0.004026	Training_dev loss per image: 0.004015


 50%|█████     | 20/40 [3:18:00<3:18:41, 596.06s/it]

END OF EPOCH: 20 	Training loss per batch: 0.003913	Training_dev loss per image: 0.002650


 52%|█████▎    | 21/40 [3:27:48<3:08:00, 593.73s/it]

END OF EPOCH: 21 	Training loss per batch: 0.003870	Training_dev loss per image: 0.007027


 55%|█████▌    | 22/40 [3:37:38<2:57:49, 592.76s/it]

END OF EPOCH: 22 	Training loss per batch: 0.003851	Training_dev loss per image: 0.002907


 57%|█████▊    | 23/40 [3:47:42<2:48:52, 596.06s/it]

END OF EPOCH: 23 	Training loss per batch: 0.003734	Training_dev loss per image: 0.002847


 60%|██████    | 24/40 [3:57:32<2:38:25, 594.09s/it]

END OF EPOCH: 24 	Training loss per batch: 0.003749	Training_dev loss per image: 0.002980


 60%|██████    | 24/40 [4:07:21<2:44:54, 618.40s/it]

END OF EPOCH: 25 	Training loss per batch: 0.003663	Training_dev loss per image: 0.003164



