In [2]:
import numpy as np
import pandas as pd
import time
import timeit
from datetime import datetime
import os
import glob
import natsort
import sys
import matplotlib.pyplot as plt
plt.rcParams['image.cmap'] = 'gray'
import cv2
from PIL import Image
import random
import copy
import warnings
warnings.filterwarnings('ignore')
# import ipynbname
FILENAME = os.getcwd()+'/'+str(__session__).split('/')[-1]

import torch
import torch.nn as nn
import torch.nn.parallel
import torch.backends.cudnn as cudnn
import torch.nn.functional as F
import torchvision
from torchvision import datasets
from torchvision import transforms
from torchvision.transforms import RandomResizedCrop
import torchvision.transforms.functional as TF
from torch.utils.data import Dataset, DataLoader, WeightedRandomSampler
from sklearn.model_selection import train_test_split, KFold

from monai.losses import TverskyLoss as TverskyLoss
from monai.transforms import Compose, ToTensor, RandFlip
from monai.metrics import DiceMetric as Dice_Function
from monai.metrics import compute_iou as IoU_Function
from monai.metrics import ConfusionMatrixMetric

import sys
sys.path.append("..")

# ---------------------------------------------------------------------------
# Experiment configuration. Everything below is a notebook-level global that
# the function-definition cell and the run cell read by name.
# ---------------------------------------------------------------------------

# Package that holds the model modules; each entry of `module_names` is
# imported from it as `<model_dir>.<module_name>`.
model_dir = 'models'
module_names = ['PSA_UNet']
# Root directory that is joined in front of the relative image/mask paths
# listed in the split CSV (see create_dataset_lists).
Dataset_dir = '240925_Herbarium_Dataset' 
data_split_csv='Herbarium_dataset_split.csv'
# The run cell resolves each model name to a class via str_to_class(), so each
# module presumably defines a class with the same name as the module -- TODO confirm.
model_names = module_names

# Star-import every model module so its public names land in this namespace.
for  module_name in module_names:
    exec(f'from {model_dir}.{module_name} import *')

# [first, last] iteration numbers, both inclusive: the run cell loops over
# range(iterations[0], iterations[1]+1) and uses each number both as the random
# seed and to select the `splitNN` rows of the split CSV.
iterations = [1, 10]
# train_size=0.6

# in_channels / number_of_classes are the two positional arguments passed to the
# model constructor in the run cell.
in_channels = 3
number_of_classes=1
epochs = 50 # 125
# Training stops once this many consecutive epochs fail to improve on the best
# validation loss (see the Early_Stop counter in Do_Experiment).
EARLY_STOP = 25  
batch_size = 16

# CUDA device indices. devices[0] is the primary device; more than one entry
# makes the run cell wrap the model in torch.nn.DataParallel.
devices = [0,1]

# Optimizer name handled by Do_Experiment: 'Adam', 'SGD' or 'AdamW'.
optimizer = 'AdamW'
lr = 1e-3
# NOTE(review): Do_Experiment hard-codes its own momentum / weight_decay values,
# and `optim_args` is not referenced anywhere in the visible cells.
momentum = 0.9
weight_decay = 1e-4
optim_args = {'optimizer': optimizer, 'lr': lr, 'momentum': momentum, 'weight_decay': weight_decay}

# Do_Experiment only handles 'CosineAnnealingLR' and reads 'T_max' and 'eta_min'
# from lr_scheduler_args; 'T_0' is not read in the visible cells.
lr_scheduler = 'CosineAnnealingLR'
T_max = epochs
T_0 = epochs
eta_min = 1e-6
lr_scheduler_args = {'lr_scheduler': lr_scheduler, 'T_max': T_max, 'T_0': T_0, 'eta_min': eta_min}

# Loss name handled by Do_Experiment: 'DiceBCELoss' or 'Tversky Focal Loss'.
loss_function = 'DiceBCELoss'
# loss_function = 'Tversky Focal Loss'
# NOTE(review): reduction / gamma / weight are only bundled into
# loss_function_args, which is not referenced in the visible cells.
reduction = 'mean'
gamma = 2.0
weight = None
loss_function_args = {'loss_function': loss_function, 'reduction': reduction, 'gamma': gamma, 'weight': weight}

In [3]:
def control_random_seed(seed, pytorch=True):
    """Seed the Python, NumPy and (optionally) PyTorch random generators.

    Args:
        seed: Integer seed applied to every generator.
        pytorch: When True (default) the torch CPU/CUDA generators are seeded
            as well and, if CUDA is available, cuDNN is switched to
            deterministic mode with benchmarking disabled. When False the
            PyTorch state is left untouched.
    """
    random.seed(seed)
    np.random.seed(seed)
    if not pytorch:
        return
    try:
        torch.manual_seed(seed)
        if torch.cuda.is_available():
            torch.cuda.manual_seed(seed)
            torch.cuda.manual_seed_all(seed)
            torch.backends.cudnn.deterministic = True
            torch.backends.cudnn.benchmark = False
    except Exception as exc:
        # Seeding stays best-effort (a broken CUDA setup must not abort the
        # run), but the failure is reported instead of silently swallowed.
        print(f'control_random_seed: PyTorch seeding failed ({exc})')
        torch.backends.cudnn.benchmark = False
def imread_kor(filePath, mode=cv2.IMREAD_UNCHANGED):
    """Read an image by loading the file with Python I/O and decoding in memory.

    The file is opened by Python rather than by ``cv2.imread`` -- presumably so
    that non-ASCII (e.g. Korean) paths work where OpenCV cannot open them
    (TODO confirm).

    Args:
        filePath: Path of the image file (str); it is UTF-8 encoded before
            being handed to ``open``.
        mode: OpenCV imread flag forwarded to ``cv2.imdecode``.

    Returns:
        Whatever ``cv2.imdecode`` returns for the file contents.
    """
    # The context manager closes the handle; the previous version leaked it.
    with open(filePath.encode("utf-8"), "rb") as stream:
        raw = bytearray(stream.read())
    buffer = np.asarray(raw, dtype=np.uint8)
    return cv2.imdecode(buffer, mode)
def imwrite_kor(filename, img, params=None):
    """Encode ``img`` with OpenCV and write the bytes through Python file I/O.

    The encoder is chosen from the extension of ``filename``. Returns True when
    the image was encoded and written, False when encoding failed or any
    exception occurred (the exception is printed).
    """
    try:
        extension = os.path.splitext(filename)[1]
        ok, encoded = cv2.imencode(extension, img, params)
        if not ok:
            return False
        with open(filename, mode='w+b') as handle:
            encoded.tofile(handle)
            return True
    except Exception as err:
        print(err)
        return False
    
def random_rotation(image, mask, angle_range=(-30, 30)):
    """Rotate ``image`` and ``mask`` by one shared, uniformly drawn angle.

    ``angle_range`` is the (low, high) pair handed to ``random.uniform``.
    Returns the rotated ``(image, mask)`` pair.
    """
    low, high = angle_range[0], angle_range[1]
    theta = random.uniform(low, high)
    rotated_image = TF.rotate(image, theta)
    rotated_mask = TF.rotate(mask, theta)
    return rotated_image, rotated_mask

class ImagesDataset(Dataset):
    """Paired image / binary-mask dataset with optional joint augmentation.

    Each item is ``(image, mask, image_path)``. Both files are read with
    ``imread_kor`` and converted with ``transforms.ToTensor()``; the mask is
    binarised first (pixel value >= 128 becomes 1, anything else 0).

    Args:
        image_path_list: Image file paths.
        target_path_list: Mask file paths, index-aligned with the images.
        aug: When truthy, each of the following is applied with probability
            0.5: random resized crop, flip along spatial axis 0, and random
            rotation.
        crop_size: Output size passed to ``TF.resized_crop`` for the random
            crop. Defaults to the previously hard-coded ``(384, 256)``.
    """
    def __init__(self, image_path_list, target_path_list, aug=False, crop_size=(384, 256)):
        self.image_path_list = image_path_list
        self.target_path_list = target_path_list
        self.transform = transforms.Compose([
                        transforms.ToTensor(),
                ])
        self.aug = aug
        self.crop_size = tuple(crop_size)

    def __len__(self):
        return len(self.image_path_list)

    def __getitem__(self, idx):
        image_path = self.image_path_list[idx]
        mask_path = self.target_path_list[idx]
        image = imread_kor(image_path)
        image = self.transform(image).float()

        mask = imread_kor(mask_path)
        mask = np.where(mask >= 128, 1, 0)
        mask = self.transform(mask).float()
        if self.aug:
            if random.random() < 0.5:
                i, j, h, w = RandomResizedCrop.get_params(image, scale=(0.7, 1.0), ratio=(1, 1))
                image = TF.resized_crop(image, i, j, h, w, self.crop_size)
                # The mask must get the very same crop window, otherwise image
                # and label no longer line up. Nearest-neighbour interpolation
                # keeps the mask strictly binary.
                mask = TF.resized_crop(mask, i, j, h, w, self.crop_size,
                                       interpolation=TF.InterpolationMode.NEAREST)
            if random.random() < 0.5:
                # prob=1 makes the flip unconditional once this branch is taken.
                flip = RandFlip(1, 0)
                image = flip(image)
                mask = flip(mask)
            if random.random() < 0.5:
                image, mask = random_rotation(image, mask)
        return image, mask, image_path

def Pixel_Accuracy(yhat, ytrue, threshold=0.5):
    """Return the fraction of elements whose thresholded prediction equals the target.

    ``yhat`` is binarised with ``yhat > threshold`` and compared element-wise
    with ``ytrue``; the result is a Python float.
    """
    predicted = yhat > threshold
    n_correct = (predicted == ytrue).sum()
    n_total = ytrue.numel()
    return (n_correct.float() / n_total).item()

def Intersection_over_Union(yhat, ytrue, threshold=0.5):
    """Binarise ``yhat`` at ``threshold`` and return the NaN-ignoring mean of MONAI's ``compute_iou``."""
    binary_prediction = yhat > threshold
    iou_values = IoU_Function(binary_prediction, ytrue)
    return iou_values.nanmean().item()
 
def Dice_Coefficient(yhat, ytrue, threshold=0.5):
    """Binarise ``yhat`` at ``threshold`` and return the NaN-ignoring mean of MONAI's ``DiceMetric``."""
    binary_prediction = yhat > threshold
    dice_metric = Dice_Function()
    dice_values = dice_metric(binary_prediction, ytrue)
    return dice_values.nanmean().item()

class DiceBCELoss(nn.Module):
    """Soft-Dice loss plus binary cross-entropy, computed over all elements.

    The ``train`` / ``validate`` functions in this file already apply a sigmoid
    to the network output before calling the criterion, so by default the
    inputs are treated as probabilities. Applying a second sigmoid here (the
    previous behaviour) squashed every prediction into [0.5, 0.73].

    Args:
        weight: Unused; kept for interface compatibility.
        size_average: Unused; kept for interface compatibility.
        from_logits: Set True when the caller passes raw logits; a sigmoid is
            then applied inside ``forward``.
    """
    def __init__(self, weight=None, size_average=True, from_logits=False):
        super(DiceBCELoss, self).__init__()
        self.from_logits = from_logits

    def forward(self, inputs, targets, smooth=1):
        """Return ``BCE + (1 - dice)`` as a scalar tensor.

        Args:
            inputs: Predictions (probabilities, or logits if ``from_logits``).
            targets: Float targets with the same number of elements.
            smooth: Additive smoothing term of the Dice ratio.
        """
        if self.from_logits:
            inputs = torch.sigmoid(inputs)

        # reshape (not view) so non-contiguous tensors are accepted as well
        inputs = inputs.reshape(-1)
        targets = targets.reshape(-1)

        intersection = (inputs * targets).sum()
        dice_loss = 1 - (2.*intersection + smooth)/(inputs.sum() + targets.sum() + smooth)
        BCE = F.binary_cross_entropy(inputs, targets, reduction='mean')
        Dice_BCE = BCE + dice_loss

        return Dice_BCE
    
def Confusion_Matrix(yhat, ytrue, threshold=0.5):
    """Binarise ``yhat`` at ``threshold`` and return ``(recall, precision)``.

    Both values come from MONAI's ``ConfusionMatrixMetric`` configured with
    ``reduction='mean'`` and ``compute_sample=True``.
    """
    binary_prediction = yhat > threshold
    metric = ConfusionMatrixMetric(
        metric_name=["recall", "precision"],
        reduction='mean',
        compute_sample=True,
    )
    metric(binary_prediction, ytrue)
    recall, precision = metric.aggregate()
    return recall, precision
    
def train(train_loader, epoch, model, criterion, optimizer, device):
    """Run one training epoch.

    For every batch the model output is passed through a sigmoid, the loss is
    computed against the target, and one optimizer step is taken. ``epoch`` is
    accepted but not used inside the function.

    Returns:
        The sample-weighted mean loss of the epoch, rounded to 6 decimals.
    """
    model.train()
    epoch_loss = AverageMeter()
    for images, masks, _paths in train_loader:
        images = images.to(device)
        masks = masks.to(device)
        probabilities = torch.sigmoid(model(images))
        batch_loss = criterion(probabilities, masks).float()
        optimizer.zero_grad()
        batch_loss.backward()
        optimizer.step()
        epoch_loss.update(batch_loss.detach().cpu().numpy(), images.shape[0])
    return np.round(epoch_loss.avg, 6)
def validate(validation_loader,
          model, criterion, device,
        model_path=False,
             return_image_paths=False,
          ):
    """Run the model over a loader and collect sigmoid outputs and targets.

    Args:
        validation_loader: Iterable yielding ``(input, target, image_paths)``.
        model: Network to evaluate; it is put into eval mode.
        criterion: Accepted for interface compatibility; not used here.
        device: Device the batches (and a loaded checkpoint) are moved to.
        model_path: Optional checkpoint path. When given, its state dict is
            loaded into ``model`` before evaluation.
        return_image_paths: When truthy, also return the concatenated list of
            image paths.

    Returns:
        ``(outputs, targets)`` or ``(outputs, targets, image_paths)``, where the
        tensors are the per-batch results concatenated along dimension 0.

    Raises:
        ValueError: If the loader yields no batches.
    """
    if model_path:
        # map_location keeps loading independent of the device the checkpoint
        # was saved from.
        model.load_state_dict(torch.load(model_path, map_location=device))
    model.eval()

    # Collect per-batch results and concatenate once at the end; calling
    # torch.cat inside the loop re-copies everything gathered so far.
    output_batches = []
    target_batches = []
    image_paths = []
    with torch.no_grad():
        for batch_input, batch_target, batch_paths in validation_loader:
            batch_input = batch_input.to(device)
            target_batches.append(batch_target.to(device))
            output_batches.append(torch.sigmoid(model(batch_input)))
            if return_image_paths:
                # extend a fresh list so the loader's own batch is not mutated
                image_paths.extend(batch_paths)

    if not output_batches:
        raise ValueError('validate() received a data loader that yields no batches')

    outputs = torch.cat(output_batches, dim=0)
    targets = torch.cat(target_batches, dim=0)
    if return_image_paths:
        return outputs, targets, image_paths
    return outputs, targets

def str_to_class(classname):
    """Look up ``classname`` as an attribute of the module this function lives in."""
    this_module = sys.modules[__name__]
    return getattr(this_module, classname)

def copy_sourcefile(output_dir, src_dir = 'src' ):
    """Snapshot the source files of the current directory next to the outputs.

    Every ``*.py``, ``*.sh``, ``*.ipynb``, ``*.txt`` and ``*.json`` file found
    directly in ``./`` is copied into ``<output_dir>/<src_dir>``, which is
    created if necessary.
    """
    import shutil  # not imported at the top of the file

    source_dir = os.path.join(output_dir, src_dir)
    os.makedirs(source_dir, exist_ok=True)

    # Gather all matches first, then copy.
    patterns = ('*.py', '*.sh', '*.ipynb', '*.txt', '*.json')
    found = []
    for pattern in patterns:
        found.extend(glob.glob(os.path.join('./', pattern)))

    destination = os.path.join(source_dir, '.')
    for path in found:
        shutil.copy(path, destination)
class LossSaver(object):
    """Keeps the per-epoch training and validation losses and writes them to CSV."""

    def __init__(self):
        self.reset()

    def reset(self):
        """Forget all recorded losses."""
        self.train_losses, self.val_losses = [], []

    def update(self, train_loss, val_loss):
        """Record the losses of one more epoch."""
        self.train_losses.append(train_loss)
        self.val_losses.append(val_loss)

    def return_list(self):
        """Return ``(train_losses, val_losses)``."""
        return self.train_losses, self.val_losses

    def save_as_csv(self, csv_file):
        """Write both loss columns to ``csv_file`` with row labels '1 Epoch', '2 Epoch', ..."""
        history = pd.DataFrame({'Train Losses': self.train_losses, 'Validation Losses': self.val_losses})
        history.index = [f"{number} Epoch" for number in range(1, len(history) + 1)]
        history.to_csv(csv_file, index=True)
class AverageMeter(object):
    """Tracks the latest value plus a running weighted sum, count and mean."""

    def __init__(self):
        self.reset()

    def reset(self):
        """Zero every statistic."""
        self.val = self.avg = self.sum = self.count = 0

    def update(self, val, n=1):
        """Add ``val`` with weight ``n`` and refresh the running mean."""
        self.val = val
        self.count += n
        self.sum += val * n
        self.avg = self.sum / self.count
def create_dataset_lists(Dataset_dir, iteration, data_split_csv):
    """Build the train / validation / test path lists for one split.

    The CSV is expected to have the columns ``split``, ``data_type``, ``image``
    and ``mask``. Rows are selected where ``split`` equals ``splitNN`` (the
    iteration zero-padded to two characters) and ``data_type`` is
    ``training``, ``validation`` or ``test``. Every path is prefixed with
    ``Dataset_dir``.

    Returns:
        A 6-tuple of lists: (train images, train masks, validation images,
        validation masks, test images, test masks).
    """
    split_table = pd.read_csv(data_split_csv)
    split_name = 'split' + str(iteration).zfill(2)
    in_split = split_table['split'] == split_name

    path_lists = []
    for subset in ('training', 'validation', 'test'):
        rows = split_table[in_split & (split_table['data_type'] == subset)]
        for column in ('image', 'mask'):
            relative_paths = rows[column].tolist()
            path_lists.append([os.path.join(Dataset_dir, rel) for rel in relative_paths])
    return tuple(path_lists)

def _format_hms(seconds):
    """Format a duration given in seconds as ``HH:MM:SS``."""
    m, s = divmod(seconds, 60)
    h, m = divmod(m, 60)
    return "%02d:%02d:%02d" % (h, m, s)


def Do_Experiment(iteration, model_name, model, train_loader, validation_loader, test_loader, Optimizer, lr,  number_of_classes, epochs, Metrics,df,device, transform):
    """Train one model, evaluate its best checkpoint on the test set, log a result row.

    Besides its arguments the function reads these notebook-level globals:
    ``loss_function``, ``lr_scheduler_args``, ``output_dir``, ``seed``,
    ``EARLY_STOP``, ``Experiments_Time``, ``batch_size`` and ``FILENAME``.

    Args:
        iteration: Iteration number; used in the checkpoint file name and the
            result row.
        model_name: Name used in the checkpoint file name and the result row.
        model: Network to train (already on ``device``).
        train_loader, validation_loader, test_loader: Data loaders yielding
            ``(input, target, image_path)`` batches.
        Optimizer: Optimizer name: 'Adam', 'SGD' or 'AdamW'.
        lr: Learning rate.
        number_of_classes, Metrics, transform: Accepted for interface
            compatibility; not used inside the function.
        epochs: Maximum number of training epochs.
        df: Results DataFrame; one row is appended per call.
        device: Device used for training and evaluation.

    Returns:
        A new DataFrame equal to ``df`` plus the row for this experiment.

    Raises:
        ValueError: If the configured loss, optimizer or LR scheduler name is
            not supported (previously this surfaced later as an
            UnboundLocalError).
    """
    start = timeit.default_timer()
    train_bool=True
    test_bool=True
    if loss_function == 'Tversky Focal Loss':
        criterion=TverskyLoss()
    elif loss_function == 'DiceBCELoss':
        criterion=DiceBCELoss()
    else:
        raise ValueError(f'Unsupported loss_function: {loss_function!r}')
    if Optimizer=='Adam':
        optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    elif Optimizer == 'SGD':
        momentum = 0.9
        weight_decay = 1e-4
        optimizer = torch.optim.SGD(model.parameters(), lr=lr, momentum=momentum ,weight_decay=weight_decay)
    elif Optimizer =='AdamW':
        optimizer = torch.optim.AdamW(model.parameters(), lr=lr, weight_decay=1e-4)
    else:
        raise ValueError(f'Unsupported optimizer: {Optimizer!r}')
    if lr_scheduler_args['lr_scheduler'] == 'CosineAnnealingLR':
        scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max = lr_scheduler_args['T_max'], eta_min = lr_scheduler_args['eta_min'])
    else:
        raise ValueError(f"Unsupported lr_scheduler: {lr_scheduler_args['lr_scheduler']!r}")

    os.makedirs(output_dir, exist_ok = True)
    control_random_seed(seed)
    if train_bool:
        now = datetime.now()
        Train_date=now.strftime("%y%m%d_%H%M%S")
        print('Training Start Time:',Train_date)
        best=9999
        best_epoch=1
        Early_Stop=0
        loss_saver = LossSaver()
        train_start_time = timeit.default_timer()
        for epoch in range(1, epochs+1):
            Train_Loss = train(train_loader, epoch,
              model, criterion, optimizer, device
              )
            scheduler.step()
            outputs, targets = validate(validation_loader,
              model, criterion, device
              )
            # Validation loss is computed once over the whole concatenated set.
            Val_Loss = np.round(criterion(outputs,targets).cpu().numpy(),6)
            iou = np.round(Intersection_over_Union(outputs, targets),3)
            dice = np.round(Dice_Coefficient(outputs, targets),3)
            now = datetime.now()
            date=now.strftime("%y%m%d_%H%M%S")
            print(str(epoch)+'EP('+date+'):',end=' ')
            print('T_Loss: ' + str(Train_Loss), end=' ')
            print('V_Loss: ' + str(Val_Loss), end=' ')
            print('IoU: ' + str(iou), end=' ')
            print('Dice: ' + str(dice), end=' ')

            # The loss history is rewritten every epoch so that it survives an
            # interrupted run.
            loss_saver.update(Train_Loss, Val_Loss)
            loss_saver.save_as_csv(f'{output_dir}/Losses_{Experiments_Time}.csv')
            if Val_Loss<best:
                # New best validation loss: checkpoint and reset the patience counter.
                Early_Stop = 0
                torch.save(model.state_dict(), f'{output_dir}/{Train_date}_{model_name}_Iter_{iteration}.pt')
                best_epoch = epoch
                best = Val_Loss
                print('Best Epoch:',best_epoch,'Loss:',Val_Loss)
            else:
                print('')
                Early_Stop+=1
            if Early_Stop>=EARLY_STOP:
                break
        train_stop_time = timeit.default_timer()
    if test_bool:
        now = datetime.now()
        date=now.strftime("%y%m%d_%H%M%S")
        print('Test Start Time:',date)
        # Evaluate the best checkpoint, not the weights of the last epoch.
        outputs, targets, image_paths = validate(test_loader,
              model, criterion, device,
            model_path=f'{output_dir}/{Train_date}_{model_name}_Iter_{iteration}.pt',
                       return_image_paths=True
              )
        Loss = np.round(criterion(outputs,targets).cpu().numpy(),6)
        pa = np.round(Pixel_Accuracy(outputs.cpu(), targets.cpu()),3)
        iou = np.round(Intersection_over_Union(outputs, targets),3)
        dice = np.round(Dice_Coefficient(outputs, targets),3)
        recall, precision = Confusion_Matrix(outputs, targets)
        recall = np.round(recall.cpu().numpy()[0],3)
        precision = np.round(precision.cpu().numpy()[0],3)

        now = datetime.now()
        date=now.strftime("%y%m%d_%H%M%S")
        print('Best Epoch:',best_epoch)
        print('Test('+date+'): '+'Loss: ' + str(Loss),end=' ')
        print('PA: ' + str(pa), end=' ')
        print('IoU: ' + str(iou), end=' ')
        print('Dice: ' + str(dice), end=' ')
        print('Recall: ' + str(recall), end=' ')
        print('Precision: ' + str(precision), end=' ')

        stop = timeit.default_timer()
        # `epoch` is the last epoch that actually ran (early stopping included).
        Time_per_Epoch = _format_hms((train_stop_time - train_start_time)/epoch)
        Time = _format_hms(stop - start)
        total_params = sum(p.numel() for p in model.parameters())
        total_params = format(total_params , ',')
        Performances = [Experiments_Time, Train_date, iteration, model_name, best, Loss, pa, iou, dice, recall, precision, total_params,Time, best_epoch, Time_per_Epoch, loss_function, lr, batch_size, epochs, FILENAME]
        # DataFrame.append was removed in pandas 2.0; concat a one-row frame instead.
        result_row = pd.Series(Performances, index=df.columns).to_frame().T
        df = pd.concat([df, result_row], ignore_index=True)
        os.makedirs(f'{output_dir}/test_outputs', exist_ok = True)
        outputs = outputs.cpu().numpy()
        for output, image_path in zip(outputs, image_paths):
            np.save(f'{output_dir}/test_outputs/{os.path.basename(image_path)}', output)
    now = datetime.now()
    date=now.strftime("%y%m%d_%H%M%S")
    print('End',date)

    return df

In [None]:
# ---------------------------------------------------------------------------
# Driver: for every iteration (which also serves as seed and split number)
# build the loaders, train and test every configured model, and rewrite the
# results CSV after each experiment.
# ---------------------------------------------------------------------------
now = datetime.now()
Experiments_Time=now.strftime("%y%m%d_%H%M%S")
print('Experiment Start Time:',Experiments_Time)
# Column order must match the `Performances` row built in Do_Experiment.
Metrics=['Experiment Time','Train Time', 'Iteration','Model Name', 'Val_Loss', 'Test_Loss','PA', 'IoU', 'Dice', 'Recall', 'Precision', 'Total Params','Train-Predction Time','Best Epoch','Time per Epoch', 'Loss Function', 'LR', 'Batch size', '#Epochs', 'DIR']
df = pd.DataFrame(index=None, columns=Metrics)
output_root = f'output/output_{Experiments_Time}'
os.makedirs(output_root, exist_ok = True)

for iteration in range(iterations[0], iterations[1]+1):
    seed = iteration
    control_random_seed(seed)
    (train_image_path_list, train_target_path_list,
     validation_image_path_list, validation_target_path_list,
     test_image_path_list, test_target_path_list) = create_dataset_lists(Dataset_dir, iteration, data_split_csv)

    # Debug switch: uncomment to run on the first 100 samples of each subset.
    # train_image_path_list = natsort.natsorted(train_image_path_list[:100])
    # train_target_path_list = natsort.natsorted(train_target_path_list[:100])
    # validation_image_path_list  = natsort.natsorted(validation_image_path_list[:100])
    # validation_target_path_list = natsort.natsorted(validation_target_path_list[:100])
    # test_image_path_list = natsort.natsorted(test_image_path_list[:100])
    # test_target_path_list= natsort.natsorted(test_target_path_list[:100])

    # Only the training set is augmented.
    train_dataset = ImagesDataset(train_image_path_list, train_target_path_list, aug=True)
    validation_dataset = ImagesDataset(validation_image_path_list, validation_target_path_list, aug=False)
    test_dataset = ImagesDataset(test_image_path_list, test_target_path_list, aug=False)
    train_loader = torch.utils.data.DataLoader(
        train_dataset, batch_size=batch_size,
        num_workers=4, pin_memory=True, shuffle=True, drop_last=True,
    )
    validation_loader = torch.utils.data.DataLoader(
        validation_dataset, batch_size=batch_size,
        num_workers=4, pin_memory=True,
    )
    test_loader = torch.utils.data.DataLoader(
        test_dataset, batch_size=batch_size,
        num_workers=4, pin_memory=True,
    )

    for model_name in model_names:
        print(f'{model_name} (Iter {iteration})')
        # Do_Experiment reads `output_dir` and `seed` as globals.
        output_dir = output_root + f'/{model_name}_Iter_{iteration}'
        copy_sourcefile(output_dir, src_dir='src')
        # Re-seed so weight initialisation does not depend on earlier models.
        control_random_seed(seed)
        model=str_to_class(model_name)(in_channels, number_of_classes)
        device = torch.device("cuda:"+str(devices[0]))
        if len(devices)>1:
            model = torch.nn.DataParallel(model, device_ids = devices ).to(device)
        else:
            model = model.to(device)
        df = Do_Experiment(seed, model_name, model, train_loader, validation_loader, test_loader,  optimizer, lr,  number_of_classes, epochs, Metrics, df, device,None)
        try:
            df.to_csv(output_root+'/'+'Plant_Seg_'+Experiments_Time+'.csv', index=False, header=True, encoding="cp949")
        except OSError:
            # The results file could not be written (for example it is locked
            # by another program): fall back to a timestamped copy. Only
            # OSError is caught so interrupts and real bugs still surface.
            now = datetime.now()
            tmp_date=now.strftime("%y%m%d_%H%M%S")
            df.to_csv(output_root+'/'+'Plant_Seg_'+Experiments_Time+'_'+tmp_date+'_tmp'+'.csv', index=False, header=True, encoding="cp949")
print('End')
# NOTE: os._exit terminates the process immediately without running cleanup
# handlers; inside Jupyter this kills the kernel.
os._exit(00)

Experiment Start Time: 241016_101149
ObsNet_Remove_att (Iter 2)
Training Start Time: 241016_101150
