In [1]:
import argparse
import os
import copy
import logging
import torch
import torch.nn as nn
from torchvision import *
import torchvision.models
import numpy as np
import pandas as pd
import random
import sys
import time
import re
from PIL import Image

from sklearn.metrics import roc_auc_score

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
#sys.path.append(r"../python_files/")

import python_files.utils as utils
import python_files.dataloader_dark as dataloader
import python_files.models as models
import python_files.factory as factory

# args

In [3]:
parser = argparse.ArgumentParser()

parser.add_argument('--report-freq', type=int, default=10, help='logging frequency')
parser.add_argument('--tune-mode', type=str, default='fine-tune', choices=['fine-tune', 'feature-extract'], help='tuning mode' )
parser.add_argument('--backbone', type=str, default='resnet50', choices=['resnet50', 'vgg19', 'inception_v3'], 
                    help='backbone architecture' )
parser.add_argument('--cls-type', type=str, default='single', choices=['single', 
                   'double', 'double-bn', 'double-dropout'], help='classifier architecture' )
parser.add_argument('--hidden-dim', type=int, default=512, help='hidden dimension of classifier' )
parser.add_argument('--record-root-dir', type=str, default='./record-data', help='record data root dir' )
parser.add_argument('--exp', type=str, default='default_exp', help='name of experiment' )
parser.add_argument('--batch-size', type=int, default=8, help='batch size' )
parser.add_argument('--num-workers', type=int, default=4, help='number of processes working on cpu.')
parser.add_argument('--num-classes', type=int, default=5, help='number of classes')
parser.add_argument('--num-epochs', type=int, default=20,  help='number of epochs.')
parser.add_argument( '--num-steps', type=int, default=-1, help='number of steps per epoch. '+ '-1 means use entire data' )
parser.add_argument('--learn-rate', type=float, default=1e-3, help='learning rate for gradient descent')
parser.add_argument('--weight-decay', type=float, default=1e-3, help='weight decay for optimization')
parser.add_argument('--resume', action='store_true', help='resume experiment <exp> from last checkpoint' )
parser.add_argument('--input-dir', type=str, default= r'../h5py/', help='data root dir' )
parser.add_argument('--save-name', type=str, default='model.pt', help='saved model name' )

args, unknown =  parser.parse_known_args()

In [4]:
# device
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'

args.num_epochs = 30
args.learn_rate = 1e-4
args.weight_decay = 1e-4 
args.batch_size = 8
args.input_dir1 = '../../Data/' 
args.backbone = 'resnet50'
args.cls_type = 'single'
args.exp = 'Mel_detection'
args.save_name = 'Resnet50'
args.num_classes = 1

# Auxiliary functions

In [5]:
def setup_logger(args):
    name = args.exp
    exp_dir = os.path.join( args.record_root_dir, name )
    
    log_format = '%(asctime)s %(message)s'
    logging.basicConfig(stream=sys.stdout, level=logging.INFO, format=log_format, datefmt='%m/%d %I:%M:%S %p')
    fh = logging.FileHandler(os.path.join(exp_dir, 'log.txt'))
    fh.setFormatter(logging.Formatter(log_format))
    logging.getLogger().addHandler(fh)
    log( f'Exp Name: {name}\n\n' )
    log( f'Results will be stored in {exp_dir}' )

In [6]:
# checkpoint.
def load_experiment(args):
    name = args.exp
    exp_dir = os.path.join( args.record_root_dir, name )
    
    os.makedirs(args.record_root_dir, exist_ok=True)

    if os.path.exists(exp_dir):
        if not args.resume:
            # if resume option is not specified
            # check to make sure exp_dir is empty
            files = os.listdir( exp_dir )
            if len(files) > 1:
                print( f'exp dir: {exp_dir} not empty. ' +
                        'Please delete all files' +
                        ' in dir and rerun train command.' )
                #import pdb; pdb.set_trace()
                exit( 1 )
        else:
            print('Hi')
    else:
        os.makedirs(exp_dir)
    setup_logger(args)

In [7]:
def sig(x):
    return 1/(1 + np.exp(-x))

In [8]:
def log( log_str ):
    logging.info( log_str )

In [9]:
def log_epoch_stats(prefix, batch_id, num_steps, loss, acc, mAUC, data_time, batch_time ):
    
    log( f'| {prefix} | ' +
        f'EPOCH [{current_epoch+1:02d}/{num_epochs:02d}] ' +
        f'Step [{batch_id+1:04d}/{num_steps:04d}] ' +
        f'Loss: {loss:.2f} ' +
        f'acc: {acc:.2f} ' +
        f'mAUC: {mAUC:.2f} ' +
        f'Data time: {data_time:.2f} ' +
        f'Batch time: {batch_time:.2f} ' )

In [10]:
def log_auc_scores( auc_scores ):
    ATTR_TO_INDEX = {
    'Mel' : 0
    }

    INDEX_TO_ATTR = { idx:attr for idx, attr in ATTR_TO_INDEX.items() }
    
    
    log_str = 'AUC Scores: '
    for attr, idx in ATTR_TO_INDEX.items():
        log_str += f'{attr}: {auc_scores:.2f} '
    log_str += f'mAUC: {auc_scores.mean():.2f} '
    log(log_str)

In [11]:
def save_model(model, best_model, optimizer, current_epoch, args, save_name='model.pt'):
    name = args.exp
    save_name = args.save_name
    exp_dir = os.path.join( args.record_root_dir, name )
    
    #Each epoch model save
    save_path = os.path.join( exp_dir, save_name + '_model.pt' )
    model_dict = model.state_dict()
    opt_dict = optimizer.state_dict()
    epoch = current_epoch+1
    torch.save( { 'model': model_dict, 'opt': opt_dict, 'epoch': epoch }, save_path )
    
    # Best model
    save_path = os.path.join( exp_dir, save_name + '_best_model.pt' )
    model_dict = best_model.state_dict()
    epoch = best_epoch
    torch.save( { 'model': model_dict, 'epoch': epoch }, save_path )

In [12]:
def save_stats(train_loss,train_acc,train_auc_scores, train_data_time,
               train_batch_time, val_loss, val_acc, val_auc_scores, val_data_time, val_batch_time, args):
    name = args.exp
    exp_dir = os.path.join( args.record_root_dir, name )
    
    save_path = os.path.join( exp_dir, 'stats.pt' )
    stat_dict = {
        'train_loss': train_loss,
        'train_acc': train_acc,
        'train_auc_scores': train_auc_scores,
        'train_data_time': train_data_time,
        'train_batch_time': train_batch_time,
        'val_loss': val_loss,
        'val_acc': val_acc,
        'val_auc_scores': val_auc_scores,
        'val_data_time': val_data_time,
        'val_batch_time': val_batch_time,
        'args': args,
    }
    torch.save( stat_dict, save_path )

# Load model

In [13]:
class Model(nn.Module):
    def __init__(self):
        super().__init__()
        self.base_model = torchvision.models.resnet50(pretrained=True)
        self.base_layers = list(self.base_model.children())
        
        self.layer0 = nn.Sequential(*self.base_layers[:3])
        self.layer1 = nn.Sequential(*self.base_layers[3:5]) # size=(N, 256, x.H/4, x.W/4)
        self.layer2 = self.base_layers[5]  # size=(N, 512, x.H/8, x.W/8)
        self.layer3 = self.base_layers[6]  # size=(N, 1024, x.H/16, x.W/16)
        self.layer4 = self.base_layers[7]  # size=(N, 2048, x.H/32, x.W/32)
        self.Avgpooling = nn.Sequential(self.base_layers[8])
        
        self.mlp = torch.nn.Sequential( torch.nn.Linear(2048,1), )# only one linear layer on top
        
    def forward(self, X):
        layer0 = self.layer0(X)  # layer0:  torch.Size([1, 64, 256, 256])
        layer1 = self.layer1(layer0) # layer1:  torch.Size([1, 256, 128, 128])
        layer2 = self.layer2(layer1) # layer2:  torch.Size([1, 512, 64, 64])
        layer3 = self.layer3(layer2) # layer3:  torch.Size([1, 1024, 32, 32])
        layer4 = self.layer4(layer3) # layer4:  torch.Size([1, 2048, 16, 16])
        out    = self.Avgpooling(layer4)
        
        out1 = torch.squeeze(out) 
        if X.size(0) == 1: # torch.Size([1, 2048, 1, 1]) => torch.Size([2048])
            out1 = torch.unsqueeze(out1, 0) # torch.Size([1, 2048])
        out = self.mlp(out1)
        return out1, out

# Streaks

In [14]:
net = Model()
streaks_net = torch.nn.DataParallel(net)
streaks_net = streaks_net.to( DEVICE )

  f"The parameter '{pretrained_param}' is deprecated since 0.13 and may be removed in the future, "


In [15]:
#Model path
Main_folder = '../F(CLR) + F(Res) + F(Seg)/'
record_data_folder = "streaks/record-data/"
choosed_folder = 'SIMCLR+Resnet50+Unet/'
model_name = 'SIMCLR+Resnet50+Unet_best_model.pt'
streaks_Model_path = os.path.join(Main_folder, record_data_folder, choosed_folder, model_name)

In [16]:
checkpoint = torch.load(streaks_Model_path, map_location='cuda:0')
streaks_net.load_state_dict(checkpoint['model'])

<All keys matched successfully>

# Pigment

In [17]:
net = Model()
pigment_net = torch.nn.DataParallel(net)
pigment_net = pigment_net.to( DEVICE )

In [18]:
#Model path
Main_folder = '../F(CLR) + F(Res) + F(Seg)/'
record_data_folder = "pigment/record-data/"
choosed_folder = 'SIMCLR+Resnet50+Unet/'
model_name = 'SIMCLR+Resnet50+Unet_best_model.pt'
pigment_Model_path = os.path.join(Main_folder, record_data_folder, choosed_folder, model_name)

In [19]:
checkpoint = torch.load(pigment_Model_path, map_location='cuda:0')
pigment_net.load_state_dict(checkpoint['model'])

<All keys matched successfully>

# Negative

In [20]:
net = Model()
negative_net = torch.nn.DataParallel(net)
negative_net = negative_net.to( DEVICE )

In [21]:
#Model path
Main_folder = '../F(CLR) + F(Res) + F(Seg)/'
record_data_folder = "negative/record-data/"
choosed_folder = 'SIMCLR+Resnet50+Unet/'
model_name = 'SIMCLR+Resnet50+Unet_best_model.pt'
negative_Model_path = os.path.join(Main_folder, record_data_folder, choosed_folder, model_name)

In [22]:
checkpoint = torch.load(negative_Model_path, map_location='cuda:0')
negative_net.load_state_dict(checkpoint['model'])

<All keys matched successfully>

# Milia

In [23]:
net = Model()
milia_net = torch.nn.DataParallel(net)
milia_net = milia_net.to( DEVICE )

In [24]:
#Model path
Main_folder = '../F(CLR) + F(Res) + F(Seg)/'
record_data_folder = "milia_like_cyst/record-data/"
choosed_folder = 'SIMCLR+Resnet50+Unet/'
model_name = 'SIMCLR+Resnet50+Unet_best_model.pt'
milia_Model_path = os.path.join(Main_folder, record_data_folder, choosed_folder, model_name)

In [25]:
checkpoint = torch.load(milia_Model_path, map_location='cuda:0')
milia_net.load_state_dict(checkpoint['model'])

<All keys matched successfully>

# Globules

In [26]:
net = Model()
globules_net = torch.nn.DataParallel(net)
globules_net = globules_net.to( DEVICE )

In [27]:
#Model path
Main_folder = '../F(CLR) + F(Res) + F(Seg)/'
record_data_folder = "globules/record-data/"
choosed_folder = 'SIMCLR+Resnet50+Unet/'
model_name = 'SIMCLR+Resnet50+Unet_best_model.pt'
globules_Model_path = os.path.join(Main_folder, record_data_folder, choosed_folder, model_name)

In [28]:
checkpoint = torch.load(globules_Model_path, map_location='cuda:0')
globules_net.load_state_dict(checkpoint['model'])

<All keys matched successfully>

# softmax

In [29]:
class LogisticRegression(torch.nn.Module): 
    def __init__(self, input_dim = 10240, output_dim = 1): 
        super(LogisticRegression, self).__init__() 
        #self.linear = torch.nn.Linear(input_dim, output_dim) 
        self.linear1 = torch.nn.Linear(input_dim, 2048) 
        self.linear2 = torch.nn.Linear(2048, 512) 
        self.linear3 = torch.nn.Linear(512, output_dim) 
        
    def forward(self, x): 
        #outputs = torch.sigmoid(self.linear(x)) 
        output1 = self.linear1(x)
        output2 = self.linear2(output1)
        output3 = self.linear3(output2)
        
        return nn.Sigmoid()(output3).type(torch.float64)

# init

In [30]:
criterion = nn.BCELoss()
model = LogisticRegression()

optimizer = torch.optim.Adam(
            params=filter( lambda p: p.requires_grad, model.parameters() ),
            lr=args.learn_rate,
            weight_decay=args.weight_decay )

#init model
criterion.to( DEVICE )
model.to( DEVICE )

LogisticRegression(
  (linear1): Linear(in_features=10240, out_features=2048, bias=True)
  (linear2): Linear(in_features=2048, out_features=512, bias=True)
  (linear3): Linear(in_features=512, out_features=1, bias=True)
)

In [31]:
#Model path
record_data_folder = "record-data/"
choosed_folder = 'Mel_detection/'
model_name = 'Resnet50_best_model.pt'
Logistic_Model_path = os.path.join(record_data_folder, choosed_folder, model_name)

In [32]:
checkpoint = torch.load(Logistic_Model_path, map_location='cuda:0')
model.load_state_dict(checkpoint['model'])

<All keys matched successfully>

# Create list/seed

In [33]:
# stats
best_loss = np.inf
train_loss = []
train_acc = []
train_auc_scores = []
train_data_time = []
train_batch_time = []
val_loss = []
val_acc = []
val_auc_scores = []
val_data_time = []
val_batch_time = []


# set seed
seed = random.randint(0, 1e5)
torch.manual_seed( seed )
random.seed( seed )
np.random.seed( seed )

# load exp
load_experiment(args)
log( f'seed is: {seed}' )
log( f'args: {args}' )

exp dir: ./record-data\Mel_detection not empty. Please delete all files in dir and rerun train command.
04/28 06:50:09 AM Exp Name: Mel_detection


04/28 06:50:09 AM Results will be stored in ./record-data\Mel_detection
04/28 06:50:09 AM seed is: 43507
04/28 06:50:09 AM args: Namespace(backbone='resnet50', batch_size=8, cls_type='single', exp='Mel_detection', hidden_dim=512, input_dir='../h5py/', input_dir1='../../Data/', learn_rate=0.0001, num_classes=1, num_epochs=30, num_steps=-1, num_workers=4, record_root_dir='./record-data', report_freq=10, resume=False, save_name='Resnet50', tune_mode='fine-tune', weight_decay=0.0001)


In [34]:
name = args.exp
exp_dir = os.path.join( args.record_root_dir, name )
args = args

# dataloader
num_classes = 1
dataloader = dataloader.get_dataloader( args )

# experiment params
num_epochs = args.num_epochs
current_epoch = 0
best_epoch = 0
report_freq = args.report_freq

# Train; Val

In [35]:
def concate_5120(imgs):
    
    with torch.no_grad():
        
        globules_net.eval()
        negative_net.eval()
        milia_net.eval()
        pigment_net.eval()
        streaks_net.eval()
        
        pred1, _ = globules_net( imgs )
        pred2, _  = negative_net( imgs )
        pred3, _  = milia_net( imgs )
        pred4, _  = pigment_net( imgs )
        pred5, _  = streaks_net( imgs )
        
        concate_input = torch.cat((pred1, pred2, pred3, pred4, pred5), 1)
        
    return concate_input

In [36]:
#********************val********************
model.eval()
loss_meter = utils.AverageMeter()
acc_meter = utils.AverageMeter()
data_time_meter = utils.AverageMeter()
batch_time_meter = utils.AverageMeter()
all_steps = len( dataloader[ 'dark' ] )
num_steps = args.num_steps if args.num_steps > 0 else all_steps
all_preds, all_labels = [], []
start = time.time()
# import pdb; pdb.set_trace()
with torch.no_grad():
    for batch_id, ( img_ids, imgs, labels, masks ) in enumerate(dataloader[ 'dark' ]):
        data_time = time.time() - start
        # break if we have iterated the required number of steps
        if batch_id >= num_steps: break
        # move data to device
        batch_size = len( imgs )
        imgs = imgs.to( DEVICE )
        labels = labels.to( DEVICE )
        # get model predictions
        #pred = model( imgs )
        concate_input = concate_5120(imgs)
        pred = model( concate_input )
        pred = torch.squeeze(pred,1) # [8,1]  => [8]

        loss = criterion( pred, labels )
        # stats
        loss_meter.update( loss.item(), batch_size )
        pred, labels = pred.detach().cpu(), labels.cpu()
        acc = utils.accuracy( pred, labels )
        acc_meter.update( acc, batch_size )
        all_preds.append( pred )
        all_labels.append( labels )
        batch_time = time.time() - start
        data_time_meter.update( data_time )
        batch_time_meter.update( batch_time )
        # log
        if ( batch_id+1 ) % report_freq == 0:
            log_epoch_stats( 'VAL', batch_id, num_steps, loss_meter.avg, acc_meter.avg, 0, data_time, batch_time )
        start = time.time()

val_loss.append( loss_meter.avg )
val_acc.append( acc_meter.avg )
val_batch_time.append( batch_time_meter.avg )
val_data_time.append( data_time_meter.avg )
all_labels = torch.cat(all_labels, dim=0)
all_preds = torch.cat(all_preds, dim=0)
auc_scores = roc_auc_score(all_labels, all_preds, average=None)
val_auc_scores.append( auc_scores )
mean_auc_score = auc_scores.mean()

log_epoch_stats( 'VAL', batch_id, num_steps, loss_meter.avg, acc_meter.avg, mean_auc_score,
            data_time_meter.avg, batch_time_meter.avg )
log_auc_scores( auc_scores )

04/28 06:50:12 AM | VAL | EPOCH [01/30] Step [0003/0003] Loss: 1.02 acc: 0.41 mAUC: 0.74 Data time: 0.31 Batch time: 0.89 
04/28 06:50:12 AM AUC Scores: Mel: 0.74 mAUC: 0.74 
