In [None]:
!pip install efficientnet_pytorch torchtoolbox

In [None]:
import numpy as np
import pandas as pd
from pandas.api.types import is_string_dtype, is_numeric_dtype, is_categorical_dtype

import matplotlib.pyplot as plt
import seaborn as sns

import torch
import torchvision
import torch.nn.functional as F
import torch.nn as nn
from torch.utils.data import DataLoader, Dataset
from torch.optim.lr_scheduler import ReduceLROnPlateau 
import torchtoolbox.transform as transforms
from sklearn.metrics import accuracy_score, roc_auc_score
from sklearn.model_selection import StratifiedKFold, GroupKFold
from efficientnet_pytorch import EfficientNet

import os 
import gc

import cv2
import datetime
import random
import warnings
import time

from plotly.offline import iplot
import cufflinks
# Configure cufflinks (plotly bindings for pandas) for offline use with the 'pearl' theme.
cufflinks.go_offline()
cufflinks.set_config_file(world_readable=True, theme='pearl')

# Suppress all Python warnings for the rest of the session.
warnings.simplefilter('ignore')
# Remove pandas' column/row display limits, so displayed frames are never truncated.
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

In [None]:
warnings.simplefilter('ignore')


def seed_everything(seed):
    """Seed every random number generator this notebook relies on.

    Seeds Python's `random`, the hash seed environment variable, NumPy and
    PyTorch (CPU and all CUDA devices), and puts cuDNN into deterministic mode.

    Args:
        seed: integer seed shared by all generators.
    """
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # Covers every visible GPU, not only the current one; safe to call without CUDA.
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # Auto-tuning (benchmark=True) may select different kernels from run to run,
    # which defeats the deterministic flag set above.
    torch.backends.cudnn.benchmark = False


seed_everything(47)

In [None]:
# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [None]:
# Load the train/test metadata tables that ship with the 256x256 JPEG dataset.
train_csv = pd.read_csv('../input/jpeg-melanoma-256x256/train.csv')
test_csv = pd.read_csv('../input/jpeg-melanoma-256x256/test.csv')

In [None]:
# Missing-value count per column of the training metadata.
train_csv.isnull().sum()

In [None]:
# Impute missing ages with the median age of the training set.
# The same training-set statistic is applied to the test frame so that NaN
# never reaches the model's meta-feature branch at inference time.
age_filler = train_csv['age_approx'].median()
train_csv['age_approx'] = train_csv['age_approx'].fillna(age_filler)
test_csv['age_approx'] = test_csv['age_approx'].fillna(age_filler)

In [None]:
# Encode sex as male=1 / female=0; any other or missing value becomes NaN
# after the mapping and is then replaced by the sentinel -1.
# Train and test are treated identically so the model sees the same encoding.
train_csv['sex'] = train_csv['sex'].map({'male':1, 'female':0})
test_csv['sex']  = test_csv['sex'].map({'male':1, 'female':0})
train_csv['sex'] = train_csv['sex'].fillna(-1)
test_csv['sex']  = test_csv['sex'].fillna(-1)


In [None]:
# One-hot encode the anatomical site on the stacked train+test column so both
# frames end up with the same set of `site_*` columns.
concat = pd.concat([train_csv['anatom_site_general_challenge'], test_csv['anatom_site_general_challenge']], ignore_index = True)
dummies = pd.get_dummies(concat, dtype = np.uint8, prefix = 'site')

# The first len(train_csv) rows of `dummies` belong to train, the remainder to test.
# Only columns are appended to train_csv, so its row count is still valid for the test slice.
train_csv = pd.concat([train_csv, dummies.iloc[:train_csv.shape[0]]], axis=1)
test_csv  = pd.concat([test_csv,  dummies.iloc[train_csv.shape[0]: ].reset_index(drop=True)],axis = 1)

In [None]:
# Preview the training frame, including the one-hot site_* columns added above.
train_csv.head()

In [None]:
# Preview the test frame, including the one-hot site_* columns added above.
test_csv.head()

In [None]:
# The raw site column is now redundant with its one-hot site_* encoding.
train_csv = train_csv.drop(columns = ['anatom_site_general_challenge'])
test_csv = test_csv.drop(columns = ['anatom_site_general_challenge'])

In [None]:
# Assign every training row to one of 8 stratified validation folds (1..8).
train_csv['fold'] = -1

# A fixed random_state makes the split reproducible even when this cell is
# re-run on its own; checkpoints trained on "fold 1" are only meaningful if
# fold 1 always contains the same rows.
skf = StratifiedKFold(n_splits=8, shuffle=True, random_state=47)

for fold, (train_idx, val_idx) in enumerate(skf.split(X = train_csv, y = train_csv['target'].values)):
    # Fold ids are 1-based; each row appears in exactly one validation split.
    train_csv.loc[val_idx, 'fold'] = fold + 1

# Persist the fold assignment alongside the metadata.
train_csv.to_csv('train8Fold.csv')

In [None]:
# Tabular inputs for the model: sex, age and the one-hot `site_*` columns.
# Matching the dummy prefix (rather than any name containing 'site') keeps
# unrelated or non-numeric columns out of the float32 meta vector.
meta_features = ['sex', 'age_approx'] + [f for f in train_csv.columns if f.startswith('site_')]

In [None]:
# Display the list of metadata columns fed to the model.
meta_features

In [None]:
class Microscope:
  """Randomly black out everything outside a centred circle.

  With probability `p` the image is multiplied by a 0/1 mask that is 1 inside
  a filled circle around the image centre and 0 elsewhere.

  Args:
    p: probability of applying the effect to a given image.
  """

  def __init__(self, p = 0.5):
    self.p = p

  def __call__(self, img):
    """Return `img` masked by a random centred circle, or unchanged.

    Args:
      img: image array whose first two axes are (height, width).
    """
    if random.random() < self.p:
      height, width = img.shape[:2]
      # Base the radius on the shorter side so the bounds stay ordered for
      # non-square images (identical to the old bounds for square ones).
      half = min(height, width) // 2
      radius = random.randint(max(half - 3, 0), half + 15)
      # Draw a filled circle of ones directly; cv2 expects the centre as (x, y).
      mask = cv2.circle(np.zeros(img.shape, dtype=np.uint8),
                        (width // 2, height // 2),
                        radius,
                        (1, 1, 1),
                        -1)
      img = np.multiply(img, mask)

    return img


class RemoveHair:
    """Callable transform that inpaints thin dark structures in an image.

    Pipeline: grayscale -> black-hat with a 17x17 cross kernel -> binary
    threshold at 10 -> Telea inpainting (radius 1) of the thresholded pixels.
    NOTE(review): the grayscale conversion uses COLOR_RGB2GRAY; confirm the
    channel order of the images the caller passes in.
    """

    def __call__(self, image):
        # Shape code 1 used previously is cv2.MORPH_CROSS.
        cross_kernel = cv2.getStructuringElement(cv2.MORPH_CROSS, (17, 17))
        gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)

        # Black-hat response, then keep pixels whose response exceeds 10.
        dark_details = cv2.morphologyEx(gray, cv2.MORPH_BLACKHAT, cross_kernel)
        hair_mask = cv2.threshold(dark_details, 10, 255, cv2.THRESH_BINARY)[1]

        # Fill the masked pixels from their surroundings.
        return cv2.inpaint(image, hair_mask, 1, cv2.INPAINT_TELEA)



In [None]:
class Net(nn.Module):
  """Image backbone plus an MLP over tabular meta features, fused into one logit.

  Args:
    model: CNN backbone. If its class name contains 'EfficientNet', its `_fc`
      head is replaced by a 500-unit linear layer; any other backbone is used
      as-is and must already emit 500 features per sample.
    n_meta_features: number of tabular features per sample.
  """

  def __init__(self, model, n_meta_features):
    super().__init__()

    self.model = model
    if 'EfficientNet' in str(model.__class__):
      # Take the input width from the backbone's own classifier rather than
      # hard-coding 1280, which is only correct for some EfficientNet variants.
      self.model._fc = nn.Linear(in_features=self.model._fc.in_features, out_features=500, bias=True)

    # Meta branch: n_meta_features -> 500 -> 250
    self.meta = nn.Sequential(nn.Linear(n_meta_features, 500),
                              nn.BatchNorm1d(500),
                              nn.ReLU(),
                              nn.Dropout(p=0.2),
                              nn.Linear(500,250),
                              nn.BatchNorm1d(250),
                              nn.ReLU(),
                              nn.Dropout(p=0.2))

    # Single raw logit over the concatenated (500 image + 250 meta) features.
    self.output = nn.Linear(500 + 250, 1)

  def forward(self, inputs):
    """Compute one logit per sample.

    Args:
      inputs: pair `(images, meta)` of batched tensors.

    Returns:
      Tensor of shape (batch, 1) holding raw (pre-sigmoid) logits.
    """
    x, meta = inputs
    cnn_features = self.model(x)
    meta_features = self.meta(meta)
    features  = torch.cat((cnn_features, meta_features), dim = 1)
    output = self.output(features)

    return output

In [None]:
class MelanomaDataset(Dataset):
  """
    Dataset yielding an image together with its tabular meta features.

    df: DataFrame with an 'image_name' column, the meta feature columns and,
        when train is True, a 'target' column
    img_folder: Path to the directory holding '<image_name>.jpg' files
    meta_features: Names of the df columns returned as a float32 vector
                   (None yields an empty vector)
    train: If True, __getitem__ also returns the target label
    transforms: Optional callable applied to the loaded image
  """
  def __init__(self, df, img_folder, meta_features = None, train = True, transforms = None ):

    self.img_folder = img_folder
    self.df = df
    self.meta_features = meta_features
    self.transforms = transforms
    self.train = train


  def __len__(self):
    return len(self.df)


  def __getitem__(self, index):
    """Return ((image, meta), target) when train is True, else (image, meta).

    Raises:
      FileNotFoundError: if the image file is missing or cannot be decoded.
    """
    # Look the row up once instead of once per field.
    row = self.df.iloc[index]
    img_path = os.path.join(self.img_folder, row['image_name'] + '.jpg')

    # Use the columns given to this instance, not a notebook-level global.
    if self.meta_features is None:
      meta = np.zeros(0, dtype = np.float32)
    else:
      meta = np.array(row[self.meta_features].values, dtype = np.float32)

    # cv2.imread signals failure by returning None rather than raising.
    # NOTE(review): the image stays in cv2's BGR channel order; confirm this
    # matches what the downstream normalisation and backbone expect.
    x = cv2.imread(img_path)
    if x is None:
      raise FileNotFoundError(f'Could not read image: {img_path}')

    if self.transforms:
      x = self.transforms(x)

    if self.train:
      y = row['target']
      return (x, meta), y
    else:
      return (x, meta)





In [None]:
# Training pipeline: random flips, hair removal, random microscope mask
# (applied with probability 0.5), tensor conversion, then per-channel normalisation.
train_aug = transforms.Compose([transforms.RandomHorizontalFlip(),
                                transforms.RandomVerticalFlip(),
                                RemoveHair(),
                                Microscope(p=0.5),  
                                transforms.ToTensor(),
                                transforms.Normalize(mean=[0.485,0.456,0.406], std = [0.229, 0.224, 0.225])])

# Evaluation pipeline: same preprocessing without any random augmentation.
test_aug = transforms.Compose([RemoveHair(),
                               transforms.ToTensor(),
                               transforms.Normalize(mean=[0.485,0.456,0.406], std = [0.229, 0.224, 0.225])])

In [None]:
# Load a pretrained EfficientNet-B1.
# NOTE: the training cell below builds its own backbone per fold and rebinds `model`,
# so this instance is not the one that gets trained.
model = EfficientNet.from_pretrained('efficientnet-b1')

In [None]:
# Train one model per validation fold on the 256x256 images and keep, for each
# fold, the checkpoint with the best validation ROC-AUC in model_<fold>.pth.
epochs = 8

# NOTE(review): the fold column holds 8 folds but only folds 1-5 are used as
# validation folds here; folds 6-8 are always part of the training split.
for fold in range(1, 6):

    print( '='*20, 'Fold',fold, '='*20)

    model_path = f'model_{fold}.pth'
    df_train = train_csv[train_csv.fold != fold].reset_index(drop=True)
    df_valid = train_csv[train_csv.fold == fold].reset_index(drop=True)

    # Best validation ROC-AUC seen so far for this fold.
    best_val = 0

    # Fresh pretrained backbone, optimiser and scheduler for every fold.
    model_arch = EfficientNet.from_pretrained('efficientnet-b1')
    model = Net(model = model_arch, n_meta_features = len(meta_features))
    model.to(device)

    optim = torch.optim.Adam(model.parameters(), lr=0.001)
    # mode='max' because the scheduler is stepped with the validation ROC-AUC.
    scheduler = ReduceLROnPlateau(optimizer=optim, mode='max', patience=1, verbose=True, factor=0.2)
    criterion = nn.BCEWithLogitsLoss()

    train = MelanomaDataset(df = df_train,
                            img_folder = '../input/jpeg-melanoma-256x256/train',
                            meta_features = meta_features,
                            train=True,
                            transforms = train_aug)

    # Validate with the deterministic pipeline: random flips / masks would make
    # the ROC-AUC used for LR scheduling and checkpoint selection noisy.
    val   = MelanomaDataset(df = df_valid,
                            img_folder = '../input/jpeg-melanoma-256x256/train',
                            meta_features = meta_features,
                            train = True,
                            transforms = test_aug)

    train_loader = DataLoader(dataset = train,
                              batch_size = 64,
                              shuffle = True,
                              num_workers = 1)

    val_loader =  DataLoader(dataset = val,
                             batch_size = 16,
                             shuffle = False,
                             num_workers = 1)

    for epoch in range(epochs):

        start_time = time.time()
        correct = 0
        epoch_loss = 0
        model.train()

        for x, y in train_loader:

            # Move the batch to the target device; .to() avoids re-wrapping
            # existing tensors with torch.tensor().
            x = (x[0].to(device, dtype = torch.float32),
                 x[1].to(device, dtype = torch.float32))
            y = y.to(device, dtype = torch.float32).unsqueeze(1)

            optim.zero_grad()
            z = model(x)

            loss = criterion(z, y)
            loss.backward()
            optim.step()

            # Training accuracy at a 0.5 probability threshold.
            pred = torch.round(torch.sigmoid(z.detach()))
            correct += (pred == y).sum().item()
            epoch_loss += loss.item()

        train_acc = correct/len(df_train)

        model.eval()
        val_preds = torch.zeros((len(df_valid), 1), dtype = torch.float32, device = device)

        with torch.no_grad():

            for j, (x_val, y_val) in enumerate(val_loader):

                x_val = (x_val[0].to(device, dtype = torch.float32),
                         x_val[1].to(device, dtype = torch.float32))

                # The loader is not shuffled, so batch j fills rows
                # [j*batch_size, j*batch_size + len(batch)).
                start = j * val_loader.batch_size
                val_preds[start:start + x_val[0].shape[0]] = torch.sigmoid(model(x_val))

        val_probs = val_preds.cpu().numpy().ravel()
        val_acc = accuracy_score(df_valid['target'].values, val_probs.round())
        val_roc = roc_auc_score(df_valid['target'].values, val_probs)
        print('Epoch {:03}: | Loss: {:.3f} | Train acc: {:.3f} | Val acc: {:.3f} | Val roc_auc: {:.3f} | Training time: {}'.format(
               epoch + 1,
               epoch_loss,
               train_acc,
               val_acc,
               val_roc,
               str(datetime.timedelta(seconds=time.time() - start_time))[:7]))
        scheduler.step(val_roc)

        if val_roc >= best_val:
            best_val = val_roc
            # Store the loss as a plain float so the checkpoint holds no
            # graph-attached device tensor.
            torch.save({'epoch': epoch,
                        'model_state_dict': model.state_dict(),
                        'optimizer_state_dict': optim.state_dict(),
                        'loss': loss.item()}, model_path)

    # Release per-fold data objects before the next fold allocates its own.
    del train, val, train_loader, val_loader, x, y, x_val, y_val
    gc.collect()




In [None]:
# Fine-tune the fold-1 model trained on 256x256 images using the 512x512
# images, keeping the checkpoint with the best validation ROC-AUC in model_512.pth.
epochs = 10

# Only fold 1 is fine-tuned: the warm-start checkpoint below is the fold-1 model.
fold = 1

model_path = 'model_512.pth'
df_train = train_csv[train_csv.fold != fold].reset_index(drop=True)
df_valid = train_csv[train_csv.fold == fold].reset_index(drop=True)

# Best validation ROC-AUC seen so far.
best_val = 0

model_arch = EfficientNet.from_pretrained('efficientnet-b1')
model = Net(model = model_arch, n_meta_features = len(meta_features))
model.to(device)

optim = torch.optim.Adam(model.parameters(), lr=0.001)
# map_location lets a checkpoint saved on GPU be restored on whatever device is in use.
checkpoint = torch.load('../input/model-256/model_1.pth', map_location = device)
model.load_state_dict(checkpoint['model_state_dict'])
optim.load_state_dict(checkpoint['optimizer_state_dict'])

# mode='max' because the scheduler is stepped with the validation ROC-AUC.
scheduler = ReduceLROnPlateau(optimizer=optim, mode='max', patience=1, verbose=True, factor=0.2)
criterion = nn.BCEWithLogitsLoss()

train = MelanomaDataset(df = df_train,
                        img_folder = '../input/jpeg-melanoma-512x512/train',
                        meta_features = meta_features,
                        train=True,
                        transforms = train_aug)

# Validate with the deterministic pipeline: random flips / masks would make
# the ROC-AUC used for LR scheduling and checkpoint selection noisy.
val   = MelanomaDataset(df = df_valid,
                        img_folder = '../input/jpeg-melanoma-512x512/train',
                        meta_features = meta_features,
                        train = True,
                        transforms = test_aug)

train_loader = DataLoader(dataset = train,
                          batch_size = 16,
                          shuffle = True,
                          num_workers = 1)

val_loader =  DataLoader(dataset = val,
                         batch_size = 16,
                         shuffle = False,
                         num_workers = 1)

for epoch in range(epochs):

    start_time = time.time()
    correct = 0
    epoch_loss = 0
    model.train()

    for x, y in train_loader:

        # Move the batch to the target device; .to() avoids re-wrapping
        # existing tensors with torch.tensor().
        x = (x[0].to(device, dtype = torch.float32),
             x[1].to(device, dtype = torch.float32))
        y = y.to(device, dtype = torch.float32).unsqueeze(1)

        optim.zero_grad()
        z = model(x)

        loss = criterion(z, y)
        loss.backward()
        optim.step()

        # Training accuracy at a 0.5 probability threshold.
        pred = torch.round(torch.sigmoid(z.detach()))
        correct += (pred == y).sum().item()
        epoch_loss += loss.item()

    train_acc = correct/len(df_train)

    model.eval()
    val_preds = torch.zeros((len(df_valid), 1), dtype = torch.float32, device = device)

    with torch.no_grad():

        for j, (x_val, y_val) in enumerate(val_loader):

            x_val = (x_val[0].to(device, dtype = torch.float32),
                     x_val[1].to(device, dtype = torch.float32))

            # The loader is not shuffled, so batch j fills rows
            # [j*batch_size, j*batch_size + len(batch)).
            start = j * val_loader.batch_size
            val_preds[start:start + x_val[0].shape[0]] = torch.sigmoid(model(x_val))

    val_probs = val_preds.cpu().numpy().ravel()
    val_acc = accuracy_score(df_valid['target'].values, val_probs.round())
    val_roc = roc_auc_score(df_valid['target'].values, val_probs)
    print('Epoch {:03}: | Loss: {:.3f} | Train acc: {:.3f} | Val acc: {:.3f} | Val roc_auc: {:.3f} | Training time: {}'.format(
           epoch + 1,
           epoch_loss,
           train_acc,
           val_acc,
           val_roc,
           str(datetime.timedelta(seconds=time.time() - start_time))[:7]))
    scheduler.step(val_roc)

    if val_roc >= best_val:
        best_val = val_roc
        # Store the loss as a plain float so the checkpoint holds no
        # graph-attached device tensor.
        torch.save({'epoch': epoch,
                    'model_state_dict': model.state_dict(),
                    'optimizer_state_dict': optim.state_dict(),
                    'loss': loss.item()}, model_path)

# Release the data objects and the last batches held by the loop variables.
del train, val, train_loader, val_loader, x, y, x_val, y_val
gc.collect()




In [None]:
# Predict melanoma probabilities for the 512x512 test images with the
# fine-tuned checkpoint; `preds` ends up with one probability per test row.
test = MelanomaDataset(df = test_csv,
                       img_folder = '../input/jpeg-melanoma-512x512/test',
                       meta_features = meta_features,
                       train = False,
                       transforms = test_aug)


test_loader = DataLoader(dataset = test,
                         batch_size = 8,
                         shuffle = False,
                         num_workers = 1)

preds = torch.zeros((len(test_csv), 1), dtype = torch.float32, device = device)
model_arch = EfficientNet.from_pretrained('efficientnet-b1')
model = Net(model = model_arch, n_meta_features = len(meta_features))
model.to(device)

# map_location lets a checkpoint saved on GPU be restored on whatever device is in use.
checkpoint = torch.load('../input/model-512/model_512.pth', map_location = device)
model.load_state_dict(checkpoint['model_state_dict'])

model.eval()

# Inference only: without no_grad every batch's autograd graph would be kept
# alive through the in-place writes into `preds`.
with torch.no_grad():

    for i, x in enumerate(test_loader):

        x = (x[0].to(device, dtype = torch.float32),
             x[1].to(device, dtype = torch.float32))

        z_pred = torch.sigmoid(model(x))

        # The loader is not shuffled, so batch i fills rows
        # [i*batch_size, i*batch_size + len(batch)).
        start = i * test_loader.batch_size
        preds[start:start + x[0].shape[0]] = z_pred
    
    

    
    