In [1]:
import os
import random
import time
import datetime

import cv2
import pandas as pd
import numpy as np
pd.plotting.register_matplotlib_converters()
import matplotlib.pyplot as plt
plt.rcParams['figure.figsize']=(20,10)
%matplotlib inline
import seaborn as sns
sns.set_style("dark")

from colorama import Fore, Back, Style

from tqdm.notebook import tqdm, trange
tqdm.pandas()

from sklearn import metrics
from sklearn.utils import shuffle
from sklearn.model_selection import train_test_split
from sklearn.model_selection import StratifiedKFold, GroupKFold
from sklearn.metrics import accuracy_score, roc_auc_score
from sklearn import preprocessing

import warnings
warnings.filterwarnings("ignore")


  from pandas import Panel


In [2]:
# PyTorch
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch import FloatTensor, LongTensor
from torch.utils.data import Dataset, DataLoader, Subset
from torch.optim.lr_scheduler import ReduceLROnPlateau

from efficientnet_pytorch import EfficientNet

In [3]:
import torchtoolbox.transform as transforms

In [4]:
IMAGE_PATH = "../kaggle/jpeg/"
TEST_PATH = "../kaggle/test.csv"
TRAIN_PATH = "../kaggle/train.csv"
SUB_PATH = "../kaggle/sample_submission.csv"


sub = pd.read_csv(SUB_PATH)
test_df = pd.read_csv(TEST_PATH)
train_df = pd.read_csv(TRAIN_PATH)

In [5]:
train_df.head()

Unnamed: 0,image_name,patient_id,sex,age_approx,anatom_site_general_challenge,diagnosis,benign_malignant,target
0,ISIC_2637011,IP_7279968,male,45.0,head/neck,unknown,benign,0
1,ISIC_0015719,IP_3075186,female,45.0,upper extremity,unknown,benign,0
2,ISIC_0052212,IP_2842074,female,50.0,lower extremity,nevus,benign,0
3,ISIC_0068279,IP_6890425,female,45.0,head/neck,unknown,benign,0
4,ISIC_0074268,IP_8723313,female,55.0,upper extremity,unknown,benign,0


In [6]:
test_df.head()

Unnamed: 0,image_name,patient_id,sex,age_approx,anatom_site_general_challenge
0,ISIC_0052060,IP_3579794,male,70.0,
1,ISIC_0052349,IP_7782715,male,40.0,lower extremity
2,ISIC_0058510,IP_7960270,female,55.0,torso
3,ISIC_0073313,IP_6375035,female,50.0,torso
4,ISIC_0073502,IP_0589375,female,45.0,lower extremity


# train

In [7]:
def seed_everything(seed_value):
    random.seed(seed_value)
    np.random.seed(seed_value)
    torch.manual_seed(seed_value)
    os.environ['PYTHONHASHSEED'] = str(seed_value)
    
    if torch.cuda.is_available(): 
        torch.cuda.manual_seed(seed_value)
        torch.cuda.manual_seed_all(seed_value)
        torch.backends.cudnn.deterministic = True
        torch.backends.cudnn.benchmark = True

seed = 1234
seed_everything(seed)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

## train on small dataset

In [8]:
train_df=pd.read_csv('../kaggle-datasource/melanoma-external-malignant-256/train_concat.csv')

In [9]:
train_df['sex'] = train_df['sex'].map({'male': 1, 'female': 0})
test_df['sex'] = test_df['sex'].map({'male': 1, 'female': 0})
train_df['sex'] = train_df['sex'].fillna(-1)
test_df['sex'] = test_df['sex'].fillna(-1)

In [10]:
# imputing
imp_mean=(train_df["age_approx"].sum())/(train_df["age_approx"].count()-train_df["age_approx"].isna().sum())
train_df['age_approx']=train_df['age_approx'].fillna(imp_mean)
train_df['age_approx'].head()
imp_mean_test=(test_df["age_approx"].sum())/(test_df["age_approx"].count())
test_df['age_approx']=test_df['age_approx'].fillna(imp_mean_test)

In [11]:
train_df['patient_id'] = train_df['patient_id'].fillna(0)

In [12]:
# OHE

concat = pd.concat([train_df['anatom_site_general_challenge'], test_df['anatom_site_general_challenge']], ignore_index=True)
dummies = pd.get_dummies(concat, dummy_na=True, dtype=np.uint8, prefix='site')
train_df = pd.concat([train_df, dummies.iloc[:train_df.shape[0]]], axis=1)
test_df = pd.concat([test_df, dummies.iloc[train_df.shape[0]:].reset_index(drop=True)], axis=1)

In [13]:
[col for col in train_df.columns if 'site_' in col]

['anatom_site_general_challenge',
 'site_anterior torso',
 'site_head/neck',
 'site_lateral torso',
 'site_lower extremity',
 'site_oral/genital',
 'site_palms/soles',
 'site_posterior torso',
 'site_torso',
 'site_upper extremity',
 'site_nan']

In [14]:
meta_features = ['sex', 'age_approx'] + [col for col in train_df.columns if 'site_' in col]
meta_features.remove('anatom_site_general_challenge')

In [15]:
test_df=test_df.drop(["anatom_site_general_challenge"],axis=1)
train_df=train_df.drop(["anatom_site_general_challenge"],axis=1)

In [16]:
train_df.head()

Unnamed: 0,image_name,patient_id,sex,age_approx,target,site_anterior torso,site_head/neck,site_lateral torso,site_lower extremity,site_oral/genital,site_palms/soles,site_posterior torso,site_torso,site_upper extremity,site_nan
0,ISIC_2637011,IP_7279968,1.0,45.0,0,0,1,0,0,0,0,0,0,0,0
1,ISIC_0015719,IP_3075186,0.0,45.0,0,0,0,0,0,0,0,0,0,1,0
2,ISIC_0052212,IP_2842074,0.0,50.0,0,0,0,0,1,0,0,0,0,0,0
3,ISIC_0068279,IP_6890425,0.0,45.0,0,0,1,0,0,0,0,0,0,0,0
4,ISIC_0074268,IP_8723313,0.0,55.0,0,0,0,0,0,0,0,0,0,1,0


In [46]:
class MelanomaDataset(Dataset):
    def __init__(self, df: pd.DataFrame, imfolder: str, train: bool = True, transforms = None, meta_features = None):
        
        self.df = df
        self.imfolder = imfolder
        self.transforms = transforms
        self.train = train
        self.meta_features = meta_features
        
    def __getitem__(self, index):
        im_path = os.path.join(self.imfolder, self.df.iloc[index]['image_name'] + '.jpg')
        image = cv2.imread(im_path)
        metadata = np.array(self.df.iloc[index][self.meta_features].values, dtype=np.float32)

        if self.transforms:
            image = self.transforms(image)
            
        if self.train:
            y = self.df.iloc[index]['target']
#             image = image.cuda()
            return (image, metadata), y
        else:
            return (image, metadata)
    
    def __len__(self):
        return len(self.df)
    
    

In [47]:
skf = GroupKFold(n_splits=5)

test = MelanomaDataset(df=test_df,
                       imfolder='../kaggle-datasource/melanoma-external-malignant-256/test/test/', 
                       train=False,
                       transforms=test_transform,
                       meta_features=meta_features)

In [19]:
from torch.multiprocessing import Pool, Process, set_start_method
try:
     set_start_method('spawn')
except RuntimeError:
    pass

# Model

In [20]:
# Config

epochs = 10  # no of times till the loop will iterate over the model
ESpatience = 3 # no of times the model will wait if the loss is not decreased
TTA = 3      # test time augmentation, random augmantation like mirror image performed on thhe input image 
num_workers = 6 # tells DataLoader the number of subprocess to use while data loading
learning_rate = 0.001 # Learning Rate
weight_decay = 0.0  # Decay Factor
lr_patience = 1     # patience for learning rate      
lr_factor = 0.4     
output_size=1    # statics
batch_size1 = 64
batch_size2 = 32

train_len = len(train_df)
test_len = len(test_df)
oof = np.zeros(shape = (train_len, 1))

In [21]:
class EfficientNetwork(nn.Module):
    def __init__(self, output_size, no_columns, b4=False, b2=False):
        super().__init__()
        self.b4, self.b2, self.no_columns = b4, b2, no_columns
        
        # Define Feature part (IMAGE)
        if b4:
            self.features = EfficientNet.from_pretrained('efficientnet-b4')
        elif b2:
            self.features = EfficientNet.from_pretrained('efficientnet-b2')
        else:
            self.features = EfficientNet.from_pretrained('efficientnet-b7')
        
        # (CSV) or Meta Features
        self.csv = nn.Sequential(nn.Linear(self.no_columns, 250),
                                 nn.BatchNorm1d(250),
                                 nn.ReLU(),
                                 nn.Dropout(p=0.3),
                                 
                                 nn.Linear(250, 250),
                                 nn.BatchNorm1d(250),
                                 nn.ReLU(),
                                 nn.Dropout(p=0.3),
                                 
                                 nn.Linear(250, 250),
                                 nn.BatchNorm1d(250),
                                 nn.ReLU(),
                                 nn.Dropout(p=0.3))
        
        # Define Classification part
        if b4:
            self.classification = nn.Sequential(nn.Linear(1792 + 250, 250),
                                                nn.Linear(250, output_size))
        elif b2:
            self.classification = nn.Sequential(nn.Linear(1408 + 250, 250),
                                                nn.Linear(250, output_size))
        else:
            self.classification = nn.Sequential(nn.Linear(2560 + 250, 250),
                                                nn.Linear(250, output_size))
        
        
    def forward(self, image, csv_data, prints=False):    
        
        if prints: print('Input Image shape:', image.shape, '\n'+
                         'Input csv_data shape:', csv_data.shape)
        
        # IMAGE CNN
        image = self.features.extract_features(image)
        if prints: print('Features Image shape:', image.shape)
            
        if self.b4:
            image = F.avg_pool2d(image, image.size()[2:]).reshape(-1, 1792)
        elif self.b2:
            image = F.avg_pool2d(image, image.size()[2:]).reshape(-1, 1408)
        else:
            image = F.avg_pool2d(image, image.size()[2:]).reshape(-1, 2560)
        if prints: print('Image Reshaped shape:', image.shape)
            
        # CSV FNN
        csv_data = self.csv(csv_data)
        if prints: print('CSV Data:', csv_data.shape)
            
        # Concatenate
        image_csv_data = torch.cat((image, csv_data), dim=1)
        
        # CLASSIF
        out = self.classification(image_csv_data)
        if prints: print('Out shape:', out.shape)
        
        return out

# Train

In [22]:
train_transform = transforms.Compose([
#     HairGrowth(hairs = 5,hairs_folder='/kaggle/input/melanoma-hairs/'),
    transforms.RandomResizedCrop(size=256, scale=(0.7, 1.0)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomVerticalFlip(),
    transforms.ColorJitter(brightness=32. / 255.,saturation=0.5,hue=0.01),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],std=[0.229, 0.224, 0.225])
])
test_transform = transforms.Compose([
#     HairGrowth(hairs = 5,hairs_folder='/kaggle/input/melanoma-hairs/'),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],std=[0.229, 0.224, 0.225])
])

In [23]:
#comment out in you don't want to Train

for fold, (train_idx, val_idx) in enumerate(skf.split(X=np.zeros(len(train_df)), y=train_df['target'], groups=train_df['patient_id'].tolist()), 1):
    print(Fore.CYAN,'-' * 20,Style.RESET_ALL,Fore.MAGENTA, 'Fold', fold,Style.RESET_ALL,Fore.CYAN, '-' * 20,Style.RESET_ALL)
    best_val = None
    patience=ESpatience# Best validation score within this fold
    model_path = 'model{Fold}.pth'.format(Fold=fold)  
    train = MelanomaDataset(df=train_df.iloc[train_idx].reset_index(drop=True), 
                            imfolder='../kaggle-datasource/melanoma-external-malignant-256/train/train/', 
                            train=True, 
                            transforms=train_transform,
                            meta_features=meta_features)
    val = MelanomaDataset(df=train_df.iloc[val_idx].reset_index(drop=True), 
                            imfolder='../kaggle-datasource/melanoma-external-malignant-256/train/train/', 
                            train=True, 
                            transforms=test_transform,
                            meta_features=meta_features)
    train_loader = DataLoader(dataset=train, batch_size=batch_size1, shuffle=True, num_workers=0)
    val_loader = DataLoader(dataset=val, batch_size=batch_size2, shuffle=False, num_workers=0)
    test_loader = DataLoader(dataset=test, batch_size=batch_size2, shuffle=False, num_workers=0)
    
    model = EfficientNetwork(output_size=output_size, no_columns=len(meta_features),b2=True)
    model = model.to(device)
    
    optimizer = torch.optim.Adam(model.parameters(), lr = learning_rate, weight_decay=weight_decay)
    scheduler = ReduceLROnPlateau(optimizer=optimizer, mode='max', 
                                      patience=lr_patience, verbose=True, factor=lr_factor)
    for epoch in trange(epochs, desc='Epoch'):
        start_time = time.time()
        correct = 0
        train_losses = 0

        model.train() #Set the model in train mode
        
        for data, labels in tqdm(train_loader, desc='Batch', leave=False):
                # Save them to device
                data[0] = torch.tensor(data[0], device=device, dtype=torch.float32)
                data[1] = torch.tensor(data[1], device=device, dtype=torch.float32)
                labels = torch.tensor(labels, device=device, dtype=torch.float32)
                
                criterion = nn.BCEWithLogitsLoss()

                # Clear gradients first; very important, usually done BEFORE prediction
                optimizer.zero_grad()

                # Log Probabilities & Backpropagation
                out = model(data[0], data[1])
                loss = criterion(out, labels.unsqueeze(1))
                loss.backward()
                optimizer.step()

                # --- Save information after this batch ---
                # Save loss
                # From log probabilities to actual probabilities
                 # 0 and 1
                train_preds = torch.round(torch.sigmoid(out))
                train_losses += loss.item()
                
                # Number of correct predictions
                correct += (train_preds.cpu() == labels.cpu().unsqueeze(1)).sum().item()

            # Compute Train Accuracy
        train_acc = correct / len(train_idx)
        model.eval()  # switch model to the evaluation mode
        val_preds = torch.zeros((len(val_idx), 1), dtype=torch.float32, device=device)
        with torch.no_grad():  # Do not calculate gradient since we are only predicting
            
            for j,(data_val, label_val) in enumerate(tqdm(val_loader, desc='Val: ', leave=False)):
                data_val[0] = torch.tensor(data_val[0], device=device, dtype=torch.float32)
                data_val[1] = torch.tensor(data_val[1], device=device, dtype=torch.float32)
                label_val = torch.tensor(label_val, device=device, dtype=torch.float32)
                z_val = model(data_val[0],data_val[1])
                val_pred = torch.sigmoid(z_val)
                val_preds[j*data_val[0].shape[0]:j*data_val[0].shape[0] + data_val[0].shape[0]] = val_pred
            val_acc = accuracy_score(train_df.iloc[val_idx]['target'].values, torch.round(val_preds.cpu()))
            val_roc = roc_auc_score(train_df.iloc[val_idx]['target'].values, val_preds.cpu())
                
            epochval=epoch + 1
            
            print(Fore.YELLOW,'Epoch: ',Style.RESET_ALL,epochval,'|',Fore.CYAN,'Loss: ',Style.RESET_ALL,train_losses,'|',Fore.GREEN,'Train acc:',Style.RESET_ALL,train_acc,'|',Fore.BLUE,' Val acc: ',Style.RESET_ALL,val_acc,'|',Fore.RED,' Val roc_auc:',Style.RESET_ALL,val_roc,'|',Fore.YELLOW,' Training time:',Style.RESET_ALL,str(datetime.timedelta(seconds=time.time() - start_time)))
                 
                
                
                 
                
            
            scheduler.step(val_roc)
            # During the first iteration (first epoch) best validation is set to None
            if not best_val:
                best_val = val_roc  # So any validation roc_auc we have is the best one for now
                torch.save(model, model_path)  # Saving the model
                continue
                
            if val_roc >= best_val:
                best_val = val_roc
                patience = patience  # Resetting patience since we have new best validation accuracy
                torch.save(model, model_path)  # Saving current best model
            else:
                patience -= 1
                if patience == 0:
                    print(Fore.BLUE,'Early stopping. Best Val roc_auc: {:.3f}'.format(best_val),Style.RESET_ALL)
                    break
                        
    model = torch.load(model_path)  # Loading best model of this fold
    model.eval()  # switch model to the evaluation mode
    val_preds = torch.zeros((len(val_idx), 1), dtype=torch.float32, device=device)
    with torch.no_grad():
        # Predicting on validation set once again to obtain data for OOF
        for j, (x_val, y_val) in enumerate(val_loader):
            x_val[0] = torch.tensor(x_val[0], device=device, dtype=torch.float32)
            x_val[1] = torch.tensor(x_val[1], device=device, dtype=torch.float32)
            y_val = torch.tensor(y_val, device=device, dtype=torch.float32)
            z_val = model(x_val[0],x_val[1])
            val_pred = torch.sigmoid(z_val)
            val_preds[j*x_val[0].shape[0]:j*x_val[0].shape[0] + x_val[0].shape[0]] = val_pred
        oof[val_idx] = val_preds.cpu().numpy()

[36m -------------------- [0m [35m Fold 1 [0m [36m -------------------- [0m
Loaded pretrained weights for efficientnet-b2


HBox(children=(FloatProgress(value=0.0, description='Epoch', max=10.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Batch', max=471.0, style=ProgressStyle(description_width=…

HBox(children=(FloatProgress(value=0.0, description='Val: ', max=236.0, style=ProgressStyle(description_width=…

[33m Epoch:  [0m 1 | [36m Loss:  [0m 54.77203934453428 | [32m Train acc: [0m 0.9647718972043297 | [34m  Val acc:  [0m 0.9452855245683931 | [31m  Val roc_auc: [0m 0.9704276300955988 | [33m  Training time: [0m 0:11:42.309800


HBox(children=(FloatProgress(value=0.0, description='Batch', max=471.0, style=ProgressStyle(description_width=…

HBox(children=(FloatProgress(value=0.0, description='Val: ', max=236.0, style=ProgressStyle(description_width=…

[33m Epoch:  [0m 2 | [36m Loss:  [0m 42.70628907997161 | [32m Train acc: [0m 0.9742678796732851 | [34m  Val acc:  [0m 0.9297476759628154 | [31m  Val roc_auc: [0m 0.9642616407753126 | [33m  Training time: [0m 0:11:40.300741


HBox(children=(FloatProgress(value=0.0, description='Batch', max=471.0, style=ProgressStyle(description_width=…

HBox(children=(FloatProgress(value=0.0, description='Val: ', max=236.0, style=ProgressStyle(description_width=…

[33m Epoch:  [0m 3 | [36m Loss:  [0m 41.306693855673075 | [32m Train acc: [0m 0.9749983398632047 | [34m  Val acc:  [0m 0.9294820717131475 | [31m  Val roc_auc: [0m 0.9706344861374159 | [33m  Training time: [0m 0:11:41.908094


HBox(children=(FloatProgress(value=0.0, description='Batch', max=471.0, style=ProgressStyle(description_width=…

HBox(children=(FloatProgress(value=0.0, description='Val: ', max=236.0, style=ProgressStyle(description_width=…

[33m Epoch:  [0m 4 | [36m Loss:  [0m 38.50051237922162 | [32m Train acc: [0m 0.9767248821302875 | [34m  Val acc:  [0m 0.951128818061089 | [31m  Val roc_auc: [0m 0.9749083562560126 | [33m  Training time: [0m 0:11:42.714703


HBox(children=(FloatProgress(value=0.0, description='Batch', max=471.0, style=ProgressStyle(description_width=…

HBox(children=(FloatProgress(value=0.0, description='Val: ', max=236.0, style=ProgressStyle(description_width=…

[33m Epoch:  [0m 5 | [36m Loss:  [0m 36.281264348421246 | [32m Train acc: [0m 0.9779865860946942 | [34m  Val acc:  [0m 0.9411686586985392 | [31m  Val roc_auc: [0m 0.972815311316288 | [33m  Training time: [0m 0:11:39.890407


HBox(children=(FloatProgress(value=0.0, description='Batch', max=471.0, style=ProgressStyle(description_width=…

HBox(children=(FloatProgress(value=0.0, description='Val: ', max=236.0, style=ProgressStyle(description_width=…

[33m Epoch:  [0m 6 | [36m Loss:  [0m 35.06518623000011 | [32m Train acc: [0m 0.9784846271332758 | [34m  Val acc:  [0m 0.9426294820717132 | [31m  Val roc_auc: [0m 0.9624754450193904 | [33m  Training time: [0m 0:11:39.820550
Epoch     6: reducing learning rate of group 0 to 4.0000e-04.
[34m Early stopping. Best Val roc_auc: 0.975 [0m

[36m -------------------- [0m [35m Fold 2 [0m [36m -------------------- [0m
Loaded pretrained weights for efficientnet-b2


HBox(children=(FloatProgress(value=0.0, description='Epoch', max=10.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Batch', max=471.0, style=ProgressStyle(description_width=…

HBox(children=(FloatProgress(value=0.0, description='Val: ', max=236.0, style=ProgressStyle(description_width=…

[33m Epoch:  [0m 1 | [36m Loss:  [0m 60.00326539389789 | [32m Train acc: [0m 0.9590610266285942 | [34m  Val acc:  [0m 0.9733067729083665 | [31m  Val roc_auc: [0m 0.9617275881030046 | [33m  Training time: [0m 0:11:40.696657


HBox(children=(FloatProgress(value=0.0, description='Batch', max=471.0, style=ProgressStyle(description_width=…

HBox(children=(FloatProgress(value=0.0, description='Val: ', max=236.0, style=ProgressStyle(description_width=…

[33m Epoch:  [0m 2 | [36m Loss:  [0m 45.55652724765241 | [32m Train acc: [0m 0.970549173251876 | [34m  Val acc:  [0m 0.9754316069057105 | [31m  Val roc_auc: [0m 0.969119974704906 | [33m  Training time: [0m 0:11:39.972648


HBox(children=(FloatProgress(value=0.0, description='Batch', max=471.0, style=ProgressStyle(description_width=…

HBox(children=(FloatProgress(value=0.0, description='Val: ', max=236.0, style=ProgressStyle(description_width=…

[33m Epoch:  [0m 3 | [36m Loss:  [0m 43.64851966872811 | [32m Train acc: [0m 0.9720432963676207 | [34m  Val acc:  [0m 0.9763612217795484 | [31m  Val roc_auc: [0m 0.9747585602515741 | [33m  Training time: [0m 0:11:42.377798


HBox(children=(FloatProgress(value=0.0, description='Batch', max=471.0, style=ProgressStyle(description_width=…

HBox(children=(FloatProgress(value=0.0, description='Val: ', max=236.0, style=ProgressStyle(description_width=…

[33m Epoch:  [0m 4 | [36m Loss:  [0m 39.936915438156575 | [32m Train acc: [0m 0.9740686632578525 | [34m  Val acc:  [0m 0.9755644090305445 | [31m  Val roc_auc: [0m 0.9691953943087049 | [33m  Training time: [0m 0:11:43.468912


HBox(children=(FloatProgress(value=0.0, description='Batch', max=471.0, style=ProgressStyle(description_width=…

HBox(children=(FloatProgress(value=0.0, description='Val: ', max=236.0, style=ProgressStyle(description_width=…

[33m Epoch:  [0m 5 | [36m Loss:  [0m 36.41002221405506 | [32m Train acc: [0m 0.9765588684507603 | [34m  Val acc:  [0m 0.9727755644090306 | [31m  Val roc_auc: [0m 0.9725390811057694 | [33m  Training time: [0m 0:11:42.904524
Epoch     5: reducing learning rate of group 0 to 4.0000e-04.


HBox(children=(FloatProgress(value=0.0, description='Batch', max=471.0, style=ProgressStyle(description_width=…

HBox(children=(FloatProgress(value=0.0, description='Val: ', max=236.0, style=ProgressStyle(description_width=…

[33m Epoch:  [0m 6 | [36m Loss:  [0m 30.05329697439447 | [32m Train acc: [0m 0.9793478982668172 | [34m  Val acc:  [0m 0.9759628154050465 | [31m  Val roc_auc: [0m 0.976496163808005 | [33m  Training time: [0m 0:11:42.806352


HBox(children=(FloatProgress(value=0.0, description='Batch', max=471.0, style=ProgressStyle(description_width=…

HBox(children=(FloatProgress(value=0.0, description='Val: ', max=236.0, style=ProgressStyle(description_width=…

[33m Epoch:  [0m 7 | [36m Loss:  [0m 28.368290272075683 | [32m Train acc: [0m 0.9812404542134272 | [34m  Val acc:  [0m 0.9766268260292165 | [31m  Val roc_auc: [0m 0.9767265563560755 | [33m  Training time: [0m 0:11:39.782713


HBox(children=(FloatProgress(value=0.0, description='Batch', max=471.0, style=ProgressStyle(description_width=…

HBox(children=(FloatProgress(value=0.0, description='Val: ', max=236.0, style=ProgressStyle(description_width=…

[33m Epoch:  [0m 8 | [36m Loss:  [0m 27.097358575556427 | [32m Train acc: [0m 0.9810412377979946 | [34m  Val acc:  [0m 0.9764940239043824 | [31m  Val roc_auc: [0m 0.9774493697401364 | [33m  Training time: [0m 0:11:40.381334


HBox(children=(FloatProgress(value=0.0, description='Batch', max=471.0, style=ProgressStyle(description_width=…

HBox(children=(FloatProgress(value=0.0, description='Val: ', max=236.0, style=ProgressStyle(description_width=…

[33m Epoch:  [0m 9 | [36m Loss:  [0m 24.87853361153975 | [32m Train acc: [0m 0.9825353609137393 | [34m  Val acc:  [0m 0.9741035856573705 | [31m  Val roc_auc: [0m 0.97604794864583 | [33m  Training time: [0m 0:11:39.687647
[34m Early stopping. Best Val roc_auc: 0.977 [0m

[36m -------------------- [0m [35m Fold 3 [0m [36m -------------------- [0m
Loaded pretrained weights for efficientnet-b2


HBox(children=(FloatProgress(value=0.0, description='Epoch', max=10.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Batch', max=471.0, style=ProgressStyle(description_width=…

HBox(children=(FloatProgress(value=0.0, description='Val: ', max=236.0, style=ProgressStyle(description_width=…

[33m Epoch:  [0m 1 | [36m Loss:  [0m 58.105588706210256 | [32m Train acc: [0m 0.961551231821502 | [34m  Val acc:  [0m 0.9613545816733068 | [31m  Val roc_auc: [0m 0.9369577744044116 | [33m  Training time: [0m 0:11:37.892670


HBox(children=(FloatProgress(value=0.0, description='Batch', max=471.0, style=ProgressStyle(description_width=…

HBox(children=(FloatProgress(value=0.0, description='Val: ', max=236.0, style=ProgressStyle(description_width=…

[33m Epoch:  [0m 2 | [36m Loss:  [0m 45.049673543311656 | [32m Train acc: [0m 0.9699183212696726 | [34m  Val acc:  [0m 0.9694555112881806 | [31m  Val roc_auc: [0m 0.9646696436869614 | [33m  Training time: [0m 0:11:37.381908


HBox(children=(FloatProgress(value=0.0, description='Batch', max=471.0, style=ProgressStyle(description_width=…

HBox(children=(FloatProgress(value=0.0, description='Val: ', max=236.0, style=ProgressStyle(description_width=…

[33m Epoch:  [0m 3 | [36m Loss:  [0m 41.15408810926601 | [32m Train acc: [0m 0.9742346769373796 | [34m  Val acc:  [0m 0.9682602921646747 | [31m  Val roc_auc: [0m 0.9676083964364272 | [33m  Training time: [0m 0:11:38.494395


HBox(children=(FloatProgress(value=0.0, description='Batch', max=471.0, style=ProgressStyle(description_width=…

HBox(children=(FloatProgress(value=0.0, description='Val: ', max=236.0, style=ProgressStyle(description_width=…

[33m Epoch:  [0m 4 | [36m Loss:  [0m 38.47846650425345 | [32m Train acc: [0m 0.9742346769373796 | [34m  Val acc:  [0m 0.9657370517928286 | [31m  Val roc_auc: [0m 0.9651371745451489 | [33m  Training time: [0m 0:11:38.344048


HBox(children=(FloatProgress(value=0.0, description='Batch', max=471.0, style=ProgressStyle(description_width=…

HBox(children=(FloatProgress(value=0.0, description='Val: ', max=236.0, style=ProgressStyle(description_width=…

[33m Epoch:  [0m 5 | [36m Loss:  [0m 38.05396496783942 | [32m Train acc: [0m 0.97539677269407 | [34m  Val acc:  [0m 0.9755644090305445 | [31m  Val roc_auc: [0m 0.9702134514699959 | [33m  Training time: [0m 0:11:37.859839


HBox(children=(FloatProgress(value=0.0, description='Batch', max=471.0, style=ProgressStyle(description_width=…

HBox(children=(FloatProgress(value=0.0, description='Val: ', max=236.0, style=ProgressStyle(description_width=…

[33m Epoch:  [0m 6 | [36m Loss:  [0m 36.034161345800385 | [32m Train acc: [0m 0.9763596520353277 | [34m  Val acc:  [0m 0.9686586985391766 | [31m  Val roc_auc: [0m 0.9709121808517702 | [33m  Training time: [0m 0:11:37.623323


HBox(children=(FloatProgress(value=0.0, description='Batch', max=471.0, style=ProgressStyle(description_width=…

HBox(children=(FloatProgress(value=0.0, description='Val: ', max=236.0, style=ProgressStyle(description_width=…

[33m Epoch:  [0m 7 | [36m Loss:  [0m 34.503563438076526 | [32m Train acc: [0m 0.9781858025101269 | [34m  Val acc:  [0m 0.9717131474103585 | [31m  Val roc_auc: [0m 0.9672171646315826 | [33m  Training time: [0m 0:11:38.976837


HBox(children=(FloatProgress(value=0.0, description='Batch', max=471.0, style=ProgressStyle(description_width=…

HBox(children=(FloatProgress(value=0.0, description='Val: ', max=236.0, style=ProgressStyle(description_width=…

[33m Epoch:  [0m 8 | [36m Loss:  [0m 34.150181665085256 | [32m Train acc: [0m 0.9776545587356398 | [34m  Val acc:  [0m 0.9707835325365206 | [31m  Val roc_auc: [0m 0.9612296362517869 | [33m  Training time: [0m 0:11:40.351302
Epoch     8: reducing learning rate of group 0 to 4.0000e-04.
[34m Early stopping. Best Val roc_auc: 0.971 [0m

[36m -------------------- [0m [35m Fold 4 [0m [36m -------------------- [0m
Loaded pretrained weights for efficientnet-b2


HBox(children=(FloatProgress(value=0.0, description='Epoch', max=10.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Batch', max=471.0, style=ProgressStyle(description_width=…

HBox(children=(FloatProgress(value=0.0, description='Val: ', max=236.0, style=ProgressStyle(description_width=…

[33m Epoch:  [0m 1 | [36m Loss:  [0m 57.58970647351816 | [32m Train acc: [0m 0.960523257744281 | [34m  Val acc:  [0m 0.968123256740603 | [31m  Val roc_auc: [0m 0.9675936849850308 | [33m  Training time: [0m 0:11:40.675289


HBox(children=(FloatProgress(value=0.0, description='Batch', max=471.0, style=ProgressStyle(description_width=…

HBox(children=(FloatProgress(value=0.0, description='Val: ', max=236.0, style=ProgressStyle(description_width=…

[33m Epoch:  [0m 2 | [36m Loss:  [0m 44.4338690796867 | [32m Train acc: [0m 0.971313788638401 | [34m  Val acc:  [0m 0.9768893611369371 | [31m  Val roc_auc: [0m 0.9667188372152027 | [33m  Training time: [0m 0:11:38.749630


HBox(children=(FloatProgress(value=0.0, description='Batch', max=471.0, style=ProgressStyle(description_width=…

HBox(children=(FloatProgress(value=0.0, description='Val: ', max=236.0, style=ProgressStyle(description_width=…

[33m Epoch:  [0m 3 | [36m Loss:  [0m 40.03032327629626 | [32m Train acc: [0m 0.9738371127859491 | [34m  Val acc:  [0m 0.9620135476158852 | [31m  Val roc_auc: [0m 0.9658921956665143 | [33m  Training time: [0m 0:11:42.265634
Epoch     3: reducing learning rate of group 0 to 4.0000e-04.


HBox(children=(FloatProgress(value=0.0, description='Batch', max=471.0, style=ProgressStyle(description_width=…

HBox(children=(FloatProgress(value=0.0, description='Val: ', max=236.0, style=ProgressStyle(description_width=…

[33m Epoch:  [0m 4 | [36m Loss:  [0m 32.367358142044395 | [32m Train acc: [0m 0.9780537202430359 | [34m  Val acc:  [0m 0.977022180900518 | [31m  Val roc_auc: [0m 0.9715100440198914 | [33m  Training time: [0m 0:11:40.966599


HBox(children=(FloatProgress(value=0.0, description='Batch', max=471.0, style=ProgressStyle(description_width=…

HBox(children=(FloatProgress(value=0.0, description='Val: ', max=236.0, style=ProgressStyle(description_width=…

[33m Epoch:  [0m 5 | [36m Loss:  [0m 31.150723638944328 | [32m Train acc: [0m 0.9791161725156877 | [34m  Val acc:  [0m 0.9754283437375482 | [31m  Val roc_auc: [0m 0.9732821776526107 | [33m  Training time: [0m 0:11:42.585656


HBox(children=(FloatProgress(value=0.0, description='Batch', max=471.0, style=ProgressStyle(description_width=…

HBox(children=(FloatProgress(value=0.0, description='Val: ', max=236.0, style=ProgressStyle(description_width=…

[33m Epoch:  [0m 6 | [36m Loss:  [0m 28.188896940089762 | [32m Train acc: [0m 0.981473488495634 | [34m  Val acc:  [0m 0.9721078496480277 | [31m  Val roc_auc: [0m 0.970289506901101 | [33m  Training time: [0m 0:11:39.767871
[34m Early stopping. Best Val roc_auc: 0.973 [0m

[36m -------------------- [0m [35m Fold 5 [0m [36m -------------------- [0m
Loaded pretrained weights for efficientnet-b2


HBox(children=(FloatProgress(value=0.0, description='Epoch', max=10.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Batch', max=471.0, style=ProgressStyle(description_width=…

HBox(children=(FloatProgress(value=0.0, description='Val: ', max=236.0, style=ProgressStyle(description_width=…

[33m Epoch:  [0m 1 | [36m Loss:  [0m 57.86171119008213 | [32m Train acc: [0m 0.9608220724459643 | [34m  Val acc:  [0m 0.9646699428875016 | [31m  Val roc_auc: [0m 0.9692697342046515 | [33m  Training time: [0m 0:11:38.427191


HBox(children=(FloatProgress(value=0.0, description='Batch', max=471.0, style=ProgressStyle(description_width=…

HBox(children=(FloatProgress(value=0.0, description='Val: ', max=236.0, style=ProgressStyle(description_width=…

[33m Epoch:  [0m 2 | [36m Loss:  [0m 45.613595618866384 | [32m Train acc: [0m 0.9698197151299844 | [34m  Val acc:  [0m 0.959224332580688 | [31m  Val roc_auc: [0m 0.960524523191493 | [33m  Training time: [0m 0:11:41.979375


HBox(children=(FloatProgress(value=0.0, description='Batch', max=471.0, style=ProgressStyle(description_width=…

HBox(children=(FloatProgress(value=0.0, description='Val: ', max=236.0, style=ProgressStyle(description_width=…

[33m Epoch:  [0m 3 | [36m Loss:  [0m 41.185760599095374 | [32m Train acc: [0m 0.9746339519904379 | [34m  Val acc:  [0m 0.9524505246380661 | [31m  Val roc_auc: [0m 0.9695482239960158 | [33m  Training time: [0m 0:11:40.778823


HBox(children=(FloatProgress(value=0.0, description='Batch', max=471.0, style=ProgressStyle(description_width=…

HBox(children=(FloatProgress(value=0.0, description='Val: ', max=236.0, style=ProgressStyle(description_width=…

[33m Epoch:  [0m 4 | [36m Loss:  [0m 39.45146223157644 | [32m Train acc: [0m 0.9740695242205917 | [34m  Val acc:  [0m 0.9564351175454908 | [31m  Val roc_auc: [0m 0.9678733755639344 | [33m  Training time: [0m 0:11:41.390769


HBox(children=(FloatProgress(value=0.0, description='Batch', max=471.0, style=ProgressStyle(description_width=…

HBox(children=(FloatProgress(value=0.0, description='Val: ', max=236.0, style=ProgressStyle(description_width=…

[33m Epoch:  [0m 5 | [36m Loss:  [0m 36.43768069613725 | [32m Train acc: [0m 0.9763272352999768 | [34m  Val acc:  [0m 0.9582945942356222 | [31m  Val roc_auc: [0m 0.9552900579445228 | [33m  Training time: [0m 0:11:40.175060
Epoch     5: reducing learning rate of group 0 to 4.0000e-04.
[34m Early stopping. Best Val roc_auc: 0.970 [0m



# Predict

In [60]:
cv2.setNumThreads(0)

In [67]:
test_loader = DataLoader(dataset=test, batch_size=batch_size2, shuffle=False, num_workers=2)

In [68]:
print('Out of the Folds Score:',roc_auc_score(train_df['target'], oof))

Out of the Folds Score: 0.9714285588760848


In [69]:
import gc

In [70]:
model = torch.load('model2.pth')
model.eval()  # switch model to the evaluation mode
preds = torch.zeros((len(test), 1), dtype=torch.float32, device=device)
with torch.no_grad():
    for _ in trange(TTA, desc='TTA'):  
            for i, x_test in enumerate(tqdm(test_loader, desc='Predict', leave=False)):  
                x_test[0] = torch.tensor(x_test[0], device=device, dtype=torch.float32)
                x_test[1] = torch.tensor(x_test[1], device=device, dtype=torch.float32) 
                z_test = model(x_test[0],x_test[1])
                z_test = torch.sigmoid(z_test)
                preds[i*x_test[0].shape[0]:i*x_test[0].shape[0] + x_test[0].shape[0]] += z_test
    preds /= TTA
            
             
    
    gc.collect()   
           
preds /= skf.n_splits 

HBox(children=(FloatProgress(value=0.0, description='TTA', max=3.0, style=ProgressStyle(description_width='ini…

HBox(children=(FloatProgress(value=0.0, description='Predict', max=344.0, style=ProgressStyle(description_widt…




RuntimeError: DataLoader worker (pid(s) 1448107) exited unexpectedly

In [41]:
sub = pd.read_csv('../kaggle/sample_submission.csv')
sub['target'] = preds.cpu().numpy().reshape(-1,)
sub.to_csv('submission.csv', index=False)

In [66]:
from multiprocessing import Pool
p = Pool(5)
def f(x):
    return x*x

p.map(f, [1,2,3])

KeyboardInterrupt: 