In [2]:
import warnings
import time
from data import *
#from data_transforms import *
from apex import amp
from torch import nn
from torch.utils.data import Dataset, DataLoader, Subset
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, roc_auc_score
from sklearn.model_selection import StratifiedKFold, GroupKFold, KFold
from efficientnet_pytorch import EfficientNet
from catalyst.data.sampler import BalanceClassSampler

#CV2
import cv2

#Importing Tabnet
from pytorch_tabnet.tab_network import TabNet

import datetime
from fastprogress import master_bar, progress_bar
%load_ext autoreload
%autoreload 2
warnings.simplefilter('ignore')

In [6]:
# Mini-batch size shared by the train, validation and test DataLoaders.
batch_size = 32

In [7]:
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

In [13]:
# Load the fold-annotated training table and the test table, then give the
# test table the same columns/values the training pipeline expects.
df = pd.read_csv('/data/full/folds_13062020.csv')
df_test = pd.read_csv('/data/full/test.csv')

# Missing anatomical sites become an explicit 'unknown' category. The result is
# assigned back to the column: `df_test[col].fillna(..., inplace=True)` operates
# on a temporary Series and does not update `df_test` under pandas copy-on-write.
df_test['anatom_site_general_challenge'] = (
    df_test['anatom_site_general_challenge'].fillna('unknown')
)

# Placeholder label so the test table has a 'target' column like the training table.
df_test['target'] = 0

In [14]:
# Tabular input columns, the categorical subset of them, and the label column.
features = ['sex', 'age_approx', 'anatom_site_general_challenge']
cat = ['sex', 'anatom_site_general_challenge']
target = 'target'

categorical_columns = []

# Integer-encode every categorical column. The encoder is fitted on the training
# table only and then applied to both tables, so the codes agree between them.
for col in cat:
    # Report the cardinality of the raw column in each table before encoding.
    for split_name, frame in (('train', df), ('test', df_test)):
        print(split_name, col, frame[col].nunique())
    l_enc = LabelEncoder().fit(df[col].values)
    df[col] = l_enc.transform(df[col].values)
    df_test[col] = l_enc.transform(df_test[col].values)

train sex 3
test sex 2
train anatom_site_general_challenge 8
test anatom_site_general_challenge 7


In [47]:
class MelanomaDataset(Dataset):
    """Dataset pairing each lesion image with its tabular metadata.

    Items are dicts (see ``__getitem__``), so they can be batched by the default
    ``DataLoader`` collate function.
    """

    def __init__(self, df: pd.DataFrame,
                 imfolder: (str, Path),
                 train: bool = True,
                 transforms = None,
                 meta_features = None):
        """
        Class initialization
        Args:
            df (pd.DataFrame): DataFrame with data description; must contain an
                'image_name' column, the ``meta_features`` columns and, when
                ``train`` is True, a 'target' column
            imfolder (str): folder with images (``<imfolder>/<image_name>.jpg``)
            train (bool): flag of whether a training dataset is being initialized or testing one
            transforms: image transformation method to be applied
            meta_features (list): list of features with meta information, such as sex and age;
                ``None`` or an empty list yields an empty feature vector

        """
        self.df = df
        self.imfolder = imfolder
        self.transforms = transforms
        self.train = train
        self.meta_features = meta_features

    def __getitem__(self, index):
        """Return the sample at positional ``index`` as a dict.

        Keys:
            'image': the image read with ``cv2.imread`` (after ``transforms`` if given).
            'features': 1-D float32 tensor with the ``meta_features`` values.
            'label': integer class from the 'target' column (training mode only).
            'target': one-hot float tensor of size 2 for the label (training mode only).

        In test mode 'label' and 'target' are omitted rather than set to ``None``,
        because the default collate function cannot batch ``None`` values.

        Raises:
            FileNotFoundError: if the image cannot be read.
        """
        row = self.df.iloc[index]
        im_path = Path(f"{self.imfolder}/{row['image_name']}.jpg")

        # cv2.imread signals failure by returning None instead of raising.
        # NOTE(review): cv2 loads images as BGR; confirm that the transforms (or
        # the backbone's expected input) account for the channel order.
        x = cv2.imread(str(im_path))
        if x is None:
            raise FileNotFoundError(f"Could not read image: {im_path}")

        if self.meta_features is not None and len(self.meta_features) > 0:
            # A row taken from a mixed-dtype frame is an object Series; cast it to
            # float32 first because torch.tensor rejects object arrays.
            meta_values = row[self.meta_features].astype('float32').values
            meta = torch.tensor(meta_values, dtype=torch.float)
        else:
            meta = torch.zeros(0, dtype=torch.float)

        if self.transforms:
            x = self.transforms(x)

        sample = {'image': x, 'features': meta}
        if self.train:
            y = int(row['target'])
            sample['label'] = y
            sample['target'] = self.one_hot(2, y)
        return sample

    def __len__(self):
        """Number of rows in the underlying DataFrame."""
        return len(self.df)

    @staticmethod
    def one_hot(size, target):
        """Return a float32 vector of length ``size`` with a 1 at index ``target``."""
        tensor = torch.zeros(size, dtype=torch.float32)
        tensor[target] = 1.
        return tensor

In [30]:
class CustomTabnet(nn.Module):
    """Thin ``nn.Module`` wrapper that builds a ``TabNet`` network and forwards to it.

    All constructor arguments are passed straight through to ``TabNet``.
    """

    def __init__(self, input_dim, output_dim,n_d=8, n_a=8,n_steps=3, gamma=1.3,
                cat_idxs=[], cat_dims=[3,8], cat_emb_dim=[3,5],n_independent=2, n_shared=2,
                momentum=0.02,mask_type="sparsemax"):
        """Build the wrapped ``TabNet``.

        Args:
            input_dim: number of input features.
            output_dim: size of the network output.
            n_d, n_a, n_steps, gamma, n_independent, n_shared, momentum:
                forwarded unchanged to ``TabNet``.
            cat_idxs: indices of the categorical input columns.
            cat_dims: number of categories per categorical column.
            cat_emb_dim: embedding size (one int, or one entry per categorical column).
            mask_type: attention mask function; previously this argument was
                ignored and "sparsemax" was always used.

        NOTE(review): the defaults pair an empty ``cat_idxs`` with non-empty
        ``cat_dims``/``cat_emb_dim``; how ``TabNet`` treats that combination
        depends on the installed pytorch_tabnet version — confirm.
        """
        super().__init__()
        # Copy list arguments so the shared default lists are never handed to
        # (and possibly mutated by) the wrapped network.
        emb_dim = cat_emb_dim if isinstance(cat_emb_dim, int) else list(cat_emb_dim)
        self.tabnet = TabNet(input_dim=input_dim,
                             output_dim=output_dim,
                             n_d=n_d,
                             n_a=n_a,
                             n_steps=n_steps,
                             gamma=gamma,
                             cat_idxs=list(cat_idxs),
                             cat_dims=list(cat_dims),
                             cat_emb_dim=emb_dim,
                             n_independent=n_independent,
                             n_shared=n_shared,
                             momentum=momentum,
                             mask_type=mask_type)

    def forward(self, x):
        """Return whatever the wrapped ``TabNet`` returns for ``x``."""
        return self.tabnet(x)

In [31]:
# Throwaway instance used in the next cell to inspect the network's final layer.
tabnet = CustomTabnet(input_dim=2, output_dim=2)

In [57]:
# Input width of the last registered sub-module of the wrapped network.
list(tabnet.tabnet.modules()).pop().in_features

8

In [59]:
# Instantiate an EfficientNet-B0 with pretrained weights.
# NOTE(review): `net` is not referenced by any later cell in this notebook; the
# training loop builds its own 'efficientnet-b1' backbone.
net = EfficientNet.from_pretrained('efficientnet-b0')

Downloading: "https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/efficientnet-b0-355c32eb.pth" to /content/.cache/torch/hub/checkpoints/efficientnet-b0-355c32eb.pth


HBox(children=(FloatProgress(value=0.0, max=21388428.0), HTML(value='')))


Loaded pretrained weights for efficientnet-b0


In [7]:
class Effnet(nn.Module):
    """Two-branch classifier: a CNN backbone for the image plus a TabNet for metadata.

    The backbone's classifier is replaced by a 64-unit projection, the TabNet's
    final linear layer is removed so it emits its hidden representation, and the
    two feature vectors are concatenated and mapped to a single logit.
    """

    def __init__(self, arch, input_dim, output_dim):
        """
        Args:
            arch: image backbone exposing its classifier as ``_fc``
                (as ``efficientnet_pytorch.EfficientNet`` does).
            input_dim: number of metadata features fed to the TabNet branch.
            output_dim: output size used to construct the TabNet; its final layer
                is stripped afterwards, so this does not affect the model output.
        """
        super().__init__()
        self.arch = arch
        # Read the backbone's feature width from its existing head instead of
        # assuming 1280 (which only holds for some EfficientNet variants).
        backbone_width = getattr(getattr(self.arch, '_fc', None), 'in_features', 1280)
        self.arch._fc = nn.Linear(in_features=backbone_width, out_features=64, bias=True)

        self.tab = CustomTabnet(input_dim, output_dim)
        # `modules()` walks the module tree recursively (starting with the root),
        # so wrapping `list(modules())[:-1]` in nn.Sequential does not drop the
        # head — it chains the whole network with its own sub-modules. Swap the
        # final Linear for an identity in place instead.
        tab_width = self._strip_last_linear(self.tab)

        # Attribute name (including its spelling) is kept so existing checkpoints
        # and state-dict keys stay valid.
        self.ouput = nn.Linear(64 + tab_width, 1)

    @staticmethod
    def _strip_last_linear(module):
        """Replace the last-registered ``nn.Linear`` inside ``module`` with ``nn.Identity``.

        Returns the ``in_features`` of the removed layer, i.e. the width of the
        representation the module now emits.
        """
        last_name = None
        for name, child in module.named_modules():
            if isinstance(child, nn.Linear):
                last_name = name
        if not last_name:
            raise ValueError("no nested nn.Linear layer found to strip")

        *parent_path, leaf = last_name.split('.')
        owner = module
        for part in parent_path:
            owner = getattr(owner, part)
        width = getattr(owner, leaf).in_features
        setattr(owner, leaf, nn.Identity())
        return width

    def forward(self, inputs):
        """
        No sigmoid in forward because we are going to use BCEWithLogitsLoss
        Which applies sigmoid for us when calculating a loss

        Args:
            inputs: dict with an 'image' batch and a 'features' (metadata) batch.
        Returns:
            Tensor of shape (batch, 1) holding one logit per sample.
        """
        x, meta = inputs['image'], inputs['features']
        cnn_features = self.arch(x)
        meta_features = self.tab(meta)
        # Some TabNet versions return (output, sparsity_loss); keep the output only.
        if isinstance(meta_features, (tuple, list)):
            meta_features = meta_features[0]
        features = torch.cat((cnn_features, meta_features), dim=1)
        return self.ouput(features)
    

In [8]:
# Test-time dataset. It is given the *training* augmentations on purpose, so
# repeated passes over it produce different views (test-time augmentation).
test = MelanomaDataset(
    test_df,
    TEST,
    train=False,
    transforms=train_transform,
    meta_features=meta_features,
)

In [9]:
import gc

# Training schedule.
epochs = 15       # maximum number of epochs per fold
es_patience = 3   # early stopping: epochs without improvement before giving up
TTA = 3           # number of test-time-augmentation passes over the test set

# Accumulators filled by the fold loop.
oof = np.zeros(shape=(len(train_df), 1))  # out-of-fold predictions, one row per training sample
preds = torch.zeros(len(test), 1, dtype=torch.float32, device=device)  # summed test-set predictions

# Plain shuffled 5-way K-fold splitter (not stratified, despite the variable name).
skf = KFold(5, shuffle=True, random_state=47)

In [10]:
from torch.utils.data.dataloader import default_collate


def _collate_skip_none(samples):
    """Collate dict samples, dropping keys whose value is None.

    Test-mode samples may carry `None` for 'label'/'target'; the default collate
    function cannot batch `None`, so such keys are removed first.
    """
    keep = [key for key, value in samples[0].items() if value is not None]
    return default_collate([{key: sample[key] for key in keep} for sample in samples])


def _to_device(batch):
    """Return the model inputs of a collated batch as float32 tensors on `device`."""
    return {'image': batch['image'].to(device, dtype=torch.float32),
            'features': batch['features'].to(device, dtype=torch.float32)}


def _predict_proba(model, loader, bar_parent=None):
    """Sigmoid probabilities for every sample served by `loader`, in loader order.

    Returns a (len(loader.dataset), 1) float32 tensor on `device`. The caller is
    responsible for `model.eval()` and `torch.no_grad()`.
    """
    out = torch.zeros((len(loader.dataset), 1), dtype=torch.float32, device=device)
    offset = 0
    for batch in progress_bar(loader, parent=bar_parent, total=len(loader)):
        probs = torch.sigmoid(model(_to_device(batch)))
        out[offset:offset + probs.shape[0]] = probs
        offset += probs.shape[0]
    return out


# Manual resume point: folds before `first_fold` are skipped entirely. Set it to 1
# to process every fold; folds whose checkpoint already exists are not retrained,
# only used for prediction.
first_fold = 4
# The K-fold split is over 15 group ids that are matched against train_df['fold'].
# NOTE(review): presumably train_df['fold'] holds the ids 0..14 — confirm.
fold_splits = list(skf.split(np.arange(15)))[first_fold - 1:]
folds_run = 0
all_targets = train_df['target'].values

# The test loader does not depend on the fold, so build it once.
test_loader = DataLoader(dataset=test, batch_size=batch_size, shuffle=False, num_workers=4,
                         collate_fn=_collate_skip_none)

for fold, (idxT, idxV) in enumerate(fold_splits, first_fold):
    print('=' * 20, 'Fold', fold, '=' * 20)

    # Positional row numbers (not index labels), because they are used with
    # `.iloc` and to index the `oof` array.
    train_idx = np.flatnonzero(train_df['fold'].isin(idxT).values)
    val_idx = np.flatnonzero(train_df['fold'].isin(idxV).values)
    val_targets = all_targets[val_idx]

    model_path = f'/out/model_{fold}.pth'  # Path and filename to save model to
    # Decide per fold: an existing checkpoint means "predict only". (Previously the
    # flag was undefined when no checkpoint existed and never reset between folds.)
    inference = Path(model_path).exists()

    val = MelanomaDataset(df=train_df.iloc[val_idx].reset_index(drop=True),
                          imfolder=TRAIN,
                          train=True,
                          transforms=test_transform,
                          meta_features=meta_features)
    val_loader = DataLoader(dataset=val, batch_size=batch_size, shuffle=False, num_workers=4,
                            collate_fn=_collate_skip_none)

    if not inference:
        best_val = 0  # Best validation score within this fold
        patience = es_patience  # Current patience counter

        arch = EfficientNet.from_pretrained('efficientnet-b1')
        # New model for each fold; arguments follow Effnet(arch, input_dim, output_dim).
        model = Effnet(arch=arch, input_dim=len(meta_features), output_dim=2).to(device)
        optim = torch.optim.AdamW(model.parameters(), lr=0.001)
        scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer=optim, mode='max', patience=1,
                                                               verbose=True, factor=0.2)
        criterion = nn.BCEWithLogitsLoss()

        train = MelanomaDataset(df=train_df.iloc[train_idx].reset_index(drop=True),
                                imfolder=TRAIN,
                                train=True,
                                transforms=train_transform,
                                meta_features=meta_features)
        train_loader = DataLoader(dataset=train, batch_size=batch_size, shuffle=True, num_workers=4,
                                  collate_fn=_collate_skip_none)

        mb = master_bar(range(epochs))
        for epoch in mb:
            start_time = time.time()
            correct = 0
            epoch_loss = 0
            model.train()

            # The dataset yields dicts, so each batch is a dict of stacked tensors.
            for batch in progress_bar(train_loader, parent=mb, total=len(train_loader)):
                y = batch['label'].to(device, dtype=torch.float32).unsqueeze(1)
                optim.zero_grad()
                z = model(_to_device(batch))
                loss = criterion(z, y)
                loss.backward()
                optim.step()
                pred = torch.round(torch.sigmoid(z))  # round off sigmoid to obtain predictions
                correct += (pred == y).sum().item()  # tracking number of correctly predicted samples
                epoch_loss += loss.item()
                mb.child.comment = f'{epoch_loss:.4f}'
            train_acc = correct / len(train_idx)

            model.eval()  # switch model to the evaluation mode
            with torch.no_grad():  # Do not calculate gradient since we are only predicting
                val_probs = _predict_proba(model, val_loader, bar_parent=mb).cpu().numpy().ravel()
            val_acc = accuracy_score(val_targets, np.round(val_probs))
            val_roc = roc_auc_score(val_targets, val_probs)

            mb.write('Epoch {:03}: | Loss: {:.3f} | Train acc: {:.3f} | Val acc: {:.3f} | Val roc_auc: {:.3f} | Training time: {}'.format(
                epoch + 1,
                epoch_loss,
                train_acc,
                val_acc,
                val_roc,
                str(datetime.timedelta(seconds=time.time() - start_time))[:7]))

            scheduler.step(val_roc)

            if val_roc >= best_val:
                best_val = val_roc
                patience = es_patience  # Resetting patience since we have new best validation score
                torch.save(model, model_path)  # Saving current best model
            else:
                patience -= 1
                if patience == 0:
                    print('Early stopping. Best Val roc_auc: {:.3f}'.format(best_val))
                    break

        del train, train_loader

    # Loading best model of this fold.
    # NOTE(review): the checkpoint is a fully pickled model; torch.load unpickles
    # it, so only load checkpoints produced by this notebook.
    model = torch.load(model_path)
    model.eval()  # switch model to the evaluation mode
    with torch.no_grad():
        # Predicting on validation set once again to obtain data for OOF
        oof[val_idx] = _predict_proba(model, val_loader).cpu().numpy()

        # Predicting on test set: average the TTA passes of THIS fold only, then
        # add that average to the running sum (dividing `preds` itself by TTA
        # would rescale the contributions of earlier folds again).
        fold_preds = torch.zeros_like(preds)
        for _ in range(TTA):
            fold_preds += _predict_proba(model, test_loader)
        preds += fold_preds / TTA

    folds_run += 1
    del val, val_loader, fold_preds
    gc.collect()

# Average over the folds that actually contributed (not skf.n_splits, which is
# wrong whenever `first_fold` skips folds).
if folds_run:
    preds /= folds_run

Loaded pretrained weights for efficientnet-b1


In [None]:
# Saving OOF predictions so stacking would be easier
pd.Series(oof.reshape(-1,)).to_csv('oof.csv', index=False)
sub = pd.read_csv(DATA / 'sample_submission.csv')
sub['target'] = preds.cpu().numpy().reshape(-1,)
sub.to_csv('/out/img_meta_submission.csv', index=False)

In [None]:
!kaggle competitions submit -c siim-isic-melanoma-classification -f submission.csv -m "Melanoma Starter Image Size 384"