In [1]:
import pandas as pd
import numpy as np
import pickle
from matplotlib.pyplot import plot as plt
import random
import datetime
import math
from matplotlib.ticker import MaxNLocator
from colorama import Fore, Back, Style
import gc
import copy

from sklearn.model_selection import StratifiedKFold,StratifiedGroupKFold
from sklearn.metrics import roc_curve,roc_auc_score,average_precision_score
from sklearn.preprocessing import StandardScaler

import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim.lr_scheduler import MultiStepLR

import warnings
warnings.filterwarnings('ignore')

In [2]:
import os
def seed_everything(seed=42):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    
seed_everything(seed=42)

In [3]:
def amex_metric_mod(y_true, y_pred):

    labels     = np.transpose(np.array([y_true, y_pred]))
    labels     = labels[labels[:, 1].argsort()[::-1]]
    weights    = np.where(labels[:,0]==0, 20, 1)
    cut_vals   = labels[np.cumsum(weights) <= int(0.04 * np.sum(weights))]
    top_four   = np.sum(cut_vals[:,0]) / np.sum(labels[:,0])

    gini = [0,0]
    for i in [1,0]:
        labels         = np.transpose(np.array([y_true, y_pred]))
        labels         = labels[labels[:, i].argsort()[::-1]]
        weight         = np.where(labels[:,0]==0, 20, 1)
        weight_random  = np.cumsum(weight / np.sum(weight))
        total_pos      = np.sum(labels[:, 0] *  weight)
        cum_pos_found  = np.cumsum(labels[:, 0] * weight)
        lorentz        = cum_pos_found / total_pos
        gini[i]        = np.sum((lorentz - weight_random) * weight)

    return 0.5 * (gini[1]/gini[0] + top_four)

In [4]:
train = pd.read_feather('../input/amexfeatureengineering/770_FE_train.feather')
target = train.target
FEATURES = [col for col in train.columns if col not in ['customer_ID','target']]

# The model

Our model has four hidden layers, enriched by a skip connection and a Dropout layer.

In [5]:
class resnetModel(nn.Module):
    def __init__(self, num_features,hidden_size,ispretrain=False):
        super(resnetModel, self).__init__()
        self.ispretrain=ispretrain
        self.batch_norm1 = nn.BatchNorm1d(num_features)
        self.dense1 = nn.utils.weight_norm(nn.Linear(num_features, hidden_size))
        
        
        self.batch_norm2 = nn.BatchNorm1d(num_features+hidden_size)
        self.dropout2 = nn.Dropout(0.5)
        self.dense2 = nn.utils.weight_norm(nn.Linear(num_features+hidden_size, hidden_size))
        self.batch_norm20 = nn.BatchNorm1d(hidden_size)
        self.dropout20 = nn.Dropout(0.5)
        self.dense20 = nn.utils.weight_norm(nn.Linear(hidden_size, hidden_size))
        

        self.batch_norm3 = nn.BatchNorm1d(2*hidden_size)
        self.dropout3 = nn.Dropout(0.5)
        self.dense3 = nn.utils.weight_norm(nn.Linear(2*hidden_size, hidden_size))
        self.batch_norm30 = nn.BatchNorm1d(hidden_size)
        self.dropout30 = nn.Dropout(0.5)
        self.dense30 = nn.utils.weight_norm(nn.Linear(hidden_size, hidden_size))
        

        self.batch_norm4 = nn.BatchNorm1d(2*hidden_size)
        self.dropout4 = nn.Dropout(0.5)
        if self.ispretrain:
          self.dense4 = nn.utils.weight_norm(nn.Linear(2*hidden_size, 1))
        else:
          self.dense5 = nn.utils.weight_norm(nn.Linear(2*hidden_size, 1))
    
    def forward(self, x):
        x1 = self.batch_norm1(x)
        x1 = F.elu(self.dense1(x1))
        x = torch.cat([x,x1],1)
        
        x2 = self.batch_norm2(x)
        x2 = self.dropout2(x2)
        x2 = F.elu(self.dense2(x2))
        x2 = self.batch_norm20(x2)
        x2 = self.dropout20(x2)
        x2 = F.elu(self.dense20(x2))
        x = torch.cat([x1,x2],1)

        x3 = self.batch_norm3(x)
        x3 = self.dropout3(x3)
        x3 = F.elu(self.dense3(x3))
        x3 = self.batch_norm30(x3)
        x3 = self.dropout30(x3)
        x3 = F.elu(self.dense30(x3))
        x3 = torch.cat([x2,x3],1)
        
        x3 = self.batch_norm4(x3)
        x3 = self.dropout4(x3)
        if self.ispretrain:
          x3 = self.dense4(x3)
        else:
          x3 = self.dense5(x3)
        return x3

In [6]:
skf = StratifiedKFold(n_splits=5)

for f,(t_idx,v_idx) in enumerate(skf.split(X=train,y=target)):
    train.loc[v_idx,'kfold'] = int(f)

train['kfold'] = train['kfold'].astype(int)    

## Dataset Classes

In [7]:
class AmexDataset:
    def __init__(self,features,target,noise=0.1):
        self.features = features
        self.target = target
        self.noise = noise
        
    def __len__(self):
        return (self.features.shape[0])
    
    def __getitem__(self,idx):
        
        sample = self.features[idx, :].copy()
        sample = self.swap_sample(sample)
        
        dct = {
            'x' : torch.tensor(self.features[idx, :], dtype=torch.float),
            'y' : torch.tensor(self.target[idx] ,dtype=torch.float)          
        }
        return dct
    
    def swap_sample(self,sample):
            #print(sample.shape)
            num_samples = self.features.shape[0]
            num_features = self.features.shape[1]
            if len(sample.shape) == 2:
                batch_size = sample.shape[0]
                random_row = np.random.randint(0, num_samples, size=batch_size)
                for i in range(batch_size):
                    random_col = np.random.rand(num_features) < self.noise
                    #print(random_col)
                    sample[i, random_col] = self.features[random_row[i], random_col]
            else:
                batch_size = 1
          
                random_row = np.random.randint(0, num_samples, size=batch_size)
               
            
                random_col = np.random.rand(num_features) < self.noise
                #print(random_col)
                #print(random_col)
       
                sample[ random_col] = self.features[random_row, random_col]
                
            return sample

In [8]:
def train_fn(model,optimizer,scheduler,loss_fn,dataloader,device):
    
    model.train()
    final_loss = 0
    
    for  data in dataloader:
        optimizer.zero_grad()
        inputs,target = data['x'].to(device),data['y'].to(device)
        outputs = model(inputs)
        loss = loss_fn(outputs[:,0],target.float())
        loss.backward()
        optimizer.step()
        scheduler.step()
        
        final_loss += loss.item()
        
    final_loss /= len(dataloader)
    
    return final_loss

def valid_fn(model,loss_fn,dataloader,device):
    model.eval()
    final_loss = 0
    valid_preds = [] 
    
    for data in dataloader:
        inputs,target = data['x'].to(device),data['y'].to(device)
        outputs = model(inputs)
        loss = loss_fn(outputs[:,0],target.float())
        final_loss +=loss.item()
        valid_preds.append(outputs.sigmoid().detach().cpu().numpy())
        
    final_loss /= len(dataloader)
    valid_preds = np.concatenate(valid_preds)
    
    return final_loss, valid_preds

In [9]:
class FineTuneScheduler:
    def __init__(self, epochs):
        self.epochs = epochs
        self.epochs_per_step = 0
        self.frozen_layers = []

    def copy_without_top(self, model, num_features):
        self.frozen_layers = []

        model_new = resnetModel(num_features,1024)
        model_new.load_state_dict(model.state_dict())

        # Freeze all weights
        for name, param in model_new.named_parameters():
            layer_index = name.split('.')[0][-1]

            if layer_index == 5:
                continue

            param.requires_grad = False

            # Save frozen layer names
            if layer_index not in self.frozen_layers:
                self.frozen_layers.append(layer_index)

        self.epochs_per_step = self.epochs // len(self.frozen_layers)
        
        hidden_size = 1024
        # Replace the top layers with another ones
        model_new.batch_norm4 = nn.BatchNorm1d(2*hidden_size)
        model_new.dropout4 = nn.Dropout(0.5)
        model_new.dense5 = nn.utils.weight_norm(nn.Linear(2*hidden_size, 1))
        model_new.to(DEVICE)
        return model_new

    def step(self, epoch, model):
        if len(self.frozen_layers) == 0:
            return

        if epoch % self.epochs_per_step == 0:
            last_frozen_index = self.frozen_layers[-1]
            
            # Unfreeze parameters of the last frozen layer
            for name, param in model.named_parameters():
                layer_index = name.split('.')[0][-1]

                if layer_index == last_frozen_index:
                    param.requires_grad = True

            del self.frozen_layers[-1]  # Remove the last layer as unfrozen

In [10]:
DEVICE = 'cuda'
EPOCHS = 15
BATCH_SIZE = 1024
LEARNING_RATE = 1e-3
NFOLDS = 5           #<-- Update
EARLY_STOPPING_STEPS = 10
EARLY_STOP = False

In [11]:
import torch
from torch.nn.modules.loss import _WeightedLoss
import torch.nn.functional as F

class SmoothBCEwLogits(_WeightedLoss):
    def __init__(self, weight=None, reduction='mean', smoothing=0.0):
        super().__init__(weight=weight, reduction=reduction)
        self.smoothing = smoothing
        self.weight = weight
        self.reduction = reduction

    @staticmethod
    def _smooth(targets:torch.Tensor, n_labels:int, smoothing=0.0):
        assert 0 <= smoothing < 1

        with torch.no_grad():
            targets = targets * (1.0 - smoothing) + 0.5 * smoothing
            
        return targets

    def forward(self, inputs, targets):
        targets = SmoothBCEwLogits._smooth(targets, inputs.size(-1),
            self.smoothing)
        loss = F.binary_cross_entropy_with_logits(inputs, targets,self.weight)

        if  self.reduction == 'sum':
            loss = loss.sum()
        elif  self.reduction == 'mean':
            loss = loss.mean()

        return loss

In [12]:
def run_training(fold,seed):
    seed_everything(seed)
    
    trn_idx = train[train['kfold'] != fold].index
    val_idx = train[train['kfold'] == fold].index
    
    train_df = train[train['kfold'] != fold].reset_index(drop=True)
    valid_df = train[train['kfold'] == fold].reset_index(drop=True)
    
    def train_model(model,fine_tune_scheduler=None):
    
        x_train,y_train = train_df[FEATURES].values,train_df['target'].values
        x_valid,y_valid = valid_df[FEATURES].values,valid_df['target'].values
        
      #  scaler = StandardScaler()
        
      #  x_train =  scaler.fit_transform(x_train)
      #  x_valid = scaler.transform(x_valid)
        
        train_dataset = AmexDataset(x_train,y_train)
        valid_dataset = AmexDataset(x_valid,y_valid)

        trainloader = torch.utils.data.DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
        validloader = torch.utils.data.DataLoader(valid_dataset, batch_size=BATCH_SIZE, shuffle=False)
        
        optimizer = torch.optim.Adam(model.parameters())
        scheduler = torch.optim.lr_scheduler.OneCycleLR(optimizer=optimizer, pct_start=0.1, div_factor=1e3, 
                                              max_lr=1e-2, epochs=EPOCHS, steps_per_epoch=len(trainloader))
        
        loss_fn = nn.BCEWithLogitsLoss()
        loss_tr = SmoothBCEwLogits(smoothing=0.001)

        oof = np.zeros((len(train),1))
        best_loss = np.inf
        
        for epoch in range(EPOCHS):
            if fine_tune_scheduler is not None:
                fine_tune_scheduler.step(epoch, model)

            train_loss = train_fn(model, optimizer, scheduler, loss_tr, trainloader, DEVICE)
            valid_loss, valid_preds = valid_fn(model, loss_fn, validloader, DEVICE)
            print(f"SEED: {seed}, FOLD: {fold}, EPOCH: {epoch}, train_loss: {train_loss:.6f}, valid_loss: {valid_loss:.6f}")

            if np.isnan(valid_loss):
                break
            
            if valid_loss < best_loss:
                best_loss = valid_loss
                oof[val_idx] = valid_preds
                torch.save(model.state_dict(), f"SEED{seed}_FOLD{fold}_.pth")
       
        return oof

    fine_tune_scheduler = FineTuneScheduler(EPOCHS)
    
    pretrained_model = resnetModel(len(FEATURES),1024)
    pretrained_model.to(DEVICE)
    
    print('1st Stage')
    
    # Train on scored + nonscored targets
    train_model(pretrained_model)
    
    # Load the pretrained model with the best loss
    pretrained_model = resnetModel(len(FEATURES),1024)
    pretrained_model.load_state_dict(torch.load(f"SEED{seed}_FOLD{fold}_.pth"))
    pretrained_model.to(DEVICE)
                                     
    # Copy model without the top layer
    final_model = fine_tune_scheduler.copy_without_top(pretrained_model,len(FEATURES))     
    
    print('2nd Stage / Fine Tuning....')
                                     
    oof = train_model(final_model,fine_tune_scheduler)       
                                    
    return oof                                 

In [13]:
def run_k_fold(NFOLDS, seed):
    oof = np.zeros((len(train), 1))
    
    for fold in range(NFOLDS):
        oof_ = run_training(fold, seed)
        oof += oof_
    return oof

In [14]:
# Averaging on multiple SEEDS

SEED = [41,42]  #<-- Update
oof = np.zeros((len(train),1))

for seed in SEED:
    
    oof_ = run_k_fold(NFOLDS, seed)
    oof += oof_ / len(SEED)

1st Stage
SEED: 41, FOLD: 0, EPOCH: 0, train_loss: 0.295759, valid_loss: 0.233448
SEED: 41, FOLD: 0, EPOCH: 1, train_loss: 0.229706, valid_loss: 0.227208
SEED: 41, FOLD: 0, EPOCH: 2, train_loss: 0.225234, valid_loss: 0.222241
SEED: 41, FOLD: 0, EPOCH: 3, train_loss: 0.222848, valid_loss: 0.225898
SEED: 41, FOLD: 0, EPOCH: 4, train_loss: 0.220391, valid_loss: 0.219921
SEED: 41, FOLD: 0, EPOCH: 5, train_loss: 0.218838, valid_loss: 0.220065
SEED: 41, FOLD: 0, EPOCH: 6, train_loss: 0.216608, valid_loss: 0.220168
SEED: 41, FOLD: 0, EPOCH: 7, train_loss: 0.214090, valid_loss: 0.220342
SEED: 41, FOLD: 0, EPOCH: 8, train_loss: 0.210673, valid_loss: 0.223702
SEED: 41, FOLD: 0, EPOCH: 9, train_loss: 0.206708, valid_loss: 0.224151
SEED: 41, FOLD: 0, EPOCH: 10, train_loss: 0.200971, valid_loss: 0.225135
SEED: 41, FOLD: 0, EPOCH: 11, train_loss: 0.193660, valid_loss: 0.231898
SEED: 41, FOLD: 0, EPOCH: 12, train_loss: 0.185052, valid_loss: 0.237297
SEED: 41, FOLD: 0, EPOCH: 13, train_loss: 0.177518,

In [15]:
my_ap = amex_metric_mod(target,oof[:,0])

In [16]:
my_ap

0.7918790490212578

In [17]:
oof = pd.DataFrame({'customer_ID':train.customer_ID,'target':train.target,'oof_pred':oof[:,0]})
oof.to_csv('oof_transfer_learning.csv',index=False)