In [13]:
import lightgbm as lgb
import warnings
import os
import numpy as np
import pandas as pd
import time
import matplotlib.pyplot as plt
from pathlib import Path
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.autograd import Variable
from torch.utils.data import Dataset, DataLoader
from torch.optim.optimizer import Optimizer

# Suppress every warning raised by the libraries used in this notebook.
warnings.filterwarnings('ignore')

# Switch between the local data disk and the relative '../input/' directory.
IS_LOCAL = True
PATH = Path('/mnt/disks/data/santander-customer-transaction/') if IS_LOCAL else Path('../input/')

# Display the files available in the data directory.
os.listdir(str(PATH))

['train.csv.zip',
 'test.csv',
 'sample_submission.csv.zip',
 'test.csv.zip',
 'train.csv']

# Load Data

In [14]:
%%time
# Read the train and test CSVs (in that order) from the data directory.
train_df, test_df = (
    pd.read_csv(str(PATH / csv_name)) for csv_name in ('train.csv', 'test.csv')
)

CPU times: user 15.1 s, sys: 769 ms, total: 15.9 s
Wall time: 15.9 s


In [15]:
# Separate the label column from the predictors. 'ID_code' is dropped from
# both frames (presumably a row identifier rather than a feature).
train_target = train_df['target']
train_features = train_df.drop(columns=['target', 'ID_code'])
test_features = test_df.drop(columns=['ID_code'])


## Standardization

In [16]:
# Learn the scaling statistics from the training features only, then apply
# the same transformation to both the training and the test features.
sc = StandardScaler().fit(train_features)
train_features = sc.transform(train_features)
test_features = sc.transform(test_features)

## K-fold validation

In [18]:
n_split = 11
# Shuffle with a fixed seed so the fold assignment is identical on every run;
# without `random_state` the folds change each time the cell is executed.
skf = StratifiedKFold(n_splits=n_split, shuffle=True, random_state=42)
splits = list(skf.split(train_features, train_target))


## Cyclical learning rate

In [19]:
class CyclicLR(object):
    """Cyclical learning-rate policy that is stepped once per batch.

    The learning rate of every optimizer parameter group oscillates between
    ``base_lr`` and ``max_lr`` along a triangular wave whose half-period is
    ``step_size`` batches. The amplitude of the wave is multiplied by a
    scaling function selected through ``mode`` (or supplied as ``scale_fn``).

    Args:
        optimizer: a ``torch.optim`` optimizer whose param-group ``lr`` values
            are overwritten on every ``batch_step`` call.
        base_lr: lower learning-rate bound; a scalar, or a list/tuple with one
            value per parameter group.
        max_lr: upper learning-rate bound; same format as ``base_lr``.
        step_size: number of batches in half a cycle.
        mode: one of ``'triangular'`` (constant amplitude), ``'triangular2'``
            (amplitude halved every cycle) or ``'exp_range'`` (amplitude
            scaled by ``gamma ** iteration``). Ignored for scaling purposes
            when ``scale_fn`` is given.
        gamma: base of the exponential used by ``'exp_range'``.
        scale_fn: optional custom amplitude function of a single argument.
        scale_mode: ``'cycle'`` to call ``scale_fn`` with the cycle number;
            any other value calls it with the batch iteration. Only used
            when ``scale_fn`` is given.
        last_batch_iteration: index of the last processed batch (-1 when
            starting from scratch).

    Raises:
        TypeError: if ``optimizer`` is not an ``Optimizer``.
        ValueError: if a list/tuple of learning rates does not match the
            number of parameter groups, or if ``mode`` is unknown and no
            ``scale_fn`` is supplied.
    """

    def __init__(self, optimizer, base_lr=1e-3, max_lr=6e-3, step_size=2000,
                 mode='triangular', gamma=1., scale_fn=None, scale_mode='cycle',
                 last_batch_iteration=-1):
        if not isinstance(optimizer, Optimizer):
            raise TypeError('{} is not an Optimizer'.format(type(optimizer).__name__))
        self.optimizer = optimizer

        self.base_lrs = self._per_group(base_lr, 'base_lr')
        self.max_lrs = self._per_group(max_lr, 'max_lr')

        self.step_size = step_size

        if mode not in ['triangular', 'triangular2', 'exp_range'] and scale_fn is None:
            raise ValueError('mode is invalid and scale_fn is None')

        self.mode = mode
        self.gamma = gamma

        if scale_fn is None:
            if self.mode == 'triangular':
                self.scale_fn = self._triangular_scale_fn
                self.scale_mode = 'cycle'
            # The original compared against the misspelled 'trangular2', so
            # this branch never ran and the scheduler was left without a
            # scale function when mode='triangular2' was requested.
            elif self.mode == 'triangular2':
                self.scale_fn = self._triangular2_scale_fn
                self.scale_mode = 'cycle'
            elif self.mode == 'exp_range':
                self.scale_fn = self._exp_range_scale_fn
                self.scale_mode = 'iterations'
        else:
            self.scale_fn = scale_fn
            self.scale_mode = scale_mode

        # Write the initial learning rate into the optimizer, then restore the
        # counter so that the first explicit batch_step() lands on that same
        # iteration.
        self.batch_step(last_batch_iteration + 1)
        self.last_batch_iteration = last_batch_iteration

    def _per_group(self, value, name):
        """Expand a scalar or list/tuple setting into one value per param group."""
        n_groups = len(self.optimizer.param_groups)
        if isinstance(value, (list, tuple)):
            if len(value) != n_groups:
                raise ValueError("expected {} {}, got {}".format(
                    n_groups, name, len(value)))
            return list(value)
        return [value] * n_groups

    def batch_step(self, batch_iteration=None):
        """Advance to ``batch_iteration`` (default: the next one) and update the lrs."""
        if batch_iteration is None:
            batch_iteration = self.last_batch_iteration + 1
        self.last_batch_iteration = batch_iteration
        for param_group, lr in zip(self.optimizer.param_groups, self.get_lr()):
            param_group['lr'] = lr

    def _triangular_scale_fn(self, x):
        """Constant amplitude for every cycle."""
        return 1.

    def _triangular2_scale_fn(self, x):
        """Amplitude halved on each successive cycle ``x`` (1-based)."""
        return 1 / (2.**(x-1))

    def _exp_range_scale_fn(self, x):
        """Amplitude scaled by ``gamma`` raised to ``x``."""
        return self.gamma**(x)

    def get_lr(self):
        """Return the learning rate of each param group at the current iteration."""
        step_size = float(self.step_size)
        # 1-based index of the cycle the current iteration falls into.
        cycle = np.floor(1 + self.last_batch_iteration / (2 * step_size))
        # Distance from the peak of the current cycle: 0 at the peak, 1 at the ends.
        x = np.abs(self.last_batch_iteration / step_size - 2 * cycle + 1)
        lrs = []
        param_lrs = zip(self.optimizer.param_groups, self.base_lrs, self.max_lrs)
        for param_group, base_lr, max_lr in param_lrs:
            base_height = (max_lr - base_lr) * np.maximum(0, (1-x))
            if self.scale_mode == 'cycle':
                lr = base_lr + base_height * self.scale_fn(cycle)
            else:
                lr = base_lr + base_height * self.scale_fn(self.last_batch_iteration)
            lrs.append(lr)
        return lrs

# Build a simple NN model

In [20]:
def sigmoid(x):
    """Apply the logistic function 1 / (1 + e^(-x)) element-wise.

    Accepts a scalar or a NumPy array and returns a value of the same shape.
    """
    denominator = 1 + np.exp(-x)
    return 1 / denominator
    
    
class SimpleNN(nn.Module):
    """Two-layer network that embeds every input feature with shared weights.

    Each scalar feature is passed on its own through the same
    ``Linear(1, hidden_dim)`` layer followed by ReLU; the per-feature hidden
    vectors are then concatenated and mapped to a single logit.

    Args:
        input_dim: number of features per sample.
        hidden_dim: size of the hidden vector produced for each feature.
        dropout: stored on the module but NOT applied anywhere in ``forward``.
    """

    def __init__(self, input_dim, hidden_dim, dropout=0.75):
        super(SimpleNN, self).__init__()
        self.input_dim = input_dim
        self.hidden_dim = hidden_dim
        self.relu = nn.ReLU()
        # The rate is only recorded; no dropout layer is built or applied.
        self.dropout = dropout
        # Misspelled attribute from the original code, kept for compatibility.
        self.droput = dropout
        self.fc1 = nn.Linear(1, hidden_dim)
        self.fc2 = nn.Linear(int(hidden_dim * input_dim), 1)

    def forward(self, x):
        """Return one raw logit per sample; the output has shape (batch, 1)."""
        batch_size = x.size(0)
        # reshape (unlike view) also accepts non-contiguous inputs such as
        # transposed or sliced tensors, copying only when it has to.
        x = x.reshape(-1, 1)
        y = self.fc1(x)
        y = self.relu(y)
        # Regroup the per-feature hidden vectors back into one row per sample.
        y = y.reshape(batch_size, -1)
        output = self.fc2(y)
        return output


In [21]:
# Training hyper-parameters shared by every fold.
n_epochs, batch_size = 40, 256

# Per-fold final-epoch losses, collected for the cross-validation summary.
total_train_loss, total_val_loss = [], []

# Accumulators for the out-of-fold train predictions and the fold-averaged
# test predictions (one slot per row).
pred_train = np.zeros(train_features.shape[0])
pred_test = np.zeros(test_features.shape[0])

# The test set is moved to the GPU once and reused, in its original order,
# by every fold.
x_test = np.array(test_features)
x_test_cuda = torch.tensor(x_test, dtype=torch.float).cuda()
test = torch.utils.data.TensorDataset(x_test_cuda)
test_loader = DataLoader(test, shuffle=False, batch_size=batch_size)

In [None]:
# Labels as a plain NumPy array: indexing a pandas Series with a tuple such as
# `series[idx, np.newaxis]` is rejected by recent pandas releases.
target_values = np.asarray(train_target)

for fold_, (train_idx, val_idx) in enumerate(splits):

    # Move this fold's data to the GPU; targets get a trailing axis so their
    # shape matches the (batch, 1) logits produced by the model.
    x_train_fold = torch.tensor(train_features[train_idx], dtype=torch.float).cuda()
    y_train_fold = torch.tensor(target_values[train_idx, np.newaxis], dtype=torch.float32).cuda()

    x_val_fold = torch.tensor(train_features[val_idx], dtype=torch.float).cuda()
    y_val_fold = torch.tensor(target_values[val_idx, np.newaxis], dtype=torch.float32).cuda()

    # Loss function (operates on raw logits)
    loss_fn = nn.BCEWithLogitsLoss()

    # Build a fresh model for every fold; the input width is taken from the
    # data instead of being hard-coded.
    model = SimpleNN(train_features.shape[1], 16)
    model.cuda()

    # Optimizer driven by a cyclical learning rate. The earlier Adam instance
    # (with weight_decay) was removed: it was overwritten before ever being
    # used, so it never influenced training.
    step_size = 2000
    base_lr, max_lr = 0.001, 0.005
    optimizer = optim.Adam(filter(lambda p: p.requires_grad, model.parameters()), lr=max_lr)
    scheduler = CyclicLR(optimizer, base_lr=base_lr, max_lr=max_lr,
                         step_size=step_size, mode='exp_range', gamma=0.99994)

    train = torch.utils.data.TensorDataset(x_train_fold, y_train_fold)
    valid = torch.utils.data.TensorDataset(x_val_fold, y_val_fold)

    train_loader = torch.utils.data.DataLoader(train, batch_size=batch_size, shuffle=True)
    valid_loader = torch.utils.data.DataLoader(valid, batch_size=batch_size, shuffle=False)

    print('===== Fold {} ====='.format(fold_ + 1))
    for epoch in range(n_epochs):
        start_time = time.time()

        # ---- training pass ----
        model.train()
        avg_loss = 0.
        for x_batch, y_batch in train_loader:
            y_pred = model(x_batch)

            # Set the learning rate for this batch before the optimizer step.
            scheduler.batch_step()

            loss = loss_fn(y_pred, y_batch)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            avg_loss += loss.item() / len(train_loader)

        # ---- validation pass ----
        model.eval()
        valid_pred_folds = np.zeros((x_val_fold.size(0)))

        avg_val_loss = 0.
        # No gradients are needed for evaluation; skipping the autograd graph
        # saves GPU memory and time.
        with torch.no_grad():
            for i, (x_batch, y_batch) in enumerate(valid_loader):
                y_val_pred = model(x_batch)
                avg_val_loss += loss_fn(y_val_pred, y_batch).item() / len(valid_loader)
                # valid_loader is not shuffled, so batch i maps to a contiguous slice.
                valid_pred_folds[i * batch_size:(i + 1) * batch_size] = sigmoid(y_val_pred.cpu().numpy())[:, 0]
        elapsed_time = time.time() - start_time

        print('Epoch {}/{}, loss:{:.4f}, val_loss:{:.4f}, time:{:.2f}s'.format(
            epoch + 1, n_epochs, avg_loss, avg_val_loss, elapsed_time))

    # Keep the losses of the final epoch for the cross-validation summary.
    total_train_loss.append(avg_loss)
    total_val_loss.append(avg_val_loss)

    # ---- test predictions with the model trained on this fold ----
    model.eval()
    test_pred_folds = np.zeros((len(test_features)))
    with torch.no_grad():
        for i, (x_batch,) in enumerate(test_loader):
            y_pred = model(x_batch)
            test_pred_folds[i * batch_size:(i + 1) * batch_size] = sigmoid(y_pred.cpu().numpy())[:, 0]

    # Out-of-fold predictions come from the last epoch; test predictions are
    # averaged over all folds.
    pred_train[val_idx] = valid_pred_folds
    pred_test += test_pred_folds / len(splits)

cv_score = round(roc_auc_score(train_target, pred_train), 4)
print('## K-fold, Train Loss:{:.4f}, Valid Loss:{:.4f}, CV score:{:.4f}'.format(
    np.average(total_train_loss), np.average(total_val_loss), cv_score))

===== Fold 1 =====
Epoch 1/40, loss:0.2328, val_loss:0.2173, time:3.93s
Epoch 2/40, loss:0.2160, val_loss:0.2436, time:3.35s
Epoch 3/40, loss:0.2127, val_loss:0.2219, time:3.33s
Epoch 4/40, loss:0.2089, val_loss:0.2083, time:3.32s
Epoch 5/40, loss:0.2052, val_loss:0.2125, time:3.38s
Epoch 6/40, loss:0.2027, val_loss:0.2096, time:3.33s
Epoch 7/40, loss:0.2046, val_loss:0.2083, time:3.34s
Epoch 8/40, loss:0.2051, val_loss:0.2093, time:3.38s
Epoch 9/40, loss:0.2065, val_loss:0.2104, time:3.32s
Epoch 10/40, loss:0.2045, val_loss:0.2120, time:3.34s
Epoch 11/40, loss:0.2018, val_loss:0.2098, time:3.36s
Epoch 12/40, loss:0.2010, val_loss:0.2099, time:3.42s
Epoch 13/40, loss:0.2022, val_loss:0.2097, time:3.34s
Epoch 14/40, loss:0.2041, val_loss:0.2077, time:3.33s
Epoch 15/40, loss:0.2032, val_loss:0.2081, time:3.36s
Epoch 16/40, loss:0.2017, val_loss:0.2087, time:3.37s
Epoch 17/40, loss:0.2007, val_loss:0.2099, time:3.32s
Epoch 18/40, loss:0.2002, val_loss:0.2118, time:3.42s
Epoch 19/40, loss:

Epoch 32/40, loss:0.2001, val_loss:0.2004, time:3.33s
Epoch 33/40, loss:0.1994, val_loss:0.2003, time:3.27s
Epoch 34/40, loss:0.1988, val_loss:0.2002, time:3.27s
Epoch 35/40, loss:0.1990, val_loss:0.2003, time:3.34s
Epoch 36/40, loss:0.1995, val_loss:0.2007, time:3.34s
Epoch 37/40, loss:0.1994, val_loss:0.2010, time:3.28s
Epoch 38/40, loss:0.1991, val_loss:0.2004, time:3.28s
Epoch 39/40, loss:0.1986, val_loss:0.2015, time:3.36s
Epoch 40/40, loss:0.1982, val_loss:0.2007, time:3.31s
===== Fold 5 =====
Epoch 1/40, loss:0.2368, val_loss:0.2094, time:3.33s
Epoch 2/40, loss:0.2146, val_loss:0.2076, time:3.31s
Epoch 3/40, loss:0.2107, val_loss:0.2080, time:3.32s
Epoch 4/40, loss:0.2084, val_loss:0.2054, time:3.29s
Epoch 5/40, loss:0.2048, val_loss:0.2052, time:3.34s
Epoch 6/40, loss:0.2024, val_loss:0.2045, time:3.31s
Epoch 7/40, loss:0.2041, val_loss:0.2107, time:3.28s
Epoch 8/40, loss:0.2055, val_loss:0.2049, time:3.34s
Epoch 9/40, loss:0.2065, val_loss:0.2099, time:3.27s
Epoch 10/40, loss:

Epoch 23/40, loss:0.1991, val_loss:0.1993, time:3.28s
Epoch 24/40, loss:0.1999, val_loss:0.1996, time:3.30s
Epoch 25/40, loss:0.2006, val_loss:0.1998, time:3.31s
Epoch 26/40, loss:0.2008, val_loss:0.2000, time:3.27s
Epoch 27/40, loss:0.2002, val_loss:0.2001, time:3.26s
Epoch 28/40, loss:0.1988, val_loss:0.1989, time:3.34s
Epoch 29/40, loss:0.1989, val_loss:0.1991, time:3.26s
Epoch 30/40, loss:0.1991, val_loss:0.1988, time:3.28s
Epoch 31/40, loss:0.1998, val_loss:0.1996, time:3.29s
Epoch 32/40, loss:0.1999, val_loss:0.1990, time:3.36s
Epoch 33/40, loss:0.1991, val_loss:0.1990, time:3.31s
Epoch 34/40, loss:0.1985, val_loss:0.2014, time:3.28s
Epoch 35/40, loss:0.1986, val_loss:0.1990, time:3.33s
Epoch 36/40, loss:0.1990, val_loss:0.1988, time:3.30s
Epoch 37/40, loss:0.1994, val_loss:0.1988, time:3.30s
Epoch 38/40, loss:0.1989, val_loss:0.2000, time:3.33s
Epoch 39/40, loss:0.1987, val_loss:0.1984, time:3.29s
Epoch 40/40, loss:0.1981, val_loss:0.1985, time:3.34s
===== Fold 9 =====
Epoch 1/4

In [23]:
# ROC AUC of the out-of-fold predictions over the whole training set,
# reported together with the fold-averaged train and validation losses.
cv_score = round(roc_auc_score(train_target, pred_train), 4)
summary_template = '## K-fold, Train Loss:{:.4f}, Valid Loss:{:.4f}, CV score:{:.4f}'
print(summary_template.format(
    np.average(total_train_loss), np.average(total_val_loss), cv_score))

## K-fold, Train Loss:0.1980, Valid Loss:0.2049, CV score:0.8963


In [26]:
file_name = 'nn_submission.csv'
# Build the submission as a new frame with .assign instead of adding a column
# to a frame sliced from test_df, which is the chained-assignment pattern
# behind pandas' SettingWithCopyWarning.
submission_df = test_df[['ID_code']].assign(target=pred_test)
submission_df.to_csv(file_name, header=True, index=False, mode='w')