In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import time
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors.kde import KernelDensity
import matplotlib.pyplot as plt
import seaborn as sns

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.autograd import Variable
from torch.utils.data import Dataset, DataLoader

import lightgbm as lgb
from sklearn.metrics import mean_squared_error
import warnings
warnings.filterwarnings('ignore')

import os
# Show which files the input directory provides before anything is loaded
input_files = os.listdir("../input")
print(input_files)

## Load Data

In [None]:
# Read the train and test tables from the input directory
train_df, test_df = map(pd.read_csv, ('../input/train.csv', '../input/test.csv'))

In [None]:
# Keep the label apart and strip the non-feature columns from both tables
train_target = train_df['target']
train_features = train_df.drop(columns=['target', 'ID_code'])
test_features = test_df.drop(columns=['ID_code'])

In [None]:
df_test = test_features.values

unique_samples = []
basic_features = [col for col in train_df.columns if col not in ('ID_code', 'target')]

# unique_count[r, c] becomes 1 when the value at row r occurs exactly once in column c
unique_count = np.zeros_like(df_test)
for col_pos, column in enumerate(df_test.T):
    _, first_pos, n_occurrences = np.unique(column, return_index=True, return_counts=True)
    unique_count[first_pos[n_occurrences == 1], col_pos] += 1

# Samples holding at least one unique value are real; all other samples are fake
unique_per_row = unique_count.sum(axis=1)
real_samples_indexes = np.flatnonzero(unique_per_row > 0)
synthetic_samples_indexes = np.flatnonzero(unique_per_row == 0)

In [None]:
def process_data(train_df, test_df, real_idx=None):
    """Add count-based features for every original column of both frames.

    For each feature column ``f`` (every column of ``train_df`` except
    ``ID_code`` / ``target``) three columns are appended to *both* frames:

    * ``count_f`` - how often the row's value occurs in the reference table
      (all of ``train_df`` plus the selected rows of ``test_df``); NaN when the
      value does not occur in the reference table at all.
    * ``sum_f``   - the value minus the reference mean, zeroed where the value
      is not repeated (count <= 1 or missing); stored as float32.
    * ``copy_f``  - the value where it is repeated, otherwise 0, with every 0
      then replaced by the reference median.
      NOTE(review): a repeated value that is itself exactly 0 is replaced by
      the median as well - confirm that this is intended.

    Args:
        train_df: training features; modified in place.
        test_df: test features; modified in place.
        real_idx: row positions of ``test_df`` that take part in the reference
            table. Defaults to the notebook-level ``real_samples_indexes``.

    Returns:
        The same ``(train_df, test_df)`` objects with the new columns added.
    """
    if real_idx is None:
        # Keep the original behaviour: use the array computed earlier in the notebook
        real_idx = real_samples_indexes

    idx = [c for c in train_df.columns if c not in ['ID_code', 'target']]
    # real_idx holds row positions, so select with .iloc (.ix was removed in pandas 1.0)
    df = pd.concat([train_df, test_df.iloc[real_idx]])

    for feat in idx:
        counts = df[feat].value_counts(dropna=True)
        # Reference statistics are identical for both frames; compute them once
        feat_mean = df[feat].mean()
        feat_median = df[feat].median()

        for frame in (train_df, test_df):
            frame["count_" + feat] = frame[feat].map(counts)
            # 1 where the value is repeated, 0 otherwise (a missing count compares False)
            repeated = (frame["count_" + feat] > 1).astype(int)
            frame["sum_" + feat] = ((frame[feat] - feat_mean) * repeated).astype(np.float32)
            frame["copy_" + feat] = (frame[feat] * repeated).replace(0, feat_median)

    return train_df, test_df

In [None]:
# Append the count_/sum_/copy_ columns to both feature tables
train_features, test_features = process_data(
    train_features,
    test_features,
)

In [None]:
# Alphabetical column order, shared by the train and test tables
feature_name = sorted(test_features.columns.tolist())

In [None]:
import gc

# Put both tables into the same, sorted column order
train_features = train_features.loc[:, feature_name]
test_features = test_features.loc[:, feature_name]
gc.collect()

In [None]:
#### Scaling feature #####
from sklearn.preprocessing import StandardScaler,MinMaxScaler

# Learn mean/variance on the training table only, then apply the same scaling to both
sc = StandardScaler().fit(train_features)
train_features = sc.transform(train_features)
test_features = sc.transform(test_features)

## Split K-fold validation

In [None]:
# Stratified K-fold: every fold keeps the class ratio of the target
n_splits = 10  # Number of K-fold Splits

fold_maker = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=4590)
splits = [
    (train_index, valid_index)
    for train_index, valid_index in fold_maker.split(train_features, train_target)
]

## Cycling learning rate

* Copied from https://github.com/anandsaha/pytorch.cyclic.learning.rate/blob/master/cls.py

In [None]:
class CyclicLR(object):
    """Cyclical learning-rate schedule that is advanced once per training batch.

    The learning rate of every parameter group moves linearly from its
    ``base_lr`` up to ``max_lr`` over ``step_size`` batches and back down over
    the next ``step_size`` batches. The height of the triangle is multiplied
    by a scaling function:

    * ``'triangular'``  - constant amplitude.
    * ``'triangular2'`` - amplitude halves every cycle.
    * ``'exp_range'``   - amplitude is multiplied by ``gamma ** iteration``.

    Args:
        optimizer: a ``torch.optim`` optimizer whose ``param_groups`` are updated.
        base_lr: lower bound; a scalar or one value per parameter group.
        max_lr: upper bound; a scalar or one value per parameter group.
        step_size: number of batches in half a cycle.
        mode: one of the three modes above; ignored when ``scale_fn`` is given.
        gamma: decay base used by ``'exp_range'``.
        scale_fn: optional custom scaling function of a single argument.
        scale_mode: ``'cycle'`` to call ``scale_fn`` with the cycle number,
            anything else to call it with the batch iteration. Only used
            together with a custom ``scale_fn``.
        last_batch_iteration: index of the last batch already processed.

    Raises:
        TypeError: ``optimizer`` is not a torch ``Optimizer``.
        ValueError: a learning-rate list does not match the number of
            parameter groups, or ``mode`` is unknown and no ``scale_fn`` is given.
    """

    def __init__(self, optimizer, base_lr=1e-3, max_lr=6e-3,
                 step_size=2000, mode='triangular', gamma=1.,
                 scale_fn=None, scale_mode='cycle', last_batch_iteration=-1):
        # Imported here so the class does not depend on an import made in another cell
        from torch.optim.optimizer import Optimizer

        if not isinstance(optimizer, Optimizer):
            raise TypeError('{} is not an Optimizer'.format(
                type(optimizer).__name__))
        self.optimizer = optimizer

        n_groups = len(optimizer.param_groups)
        self.base_lrs = self._per_group('base_lr', base_lr, n_groups)
        self.max_lrs = self._per_group('max_lr', max_lr, n_groups)

        self.step_size = step_size

        if mode not in ['triangular', 'triangular2', 'exp_range'] \
                and scale_fn is None:
            raise ValueError('mode is invalid and scale_fn is None')

        self.mode = mode
        self.gamma = gamma

        if scale_fn is None:
            if self.mode == 'triangular':
                self.scale_fn = self._triangular_scale_fn
                self.scale_mode = 'cycle'
            elif self.mode == 'triangular2':
                self.scale_fn = self._triangular2_scale_fn
                self.scale_mode = 'cycle'
            elif self.mode == 'exp_range':
                self.scale_fn = self._exp_range_scale_fn
                self.scale_mode = 'iterations'
        else:
            self.scale_fn = scale_fn
            self.scale_mode = scale_mode

        # Write the initial learning rate into the optimizer, then rewind the
        # counter so the first batch_step() call lands on the same iteration
        self.batch_step(last_batch_iteration + 1)
        self.last_batch_iteration = last_batch_iteration

    @staticmethod
    def _per_group(name, value, n_groups):
        """Return ``value`` as a list holding one learning rate per parameter group."""
        if isinstance(value, (list, tuple)):
            if len(value) != n_groups:
                raise ValueError("expected {} {}, got {}".format(
                    n_groups, name, len(value)))
            return list(value)
        return [value] * n_groups

    def batch_step(self, batch_iteration=None):
        """Advance to ``batch_iteration`` (default: next batch) and update the optimizer."""
        if batch_iteration is None:
            batch_iteration = self.last_batch_iteration + 1
        self.last_batch_iteration = batch_iteration
        for param_group, lr in zip(self.optimizer.param_groups, self.get_lr()):
            param_group['lr'] = lr

    def _triangular_scale_fn(self, x):
        return 1.

    def _triangular2_scale_fn(self, x):
        return 1 / (2. ** (x - 1))

    def _exp_range_scale_fn(self, x):
        return self.gamma**(x)

    def get_lr(self):
        """Return the learning rate of every parameter group at the current iteration."""
        step_size = float(self.step_size)
        # cycle counts from 1; x is the distance from the peak of the cycle, in [0, 1]
        cycle = np.floor(1 + self.last_batch_iteration / (2 * step_size))
        x = np.abs(self.last_batch_iteration / step_size - 2 * cycle + 1)

        if self.scale_mode == 'cycle':
            scale = self.scale_fn(cycle)
        else:
            scale = self.scale_fn(self.last_batch_iteration)

        lrs = []
        for base_lr, max_lr in zip(self.base_lrs, self.max_lrs):
            base_height = (max_lr - base_lr) * np.maximum(0, (1 - x))
            lrs.append(base_lr + base_height * scale)
        return lrs

## Build Simple NN model (Pytorch)

* add flatten layer before fc layer (improve to 0.89+)
* https://www.kaggle.com/c/santander-customer-transaction-prediction/discussion/82863

* Model structure
* (batch_size, 200) ==> Flatten ==> (batch_size* 200,1) ==> fc1 ==> (batch_size* 200, hidden_layer) ==>Reshape ==>(batch_size, hidden_layer * 200) ==> fc2 ==> (batch_size, 1)

In [None]:
class Simple_NN(nn.Module):
    """Two-layer network that embeds every input value with one shared layer.

    Each scalar of the (batch, input_dim) input goes through the same
    ``Linear(1, hidden_dim)`` + ReLU; the per-value embeddings of a row are
    then concatenated and mapped to a single logit by ``fc2``.

    A ``Dropout`` module is created and stored as ``self.dropout`` but it is
    not applied in ``forward``.
    """

    def __init__(self, input_dim, hidden_dim, dropout=0.5):
        super().__init__()

        self.inpt_dim, self.hidden_dim = input_dim, hidden_dim
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(dropout)
        # fc1 receives one scalar at a time, so its weights are shared by all features
        self.fc1 = nn.Linear(1, hidden_dim)
        self.fc2 = nn.Linear(int(hidden_dim * input_dim), 1)

    def forward(self, x):
        """Map a (batch, input_dim) tensor to (batch, 1) logits."""
        n_rows = x.size(0)
        # (batch, input_dim) -> (batch * input_dim, 1) -> (batch * input_dim, hidden_dim)
        per_value = self.relu(self.fc1(x.view(-1, 1)))
        # regroup the embeddings by row before the read-out layer
        return self.fc2(per_value.view(n_rows, -1))

In [None]:
def sigmoid(x):
    """Logistic function 1 / (1 + e^-x), applied element-wise by NumPy."""
    denominator = 1 + np.exp(-x)
    return 1 / denominator

In [None]:
class EarlyStopping:
    """Stop training when the validation AUC has not improved for `patience` calls.

    Every call with a score that is at least as high as the best one seen so
    far saves the model weights and resets the counter; every call with a
    lower score increments the counter.
    """
    def __init__(self, patience=2, verbose=False, path='checkpoint.pt'):
        """
        Args:
            patience (int): How many consecutive calls without improvement are
                            tolerated before `early_stop` is set.
                            Default: 2
            verbose (bool): If True, prints a message each time a checkpoint is saved.
                            Default: False
            path (str): File the best model weights are written to.
                            Default: 'checkpoint.pt'
        """
        self.patience = patience
        self.verbose = verbose
        self.path = path
        self.counter = 0
        self.best_score = None
        self.early_stop = False
        # Score of the most recently saved checkpoint (despite the name, the
        # tracked AUC only ever increases); name kept for existing callers
        self.val_auc_min = 0

    def __call__(self, val_auc, model):
        """Record the validation AUC of one epoch and checkpoint `model` if it is a new best."""
        score = val_auc

        if self.best_score is not None and score < self.best_score:
            self.counter += 1
            print(f'EarlyStopping counter: {self.counter} out of {self.patience}')
            if self.counter >= self.patience:
                self.early_stop = True
            return

        # First call, or a score that ties / beats the best one so far
        self.best_score = score
        self.save_checkpoint(val_auc, model)
        self.counter = 0

    def save_checkpoint(self, val_auc, model):
        '''Saves the model weights when the validation AUC improves.'''
        if self.verbose:
            print(f'Validation AUC improved ({self.val_auc_min:.6f} --> {val_auc:.6f}).  Saving model ...')
        torch.save(model.state_dict(), self.path)
        self.val_auc_min = val_auc




## Start training
* Epochs = up to 20000 per fold (early stopping with patience 8 normally ends a fold much sooner)
* Batch size = 256
* Cycling step = 2000

In [None]:
def augment(x, y, t=2):
    """Append class-wise column-shuffled copies of the rows to the data.

    For each class, a copy of that class's rows is made and every column is
    permuted independently among those rows, so each synthetic row mixes
    feature values from different rows of the same class. Positive rows
    (``y > 0``) are copied ``t`` times, negative rows (``y == 0``) ``t // 2``
    times.

    Uses the global ``np.random`` state; seed it beforehand for reproducible output.

    Args:
        x: 2-D NumPy array of features, one row per sample.
        y: 1-D NumPy array of labels aligned with the rows of ``x``.
        t: number of shuffled copies of the positive rows.

    Returns:
        ``(x_aug, y_aug)``: the original rows followed by the positive copies
        (label 1) and then the negative copies (label 0).
    """
    def _shuffled_copies(mask, repeats):
        # Build `repeats` copies of x[mask] with every column permuted on its own
        copies = []
        for _ in range(repeats):
            block = x[mask].copy()
            ids = np.arange(block.shape[0])
            for c in range(block.shape[1]):
                np.random.shuffle(ids)
                # index only column c instead of copying the whole block per column
                block[:, c] = block[ids, c]
            copies.append(block)
        return copies

    xs = _shuffled_copies(y > 0, t)
    xn = _shuffled_copies(y == 0, t // 2)

    # Count rows explicitly so an empty list of copies (t < 2) does not break stacking
    n_pos = sum(block.shape[0] for block in xs)
    n_neg = sum(block.shape[0] for block in xn)

    x = np.vstack([x] + xs + xn)
    y = np.concatenate([y, np.ones(n_pos), np.zeros(n_neg)])
    return x, y

In [None]:
from torch.optim.optimizer import Optimizer  # CyclicLR looks this name up when it is constructed

## Hyperparameter
n_epochs = 20000   # upper bound only; early stopping normally ends a fold far sooner
batch_size = 256
patience = 8       # epochs without a validation-AUC improvement before a fold stops

# Use the GPU when one is available, otherwise fall back to the CPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

## Build tensor data for torch
train_preds = np.zeros((len(train_features)))   # out-of-fold predictions
test_preds = np.zeros((len(test_features)))     # test predictions averaged over folds

# The full arrays are the same for every fold, so convert them once
x_train = np.array(train_features)
y_train = np.array(train_target)

x_test = np.array(test_features)
x_test_cuda = torch.tensor(x_test, dtype=torch.float).to(device)
test = torch.utils.data.TensorDataset(x_test_cuda)
test_loader = torch.utils.data.DataLoader(test, batch_size=batch_size, shuffle=False)

avg_losses_f = []
avg_val_losses_f = []
val_auc = []
## Start K-fold validation
for fold, (train_idx, valid_idx) in enumerate(splits):
    x_train_fold = torch.tensor(x_train[train_idx.astype(int)], dtype=torch.float).to(device)
    y_train_fold = torch.tensor(y_train[train_idx.astype(int), np.newaxis], dtype=torch.float32).to(device)

    x_val_fold = torch.tensor(x_train[valid_idx.astype(int)], dtype=torch.float).to(device)
    y_val_fold = torch.tensor(y_train[valid_idx.astype(int), np.newaxis], dtype=torch.float32).to(device)
    y_val_true = y_train[valid_idx.astype(int)]

    ## Loss function (the model outputs raw logits)
    loss_fn = torch.nn.BCEWithLogitsLoss()

    # Build model and optimizer; the learning rate is driven by the cyclic schedule below
    model = Simple_NN(train_features.shape[1], 32)
    model.to(device)

    ######################Cycling learning rate########################

    step_size = 2000
    base_lr, max_lr = 0.0001, 0.002
    optimizer = torch.optim.Adam(filter(lambda p: p.requires_grad, model.parameters()),
                                 lr=max_lr)

    scheduler = CyclicLR(optimizer, base_lr=base_lr, max_lr=max_lr,
                         step_size=step_size, mode='exp_range',
                         gamma=0.99994)

    ###################################################################

    train = torch.utils.data.TensorDataset(x_train_fold, y_train_fold)
    valid = torch.utils.data.TensorDataset(x_val_fold, y_val_fold)

    train_loader = torch.utils.data.DataLoader(train, batch_size=batch_size, shuffle=True)
    valid_loader = torch.utils.data.DataLoader(valid, batch_size=batch_size, shuffle=False)

    print(f'Fold {fold + 1}')
    early_stopping = EarlyStopping(patience=patience, verbose=True)
    best_valid_preds = np.zeros((x_val_fold.size(0)))
    for epoch in range(n_epochs):
        start_time = time.time()
        model.train()
        avg_loss = 0.
        for x_batch, y_batch in train_loader:
            y_pred = model(x_batch)
            # the schedule advances once per batch, before the optimizer step
            scheduler.batch_step()

            loss = loss_fn(y_pred, y_batch)

            optimizer.zero_grad()
            loss.backward()

            optimizer.step()
            avg_loss += loss.item() / len(train_loader)

        model.eval()
        valid_preds_fold = np.zeros((x_val_fold.size(0)))
        avg_val_loss = 0.
        with torch.no_grad():
            for b, (x_batch, y_batch) in enumerate(valid_loader):
                y_pred = model(x_batch)
                avg_val_loss += loss_fn(y_pred, y_batch).item() / len(valid_loader)
                valid_preds_fold[b * batch_size:(b + 1) * batch_size] = sigmoid(y_pred.cpu().numpy())[:, 0]

        # AUC over the whole validation fold: averaging per-batch AUCs is not the
        # fold AUC, and roc_auc_score fails on a batch that holds a single class
        fold_auc = roc_auc_score(y_val_true, valid_preds_fold)

        elapsed_time = time.time() - start_time
        print('Epoch {}/{} \t loss={:.4f} \t val_loss={:.4f} \t time={:.2f}s'.format(
            epoch + 1, n_epochs, avg_loss, avg_val_loss, elapsed_time))

        early_stopping(fold_auc, model)
        if early_stopping.counter == 0:
            # counter is 0 exactly when a checkpoint was just written, so these
            # predictions belong to the weights that are restored after the loop
            best_valid_preds = valid_preds_fold
        if early_stopping.early_stop:
            print("Early stopping")
            break

    # Restore the best weights of this fold before predicting the test set
    model.load_state_dict(torch.load('checkpoint.pt', map_location=device))
    model.eval()
    # Losses of the last epoch that ran (not of the restored checkpoint)
    avg_losses_f.append(avg_loss)
    avg_val_losses_f.append(avg_val_loss)

    test_preds_fold = np.zeros((len(test_features)))
    with torch.no_grad():
        for b, (x_batch,) in enumerate(test_loader):
            y_pred = model(x_batch)
            test_preds_fold[b * batch_size:(b + 1) * batch_size] = sigmoid(y_pred.cpu().numpy())[:, 0]

    train_preds[valid_idx] = best_valid_preds
    test_preds += test_preds_fold / len(splits)
    val_auc.append(roc_auc_score(y_val_true, best_valid_preds))
    print(val_auc)
print(np.mean(val_auc))
auc  =  round(roc_auc_score(train_target,train_preds),4)
print('All \t loss={:.4f} \t val_loss={:.4f} \t auc={:.4f}'.format(np.average(avg_losses_f),np.average(avg_val_losses_f),auc))

In [None]:
# Write the fold-averaged test predictions next to their IDs
test_ID = test_df['ID_code'].values
submission_nn = pd.DataFrame({"ID_code": test_ID}).assign(target=test_preds)
submission_nn.to_csv('test_preds_submission_nn_10fold.csv', index=False)

In [None]:
# Write the out-of-fold training predictions next to their IDs
train_ID = train_df['ID_code'].values
submission_nn = pd.DataFrame({"ID_code": train_ID}).assign(target=train_preds)
submission_nn.to_csv('train_preds_submission_nn_10fold.csv', index=False)