In [8]:
import gc
import os
import warnings
import numpy as np
import pandas as pd
import lightgbm as lgb
import time
from multiprocessing import Pool

from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler

import matplotlib.pyplot as plt
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import StratifiedKFold

import torch
import torch.nn as nn
import torch.optim as optim
import torch.utils.data
# Run on the GPU when torch can see one, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print(device)

# Silence all Python warnings for the rest of the notebook.
warnings.filterwarnings(action="ignore")

# Directory that train.csv / test.csv are read from
# (alternative location: "../input/santander-customer-transaction-prediction/").
PATH = "../input/"
# Number of stratified CV folds and the seed used to shuffle them.
N_SPLITS = 10
SEED_SKF = 4221




cuda


In [2]:
def merge_train_test(df_train, df_test):
    """Stack the train and test frames into one frame with a fresh 0..n-1 index.

    Test rows that carry no "target" column receive the placeholder target -1,
    which is what `split_train_test` later uses to recognise test rows.

    Args:
        df_train: training frame.
        df_test: test frame; it is never modified by this function.

    Returns:
        A new DataFrame holding the rows of `df_train` followed by those of
        `df_test`.
    """
    if "target" not in df_test.columns:
        # Add the placeholder on a copy so the caller's frame is left untouched.
        df_test = df_test.assign(target=-1)
    return pd.concat([df_train, df_test], ignore_index=True)

def split_train_test(df, n_train=200000, n_test=200000):
    """Split a merged frame back into its train and test parts.

    Rows with target >= 0 are train rows; rows with target <= -1 are test rows.
    Both parts are returned with a fresh 0..n-1 index.

    Args:
        df: merged frame with "ID_code" and "target" columns.
        n_train: expected number of train rows, with IDs "train_0" ..
            "train_{n_train-1}" in that order.
        n_test: expected number of test rows, with IDs "test_0" ..
            "test_{n_test-1}" in that order.

    Returns:
        Tuple `(df_train, df_test)`.

    Raises:
        AssertionError: if either part does not hold exactly the expected IDs
            in the expected order.
    """
    # Boolean filtering already yields new frames, so a plain (non in-place)
    # reset_index is enough and avoids mutating a filtered slice.
    df_train = df[df["target"] >= 0].reset_index(drop=True)
    df_test = df[df["target"] <= -1].reset_index(drop=True)

    expected_train = [f"train_{i}" for i in range(n_train)]
    expected_test = [f"test_{i}" for i in range(n_test)]
    # Raised explicitly so the sanity check also runs under `python -O`.
    if df_train["ID_code"].tolist() != expected_train:
        raise AssertionError("train rows are missing, duplicated or out of order")
    if df_test["ID_code"].tolist() != expected_test:
        raise AssertionError("test rows are missing, duplicated or out of order")
    return df_train, df_test

In [3]:
%%time
# Load the raw train and test tables from the input directory.
train_df = pd.read_csv(f"{PATH}train.csv")
test_df = pd.read_csv(f"{PATH}test.csv")

CPU times: user 13.7 s, sys: 1.14 s, total: 14.8 s
Wall time: 14.8 s


In [4]:
class CountEncoder:
    """Frequency encoder: replaces each value by how often it was seen in `fit`."""

    def fit(self, series):
        """Record the number of occurrences of every distinct value in `series`.

        Missing values are not counted. The counts are stored in `self.counts`
        (a Series indexed by value). Returns `self` so calls can be chained.
        """
        self.counts = series.value_counts(sort=False)
        return self

    def transform(self, series):
        """Map each value of `series` to its fitted count (0 for unseen values).

        The result is int16 whenever every count fits into int16; larger counts
        are returned as int64 so they cannot overflow.
        """
        mapped = series.map(self.counts).fillna(0)
        int16_max = np.iinfo(np.int16).max
        # Guard the empty case: max() of an empty Series is NaN.
        needs_wide = len(mapped) > 0 and mapped.max() > int16_max
        return mapped.astype(np.int64 if needs_wide else np.int16)

In [5]:
# separate into real and fake

# For every test column, count how often each value occurs within that column.
cnt_columns = {}
for v in range(200):
    sr = test_df[f"var_{v}"]
    enc = CountEncoder()
    enc.fit(sr)
    cnt_columns[f"cnt_{v}"] = enc.transform(sr)
# Build the frame in one step instead of inserting 200 columns one by one.
df_cnt = pd.DataFrame(cnt_columns)

# A row is labelled real (-1) when at least one of its values is unique in its
# column, i.e. its smallest count is 1; every other row is labelled fake (-2).
# Fixed codes are used rather than the negated minimum count, so a row whose
# rarest value occurs three or more times is still coded -2.
test_df["target"] = np.where(df_cnt.min(axis=1) == 1, -1, -2)  # target==-1 -> real, target==-2 -> fake
del df_cnt, cnt_columns

In [6]:
# Stack the train rows and the real/fake-labelled test rows into a single frame.
df_merged = merge_train_test(df_train=train_df, df_test=test_df)
df_merged.tail(n=5)

Unnamed: 0,ID_code,target,var_0,var_1,var_10,var_100,var_101,var_102,var_103,var_104,var_105,var_106,var_107,var_108,var_109,var_11,var_110,var_111,var_112,var_113,var_114,var_115,var_116,var_117,var_118,var_119,var_12,var_120,var_121,var_122,var_123,var_124,var_125,var_126,var_127,var_128,var_129,var_13,var_130,var_131,...,var_63,var_64,var_65,var_66,var_67,var_68,var_69,var_7,var_70,var_71,var_72,var_73,var_74,var_75,var_76,var_77,var_78,var_79,var_8,var_80,var_81,var_82,var_83,var_84,var_85,var_86,var_87,var_88,var_89,var_9,var_90,var_91,var_92,var_93,var_94,var_95,var_96,var_97,var_98,var_99
399995,test_199995,-1,13.1678,1.0136,2.6802,0.0951,9.7517,28.6119,1.7091,13.6924,5.9843,7.0253,22.2816,14.2617,25.2567,6.1565,1.9588,6.5321,2.993,13.3917,0.4961,-0.6465,0.2973,9.7944,3.2861,-1.2859,14.3201,29.1451,13.9596,-3.4051,13.9743,3.3887,12.2799,13.9287,4.0643,-0.3375,19.0097,17.4594,12.9933,0.0775,...,-1.3391,6.1586,-1.8552,4.7364,15.6292,5.0223,-2.6639,14.7625,11.0281,0.0306,1.896,24.9906,32.5007,8.3094,7.6126,25.6503,7.2437,15.0479,-2.7239,7.7879,13.9172,-9.0753,4.8331,4.4553,15.6388,5.5637,4.2547,12.684,0.0995,6.9937,-1.8135,6.8214,9.3799,11.1513,9.6868,-0.1093,23.1655,8.3491,1.4743,-2.3265
399996,test_199996,-1,9.7171,-9.1462,3.2618,-6.1583,20.4441,21.8421,1.7256,8.5803,4.9388,8.8325,7.6675,14.0468,17.116,-2.0445,6.3677,5.4146,1.9411,2.6129,3.4406,3.426,2.3695,-8.9106,-13.8603,-2.0931,13.8246,31.8407,13.8584,-3.226,6.3128,5.8228,12.7894,12.2272,-5.2897,1.6418,21.5449,6.6547,12.9118,0.9432,...,-0.1774,5.5691,2.9599,5.3306,14.8157,5.0154,-3.0839,18.8862,11.1069,0.3795,8.5277,4.1178,41.1888,14.107,7.8092,14.2418,4.2794,15.5445,5.0915,12.1609,15.5275,-4.609,7.3667,8.6763,18.618,10.0517,19.1785,6.242,7.0502,6.3545,-28.9851,6.7225,17.3362,10.8515,16.2477,0.5921,22.7872,25.731,1.7975,-2.0057
399997,test_199997,-2,11.636,2.2769,9.8596,-3.0749,3.1074,12.0068,1.6774,11.3799,4.238,11.1093,25.8779,14.2004,18.504,-0.3412,0.2553,5.2585,3.1487,12.9586,3.6474,3.4343,3.0913,0.1812,-15.1198,5.7577,14.0675,30.2584,10.6134,11.0362,-1.0272,7.6308,12.1679,13.4871,6.6516,-8.1981,10.2774,13.9975,11.1849,0.587,...,-4.8658,7.0998,2.4941,5.0471,4.473,5.0294,-7.1495,18.3794,11.0807,0.2335,10.5464,12.5727,45.7036,23.5307,-4.1993,19.3466,5.8432,12.7485,1.6603,12.0411,15.288,-3.4333,4.0775,-7.9256,26.242,13.7303,3.0444,11.8603,3.0822,5.7341,-16.0234,7.1124,7.4361,10.7057,15.3976,-0.6755,6.7713,-4.9342,1.6797,-0.3975
399998,test_199998,-2,13.5745,-0.5134,5.5,1.503,12.7682,35.0019,1.4541,9.617,4.1599,9.2192,25.7227,13.9813,16.1063,-13.1346,10.8259,4.6892,1.4154,2.5782,4.7853,-2.7854,-0.6011,16.0398,9.9258,-1.0264,14.3051,16.3168,11.5549,4.0843,12.0524,3.7198,12.559,13.6529,-1.9074,2.4535,14.3782,4.2644,11.7188,0.1629,...,-1.3938,5.0658,2.4233,7.4078,4.2432,5.0181,-4.3118,16.828,27.3033,0.9399,7.0138,12.4946,32.2448,21.2218,15.5314,18.903,2.003,15.6688,5.3208,2.3608,9.1805,-4.3252,-2.6203,-1.8899,16.7015,10.2513,1.4672,11.7386,2.0682,8.9032,-8.2107,6.7505,18.3474,11.316,10.6454,-0.9224,11.8991,26.8269,2.7603,0.3056
399999,test_199999,-1,10.4664,1.807,8.4796,0.3854,12.125,27.8602,1.224,9.9291,5.412,9.764,27.7455,14.1016,25.9169,-5.896,3.3216,5.5989,5.056,10.9571,1.1325,-0.7894,3.8041,23.0863,-24.5122,4.6938,13.8333,31.4476,9.4732,5.5884,18.5127,0.1348,12.516,12.8744,1.9396,-3.025,10.3312,2.459,11.208,-0.2407,...,2.0655,7.148,-2.7835,5.1201,20.625,5.0179,2.5964,14.4892,19.1312,0.8608,2.1197,14.4495,25.5517,18.2896,-0.6072,19.7737,8.0756,15.2295,-0.5902,8.011,11.3898,-3.7269,-4.8575,2.085,17.8978,7.3186,9.8698,10.1636,5.395,7.8362,-20.9045,6.7966,9.3417,10.2155,11.5941,1.3084,7.8346,12.8029,0.9685,-0.6401


In [7]:
%%time

# count encoding

count_enc = [None] * 200
# Fit the value counts on train rows plus real test rows only. The filter is
# target >= -1 (not != -2) so that every fake-coded row, i.e. any target of -2
# or below, stays out of the fit.
df_real = df_merged[df_merged["target"] >= -1]
for v in range(200):
    enc = CountEncoder()
    enc.fit(df_real[f"var_{v}"])
    # Encode every row, fake ones included; values never seen in df_real map to 0.
    count_enc[v] = enc.transform(df_merged[f"var_{v}"])
    # df_real is a separate filtered frame, so adding columns here does not
    # affect the fits of later iterations.
    df_merged[f"cnt_{v}"] = count_enc[v]

del df_real

CPU times: user 13.1 s, sys: 556 ms, total: 13.7 s
Wall time: 13.7 s


In [9]:
# normalize

for v in range(200):
    # Raw feature: standardised with StandardScaler (the "_minmax" suffix is
    # only the column name that later cells select by).
    raw_2d = df_merged[f"var_{v}"].values.reshape(-1, 1)
    df_merged[f"var_{v}_minmax"] = StandardScaler().fit_transform(raw_2d)
    # Count feature: rescaled with MinMaxScaler.
    cnt_2d = df_merged[f"cnt_{v}"].values.reshape(-1, 1)
    df_merged[f"cnt_{v}_minmax"] = MinMaxScaler().fit_transform(cnt_2d)

# Only the scaled columns are kept; drop every unscaled var_* / cnt_* column.
df_merged.drop(
    columns=[f"{prefix}_{v}" for prefix in ("var", "cnt") for v in range(200)],
    inplace=True,
)

In [10]:
# Undo the merge: train_df / test_df now hold the scaled feature columns.
train_df, test_df = split_train_test(df=df_merged)
target = train_df.loc[:, "target"]
gc.collect()
print(train_df.shape)
test_df.head(n=5)

(200000, 402)


Unnamed: 0,ID_code,target,var_0_minmax,cnt_0_minmax,var_1_minmax,cnt_1_minmax,var_2_minmax,cnt_2_minmax,var_3_minmax,cnt_3_minmax,var_4_minmax,cnt_4_minmax,var_5_minmax,cnt_5_minmax,var_6_minmax,cnt_6_minmax,var_7_minmax,cnt_7_minmax,var_8_minmax,cnt_8_minmax,var_9_minmax,cnt_9_minmax,var_10_minmax,cnt_10_minmax,var_11_minmax,cnt_11_minmax,var_12_minmax,cnt_12_minmax,var_13_minmax,cnt_13_minmax,var_14_minmax,cnt_14_minmax,var_15_minmax,cnt_15_minmax,var_16_minmax,cnt_16_minmax,var_17_minmax,cnt_17_minmax,var_18_minmax,cnt_18_minmax,...,var_180_minmax,cnt_180_minmax,var_181_minmax,cnt_181_minmax,var_182_minmax,cnt_182_minmax,var_183_minmax,cnt_183_minmax,var_184_minmax,cnt_184_minmax,var_185_minmax,cnt_185_minmax,var_186_minmax,cnt_186_minmax,var_187_minmax,cnt_187_minmax,var_188_minmax,cnt_188_minmax,var_189_minmax,cnt_189_minmax,var_190_minmax,cnt_190_minmax,var_191_minmax,cnt_191_minmax,var_192_minmax,cnt_192_minmax,var_193_minmax,cnt_193_minmax,var_194_minmax,cnt_194_minmax,var_195_minmax,cnt_195_minmax,var_196_minmax,cnt_196_minmax,var_197_minmax,cnt_197_minmax,var_198_minmax,cnt_198_minmax,var_199_minmax,cnt_199_minmax
0,test_0,-2,0.130422,0.230769,2.325119,0.090909,0.850188,0.1875,1.287498,0.235294,0.219369,0.055556,0.34037,0.111111,0.505104,0.344828,0.50566,0.090909,0.555971,0.0,1.006658,0.761905,-0.437376,0.1,-0.184102,0.222222,-0.282638,0.189655,-1.767273,0.0,0.002507,0.1875,-0.1781,0.377778,-0.636837,0.125,-1.989929,0.0,0.041496,0.0,...,-0.343983,0.0,0.224065,0.241379,-1.805139,0.0,-1.073747,0.3,1.997741,0.0,0.594309,0.222222,0.097825,0.222222,-1.145956,0.0,-0.535275,0.363636,0.930328,0.407407,-1.178258,0.076923,1.455295,0.076923,-2.26941,0.045455,-0.219363,0.6,-1.3653,0.076923,1.822121,0.1,0.379477,0.444444,1.964372,0.111111,-0.132151,0.230769,-0.521508,0.428571
1,test_1,-2,-0.703966,0.461538,0.712,0.363636,0.224987,0.5625,-0.784449,0.470588,-1.160614,0.388889,0.133003,0.111111,0.701835,0.344828,0.612082,0.363636,-1.408266,0.076923,-1.292635,0.428571,-0.320404,0.1,0.490471,0.0,0.471313,0.165517,-1.288023,0.181818,-0.904411,0.1875,-0.942826,0.333333,-0.904638,0.5,1.539034,0.0,1.771654,0.222222,...,0.599008,0.222222,1.400615,0.172414,-1.234721,0.125,-0.653158,0.2,1.337022,0.0,-1.031481,0.111111,0.891677,0.111111,0.138503,0.333333,0.136037,0.090909,0.235932,0.259259,1.62533,0.076923,0.458483,0.384615,-0.66698,0.318182,1.702963,0.0,-0.770858,0.230769,0.430333,0.85,-0.693959,0.111111,1.043247,0.37037,1.082903,0.153846,-1.69792,0.285714
2,test_2,-2,-1.707028,0.153846,-2.158612,0.0,-0.216359,0.5625,0.124768,0.352941,-0.50288,0.222222,1.889479,0.111111,-0.597304,0.37931,1.086202,0.090909,0.372833,0.307692,0.629023,0.285714,-0.924394,0.2,0.036017,0.0,-1.832103,0.044828,-1.01794,0.0,1.473784,0.0625,-0.876507,0.533333,0.218033,0.375,1.241495,0.111111,0.759725,0.0,...,-0.067008,0.444444,-1.114955,0.137931,-0.394631,0.0,1.053316,0.0,1.259352,0.222222,0.103865,0.555556,-1.268443,0.166667,0.712406,0.333333,-0.973512,0.090909,0.441721,0.333333,-0.869365,0.076923,1.172249,0.153846,0.036107,0.363636,-0.287168,0.1,-1.597908,0.230769,1.58512,0.05,-1.725709,0.0,-2.006472,0.074074,1.337503,0.076923,-1.909412,0.0
3,test_3,-1,-0.701662,0.230769,0.075083,0.636364,0.49696,0.375,-0.106186,0.411765,-1.377678,0.333333,1.046537,0.111111,-0.545666,0.517241,1.177484,0.272727,0.928549,0.230769,-0.089603,0.333333,-0.067821,0.2,-0.303197,0.222222,0.154054,0.224138,1.071498,0.090909,0.546658,0.4375,0.393016,0.355556,0.278633,0.1875,-1.434007,0.111111,-1.672483,0.0,...,2.325036,0.333333,-0.188861,0.448276,0.592055,0.375,-0.806681,0.3,-0.496115,0.111111,1.61632,0.0,-1.7711,0.0,-0.818464,0.0,-0.488132,0.545455,0.573143,0.333333,1.395658,0.153846,0.538404,0.076923,-0.181783,0.272727,0.0637,0.2,-0.894858,0.230769,2.310884,0.15,0.304491,0.111111,0.346488,0.333333,-0.947946,0.461538,-0.088724,0.0
4,test_4,-2,0.341126,0.153846,0.369131,0.181818,1.296046,0.125,0.467879,0.176471,-1.218584,0.166667,-0.448353,0.0,1.672087,0.103448,-1.734057,0.181818,0.812587,0.076923,-0.344251,0.285714,0.857384,0.0,-0.004296,0.333333,0.410281,0.162069,0.093146,0.181818,-1.246926,0.0625,0.018669,0.733333,0.993291,0.25,1.314888,0.0,0.351337,0.222222,...,0.261308,0.555556,0.198969,0.206897,-0.912308,0.0,0.154327,0.1,-0.939631,0.222222,2.08071,0.0,-0.191056,0.333333,-1.254432,0.166667,-0.985247,0.181818,0.902671,0.407407,0.222537,0.153846,0.570048,0.230769,-0.434962,0.318182,0.012746,0.3,0.496773,0.307692,-0.10362,0.5,-1.368411,0.111111,-1.760723,0.111111,-0.645869,0.076923,-0.566131,0.285714


In [11]:
# nn model

class Model(nn.Module):
    """Stack of six kernel-size-1 convolutions followed by one linear classifier.

    The input is a float tensor of shape (batch, in_channels, n_features).
    Because every convolution uses kernel_size=1, each position along the last
    axis is transformed by the same weights, independently of the other
    positions; only the final linear layer combines positions. The output has
    shape (batch, n_classes) and holds raw scores (no softmax is applied).

    Args:
        hidden_size: output channels of the first convolution; every following
            convolution doubles the channel count (up to hidden_size * 32).
        n_features: length of the input along its last axis.
        in_channels: number of input channels.
        n_classes: number of output scores per sample.
    """

    def __init__(self, hidden_size=64, n_features=200, in_channels=2, n_classes=2):
        super(Model, self).__init__()
        self.hidden_size = hidden_size
        self.n_features = n_features
        self.relu = nn.ReLU()
        # Attribute names conv1..conv6 / fc are part of the state_dict layout and
        # must stay stable for saved checkpoints to load.
        self.conv1 = nn.Conv1d(in_channels, hidden_size, kernel_size=1)
        self.conv2 = nn.Conv1d(hidden_size, hidden_size * 2, kernel_size=1)
        self.conv3 = nn.Conv1d(hidden_size * 2, hidden_size * 4, kernel_size=1)
        self.conv4 = nn.Conv1d(hidden_size * 4, hidden_size * 8, kernel_size=1)
        self.conv5 = nn.Conv1d(hidden_size * 8, hidden_size * 16, kernel_size=1)
        self.conv6 = nn.Conv1d(hidden_size * 16, hidden_size * 32, kernel_size=1)

        # kernel_size=1 keeps the length, so the flattened size is channels * n_features.
        self.fc = nn.Linear(hidden_size * 32 * n_features, n_classes)

    def forward(self, x_):
        """Return the (batch, n_classes) raw scores for input `x_`."""
        x = x_
        for conv in (self.conv1, self.conv2, self.conv3,
                     self.conv4, self.conv5, self.conv6):
            x = self.relu(conv(x))

        # Flatten (batch, channels, n_features) -> (batch, channels * n_features).
        x = x.view(x.shape[0], -1)
        return self.fc(x)

In [12]:
# dataset

class TrainData(torch.utils.data.Dataset):
    """Training set yielding (features, label, row index) triples.

    `trn_X` is a tensor of rank 3, indexed (row, channel, feature), and `trn_y`
    a tensor of labels with one entry per row. `shuffle` provides an
    augmentation that mixes feature values between rows of the same class.
    """

    def __init__(self, trn_X, trn_y):
        self.trn_X = trn_X
        self.trn_y = trn_y

    def __len__(self):
        return self.trn_X.shape[0]

    def __getitem__(self, idx):
        return self.trn_X[idx], self.trn_y[idx], idx

    def shuffle(self):
        """Permute every feature position independently among rows of one class.

        For label 1 and then label 0, each feature position is shuffled across
        the rows carrying that label. All channels of a position move together,
        so the channels stay paired. Rows with any other label are left as they
        are. Randomness comes from numpy's global RNG, so `np.random.seed`
        makes the result repeatable.

        The data is processed on the CPU and put back on whatever device
        `self.trn_X` lived on. When it already lives on the CPU the underlying
        storage is modified in place.
        """
        # Remember the tensor's own device instead of relying on a module global.
        home_device = self.trn_X.device
        trn_X = self.trn_X.to("cpu").numpy()
        trn_y = self.trn_y.to("cpu").numpy()
        for label in (1, 0):
            mask = trn_y == label
            # (rows, channels, features) -> (features, rows, channels): iterating
            # then yields one (rows, channels) block per feature position.
            per_feature = trn_X[mask].transpose(2, 0, 1)
            for block in per_feature:
                # Shuffles along the first axis only, i.e. reorders the rows.
                np.random.shuffle(block)
            trn_X[mask] = per_feature.transpose(1, 2, 0)
        self.trn_X = torch.from_numpy(trn_X).to(home_device)
    
class ValidData(torch.utils.data.Dataset):
    """Validation set yielding (features, label, row index) triples."""

    def __init__(self, val_X, val_y):
        self.val_X, self.val_y = val_X, val_y

    def __len__(self):
        # One sample per entry along the first axis of the feature tensor.
        return len(self.val_X)

    def __getitem__(self, idx):
        features = self.val_X[idx]
        label = self.val_y[idx]
        return features, label, idx
    
class TestData(torch.utils.data.Dataset):
    """Unlabelled test set yielding (features, -1, row index) triples."""

    # Placeholder returned in the label slot, since test rows have no label.
    _NO_LABEL = -1

    def __init__(self, test_X):
        self.test_X = test_X

    def __len__(self):
        # One sample per entry along the first axis of the feature tensor.
        return len(self.test_X)

    def __getitem__(self, idx):
        features = self.test_X[idx]
        return features, self._NO_LABEL, idx
    

In [None]:
from scipy.special import logit, expit

BATCH_SIZE = 256       # mini-batch size used for training
EVAL_BATCH_SIZE = 256  # mini-batch size used for validation / test inference
EARLY_STOPPING = 20    # a fold stops once more than this many epochs pass without a better validation AUC
MAX_EPOCHS = 100
MAX_FOLDS = 1          # due to execution time limitation; raise up to N_SPLITS to train more folds


def _predict_logits(net, loader, n_rows):
    """Return the raw two-class outputs of `net` for every row served by `loader`.

    The network is put in eval mode and gradient tracking is disabled. Each
    batch is written at the dataset indices it reports, giving an
    (n_rows, 2) float32 array.
    """
    logits = np.zeros((n_rows, 2), dtype=np.float32)
    net.eval()
    with torch.no_grad():
        for data, _, idx in loader:
            logits[idx.numpy()] = net(data).cpu().numpy()
    return logits


skf = StratifiedKFold(n_splits=N_SPLITS, shuffle=True, random_state=SEED_SKF)
oof = np.zeros(len(train_df))
oof_done = np.zeros(len(train_df), dtype=bool)  # rows that received an out-of-fold prediction
fold_oof = np.zeros((N_SPLITS, len(train_df)))
fold_preds = np.zeros((N_SPLITS, len(test_df)))
n_folds_done = 0

loss_func = nn.CrossEntropyLoss()

# Everything below is identical for all folds, so it is built once.
# The 400 columns are ordered [200 scaled values, 200 scaled counts]; reshaping a
# row to (2, 200) therefore gives channel 0 = values and channel 1 = counts.
features = [f"var_{v}_minmax" for v in range(200)] + [f"cnt_{v}_minmax" for v in range(200)]
X_all = train_df[features].values.astype(np.float32)
y_all = target.values
test_X = torch.tensor(test_df[features].values.astype(np.float32).reshape(-1, 2, 200)).to(device)
test_dataset = TestData(test_X)
test_loader = torch.utils.data.DataLoader(dataset=test_dataset, batch_size=EVAL_BATCH_SIZE)

# StratifiedKFold only uses the labels and the number of rows, so passing the
# numeric feature matrix yields the same folds as passing the whole frame.
for fold_, (trn_idx, val_idx) in enumerate(skf.split(X_all, y_all)):
    print("fold n°{}".format(fold_))

    trn_X_npy, trn_y_npy = X_all[trn_idx], y_all[trn_idx]
    val_X_npy, val_y_npy = X_all[val_idx], y_all[val_idx]
    trn_X, trn_y = torch.tensor(trn_X_npy.reshape(-1, 2, 200)).to(device), torch.tensor(trn_y_npy).to(device)
    val_X, val_y = torch.tensor(val_X_npy.reshape(-1, 2, 200)).to(device), torch.tensor(val_y_npy).to(device)
    trn_dataset = TrainData(trn_X, trn_y)
    val_dataset = ValidData(val_X, val_y)
    val_loader = torch.utils.data.DataLoader(dataset=val_dataset, batch_size=EVAL_BATCH_SIZE)
    filename_net = f"net_{fold_}.pth"

    net = Model().to(device)
    optimizer = optim.Adam(net.parameters(), lr=0.00002)

    best_epoch = 0
    # Start below any possible AUC so the first epoch always writes a checkpoint;
    # the load after the epoch loop can then never hit a missing or stale file.
    best_auc = -np.inf
    for epoch in range(MAX_EPOCHS):
        if epoch - EARLY_STOPPING > best_epoch:
            break

        # train dataset with shuffling: re-augment the rows, then draw fresh batches
        trn_dataset.shuffle()
        trn_loader = torch.utils.data.DataLoader(dataset=trn_dataset, batch_size=BATCH_SIZE, shuffle=True)

        # train
        net.train()
        # Raw outputs seen during this epoch, stored by training-row index.
        oof_ = np.zeros((len(trn_idx), 2), dtype=np.float32)

        for data, label, idx in trn_loader:
            optimizer.zero_grad()
            output = net(data)
            loss = loss_func(output, label)
            loss.backward()
            oof_[idx.numpy()] = output.detach().cpu().numpy()
            optimizer.step()

        # eval: train metrics come from the outputs collected while training
        loss = loss_func(torch.from_numpy(oof_), torch.from_numpy(trn_y_npy))
        # The score difference (class 1 minus class 0) ranks rows exactly like
        # the class-1 probability, which is all AUC needs.
        auc = roc_auc_score(trn_y_npy, oof_[:, 1] - oof_[:, 0])
        print(f"epoch {epoch}: train loss: {loss:.5f}, train auc: {auc:.5f}, ", end="")

        # valid data
        val_logits = _predict_logits(net, val_loader, len(val_idx))
        loss = loss_func(torch.from_numpy(val_logits), torch.from_numpy(val_y_npy))
        auc = roc_auc_score(val_y_npy, val_logits[:, 1] - val_logits[:, 0])
        print(f"valid loss: {loss:.5f}, valid auc: {auc:.5f}")

        if auc > best_auc:
            torch.save(net.state_dict(), filename_net)
            best_auc = auc
            best_epoch = epoch

    # Score the fold with the best checkpoint, not with the last epoch's weights.
    net.load_state_dict(torch.load(filename_net))
    val_logits = _predict_logits(net, val_loader, len(val_idx))
    val_auc = roc_auc_score(val_y_npy, val_logits[:, 1] - val_logits[:, 0])
    print(f"fold {fold_} auc: {val_auc:.5f}")
    # expit of the score difference equals the softmax probability of class 1.
    oof[val_idx] = expit(val_logits[:, 1] - val_logits[:, 0])
    oof_done[val_idx] = True
    fold_oof[fold_, val_idx] = oof[val_idx]

    test_logits = _predict_logits(net, test_loader, len(test_dataset))
    fold_preds[fold_, :] = expit(test_logits[:, 1] - test_logits[:, 0])

    n_folds_done += 1
    if n_folds_done >= MAX_FOLDS:
        break


# Average over the folds that actually ran, so the values stay probabilities
# even when training stops before all N_SPLITS folds.
predictions = fold_preds[:n_folds_done].mean(axis=0)

np.save("oof.npy", oof)
np.save("fold_oof.npy", fold_oof)
np.save("fold_preds.npy", fold_preds)
np.save("predictions.npy", predictions)
# Score only rows that received an out-of-fold prediction; rows from folds that
# never ran still hold the initial 0 and would distort the AUC.
print("CV score: {:<8.5f}".format(roc_auc_score(target.values[oof_done], oof[oof_done])))

fold n°0
epoch 0: train loss: 0.26154, train auc: 0.79790, valid loss: 0.21507, valid auc: 0.88499
epoch 1: train loss: 0.20963, train auc: 0.88863, valid loss: 0.20686, valid auc: 0.89543
epoch 2: train loss: 0.20270, train auc: 0.89609, valid loss: 0.20075, valid auc: 0.89984
epoch 3: train loss: 0.20035, train auc: 0.89963, valid loss: 0.19880, valid auc: 0.90284
epoch 4: train loss: 0.19658, train auc: 0.90388, valid loss: 0.19816, valid auc: 0.90618
epoch 5: train loss: 0.19400, train auc: 0.90658, valid loss: 0.19344, valid auc: 0.90826
epoch 6: train loss: 0.19105, train auc: 0.91015, valid loss: 0.19023, valid auc: 0.91140
epoch 7: train loss: 0.18709, train auc: 0.91454, valid loss: 0.18625, valid auc: 0.91423
epoch 8: train loss: 0.18678, train auc: 0.91403, valid loss: 0.19635, valid auc: 0.91646
epoch 9: train loss: 0.18392, train auc: 0.91659, valid loss: 0.18672, valid auc: 0.91767
epoch 10: train loss: 0.18243, train auc: 0.91798, valid loss: 0.18104, valid auc: 0.91874


In [None]:
# Write the submission file: one predicted target per test ID_code.
sub_df = pd.DataFrame({"ID_code": test_df["ID_code"].values, "target": predictions})
sub_df.to_csv("submission.csv", index=False)