In [1]:
%matplotlib inline
from matplotlib import pyplot as plt
import pandas as pd
import numpy as np
from tqdm import tqdm_notebook as tqdm
from sklearn.preprocessing import scale
from sklearn.metrics import roc_auc_score
from torch.optim import Adam
from torch.optim.lr_scheduler import MultiStepLR
from torch.nn.modules.loss import BCEWithLogitsLoss, BCELoss
import torch
from torch.utils.data import TensorDataset, Dataset, DataLoader
import random

In [2]:
# Load the labelled training set from train.csv (path is relative to the
# notebook's working directory).
train_df = pd.read_csv('train.csv')

In [3]:
# Pull the target column out as its own Series, then keep every remaining
# column except ID_code as the predictor frame.
label = train_df['target']
train = train_df.drop(columns=['ID_code', 'target'])

In [4]:
# Load the test set from test.csv (path is relative to the notebook's
# working directory).
test = pd.read_csv('test.csv')

In [5]:
# Load a pre-computed frame from a pickle produced outside this notebook.
# NOTE(review): presumably a filtered subset of the test rows - confirm against
# whatever generated this file; nothing in this notebook shows how it was built.
# SECURITY: unpickling executes arbitrary code, so only load files you trust.
test_filtered = pd.read_pickle('test_filtred.pkl')

In [6]:
# Keep only the predictor columns, in the same order as `train`.
test_filtered = test_filtered[train.columns]

In [7]:
# Remove the ID_code column so `test` carries the same columns as `train`.
test = test.drop(columns=['ID_code'])

In [8]:
# Pool the training rows with the filtered test rows under a fresh 0..n-1 index.
train_test = pd.concat([train, test_filtered], ignore_index=True)

In [9]:
# Per-column relative frequency of every distinct value in the pooled
# train + filtered-test frame: {column name -> Series(value -> share of rows)}.
vcs_train_test = {}

# Normalise by the real number of pooled rows instead of a hard-coded 300000,
# so the table stays a true frequency if the input files change size.
n_pooled_rows = len(train_test)

for col in tqdm(train.columns):
    vcs_train_test[col] = train_test[col].value_counts() / n_pooled_rows

HBox(children=(IntProgress(value=0, max=200), HTML(value='')))




In [10]:
def _with_count_features(frame, counts_by_col):
    """Return `frame` extended with two frequency features per counted column.

    For every column `c` in `counts_by_col` the result gains
    `c_train_test_sum_vcs` (the pooled relative frequency of each row's value,
    0 when the value is absent from the table) and
    `c_train_test_sum_vcs_product` (the raw value times that frequency).

    Parameters
    ----------
    frame : pandas.DataFrame
        Frame holding at least the columns named in `counts_by_col`.
    counts_by_col : dict
        Maps column name to a Series indexed by value that holds its frequency.

    Returns
    -------
    pandas.DataFrame
        New frame: the original columns followed by the derived ones, in the
        order `c_vcs, c_product` for each column of `counts_by_col`.
    """
    derived = {}
    for col, counts in tqdm(counts_by_col.items(), total=len(counts_by_col)):
        # `.map` yields NaN for unseen values (label indexing with `[...]`
        # raises KeyError on them in current pandas), so fillna(0) can apply.
        freq = frame[col].map(counts).fillna(0).values
        derived[col + '_train_test_sum_vcs'] = freq
        derived[col + '_train_test_sum_vcs_product'] = frame[col].values * freq

    derived = pd.DataFrame(derived, index=frame.index)
    # Drop stale copies of the derived columns so re-running the cell is safe,
    # then attach everything in one concat to avoid a fragmented frame.
    base = frame.drop(columns=derived.columns, errors='ignore')
    return pd.concat([base, derived], axis=1)


# Iterating the count table (not `train.columns`) keeps the cell re-runnable:
# its keys are always just the original feature columns.
train = _with_count_features(train, vcs_train_test)
test = _with_count_features(test, vcs_train_test)


HBox(children=(IntProgress(value=0, max=200), HTML(value='')))




In [12]:
random_seed = 42

# Seed each random number source the notebook touches: NumPy, the stdlib
# `random` module, PyTorch's default generator and the CUDA generator.
for _seed_fn in (np.random.seed, random.seed, torch.manual_seed, torch.cuda.manual_seed):
    _seed_fn(random_seed)

In [13]:
from sklearn.preprocessing import StandardScaler

In [14]:
# Standardiser with default settings; it is fitted further down on the
# training frame and then reused to transform the test frame.
scaler = StandardScaler()

In [15]:
# Remember the current column labels of `train`; the scaler returns bare
# arrays, and these labels are used to wrap them back into DataFrames.
cols = train.columns

In [16]:
from sklearn.model_selection import train_test_split

# Standardise every training column, then hold out 20% of the rows for
# validation. NOTE(review): the scaler is fitted on all training rows before
# the split, so validation rows contribute to the mean/std statistics.
train_scaled = pd.DataFrame(scaler.fit_transform(train), columns=cols)
X_train, X_val, y_train, y_val = train_test_split(
    train_scaled,
    label,
    test_size=0.2,
    random_state=42,
)

In [17]:
# Apply the already-fitted scaler to the test frame and restore column labels.
# Re-running this cell would scale `test` a second time.
test_scaled_values = scaler.transform(test)
test = pd.DataFrame(test_scaled_values, columns=cols)

In [18]:
# Pick the compute device. GPU index 3 is preferred (the original setting);
# hosts with fewer GPUs fall back to GPU 0, and hosts without CUDA fall back
# to the CPU, so the notebook no longer fails when tensors are first created.
_PREFERRED_GPU = 3
if torch.cuda.is_available():
    _gpu_index = _PREFERRED_GPU if torch.cuda.device_count() > _PREFERRED_GPU else 0
    device = torch.device(f'cuda:{_gpu_index}')
else:
    device = torch.device('cpu')

In [19]:
# One (rows, 3) float32 tensor per original feature, for each data split.
train_tensors, val_tensors, test_tensors = [], [], []
tensor_opts = dict(requires_grad=False, device=device, dtype=torch.float32)

for fff in tqdm(range(200)):
    # The three inputs belonging to feature `fff`: raw value, pooled
    # frequency, and their product.
    stem = f'var_{fff}'
    triple = [stem, stem + '_train_test_sum_vcs', stem + '_train_test_sum_vcs_product']

    train_t = X_train[triple].values
    val_t = X_val[triple].values
    test_t = test[triple].values

    train_tensors.append(torch.tensor(train_t, **tensor_opts))
    val_tensors.append(torch.tensor(val_t, **tensor_opts))
    test_tensors.append(torch.tensor(test_t, **tensor_opts))


HBox(children=(IntProgress(value=0, max=200), HTML(value='')))




In [20]:
# Show the first row of the first per-feature validation tensor (the var_0
# triple) so it can be compared with the same entry after reshaping.
val_tensors[0][0]

tensor([ 0.1065, -0.9163, -0.8206], device='cuda:3')

In [21]:
# Join the 200 per-feature (rows, 3) tensors side by side into (rows, 600),
# then view the result as (rows, 200, 3): one triple per feature.
# The names are rebound from lists to tensors, so this cell runs only once.
train_flat = torch.cat(train_tensors, dim=1)
val_flat = torch.cat(val_tensors, dim=1)

train_tensors = train_flat.view(-1, 200, 3)
val_tensors = val_flat.view(-1, 200, 3)

In [22]:
# Same layout for the test split: concatenate to (rows, 600) and view it as
# (rows, 200, 3). Rebinds the list name to a tensor, so run only once.
test_flat = torch.cat(test_tensors, dim=1)
test_tensors = test_flat.view(-1, 200, 3)

In [23]:
# After the reshape the same indexing selects sample 0, feature 0; the
# displayed triple should equal the one shown before the reshape.
val_tensors[0][0]

tensor([ 0.1065, -0.9163, -0.8206], device='cuda:3')

In [24]:
# Targets as float32 tensors on the same device as the features.
label_opts = dict(requires_grad=False, device=device, dtype=torch.float32)
y_train_t = torch.tensor(y_train.values, **label_opts)
y_val_t = torch.tensor(y_val.values, **label_opts)

In [None]:
# nn = NN().to(device)
# nn(val_tensors)

# class customDataset(Dataset):
#     def __init__(self, features, label):
#         self.features = features
#         self.label = label

#     def __len__(self):
#         return len(self.label)

#     def __getitem__(self, idx):
#         features = [fs[idx] for fs in self.features]
#         label = self.label[idx]
#         sample = {'features': features, 'label': label}
#         return sample

# dataset = customDataset(train_tensors,y_train)

In [85]:
class NN(torch.nn.Module):
    """Independent small encoder per feature, followed by one linear head.

    Each of the `features` slots owns its own encoder
    (Linear(D_in, 15) -> ReLU -> Linear(15, 30) -> ReLU), registered as
    `layer_0` ... `layer_{features-1}`. The encoder outputs are concatenated
    and mapped to a single raw score by `linear3` (no sigmoid is applied).

    Parameters
    ----------
    D_in : int
        Number of inputs per feature slot (size of the last input axis).
    features : int
        Number of feature slots (size of the middle input axis).
    """

    # Seed applied at construction time so weight initialisation is repeatable.
    random_seed = 42

    def __init__(self, D_in=3, features = 200):
        # Read the seed from the class attribute. The bare name `random_seed`
        # would resolve to a notebook global and raise NameError without one.
        seed = self.random_seed
        np.random.seed(seed)
        random.seed(seed)
        torch.manual_seed(seed)
        torch.cuda.manual_seed(seed)

        super(NN, self).__init__()
        # Stored so forward() iterates over the configured number of encoders.
        self.features = features
        enc_out = 30
        for i in range(features):
            layer = torch.nn.Sequential(torch.nn.Linear(D_in, enc_out//2),
                                        torch.nn.ReLU(),
                                        torch.nn.Linear(enc_out//2, enc_out),
                                        torch.nn.ReLU())
            # setattr registers the sub-module under a stable `layer_<i>` name.
            setattr(self, 'layer_' + str(i), layer)

        self.linear3 = torch.nn.Linear(features*enc_out, 1)

    def forward(self, y):
        """Score a batch.

        `y` is indexed as `y[:, i, :]`, i.e. (batch, features, D_in).
        Returns a (batch, 1) tensor of raw scores.
        """
        res = [getattr(self, 'layer_' + str(i))(y[:, i, :])
               for i in range(self.features)]
        y = torch.cat(res, 1)
        return self.linear3(y)
    

# Mini-batch size used by the training DataLoader.
batch_size = 2048

# Training pairs: (rows, 200, 3) feature tensor with its float targets.
dataset = TensorDataset(train_tensors, y_train_t)

# Model on the selected device; the loss takes raw scores (sigmoid built in).
nn = NN().to(device)
loss_f = BCEWithLogitsLoss()

# Adam at lr 0.005; the scheduler halves the rate at epochs 15, 25, 35 and 55.
optimizer = Adam(nn.parameters(), lr=0.005)
scheduler = MultiStepLR(optimizer, milestones=[15, 25, 35, 55], gamma=0.5)

In [86]:
random_seed = 42

# Re-seed right before training so batch shuffling is repeatable.
np.random.seed(random_seed)
random.seed(random_seed)
torch.manual_seed(random_seed)
torch.cuda.manual_seed(random_seed)

# Built once: a shuffling DataLoader draws a new permutation every time it is
# iterated, so it does not need to be recreated each epoch.
dl = DataLoader(dataset, batch_size=batch_size, shuffle=True, num_workers=0)

# Validation targets as a (rows, 1) column to match the model output shape.
y_val_col = torch.unsqueeze(y_val_t, -1)

# Runs for up to 1000 epochs; interrupt the kernel to stop earlier.
for epoch in tqdm(range(1000)):
    nn.train()
    # Batch names are deliberately not `data`/`label`: `label` is the
    # notebook-level target Series and must not be overwritten here.
    for batch_x, batch_y in dl:
        pred = nn(batch_x)
        loss = loss_f(pred, torch.unsqueeze(batch_y, -1))
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    # Evaluate on the full validation split without tracking gradients.
    nn.eval()
    with torch.no_grad():
        val_pred = nn(val_tensors)

        print('EPOCH {}'.format(epoch))
        print('LOSS: ',loss_f(val_pred, y_val_col).detach().cpu().numpy())
        print('AUC: ',roc_auc_score(y_val,val_pred.detach().cpu().numpy()))
        print('='*50)

    # One scheduler step per epoch, after that epoch's optimizer steps.
    scheduler.step()

HBox(children=(IntProgress(value=0, max=1000), HTML(value='')))

EPOCH 0
LOSS:  0.21647617
AUC:  0.8856475014774677
EPOCH 1
LOSS:  0.20513403
AUC:  0.8993997134809668
EPOCH 2
LOSS:  0.19711518
AUC:  0.9065939039186015
EPOCH 3
LOSS:  0.19447933
AUC:  0.9097528066140789
EPOCH 4
LOSS:  0.20154816
AUC:  0.9119029978471473
EPOCH 5
LOSS:  0.1915914
AUC:  0.9128838020971145
EPOCH 6
LOSS:  0.19141826
AUC:  0.9136082814901059
EPOCH 7
LOSS:  0.19238143
AUC:  0.915361619245401
EPOCH 8
LOSS:  0.1924795
AUC:  0.9154098161230144
EPOCH 9
LOSS:  0.18816702
AUC:  0.9162106205523219
EPOCH 10
LOSS:  0.19196324
AUC:  0.916917050335318
EPOCH 11
LOSS:  0.18764824
AUC:  0.9172302365625395
EPOCH 12
LOSS:  0.185636
AUC:  0.9171841981599446
EPOCH 13
LOSS:  0.1894116
AUC:  0.9168675413768274
EPOCH 14
LOSS:  0.18588728
AUC:  0.9172118300393521
EPOCH 15
LOSS:  0.18690178
AUC:  0.9173775261389457
EPOCH 16
LOSS:  0.185278
AUC:  0.9189002129928762
EPOCH 17
LOSS:  0.1846408
AUC:  0.9185826282354598
EPOCH 18
LOSS:  0.18799157
AUC:  0.918638585425619
EPOCH 19
LOSS:  0.1852425
AUC:  0

KeyboardInterrupt: 