In [None]:
import pandas as pd
import numpy as np
import torch
import gc
import torch.nn as nn
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import roc_auc_score
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt

In [None]:
# Read the three competition files. The 'id' column is dropped from the train and
# test frames; the sample submission is loaded unchanged.
train = pd.read_csv('../input/tabular-playground-series-nov-2021/train.csv')
train = train.drop(columns='id')

test = pd.read_csv('../input/tabular-playground-series-nov-2021/test.csv')
test = test.drop(columns='id')

ss = pd.read_csv('../input/tabular-playground-series-nov-2021/sample_submission.csv')

In [None]:
# Preview the first five rows of the training frame.
train.head(n=5)

In [None]:
# Split the training frame into features (X) and target (y), keep the test features,
# then release the original frame names to free memory.

# DataFrame.drop already returns a new frame, so an extra .copy() would only
# duplicate the whole feature matrix for no benefit.
X = train.drop('target', axis=1)

# Keep an explicit copy of the target column so y is independent of `train`.
y = train['target'].copy()

# `test` is deleted just below, so rebinding the same frame is equivalent to
# copying it and then discarding the original, without the temporary duplicate.
X_test = test

# Drop both source names, then run the collector once.
del train, test
gc.collect()

In [None]:
# Standardize the features: the scaler is fitted on the training features and the
# same fitted transform is then applied to the test features. Both results are
# wrapped back into DataFrames that carry the original column names.
scaler = StandardScaler()

train_scaled = scaler.fit_transform(X)
test_scaled = scaler.transform(X_test)

X = pd.DataFrame(train_scaled, columns=X.columns)
X_test = pd.DataFrame(test_scaled, columns=X_test.columns)

In [None]:
# Preview the first five rows of the standardized training features.
X.head(n=5)

In [None]:
%%time
# Cross-validated training of a logistic-regression model (one linear layer followed
# by a sigmoid). For every stratified fold the model is trained with full-batch Adam
# steps for EPOCHS epochs; train/validation loss and validation ROC-AUC are recorded
# per epoch, summarised in a printed line, and the loss curves are plotted.
EPOCHS = 100
KFold = StratifiedKFold(n_splits=5, random_state=786, shuffle=True)

# Take the input width from the data instead of hard-coding the column count.
n_features = X.shape[1]

# Seed torch so the random weight initialisation is repeatable between runs.
torch.manual_seed(786)

for fold, (train_idx, valid_idx) in enumerate(KFold.split(X, y)):
    X_train, X_valid = X.iloc[train_idx].values, X.iloc[valid_idx].values
    y_train, y_valid = y.iloc[train_idx].values, y.iloc[valid_idx].values

    # BCELoss needs float inputs/targets of matching shape, hence float32 and (N, 1).
    X_train = torch.from_numpy(X_train.astype(np.float32))
    X_valid = torch.from_numpy(X_valid.astype(np.float32))
    y_train = torch.from_numpy(y_train.astype(np.float32).reshape(-1, 1))
    y_valid = torch.from_numpy(y_valid.astype(np.float32).reshape(-1, 1))

    # A fresh model and optimizer are created for every fold.
    model = nn.Sequential(
        nn.Linear(n_features, 1),
        nn.Sigmoid()
    )

    criterion = nn.BCELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=0.028)

    # Per-epoch history for this fold.
    train_losses = np.zeros(EPOCHS)
    valid_losses = np.zeros(EPOCHS)
    scores = np.zeros(EPOCHS)

    # The validation labels never change, so convert them for roc_auc_score once.
    y_valid_np = y_valid.numpy().ravel()

    for ep in range(EPOCHS):
        # One full-batch optimisation step on the training fold.
        optimizer.zero_grad()

        outputs = model(X_train)
        loss = criterion(outputs, y_train)

        loss.backward()
        optimizer.step()

        # Validation is evaluation only, so no autograd graph is needed.
        with torch.no_grad():
            outputs_valid = model(X_valid)
            loss_valid = criterion(outputs_valid, y_valid)

        # Store this epoch's AUC in its own slot. Accumulating with `+=` would add
        # it to every element and make the reported "mean" the sum over epochs.
        scores[ep] = roc_auc_score(y_valid_np, outputs_valid.numpy().ravel())

        train_losses[ep] = loss.item()
        valid_losses[ep] = loss_valid.item()

    # Both reported numbers are averages over all epochs of this fold.
    print(f"Fold: {fold + 1} Loss: {np.mean(valid_losses)} AUC: {np.mean(scores)}")
    # The curve labelled 'test loss' is the loss on this fold's validation split.
    plt.plot(train_losses, label='train loss')
    plt.plot(valid_losses, label='test loss')
    plt.legend()
    plt.show()

In [None]:
# Score the test set with `model`, which at this point is the network left over from
# the last fold of the training loop (models from earlier folds are not used here).

# The tensor gets its own name so X_test is not overwritten; the cell can then be
# re-run without failing on a second conversion. np.asarray accepts a DataFrame as
# well as an array, so this works whatever X_test currently holds.
X_test_tensor = torch.from_numpy(np.asarray(X_test, dtype=np.float32))

# Inference only: skip autograd bookkeeping. The result is a NumPy array with one
# column (one probability per test row).
with torch.no_grad():
    predictions = model(X_test_tensor).numpy()

In [None]:
# Put the predictions into the submission frame, write it to disk without the index
# column, and display the first five rows as a check.
submission_path = './lg.csv'

ss['target'] = predictions
ss.to_csv(submission_path, index=False)

ss.head(n=5)