In [26]:
pip install tensorflow

Note: you may need to restart the kernel to use updated packages.


In [27]:
import numpy as np
import pandas as pd
import random
import os
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import roc_curve, auc, roc_auc_score
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras import callbacks

In [28]:
# --- Reproducibility ---
SEED = 42               # shared seed for the RNGs and the CV splitter

# --- Network ---
ACTIVATION = 'swish'    # activation of every hidden Dense layer

# --- Optimisation ---
LEARNING_RATE = 1e-3    # initial Adam learning rate
BATCH_SIZE = 2048       # samples per gradient step
EPOCHS = 1000           # upper bound on epochs per fit call

In [29]:
def seed_everything(seed=42):
    """Seed the random number generators this notebook relies on.

    Applies ``seed`` to Python's ``random`` module, NumPy's global generator
    and TensorFlow's global seed, and stores its string form in the
    ``PYTHONHASHSEED`` environment variable.

    Args:
        seed: Seed value handed to every generator (default 42).
    """
    os.environ['PYTHONHASHSEED'] = str(seed)
    random.seed(seed)
    np.random.seed(seed)
    tf.random.set_seed(seed)
    
# Seed all generators once, before any data is loaded or any model is built.
seed_everything(seed=SEED)

In [30]:
# Raw string literals: the Windows paths contain backslashes (\C, \p, \c, \M,
# \k) that are not valid escape sequences in a normal string literal and make
# recent Python versions emit a SyntaxWarning. The resulting path text is
# unchanged.
data = pd.read_csv(r'D:\CALCUL\python\coursera\ML\kaggle_nov21/train.csv')
test_data = pd.read_csv(r'D:\CALCUL\python\coursera\ML\kaggle_nov21/test.csv')

In [31]:
# Separate the label column from the feature columns.
y = data['target']
X = data.drop(columns='target')

# Drop the name of the combined frame; the visible cells below only use X and y.
del data

In [32]:
# Fit the scaler on the training features only, then apply the same fitted
# transform to both the training and the test frames. The scaler returns
# plain arrays, so each result is wrapped back into a DataFrame that reuses
# the original column labels.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)
test_scaled = scaler.transform(test_data)

X = pd.DataFrame(data=X_scaled, columns=X.columns)
test_data = pd.DataFrame(data=test_scaled, columns=test_data.columns)

In [33]:
def load_model(name:str):
    """Create a new compiled network together with its training callbacks.

    The network is a stack of Dense layers (108 -> 64 -> 32 -> 1). Hidden
    layers use the module-level ``ACTIVATION``; the single output unit uses a
    sigmoid. The input width is read from the module-level ``X`` frame.

    Args:
        name: Accepted for the caller's convenience; the function body does
            not use it.

    Returns:
        A ``(model, early_stopping, plateau)`` tuple: the compiled Keras
        model, an ``EarlyStopping`` callback and a ``ReduceLROnPlateau``
        callback, both monitoring ``val_loss``.
    """
    n_features = X.shape[1]

    # Layers are instantiated in stacking order, first to last.
    layer_stack = [layers.Dense(108, activation=ACTIVATION, input_shape=[n_features])]
    layer_stack.extend(layers.Dense(width, activation=ACTIVATION) for width in (64, 32))
    layer_stack.append(layers.Dense(1, activation='sigmoid'))
    model = keras.Sequential(layer_stack)

    model.compile(
        loss='binary_crossentropy',
        metrics=['AUC'],
        optimizer=keras.optimizers.Adam(learning_rate=LEARNING_RATE),
    )

    # Stop after 20 epochs without a lower val_loss and roll back to the
    # best weights seen.
    early_stopping = callbacks.EarlyStopping(
        monitor='val_loss',
        mode='min',
        min_delta=0,
        patience=20,
        baseline=None,
        restore_best_weights=True,
        verbose=0,
    )

    # Multiply the learning rate by 0.1 after 10 epochs without a lower
    # val_loss, never going below 1e-5.
    plateau = callbacks.ReduceLROnPlateau(
        monitor='val_loss',
        mode='min',
        factor=0.1,
        patience=10,
        min_lr=1e-5,
        verbose=0,
    )

    return model, early_stopping, plateau

In [34]:
# 5-fold stratified cross-validation. Each fold trains a fresh model, scores
# it on the held-out part (ROC AUC) and predicts the test set; the per-fold
# test predictions are collected in `preds` and the fold scores in `scores`.
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=SEED)
preds, scores = [], []

for fold, (idx_train, idx_valid) in enumerate(skf.split(X, y)):
    # Positional row selection for this fold.
    X_train, X_valid = X.iloc[idx_train], X.iloc[idx_valid]
    y_train, y_valid = y.iloc[idx_train], y.iloc[idx_valid]

    # New model and new callback instances for every fold.
    model, early_stopping, plateau = load_model('version1')

    history = model.fit(
        X_train,
        y_train,
        validation_data=(X_valid, y_valid),
        epochs=EPOCHS,
        batch_size=BATCH_SIZE,
        shuffle=True,
        callbacks=[early_stopping, plateau],
        verbose=0,
    )

    # Out-of-fold ROC AUC: area under the (fpr, tpr) curve.
    pred_valid = model.predict(X_valid)
    fpr, tpr, _ = roc_curve(y_valid, pred_valid)
    score = auc(fpr, tpr)
    scores.append(score)

    print(f"Fold: {fold + 1} Score: {score}" "\n")
    print('||'*30, "\n")

    # Keep this fold's test-set predictions for later averaging.
    test_preds = model.predict(test_data)
    preds.append(test_preds)

print(f"Overall Validation Score: {np.mean(scores)}")

Fold: 1 Score: 0.7554401209021917

|||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| 

Fold: 2 Score: 0.753571762916335

|||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| 

Fold: 3 Score: 0.75479064142273

|||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| 

Fold: 4 Score: 0.7542074033140961

|||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| 

Fold: 5 Score: 0.756880148753184

|||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| 

Overall Validation Score: 0.7549780154617073


In [35]:
# Raw string literal: the Windows path contains backslashes that are not valid
# escape sequences in a normal string literal (SyntaxWarning on recent Python
# versions). The resulting path text is unchanged.
submission = pd.read_csv(r'D:\CALCUL\python\coursera\ML\kaggle_nov21/sample_submission.csv')

# Average the per-fold test predictions element-wise. The network's output
# layer has a single unit, so each entry of `preds` is presumably a 2-D
# (n_rows, 1) array; flatten the mean to 1-D before assigning it to a column.
predictions = np.mean(preds, axis=0).ravel()

submission['target'] = predictions
submission.to_csv('./submission_Nov21_first_NN_keras.csv', index=False)