In [204]:
from tensorflow.keras.layers import Input, Dense, BatchNormalization, Dropout, Concatenate, Lambda, GaussianNoise, Activation
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.losses import BinaryCrossentropy
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.layers.experimental.preprocessing import Normalization
import tensorflow as tf
import numpy as np
import pandas as pd
from random import choices
import pickle
import sklearn
from sklearn.metrics import roc_auc_score

In [205]:
# Single seed shared by TensorFlow and NumPy so repeated runs draw the same
# random numbers from those two libraries.
# NOTE(review): Python's built-in `random` module (from which `choices` is
# imported above) is not seeded here.
SEED = 1111

tf.random.set_seed(SEED)
np.random.seed(SEED)

In [206]:
def load_data(path='df_down_sampled_alternative.p'):
    """Load the pickled, down-sampled data set from disk.

    Parameters
    ----------
    path : str or path-like, optional
        Location of the pickle file. Defaults to the file name this notebook
        has always used, so a bare ``load_data()`` call behaves as before.

    Returns
    -------
    object
        The object stored in the pickle file (the notebook treats it as a
        pandas DataFrame).

    Raises
    ------
    OSError
        If the file cannot be opened.
    """
    # SECURITY: unpickling can execute arbitrary code -- only load files from
    # a trusted source.
    # The context manager guarantees the file handle is closed, even when
    # unpickling fails (the previous bare ``open`` leaked the handle).
    with open(path, 'rb') as handle:
        return pickle.load(handle)

In [207]:
#Function for splitting data into train/test set!

def train_test_split(test_share, data):
    """Cut ``data`` into a leading training part and a trailing test part.

    The split is purely positional (nothing is shuffled): the first
    ``int(len(data) * (1 - test_share))`` items become the training set and
    all remaining items become the test set.

    Parameters
    ----------
    test_share : float
        Fraction of ``data`` reserved for the test set.
    data : sliceable sequence
        Anything supporting ``len()`` and slice indexing.

    Returns
    -------
    tuple
        ``(train_set, test_set)``.
    """
    # Index of the first item that belongs to the test set.
    cutoff = int(len(data) * (1 - test_share))

    head, tail = data[:cutoff], data[cutoff:]
    return (head, tail)

In [208]:
def score_predictions(model, X_train, X_test, y_train, y_test):
    """Compute thresholded accuracy of ``model`` on the train and test sets.

    Predictions are turned into 0/1 labels with a strict ``> 0.5`` threshold
    and compared element-wise with the targets; the accuracy is the mean of
    all element-wise matches.

    Parameters
    ----------
    model : object
        Anything exposing ``predict(X)``.
    X_train, X_test : array-like
        Inputs handed to ``model.predict``.
    y_train, y_test : array-like
        0/1 targets compared against the thresholded predictions.

    Returns
    -------
    tuple
        ``(train_accuracy, test_accuracy)``.
    """

    def _hit_rate(inputs, targets):
        # Hard labels: 1 where the predicted value is strictly above 0.5.
        hard_labels = (model.predict(inputs) > 0.5).astype(int)
        matches = (hard_labels == targets).astype(int)
        return matches.mean()

    # The test set is scored before the training set.
    test_accuracy = _hit_rate(X_test, y_test)
    train_accuracy = _hit_rate(X_train, y_train)

    return train_accuracy, test_accuracy

In [209]:
def contruct_outcome_vector(df, resp_cols):
    """Build a 0/1 target matrix from the given response columns.

    For every name in ``resp_cols`` the column ``df[name]`` is converted to an
    integer indicator that is 1 where the value is strictly positive and 0
    otherwise. The indicators are arranged as columns, in the order given by
    ``resp_cols``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame holding the response columns.
    resp_cols : iterable of str
        Names of the columns to convert.

    Returns
    -------
    numpy.ndarray
        Integer array with one row per row of ``df`` and one column per entry
        of ``resp_cols``.
    """
    indicators = []
    for column in resp_cols:
        indicators.append((df[column] > 0).astype('int'))

    # Stacking yields one row per response column; transpose so that each
    # response becomes a column instead.
    return np.transpose(np.stack(indicators))

In [210]:
def construct_input_data(df):
    """Return only the columns of ``df`` whose name contains ``'feature'``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with string column labels.

    Returns
    -------
    pandas.DataFrame
        All rows of ``df``, restricted to the matching columns.
    """
    # Boolean mask over the column labels.
    is_feature = df.columns.str.contains('feature')

    return df.loc[:, is_feature]

In [211]:
# Load the down-sampled frame. According to the original note, zero-weight
# observations and "data < 86" have already been removed upstream.
# NOTE(review): "data < 86" presumably refers to a date/day threshold -- confirm.

df = load_data()

In [212]:
# Impute missing values with the per-column mean.
# * The result is assigned instead of using ``inplace=True``, avoiding
#   in-place mutation of the frame.
# * ``numeric_only=True`` keeps the mean computation from failing on
#   pandas >= 2.0 if the frame contains non-numeric columns.
# NOTE(review): the means are computed over the whole frame, i.e. including
# rows that later end up in the test split. Consider deriving them from the
# training rows only to avoid look-ahead leakage.
df = df.fillna(df.mean(numeric_only = True))

In [213]:
# Names of all columns whose label contains "feature".
# NOTE(review): this list is not referenced by any later cell visible here;
# construct_input_data() performs the column selection on its own.
features = [c for c in df.columns if "feature" in c]

In [214]:
# Response columns that are turned into binary targets. The order here fixes
# the column order of the target matrices built by contruct_outcome_vector().
resp_cols = ['resp_1', 'resp_2', 'resp_3', 'resp', 'resp_4']

In [215]:
# Positional split without shuffling: the leading ~70% of the rows are used
# for training, the trailing rows for testing.
train_set, test_set = train_test_split(test_share = 0.3, data = df)

In [217]:
# Training targets: one 0/1 column per entry of resp_cols (1 where the
# response is strictly positive).
y_train = contruct_outcome_vector(train_set, resp_cols)

In [218]:
# Training inputs: only the columns whose name contains "feature".
X_train = construct_input_data(train_set)

In [219]:
# Test targets, built exactly like the training targets.
y_test = contruct_outcome_vector(test_set, resp_cols)

In [220]:
# Test inputs, built exactly like the training inputs.
X_test = construct_input_data(test_set)

In [221]:
# Construct the NN model: a stack of identical Dense(relu) + Dropout hidden
# blocks followed by one sigmoid output unit per binary target.

HIDDEN_LAYERS = 4      # number of Dense + Dropout blocks
HIDDEN_UNITS = 150     # width of every hidden Dense layer
DROPOUT_RATE = 0.25    # dropout applied after every hidden Dense layer

n_features = X_train.shape[1]
# Derive the output width from the targets instead of hard-coding 5, so the
# network stays consistent if the list of response columns changes.
n_targets = y_train.shape[1]

model = Sequential()
for layer_idx in range(HIDDEN_LAYERS):
    if layer_idx == 0:
        # Only the first layer declares the input dimensionality.
        model.add(Dense(HIDDEN_UNITS, input_dim = n_features, activation='relu'))
    else:
        model.add(Dense(HIDDEN_UNITS, activation='relu'))
    model.add(Dropout(DROPOUT_RATE))
model.add(Dense(n_targets, activation = 'sigmoid'))

In [222]:
# Configure training: binary cross-entropy loss, the optimizer selected by
# the name 'adam', and 'AUC' as the reported metric.
model.compile(loss = 'binary_crossentropy', optimizer='adam', metrics = ['AUC'])

In [None]:
# Train for 30 epochs with batches of 5000 rows.
# NOTE(review): no validation data or callbacks are passed, so the imported
# EarlyStopping is unused and nothing is monitored on held-out data during
# training. This cell also carries no execution count, while the scoring
# cells below do -- re-run the notebook top to bottom before trusting the
# reported numbers.
model.fit(X_train, y_train, epochs = 30, batch_size = 5000)

In [198]:
# Element-wise accuracy at a 0.5 threshold, on the training and the test set.
train_accuracy, test_accuracy = score_predictions(model, X_train, X_test, y_train, y_test)

In [201]:
# Report both accuracies rounded to four decimals (label typo "Accuarcy" fixed).
print("Accuracy on train-set: {}. Accuracy on test-set: {}".format(np.round(train_accuracy,4), np.round(test_accuracy,4)))

Accuarcy on train-set: 0.5523. Accuracy on test-set: 0.5319
