In [93]:
from tensorflow.keras.layers import Input, Dense, BatchNormalization, Dropout, Concatenate, Lambda, GaussianNoise, Activation
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.losses import BinaryCrossentropy
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.layers.experimental.preprocessing import Normalization
import tensorflow as tf
import numpy as np
import pandas as pd
from random import choices
import pickle
import sklearn
from sklearn.metrics import roc_auc_score

In [64]:
# One seed shared by every random number generator seeded below.
SEED = 1111

# Seed NumPy's global RNG and TensorFlow's global generator with the same value.
np.random.seed(SEED)
tf.random.set_seed(SEED)

In [65]:
def load_data():
    """Unpickle and return the object stored in 'df_down_sampled_alternative.p'.

    The path is relative to the current working directory.

    Returns:
        The object contained in the pickle file (the rest of this notebook
        treats it as a pandas DataFrame).
    """
    # NOTE: unpickling can execute arbitrary code, so only load files you trust.
    # The context manager guarantees the handle is closed even if loading fails.
    with open('df_down_sampled_alternative.p', 'rb') as pickle_file:
        return pickle.load(pickle_file)

In [66]:
#Function for splitting data into train/test set!

def train_test_split(test_share, data):
    """Split ``data`` by position into a leading train part and a trailing test part.

    Nothing is shuffled: the first rows become the train set and the remaining
    rows become the test set.

    Args:
        test_share: Fraction of ``data`` to put in the test set, between 0 and 1
            inclusive.
        data: Any sized object that supports slicing (e.g. a list or a DataFrame).

    Returns:
        A ``(train_set, test_set)`` tuple of slices of ``data``.

    Raises:
        ValueError: If ``test_share`` is not within [0, 1]. Without this check a
            value above 1 produced a negative slice index and a silently wrong split.
    """
    if not 0 <= test_share <= 1:
        raise ValueError(
            "test_share must be between 0 and 1, got {!r}".format(test_share)
        )

    # Round before truncating so floating-point error (e.g. 1 - 0.9 being
    # 0.09999999999999998) cannot push the train size one row too low.
    train_size = int(round(len(data) * (1 - test_share), 9))

    return (data[:train_size], data[train_size:])

In [67]:
def create_mlp(
    num_columns, num_labels, hidden_units, dropout_rates, label_smoothing, learning_rate
):
    """Build and compile a feed-forward network with sigmoid outputs.

    Layout: input -> BatchNormalization -> Dropout, then for every entry of
    ``hidden_units`` a Dense -> BatchNormalization -> swish -> Dropout stage,
    and finally a Dense layer with ``num_labels`` units followed by a sigmoid.

    Args:
        num_columns: Length of the input feature vector.
        num_labels: Number of output units.
        hidden_units: Sequence holding the width of each hidden Dense layer.
        dropout_rates: Sequence of dropout rates. Index 0 is applied to the
            normalised input and index ``i + 1`` after hidden layer ``i``, so at
            least ``len(hidden_units) + 1`` values are needed; extras are ignored.
        label_smoothing: Forwarded to ``BinaryCrossentropy(label_smoothing=...)``.
        learning_rate: Learning rate given to the Adam optimizer.

    Returns:
        The compiled ``tf.keras`` model (binary cross-entropy loss, AUC metric).

    Raises:
        ValueError: If ``dropout_rates`` has fewer than ``len(hidden_units) + 1``
            entries.
    """
    if len(dropout_rates) < len(hidden_units) + 1:
        raise ValueError(
            "dropout_rates needs {} values (one for the input plus one per hidden "
            "layer), got {}".format(len(hidden_units) + 1, len(dropout_rates))
        )

    inp = tf.keras.layers.Input(shape=(num_columns,))
    x = tf.keras.layers.BatchNormalization()(inp)
    x = tf.keras.layers.Dropout(dropout_rates[0])(x)

    # Pair each hidden width with the dropout rate that follows it.
    for units, rate in zip(hidden_units, dropout_rates[1:]):
        x = tf.keras.layers.Dense(units)(x)
        x = tf.keras.layers.BatchNormalization()(x)
        x = tf.keras.layers.Activation(tf.keras.activations.swish)(x)
        x = tf.keras.layers.Dropout(rate)(x)

    x = tf.keras.layers.Dense(num_labels)(x)
    out = tf.keras.layers.Activation("sigmoid")(x)

    model = tf.keras.models.Model(inputs=inp, outputs=out)
    model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate),
        loss=tf.keras.losses.BinaryCrossentropy(label_smoothing=label_smoothing),
        # `metrics` is documented as a list; a bare metric object is not
        # accepted by every Keras version.
        metrics=[tf.keras.metrics.AUC(name="AUC")],
    )

    return model

In [68]:
# Load the down-sampled frame. Per the original author's note, zero-weight
# observations and data < 86 have already been removed from it
# (presumably this means rows with date < 86 — TODO confirm).

df = load_data()

In [69]:
# Replace missing values with the mean of their column. Rebinding `df` instead
# of using inplace=True keeps the step explicit, and numeric_only=True keeps
# the mean from failing on non-numeric columns in recent pandas versions.
# NOTE(review): the means are computed over the whole frame, before the
# train/test split performed in a later cell, so test rows influence the
# imputation values — confirm this is acceptable.
df = df.fillna(df.mean(numeric_only=True))

In [70]:
# Binary flag: 1 where `resp` is strictly positive, 0 otherwise.
df['action'] = np.greater(df['resp'].values, 0).astype(int)

In [71]:
# Names of all columns whose label contains the substring "feature".
features = list(filter(lambda column_name: "feature" in column_name, df.columns))

In [72]:
# Column-wise mean of every feature column except the first entry of `features`.
f_mean = df[features[1:]].to_numpy().mean(axis=0)

In [73]:
# Response columns that are turned into binary targets; the order here fixes
# the column order of the target arrays built from this list.
resp_cols = [
    'resp_1',
    'resp_2',
    'resp_3',
    'resp',
    'resp_4',
]

In [74]:
# Positional split: the leading 70% of rows go to train, the trailing 30% to test.
train_set, test_set = train_test_split(data=df, test_share=0.3)

In [95]:
# Training targets: one 0/1 column per entry of `resp_cols` (1 where the
# response is strictly positive), shaped (rows, len(resp_cols)).
y_train = (train_set[resp_cols] > 0).astype('int').to_numpy()

In [96]:
# Training inputs: every column whose name contains "feature".
X_train = train_set.filter(like='feature')

In [97]:
# Test targets: one 0/1 column per entry of `resp_cols` (1 where the
# response is strictly positive), shaped (rows, len(resp_cols)).
y_test = (test_set[resp_cols] > 0).astype('int').to_numpy()

In [98]:
# Test inputs: every column whose name contains "feature".
X_test = test_set.filter(like='feature')

In [100]:
# Training hyper-parameters.
batch_size = 5_000
# Three hidden layers of equal width.
hidden_units = [150] * 3
# One rate for the input dropout plus one per hidden layer.
dropout_rates = [0.2] * 4
label_smoothing = 0.01
learning_rate = 0.001

In [101]:
# Build the classifier. The output width is taken from `resp_cols` (one sigmoid
# per response column) rather than a hard-coded 5, so it cannot drift from the
# targets; keyword arguments guard against mixing up the positional order.
clf = create_mlp(
    num_columns=len(features),
    num_labels=len(resp_cols),
    hidden_units=hidden_units,
    dropout_rates=dropout_rates,
    label_smoothing=label_smoothing,
    learning_rate=learning_rate,
)

In [102]:
# Train on the training split, using the `batch_size` configured above instead
# of repeating the literal 5000.
# NOTE(review): the stored output below shows "Epoch x/10", which does not match
# epochs=200 — the output looks stale; re-run or confirm the intended epoch count.
clf.fit(X_train, y_train, epochs=200, batch_size=batch_size)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x21e076fb160>

In [None]:
# Persist the trained model with pickle; the context manager closes the file
# even if pickling fails (the original left the handle open).
# NOTE(review): Keras models are not picklable in every TensorFlow version, and
# '.csv' is a misleading extension for a binary pickle. Consider clf.save(...)
# instead — confirm what downstream code expects before changing the format.
with open('tf_model.csv', 'wb') as model_file:
    pickle.dump(clf, model_file)

In [103]:
# List of trained models; it currently holds only `clf`.
models = [clf]

# Decision threshold.
th = 0.5

In [141]:
# Sigmoid outputs of the first model in `models` for the test inputs.
pred = models[0].predict(x=X_test)

In [153]:
# Turn the predictions into 0/1 labels using the threshold `th` defined earlier
# (0.5), instead of repeating the literal here.
# NOTE: this overwrites `pred`, so the raw outputs of predict() are no longer
# available after this cell.
pred = (pred > th).astype(int)

In [157]:
# Element-wise hit matrix: 1 where the predicted label equals the true label.
result = np.equal(pred, y_test).astype(int)

In [169]:
# Share of correct labels over all rows and all target columns.
result.mean()

0.5310766293684043

In [None]:
# Share of correct labels per target column, labelled with the response column
# names so each figure can be attributed (the columns of `result` follow the
# order of `resp_cols`).
pd.DataFrame(result, columns=resp_cols).mean()

In [None]:
# Completion marker printed when the notebook has run through to the last cell.
print("Finished!")