In [None]:
from xgboost import XGBRegressor, XGBClassifier
import xgboost as xgb
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import time
from sklearn.utils import shuffle
from sklearn.utils import class_weight
import tensorflow as tf
from tensorflow.keras.layers import Input, Dense, BatchNormalization, Dropout
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras import regularizers
from sklearn.model_selection import train_test_split
from datetime import date, timedelta
import os

import neuralNets

def oneHotEncodeData3Classes(targets):
    """One-hot encode class labels 0, 1 and 2 into an ``(n, 3)`` array.

    Parameters
    ----------
    targets : array-like
        One class label per sample. Integer or floating-point values equal
        to 0, 1 or 2 select the column that is set to 1. NumPy arrays,
        pandas Series (regardless of their index) and plain lists are
        accepted.

    Returns
    -------
    numpy.ndarray
        Float array of shape ``(n, 3)``. Rows whose label is not 0, 1 or 2
        stay all-zero and are reported on stdout, one message per row.
    """
    # Work on a flat positional array so lookups never depend on a pandas index.
    labels = np.asarray(targets).ravel()
    Y_val = np.zeros((labels.shape[0], 3))

    # One boolean mask per class replaces the per-row Python loop.
    known = np.zeros(labels.shape[0], dtype=bool)
    for cls in range(3):
        mask = labels == cls
        Y_val[mask, cls] = 1
        known |= mask

    # Keep the original diagnostic for every label outside {0, 1, 2}.
    for unexpected in labels[~known]:
        print("something went wrong, new class", unexpected)
    return Y_val

In [None]:
# Record the start time (no later use of `tic` is visible in this section).
tic = time.time()

# Load the training split; every column whose name starts with 'feature'
# is treated as a model input.
training_data = pd.read_csv("data/numerai_datasets_02.05.21/numerai_training_data.csv")
is_feature = training_data.columns.str.startswith('feature')
feature_cols = training_data.columns[is_feature]

# Store the features and the target as half-precision floats.
training_data[feature_cols] = training_data[feature_cols].astype(np.float16)
training_data["target"] = training_data["target"].astype(np.float16)

In [None]:
# Load the validation split from the same dataset folder as the training data.
# Unlike the training frame, its columns are not cast to float16 here.
validation_data = pd.read_csv("data/numerai_datasets_02.05.21/numerai_validation_data.csv")  

# Train the 3-class classifier: targets {0, 1} vs. {0.25, 0.75} vs. 0.5

In [None]:
# Collapse the target values into three class labels:
#   1             -> 0   (so targets 0 and 1 share class 0)
#   0.25 and 0.75 -> 1
#   0.5           -> 2
# The order matters: 1 -> 0 must run before 0.25/0.75 are mapped to 1.
Y_train_3class = (
    training_data.target
    .replace(1, 0)
    .replace([0.25, 0.75], 1)
    .replace(0.5, 2)
    .to_numpy()
)
X_train_3class = training_data[feature_cols].to_numpy()

# Shuffle the rows, then hold out 30% of them as a test split.
X_train_3class, Y_train_3class = shuffle(X_train_3class, Y_train_3class)
X_train_3class, X_test_3class, Y_train_3class, Y_test_3class = train_test_split(
    X_train_3class,
    Y_train_3class,
    test_size=0.3,
)

# Apply the same label mapping to the validation split (left unshuffled).
Y_val_3class = (
    validation_data.target
    .replace(1, 0)
    .replace([0.25, 0.75], 1)
    .replace(0.5, 2)
    .to_numpy()
)
X_val_3class = validation_data[feature_cols].to_numpy()

In [None]:
# Build the 3-class network from the project-local `neuralNets` module.
# The argument is the number of feature columns (presumably the input
# dimension of the network; confirm in neuralNets.defineNN_3classes).
modelNN_3classes = neuralNets.defineNN_3classes(X_val_3class.shape[1])
optAdam    = tf.keras.optimizers.Adam(learning_rate=0.01, beta_1=0.9, beta_2=0.99)

# `metrics` is passed as a list: that is the documented form, and newer
# Keras releases reject a bare string.
modelNN_3classes.compile(optimizer=optAdam, loss='categorical_crossentropy', metrics=['categorical_accuracy'])

# Continue from previously saved weights instead of a fresh initialisation.
modelNN_3classes.load_weights("model_3_overfit_100train_100val.h5")

In [None]:
# One-hot encode the class labels of the train, test and validation splits
# (in that order).
Y_train_3class_oneHot, Y_test_3class_oneHot, Y_val_3class_oneHot = (
    oneHotEncodeData3Classes(labels)
    for labels in (Y_train_3class, Y_test_3class, Y_val_3class)
)

# Per-epoch results collected by the callback below.
test_history, val_history = [], []


class MyCustomCallback_3class(tf.keras.callbacks.Callback):
    """Evaluate the 3-class model on the test and validation splits after each epoch.

    The first element of each evaluation result is appended to the
    module-level ``test_history`` / ``val_history`` lists, and both full
    results are printed.
    """

    def on_epoch_end(self, epoch, logs=None):
        test_result = self.model.evaluate(X_test_3class, Y_test_3class_oneHot, verbose=0)
        val_result = self.model.evaluate(X_val_3class, Y_val_3class_oneHot, verbose=0)

        # The lists are looked up by name on every call, so re-binding them
        # in another cell before training is picked up here.
        test_history.append(test_result[0])
        val_history.append(val_result[0])

        print("test ", test_result)
        print("val", val_result)


my_val_callback_3class = MyCustomCallback_3class()

In [None]:
# Compute 'balanced' per-class weights for the three labels.
# `classes` and `y` are keyword-only in current scikit-learn; passing them
# positionally raises a TypeError there, and the keywords also work on older
# releases.
class_weights = class_weight.compute_class_weight('balanced', classes=np.array([0, 1, 2]), y=Y_train_3class)
# Keras expects a {class_index: weight} mapping.
class_weights = dict(enumerate(class_weights))

# Start from empty histories so the lists filled by the callback line up with
# the epochs of this training run.
test_history = []
val_history = []

# No validation_data is passed to fit(); the callback evaluates the test and
# validation splits after each epoch instead.
history = modelNN_3classes.fit(
    X_train_3class,
    Y_train_3class_oneHot,
    epochs=100,
    class_weight=class_weights,
    batch_size=128*256,
    callbacks=[my_val_callback_3class],
)

In [None]:
# Persist the current weights of the 3-class model under this file name.
modelNN_3classes.save_weights("model_3class_100train_92val_noValData.h5")


In [None]:
# Gather the per-epoch losses: the training loss comes from the Keras history,
# the test and validation losses from the lists filled by the callback.
training_loss = history.history['loss']
test_loss, val_loss = test_history, val_history

# Epochs are numbered from 1 on the x-axis.
epoch_count = range(1, len(training_loss) + 1)

# Draw one curve per split, in the same order as the legend entries.
for losses, line_style in ((training_loss, 'r--'), (test_loss, 'b-'), (val_loss, 'g--')):
    plt.plot(epoch_count, losses, line_style)
plt.legend(['Training Loss', 'Test Loss', 'val loss'])
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.show()

# train classifier between 0.25/0.75

In [None]:
# Validation rows whose target is 0.25 or 0.75 (0.25 rows first).
validation_data_025 = validation_data.loc[validation_data.target == 0.25]
validation_data_075 = validation_data.loc[validation_data.target == 0.75]
validation_data_025075 = pd.concat([validation_data_025, validation_data_075], axis = 0, ignore_index = True)

# Binary labels: 0.25 -> 0, 0.75 -> 1.
X_val_025075 = validation_data_025075[feature_cols].to_numpy()
Y_val_025075 = validation_data_025075.target
Y_val_025075 = Y_val_025075.replace(0.25, 0)
Y_val_025075 = Y_val_025075.replace(0.75, 1)
Y_val_025075 = Y_val_025075.to_numpy()

# The training-side preparation must be active: the following cells use
# X_train_025075, Y_train_025075, X_test_025075 and Y_test_025075, which are
# otherwise undefined on a fresh kernel.
training_data_025 = training_data.loc[training_data.target == 0.25]
training_data_075 = training_data.loc[training_data.target == 0.75]
training_data_025075 = pd.concat([training_data_025, training_data_075], axis = 0, ignore_index = True)

X_train_025075 = training_data_025075[feature_cols].to_numpy()
Y_train_025075 = training_data_025075.target
Y_train_025075 = Y_train_025075.replace(0.25, 0)
Y_train_025075 = Y_train_025075.replace(0.75, 1)
Y_train_025075 = Y_train_025075.to_numpy()

# Shuffle the rows, then hold out 30% of them as a test split.
X_train_025075, Y_train_025075 = shuffle(X_train_025075, Y_train_025075)

X_train_025075, X_test_025075, Y_train_025075, Y_test_025075 = train_test_split(X_train_025075, Y_train_025075, test_size = 0.3)

In [None]:
# Compute 'balanced' weights for the two labels (0.25 -> 0, 0.75 -> 1).
# `classes` and `y` are keyword-only in current scikit-learn; passing them
# positionally raises a TypeError there.
class_weights = class_weight.compute_class_weight('balanced', classes=np.array([0, 1]), y=Y_train_025075)
# Keras expects a {class_index: weight} mapping.
class_weights = dict(enumerate(class_weights))

In [None]:
# Build the two-class network from the project-local `neuralNets` module.
# The argument is the number of feature columns (presumably the input
# dimension of the network; confirm in neuralNets.defineNN_2classes).
modelNN_025075 = neuralNets.defineNN_2classes(X_train_025075.shape[1])
optAdam    = tf.keras.optimizers.Adam(learning_rate=0.0001, beta_1=0.99, beta_2=0.99999)

# `metrics` is passed as a list: that is the documented form, and newer
# Keras releases reject a bare string.
modelNN_025075.compile(optimizer=optAdam, loss='binary_crossentropy', metrics=['accuracy'])

# Continue from previously saved weights.
# NOTE(review): the file name ends in '.h5.h5'; confirm it matches the file on disk.
modelNN_025075.load_weights('model_025075_overfit_99train_99val.h5.h5')

In [None]:
class MyCustomCallback_025075(tf.keras.callbacks.Callback):
    """Print test and validation results of the 0.25/0.75 model after each epoch.

    Unlike the other callbacks in this notebook, nothing is recorded; the
    evaluation results are only printed.
    """

    def on_epoch_end(self, epoch, logs=None):
        test_result = self.model.evaluate(X_test_025075, Y_test_025075, verbose=0)
        val_result = self.model.evaluate(X_val_025075, Y_val_025075, verbose=0)
        print("test ", test_result)
        print("val", val_result)


my_val_callback_025075 = MyCustomCallback_025075()

In [None]:
# Train the 0.25/0.75 classifier. No validation_data is passed to fit(); the
# callback evaluates the test and validation splits after each epoch.
history = modelNN_025075.fit(
    X_train_025075,
    Y_train_025075,
    epochs=50,
    batch_size=256*128*10,
    class_weight=class_weights,
    callbacks=[my_val_callback_025075],
)

In [None]:
# Persist the current weights of the 0.25/0.75 model under this file name.
modelNN_025075.save_weights("model_025075_class_100train_92test_noValData.h5")

In [None]:
# Get the training loss history. Keras only records 'val_loss' when fit() is
# given validation data, so the test curve is optional here; indexing the key
# directly raises a KeyError for a run without it.
training_loss = history.history['loss']
test_loss = history.history.get('val_loss')

# Create count of the number of epochs
epoch_count = range(1, len(training_loss) + 1)

# Visualize loss history; the legend lists only the curves actually drawn.
plt.plot(epoch_count, training_loss, 'r--')
legend_labels = ['Training Loss']
if test_loss is not None:
    plt.plot(epoch_count, test_loss, 'b-')
    legend_labels.append('Test Loss')
plt.legend(legend_labels)
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.show()

# train classifier between 0 and 1

In [None]:
# Keep only the training rows whose target is exactly 1 or 0
# (1-rows first, then 0-rows); the target is already a binary label.
training_data_1 = training_data[training_data.target == 1]
training_data_0 = training_data[training_data.target == 0]
training_data_01 = pd.concat([training_data_1, training_data_0], axis=0, ignore_index=True)

# Shuffle the features and labels together, then hold out 30% as a test split.
X_train_01, Y_train_01 = shuffle(
    training_data_01[feature_cols].to_numpy(),
    training_data_01.target.to_numpy(),
)
X_train_01, X_test_01, Y_train_01, Y_test_01 = train_test_split(
    X_train_01,
    Y_train_01,
    test_size=0.3,
)

In [None]:
# Keep only the validation rows whose target is exactly 1 or 0
# (1-rows first, then 0-rows); this split is left unshuffled.
validation_data_1 = validation_data[validation_data.target == 1]
validation_data_0 = validation_data[validation_data.target == 0]
validation_data_01 = pd.concat([validation_data_1, validation_data_0], axis=0, ignore_index=True)

Y_val_01 = validation_data_01.target.to_numpy()
X_val_01 = validation_data_01[feature_cols].to_numpy()

In [None]:
# Per-epoch results collected by the callback below.
test_history, val_history = [], []


class MyCustomCallback_01(tf.keras.callbacks.Callback):
    """Evaluate the 0/1 model on the test and validation splits after each epoch.

    The first element of each evaluation result is appended to the
    module-level ``test_history`` / ``val_history`` lists, and both full
    results are printed.
    """

    def on_epoch_end(self, epoch, logs=None):
        test_result = self.model.evaluate(X_test_01, Y_test_01, verbose=0)
        val_result = self.model.evaluate(X_val_01, Y_val_01, verbose=0)

        # The lists are looked up by name on every call, so re-binding them
        # in another cell before training is picked up here.
        test_history.append(test_result[0])
        val_history.append(val_result[0])

        print("test ", test_result)
        print("val", val_result)


my_val_callback_01 = MyCustomCallback_01()

# Compute 'balanced' weights for the two target values 0 and 1.
# `classes` and `y` are keyword-only in current scikit-learn; passing them
# positionally raises a TypeError there.
class_weights = class_weight.compute_class_weight('balanced', classes=np.array([0, 1]), y=Y_train_01)
# Keras expects a {class_index: weight} mapping.
class_weights = dict(enumerate(class_weights))

In [None]:
# Build the small two-class network from the project-local `neuralNets` module.
# The argument is the number of feature columns (presumably the input
# dimension of the network; confirm in neuralNets.defineNN_small_2classes).
modelNN_01 = neuralNets.defineNN_small_2classes(X_train_01.shape[1])
optAdam    = tf.keras.optimizers.Adam(learning_rate=0.0001, beta_1=0.9, beta_2=0.99)

# `metrics` is passed as a list: that is the documented form, and newer
# Keras releases reject a bare string.
modelNN_01.compile(optimizer=optAdam, loss='binary_crossentropy', metrics=['accuracy'])

# Continue from previously saved weights.
# NOTE(review): the file name ends in '.h5.h5'; confirm it matches the file on disk.
modelNN_01.load_weights('model_01_overfit_99train_99val.h5.h5')


In [None]:
# Start from empty histories so the lists filled by the callback line up with
# the epochs of this training run.
test_history, val_history = [], []

# Train the 0/1 classifier. No validation_data is passed to fit(); the
# callback evaluates the test and validation splits after each epoch.
history = modelNN_01.fit(
    X_train_01,
    Y_train_01,
    epochs=500,
    batch_size=256*128,
    class_weight=class_weights,
    callbacks=[my_val_callback_01],
)

In [None]:
# Persist the current weights of the 0/1 model under this file name.
modelNN_01.save_weights("model_01class_100train_95val_noValData.h5")

In [None]:
# Gather the per-epoch losses: the training loss comes from the Keras history,
# the test and validation losses from the lists filled by the callback.
training_loss = history.history['loss']
test_loss, val_loss = test_history, val_history

# Epochs are numbered from 1 on the x-axis.
epoch_count = range(1, len(training_loss) + 1)

# Draw one curve per split, in the same order as the legend entries.
for losses, line_style in ((training_loss, 'r--'), (test_loss, 'b-'), (val_loss, 'g--')):
    plt.plot(epoch_count, losses, line_style)
plt.legend(['Training Loss', 'Test Loss', 'val loss'])
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.show()