In [None]:
import os
import time
from datetime import date, timedelta

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf
import xgboost as xgb
from sklearn.model_selection import train_test_split
from sklearn.utils import class_weight
from sklearn.utils import shuffle
from tensorflow.keras import regularizers
from tensorflow.keras.layers import Input, Dense, BatchNormalization, Dropout, Conv1D, Conv2D, Flatten, MaxPooling2D
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from xgboost import XGBRegressor, XGBClassifier

import neuralNets

def oneHotEncodeData3Classes(targets):
    """One-hot encode class labels 0, 1 and 2.

    Parameters
    ----------
    targets : array-like of shape (n_samples,)
        One class label per sample. Labels are compared by value, so
        ``0.0`` and ``0`` denote the same class.

    Returns
    -------
    numpy.ndarray of shape (n_samples, 3)
        Row ``j`` has a 1 in the column of ``targets[j]`` and 0 elsewhere.
        A label outside {0, 1, 2} is reported on stdout and its row is left
        all-zero.
    """
    labels = np.asarray(targets)
    n_classes = 3
    Y_val = np.zeros((labels.shape[0], n_classes))

    # One boolean mask per class instead of a Python-level loop over rows.
    for class_idx in range(n_classes):
        Y_val[labels == class_idx, class_idx] = 1

    # Rows that matched no class are still all-zero; report them in input order.
    for unexpected in labels[~Y_val.any(axis=1)]:
        print("something went wrong, new class", unexpected)
    return Y_val

In [None]:
def encodeTrainingData(X_train_2D):
    """One-hot encode a 2-D matrix whose entries are 0, 0.25, 0.5, 0.75 or 1.

    Parameters
    ----------
    X_train_2D : array-like of shape (n_samples, n_features)
        Feature matrix to encode.

    Returns
    -------
    numpy.ndarray of shape (n_samples, n_features, 5)
        ``out[j, i, k]`` is 1 when ``X_train_2D[j, i]`` equals the k-th level
        of (0, 0.25, 0.5, 0.75, 1) and 0 otherwise. A value that matches no
        level is reported on stdout and its five channels stay zero.
    """
    # Encode the argument itself, not whatever global array happens to exist.
    features = np.asarray(X_train_2D)
    n_trainingSamples, n_features = features.shape
    levels = (0, 0.25, 0.5, 0.75, 1)

    X_train_3D = np.zeros((n_trainingSamples, n_features, len(levels)))
    recognised = np.zeros(features.shape, dtype=bool)
    for channel, level in enumerate(levels):
        hits = features == level
        X_train_3D[:, :, channel] = hits  # bool mask is stored as 0.0 / 1.0
        recognised |= hits

    # Boolean indexing yields the unmatched values in row-major order.
    for curValue in features[~recognised]:
        print("something went wrong, new class", curValue)

    return X_train_3D

In [None]:
# Load the training CSV and collect every column whose name starts with
# 'feature'.
training_data = pd.read_csv("data/numerai_datasets_25.04.21/numerai_training_data.csv")
feature_cols = training_data.columns[training_data.columns.str.startswith('feature')]

# Cast the feature columns and the target to half precision.
# NOTE(review): presumably done to reduce memory use -- confirm.
training_data[feature_cols] = training_data[feature_cols].astype(np.float16)
training_data.target        = training_data.target.astype(np.float16)

In [None]:
# Load the validation CSV from the same dataset folder as the training data.
# Unlike the training frame, no dtype cast is applied here.
validation_data = pd.read_csv("data/numerai_datasets_25.04.21/numerai_validation_data.csv")  

In [None]:
# Training features as a 2-D NumPy array (rows = samples, columns = feature_cols).
X_train = training_data[feature_cols].to_numpy()

# One-hot encode the training features with encodeTrainingData.
X_train_3D = encodeTrainingData(X_train)

In [None]:
# Validation features as a 2-D NumPy array, one-hot encoded with the same
# helper that is applied to the training features.
X_val = validation_data[feature_cols].to_numpy()
X_val_3D = encodeTrainingData(X_val)

In [None]:
# Fixed seed so the train/test partition is identical on every run.
SPLIT_SEED = 42

Y_train = training_data.target

# Collapse the five target values into three classes:
#   0 and 1        -> class 0
#   0.25 and 0.75  -> class 1
#   0.5            -> class 2
# The replacements run in sequence, so 1 -> 0 has to happen before
# 0.25/0.75 -> 1; otherwise the freshly written 1s would be folded into class 0.
Y_train_3class = (
    Y_train
    .replace(1, 0)
    .replace([0.25, 0.75], 1)
    .replace(0.5, 2)
    .to_numpy()
)

# train_test_split shuffles by default, so a separate shuffle() pass (which
# would also copy the whole encoded tensor) is not needed. 30% of the samples
# are held out as the test set.
X_train_3class, X_test_3class, Y_train_3class, Y_test_3class = train_test_split(
    X_train_3D,
    Y_train_3class,
    test_size=0.3,
    random_state=SPLIT_SEED,
)

In [None]:
# Map the validation targets onto three classes, one replacement after another:
# 1 -> 0 first, then 0.25 and 0.75 -> 1, and finally 0.5 -> 2.
Y_val_3class = (
    validation_data.target
    .replace(1, 0)
    .replace([0.25, 0.75], 1)
    .replace(0.5, 2)
    .to_numpy()
)

In [None]:
def defineNN_3classes_3D(n_inputFeatures, n_classes):
    """Build the uncompiled 2-D convolutional classifier with three outputs.

    Parameters
    ----------
    n_inputFeatures : int
        First spatial dimension of the input.
    n_classes : int
        Second spatial dimension of the input. This sizes the input only;
        the output layer always has 3 units.

    Returns
    -------
    tensorflow.keras.Model
        Functional model named 'deepNN' that maps inputs of shape
        ``(n_inputFeatures, n_classes, 1)`` to a 3-way softmax.
    """
    activation = "relu"

    # The Input layer already fixes the shape, so the convolution does not
    # need its own input_shape argument.
    # NOTE(review): callers in this file pass arrays without the trailing
    # channel axis; presumably Keras adds it automatically -- confirm.
    X_input = Input(shape=(n_inputFeatures, n_classes, 1))
    X = Conv2D(32, (2, 2), activation=activation)(X_input)
    X = MaxPooling2D((2, 2))(X)
    # NOTE(review): this convolution has no activation (linear output);
    # confirm that this is intended.
    X = Conv2D(64, (2, 2))(X)
    X = Flatten()(X)

    # Three identical fully connected layers ahead of the classifier head.
    X = Dense(32, activation=activation)(X)
    X = Dense(32, activation=activation)(X)
    X = Dense(32, activation=activation)(X)

    X = Dense(3, activation="softmax")(X)

    model = Model(inputs=X_input, outputs=X, name='deepNN')

    return model





In [None]:
# Size the network from the two trailing dimensions of the encoded
# validation tensor.
modelNN_3classes = defineNN_3classes_3D(X_val_3D.shape[1], X_val_3D.shape[2])
optAdam    = tf.keras.optimizers.Adam(learning_rate=0.01, beta_1=0.9, beta_2=0.99)

# `metrics` is passed as a list: current Keras releases reject a bare string here.
modelNN_3classes.compile(optimizer=optAdam, loss='categorical_crossentropy', metrics=['categorical_accuracy'])

In [None]:
# One-hot encode the 3-class labels of the train, test and validation sets;
# these are the targets used with the categorical cross-entropy loss.
Y_train_3class_oneHot = oneHotEncodeData3Classes(Y_train_3class)
Y_test_3class_oneHot  = oneHotEncodeData3Classes(Y_test_3class)
Y_val_3class_oneHot   = oneHotEncodeData3Classes(Y_val_3class)

class MyCustomCallback_3class(tf.keras.callbacks.Callback):
    """Keras callback that scores the model on two held-out sets every epoch.

    At the end of each epoch the model is evaluated on the module-level
    test split (X_test_3class, Y_test_3class_oneHot) and on the validation
    set (X_val_3D, Y_val_3class_oneHot), and both results are printed.
    """

    def on_epoch_end(self, epoch, logs=None):
        """Evaluate on both held-out sets, then print the two results."""
        held_out = (
            ("test ", X_test_3class, Y_test_3class_oneHot),
            ("val", X_val_3D, Y_val_3class_oneHot),
        )
        # Run both evaluations before printing anything.
        scores = [
            (label, self.model.evaluate(inputs, onehot_targets, verbose = 0))
            for label, inputs, onehot_targets in held_out
        ]
        for label, score in scores:
            print(label, score)


my_val_callback_3class = MyCustomCallback_3class()

In [None]:
# Per-class weights from scikit-learn's 'balanced' heuristic, used to
# counter class imbalance in the 3-class target. `classes` and `y` are
# passed by keyword because recent scikit-learn versions make them
# keyword-only.
class_weights = class_weight.compute_class_weight(
    class_weight='balanced',
    classes=np.array([0, 1, 2]),
    y=Y_train_3class,
)
# Keras expects a {class_index: weight} mapping.
class_weights = dict(enumerate(class_weights))

# Train for 10 epochs; the callback reports test and validation scores after
# each epoch.
history = modelNN_3classes.fit(
    X_train_3class,
    Y_train_3class_oneHot,
    epochs=10,
    class_weight=class_weights,
    batch_size=128,
    callbacks=[my_val_callback_3class],
)