In [17]:
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Dense, Conv1D, Flatten, LeakyReLU, Dropout, Input, BatchNormalization
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error, explained_variance_score
from keras_adabound import AdaBound

In [2]:
# Report whether TensorFlow can see at least one GPU.
# tf.test.is_gpu_available() is deprecated; listing the physical GPU devices
# is the replacement recommended by TensorFlow's own deprecation notice.
print("GPU is", "available" if tf.config.list_physical_devices("GPU") else "NOT AVAILABLE")

Instructions for updating:
Use `tf.config.list_physical_devices('GPU')` instead.
GPU is available


In [3]:
# Load the dataset, using the "ticker" and "date" columns as a two-level index.
DF = pd.read_csv(
    "final_data.csv",
    index_col=["ticker", "date"],
)

In [4]:
def get_last(data, target):
    """Return the last row of every ticker, without the target column(s).

    Parameters
    ----------
    data : pandas.DataFrame
        Frame whose first index level identifies the ticker. Rows are taken
        in their existing order, so the "last" row of a ticker is the one
        that appears last in ``data``.
    target : label or list of labels
        Column(s) to drop from the result.

    Returns
    -------
    pandas.DataFrame
        One row per ticker, indexed by ticker (sorted, unnamed index), with
        the remaining columns in their original order and original dtypes.
    """
    # A single grouped pass replaces a per-ticker ``.loc`` lookup in a Python
    # loop; ``tail(1)`` keeps the positionally-last row of each group and
    # preserves the column dtypes.
    newest = data.groupby(level=0, sort=False).tail(1).drop(target, axis=1)
    # Keep only the ticker level as an unnamed index.
    newest.index = newest.index.get_level_values(0).rename(None)
    return newest.sort_index()

In [5]:
# Last row of every ticker with the "next" column dropped; taken before the
# inf/NaN rows are removed from DF further down.
last = get_last(data=DF, target=["next"])

In [6]:
# Turn +/- infinity into NaN so they are treated as missing values.
DF = DF.replace(to_replace=[np.inf, -np.inf], value=np.nan)

In [7]:
# Discard every row that contains at least one missing value.
DF = DF.dropna(axis=0, how="any")

In [8]:
# Rows x columns remaining after the inf/NaN clean-up above.
DF.shape

(5808022, 85)

In [9]:
# Target is the "next" column; every other column is used as a feature.
y = DF["next"]
X = DF.drop(columns=["next"])

In [11]:
# Hold out 20% of the rows for testing, with a fixed seed for repeatability.
# NOTE(review): train_test_split shuffles rows by default, so train and test
# mix dates of the same ticker — confirm this is intended for date-indexed data.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=101,
)

In [12]:
# Feature scaler mapping each column onto the [0, 1] range.
scaler = MinMaxScaler(feature_range=(0, 1))

In [13]:
# Learn the scaling from the training features only, then apply the same
# transformation to both splits.
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [14]:
# Insert a new length-1 axis at position 2 of each feature array.
X_train = X_train[:, :, np.newaxis]
X_test = X_test[:, :, np.newaxis]

In [15]:
# Hyper-parameters shared by the model definition below:
# convolution kernel width and dropout rate.
kernel_size, dropout = 2, 0.2

In [23]:
# Three causal Conv1D stages (32 -> 64 -> 128 filters, dilation 1 -> 2 -> 3),
# each followed by batch normalisation, then a flattened dense head ending in
# a single output unit.
# NOTE(review): the relu on the output unit clamps predictions to >= 0 —
# confirm the "next" target is never negative.
model = Sequential(
    [
        Conv1D(32, kernel_size, padding="causal", input_shape=X_train.shape[1:]),
        LeakyReLU(alpha=0.1),
        BatchNormalization(),
        Conv1D(64, kernel_size, padding="causal", dilation_rate=2),
        LeakyReLU(alpha=0.01),
        BatchNormalization(),
        Conv1D(128, kernel_size, padding="causal", activation="relu", dilation_rate=3),
        BatchNormalization(),
        Flatten(),
        Dense(128, activation="relu"),
        Dropout(dropout),
        Dense(1, activation="relu"),
    ]
)

In [24]:
# Train with the Adam optimizer against a mean-squared-error loss.
model.compile(optimizer="adam", loss="mean_squared_error")

In [25]:
# Fit for two epochs in batches of 512; the held-out test split doubles as
# the validation set reported after each epoch.
model.fit(
    X_train,
    y_train,
    batch_size=512,
    epochs=2,
    verbose=1,
    validation_data=(X_test, y_test),
)

Train on 4646417 samples, validate on 1161605 samples
Epoch 1/2
Epoch 2/2


<tensorflow.python.keras.callbacks.History at 0x19a833ef9c8>

In [26]:
# Model predictions for the held-out test features.
pred = model.predict(x=X_test)

In [27]:
# Mean squared error on the test split.
print(mean_squared_error(y_true=y_test, y_pred=pred))

40.50795281719495


In [28]:
# Explained variance on the test split.
print(explained_variance_score(y_true=y_test, y_pred=pred))

0.9989668862814863


In [29]:
def mean_absolute_percentage_error(y_true, y_pred):
    """Return the mean absolute percentage error, expressed in percent.

    Computes ``mean(|y_true - y_pred| / |y_true|) * 100``.

    Parameters
    ----------
    y_true : array-like
        Observed values.
    y_pred : array-like
        Predicted values, holding the same number of elements as ``y_true``.

    Returns
    -------
    float
        The error in percent. Entries of ``y_true`` equal to zero divide by
        zero and yield ``inf``/``nan`` under NumPy's usual rules.

    Raises
    ------
    ValueError
        If the two inputs do not hold the same number of elements.
    """
    # Flatten both inputs so a (n, 1) prediction column is compared
    # element-by-element with a (n,) target instead of being broadcast, and so
    # pandas inputs are compared by position rather than by index label.
    actual = np.asarray(y_true, dtype=float).ravel()
    forecast = np.asarray(y_pred, dtype=float).ravel()
    if actual.shape != forecast.shape:
        raise ValueError(
            "y_true and y_pred must contain the same number of elements, got "
            f"{actual.size} and {forecast.size}"
        )
    # Divide by the magnitude of the target so negative targets contribute a
    # positive percentage instead of cancelling out positive ones.
    return np.mean(np.abs(actual - forecast) / np.abs(actual)) * 100

In [30]:
# Reshape y_test to a single dimension holding one entry per sample.
y_test = np.reshape(y_test, len(y_test))

In [31]:
# Reshape the predictions to a single dimension holding one entry per sample.
pred = pred.reshape(len(pred))

In [32]:
# Mean absolute percentage error on the test split.
print(mean_absolute_percentage_error(y_true=y_test, y_pred=pred))

64.6855464454525


In [33]:
# In-sample predictions on the training features.
in_pred = model.predict(x=X_train)

In [34]:
# Explained variance on the training split.
print(explained_variance_score(y_true=y_train, y_pred=in_pred))

0.9989050199205307
