<h1>Table of Contents<span class="tocSkip"></span></h1>
<div class="toc"><ul class="toc-item"><li><span><a href="#Intro" data-toc-modified-id="Intro-1"><span class="toc-item-num">1&nbsp;&nbsp;</span>Intro</a></span><ul class="toc-item"><li><span><a href="#Presets" data-toc-modified-id="Presets-1.1"><span class="toc-item-num">1.1&nbsp;&nbsp;</span>Presets</a></span></li><li><span><a href="#Style,-imports,-and-data" data-toc-modified-id="Style,-imports,-and-data-1.2"><span class="toc-item-num">1.2&nbsp;&nbsp;</span>Style, imports, and data</a></span></li><li><span><a href="#GPU" data-toc-modified-id="GPU-1.3"><span class="toc-item-num">1.3&nbsp;&nbsp;</span>GPU</a></span></li></ul></li><li><span><a href="#Model" data-toc-modified-id="Model-2"><span class="toc-item-num">2&nbsp;&nbsp;</span>Model</a></span><ul class="toc-item"><li><span><a href="#Functions" data-toc-modified-id="Functions-2.1"><span class="toc-item-num">2.1&nbsp;&nbsp;</span>Functions</a></span></li><li><span><a href="#Load-data-(freeze-this-and-read-series-of-pickles)" data-toc-modified-id="Load-data-(freeze-this-and-read-series-of-pickles)-2.2"><span class="toc-item-num">2.2&nbsp;&nbsp;</span>Load data (freeze this and read series of pickles)</a></span></li><li><span><a href="#Modeling" data-toc-modified-id="Modeling-2.3"><span class="toc-item-num">2.3&nbsp;&nbsp;</span>Modeling</a></span></li><li><span><a href="#Modeling-with-Time-Series-Generator" data-toc-modified-id="Modeling-with-Time-Series-Generator-2.4"><span class="toc-item-num">2.4&nbsp;&nbsp;</span>Modeling with Time Series Generator</a></span></li></ul></li></ul></div>

# Intro

## Presets

In [1]:
USE_GPU = True
IGNORE_WARN = True
SEED = 42

## Style, imports, and data

In [2]:
import warnings
import matplotlib as mpl
mpl.rcParams['axes.titlesize']='xx-large'
mpl.rcParams['xtick.labelsize'] = 13
mpl.rcParams['ytick.labelsize'] = 13
mpl.rcParams['axes.labelsize'] = 16
if IGNORE_WARN:
    warnings.filterwarnings('ignore')

In [3]:
import os
import time
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import datetime
pd.set_option('display.float_format', lambda x: '%.3f' % x)

In [4]:
import numpy as np
np.random.seed(SEED)

import tensorflow as tf
tf.random.set_seed(SEED)

In [5]:
import importlib
import sys

In [6]:
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import r2_score

from tensorflow.keras.models import Sequential
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.layers import Dense, Dropout, LSTM, Embedding
# from tensorflow.keras.regularizers import l2, l1
from tensorflow.keras.preprocessing import sequence

## GPU

In [7]:
if USE_GPU:
    # Enable GPU
    physical_devices = tf.config.list_physical_devices('GPU')
    tf.config.experimental.set_memory_growth(physical_devices[0], enable=True)
    os.environ["TF_FORCE_GPU_ALLOW_GROWTH"]="true"

    # Show GPU
    print("Using GPU")
    print(tf.config.list_physical_devices('GPU'))
else:
    print("Using CPU")

Using GPU
[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]


# Model

## Modeling with Time Series Generator

In [8]:
def model_network(df, model, X_cols, y_cols, n_input):    
    if isinstance(X_cols, str):
        X_cols = [col for col in df.columns if X_cols in col]
    if isinstance(y_cols, str):
        y_cols = [col for col in df.columns if y_cols in col]

    if X_cols == y_cols:
        print('y is the same as x')
        df = df[X_cols]
    else:
        print('y is different than x')
        select_cols = X_cols + y_cols
        df = df[select_cols]
    column_indices = {name: i for i, name in enumerate(df.columns)}

    # Split dataframes
    df_train = df.iloc[:250].copy()
    df_test = df.iloc[250:-20].copy()
    df_val = df.iloc[-20::].copy()

    # Account for another scaler
    if X_cols != y_cols:
        X_df_train = df_train[X_cols]
        y_df_train = df_train[y_cols]
        X_df_test = df_test[X_cols]
        y_df_test = df_test[y_cols]
        X_df_val = df_val[X_cols]
        y_df_val = df_val[y_cols]

        X_scaler = MinMaxScaler()
        y_scaler = MinMaxScaler()

        X_train = X_scaler.fit_transform(X_df_train)
        y_train = y_scaler.fit_transform(y_df_train)
        X_test = X_scaler.transform(X_df_test)
        y_test = y_scaler.transform(y_df_test)
        X_val = X_scaler.transform(X_df_val)
        y_val = y_scaler.transform(y_df_val)

    # If X and y are the same i.e predicting self with self
    else:
        X_scaler = MinMaxScaler()
        y_scaler = 0
        df_train_scaled = X_scaler.fit_transform(df_train)
        df_test_scaled = X_scaler.transform(df_test)
        df_val_scaled = X_scaler.transform(df_val)
        X_train = df_train_scaled.copy()
        y_train = df_train_scaled.copy()
        X_test = df_test_scaled.copy()
        y_test = df_test_scaled.copy()
        X_val = df_val_scaled.copy()
        y_val = df_val_scaled.copy()

    # Get n_features
    X_n_features = X_train.shape[1]
    y_n_features = y_train.shape[1]

    # Reshape data
    X_train_reshaped = X_train.reshape((len(X_train), X_n_features))
    y_train_reshaped = y_train.reshape((len(y_train), y_n_features))
    X_test_reshaped = X_test.reshape((len(X_test), X_n_features))
    y_test_reshaped = y_test.reshape((len(y_test), y_n_features))
    X_val_reshaped = X_val.reshape((len(X_val), X_n_features))
    y_val_reshaped = y_val.reshape((len(y_val), y_n_features))

    # Get data generators
    train_data_gen = sequence.TimeseriesGenerator( \
                         X_train_reshaped,
                         y_train_reshaped,#[:,column_indices[y_cols]],
                         length=n_input)
    test_data_gen = sequence.TimeseriesGenerator( \
                        X_test_reshaped,
                        y_test_reshaped,#[:,column_indices[y_cols]],
                        length=n_input)
    val_data_gen = sequence.TimeseriesGenerator( \
                        X_val_reshaped,
                        y_val_reshaped,#[:,column_indices[y_cols]],
                        length=n_input)

    input_shape = (X_train_reshaped.shape[0], X_train_reshaped.shape[1])

    print(y_n_features)
    # Model
    model.compile(optimizer='adam', loss='mse')

    earlystopping = EarlyStopping(monitor='val_loss', patience=25)
    history = model.fit(train_data_gen,
                        epochs=2000, batch_size=64,
                        validation_data=(test_data_gen),
                        verbose=2, shuffle=False,
                        callbacks=[earlystopping])

    # my_loss= history.history['loss']
    # plt.plot(range(len(my_loss)),my_loss)


    # Predictions (r2_score)

    # If predicting one column for y
    if y_scaler and (len(y_cols) == 1):
        df_train['predicted'] = pd.DataFrame(y_scaler.inverse_transform(model.predict(train_data_gen)), columns=y_cols,
                                            index=df_train[n_input:][y_cols].index)
        df_test['predicted'] = pd.DataFrame(y_scaler.inverse_transform(model.predict(test_data_gen)), columns=y_cols,
                                            index=df_test[n_input:][y_cols].index)
        df_val['predicted'] = pd.DataFrame(y_scaler.inverse_transform(model.predict(val_data_gen)), columns=y_cols,
                                            index=df_val[n_input:][y_cols].index)
        cols = y_cols + ['predicted']
        df_predict_train = df_train[cols].dropna()
        df_predict_test = df_test[cols].dropna()
        df_predict_val = df_val[cols].dropna()

        train_r2 = r2_score(df_predict_train[y_cols], df_predict_train[['predicted']])
        test_r2 = r2_score(df_predict_test[y_cols], df_predict_test[['predicted']])
        val_r2 = r2_score(df_predict_val[y_cols], df_predict_val[['predicted']])

        print("Train r2 =", train_r2)
        print("Test r2 =", test_r2)
        print("Val r2 =", val_r2)


        fig, (ax1, ax2, ax3) = plt.subplots(ncols=3, figsize=(15, 6))
        df_predict_train[cols].plot(ax=ax1)
        df_predict_test[cols].plot(ax=ax2)
        df_predict_val[cols].plot(ax=ax3)

    # If predicting multiple columns for y
    elif y_scaler:
        df_predict_train = pd.DataFrame(y_scaler.inverse_transform(model.predict(train_data_gen)), columns=y_cols,
                                            index=y_df_train[n_input:].index)
        df_predict_test = pd.DataFrame(y_scaler.inverse_transform(model.predict(test_data_gen)), columns=y_cols,
                                            index=y_df_test[n_input:].index)
        df_predict_val = pd.DataFrame(y_scaler.inverse_transform(model.predict(val_data_gen)), columns=y_cols,
                                            index=y_df_val[n_input:].index)

        y_true_train = y_df_train.loc[df_predict_train.index]
        y_true_test = y_df_test.loc[df_predict_test.index]
        y_true_val = y_df_val.loc[df_predict_val.index]

        train_r2 = r2_score(y_true_train, df_predict_train)
        test_r2 = r2_score(y_true_test, df_predict_test)
        val_r2 = r2_score(y_true_val, df_predict_val)
        print("Train r2 =", train_r2)
        print("Test r2 =", test_r2)
        print("Val r2 =", val_r2)

    # if predicting self with self
    else:
        df_predict_train = pd.DataFrame(X_scaler.inverse_transform(model.predict(train_data_gen)), columns=y_cols,
                                         index=df_train[n_input:].index)
        df_predict_test = pd.DataFrame(X_scaler.inverse_transform(model.predict(test_data_gen)), columns=y_cols,
                                         index=df_test[n_input:].index)
        df_predict_val = pd.DataFrame(X_scaler.inverse_transform(model.predict(val_data_gen)), columns=y_cols,
                                         index=df_val[n_input:].index)

        y_true_train = df_train.loc[df_predict_train.index]
        y_true_test = df_test.loc[df_predict_test.index]
        y_true_val = df_val.loc[df_predict_val.index]

        train_r2 = r2_score(y_true_train, df_predict_train)
        test_r2 = r2_score(y_true_test, df_predict_test)
        val_r2 = r2_score(y_true_val, df_predict_val)
        print("Train r2 =", train_r2)
        print("Test r2 =", test_r2)
        print("Val r2 =", val_r2)

In [9]:
# df = df[[col for col in df.columns if 'VLO' in col or 'TSLA' in col]]
# X_cols = [col for col in df.columns if 'price' not in col]
# y_cols = 'price'
#X_cols = [col for col in df.columns if 'TSLA' in col]
#y_cols = X_cols
#X_cols = [col for col in df.columns if 'TSLA' in col and 'price' not in col]

df = pd.read_pickle('./data/modeling/model_df.pkl')
# X_cols = [col for col in df.columns if 'TSLA_price' not in col]
# y_cols = X_cols
X_cols = [col for col in df.columns if 'TSLA' in col]
y_cols = X_cols
n_features = len(y_cols)
n_days = 1
df.head(1)

split_point = 250
input_shape = (split_point, len(X_cols))

try:
    del model
except NameError:
    pass

model = Sequential()
model.add(LSTM(64, input_shape=input_shape))
model.add(Dropout(.3))
for i in range(1):
    model.add(Dense(64, activation='relu'))
    model.add(Dropout(.1))
model.add(Dense(len(y_cols)))


model_network(df, model, X_cols, y_cols, n_days)

y is the same as x
125
Epoch 1/2000
2/2 - 0s - loss: 0.0646 - val_loss: 0.3849
Epoch 2/2000
2/2 - 0s - loss: 0.0624 - val_loss: 0.3774
Epoch 3/2000
2/2 - 0s - loss: 0.0603 - val_loss: 0.3700
Epoch 4/2000
2/2 - 0s - loss: 0.0582 - val_loss: 0.3624
Epoch 5/2000
2/2 - 0s - loss: 0.0563 - val_loss: 0.3544
Epoch 6/2000
2/2 - 0s - loss: 0.0542 - val_loss: 0.3458
Epoch 7/2000
2/2 - 0s - loss: 0.0514 - val_loss: 0.3365
Epoch 8/2000
2/2 - 0s - loss: 0.0491 - val_loss: 0.3260
Epoch 9/2000
2/2 - 0s - loss: 0.0460 - val_loss: 0.3142
Epoch 10/2000
2/2 - 0s - loss: 0.0432 - val_loss: 0.3012
Epoch 11/2000
2/2 - 0s - loss: 0.0403 - val_loss: 0.2876
Epoch 12/2000
2/2 - 0s - loss: 0.0372 - val_loss: 0.2742
Epoch 13/2000
2/2 - 0s - loss: 0.0341 - val_loss: 0.2614
Epoch 14/2000
2/2 - 0s - loss: 0.0317 - val_loss: 0.2502
Epoch 15/2000
2/2 - 0s - loss: 0.0297 - val_loss: 0.2412
Epoch 16/2000
2/2 - 0s - loss: 0.0287 - val_loss: 0.2347
Epoch 17/2000
2/2 - 0s - loss: 0.0273 - val_loss: 0.2299
Epoch 18/2000
2/2

Epoch 144/2000
2/2 - 0s - loss: 0.0048 - val_loss: 0.1565
Epoch 145/2000
2/2 - 0s - loss: 0.0047 - val_loss: 0.1555
Epoch 146/2000
2/2 - 0s - loss: 0.0049 - val_loss: 0.1553
Epoch 147/2000
2/2 - 0s - loss: 0.0046 - val_loss: 0.1558
Epoch 148/2000
2/2 - 0s - loss: 0.0046 - val_loss: 0.1570
Epoch 149/2000
2/2 - 0s - loss: 0.0046 - val_loss: 0.1585
Epoch 150/2000
2/2 - 0s - loss: 0.0046 - val_loss: 0.1597
Epoch 151/2000
2/2 - 0s - loss: 0.0047 - val_loss: 0.1593
Epoch 152/2000
2/2 - 0s - loss: 0.0046 - val_loss: 0.1578
Epoch 153/2000
2/2 - 0s - loss: 0.0046 - val_loss: 0.1568
Epoch 154/2000
2/2 - 0s - loss: 0.0045 - val_loss: 0.1569
Epoch 155/2000
2/2 - 0s - loss: 0.0043 - val_loss: 0.1584
Epoch 156/2000
2/2 - 0s - loss: 0.0043 - val_loss: 0.1603
Epoch 157/2000
2/2 - 0s - loss: 0.0043 - val_loss: 0.1613
Epoch 158/2000
2/2 - 0s - loss: 0.0043 - val_loss: 0.1610
Epoch 159/2000
2/2 - 0s - loss: 0.0042 - val_loss: 0.1598
Epoch 160/2000
2/2 - 0s - loss: 0.0044 - val_loss: 0.1584
Epoch 161/2000