In [None]:
## Installing TA-Lib
# Both steps download a prebuilt conda-forge archive with curl and untar selected
# paths straight into system directories (no pip/conda install is involved).
# NOTE(review): the target paths and the `py310` build are hard-coded for a
# linux-64 runtime with Python 3.10; they need updating if the runtime changes.

# Step 1: libta-lib 0.4.0 -> unpack the archive's `lib/` entries into /usr/lib/x86_64-linux-gnu/.
url = 'https://anaconda.org/conda-forge/libta-lib/0.4.0/download/linux-64/libta-lib-0.4.0-h516909a_0.tar.bz2'
!curl -L $url | tar xj -C /usr/lib/x86_64-linux-gnu/ lib --strip-components=1
# Step 2: ta-lib 0.4.19 (Python 3.10 build) -> unpack only the `talib` package into dist-packages.
url = 'https://anaconda.org/conda-forge/ta-lib/0.4.19/download/linux-64/ta-lib-0.4.19-py310hde88566_4.tar.bz2'
!curl -L $url | tar xj -C /usr/local/lib/python3.10/dist-packages/ lib/python3.10/site-packages/talib --strip-components=3
# Importing here fails fast if either extraction step did not work.
import talib

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100  4472    0  4472    0     0  20016      0 --:--:-- --:--:-- --:--:-- 20053
100  503k  100  503k    0     0   806k      0 --:--:-- --:--:-- --:--:-- 1849k
  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100  4504    0  4504    0     0  17798      0 --:--:-- --:--:-- --:--:-- 17873
100  392k  100  392k    0     0   615k      0 --:--:-- --:--:-- --:--:--  119M


#Libraries

In [None]:
# keras-tqdm supplies `TQDMNotebookCallback`, which the imports cell below pulls in.
# NOTE(review): the version is not pinned, so a fresh runtime may install a different release.
!pip install -q keras-tqdm

In [None]:
# import tqdm

# # quietly deep-reload tqdm
# import sys
# from IPython.lib import deepreload

# stdout = sys.stdout
# sys.stdout = open('junk','w')
# deepreload.reload(tqdm)
# sys.stdout = stdout

# tqdm.__version__

In [None]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tqdm.keras import TqdmCallback
from keras_tqdm import TQDMNotebookCallback

import matplotlib.pyplot as plt

from sklearn.metrics import mean_squared_error

#functions/utils

## WindowGenerator

In [None]:
# data_returns.mean()
class WindowGenerator():
  """Slice train/val/test frames into (inputs, labels) windows for Keras.

  A window spans `input_width + shift` consecutive rows. The first
  `input_width` rows are the model inputs; the last `label_width` rows are
  the labels. When `label_columns` is given, only those columns are kept in
  the labels (looked up by their position in `train_df.columns`).
  """

  def __init__(self, input_width, label_width, shift,
               train_df, val_df, test_df,
               label_columns=None):
    # Keep references to the raw splits; datasets are built lazily from them.
    self.train_df = train_df
    self.val_df = val_df
    self.test_df = test_df

    # Name -> position lookups. `label_columns_indices` only exists when
    # label columns were requested.
    self.label_columns = label_columns
    if label_columns is not None:
      self.label_columns_indices = {
          column: position for position, column in enumerate(label_columns)}
    self.column_indices = {
        column: position for position, column in enumerate(train_df.columns)}

    # Window geometry.
    self.input_width = input_width
    self.label_width = label_width
    self.shift = shift
    self.total_window_size = input_width + shift

    # Inputs occupy the head of the window ...
    self.input_slice = slice(0, input_width)
    self.input_indices = np.arange(self.total_window_size)[self.input_slice]

    # ... and labels occupy its tail.
    self.label_start = self.total_window_size - self.label_width
    self.labels_slice = slice(self.label_start, None)
    self.label_indices = np.arange(self.total_window_size)[self.labels_slice]

  def __repr__(self):
    """Return a four-line summary of the window layout."""
    summary_lines = (
        f'Total window size: {self.total_window_size}',
        f'Input indices: {self.input_indices}',
        f'Label indices: {self.label_indices}',
        f'Label column name(s): {self.label_columns}',
    )
    return '\n'.join(summary_lines)

  def split_window(self, features):
    """Split a batch of windows into an `(inputs, labels)` pair.

    `features` is indexed as `[batch, time, feature]`.
    """
    inputs = features[:, self.input_slice, :]
    labels = features[:, self.labels_slice, :]
    if self.label_columns is not None:
      # Keep only the requested label columns, stacked on the last axis.
      picked = []
      for name in self.label_columns:
        picked.append(labels[:, :, self.column_indices[name]])
      labels = tf.stack(picked, axis=-1)

    # Slicing loses the static time dimension; restore it so the resulting
    # `tf.data.Dataset` is easier to inspect.
    inputs.set_shape([None, self.input_width, None])
    labels.set_shape([None, self.label_width, None])

    return inputs, labels

  def make_dataset(self, data):
    """Build a shuffled, batched (size 32) dataset of split windows from `data`.

    NOTE(review): `shuffle=True` is applied to every split that goes through
    this method, including validation and test.
    """
    windows = tf.keras.utils.timeseries_dataset_from_array(
        data=np.array(data, dtype=np.float32),
        targets=None,
        sequence_length=self.total_window_size,
        sequence_stride=1,
        shuffle=True,
        batch_size=32,)

    # `map` applies `split_window` to every batch of windows.
    return windows.map(self.split_window)

  @property
  def train(self):
    """Dataset built from `train_df` (rebuilt on every access)."""
    return self.make_dataset(self.train_df)

  @property
  def val(self):
    """Dataset built from `val_df` (rebuilt on every access)."""
    return self.make_dataset(self.val_df)

  @property
  def test(self):
    """Dataset built from `test_df` (rebuilt on every access)."""
    return self.make_dataset(self.test_df)

  @property
  def example(self):
    """Get and cache an example batch of `inputs, labels` for plotting."""
    cached = getattr(self, '_example', None)
    if cached is not None:
      return cached
    # Nothing cached yet: take the first batch of the training dataset.
    cached = next(iter(self.train))
    self._example = cached
    return cached


# map() applies a provided function to every datapoint in the dataset

##add_rsi_indicator()

In [None]:
def add_rsi_indicator(symbol_df):
    """Return a copy of `symbol_df` with an added `rsi_val` column.

    `rsi_val` is `talib.RSI` of the `close` column, called with the library's
    default settings. Rows containing any missing value are dropped from the
    returned frame; the input frame is not modified.
    """
    with_rsi = symbol_df.copy()
    with_rsi['rsi_val'] = talib.RSI(with_rsi.close)
    return with_rsi.dropna(axis=0, how='any')

##get_returns()

In [None]:
def get_returns(df):
  """Return a re-indexed copy of `df` with a `returns` column.

  `returns` holds the ratio close[t] / close[t-1] (a price relative, not a
  percentage change). The first row has no predecessor, so it is removed
  together with any other row containing a missing value.
  """
  close = df['close'].to_numpy()
  price_ratio = close[1:] / close[:-1]

  out = df.reset_index(drop=True)
  # Row 0 stays NaN and is removed by the dropna below.
  out.loc[1:, 'returns'] = price_ratio
  return out.dropna(axis=0, how='any').reset_index(drop=True)

##get_log_returns()

In [None]:
def get_log_returns(df):
  """Return a re-indexed copy of `df` with `returns` and `log_returns` columns.

  `returns` is the ratio close[t] / close[t-1] and `log_returns` is its
  natural logarithm. Rows with any missing value (including the first row,
  which has no previous close) are dropped.
  """
  close = df['close'].to_numpy()

  out = df.reset_index(drop=True)
  # Row 0 has no predecessor; it stays NaN until the dropna below.
  out.loc[1:, 'returns'] = close[1:] / close[:-1]
  out['log_returns'] = np.log(out['returns'])
  return out.dropna(axis=0, how='any').reset_index(drop=True)

##train_val_test_split()

In [None]:
def create_seqs(df, feat_cols,feat_len = 20, shift = 19, label_len = 1, label_col_idx = 1):
  """Cut `df[feat_cols]` into (input sequence, label) pairs.

  Args:
    df: frame containing every column named in `feat_cols`.
    feat_cols: feature columns, in the order the model will see them.
    feat_len: number of consecutive rows in each input sequence.
    shift: extra rows between the end of the input and the label.
    label_len: added to the label offset only; one label row is always returned.
    label_col_idx: position within `feat_cols` of the column used as the label.
      Defaults to 1, the value that was previously hard-coded.

  Returns:
    `(inputs, labels)`: two equally long lists. Each input is an array of
    shape `(1, feat_len, len(feat_cols))` and each label an array of shape
    `(1, 1)`; the leading axis of size 1 acts as a batch dimension.

  NOTE(review): the label is read from row `start + feat_len + shift +
  label_len`, i.e. one row past the `total_window` rows that begin at
  `start`. Confirm this is the intended forecast horizon.
  """
  values = df.loc[:, feat_cols].to_numpy()
  total_window = feat_len + shift + label_len

  inputs, labels = [], []
  # Stop early enough that `start + total_window` is still a valid row index.
  for start in range(len(values) - total_window):
    inputs.append(np.expand_dims(values[start: start + feat_len], axis=0))
    labels.append(
        np.expand_dims(values[start + total_window, [label_col_idx]], axis=1))
  return (inputs, labels)

def train_val_test_split(symbol_dfs, feat_cols, split_sizes = [0.7, 0.1, 0.2],feat_len = 20, shift = 19, label_len = 1):
  """Split every symbol's `'full'` frame chronologically and build datasets.

  For each symbol, the rows of `symbol_dfs[symbol]['full']` are cut into
  consecutive train / validation / test ranges, turned into sequences with
  `create_seqs`, and stored as `tf.data.Dataset` objects under the keys
  `'train'`, `'val'` and `'test'`.

  Args:
    symbol_dfs: dict keyed by symbol; each value is a dict with a `'full'` frame.
    feat_cols: feature columns forwarded to `create_seqs`.
    split_sizes: `[train, val, test]` fractions. The first two define the
      boundaries; the test split takes whatever rows remain.
    feat_len, shift, label_len: forwarded to `create_seqs`.

  Returns:
    The same `symbol_dfs` dict, updated in place.

  Raises:
    ValueError: if the train/val fractions do not give ordered boundaries
      inside [0, 1].
  """
  train_frac = split_sizes[0]
  # Rounding removes float drift (0.7 + 0.1 == 0.7999999999999999), so the
  # default boundaries are exactly the 0.7 / 0.8 used before.
  val_end_frac = round(split_sizes[0] + split_sizes[1], 10)
  if not 0 <= train_frac <= val_end_frac <= 1:
    raise ValueError(
        'split_sizes must give 0 <= train <= train + val <= 1, got {}'.format(split_sizes))

  for symbol in list(symbol_dfs.keys()):
    symbol_df = symbol_dfs[symbol]['full']
    # Use this symbol's own length so frames of different sizes are split
    # at the requested fractions.
    size = symbol_df.shape[0]
    train_end = int(size * train_frac)
    val_end = int(size * val_end_frac)

    row_ranges = {
        'train': (0, train_end),
        'val': (train_end, val_end),
        'test': (val_end, size),
    }
    for split_name, (start, stop) in row_ranges.items():
      split_df = symbol_df.iloc[start:stop].reset_index(drop = True)
      seqs = create_seqs(split_df, feat_cols, feat_len, shift, label_len)
      symbol_dfs[symbol][split_name] = tf.data.Dataset.from_tensor_slices(seqs)

  return symbol_dfs

##model_compile_and_fit()

In [None]:
def create_lstm(model_name, input_shape = (20, 2)):
  """Build an uncompiled two-layer LSTM regressor.

  Architecture: Input -> LSTM(32, return_sequences=True) -> LSTM(8) -> Dense(1).

  Args:
    model_name: name given to the Keras `Sequential` model.
    input_shape: `(timesteps, features)` of one input sequence. The default
      `(20, 2)` is the shape that was previously hard-coded; pass a different
      value when the sequence length or number of feature columns changes.

  Returns:
    The uncompiled `tf.keras` model.
  """
  # Reset Keras global state so repeated calls (one model per symbol) do not
  # accumulate state from earlier models.
  tf.keras.backend.clear_session()

  layers = [
    tf.keras.layers.Input(shape=input_shape, name='Seq_Input'),
    tf.keras.layers.LSTM(32, return_sequences=True),
    tf.keras.layers.LSTM(8),
    tf.keras.layers.Dense(units=1),
  ]
  return tf.keras.models.Sequential(layers, name = model_name)

def model_compile_and_fit(model_name, train_data, val_data, max_epochs = 20, patience=2):
  """Create, compile and fit the LSTM built by `create_lstm`.

  The model is compiled with MSE loss, the Adam optimizer and an MAE metric.
  Training stops early when `val_loss` has not improved for `patience`
  epochs; a `TqdmCallback` is attached for progress reporting.

  Returns:
    `(history, model)`: the Keras fit history and the fitted model.
  """
  model = create_lstm(model_name)

  model.compile(
      loss=tf.keras.losses.MeanSquaredError(),
      optimizer=tf.keras.optimizers.Adam(),
      metrics=[tf.keras.metrics.MeanAbsoluteError()])

  fit_callbacks = [
      tf.keras.callbacks.EarlyStopping(monitor='val_loss',
                                       patience=patience,
                                       mode='min'),
      TqdmCallback(verbose=0),
  ]

  history = model.fit(train_data,
                      epochs=max_epochs,
                      validation_data=val_data,
                      callbacks=fit_callbacks)
  return history, model

##evaluate_symbol()

In [None]:
def tfds_to_numpy_label(ds):
  """Collect one label per dataset element into a 1-D NumPy array.

  `ds` yields `(inputs, label)` pairs whose label exposes `.numpy()`. Only the
  first value of each flattened label is kept, so an element that carries
  several labels contributes just its first one.
  """
  return np.array([label.numpy().flatten()[0] for _, label in ds])

def evaluate_symbol(symbol_dfs):
  """Train one LSTM per symbol and record its test-set MSE and RMSE.

  For every symbol the `'train'` / `'val'` datasets are used to fit a model
  (named 'test_model', at most 5 epochs, early-stopping patience 2). The
  fitted model and history are stored under `'model'` and `'history'`, and
  the test errors under `['evaluation']['mse']` and `['evaluation']['rmse']`.

  Returns:
    The same `symbol_dfs` dict, updated in place.
  """
  for symbol in list(symbol_dfs.keys()):
    print('Symbol : {}'.format(symbol))
    entry = symbol_dfs[symbol]
    train_ds = entry.get('train')
    val_ds = entry.get('val')
    test_ds = entry.get('test')

    hist, model = model_compile_and_fit('test_model', train_ds, val_ds, max_epochs = 5, patience=2)

    entry['model'] = model
    entry['history'] = hist

    preds = model.predict(test_ds)
    test_true = tfds_to_numpy_label(test_ds)

    # Derive RMSE from the MSE rather than calling
    # mean_squared_error(..., squared=False): that keyword is deprecated and
    # rejected by newer scikit-learn releases.
    mse = mean_squared_error(test_true, preds)
    rmse = np.sqrt(mse)

    # Tolerate an entry whose 'evaluation' slot is missing or a placeholder.
    evaluation = entry.get('evaluation')
    if not isinstance(evaluation, dict):
      evaluation = {}
      entry['evaluation'] = evaluation
    evaluation['mse'] = mse
    evaluation['rmse'] = rmse

    print('mse : {} | rmse : {}'.format(mse, rmse))
    print('')
  return symbol_dfs


#initialize

In [None]:
# Load the processed close-price table.
# NOTE(review): this is an absolute path under /content/drive (presumably a
# mounted Drive folder); the cell fails on any machine without that mount.
data = pd.read_csv('/content/drive/MyDrive/Charles Schwab/Data/processed/close_price_data_50', index_col= False)
# Rename a column called 'index' to 'timestamp'.
data = data.rename({'index':'timestamp'},axis =1 )
# Drop every column that contains at least one missing value.
data = data.dropna(axis = 1, how = 'any')
data.head(5)

Unnamed: 0,timestamp,TSLA,META,MSFT,AAPL,NVDA,GOOGL,AMZN,AMD,INTC,...,JPM,C,LUV,GM,KDP,NCLH,QCOM,TXN,PLUG,SBUX
0,2014-02-07 00:00:00-05:00,12.435333,64.32,30.785587,16.368795,3.743081,29.465466,18.054001,3.47,18.418928,...,42.844234,39.150589,19.640068,27.489239,6.269822,35.209999,55.553337,32.300919,3.1,30.928185
1,2014-02-10 00:00:00-05:00,13.104,63.549999,30.987686,16.662035,3.754874,29.352604,18.043501,3.63,18.479792,...,42.935032,39.13472,19.264313,26.568119,6.245455,35.619999,55.762371,32.616619,3.55,31.245647
2,2014-02-11 00:00:00-05:00,13.108,64.849998,31.299236,16.881582,3.832708,29.784285,18.0895,3.7,18.616737,...,43.457157,39.404495,19.539259,26.834572,6.271106,36.25,56.456692,33.209515,3.51,31.120329
3,2014-02-12 00:00:00-05:00,13.021333,64.449997,31.551868,16.880314,3.969505,29.696947,17.4625,3.69,18.677601,...,43.525261,39.642551,19.438437,27.070557,6.41089,35.889999,57.053936,33.340393,3.74,30.873867
4,2014-02-13 00:00:00-05:00,13.308667,67.330002,31.669739,17.148363,4.094512,30.027529,17.860001,3.7,18.791718,...,43.911179,39.56321,19.429276,26.796497,6.421149,34.700001,57.068897,33.679199,3.94,31.19969


In [None]:
# Every column after the first one is treated as a ticker symbol.
symbols = data.columns[1:]
symbols

Index(['TSLA', 'META', 'MSFT', 'AAPL', 'NVDA', 'GOOGL', 'AMZN', 'AMD', 'INTC',
       'F', 'XOM', 'MU', 'T', 'DIS', 'BA', 'JNJ', 'VZ', 'ENPH', 'PFE', 'CMCSA',
       'NFLX', 'BAC', 'RTX', 'KO', 'CCL', 'BMY', 'CSCO', 'ON', 'JPM', 'C',
       'LUV', 'GM', 'KDP', 'NCLH', 'QCOM', 'TXN', 'PLUG', 'SBUX'],
      dtype='object')

In [None]:
# Per-symbol registry: every symbol is processed separately and keeps its own
# artefacts in this dict. All slots start as '' placeholders and are filled by
# later cells:
#   'full'                 -> preprocessed frame (set in preprocess_data)
#   'train'/'val'/'test'   -> datasets (set in train_val_test_split)
#   'model'/'history'      -> fitted model and fit history (set in evaluate_symbol)
#   'evaluation'           -> {'mse', 'rmse'} test errors (set in evaluate_symbol)
symbol_dfs = {symbol :
 {
    'full': '',
    'train': '',
    'val':'' ,
    'test' : '',
    'model':'',
    'history':'',
    'evaluation':
   {
       'mse':'',
       'rmse':''
       }
  }
  for symbol in symbols}

#Preprocessing

##only price

In [None]:
# Price-only variant: one frame per symbol with `close` and `returns` columns,
# stored under the key '<SYMBOL>_close'.
# NOTE(review): the next ("with_rsi") cell rebinds `symbol_close_dfs`, so under
# Run All the result of this cell is discarded.
symbol_close_dfs = {}
for symbol in data.columns[1:]:
  # Single-column frame with the symbol's prices renamed to 'close'.
  symbol_df = data[symbol].to_frame().rename({symbol:'close'}, axis = 1)
  # symbol_df = add_rsi_indicator(symbol_df)
  symbol_df = get_returns(symbol_df)

  #store dataframe in dict
  symbol_close_dfs['{}_close'.format(symbol)] = symbol_df


## with_rsi

In [None]:
# RSI variant: one frame per symbol with `close`, `rsi_val` and `returns`,
# stored under symbol_close_dfs[symbol]['full'].
# NOTE(review): `symbol_close_dfs` is not read by any later cell visible in
# this notebook; the pipeline below works on `symbol_dfs` instead.
symbol_close_dfs = {symbol : {} for symbol in symbols}
for symbol in data.columns[1:]:
  # Single-column frame with the symbol's prices renamed to 'close'.
  symbol_df = data[symbol].to_frame().rename({symbol:'close'}, axis = 1)
  symbol_df = add_rsi_indicator(symbol_df)
  symbol_df = get_returns(symbol_df)

  #store dataframe in dict
  symbol_close_dfs[symbol]['full'] = symbol_df


In [None]:
def preprocess_data(data, symbol_dfs):
  """Build the per-symbol feature frame and store it under `'full'`.

  For every symbol key in `symbol_dfs`, the matching column of `data` is
  turned into a one-column `close` frame, extended with `rsi_val`
  (`add_rsi_indicator`) and with `returns` / `log_returns`
  (`get_log_returns`), then saved as `symbol_dfs[symbol]['full']`.

  Args:
    data: wide price table with one column per symbol.
    symbol_dfs: dict keyed by symbol; each value is a dict of per-symbol slots.

  Returns:
    The same `symbol_dfs` dict, updated in place.
  """
  # Iterate the symbols of the dict being filled rather than the
  # notebook-level `symbols` variable, so the function depends only on its
  # arguments.
  for symbol in list(symbol_dfs):
    close_df = data[symbol].to_frame().rename({symbol:'close'}, axis = 1)
    close_df = add_rsi_indicator(close_df)
    close_df = get_log_returns(close_df)

    #store dataframe in dict
    symbol_dfs[symbol]['full'] = close_df

  return symbol_dfs

In [None]:
# preprocess_data fills symbol_dfs[symbol]['full'] in place and returns that same dict.
# NOTE(review): the result is bound to `symbols_dfs` (extra "s"), which is just a
# second name for the same object as `symbol_dfs`.
symbols_dfs = preprocess_data(data, symbol_dfs)

#train test split

In [None]:
# Model features, in order. create_seqs reads its label from column position 1
# by default, which is 'log_returns' in this list.
feat_cols = ['rsi_val', 'log_returns']

# Build the train/val/test datasets for every symbol (stored back into symbol_dfs).
symbol_dfs = train_val_test_split(symbol_dfs, feat_cols, split_sizes = [0.7, 0.1, 0.2], feat_len = 20, shift = 19, label_len = 1)

#Evaluation

In [None]:
# Train and evaluate one model per symbol; this is the long-running cell of the notebook.
symbol_dfs = evaluate_symbol(symbol_dfs)

Symbol : TSLA


0epoch [00:00, ?epoch/s]

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
mse : 0.0025681932475778877 | rmse : 0.05067734451979393

Symbol : META


0epoch [00:00, ?epoch/s]

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
mse : 0.0010558396067834096 | rmse : 0.032493685644805044

Symbol : MSFT


0epoch [00:00, ?epoch/s]

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
mse : 0.0003841517831555898 | rmse : 0.019599790385501314

Symbol : AAPL


0epoch [00:00, ?epoch/s]

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
mse : 0.0005999844081962652 | rmse : 0.02449457915940311

Symbol : NVDA


0epoch [00:00, ?epoch/s]

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
mse : 0.001175526793967933 | rmse : 0.03428595622070257

Symbol : GOOGL


0epoch [00:00, ?epoch/s]

Epoch 1/5
Epoch 2/5
Epoch 3/5
mse : 0.0006326472020775941 | rmse : 0.025152479044372428

Symbol : AMZN


0epoch [00:00, ?epoch/s]

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
mse : 0.0008210790371918283 | rmse : 0.028654476739103582

Symbol : AMD


0epoch [00:00, ?epoch/s]

Epoch 1/5
Epoch 2/5
Epoch 3/5
mse : 0.001320690327755726 | rmse : 0.036341303330449305

Symbol : INTC


0epoch [00:00, ?epoch/s]

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
mse : 0.0006869850908006414 | rmse : 0.026210400431901863

Symbol : F


0epoch [00:00, ?epoch/s]

Epoch 1/5
Epoch 2/5
Epoch 3/5
mse : 0.0022600836952230406 | rmse : 0.04754033755899342

Symbol : XOM


0epoch [00:00, ?epoch/s]

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
mse : 0.00046706938828136173 | rmse : 0.021611788178708437

Symbol : MU


0epoch [00:00, ?epoch/s]

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
mse : 0.0006858492541254302 | rmse : 0.026188723797188555

Symbol : T


0epoch [00:00, ?epoch/s]

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
mse : 0.0002958945820708428 | rmse : 0.017201586614927204

Symbol : DIS


0epoch [00:00, ?epoch/s]

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
mse : 0.0004950160158724225 | rmse : 0.022248955388341776

Symbol : BA


0epoch [00:00, ?epoch/s]

Epoch 1/5
Epoch 2/5
Epoch 3/5
mse : 0.0028345673906990656 | rmse : 0.0532406554307802

Symbol : JNJ


0epoch [00:00, ?epoch/s]

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
mse : 0.00015047082928887448 | rmse : 0.01226665517934186

Symbol : VZ


0epoch [00:00, ?epoch/s]

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
mse : 0.0002199340861714252 | rmse : 0.014830174853029386

Symbol : ENPH


0epoch [00:00, ?epoch/s]

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
mse : 0.001987930200139304 | rmse : 0.04458621087443184

Symbol : PFE


0epoch [00:00, ?epoch/s]

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
mse : 0.00023994904940242105 | rmse : 0.015490288874079175

Symbol : CMCSA


0epoch [00:00, ?epoch/s]

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
mse : 0.0003341601359276695 | rmse : 0.01828004748154855

Symbol : NFLX


0epoch [00:00, ?epoch/s]

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
mse : 0.0012668564084023903 | rmse : 0.03559292638154933

Symbol : BAC


0epoch [00:00, ?epoch/s]

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
mse : 0.00043490166488810184 | rmse : 0.020854296077501677

Symbol : RTX


0epoch [00:00, ?epoch/s]

Epoch 1/5
Epoch 2/5
Epoch 3/5
mse : 0.00028846051842052676 | rmse : 0.01698412548294809

Symbol : KO


0epoch [00:00, ?epoch/s]

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
mse : 0.00014030931053857652 | rmse : 0.011845223110544457

Symbol : CCL


0epoch [00:00, ?epoch/s]

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
mse : 0.001736488397653511 | rmse : 0.041671193859229794

Symbol : BMY


0epoch [00:00, ?epoch/s]

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
mse : 0.00019904584822689003 | rmse : 0.014108360933393007

Symbol : CSCO


0epoch [00:00, ?epoch/s]

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
mse : 0.0002703800509421569 | rmse : 0.0164432372403416

Symbol : ON


0epoch [00:00, ?epoch/s]

Epoch 1/5
Epoch 2/5
Epoch 3/5
mse : 0.0011195310513603142 | rmse : 0.033459394067441126

Symbol : JPM


0epoch [00:00, ?epoch/s]

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
mse : 0.000273111635890974 | rmse : 0.01652608955231013

Symbol : C


0epoch [00:00, ?epoch/s]

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
mse : 0.0004750515043766417 | rmse : 0.021795676277111514

Symbol : LUV


0epoch [00:00, ?epoch/s]

Epoch 1/5
Epoch 2/5
Epoch 3/5
mse : 0.0006832735599484469 | rmse : 0.02613950190704572

Symbol : GM


0epoch [00:00, ?epoch/s]

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
mse : 0.0010180011267517009 | rmse : 0.03190612992438445

Symbol : KDP


0epoch [00:00, ?epoch/s]

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
mse : 0.0003933114947186435 | rmse : 0.01983208246046399

Symbol : NCLH


0epoch [00:00, ?epoch/s]

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
mse : 0.0015961450554967038 | rmse : 0.039951784134087225

Symbol : QCOM


0epoch [00:00, ?epoch/s]

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
mse : 0.0006342661183196612 | rmse : 0.02518464052393167

Symbol : TXN


0epoch [00:00, ?epoch/s]

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
mse : 0.00032494623617564106 | rmse : 0.01802626517544999

Symbol : PLUG


0epoch [00:00, ?epoch/s]

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
mse : 0.005077765259746368 | rmse : 0.0712584399193974

Symbol : SBUX


0epoch [00:00, ?epoch/s]

Epoch 1/5
Epoch 2/5
Epoch 3/5
mse : 0.0018923757217562392 | rmse : 0.04350144505365585



In [None]:
import os

def save_results(symbol_dfs, path):
  """Append each symbol's test metrics to the CSV file at `path`.

  One row (`symbol`, `mse`, `rmse`) is written per symbol. If `path` already
  exists, its rows are kept and the new rows are appended after them.

  Args:
    symbol_dfs: dict keyed by symbol; each value holds an `'evaluation'`
      dict with `'mse'` and `'rmse'`.
    path: destination CSV file.
  """
  # Build all rows first and create the frame once instead of concatenating
  # inside the loop.
  records = [
      {
          'symbol': symbol,
          # 'model_history': entry.get('history'),
          'mse': entry.get('evaluation').get('mse'),
          'rmse': entry.get('evaluation').get('rmse'),
      }
      for symbol, entry in symbol_dfs.items()
  ]
  new_rows = pd.DataFrame(records, columns=['symbol', 'mse', 'rmse'])

  if os.path.exists(path):
    # The file is written with to_csv below, so it must be read back as CSV
    # (it was previously read with read_excel, which cannot parse it).
    previous_rows = pd.read_csv(path, index_col=None)
    res_df = pd.concat([previous_rows, new_rows], ignore_index = True)
  else:
    res_df = new_rows

  res_df.to_csv(path, index = False)
  print('saved results @ {}'.format(path))

In [None]:
import datetime
# Timestamp the output file so repeated runs do not overwrite each other.
dtime = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M")
res_path = '/content/drive/MyDrive/Charles Schwab/Results/{}_NUS_expected_returns_lstm_v1.csv'.format(dtime)

# Pass `symbol_dfs`, the name the split and evaluation cells assign to.
save_results(symbol_dfs,res_path)

saved results @ /content/drive/MyDrive/Charles Schwab/Results/2024-01-29_12-34_NUS_expected_returns_lstm_v1.csv


# Work in progress (scratch cells start here)

In [None]:
# Raises SystemExit so that execution stops at this cell.
# NOTE(review): presumably a guard to keep Run All from entering the scratch
# cells below; confirm, and consider removing those cells instead.
import sys
sys.exit()

SystemExit: 

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)


In [None]:
# Scratch: list the symbol keys held in symbol_dfs.
list(symbol_dfs.keys())

In [None]:
def evaluate_symbol(symbol_dfs):
  """Train one LSTM per symbol and record its test-set MSE and RMSE.

  NOTE(review): this redefines the `evaluate_symbol` declared earlier in the
  notebook (this copy has no progress prints); running this cell replaces
  the earlier definition.

  For every symbol the `'train'` / `'val'` datasets are used to fit a model
  (named 'test_model', at most 5 epochs, early-stopping patience 2). The
  fitted model and history are stored under `'model'` and `'history'`, and
  the test errors under `['evaluation']['mse']` and `['evaluation']['rmse']`.

  Returns:
    The same `symbol_dfs` dict, updated in place.
  """
  for symbol in list(symbol_dfs.keys()):
    entry = symbol_dfs[symbol]

    train_ds = entry.get('train')
    val_ds = entry.get('val')
    test_ds = entry.get('test')

    hist, model = model_compile_and_fit('test_model', train_ds, val_ds, max_epochs = 5, patience=2)

    entry['model'] = model
    entry['history'] = hist

    preds = model.predict(test_ds)
    test_true = tfds_to_numpy_label(test_ds)

    # Derive RMSE from the MSE rather than calling
    # mean_squared_error(..., squared=False): that keyword is deprecated and
    # rejected by newer scikit-learn releases.
    mse = mean_squared_error(test_true, preds)
    rmse = np.sqrt(mse)

    # Tolerate an entry whose 'evaluation' slot is missing or a placeholder.
    evaluation = entry.get('evaluation')
    if not isinstance(evaluation, dict):
      evaluation = {}
      entry['evaluation'] = evaluation
    evaluation['mse'] = mse
    evaluation['rmse'] = rmse

  return symbol_dfs


In [None]:
# Scratch: show the 'full' entry of the first symbol in symbol_dfs.
symbol_dfs.get(next(iter(symbol_dfs))).get('full')

In [None]:
# Scratch: pull the three dataset entries of the first symbol for manual experiments.
train_ds = symbol_dfs.get(next(iter(symbol_dfs))).get('train')
val_ds = symbol_dfs.get(next(iter(symbol_dfs))).get('val')
test_ds = symbol_dfs.get(next(iter(symbol_dfs))).get('test')

In [None]:
# Scratch: display the training dataset object.
train_ds

In [None]:
# Scratch: fit a single model on the first symbol's train/val datasets.
hist, model = model_compile_and_fit('test_model', train_ds, val_ds, max_epochs = 5, patience=2)

In [None]:
# Scratch: tabulate the fit history, one column per key of hist.history.
hist_dataframe = pd.DataFrame(data = hist.history,columns = hist.history.keys())
hist_dataframe

In [None]:
# NOTE(review): import placed mid-notebook; it belongs with the other imports at the top.
import plotly.express as px

# Scratch: line plot of the 'loss' column of the history frame.
fig = px.line(hist_dataframe, y="loss")
fig.show()

In [None]:
# Scratch: dataset restricted to the first element of train_ds (shows the dataset object, not its values).
train_ds.take(1)

In [None]:
# Scratch: predictions of the scratch model on the first symbol's test dataset.
preds = model.predict(test_ds)
# preds

In [None]:
# Scratch: print the layer summary of the scratch model.
model.summary()

In [None]:
# Scratch: check the shape of the prediction array.
preds.shape

In [None]:
# Scratch: dataset restricted to the first element of test_ds (shows the dataset object, not its values).
test_ds.take(1)

In [None]:
# Scratch: inline version of tfds_to_numpy_label — collect the first value of
# every flattened label in test_ds.
test_ds_label_list = []
for X, y in (test_ds):
  test_ds_label_list.append(y.numpy().flatten()[0])

In [None]:
def tfds_to_numpy_label(ds):
  """Return the first value of every element's label as a 1-D NumPy array.

  `ds` is iterated as `(inputs, label)` pairs; each label is converted with
  `.numpy()`, flattened, and only its first entry is kept.
  """
  first_values = [label.numpy().flatten()[0] for _, label in ds]
  return np.array(first_values)

In [None]:
# Scratch: array form of the collected labels; show the first five.
test_ds_true = np.array(test_ds_label_list)
test_ds_true[:5]

In [None]:
# Scratch: first five predictions, for side-by-side comparison with the true labels.
preds[:5]

In [None]:
from sklearn.metrics import mean_squared_error
# Scratch: (MSE, RMSE) of the scratch model. RMSE is derived from the MSE
# because the `squared=False` keyword is deprecated and rejected by newer
# scikit-learn releases.
scratch_mse = mean_squared_error(test_ds_true, preds)
scratch_mse, np.sqrt(scratch_mse)

# End of scratch section