In [1]:
%matplotlib inline

In [2]:
import os
import time
import datetime

import IPython
import IPython.display

import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from pandas.plotting import autocorrelation_plot
# import seaborn as sns
import tensorflow as tf
from tensorflow.keras.preprocessing import timeseries_dataset_from_array
from tensorflow.keras.preprocessing.sequence import TimeseriesGenerator

from tensorflow.keras.layers import Input, Flatten, Dense, Dropout,LSTM, SimpleRNN, GRU, Bidirectional
from tensorflow.keras.models import Model, Sequential, load_model
from tensorflow.keras.callbacks import TensorBoard, ModelCheckpoint
from tensorflow.keras.layers.experimental.preprocessing import Normalization

# tf.keras.layers.experimental.preprocessing.Normalization
mpl.rcParams['figure.figsize'] = (8, 6)
mpl.rcParams['axes.grid'] = False

from sklearn.model_selection import cross_validate, train_test_split
from sklearn.preprocessing import PolynomialFeatures, StandardScaler,MinMaxScaler
from sklearn.pipeline import make_pipeline
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
 

In [3]:
START_DATE = "2013-02-14"
END_DATE = "2021-12-31"
SPLIT_FACTOR = 0.85
SPLIT_VAL_FACTOR = 0.25

In [4]:
def parser(x):
    return datetime.datetime.strptime(x,'%Y-%m-%d')

In [5]:
raw_data = pd.read_csv(r'data\data_for_time_series\dataset_full_ts.csv', header=0, sep = ";", parse_dates=[0], date_parser=parser)
mask = (raw_data['Date'] >= START_DATE) & (raw_data['Date'] <= END_DATE)
stock_df = raw_data.loc[mask]
stock_df.reset_index(inplace = True, drop = True)
stock_df

Unnamed: 0,Date,Adj Close,Lag1,Lag2,Lag3,Lag4,Lag5,Lag6,Lag7,Lag8,...,MCD,T,NKE,DHR,LOW,LIN,TXN,NEE,T10YIE,T5YIE
0,2013-02-14,24.459604,22.333021,22.153856,22.761450,21.818892,22.075962,22.185017,22.075962,22.075962,...,72.100479,20.558605,23.729067,30.191380,33.949581,92.296761,26.960524,13.315960,2.56,2.22
1,2013-02-15,25.525446,22.153856,22.761450,21.818892,22.075962,22.185017,22.075962,22.075962,22.247332,...,72.362450,20.599384,23.872437,30.448080,33.580658,91.417023,26.800943,13.371286,2.54,2.19
2,2013-02-19,25.925140,22.761450,21.818892,22.075962,22.185017,22.075962,22.075962,22.247332,22.138277,...,72.323959,20.779978,23.663897,30.507313,33.812313,91.906708,27.080208,13.445062,2.55,2.20
3,2013-02-20,25.470594,21.818892,22.075962,22.185017,22.075962,22.075962,22.247332,22.138277,22.114906,...,72.370193,20.663462,23.468407,30.097588,33.151680,91.607933,26.378075,13.452440,2.54,2.19
4,2013-02-21,25.164948,22.075962,22.185017,22.075962,22.075962,22.247332,22.138277,22.114906,22.418707,...,72.547455,20.640163,23.694309,30.107458,32.319466,91.259361,25.915297,13.376823,2.53,2.19
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2232,2021-12-27,66.070000,65.774475,65.565636,66.579994,66.400002,65.660004,66.589996,65.339996,66.419998,...,268.239990,24.286488,167.580002,327.197418,252.866135,343.420013,191.839996,91.320000,2.50,2.75
2233,2021-12-28,66.930000,65.565636,66.579994,66.400002,65.660004,66.589996,65.339996,66.419998,66.750000,...,267.880005,24.325691,166.419998,323.230011,253.185089,343.790008,191.100006,91.879997,2.50,2.78
2234,2021-12-29,67.010002,66.579994,66.400002,65.660004,66.589996,65.339996,66.419998,66.750000,66.320000,...,268.489990,24.149275,168.779999,326.579987,256.723419,345.399994,190.809998,91.989998,2.53,2.83
2235,2021-12-30,66.860001,66.400002,65.660004,66.589996,65.339996,66.419998,66.750000,66.320000,64.400002,...,267.209992,24.286488,167.490005,328.470001,255.547317,344.329987,189.410004,92.769997,2.58,2.86


In [6]:
adj_close_target_org = np.array(stock_df["Adj Close"])
dataset_for_tf =  stock_df.iloc[:, 1:] 

In [7]:
adj_close_target_org.shape, dataset_for_tf.shape

((2237,), (2237, 129))

In [8]:
adj_close_target_org

array([24.45960426, 25.52544594, 25.92514038, ..., 67.01000214,
       66.86000061, 67.58999634])

In [9]:
dataset_for_tf

Unnamed: 0,Adj Close,Lag1,Lag2,Lag3,Lag4,Lag5,Lag6,Lag7,Lag8,Lag9,...,MCD,T,NKE,DHR,LOW,LIN,TXN,NEE,T10YIE,T5YIE
0,24.459604,22.333021,22.153856,22.761450,21.818892,22.075962,22.185017,22.075962,22.075962,22.247332,...,72.100479,20.558605,23.729067,30.191380,33.949581,92.296761,26.960524,13.315960,2.56,2.22
1,25.525446,22.153856,22.761450,21.818892,22.075962,22.185017,22.075962,22.075962,22.247332,22.138277,...,72.362450,20.599384,23.872437,30.448080,33.580658,91.417023,26.800943,13.371286,2.54,2.19
2,25.925140,22.761450,21.818892,22.075962,22.185017,22.075962,22.075962,22.247332,22.138277,22.114906,...,72.323959,20.779978,23.663897,30.507313,33.812313,91.906708,27.080208,13.445062,2.55,2.20
3,25.470594,21.818892,22.075962,22.185017,22.075962,22.075962,22.247332,22.138277,22.114906,22.418707,...,72.370193,20.663462,23.468407,30.097588,33.151680,91.607933,26.378075,13.452440,2.54,2.19
4,25.164948,22.075962,22.185017,22.075962,22.075962,22.247332,22.138277,22.114906,22.418707,22.309649,...,72.547455,20.640163,23.694309,30.107458,32.319466,91.259361,25.915297,13.376823,2.53,2.19
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2232,66.070000,65.774475,65.565636,66.579994,66.400002,65.660004,66.589996,65.339996,66.419998,66.750000,...,268.239990,24.286488,167.580002,327.197418,252.866135,343.420013,191.839996,91.320000,2.50,2.75
2233,66.930000,65.565636,66.579994,66.400002,65.660004,66.589996,65.339996,66.419998,66.750000,66.320000,...,267.880005,24.325691,166.419998,323.230011,253.185089,343.790008,191.100006,91.879997,2.50,2.78
2234,67.010002,66.579994,66.400002,65.660004,66.589996,65.339996,66.419998,66.750000,66.320000,64.400002,...,268.489990,24.149275,168.779999,326.579987,256.723419,345.399994,190.809998,91.989998,2.53,2.83
2235,66.860001,66.400002,65.660004,66.589996,65.339996,66.419998,66.750000,66.320000,64.400002,64.809998,...,267.209992,24.286488,167.490005,328.470001,255.547317,344.329987,189.410004,92.769997,2.58,2.86


In [10]:
dataset_for_tf = dataset_for_tf.sample(frac=1, random_state=42).reset_index(drop=True)
dataset_for_tf

Unnamed: 0,Adj Close,Lag1,Lag2,Lag3,Lag4,Lag5,Lag6,Lag7,Lag8,Lag9,...,MCD,T,NKE,DHR,LOW,LIN,TXN,NEE,T10YIE,T5YIE
0,42.271461,43.640846,42.790886,43.102543,43.244198,43.461411,43.518078,43.631409,43.725842,43.555855,...,204.088791,31.922260,94.679001,159.712128,112.740295,196.849945,116.045158,64.427292,1.65,1.60
1,37.349365,35.763638,36.319023,36.117798,36.206345,35.642887,35.328949,35.361137,35.272606,35.506039,...,76.506180,22.393293,34.869671,36.620605,42.071049,109.618629,38.491997,18.189798,2.29,1.97
2,38.191078,35.480236,35.055000,35.932041,35.630829,35.551102,35.320766,35.515667,35.604252,35.666271,...,161.475342,28.080114,65.011711,101.801254,99.696274,151.636948,101.992561,35.551010,2.09,1.92
3,35.228096,31.762888,31.779840,32.508465,33.745430,34.262238,35.626297,35.990608,36.490479,36.626034,...,104.184685,29.439695,51.861244,50.436386,70.217972,99.401657,53.408726,27.113407,1.40,1.36
4,41.837044,41.553722,41.770935,42.413128,42.687000,43.423637,43.640846,42.790886,43.102543,43.244198,...,201.484848,32.668983,100.312546,161.082184,116.610169,203.497879,124.638100,63.351101,1.68,1.62
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2232,35.291176,38.826790,38.761833,38.789669,38.724709,38.901024,38.733986,37.880245,37.406971,37.267780,...,206.257217,29.221880,79.351761,140.368088,91.323021,181.634872,118.094643,52.026993,1.56,1.36
2233,35.972904,36.660381,36.599804,36.374783,36.314205,36.469978,36.539753,36.208370,36.025227,36.190926,...,137.992706,27.662897,50.251244,84.498795,72.784210,122.354103,71.239769,31.801281,1.66,1.54
2234,36.731602,35.876980,35.432224,36.086277,36.156033,36.452545,35.842087,35.964180,35.589195,35.135712,...,140.586823,28.052492,56.443066,77.601883,71.220734,117.528450,72.294762,33.302753,1.80,1.68
2235,39.578804,37.284241,38.007423,37.650303,37.177113,37.069973,36.828907,37.677086,38.141354,38.498474,...,147.396240,26.851704,65.179420,95.055702,82.339920,134.061813,90.548790,37.431396,2.07,2.01


In [79]:
target =  dataset_for_tf["Adj Close"]
features = dataset_for_tf.iloc[:, 1:] 

In [80]:
features

Unnamed: 0,Lag1,Lag2,Lag3,Lag4,Lag5,Lag6,Lag7,Lag8,Lag9,Lag10,...,MCD,T,NKE,DHR,LOW,LIN,TXN,NEE,T10YIE,T5YIE
0,43.640846,42.790886,43.102543,43.244198,43.461411,43.518078,43.631409,43.725842,43.555855,43.773060,...,204.088791,31.922260,94.679001,159.712128,112.740295,196.849945,116.045158,64.427292,1.65,1.60
1,35.763638,36.319023,36.117798,36.206345,35.642887,35.328949,35.361137,35.272606,35.506039,36.101700,...,76.506180,22.393293,34.869671,36.620605,42.071049,109.618629,38.491997,18.189798,2.29,1.97
2,35.480236,35.055000,35.932041,35.630829,35.551102,35.320766,35.515667,35.604252,35.666271,35.648563,...,161.475342,28.080114,65.011711,101.801254,99.696274,151.636948,101.992561,35.551010,2.09,1.92
3,31.762888,31.779840,32.508465,33.745430,34.262238,35.626297,35.990608,36.490479,36.626034,36.236309,...,104.184685,29.439695,51.861244,50.436386,70.217972,99.401657,53.408726,27.113407,1.40,1.36
4,41.553722,41.770935,42.413128,42.687000,43.423637,43.640846,42.790886,43.102543,43.244198,43.461411,...,201.484848,32.668983,100.312546,161.082184,116.610169,203.497879,124.638100,63.351101,1.68,1.62
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2232,38.826790,38.761833,38.789669,38.724709,38.901024,38.733986,37.880245,37.406971,37.267780,37.351299,...,206.257217,29.221880,79.351761,140.368088,91.323021,181.634872,118.094643,52.026993,1.56,1.36
2233,36.660381,36.599804,36.374783,36.314205,36.469978,36.539753,36.208370,36.025227,36.190926,36.190926,...,137.992706,27.662897,50.251244,84.498795,72.784210,122.354103,71.239769,31.801281,1.66,1.54
2234,35.876980,35.432224,36.086277,36.156033,36.452545,35.842087,35.964180,35.589195,35.135712,35.667686,...,140.586823,28.052492,56.443066,77.601883,71.220734,117.528450,72.294762,33.302753,1.80,1.68
2235,37.284241,38.007423,37.650303,37.177113,37.069973,36.828907,37.677086,38.141354,38.498474,38.418125,...,147.396240,26.851704,65.179420,95.055702,82.339920,134.061813,90.548790,37.431396,2.07,2.01


In [65]:
num_train_samples = int((SPLIT_FACTOR-SPLIT_VAL_FACTOR) * len(features))
num_val_samples = int(SPLIT_VAL_FACTOR * len(features))
num_test_samples = len(features) - num_train_samples - num_val_samples
print("num_train_samples:", num_train_samples)
print("num_val_samples:", num_val_samples)
print("num_test_samples:", num_test_samples)

num_train_samples: 1342
num_val_samples: 559
num_test_samples: 336


In [13]:
def min_max_normalize(features, target):
    features_n = (features - target.min ()) / (target.max () - target.min ())
    return features_n

In [14]:
def min_max_normalize_inverse_transform(features_n, target):
    features_i = features_n * (target.max () - target.min ())+ target.min ()
    return features_i

In [15]:
def standart_scaler(aray):
    mean = aray.mean()
    std = aray.std()
    return (aray - mean) / std

In [13]:
# features = min_max_normalize(features, target)

In [63]:
# features.min(), features.max()

In [15]:
# features = features.sample(frac=1, random_state=42).reset_index(drop=True)

In [64]:
# features

In [None]:
# https://www.tensorflow.org/tutorials/load_data/pandas_dataframe

In [66]:
# train_data = features.iloc[0:num_train_samples,:] 
# # val_data = features.iloc[num_train_samples:num_val_samples,:]
# val_data = features.iloc[num_train_samples:num_train_samples+num_val_samples,:]
# test_data = features.iloc[num_train_samples:num_train_samples+num_val_samples+num_val_samples:,:]

In [67]:
train_data.shape, val_data.shape, test_data.shape

((1342, 128), (559, 128), (895, 128))

In [68]:
SHUFFLE_BUFFER = 500
BATCH_SIZE = 2


In [69]:
train_data_tf = tf.convert_to_tensor(train_data)
val_data_tf = tf.convert_to_tensor(val_data)
test_data_tf = tf.convert_to_tensor(test_data)
target = tf.convert_to_tensor(target)


In [17]:
normalizer = Normalization(axis=-1)
normalizer.adapt(train_data_tf)
normalizer.adapt(val_data_tf)
normalizer.adapt(test_data_tf)


In [96]:
tf.keras.backend.clear_session()

In [98]:
# train_data_tf.shape

In [88]:
sequence_length = train_data_tf.shape[0]
sequence_length

1342

In [89]:
shape=(sequence_length, features.shape[-1])
shape

(1342, 128)

In [128]:
inputs = Input(shape=(1342, 128))
x = normalizer(inputs)
x = Flatten()(x)

# x = Dense(16, activation="relu")(x)
outputs = Dense(1)(x)
model = Model(inputs, outputs)
model.summary()

Model: "functional_13"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_7 (InputLayer)         [(None, 1342, 128)]       0         
_________________________________________________________________
normalization_1 (Normalizati multiple                  257       
_________________________________________________________________
flatten_6 (Flatten)          (None, 171776)            0         
_________________________________________________________________
dense_6 (Dense)              (None, 1)                 171777    
Total params: 172,034
Trainable params: 171,777
Non-trainable params: 257
_________________________________________________________________


In [95]:
# inputs = Input(shape=(1342, 127)) #shape=(sequence_length, features.shape[-1])
# x = normalizer(inputs)
# x = Flatten()(x)
# # x = Dense(16, activation="relu")(x)
# outputs = Dense(1)(x)
# model = Model(inputs, outputs)
  
# callbacks = [
#     ModelCheckpoint("stock_predictor_linear_regression.keras",  
#                                     save_best_only=True)
# ] 
# model.compile(optimizer="Adam", loss="mse", metrics=["mae"])
# history = model.fit(train_data_tf,
#                     epochs=10,
#                     validation_data=val_data_tf,
#                     callbacks=callbacks)
  
# model = load_model("stock_predictor_linear_regression.keras")      
# print(f"Test MAE: {model.evaluate(test_dataset)[1]:.2f}")

In [100]:
target

0       42.271461
1       37.349365
2       38.191078
3       35.228096
4       41.837044
          ...    
2232    35.291176
2233    35.972904
2234    36.731602
2235    39.578804
2236    37.236046
Name: Adj Close, Length: 2237, dtype: float64

In [83]:
# normalizer = Normalization(axis=-1)

In [126]:
features_tf = min_max_normalize(features_tf, target)

In [128]:
features_tf = tf.convert_to_tensor(features)

In [129]:
features_tf.shape

TensorShape([2237, 128])

In [130]:
# features_tf=features_tf.reshape(-1,128,1)
# features_tf = tf.reshape(features_tf, [128,1])
tf.keras.backend.clear_session()

In [88]:
len(features_tf[0:]), len(features_tf[1])

(2237, 128)

In [132]:
features_tf[1]

<tf.Tensor: shape=(128,), dtype=float64, numpy=
array([3.57636375e+01, 3.63190231e+01, 3.61177979e+01, 3.62063446e+01,
       3.56428871e+01, 3.53289490e+01, 3.53611374e+01, 3.52726059e+01,
       3.55060387e+01, 3.61016998e+01, 3.64800301e+01, 3.68422470e+01,
       3.74298592e+01, 3.69790840e+01, 3.71481247e+01, 3.81140671e+01,
       3.85165405e+01, 3.86936188e+01, 3.82670021e+01, 3.84440842e+01,
       3.80899124e+01, 3.87177696e+01, 3.87016716e+01, 3.87982597e+01,
       3.88707047e+01, 3.89834022e+01, 3.87338600e+01, 3.86936188e+01,
       3.82911453e+01, 3.76230469e+01, 4.68600006e+01, 4.61899986e+01,
       4.64799995e+01, 4.64000015e+01, 3.67740000e+06, 3.78847368e+01,
       3.83635919e+01, 3.80841626e+01, 4.58726650e+01, 4.34668849e-01,
       4.90934683e+01, 4.57441987e+01, 1.93067004e+03, 1.65633008e+04,
       4.36977002e+03, 1.07264297e+04, 1.69500008e+01, 2.20156201e+03,
       1.33899283e+00, 1.02773003e+02, 1.68916058e+00, 1.79999990e-02,
       1.76300001e+00, 2.5559

In [89]:
normalizer.adapt(features_tf)

In [119]:
input_shape = (128, 128)

In [127]:
inputs = Input(shape=(128,)) #shape=(sequence_length, features.shape[-1])
# x = normalizer(inputs)
x = Flatten()(x)
x = Dense(16, activation="relu")(x)
outputs = Dense(1)(x)
model = Model(inputs, outputs)
  
callbacks = [
    ModelCheckpoint("stock_predictor_linear_regression.keras",  
                                    save_best_only=True)
] 
model.compile(optimizer="Adam", loss="mse", metrics=["mae"])
history = model.fit(features_tf,
                    epochs=10,
#                     validation_data=val_data_tf,
                    callbacks=callbacks)
  
model = load_model("stock_predictor_linear_regression.keras")      
print(f"Test MAE: {model.evaluate(test_dataset)[1]:.2f}")

ValueError: Graph disconnected: cannot obtain value for tensor Tensor("input_2:0", shape=(None, 1, 128), dtype=float32) at layer "normalization". The following previous layers were accessed without issue: []

In [133]:
def get_basic_model():
    model = tf.keras.Sequential([
    normalizer,
    Dense(10, activation='relu'),
    Dense(10, activation='relu'),
    Dense(1)
    ])

    model.compile(optimizer="Adam", loss="mse", metrics=["mae"])
    return model

In [134]:
model = get_basic_model()
model.fit(features_tf, target, epochs=15, batch_size=1)


Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


<tensorflow.python.keras.callbacks.History at 0x272079f32e0>