In [None]:
import pandas as pd 
import numpy as np
from pandas_datareader import data as pdr

import yfinance as yf #yahoo finance stock data
import datetime
import matplotlib
import matplotlib.pyplot as plt
from math import sqrt

import tensorflow as tf
from tensorflow import keras

from sklearn.linear_model import LinearRegression,ElasticNet
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import StandardScaler
from sklearn.tree import export_graphviz
from sklearn import tree

from keras.models import Sequential
from keras.layers import Activation,Dropout,Dense,LSTM,BatchNormalization

# Ticker symbol handed to every yf.download call in the download cell.
stock="^GSPC"

In [None]:
# Download one price history per sampling interval for the ticker in `stock`.
# The (period, interval) pairs are listed in the same order as the names on
# the left-hand side, so each frame lands in its matching variable.
min_, thirt_min, hour, daily, weekly = [
    yf.download(tickers=stock, period=lookback, interval=bar_size)
    for lookback, bar_size in (
        ("3d", "1m"),
        ("60d", "30m"),
        ("150d", "1h"),
        ("1000d", "1d"),
        ("3800d", "1wk"),
    )
]

In [None]:
def Prepare_dataset(df,data_size=255,forecast_window=1):
    """Build a chronological train/test split for forecasting the "Close" column.

    Rows that are entirely NaN are dropped, remaining gaps are forward-filled
    and only the last ``data_size`` rows are kept.  The target for each row is
    the "Close" value ``forecast_window`` rows later; the trailing rows that
    have no such future value are discarded from both features and target.

    Parameters
    ----------
    df : pandas.DataFrame
        Price history containing a "Close" column.  An "Adj Close" column, if
        present, is removed from the features.  The frame is not modified.
    data_size : int, default 255
        Number of most recent rows to keep.
    forecast_window : int, default 1
        Number of rows between a feature row and its target; must be >= 1.

    Returns
    -------
    list
        ``[X_train, X_test, y_train, y_test]`` as produced by
        ``train_test_split(..., test_size=0.2, shuffle=False)``; the X parts
        are DataFrames, the y parts are NumPy arrays.

    Raises
    ------
    ValueError
        If ``forecast_window`` is smaller than 1.
    """
    from sklearn.model_selection import train_test_split

    if forecast_window < 1:
        raise ValueError("forecast_window must be a positive integer")

    # Non-mutating chain: the caller's frame is reused by later cells, so it
    # must not be altered here (the previous in-place calls changed it).
    frame = df.dropna(how="all", axis=0).ffill().iloc[-data_size:]

    # Pick the target by name; a positional column index silently selects a
    # different column whenever the column order changes.
    y = frame["Close"].shift(-forecast_window).iloc[:-forecast_window].values

    # `columns=` replaces the positional axis argument that pandas 2 rejects;
    # errors="ignore" tolerates frames that carry no "Adj Close" column.
    X = frame.drop(columns="Adj Close", errors="ignore").iloc[:-forecast_window, :]

    # shuffle=False keeps the time order: the test set is the most recent 20 %.
    return train_test_split(X, y, test_size=0.2, random_state=0, shuffle=False)

def Mape(y,y_pred):
    """Return the mean absolute percentage error as a percent string.

    Both inputs are flattened to 1-D before comparison.  Without that, a
    target of shape (n,) and predictions of shape (n, 1) broadcast to an
    (n, n) matrix and the mean is taken over every target/prediction pair
    instead of the matching ones.

    Parameters
    ----------
    y : array-like
        True values.
    y_pred : array-like
        Predicted values, same number of elements as ``y``.

    Returns
    -------
    str
        The error formatted with two decimals, e.g. ``"10.00%"``.
    """
    actual = np.asarray(y, dtype=float).ravel()
    forecast = np.asarray(y_pred, dtype=float).ravel()
    mape = np.mean(np.abs((actual - forecast) / actual))
    return "{:.2%}".format(mape)

def Rmse(y,y_pred):
    """Return the root mean squared error between ``y`` and ``y_pred``.

    Both inputs are flattened to 1-D first so that predictions shaped (n, 1)
    are compared element by element with a target shaped (n,) rather than
    being broadcast against it into an (n, n) matrix.

    Parameters
    ----------
    y : array-like
        True values.
    y_pred : array-like
        Predicted values, same number of elements as ``y``.

    Returns
    -------
    float
        Square root of the mean squared difference.
    """
    actual = np.asarray(y, dtype=float).ravel()
    forecast = np.asarray(y_pred, dtype=float).ravel()
    return sqrt(np.mean(np.square(actual - forecast)))

def normalize(data):
    """Standardise ``data`` along axis 0.

    The axis-0 mean is subtracted and the result is divided by the axis-0
    standard deviation, both computed with the input's own ``mean``/``std``
    methods.
    """
    centre = data.mean(axis=0)
    spread = data.std(axis=0)
    shifted = data - centre
    return shifted / spread


def Arima_build(ts_train,ts_test):
    """Fit a seasonal auto-ARIMA on ``ts_train`` and forecast ``len(ts_test)`` steps.

    The search is delegated to ``pmdarima.auto_arima`` with seasonal period
    ``m=12``, first-order regular and seasonal differencing and a stepwise
    search bounded at order 5.  The fitted model is also stored in the
    module-level ``arima_model`` so callers can keep a reference to it.

    Parameters
    ----------
    ts_train : array-like
        Series the model is fitted on.
    ts_test : sized
        Only its length is used: it sets the number of steps to forecast.

    Returns
    -------
    Whatever ``arima_model.predict`` returns for ``len(ts_test)`` periods.
    """
    global arima_model
    import pmdarima as pmd

    arima_model = pmd.auto_arima(
        ts_train,
        start_p=1, d=1, start_q=0,
        start_P=0, D=1, start_Q=0,
        max_p=5, max_d=5, max_q=5,
        max_P=5, max_D=5, max_Q=5,
        m=12, seasonal=True,
        error_action='warn', trace=True,
        # Spelled `suppress_warnings` in pmdarima; the former misspelling was
        # not the library's option and was forwarded as an extra fit argument.
        suppress_warnings=True,
        stepwise=True,
        random_state=123, n_fits=10,
    )
    return arima_model.predict(len(ts_test))

def plot_history(history):
    """Plot training and validation mean absolute error per epoch.

    ``history`` must expose ``.history`` (convertible to a DataFrame with
    "mae" and "val_mae" columns) and ``.epoch`` (one entry per row).  A new
    figure is opened; the y-axis runs from 0 to the largest training error.
    """
    curves = pd.DataFrame(history.history)
    curves["epoch"] = history.epoch
    epochs = curves.epoch

    plt.figure()
    plt.xlabel("Epoch")
    plt.ylabel("Mean Abs Error")
    # One line per tracked series: (column in the history, legend text).
    for column, legend_text in (("mae", "Train error"), ("val_mae", "Val error")):
        plt.plot(epochs, getattr(curves, column), label=legend_text)
    plt.legend()
    upper = curves.mae.max()
    plt.ylim([0, upper])

def Build_lstm(X_train,X_test,y_train,scale=True,epochs=1000,batch_size=32):
    """Train a two-layer LSTM regressor and return its predictions for ``X_test``.

    The Keras session is cleared first, the features are optionally
    standardised with a scaler fitted on the training rows only, and every
    row is reshaped to ``(n_features, 1)`` so each feature is fed to the LSTM
    as one step with a single channel.  The trained model is also stored in
    the module-level ``lstm_model`` so callers can keep a reference to it.

    Parameters
    ----------
    X_train, X_test : 2-D array-like (rows x features)
        Training and test features with the same number of columns.
    y_train : array-like
        Regression targets for ``X_train``.
    scale : bool, default True
        Standardise the features with ``StandardScaler`` before training.
    epochs : int, default 1000
        Number of training epochs.
    batch_size : int, default 32
        Mini-batch size used while fitting.

    Returns
    -------
    The output of ``lstm_model.predict`` on the reshaped test features; the
    final layer has one unit, so this is one value per test row.
    """
    global lstm_model

    keras.backend.clear_session()
    tf.compat.v1.reset_default_graph()

    if scale:
        x_scaler = StandardScaler()
        # Fit on the training rows only so no test statistics leak in.
        X_train = x_scaler.fit_transform(X_train)
        X_test = x_scaler.transform(X_test)

    x_train = np.array(X_train).reshape(X_train.shape[0], X_train.shape[1], 1)
    x_test = np.array(X_test).reshape(X_test.shape[0], X_test.shape[1], 1)

    lstm_model = Sequential()
    lstm_model.add(LSTM(64, return_sequences=True))
    lstm_model.add(LSTM(128, return_sequences=False))
    lstm_model.add(Dense(units=32))
    lstm_model.add(Dense(units=1))

    # "accuracy" is a classification metric and says nothing about a
    # regression fit; mean absolute error is tracked instead.
    lstm_model.compile(loss="mse", optimizer="nadam", metrics=["mae"])
    lstm_model.fit(x_train, np.array(y_train), epochs=epochs, verbose=0,
                   batch_size=batch_size)
    preds = lstm_model.predict(x_test, batch_size=1)

    return preds



In [None]:
# One chronological train/test split per sampling interval, keyed by the
# interval label used throughout the rest of the notebook.
datasets = {
    label: Prepare_dataset(frame, forecast_window=1)
    for label, frame in (
        ("1m", min_),
        ("30m", thirt_min),
        ("1h", hour),
        ("D", daily),
        ("W", weekly),
    )
}

In [None]:
# Score tables: one row per model, one column per sampling interval.
mape=pd.DataFrame(columns=["1m","30m","1h","D","W"])
rmse=pd.DataFrame(columns=["1m","30m","1h","D","W"])

# Fitted models per interval key.
arima_models={}
elastic_nets={}
rf_models={}
ltsm_models={}  # (sic) spelling kept so any later cell using this name still works

# preds[key] = [ARIMA, Elastic net, Random forest, LSTM] test predictions.
preds={}


def _record_scores(model_name, key, y_true, y_pred):
    """Write the MAPE and RMSE of one model/interval pair into the score tables."""
    # Flatten first: column-vector predictions (the LSTM output) would
    # otherwise be broadcast against the 1-D target into an (n, n) matrix.
    flat_pred = np.ravel(y_pred)
    # Store plain scalars; wrapping them in a one-element list is not needed
    # to fill a single cell.
    mape.loc[model_name, key] = Mape(y_true, flat_pred)
    rmse.loc[model_name, key] = Rmse(y_true, flat_pred)


for key, (X_train, X_test, y_train, y_test) in datasets.items():
    elastic = ElasticNet().fit(X_train, y_train)
    elastic_preds = elastic.predict(X_test)
    _record_scores("Elastic net", key, y_test, elastic_preds)
    elastic_nets[key] = elastic

    rf = RandomForestRegressor(n_estimators=100, max_depth=4, random_state=0).fit(X_train, y_train)
    rf_preds = rf.predict(X_test)
    _record_scores("Random forest", key, y_test, rf_preds)
    rf_models[key] = rf

    # Fit ARIMA on the target series itself.  y_train ends exactly where
    # y_test begins, so the forecast lines up with y_test step for step;
    # fitting on X_train["Close"] leaves the forecast one step behind the
    # shifted target it is scored against.
    arima_preds = Arima_build(y_train, y_test)
    _record_scores("ARIMA", key, y_test, arima_preds)
    arima_models[key] = arima_model  # module-level name set by Arima_build

    lstm_preds = Build_lstm(X_train, X_test, y_train)
    _record_scores("LSTM scaled", key, y_test, lstm_preds)
    ltsm_models[key] = lstm_model  # module-level name set by Build_lstm

    preds[key] = [arima_preds, elastic_preds, rf_preds, lstm_preds]

    print(f"Model for {key} dataset is done")

In [None]:
# Display the RMSE table (rows are models, columns are sampling intervals).
rmse

In [None]:
# Display the MAPE table (rows are models, columns are sampling intervals).
mape

In [None]:
# Index of the model to show.  preds[key] is ordered as
# [ARIMA, Elastic net, Random forest, LSTM], so the figure title is looked up
# from the same index and cannot drift away from what is plotted.
m=1
model_names = ["ARIMA", "Elastic net", "Random forest", "LSTM scaled"]

# (dataset key, panel title) in row-major grid order.
panels = [
    ("1m", "1 minute prediction"),
    ("30m", "30 minute prediction"),
    ("1h", "1 hour prediction"),
    ("D", "Daily prediction"),
    ("W", "Weekly prediction"),
]

matplotlib.rcParams.update(matplotlib.rcParamsDefault)

fig, axs = plt.subplots(3, 2,figsize=(10,10))
fig.suptitle(f"{model_names[m]} model results")

grid_axes = axs.ravel()
for panel_ax, (interval, panel_title) in zip(grid_axes, panels):
    panel_ax.set_title(panel_title)
    panel_ax.plot(preds[interval][m], linestyle='dashed',label="Pred")
    panel_ax.plot(datasets[interval][3],label="True")  # index 3 is y_test
    panel_ax.legend(loc="lower right")

# The 3x2 grid has six cells but only five intervals; hide the unused axes.
for spare_ax in grid_axes[len(panels):]:
    spare_ax.set_visible(False)

plt.show()

In [None]:
# Display the fitted ARIMA model stored for each interval key.
arima_models

In [None]:
def plot_preds(key):
    """Plot the first prediction series stored for ``key`` and return the Axes.

    ``preds[key][0]`` is the series the original stub referenced; it is drawn
    as a dashed line labelled "Pred", matching the other prediction plots in
    this notebook.

    Parameters
    ----------
    key : str
        Interval label used as a key of the module-level ``preds`` dict.

    Raises
    ------
    KeyError
        If ``key`` is not present in ``preds``.
    """
    series = preds[key][0]
    fig, ax = plt.subplots()
    ax.plot(series, linestyle="dashed", label="Pred")
    ax.set_title(f"Predictions for {key}")
    ax.legend(loc="lower right")
    return ax

In [None]:
# Show the fitted ARIMA models side by side, ordered from the shortest to the
# longest sampling interval.
tuple(arima_models[label] for label in ("1m", "30m", "1h", "D", "W"))

In [None]:
# Daily close history with the test-period predictions of one model overlaid.
# preds[key] is ordered [ARIMA, Elastic net, Random forest, LSTM]; index 3 is
# the LSTM.  To overlay another model or interval, change the key and index
# in the second plot call (x values come from the matching X_test index).
plt.figure(figsize=(16,8))
plt.title('Model')
plt.xlabel('Date', fontsize=18)
plt.ylabel('Close Price' ,fontsize=18)
plt.plot(daily['Close'],linewidth=3.5)
plt.plot(datasets["D"][1].index,preds["D"][3],linewidth=3.5)
# Legend location strings are space separated; 'upper_center' is not one of
# the locations matplotlib accepts.
plt.legend(['Train','Predictions'], loc='upper center')
plt.show()