In [2]:
import pandas as pd 
import numpy as np
from pandas_datareader import data as pdr

import yfinance as yf #yahoo finance stock data
import datetime
import matplotlib.pyplot as plt
import seaborn as sns
from math import sqrt

import tensorflow as tf
from tensorflow import keras

from sklearn.linear_model import LinearRegression,ElasticNet
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import StandardScaler

from keras.models import Sequential
from keras.layers import Activation,Conv2D,MaxPooling2D,Flatten,Dropout,Dense,LSTM,BatchNormalization

stock="GOOG"
period=14

In [3]:
end=datetime.datetime(2019,6,15,0,0,0)

In [4]:
min=yf.download(tickers=stock,period="3d",enddate=end,interval="1m")
thirt_min=yf.download(tickers=stock,period="60d",enddate=end, interval="30m")
hour=yf.download(tickers=stock,period="150d",enddate=end,interval="1h")
daily=yf.download(tickers=stock,period="1000d",enddate=end,interval="1d")
weekly=yf.download(tickers=stock,period="3800d",enddate=end,interval="1wk")

[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed


In [25]:
def Prepare_dataset(df):
    from sklearn.model_selection import train_test_split

    df.dropna(inplace=True,how="all",axis=0)
    df.fillna(inplace=True,method="ffill")
    df=df.iloc[-755:]
    y=df.shift(-1).iloc[:-1,3].values
    X=df.drop(["Adj Close","Close"],1)
    X=X.iloc[:-1,:]
    return train_test_split(X, y, test_size=0.2, random_state=0,shuffle=False)

def Rmse(y,y_pred):
    return np.sqrt(((y_pred - y) ** 2).mean())

def Arima_build(ts_train,ts_test):
  def arimamodel(timeseriesarray):
    import pmdarima as pmd
    autoarima_model = pmd.auto_arima(timeseriesarray, 
                              start_p=1,
                              max_p=5,
                              start_q=1,
                              max_q=5,
                              test="adf",
                              max_order=5,
                              seasonal_test="OCSB",
                              stepwise=False,
                              n_jobs=-1,
                              trace=True)
    return autoarima_model

  arima_model = arimamodel(ts_train)
  preds=arima_model.predict(len(ts_test))
  return ts_test

def plot_history(history):
    hist=pd.DataFrame(history.history)
    hist["epoch"]=history.epoch

    plt.figure()
    plt.xlabel("Epoch")
    plt.ylabel("Mean Abs Error")
    plt.plot(hist.epoch,hist.mae,label="Train error")
    plt.plot(hist.epoch,hist.val_mae,label="Val error")
    plt.legend()
    plt.ylim([0,hist.mae.max()])

def Build_lstm(X_train,X_test,y_train,scale=True):
    if scale==True:     
        x_scaler = StandardScaler()
        X_train=x_scaler.fit_transform(X_train)
        X_test=x_scaler.transform(X_test)
        
    x_train=np.array(X_train).reshape(X_train.shape[0],X_train.shape[1],1)
    x_test=np.array(X_test).reshape(X_test.shape[0],X_test.shape[1],1)

    model = Sequential()
    model.add(LSTM(50,return_sequences=True))
    model.add(Dropout(0.2))
    model.add(LSTM(50,return_sequences=True))
    model.add(Dropout(0.2))
    model.add(Dense(units=50))
    model.add(Dropout(0.2))
    model.add(Dense(units=25))
    model.add(Dropout(0.2))
    model.add(Dense(units=1,activation="linear"))

    model.compile(loss="mse",optimizer="nadam",metrics=['accuracy'])
    model.fit(x_train, np.array(y_train), epochs=1000,verbose=0)
    preds=model.predict(x_test, batch_size=1)

    return preds



In [6]:
datasets={}
datasets["1m"]=Prepare_dataset(min)
datasets["30m"]=Prepare_dataset(thirt_min)
datasets["1h"]=Prepare_dataset(hour)
datasets["D"]=Prepare_dataset(daily)
datasets["W"]=Prepare_dataset(weekly)

In [26]:
result=pd.DataFrame(columns=["1m","30m","1h","D","W"])

for key in datasets:
    X_train=datasets[key][0]
    X_test=datasets[key][1]
    y_train=datasets[key][2]
    y_test=datasets[key][3]

    elastic=ElasticNet().fit(X_train, y_train)
    elastic_preds=elastic.predict(X_test)
    result.loc["Elastic net",key]=[Rmse(y_test,elastic_preds)]

    rf=RandomForestRegressor(n_estimators=100,max_depth=12,random_state=0).fit(X_train, y_train)
    rf_preds=rf.predict(X_test)
    result.loc["Random forest",key]=[Rmse(y_test,rf_preds)]

    arima_preds=Arima_build(datasets[key][0]["Open"],datasets[key][1]["Open"])
    result.loc["ARIMA",key]=[Rmse(y_test,arima_preds)]

    lstm_preds=Build_lstm(X_train,X_test,y_train,scale=False)
    result.loc["LSTM",key]=[Rmse(y_test,lstm_preds)]

    lstm_preds=Build_lstm(X_train,X_test,y_train)
    
    result.loc["LSTM scaled",key]=[Rmse(y_test,lstm_preds)]

    print(f"Model for {key} dataset is done")
result

Total fit time: 1.427 seconds
Model for 1m dataset is done
Total fit time: 2.182 seconds
Model for 30m dataset is done
Total fit time: 2.046 seconds
Model for 1h dataset is done
Total fit time: 2.176 seconds
Model for D dataset is done
Total fit time: 2.413 seconds
Model for W dataset is done


Unnamed: 0,1m,30m,1h,D,W
Elastic net,[1.1411072490183436],[6.564852060828309],[16.140731757448503],[29.650418901294884],[50.37260552545277]
Random forest,[1.526357582451029],[12.113962351133306],[53.978064649027154],[104.99005197519087],[252.6637814746399]
ARIMA,[1.26714565749628],[7.879019230953363],[18.076279461606294],[36.09259168901256],[60.61618628524018]
LSTM,[19.36004049450776],[209.80724562596626],[205.26026703996428],[363.54813731704957],[264.5207440470159]
LSTM scaled,[4.610502078970468],[20.222476014350352],[102.23507423694375],[143.71911865999573],[239.45296977231777]


In [27]:
result

Unnamed: 0,1m,30m,1h,D,W
Elastic net,[1.1411072490183436],[6.564852060828309],[16.140731757448503],[29.650418901294884],[50.37260552545277]
Random forest,[1.526357582451029],[12.113962351133306],[53.978064649027154],[104.99005197519087],[252.6637814746399]
ARIMA,[1.26714565749628],[7.879019230953363],[18.076279461606294],[36.09259168901256],[60.61618628524018]
LSTM,[19.36004049450776],[209.80724562596626],[205.26026703996428],[363.54813731704957],[264.5207440470159]
LSTM scaled,[4.610502078970468],[20.222476014350352],[102.23507423694375],[143.71911865999573],[239.45296977231777]
