In [1]:
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt

import talib

from sklearn.model_selection import train_test_split
from sklearn.linear_model import ElasticNet

from tensorflow import keras
import tensorflow as tf

from keras.models import Sequential
from keras.layers import Activation,Dropout,Dense,LSTM,BatchNormalization
from sklearn.preprocessing import MinMaxScaler

RANDOM_SEED=42
np.random.seed(42)

In [2]:
min_df=pd.read_csv(f"../Data/Prices/AAPL_min.csv",index_col=0)
thirt_min=pd.read_csv(f"../Data/Prices/AAPL_thirt_min.csv",index_col=0)
hour=pd.read_csv(f"../Data/Prices/AAPL_hour.csv",index_col=0)
daily=pd.read_csv(f"../Data/Prices/AAPL_daily.csv",index_col=0)
weekly=pd.read_csv(f"../Data/Prices/AAPL_weekly.csv",index_col=0)

In [3]:
def Sliding_windows(data,seq_length=5):
    xs=[]
    
    for i in range(len(data)-seq_length-1):
        x=data[i:(i+seq_length)]
        xs.append(x)
    return np.array(xs)

def Data_preparation(df,data_size=500,scale=True):
    data=df.dropna(how="all",axis=0)
    data=data.iloc[-data_size:]
    data["Target"]=data["Close"].shift(-1)
    data.drop(["Dividends","Stock Splits"],axis=1,inplace=True)
    
    sma15=df["Close"].transform(lambda x: x.rolling(window=15).mean())
    sma5=df["Close"].transform(lambda x: x.rolling(window=5).mean())
    rsi=talib.RSI(df["Close"])
    mfi=talib.MFI(df["High"],df["Low"],df["Close"],df["Volume"])
    obv=talib.OBV(df["Close"],df["Volume"])
    atr=talib.ATR(df["High"],df["Low"],df["Close"],14)
    logret=np.log1p(df.Close.pct_change())
       
    n=len(data)
    s1=int(n*0.8)
    train=data.iloc[:s1,:]
    test=data.iloc[s1:,:]
        
    if scale==True:
        scaler=MinMaxScaler()
        scaler=scaler.fit(train)
        
        train=scaler.transform(train)
        test=scaler.transform(test)
    
    #train[["SMA15","SMA5","RSI","MFI","OBV","ATR","Log return"]]=[sma15[:s1],sma5[:s1],rsi[:s1],mfi[:s1],obv[:s1],atr[:s1],logret[:s1]]
    #test[["SMA15","SMA5","RSI","MFI","OBV","ATR","Log return"]]=[sma15[s1:],sma5[s1:],rsi[s1:],mfi[s1:],obv[s1:],atr[s1:],logret[s1:]]
    
    return train.drop("Target",axis=1),test.drop("Target",axis=1),train.iloc[:,-1],test.iloc[:,-1]

In [4]:
X_train,X_test,y_train,y_test=Data_preparation(daily,scale=False)

In [6]:
def define_model(len_ts, hidden_neurons = 10, nfeature=1, batch_size=None,stateful=False):
    in_out_neurons = 1
    
    inp = layers.Input(batch_shape= (batch_size, len_ts, nfeature),
                       name="input")  

    rnn = layers.LSTM(hidden_neurons, 
                    return_sequences=True,
                    stateful=stateful,
                    name="RNN")(inp)

    dens = layers.TimeDistributed(layers.Dense(in_out_neurons,name="dense"))(rnn)
    model = models.Model(inputs=[inp],outputs=[dens])
    
    model.compile(loss="mean_squared_error",
                  sample_weight_mode="temporal",
                  optimizer="rmsprop")
    return(model,(inp,rnn,dens))

In [None]:
#https://towardsdatascience.com/predicting-stock-prices-using-a-keras-lstm-model-4225457f0233

In [5]:
X_train_slided=torch.from_numpy(Sliding_windows(X_train)).float()
X_test_slided=torch.from_numpy(Sliding_windows(X_train)).float()
y_train_slided=torch.from_numpy(np.asarray(y_train)).float()
y_test_slided=torch.from_numpy(np.asarray(y_test)).float()

NameError: name 'torch' is not defined

In [41]:
class LSTM_model(nn.Module):
    def __init__(self,input_dim,hidden_dim,seq_length,num_layers=2):
        super(LSTM_model,self).__init__()
        
        self.input_dim=input_dim
        self.hidden_dim=hidden_dim
        self.seq_length=seq_length
        self.num_layers=num_layers
        
        self.lstm=nn.LSTM(
            input_size=input_dim,
            hidden_size=hidden_dim,
            num_layers=num_layers,
            dropout=0.5
        )
        
        self.linear=nn.Linear(in_features=hidden_dim,out_features=1)
        
    def reset_hidden_state(self):
        self.hidden=(
            torch.zeros(self.num_layers,self.seq_length,self.hidden_dim),
            torch.zeros(self.num_layers,self.seq_length,self.hidden_dim)
        )
    def forward(self,input):
        lstm_out, _ =self.lstm(
            input.view(len(input),self.seq_length,-1),
            self.hidden
        )
        
        y_pred=self.linear(
            lstm_out.view(self.seq_length,len(input),self.hidden_dim)[-1]    
        )
        return y_pred
            
        

In [42]:
def train_model(model, train_data,train_labels,test_data=None,test_labels=None):
    loss_fn=nn.MSELoss(reduction="sum")
    
    optimiser=optim.Adam(model.parameters(), lr=1e-3)
    
    num_epochs=60
    
    train_hist=np.zeros(num_epochs)
    test_hist=np.zeros(num_epochs)
    
    for t in range(num_epochs):
        model.reset_hidden_state()
        y_pred=model(X_train_slided)
        loss=loss_fn(y_pred.float(),y_train_slided)
        
        if test_data is not None:
            with torch.no_grad():
                y_test_pred=model(X_test_slided)
                test_loss=loss_fn(y_test_pred.float(),y_test_slided)
            test_hist[t]=test_loss.item()
            
            if t % 10 ==0:
                print(f"Epoch {t} train loss: {loss.item()} test loss: {test_loss.item()}")
        elif t%10==0:
            print(f"Epoch {t} train loss: {loss.item()}")
        
        train_hist[t]=loss.item()
        optimiser.zero_grad()
        loss.backward()
        optimiser.step()
        
    return model.eval(), train_hist, test_hist
        
    
    

In [43]:
model=LSTM_model(5,200,5)

In [44]:
model, train_hist,test_hist=train_model(model,X_train_slided,y_train_slided,X_test_slided,y_test_slided)

  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)


Epoch 0 train loss: 842494848.0 test loss: nan
Epoch 10 train loss: 740598464.0 test loss: nan
Epoch 20 train loss: 663491456.0 test loss: nan
Epoch 30 train loss: 612794624.0 test loss: nan
Epoch 40 train loss: 570038016.0 test loss: nan
Epoch 50 train loss: 531231744.0 test loss: nan


In [35]:
y_train_slided.shape,X_test_slided.shape

(torch.Size([394, 5]), torch.Size([394, 5, 5]))

In [None]:
#https://www.youtube.com/watch?v=8A6TEjG2DNw