<a href="https://colab.research.google.com/github/lunarforest0318/Stock-Prediction/blob/master/Stock_RNN_v3_.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

**0. Colab Set-up**

In [3]:
# Mount Google Drive inside the Colab VM at /content/gdrive (prompts for authorization).
from google.colab import drive
drive.mount('/content/gdrive')

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3Aietf%3Awg%3Aoauth%3A2.0%3Aoob&scope=email%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdocs.test%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive.photos.readonly%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fpeopleapi.readonly&response_type=code

Enter your authorization code:
··········
Mounted at /content/gdrive


In [4]:
import os
# Work from the project folder on the mounted Drive; the relative paths used later
# in the notebook ("models/...", "logs/...", "./log", "./ngrok") resolve against it.
os.chdir('/content/gdrive/My Drive/Deep Learning/RNN Stock')
# List the contents of the working directory.
!ls

 models   ngrok-stable-linux-amd64.zip	'Stock RNN-v2 multiclass.ipynb'
 ngrok	  stock_data			'Stock RNN-v3 .ipynb'


In [0]:
import pandas as pd
from sklearn import preprocessing
from collections import deque
import random
import numpy as np
import pandas_datareader as pdr
from datetime import datetime,date
import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout, LSTM, CuDNNLSTM, BatchNormalization
from keras.callbacks import TensorBoard, ModelCheckpoint
import time

## 1. Set up Parameters

In [0]:
# Tickers fetched by prepare_data; each one contributes a "<ticker>_close" and a
# "<ticker>_volume" column.
stocks = [
    'TQQQ',
    '^IXIC',
    'UCO',
    'GLD',
    '^TNX',
    '^VIX',
]

# Sequence / label geometry.
SEQ_LEN = 40                 # number of past rows fed to the model per sample
FUTURE_PERIOD_PREDICT = 5    # how many rows ahead the label looks
STOCK_TO_PREDICT = "TQQQ"    # ticker whose close price is classified

# Fraction of the rows held out for validation.
SPLIT_RATIO = 0.1

# Relative price-change thresholds separating the five target classes.
BIG_BULL, SML_BULL = 0.1, 0.05
BIG_BEAR, SML_BEAR = -0.1, -0.05

# Training hyper-parameters.
EPOCHS = 100
BATCH_SIZE = 64

# Run identifier (used for the TensorBoard log directory); the timestamp keeps runs apart.
NAME = "{}-SEQ-{}-PRED-{}".format(SEQ_LEN, FUTURE_PERIOD_PREDICT, int(time.time()))


## 2. Preprocess Data

In [0]:
def prepare_data(stocks):
    """Download close/volume history for every ticker and combine it into one frame.

    Each ticker is requested from the 'yahoo' source of pandas_datareader for the
    period 2010-01-01 .. today. Only the "Close" and "Volume" columns are kept,
    renamed to "<ticker>_close" and "<ticker>_volume".

    Args:
        stocks: iterable of ticker symbols.

    Returns:
        A DataFrame indexed like the first ticker's history; the other tickers are
        attached with DataFrame.join (a left join by default), so dates missing for
        a later ticker show up as NaN. An empty DataFrame if `stocks` is empty.
    """
    main_df = pd.DataFrame()
    for ticker in stocks:
        close_col = f"{ticker}_close"
        volume_col = f"{ticker}_volume"
        history = pdr.DataReader(ticker, 'yahoo', datetime(2010, 1, 1), date.today())
        history = history.rename(columns={"Close": close_col, "Volume": volume_col})
        history = history[[close_col, volume_col]]
        # The first non-empty history becomes the base everything else is joined onto.
        main_df = history if len(main_df) == 0 else main_df.join(history)
    return main_df

# #Join other macro data
# unrate=pd.read_csv("stock_data/UNRATE.csv")
# unrate.set_index("Date",inplace=True)
# main_df=main_df.join(unrate)
# main_df=main_df.fillna(method='ffill')
# main_df=main_df.fillna(method='bfill')        
        
def classify(current,future):
    """Map the relative change from `current` to `future` onto one of five classes.

    Classes (thresholds come from the module-level BIG_BULL / SML_BULL /
    SML_BEAR / BIG_BEAR constants):
        4: change >= BIG_BULL
        3: SML_BULL <= change < BIG_BULL
        2: SML_BEAR <= change < SML_BULL
        1: BIG_BEAR < change < SML_BEAR
        0: change <= BIG_BEAR

    Args:
        current: price at the reference row (anything float() accepts).
        future: price FUTURE_PERIOD_PREDICT rows later, or NaN when unknown.

    Returns:
        The class id as an int, or float('nan') when the change cannot be computed
        because either price is NaN. Returning NaN (instead of silently falling
        into class 2) lets the later dropna() discard rows that have no label,
        e.g. the trailing rows whose shifted future price does not exist yet.

    Raises:
        ZeroDivisionError: if `current` is 0.
    """
    pct_diff = (float(future)-float(current))/float(current)
    if np.isnan(pct_diff):
        return float('nan')
    if pct_diff >= BIG_BULL:
        return 4
    if pct_diff >= SML_BULL:
        return 3
    # Test the big-bear bound before the small-bear one so a change exactly equal
    # to BIG_BEAR is a big move, mirroring how BIG_BULL is handled.
    if pct_diff <= BIG_BEAR:
        return 0
    if pct_diff < SML_BEAR:
        return 1
    return 2

def preprocess_df(df):
    """Turn a labelled price frame into class-balanced (sequence, target) samples.

    Steps:
      1. Drop the 'future', '^TNX_volume' and '^VIX_volume' columns.
      2. Drop rows with any missing value.
      3. Replace every feature column by its row-to-row percentage change, drop
         the single leading row this makes NaN, then standardise each feature
         with sklearn's preprocessing.scale.
      4. Slide a window of SEQ_LEN rows over the frame; each full window becomes
         one sample labelled with the target of the window's last row.
      5. Down-sample every class (targets 0..4) to the size of the rarest class
         and shuffle.

    Args:
        df: DataFrame containing the feature columns plus 'future' and 'target'.
            The caller's frame is not modified.

    Returns:
        (X, y): X is a numpy array stacking the selected windows, y is a plain
        list with the matching targets. If any of the five classes has no sample,
        the balancing step leaves nothing and X is empty.

    Raises:
        KeyError: if one of the dropped columns is missing.
    """
    # Keyword form: the positional axis argument of DataFrame.drop (`drop(col, 1)`)
    # is no longer accepted by pandas >= 2.0.
    df = df.drop(columns=['future', '^TNX_volume', '^VIX_volume'])
    # Remove incomplete rows first so percentage changes are computed between
    # consecutive complete rows.
    df = df.dropna().copy()

    feature_cols = [col for col in df.columns if col != "target"]
    for col in feature_cols:
        df[col] = df[col].pct_change()
    # One dropna after all columns are converted loses only the first row.
    # (Dropping inside the per-column loop threw away one extra row per feature
    # and scaled each column over a different set of rows.)
    df = df.dropna().copy()
    for col in feature_cols:
        df[col] = preprocessing.scale(df[col].values)

    # Build the sliding windows; features and target are taken by name so the
    # result does not depend on 'target' being the last column.
    feature_rows = df[feature_cols].values
    targets = df["target"].values
    window = deque(maxlen=SEQ_LEN)
    sequential_data = []
    for row, target in zip(feature_rows, targets):
        window.append(row)
        if len(window) == SEQ_LEN:
            sequential_data.append([np.array(window), target])

    random.shuffle(sequential_data)

    # Balance the data: group samples per class, then cut each group to the size
    # of the smallest one. Keys: 4 big increase, 3 small increase,
    # 2 stay consistent, 1 small decrease, 0 big decrease.
    by_class = {label: [] for label in (4, 3, 2, 1, 0)}
    for seq, target in sequential_data:
        if target in by_class:
            by_class[target].append([seq, target])

    for samples in by_class.values():
        random.shuffle(samples)

    lower = min(len(samples) for samples in by_class.values())

    sequential_data = [
        sample
        for samples in by_class.values()
        for sample in samples[:lower]
    ]
    random.shuffle(sequential_data)

    X = [seq for seq, _ in sequential_data]
    y = [target for _, target in sequential_data]

    return np.array(X), y

def train_valid_data(main_df):
    """Label the frame and split it chronologically into training and validation samples.

    Adds two columns to `main_df` in place:
      - 'future': the STOCK_TO_PREDICT close shifted FUTURE_PERIOD_PREDICT rows back,
      - 'target': classify(current close, future close) for every row.
    The last int(SPLIT_RATIO * len(main_df)) index values form the validation
    part, everything before them the training part; both parts are passed
    through preprocess_df.

    Args:
        main_df: frame containing at least the "<STOCK_TO_PREDICT>_close" column
            (plus whatever preprocess_df expects).

    Returns:
        (train_x, train_y, validation_x, validation_y)

    Raises:
        ValueError: if SPLIT_RATIO and the number of rows do not leave at least
            one row on each side of the split.
    """
    close = main_df[f"{STOCK_TO_PREDICT}_close"]
    main_df['future'] = close.shift(-FUTURE_PERIOD_PREDICT)
    main_df['target'] = list(map(classify, close, main_df["future"]))

    n_validation = int(SPLIT_RATIO*len(main_df))
    # Without this guard a count of 0 indexes times[-0] == times[0], which puts
    # every row into the validation part and leaves the training part empty.
    if not 1 <= n_validation < len(main_df):
        raise ValueError(
            f"SPLIT_RATIO={SPLIT_RATIO} with {len(main_df)} rows gives "
            f"{n_validation} validation rows; need at least one row on each side of the split"
        )

    times = sorted(main_df.index.values) #Sort the index array
    last_pct = times[-n_validation]
    validation_main_df = main_df[(main_df.index>=last_pct)]
    train_main_df = main_df[(main_df.index<last_pct)]
    train_x,train_y = preprocess_df(train_main_df)
    validation_x,validation_y = preprocess_df(validation_main_df)
    return train_x,train_y,validation_x,validation_y

def data_prep_summary(train_x=None, train_y=None, validation_x=None):
    """Print the sample counts and the per-class distribution of the training labels.

    Args:
        train_x: training samples (anything supporting len()).
        train_y: training targets; any iterable of class ids 0..4.
        validation_x: validation samples (anything supporting len()).
        Any argument left as None is looked up as a module-level variable of the
        same name, which keeps the original zero-argument call working when those
        globals exist.

    Raises:
        NameError: if an argument is omitted and no global of that name exists.
    """
    values = {"train_x": train_x, "train_y": train_y, "validation_x": validation_x}
    for name in list(values):
        if values[name] is None:
            if name not in globals():
                raise NameError(
                    f"name '{name}' is not defined; pass it to data_prep_summary() explicitly"
                )
            values[name] = globals()[name]
    train_x, train_y, validation_x = values["train_x"], values["train_y"], values["validation_x"]

    # list() so that numpy arrays (which have no .count) are accepted as well.
    labels = list(train_y)
    print(f"train data: {len(train_x)} validation: {len(validation_x)}")
    print(f"Big Bear: {labels.count(0)} Small Bear:{labels.count(1)} Consistent: {labels.count(2)} Small Bull: {labels.count(3)} Big Bull: {labels.count(4)}")
  

## 3. Build the RNN+LSTM Model

In [0]:
def create_model(train_x):
    """Assemble the (uncompiled) stacked-LSTM classifier.

    Architecture: three CuDNNLSTM(128) stages, each followed by Dropout(0.2) and
    BatchNormalization, then Dense(32, relu) + Dropout(0.2) and a 5-way softmax
    output layer.

    Args:
        train_x: array-like whose shape[1:] gives the per-sample input shape.

    Returns:
        The keras Sequential model.
    """
    # Per-stage keyword arguments: the first stage declares the input shape, and
    # every stage except the last hands full sequences to the next one.
    lstm_stage_kwargs = (
        {"input_shape": train_x.shape[1:], "return_sequences": True},
        {"return_sequences": True},
        {},
    )

    net = Sequential()
    for stage_kwargs in lstm_stage_kwargs:
        net.add(CuDNNLSTM(128, **stage_kwargs))
        net.add(Dropout(0.2))
        net.add(BatchNormalization())

    # Classification head.
    net.add(Dense(32, activation="relu"))
    net.add(Dropout(0.2))
    net.add(Dense(5, activation="softmax"))
    return net

## 4. Train Model

In [0]:
def train_main(stocks,SEQ_LEN,FUTURE_PERIOD_PREDICT,STOCK_TO_PREDICT,SPLIT_RATIO,BIG_BULL,SML_BULL,BIG_BEAR,SML_BEAR,EPOCHS,BATCH_SIZE,NAME):
    """Download the data, build the model and train it, checkpointing the best epoch.

    Args:
        stocks: tickers passed to prepare_data.
        EPOCHS, BATCH_SIZE: forwarded to model.fit.
        NAME: run name; TensorBoard logs go to logs/<NAME>.
        SEQ_LEN, FUTURE_PERIOD_PREDICT, STOCK_TO_PREDICT, SPLIT_RATIO, BIG_BULL,
        SML_BULL, BIG_BEAR, SML_BEAR: accepted for interface compatibility.
            NOTE(review): this function does not use them, and the helpers it
            calls are not given them either; in the visible code those helpers
            read the module-level constants of the same names, so passing
            different values here would have no effect.

    Returns:
        The History object returned by model.fit.

    Side effects:
        Writes TensorBoard logs to logs/<NAME> and saves the model to
        models/RNNLSTM_Final_Best.hdf5 whenever the monitored metric improves.
    """
    main_df=prepare_data(stocks)
    train_x,train_y,validation_x,validation_y=train_valid_data(main_df)
    model=create_model(train_x)
    model.compile(loss='sparse_categorical_crossentropy',
             optimizer=keras.optimizers.Adam(lr=0.001, decay=1e-6),
             metrics=['accuracy'])
    tensorboard=TensorBoard(log_dir=f'logs/{NAME}')
    filepath="RNNLSTM_Final_Best"
    # The options belong to ModelCheckpoint. They used to sit inside the
    # str.format() call, where they were ignored, so the callback ran with its
    # defaults and overwrote the "best" file after every epoch.
    checkpoint=ModelCheckpoint("models/{}.hdf5".format(filepath),
                               monitor='val_acc', verbose=1, save_best_only=True, mode='max')
    history=model.fit(train_x,train_y,batch_size=BATCH_SIZE,epochs=EPOCHS,validation_data=(validation_x,validation_y),callbacks=[tensorboard,checkpoint])
    return history

#train_main(stocks,SEQ_LEN,FUTURE_PERIOD_PREDICT,STOCK_TO_PREDICT,SPLIT_RATIO,BIG_BULL,SML_BULL,BIG_BEAR,SML_BEAR,EPOCHS,BATCH_SIZE,NAME)

Set up TensorBoard on Google Colab

In [0]:
# One-time setup (kept commented out): download and unpack the ngrok binary that
# is started below as ./ngrok.
# !wget https://bin.equinox.io/c/4VmDzA7iaHb/ngrok-stable-linux-amd64.zip
# !unzip ngrok-stable-linux-amd64.zip

# TensorBoard callback that writes to ./log.
# NOTE(review): in the cells visible here this object is never passed to fit();
# train_main builds its own callback that logs to logs/{NAME}, so the server
# started below on ./log would not show those runs. Confirm which directory is intended.
tensorboard=TensorBoard(log_dir='./log', histogram_freq=1,
                         write_graph=True,
                         write_grads=True,
                         batch_size=BATCH_SIZE,
                         write_images=True) #tensorboard on google colab
LOG_DIR = './log'
# Start the TensorBoard server in the background on port 6006 of the Colab VM.
get_ipython().system_raw(
    'tensorboard --logdir {} --host 0.0.0.0 --port 6006 &'
    .format(LOG_DIR)
)

# Start ngrok in the background with an HTTP tunnel to port 6006.
get_ipython().system_raw('./ngrok http 6006 &')

# Query ngrok's local API (port 4040) and print the public URL of the first tunnel.
! curl -s http://localhost:4040/api/tunnels | python3 -c \
    "import sys, json; print(json.load(sys.stdin)['tunnels'][0]['public_url'])"

## 5. Load Model and Make Prediction

In [0]:
def process_input(df,train_x):
    """Build the single model input from the most recent rows of the price frame.

    The frame is preprocessed like the training data: the volume columns of ^TNX
    and ^VIX and the label columns are removed, incomplete rows are dropped,
    every remaining column is converted to percentage change and standardised
    with sklearn's preprocessing.scale. The last train_x.shape[1] rows are then
    returned as one batch.

    Args:
        df: recent slice of the joined price frame. The 'future' and 'target'
            columns are removed when present. The caller's frame is not modified.
        train_x: training array; only its shape[1] (rows per sample) and shape[2]
            (features per row) are used.

    Returns:
        numpy array of shape (1, train_x.shape[1], train_x.shape[2]).

    Raises:
        KeyError: if '^TNX_volume' or '^VIX_volume' is missing.
        ValueError: if fewer complete rows than train_x.shape[1] remain, or the
            number of feature columns differs from train_x.shape[2].
    """
    # Keyword form: the positional axis argument of DataFrame.drop (`drop(col, 1)`)
    # is no longer accepted by pandas >= 2.0.
    df = df.drop(columns=['^TNX_volume', '^VIX_volume'])  # 10 yr bond / volatility index - no volume
    df = df.drop(columns=['future', 'target'], errors='ignore')
    # Drop incomplete rows before differencing, as the training preprocessing does,
    # so changes are computed between consecutive complete rows.
    df = df.dropna().copy()
    for col in df.columns:
        df[col] = df[col].pct_change() #Percentage change between the current and a prior element.
    # Remove the leading NaN row before scaling so the statistics only see real values.
    df = df.dropna().copy()
    for col in df.columns:
        df[col] = preprocessing.scale(df[col].values)

    x,y = train_x.shape[1],train_x.shape[2]
    if len(df) < x:
        raise ValueError(
            f"need at least {x} complete rows after preprocessing, got {len(df)}"
        )
    if df.shape[1] != y:
        raise ValueError(
            f"expected {y} feature columns, got {df.shape[1]}"
        )
    return df.values[-x:,:].reshape(1,x,y)

In [0]:
def make_prediction(stocks):
    """Load the saved weights and print a forecast for STOCK_TO_PREDICT.

    Downloads fresh data, rebuilds the network, loads
    models/RNNLSTM_Final_Best.hdf5, feeds the most recent rows through the model
    and prints the predicted class with the probability of each class.

    Args:
        stocks: tickers passed to prepare_data.

    Returns:
        None; the result is printed.
    """
    main_df=prepare_data(stocks)
    # train_valid_data is run only for the shape of train_x, which defines the
    # model's input shape and the window cut out by process_input.
    train_x,train_y,validation_x,validation_y=train_valid_data(main_df)
    model=create_model(train_x)
    model.load_weights("models/RNNLSTM_Final_Best.hdf5")

    # Take 1.5 * SEQ_LEN rows so SEQ_LEN rows are left once process_input has
    # dropped incomplete rows. (Named `recent` instead of shadowing the builtin `input`.)
    recent=main_df.iloc[-int(1.5*SEQ_LEN):,:]
    model_input=process_input(recent,train_x)
    # One forward pass: predict() returns the softmax probabilities and the class
    # is their argmax. This replaces predict_classes()/predict_proba(), which are
    # not available in newer Keras releases and ran the network twice.
    output_prob = model.predict(model_input)
    output = int(np.argmax(output_prob, axis=-1)[0])

    descriptions = {
        4: "significantly increase",
        3: "increase",
        1: "decrease",
        0: "significantly decrease",
    }
    pred = descriptions.get(output, "stay consistent")
    print(f"The stock price for {STOCK_TO_PREDICT} in the next {FUTURE_PERIOD_PREDICT} days will {pred}")
    print("Details:")
    print("Big Bull:", f"{output_prob[0][4]:.02%}")
    print("Small Bull:", f"{output_prob[0][3]:.02%}")
    print("Consistent:", f"{output_prob[0][2]:.02%}")
    print("Small Bear:", f"{output_prob[0][1]:.02%}")
    print("Big Bear:", f"{output_prob[0][0]:.02%}")
 

In [11]:
# Run the prediction pipeline for the configured tickers; the forecast is printed.
make_prediction(stocks)

Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
The stock price for TQQQ in the next 5 days will increase
Details:
Big Bull: 11.10%
Small Bull: 87.74%
Consistent: 1.10%
Small Bear: 0.07%
Big Bear: 0.01%
