# Libraries

In [2]:
import pandas as pd
import numpy as np
import datetime as dt
import pandas_datareader.data as web
import tensorflow as tf 
from tensorflow import keras 
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Dense, LSTM, Dropout, BatchNormalization, Concatenate
from keras import optimizers
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings('ignore')

Using TensorFlow backend.


# Model with All Drivers 

In [4]:
def data(ticker,start,end):
    df = web.DataReader(ticker.upper(),'yahoo',start,end)[['Adj Close','High','Low']]
    df['sm3'] = df['Adj Close'].rolling(window=3).mean() #3 day moving average
    df['sm9'] = df['Adj Close'].rolling(window=9).mean() #9 day moving average
    drivers = ['MSFT','AAPL','GOOG','GOOGL','AMZN','FB','^VIX'] #make a large portion of the etf's holdings
    portfolio = pd.DataFrame() #create an emy dataframe to place data 
    failed = [] #empty list for failed queries
    for stock in drivers: #for loop for all the features I want to try
        try: #try clause just in case it failed
            portfolio[stock] = web.DataReader(stock,'yahoo',start,end)['Adj Close'] #adj close of each feature
            #moving averages to normalize the features
            portfolio[f'{stock} sma3'] = portfolio[stock].rolling(window=3).mean() #moving averages as features
            portfolio[f'{stock} sma9'] = portfolio[stock].rolling(window=9).mean()
            portfolio = portfolio.drop([stock],axis=1) #drop the adj close price
        except:
            failed.append(stock) #add failed query ticker to list
            print(f'{failed} was not properly calculated for. Are you sure this ticker is on an exchange?')
    data = pd.concat([df,portfolio],axis = 1).dropna() #combine both dataframes
    data['Target'] = data['Adj Close'].shift(-1) #shift adj close back 1: we are forecasting one day into the future
    data = data.drop(['Adj Close','High','Low'],axis =1) #drop these bc nn works better 
    return data.dropna() #no null values



def predict(df,pred_df):
    X = df[df.columns] #feature data
    del X['Target'] #don't want target variable in training data
    Y = df[['Target']] #label data
    x_train,x_test,y_train,y_test = train_test_split(X,Y,random_state=50,test_size=0.2) #training and testing
    x_val,x_test,y_val,y_test = train_test_split(x_test,y_test,random_state=50,test_size=0.5) #test and validation
    model = Sequential() #basic sequential model 
    model.add(Dense(100, input_dim=x_train.shape[1], #need the input shape of the data in tensorflow 2x
                        activation=tf.nn.leaky_relu, # was better than relu
                        kernel_initializer='he_normal'))
    model.add(Dense(75, input_dim=100, #100 'neurons' in the input layer
                        activation=tf.nn.leaky_relu,
                        kernel_initializer='he_normal'))
    model.add(Dense(50, input_dim=75, #75 'neurons' in the first hidden layer
                    activation=tf.nn.leaky_relu,
                    kernel_initializer='he_normal'))
    model.add(Dense(25, input_dim=50, #50 'neurons' in the second hidden layer
                    activation=tf.nn.leaky_relu,
                    kernel_initializer='he_normal'))
    model.add(Dense(1, activation=tf.nn.leaky_relu, #only one answer so you need one 'neuron'
                        kernel_initializer='he_normal'))
    model.compile(loss='mean_squared_error', #mse loss function
                      optimizer='adam', #adam optimizer
                      metrics=['mape']) #mean absolute percentage error metric to determine the performance of the model
    scaler = MinMaxScaler() #normalize the data since it is pretty different in terms of share price
    x_train_scaled = scaler.fit_transform(x_train) #apply the normalizer to the training features
    history = model.fit(x_train, y_train,  #fit the training data to the model
                        validation_data=(x_val, y_val), #validation data to better see how the model is doing
                        batch_size=32,
                        epochs=10,
                        verbose=0)
    #pred_df = pred[features.columns]
    pred_features = pred_df.iloc[-1] #these will be in the pred_data function 
    prediction = model.predict(np.array([pred_features])) #need it in numpy array 
    print(f'The predicted stock price for {ticker.upper()} tomorrow is ${float(prediction[0])}.')
    if float(prediction[0])>float(df['Target'].iloc[-1:].values):
        print('Buy: ', float(prediction[0]), '>',float(df['Target'].iloc[-1:].values),'\nPCT DIFF: ',(float(prediction[0]) - float(df['Target'].iloc[-1:].values))/float(df['Target'].iloc[-1:].values)*100,'%',)
    else:
        print('Sell: ', float(prediction[0]), '<',float(df['Target'].iloc[-1:].values),'\nPCT DIFF: ',(float(prediction[0]) - float(df['Target'].iloc[-1:].values))/float(df['Target'].iloc[-1:].values)*100,'%')



def pred_data(ticker,start,end):
    df = web.DataReader(ticker.upper(),'yahoo',start,end)[['Adj Close']] #query data of target security
    df['sm3'] = df['Adj Close'].rolling(window=3).mean() #moving averages
    df['sm9'] = df['Adj Close'].rolling(window=9).mean()
    drivers = ['MSFT','AAPL','GOOG','GOOGL','AMZN','FB','^VIX'] #drivers 
    failed = []
    portfolio = pd.DataFrame() #dataframe to add data to during the for loop
    for stock in drivers: #for loop
        try: #try clause
            portfolio[stock] = web.DataReader(stock,'yahoo',start,end)['Adj Close']
            portfolio[f'{stock} sma3'] = portfolio[stock].rolling(window=3).mean()
            portfolio[f'{stock} sma9'] = portfolio[stock].rolling(window=9).mean()
            portfolio = portfolio.drop([stock],axis=1) #drop adj close
        except:
            failed.append(stock) #add to failed list to see which ticker failed
            print(f'{failed} was not properly calculated for. Are you sure this ticker is on an exchange?')
    data = pd.concat([df,portfolio],axis = 1).dropna() #combine data lists
    data = data.drop(['Adj Close'],axis = 1)
    return data.dropna()

        
        
if __name__ == '__main__':
    ticker = 'spy' #target security
    start = dt.datetime.now() - dt.timedelta(days=365*5) #5 year time frame
    end = dt.datetime.now() #today
    predict(data(ticker,start,end),pred_data(ticker,start,end)) #calling the function we want to predict the target security

The predicted stock price for SPY tomorrow is $372.0769348144531.
Buy:  372.0769348144531 > 357.4599914550781 
PCT DIFF:  4.0891131060220784 %


# Model with Just Moving Averages

In [6]:
def data(ticker,start,end):
    df = web.DataReader(ticker.upper(),'yahoo',start,end)[['Adj Close','High','Low']]
    df['sm3'] = df['Adj Close'].rolling(window=3).mean() #3 day moving average
    df['sm9'] = df['Adj Close'].rolling(window=9).mean() #9 day moving average
    df['Target'] = df['Adj Close'].shift(-1) #shift adj close back 1: we are forecasting one day into the future
    df = df.drop(['Adj Close','High','Low'],axis =1) #drop these bc nn works better 
    return df.dropna() #no null values



def predict(df,pred_df):
    X = df[df.columns] #feature data
    del X['Target'] #don't want target variable in training data
    Y = df[['Target']] #label data
    x_train,x_test,y_train,y_test = train_test_split(X,Y,random_state=50,test_size=0.2) #training and testing
    x_val,x_test,y_val,y_test = train_test_split(x_test,y_test,random_state=50,test_size=0.5) #test and validation
    model = Sequential() #basic sequential model 
    model.add(Dense(100, input_dim=x_train.shape[1], #need the input shape of the data in tensorflow 2x
                        activation=tf.nn.leaky_relu, # was better than relu
                        kernel_initializer='he_normal'))
    model.add(Dense(75, input_dim=100, #100 'neurons' in the input layer
                        activation=tf.nn.leaky_relu,
                        kernel_initializer='he_normal'))
    model.add(Dense(50, input_dim=75, #75 'neurons' in the first hidden layer
                    activation=tf.nn.leaky_relu,
                    kernel_initializer='he_normal'))
    model.add(Dense(25, input_dim=50, #50 'neurons' in the second hidden layer
                    activation=tf.nn.leaky_relu,
                    kernel_initializer='he_normal'))
    model.add(Dense(1, activation=tf.nn.leaky_relu, #only one answer so you need one 'neuron'
                        kernel_initializer='he_normal'))
    model.compile(loss='mean_squared_error', #mse loss function
                      optimizer='adam', #adam optimizer
                      metrics=['mape']) #mean absolute percentage error metric to determine the performance of the model
    scaler = MinMaxScaler() #normalize the data since it is pretty different in terms of share price
    x_train_scaled = scaler.fit_transform(x_train) #apply the normalizer to the training features
    history = model.fit(x_train, y_train,  #fit the training data to the model
                        validation_data=(x_val, y_val), #validation data to better see how the model is doing
                        batch_size=32,
                        epochs=20,
                        verbose=1)
    #pred_df = pred[features.columns]
    pred_features = pred_df.iloc[-1] #these will be in the pred_data function 
    prediction = model.predict(np.array([pred_features])) #need it in numpy array 
    print(f'The predicted stock price for {ticker.upper()} tomorrow is ${float(prediction[0])}.')
    if float(prediction[0])>float(df['Target'].iloc[-1:].values):
        print('Buy: ', float(prediction[0]), '>',float(df['Target'].iloc[-1:].values),'\nPCT DIFF: ',(float(prediction[0]) - float(df['Target'].iloc[-1:].values))/float(df['Target'].iloc[-1:].values)*100,'%',)
    else:
        print('Sell: ', float(prediction[0]), '<',float(df['Target'].iloc[-1:].values),'\nPCT DIFF: ',(float(prediction[0]) - float(df['Target'].iloc[-1:].values))/float(df['Target'].iloc[-1:].values)*100,'%')



def pred_data(ticker,start,end):
    df = web.DataReader(ticker.upper(),'yahoo',start,end)[['Adj Close']] #query data of target security
    df['sm3'] = df['Adj Close'].rolling(window=3).mean() #moving averages
    df['sm9'] = df['Adj Close'].rolling(window=9).mean()
    df = df.drop(['Adj Close'],axis = 1)
    return df.dropna()

        
        
if __name__ == '__main__':
    ticker = 'spy' #target security
    start = dt.datetime.now() - dt.timedelta(days=365*5) #5 year time frame
    end = dt.datetime.now() #today
    predict(data(ticker,start,end),pred_data(ticker,start,end)) #calling the function we want to predict the target security

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
The predicted stock price for SPY tomorrow is $359.0047302246094.
Buy:  359.0047302246094 > 357.4599914550781 
PCT DIFF:  0.43214312271514077 %


# Model with Moving Averages and High / Low MA strategy

In [12]:
def data(ticker,start,end):
    df = web.DataReader(ticker.upper(),'yahoo',start,end)[['Adj Close','High','Low']]
    df['sm3'] = df['Adj Close'].rolling(window=3).mean() #3 day moving average
    df['sm9'] = df['Adj Close'].rolling(window=9).mean() #9 day moving average
    df['HighSM'] = df['High'].rolling(window=5).mean() #high moving average
    df['LowSM'] = df['Low'].rolling(window=5).mean() #low moving average
    df['Target'] = df['Adj Close'].shift(-1) #shift adj close back 1: we are forecasting one day into the future
    df = df.drop(['Adj Close','High','Low'],axis =1) #drop these bc nn works better 
    return df.dropna() #no null values



def predict(df,pred_df):
    X = df[df.columns] #feature data
    del X['Target'] #don't want target variable in training data
    Y = df[['Target']] #label data
    x_train,x_test,y_train,y_test = train_test_split(X,Y,random_state=50,test_size=0.2) #training and testing
    x_val,x_test,y_val,y_test = train_test_split(x_test,y_test,random_state=50,test_size=0.5) #test and validation
    model = Sequential() #basic sequential model 
    model.add(Dense(100, input_dim=x_train.shape[1], #need the input shape of the data in tensorflow 2x
                        activation=tf.nn.leaky_relu, # was better than relu
                        kernel_initializer='he_normal'))
    model.add(Dense(75, input_dim=100, #100 'neurons' in the input layer
                        activation=tf.nn.leaky_relu,
                        kernel_initializer='he_normal'))
    model.add(Dense(50, input_dim=75, #75 'neurons' in the first hidden layer
                    activation=tf.nn.leaky_relu,
                    kernel_initializer='he_normal'))
    model.add(Dense(25, input_dim=50, #50 'neurons' in the second hidden layer
                    activation=tf.nn.leaky_relu,
                    kernel_initializer='he_normal'))
    model.add(Dense(1, activation=tf.nn.leaky_relu, #only one answer so you need one 'neuron'
                        kernel_initializer='he_normal'))
    model.compile(loss='mean_squared_error', #mse loss function
                      optimizer='adam', #adam optimizer
                      metrics=['mape']) #mean absolute percentage error metric to determine the performance of the model
    scaler = MinMaxScaler() #normalize the data since it is pretty different in terms of share price
    x_train_scaled = scaler.fit_transform(x_train) #apply the normalizer to the training features
    history = model.fit(x_train, y_train,  #fit the training data to the model
                        validation_data=(x_val, y_val), #validation data to better see how the model is doing
                        batch_size=32,
                        epochs=20,
                        verbose=1)
    #pred_df = pred[features.columns]
    pred_features = pred_df.iloc[-1] #these will be in the pred_data function 
    prediction = model.predict(np.array([pred_features])) #need it in numpy array 
    print(f'The predicted stock price for {ticker.upper()} tomorrow is ${float(prediction[0])}.')
    if float(prediction[0])>float(df['Target'].iloc[-1:].values):
        print('Buy: ', float(prediction[0]), '>',float(df['Target'].iloc[-1:].values),'\nPCT DIFF: ',(float(prediction[0]) - float(df['Target'].iloc[-1:].values))/float(df['Target'].iloc[-1:].values)*100,'%',)
    else:
        print('Sell: ', float(prediction[0]), '<',float(df['Target'].iloc[-1:].values),'\nPCT DIFF: ',(float(prediction[0]) - float(df['Target'].iloc[-1:].values))/float(df['Target'].iloc[-1:].values)*100,'%')



def pred_data(ticker,start,end):
    df = web.DataReader(ticker.upper(),'yahoo',start,end)[['Adj Close','High','Low']] #query data of target security
    df['sm3'] = df['Adj Close'].rolling(window=3).mean() #moving averages (3 day)
    df['sm9'] = df['Adj Close'].rolling(window=9).mean() #moving averages (9 day)
    df['HighSM'] = df['High'].rolling(window=5).mean() #high moving average
    df['LowSM'] = df['Low'].rolling(window=5).mean() #low moving average
    df = df.drop(['Adj Close','High','Low'],axis = 1)
    return df.dropna()

        
        
if __name__ == '__main__':
    ticker = 'spy' #target security
    start = dt.datetime.now() - dt.timedelta(days=365*5) #5 year time frame
    end = dt.datetime.now() #today
    predict(data(ticker,start,end),pred_data(ticker,start,end)) #calling the function we want to predict the target security

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
The predicted stock price for SPY tomorrow is $359.84527587890625.
Buy:  359.84527587890625 > 357.4599914550781 
PCT DIFF:  0.6672871036891643 %
