<a href="https://colab.research.google.com/github/manishmawatwal/DataScience/blob/main/StockMovementClassifier.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
#Description: Use stock indicators with Machine Learning to predict whether the next day's closing price will rise

In [2]:
#install the packages below if they are missing
#pip install scikit-learn
#pip install yfinance
#pip install bokeh

In [None]:
#import libraries
from datetime import datetime

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import requests
import streamlit as st
import yfinance as yf
from bokeh.plotting import figure, show
from pandas_datareader import data as web
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier

plt.style.use('fivethirtyeight')

In [None]:
#tickers that make up the portfolio
assets = ['GOOG']
#first date of the price history to download
stockStartDate = '2013-01-01'
#last date of the price history: the current date formatted as YYYY-MM-DD
today = f"{datetime.today():%Y-%m-%d}"
#empty frame that will receive one price column per ticker
df = pd.DataFrame()

In [None]:
#download the daily close price of every ticker into df and display it
#yfinance's Ticker.history (the same API the Stock Price App cell uses) does the download;
#auto_adjust=False keeps 'Close' as the unadjusted close, matching the column read before
#NOTE(review): pandas_datareader's data_source='yahoo' reader is reported to fail against
#the current Yahoo endpoints - confirm before switching back to web.DataReader
for stock in assets:
  history = yf.Ticker(stock).history(start=stockStartDate, end=today, auto_adjust=False)
  #fail loudly here instead of carrying an empty column into the indicator cells
  if history.empty:
    raise ValueError(f"no price data returned for {stock}")
  closes = history['Close']
  #drop any exchange time zone so that tickers from different exchanges align on plain dates
  closes.index = closes.index.tz_localize(None)
  df[stock] = closes

#show the df
df

In [None]:
#Streamlit "Stock Price App": a title followed by close-price and volume line charts
#NOTE(review): the st.* calls presumably only render under `streamlit run` - confirm they belong in the notebook
st.write(""" Stock Price App """)
ticker_symbol = 'GOOGL'
tickerData = yf.Ticker(ticker_symbol)
#the date range is fixed by start/end, so no `period` is passed (yfinance documents period
#and start/end as alternatives); interval='1d' asks for one row per trading day
tickerDf = tickerData.history(interval='1d', start='2010-5-31', end='2020-5-31')
st.line_chart(tickerDf.Close)
st.line_chart(tickerDf.Volume)

In [None]:
#Create functions to calculate the Simple Moving Average (SMA) and Exponential Moving Average (EMA)
#typical time periods for moving averages are 15, 20, and 30
#simple moving average (SMA)
def SMA(data, period = 30, column = 'GOOG'):
  """Return the rolling mean of data[column] over a window of `period` rows.

  Rows whose window is not yet full (the first period - 1 rows) come back as NaN.
  `data` itself is not modified.
  """
  prices = data[column]
  windows = prices.rolling(window = period)
  return windows.mean()

#exponential moving average (EMA)
def EMA(data, period = 20, column = 'GOOG'):
  """Return the exponentially weighted mean of data[column] with span `period`.

  adjust = False selects the recursive form of the average, which starts
  from the first value of the column. `data` itself is not modified.
  """
  prices = data[column]
  weighted = prices.ewm(span = period, adjust = False)
  return weighted.mean()

In [None]:
#Moving Average Convergence/Divergence (MACD)
def MACD(data, period_long = 26, period_short = 12, period_signal = 9, column = 'GOOG'):
  """Add 'MACD' and 'Signal_Line' columns to `data` in place and return `data`.

  'MACD' is the EMA of data[column] over period_short rows minus its EMA over
  period_long rows; 'Signal_Line' is the EMA of the 'MACD' column over
  period_signal rows.
  """
  #short-term EMA minus long-term EMA, stored straight into the frame
  data['MACD'] = EMA(data, period = period_short, column = column) - EMA(data, period = period_long, column = column)
  #the signal line smooths the MACD column that was just stored
  data['Signal_Line'] = EMA(data, column = 'MACD', period = period_signal)
  return data

In [None]:
#Relative Strength Index (RSI)
def RSI(data, period = 14, column = 'GOOG'):
  """Add 'up', 'down' and 'RSI' columns to `data` in place and return `data`.

  'up' holds the row-to-row gains of data[column] (losses replaced by 0) and
  'down' the losses (gains replaced by 0). RSI is 100 - 100 / (1 + RS), where
  RS is the simple moving average of the gains divided by the absolute simple
  moving average of the losses, both over `period` rows.
  """
  #row-to-row change; the first row has no predecessor, so its NaN is dropped
  change = data[column].diff(1).dropna()
  #keep the helper columns in the frame: SMA reads them back by column name
  data['up'] = change.clip(lower = 0)
  data['down'] = change.clip(upper = 0)
  avg_gain = SMA(data, period, column = 'up')
  avg_loss = abs(SMA(data, period, column = 'down'))
  relative_strength = avg_gain / avg_loss
  data['RSI'] = 100.0 - (100.0 / (1.0 + relative_strength))
  return data

In [None]:
#attach every indicator to the data set
#MACD and RSI write their columns into df themselves and hand the same frame back,
#so they can be chained; SMA and EMA return a series that is stored here
RSI(MACD(df))
df['SMA'] = SMA(df)
df['EMA'] = EMA(df)
#show the data
df

In [None]:
#create the Target column: 1 when the next row's close is above this row's close, else 0
next_close = df['GOOG'].shift(-1)
df['Target'] = np.where(next_close > df['GOOG'], 1, 0)
#the last row has no next close (shift(-1) leaves NaN there), so the comparison is False and
#the row would be labelled 0 without any evidence; drop it rather than train on a made-up label
#NOTE: because of this the cell is not idempotent - re-running it removes one more row
df = df[next_close.notna()]
#show the data
df

In [None]:
#remove the leading warm-up rows in which the indicators are still NaN
#(with the default periods these are the first 29 rows: the 30-row SMA is the slowest to fill)
#dropna() replaces the fixed df[29:] slice so that re-running the cell does not cut
#another 29 valid rows, and so that the cut follows the periods if they are changed
df = df.dropna()
#show the data
df

In [None]:
#separate the feature matrix (X, the independent data) from the label vector (Y, the dependent data)
keep_columns = ['GOOG', 'MACD', 'Signal_Line', 'RSI', 'SMA', 'EMA']
X = df.loc[:, keep_columns].to_numpy()
Y = df['Target'].to_numpy()

In [None]:
#hold out 20% of the rows for testing and train on the remaining 80%
#NOTE(review): train_test_split shuffles rows by default, so test days end up interleaved
#with training days - confirm this is acceptable for time-ordered prices
X_train, X_test, Y_train, Y_test = train_test_split(
  X,
  Y,
  test_size = 0.2,
  random_state = 2,
)

In [None]:
#create and train the decision tree classifier model
#random_state pins the randomness used while the tree is grown, so a re-run rebuilds
#the same tree and reproduces the scores printed below (2 matches the split seed)
tree = DecisionTreeClassifier(random_state = 2).fit(X_train, Y_train)

In [None]:
#accuracy of the fitted tree on the rows it was trained on
train_accuracy = tree.score(X_train, Y_train)
print(train_accuracy)

In [None]:
#accuracy of the fitted tree on the held-out test rows
test_accuracy = tree.score(X_test, Y_test)
print(test_accuracy)

In [None]:
#predict a label for every row of X_test with the fitted tree and print the labels
#tree_predictions is read again by the classification report cell further down
tree_predictions = tree.predict(X_test)
print(tree_predictions)

In [None]:
#show the actual labels of the test rows, for comparison with the printed predictions
Y_test

In [None]:
#get the model metrics: the classification report compares the test labels with the predictions
#classification_report is imported in the import cell at the top of the notebook
print(classification_report(Y_test, tree_predictions))