<a href="https://colab.research.google.com/github/manishmawatwal/DataScience/blob/main/StockMovementClassifier.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [118]:
#Description: Use stock indicators with machine learning to predict whether a stock's price will go up or down the next day

In [119]:
#import libraries
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from pandas_datareader import data as web
import requests
from datetime import datetime
import matplotlib.pyplot as plt
plt.style.use('fivethirtyeight')
from bokeh.plotting import figure, show

In [None]:
#Portfolio configuration: tickers, weights, date range.
#
#Alternative data source (disabled): upload a CSV in Colab instead of
#downloading the prices.
#  from google.colab import files
#  files.upload()
#  df = pd.read_csv('_filename_.csv')
#  df = df.set_index(pd.DatetimeIndex(df['Date'].values))
#  df.drop(columns=['Date'], axis = 1, inplace = True)

#get stock symbol/tickers in the portfolio
#NOTE(review): Meta's ticker is believed to have changed from FB to META in 2022 - confirm the data source still resolves 'FB'
stockSymbols = ['GOOG', 'TSLA', 'NFLX', 'FB', 'AMZN', 'AAPL']
#Assign weights to the stocks (one weight per ticker, in the same order as stockSymbols)
weights = np.array([0.2, 0.2, 0.2, 0.2, 0.1, 0.1])
#get the stock/portfolio starting date
stockStartDate = '2013-01-01'
#get the ending date in the format YYYY-mm-dd / today's date
today = datetime.today().strftime('%Y-%m-%d')
#empty dataframe, filled in by later cells
df = pd.DataFrame()
#get the number of assets in the portfolio
numAssets = len(stockSymbols)
#fail early on a misconfigured portfolio: the weights are multiplied
#position-by-position with per-ticker statistics further down
if len(weights) != numAssets:
  raise ValueError('Expected one weight per ticker: got ' + str(len(weights)) + ' weights for ' + str(numAssets) + ' tickers')
if not np.isclose(weights.sum(), 1.0):
  raise ValueError('Portfolio weights must sum to 1, got ' + str(weights.sum()))
print('You have '+str(numAssets)+' assets in your portfolio')

In [None]:
#create function to get the stock price in the portfolio
def getMyPortfolio(stocks = stockSymbols, start = stockStartDate, end = today, col='Adj Close'):
  """Download price history for `stocks` and return a single price field.

  Asks pandas-datareader's 'yahoo' data source for the period `start`..`end`
  and selects the `col` entry (default 'Adj Close') from the result.
  """
  history = web.DataReader(stocks, data_source = 'yahoo', start = start, end = end)
  return history[col]

#download the Adj Close price of every ticker in the portfolio
#(the default `col` of getMyPortfolio is 'Adj Close')
my_stocks = getMyPortfolio(stocks = stockSymbols)
#create a function to visualize the portfolio
def showGraph(stocks = stockSymbols, start = stockStartDate, end = today, col = 'Adj Close'):
  """Plot the price history of every stock in the portfolio on one chart.

  Fetches the `col` price field for `stocks` between `start` and `end`
  through getMyPortfolio and draws one line per column of the result.
  Returns nothing; the figure is rendered with plt.show().
  """
  #Create the title for the portfolio
  title = 'Portfolio '+col+' Price History'
  #get the stocks (local name, so the notebook-level my_stocks is not shadowed)
  prices = getMyPortfolio(stocks = stocks, start= start, end =end, col=col)
  #give the figure size
  plt.figure(figsize=(12.2, 4.5))
  #loop through each stock and plot the price
  for c in prices.columns.values:
    plt.plot(prices[c], label = c, alpha = 0.7)
  plt.title(title)
  plt.xlabel('Date', fontsize = 18)
  #leading space so the label reads e.g. 'Adj Close Price USD ($)'
  plt.ylabel(col+' Price USD ($)', fontsize = 18)
  #every line already carries its label, so legend() needs no explicit names
  plt.legend(loc = 'upper left')
  plt.show()

#show the adjusted close price history of the portfolio
showGraph(stockSymbols)

In [None]:
#calculate the daily simple returns (percentage change versus the previous row)
daily_simple_returns = my_stocks.pct_change(1)
#NOTE: only the last expression of a cell is rendered, so the bare statistics
#below are evaluated but not displayed; move one to its own cell to inspect it
daily_simple_returns
#daily simple return correlation between the stocks
daily_simple_returns.corr()
#daily covariance matrix (not annualized; no scaling is applied here)
daily_simple_returns.cov()
#variance of the daily simple returns
daily_simple_returns.var()
#standard deviation of the daily simple returns
daily_simple_returns.std()
#mean of the daily simple returns
daily_simple_returns.mean()
#visualize the stocks daily simple returns
plt.figure(figsize=(12,4.5))
#loop through each stock and plot the simple returns
for c in daily_simple_returns.columns.values:
  plt.plot(daily_simple_returns.index, daily_simple_returns[c], lw=2, label=c, alpha = 0.7)
#create a legend
plt.legend(loc='upper right', fontsize = 10)
plt.title('Volatility')
plt.xlabel('Date')
plt.ylabel('Daily simple returns')
#finish with show() like the other plotting cells, so the cell does not echo
#the Text object returned by plt.ylabel
plt.show()

In [None]:

#number of trading days used to annualize daily figures
TRADING_DAYS_PER_YEAR = 252
#calculate the expected simple annual portfolio return
#(`weights` is expected to be the per-ticker numpy array from the configuration cell)
portfolioSimpleAnnualReturn = np.sum(daily_simple_returns.mean() * weights) * TRADING_DAYS_PER_YEAR
#create the annualized covariance matrix
cov_matrix_annual = daily_simple_returns.cov() * TRADING_DAYS_PER_YEAR
cov_matrix_annual
#calculate the portfolio variance: weights^T . covariance . weights
port_variance = np.dot(weights.T, np.dot(cov_matrix_annual, weights))
port_variance
#calculate the portfolio volatility aka standard deviation
port_volatility = np.sqrt(port_variance)
port_volatility
#express the annual return, volatility (risk), and variance as percentages;
#the '%' format spec scales and rounds in one step, which avoids artefacts
#such as round(0.29, 2) * 100 == 28.999999999999996
percent_var = '{:.2%}'.format(port_variance)
percent_vols = '{:.2%}'.format(port_volatility)
percent_ret = '{:.2%}'.format(portfolioSimpleAnnualReturn)
#print
print('Expected annual return: ' + percent_ret)
print('Annual volatility/risk: ' + percent_vols)
print('Annual variance: ' + percent_var)

In [None]:
#growth of the investment: running product of (1 + daily simple return)
dailyCumulSimpleReturn = (1 + daily_simple_returns).cumprod()
dailyCumulSimpleReturn
#one line per stock showing how the investment develops over time
plt.figure(figsize = (12.2, 4.5))
dates = dailyCumulSimpleReturn.index
for ticker in dailyCumulSimpleReturn.columns:
  growth = dailyCumulSimpleReturn[ticker]
  plt.plot(dates, growth, lw = 2, label = ticker, alpha = 0.7)

plt.title('Daily Cumulative Simple Returns')
plt.xlabel('Date')
plt.ylabel('Growth of $1 investment')
plt.legend(loc = 'upper left', fontsize = 10)
plt.show()

**Portfolio Optimization**

---



In [125]:
#pip install PyPortfolioOpt
#pip install pulp
from pypfopt.efficient_frontier import EfficientFrontier
from pypfopt import risk_models
from pypfopt import expected_returns

In [None]:
from pypfopt.discrete_allocation import DiscreteAllocation, get_latest_prices

#Portfolio Optimization
#Calculate the expected returns and the annualised sample covariance matrix of asset returns
mu = expected_returns.mean_historical_return(my_stocks)
s = risk_models.sample_cov(my_stocks)
#create Efficient Frontier Object
#optimize for max sharpe ratio
ef = EfficientFrontier(mu, s)
#keep the optimizer output under its own name: rebinding `weights` here would
#replace the numpy weight array that the portfolio statistics cell multiplies
#with, and break that cell when it is re-run
raw_weights = ef.max_sharpe()
cleaned_weights = ef.clean_weights()
print(cleaned_weights)
ef.portfolio_performance(verbose = True)
#get the discrete allocation of each share per stock
total_portfolio_value = 5000
latest_prices = get_latest_prices(my_stocks)
da = DiscreteAllocation(cleaned_weights, latest_prices, total_portfolio_value)

allocation , leftover = da.lp_portfolio()
print('Discrete allocation:', allocation)
print('Funds remaining: ${:.2f}'.format(leftover))

In [127]:
#create a function to get the companies name
def get_company_name(symbol):
  """Look up the company name for a ticker symbol.

  Queries the Yahoo autocomplete endpoint and scans the returned
  ResultSet/Result entries for one whose 'symbol' equals `symbol` exactly.

  Returns the entry's 'name', or None when no entry matches.
  Raises a requests exception on timeout or on a non-success HTTP status.
  """
  url = 'http://d.yimg.com/autoc.finance.yahoo.com/autoc'
  #let requests build the query string so the symbol is URL-encoded
  params = {'query': symbol, 'region': 1, 'lang': 'en'}
  #requests has no default timeout; without one a stalled server blocks the notebook
  response = requests.get(url, params = params, timeout = 10)
  #surface HTTP errors directly instead of failing later while parsing the body
  response.raise_for_status()
  result = response.json()
  for r in result['ResultSet']['Result']:
    if r['symbol'] == symbol:
      return r['name']
  return None

#walk the allocation once, collecting each company's name and its share count
company_name = []
discrete_allocation_list = []
for symbol, shares in allocation.items():
  company_name.append(get_company_name(symbol))
  discrete_allocation_list.append(shares)

In [None]:
#create a dataframe for the portfolio
discrete_col = 'Discrete_val_'+str(total_portfolio_value)
portfolio_df = pd.DataFrame({
  'Company_name': company_name,
  #explicit list of tickers: assigning the allocation dict itself to a column
  #may be aligned on its keys instead of iterated (behaviour differs across
  #pandas versions), which would leave the column empty
  'Company_Ticker': list(allocation),
  discrete_col: discrete_allocation_list,
})
#show the portfolio
portfolio_df

**Machine Learning**

---





In [129]:
#Create functions to calculate the Simple Moving Average (SMA) and Exponential Moving Average (EMA)
#typical time periods for moving averages are 15, 20, and 30
#create the simple moving average (SMA)
def SMA(data, period = 30, column = 'GOOG'):
  """Return the simple moving average of `data[column]`.

  The mean is taken over a rolling window of `period` rows; `data` itself
  is not modified.
  """
  rolling_window = data[column].rolling(window = period)
  return rolling_window.mean()

#create the exponential moving average (EMA)
def EMA(data, period = 20, column = 'GOOG'):
  """Return the exponential moving average of `data[column]`.

  Uses pandas' exponentially weighted window with span `period` and
  adjust = False; `data` itself is not modified.
  """
  weighted_window = data[column].ewm(span = period, adjust = False)
  return weighted_window.mean()

In [130]:
#create a function to calculate the Moving Average Convergence/Divergence (MACD)
def MACD(data, period_long = 26, period_short = 12, period_signal = 9, column = 'GOOG'):
  """Add the MACD indicator to `data` in place and return the same frame.

  Writes two columns:
    'MACD'        - EMA(period_short) minus EMA(period_long) of `column`
    'Signal_Line' - EMA(period_signal) of the 'MACD' column
  """
  #slow and fast averages of the price column
  slow_ema = EMA(data, period = period_long, column = column)
  fast_ema = EMA(data, period = period_short, column = column)
  #the MACD line is the gap between the fast and the slow average
  data['MACD'] = fast_ema - slow_ema
  #the signal line smooths the MACD line that was just stored
  signal = EMA(data, period = period_signal, column = 'MACD')
  data['Signal_Line'] = signal
  return data

In [131]:
#create a function to calculate Relative Strength Index (RSI)
def RSI(data, period = 14, column = 'GOOG'):
  """Add the Relative Strength Index of `column` to `data` in place and return it.

  Writes three columns: 'up' (row-to-row changes with negative values set to
  0), 'down' (changes with positive values set to 0) and 'RSI', computed
  from the `period`-row simple moving averages of 'up' and 'down'.
  """
  #row-to-row change; the first row has no predecessor and is dropped
  change = data[column].diff(1).dropna()
  #split the change into its non-negative and non-positive parts
  data['up'] = change.mask(change < 0, 0)
  data['down'] = change.mask(change > 0, 0)
  #average gain and (absolute) average loss over the window
  mean_gain = SMA(data, period, column = 'up')
  mean_loss = SMA(data, period, column = 'down').abs()
  relative_strength = mean_gain / mean_loss
  data['RSI'] = 100.0 - (100.0 / (1.0 + relative_strength))
  return data

In [None]:
#Build the modelling data set from a copy of the GOOG price column.
#MACD() and RSI() add their columns to the frame they receive, so working on a
#copy keeps my_stocks a pure price table, and gathering every indicator and
#the Target in `df` gives the training cell all the columns it selects.
df = my_stocks[['GOOG']].copy()
MACD(df)
RSI(df)
df['SMA'] = SMA(df)
df['EMA'] = EMA(df)
#create the Target column
#1 if tomorrow's close price is higher than today's, otherwise 0
df['Target'] = np.where(df['GOOG'].shift(-1) > df['GOOG'], 1, 0)
#the last row has no next-day price, so its Target would not be a real label
df = df.iloc[:-1]
#drop the warm-up rows in which an indicator is still NaN
#(with the default 30-row SMA these are the first 29 rows)
df = df.dropna(subset = ['MACD', 'Signal_Line', 'RSI', 'SMA', 'EMA'])
#show the data
df

In [None]:
#split the data set into a feature or independent data set (X) and a Target or dependent data set (Y)
keep_columns = ['GOOG', 'MACD', 'Signal_Line', 'RSI', 'SMA', 'EMA']
X = df[keep_columns].values
Y = df['Target'].values
#split the data again but this time into 80% training and 20% testing data set
#NOTE(review): train_test_split shuffles rows by default; for time-ordered
#prices that puts later days into the training set - consider shuffle = False
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size = 0.2, random_state = 2)
#create and train the decision tree classifier model
#seed the tree as well, so the scores are reproducible from run to run
tree = DecisionTreeClassifier(random_state = 2).fit(X_train, Y_train)
#check how well the model did on the training data set
print(tree.score(X_train, Y_train))

In [None]:
#predict the held-out rows once, then report accuracy followed by the predictions
tree_predictions = tree.predict(X_test)
test_accuracy = tree.score(X_test, Y_test)
print(test_accuracy)
print(tree_predictions)
#actual values from the test data, for comparison with the predictions
Y_test

In [None]:
#summarise the test-set predictions as a classification report
from sklearn.metrics import classification_report
report = classification_report(Y_test, tree_predictions)
print(report)