In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns

import yfinance as yf
from copy import copy
from scipy import stats
import plotly.express as px
from plotly.tools import FigureFactory as ff
import plotly.graph_objects as go

%load_ext autoreload
%autoreload 2

!pip install --upgrade git+https://github.com/MTSUDataScience/DATA3500.git

from course_solutions.Data3500_ICPS import *

In [None]:
# Universe under study: eight individual stocks plus SPY (the S&P500 proxy).
tickers = ["AAPL", "BA", "T", "MGM", "AMZN", "IBM", "TSLA", "GOOG", "SPY"]

# auto_adjust=False is passed explicitly: recent yfinance releases default to
# auto_adjust=True, in which case the result has no 'Adj Close' column and the
# selection below raises a KeyError.
stocks_df = yf.download(
    tickers, start = "2012-01-01", end = "2020-12-31", auto_adjust = False
)['Adj Close'].reset_index()

# Clear the name left on the column axis by the column selection above.
stocks_df.columns.name=None

In [None]:
stocks_df.head()

In [None]:
stocks_df.tail()

In [None]:
# Sort the stock data by date and renumber the rows 0..n-1.
# Resetting the index makes label 0 the earliest date, so code that addresses
# rows by integer label and code that addresses rows by position agree.

stocks_df = stocks_df.sort_values(by = ['Date']).reset_index(drop = True)
stocks_df.head()

In [None]:
# Print out the number of stocks

print('Total Number of stocks : {}'.format(len(stocks_df.columns[1:])))

In [None]:
# Print the name of stocks

separator = '-'*25
print('Stocks under consideration are:')
print(separator)

# The first column is skipped; every remaining column name is printed on its own line.
for ticker_name in stocks_df.columns[1:]:
  print(ticker_name)

# **Problem #1:** 
- **What is the average price of the S&P500 (SPY)?**
- **Which stock or index has the minimum dispersion (lowest standard deviation) from the mean in dollar value?**
- **What is the maximum price for AMZN stock over the specified time period?** 

In [None]:
# Place your answer to #1 here.
# If you need help, you can use solution5_1().


## PERFORM EXPLORATORY DATA ANALYSIS AND BASIC VISUALIZATION

In [None]:
# Check if data contains any null values

stocks_df.isnull().sum()

In [None]:
# Getting dataframe info

stocks_df.info()

In [None]:
stocks_df.plot(x = 'Date', figsize = (15,7), linewidth = 3, title = 'Figure without Function')
plt.grid()
plt.show()

In [None]:
# Define a function to plot the entire dataframe
# The function takes in a dataframe df as an input argument and does not return anything back!
# The function performs data visualization
# Pandas works great with matplotlib, you can simply plot data directly from a Pandas DataFrame using plot() method

def show_plot(df, fig_title):
  """Render a static line chart of ``df`` with its 'Date' column on the x axis.

  Args:
    df: DataFrame containing a 'Date' column plus the series to draw.
    fig_title: Text used as the chart title.

  Returns:
    None. The chart is displayed with ``plt.show()``.
  """
  plot_options = dict(x = 'Date', figsize = (15,7), linewidth = 3, title = fig_title)
  df.plot(**plot_options)
  plt.grid()
  plt.show()

In [None]:
# Plot the data

show_plot(stocks_df, 'RAW STOCK PRICES (WITHOUT NORMALIZATION)')

In [None]:
stocks_df.head()

In [None]:
# Consider how to normalize our prices.
# Function to normalize the prices based on the initial price
# The function simply divides every stock by its price at the start date (i.e.: Date = 2012-01-03)

def normalize(df):
  """Scale each price column so that its first row equals 1.

  Args:
    df: DataFrame whose first column is the time column (left untouched) and
      whose remaining columns hold prices.

  Returns:
    A copy of ``df`` in which every non-time column has been divided by the
    value in its first row. ``df`` itself is not modified.
  """
  x = df.copy()

  # Nothing to scale when there are no rows.
  if len(x) == 0:
    return x

  # Loop through each stock (while ignoring the time column at position 0)
  for col in x.columns[1:]:
    # .iloc[0] selects the first row by position, so this also works when the
    # index does not contain the label 0 (e.g. after filtering or sorting).
    x[col] = x[col] / x[col].iloc[0]
  return x

In [None]:
normalize(stocks_df)

# **Problem #2:**
- **Graph normalized (scaled) stock prices using the two functions above**  


In [None]:
# Place your answer to #2 here.
# If you need help, you can use solution5_2().


## PERFORM INTERACTIVE DATA VISUALIZATION

In [None]:
# Function to perform an interactive data plotting using plotly express
# Plotly.express module which is imported as px includes functions that can plot interactive plots easily and effectively. 
# Every Plotly Express function uses graph objects internally and returns a plotly.graph_objects.Figure instance. 

def interactive_plot(df, title):
  """Show an interactive plotly chart with one trace per non-time column.

  Args:
    df: DataFrame with a 'Date' column (used as the x values) followed by
      the series to draw.
    title: Chart title.

  Returns:
    None. The figure is displayed with ``fig.show()``.
  """
  fig = px.line(title = title)
  dates = df['Date']

  # The column at position 0 is skipped; each remaining column becomes its own Scatter trace
  for column_name in df.columns[1:]:
    fig.add_scatter(x = dates, y = df[column_name], name = column_name)

  fig.show()

In [None]:
# Plot interactive chart
interactive_plot(stocks_df, 'Prices')

# **Problem #3:**
- **Plot normalized stock data in an interactive way**
- **It seems that most stocks experienced massive drops in 2020, let's assume that you own 100 shares of the S&P500 and you bought them on Feb 19th, 2020. How much did you lose (in $) by March 23rd, 2020?**


In [None]:
# Place your answer to #3 here.
# If you need help, you can use solution5_3().


solution5_3()

## CALCULATE INDIVIDUAL STOCKS PERCENT CHANGE

In [None]:
stocks_df['SPY']

In [None]:
# percent change = (today - yesterday) / yesterday

# today = 12
# yesterday = 10

# percent change = (12 - 10) / 10
# 2 / 10
# .2
# .2 * 100 = 20%

In [None]:
# Let's calculate daily return for a single security
# Let's take the S&P500 as an example first

df = stocks_df['SPY']

# Define a Series named df_daily_return holding the day-over-day percent change,
# (today - yesterday) / yesterday * 100, computed for the whole column at once.
# diff() and shift() operate by row position, so the result does not depend on
# the index labels being 0..n-1.
df_daily_return = (df.diff() / df.shift()) * 100

# put zero in the first line item (there is no previous day to compare against)
df_daily_return.iloc[0] = 0
df_daily_return

# **Problem #4:**
- **Calculate the daily return for Amazon stock (AMZN).**


In [None]:
# Place your answer to #4 here.
# If you need help, you can use solution5_4().


## CALCULATE MULTIPLE STOCKS DAILY RETURNS

In [None]:
stocks_df.head()

In [None]:
# Let's define a function to calculate stocks daily returns (for all stocks) 

def daily_return(df):
  """Compute row-over-row percent returns for every price column.

  Args:
    df: DataFrame whose first column is the time column (copied through
      unchanged) and whose remaining columns hold prices. Rows are assumed to
      already be in chronological order.

  Returns:
    A copy of ``df`` in which each price column holds
    ``(price - previous price) / previous price * 100``. The first row is 0
    because no previous value is available. ``df`` itself is not modified.
  """
  df_daily_return = df.copy()

  # Loop through each stock (while ignoring the time column at position 0)
  for col in df.columns[1:]:
    prices = df[col]

    # Whole-column computation by row position. This replaces element-wise
    # chained assignment (frame[col][row] = value), which is not guaranteed to
    # write into the frame and depends on the index labels being 0..n-1.
    pct_change = (prices.diff() / prices.shift()) * 100

    # set the value of first row to zero since the previous value is not available
    if len(pct_change) > 0:
      pct_change.iloc[0] = 0

    df_daily_return[col] = pct_change

  return df_daily_return

In [None]:
# Get the daily returns 

stocks_daily_return = daily_return(stocks_df)
stocks_daily_return

# **Problem #5:**
- **Plot the returns vs. time using both static and interactive plots**
- **What is the maximum daily return (in %) that you can read from the plots?**

In [None]:
# Place your answer to #5 here.
# If you need help, you can use solution5_5().


## CALCULATE THE CORRELATIONS BETWEEN DAILY RETURNS 

In [None]:
# Daily Return Correlation
cm = stocks_daily_return.drop(columns = ['Date']).corr()

In [None]:
plt.figure(figsize=(10, 10))
ax = plt.subplot()
sns.heatmap(cm, annot = True, ax = ax);

# **Problem #6:**
- **What are the top 2 stocks that are positively correlated with the S&P500 (SPY)?**
- **What is the correlation between Amazon (AMZN) and Boeing (BA)? Comment on your answer**
- **What is the correlation between MGM (MGM) and Boeing (BA)? Comment on your answer**

In [None]:
# Place your answer to #6 here.
# If you need help, you can use solution5_6().


## PLOT THE HISTOGRAM FOR DAILY RETURNS

In [None]:
# Histogram of daily returns
# Daily stock returns are roughly bell-shaped and centered close to zero
# Notice how Tesla Standard deviation is high indicating a more volatile stock

stocks_daily_return.hist(figsize=(10, 10), bins = 40);

# **Problem #7:**
- **Based on the histogram, which of the following stocks are more risky? T or TSLA**

In [None]:
# Place your answer to #7 here.
# If you need help, you can use solution5_7().


## LOOK AT THE RELATIONSHIP BETWEEN AAPL AND SPY USING A REGRESSION

In [None]:
#Beta for a single stock
#Select any stock

stocks_daily_return['AAPL']

In [None]:
# Select the Market (SPY)

stocks_daily_return['SPY']

In [None]:
# plot a scatter plot between the selected stock and the S&P500 (Market)

stocks_daily_return.plot(kind = 'scatter', x = 'SPY', y = 'AAPL');

In [None]:
# Fit a polynomial between the selected stock and the S&P500 (Poly with order = 1 is a straight line)

# beta represents the slope of the line regression line (market return vs. stock return). 
# Beta is a measure of the volatility or systematic risk of a security or portfolio compared to the entire market (S&P500) 
# Beta is used in the CAPM and describes the relationship between systematic risk and expected return for assets 

# Beta = 1.0, this indicates that its price activity is strongly correlated with the market. 
# Beta < 1, indicates that the security is theoretically less volatile than the market (Ex: Utility stocks). If the stock is included, this will make the portfolio less risky compared to the same portfolio without the stock.
# Beta > 1, indicates that the security's price is more volatile than the market. For instance, Tesla stock beta is 1.26 indicating that it's 26% more volatile than the market. 
# Tech stocks generally have higher betas than S&P500 but they also have excess returns
# MGM is 65% more volatile than the S&P500!


beta, alpha = np.polyfit(stocks_daily_return['SPY'], stocks_daily_return['AAPL'], 1)


print('Beta for {} stock is = {} and alpha is = {}'.format('AAPL', beta, alpha)) 

In [None]:
# Now let's plot the scatter plot and the straight line on one plot

stocks_daily_return.plot(kind = 'scatter', x = 'SPY', y = 'AAPL')

# Straight line equation with alpha and beta parameters 
# Straight line equation is y = beta * rm + alpha

plt.plot(stocks_daily_return['SPY'], beta * stocks_daily_return['SPY'] + alpha, '-', color = 'r');


In [None]:
# We can also do this with Seaborn and find the same line.

ax = sns.regplot(x="SPY", y="AAPL", data=stocks_daily_return)

# **Problem #8:**
- **Create the seaborn (sns) style graph for all stocks in our sample using a function. Remember that we always want SPY on the x axis and the y axis values will alternate across all of our stocks**

In [None]:
# Place your answer to #8 here.
# If you need help, you can use solution5_8().


## USING THE REGRESSION OUTPUT, PREDICT EXPECTED RETURN FOR A STOCK


In [None]:
# We can calculate the average daily rate of return for SPY (S&P 500)

stocks_daily_return['SPY'].mean()

In [None]:
# Let's calculate the annualized rate of return for S&P500
# Out of 365 days/year, stock exchanges are closed for 104 weekend days (Saturday and Sunday);
# removing exchange holidays as well leaves roughly 252 trading days, which is the factor used below.
# The mean daily return (already in percent) is scaled linearly by 252, i.e. it is not compounded.
# Check your answers with: https://dqydj.com/sp-500-return-calculator/

rm = stocks_daily_return['SPY'].mean() * 252
rm

In [None]:
# Assume risk free rate is zero
# Also you can use the yield of a 10-year U.S. Government bond as a risk free rate

rf = 0 

# Calculate the expected return for a security (AAPL) using CAPM:
#   expected return = rf + beta * (rm - rf)
# `beta` here is the single AAPL-vs-SPY slope fitted earlier with np.polyfit.
# NOTE: a later cell rebinds `beta` to a dictionary, so this cell must run before that one.

ER_AAPL = rf + ( beta * (rm-rf) ) 
ER_AAPL

## REPEAT THE ABOVE BUT TO ALL STOCKS, NOT JUST ONE

In [None]:
# Let's create a placeholder for all betas and alphas (empty dictionaries)

# NOTE: `beta` and `alpha` held single numbers for AAPL earlier in the notebook;
# from here on they are dictionaries keyed by ticker.
beta = {}
alpha = {}

market_return = stocks_daily_return['SPY']

# Visit every column of the daily-return frame
for i in stocks_daily_return.columns:

  # Skip the date column and the market (S&P500) column itself
  if i == 'Date' or i == 'SPY':
    continue

  # Scatter plot of this stock's daily return against the market's
  stocks_daily_return.plot(kind = 'scatter', x = 'SPY', y = i)

  # A degree-1 polynomial fit is a straight line; polyfit returns (slope, intercept)
  slope, intercept = np.polyfit(market_return, stocks_daily_return[i], 1)

  # Overlay the fitted line in red on the scatter plot
  plt.plot(market_return, slope * market_return + intercept, '-', color = 'r')

  beta[i], alpha[i] = slope, intercept

  plt.show()

In [None]:
# Let's view Beta for every stock 

beta

In [None]:
# Let's view alpha for each of the stocks
# Alpha describes the strategy's ability to beat the market (S&P500)
# Alpha indicates the “excess return” or “abnormal rate of return,” 
# Alpha is the intercept of the fitted line, i.e. the stock's fitted daily return when the SPY return is zero.
# The daily returns are already expressed in percent, so alpha is in percentage points per day:
# e.g. an alpha of 0.175 for Tesla would mean roughly 0.175% per day above what beta alone explains (not 17%).

alpha

# **Problem #9:**
- **Calculate the expected return for each stock using a function**

In [None]:
# Place your answer to #9 here.
# If you need help, you can use solution5_9().


In [None]:
# An alternative method that relies on already calculated inputs.

keys = list(beta.keys())

rf = 0 # assume risk free rate is zero in this case
rm = stocks_daily_return['SPY'].mean() * 252 # this is the expected return of the market 

# Market return in excess of the risk free rate, shared by every security
market_premium = rm - rf

# Define the expected return dictionary
ER = {}

for ticker, stock_beta in beta.items():
  # CAPM: expected return = rf + beta * (rm - rf)
  ER[ticker] = rf + ( stock_beta * market_premium )
  print('Expected Return Based on CAPM for {} is {}%'.format(ticker, ER[ticker]))

## CALCULATE THE EXPECTED RETURN FOR AN EQUAL WEIGHTED PORTFOLIO

In [None]:
# Equal weights: one entry per security in the ER dictionary, summing to 1.
# The count is taken from ER itself so that the weights always line up with the
# expected returns they are multiplied with in the portfolio calculation,
# regardless of how the columns of the returns frame happen to be ordered.
p_weights = np.ones(len(ER)) / len(ER)
p_weights

In [None]:
# Calculate the portfolio return 

ER_portfolio = sum(list(ER.values()) * p_weights)
ER_portfolio

In [None]:
print('Expected Return Based on CAPM for the portfolio is {:.2f}%.'.format(ER_portfolio))

# **Problem #10:**
- **Calculate the expected return for the portfolio assuming we have 50% in Apple, 30% in Amazon, and 20% in Google. You can use the ER dictionary above and the weight_dictionary created below as a starting point.**

In [None]:
# Place your answer to #10 here.
# If you need help, you can use solution5_10().

keys = list(beta.keys())

# Start every ticker at a weight of 0; the portfolio weights are filled in from here.
weight_dictionary = dict.fromkeys(keys, 0)

weight_dictionary