In [1]:
import numpy as np
import pandas as pd
import yfinance as yf
import matplotlib.pyplot as plt
from datetime import date
import questionary

In [None]:
today = date.today()


def sub_years(years, today=None):
    """Return the calendar date ``years`` years before ``today``.

    Parameters
    ----------
    years : int
        Number of years to go back.
    today : datetime.date, optional
        Reference date. Defaults to ``date.today()`` when omitted, which is
        the behaviour of the original one-argument form.

    Returns
    -------
    datetime.date
        Same month and day in the earlier year. When the reference date is
        Feb 29 and the target year is not a leap year, Mar 1 of the target
        year is returned.
    """
    if today is None:
        today = date.today()

    try:
        return today.replace(year=today.year - years)
    except ValueError:
        # Feb 29 does not exist in the target year. Shift by the number of
        # days between the two Jan 1sts; the difference is negative, so the
        # result lands in the past (Mar 1 of the target year).
        return today + (date(today.year - years, 1, 1) - date(today.year, 1, 1))


def start_end(today, years=10):
    """Return ``(start_date, end_date)`` as ``'YYYY-MM-DD'`` strings.

    Parameters
    ----------
    today : datetime.date
        End of the historical window.
    years : int, optional
        Length of the window in years (default 10).

    Returns
    -------
    tuple of (str, str)
        ``start_date`` is ``years`` years before ``today``; ``end_date`` is
        ``today``. Both are formatted with ``'%Y-%m-%d'`` so they can be
        passed to the Yahoo Finance download call.
    """
    # Anchor the start on the same `today` that defines the end, so the window
    # is exactly `years` long even when the caller passes a date other than
    # the current one.
    start_date = sub_years(years, today).strftime('%Y-%m-%d')
    end_date = today.strftime('%Y-%m-%d')

    return start_date, end_date

In [None]:
class CAPM:
    """Holds a ticker list and a date window, and fetches adjusted closes for them."""

    def __init__(self, stocks, start_date, end_date):
        """Store the ticker symbols and the date range; ``data`` starts out empty."""
        self.data = None
        self.stocks = stocks
        self.start_date, self.end_date = start_date, end_date

    def download_data(self):
        """Download every ticker in ``self.stocks`` and return their 'Adj Close' columns.

        Returns
        -------
        pandas.DataFrame
            One column per ticker symbol, in the order of ``self.stocks``.
        """
        # The adjusted close accounts for dividends, stock splits, etc., and is
        # therefore a more accurate reflection of the stock's value.
        adj_close = {
            symbol: yf.download(symbol, self.start_date, self.end_date)['Adj Close']
            for symbol in self.stocks
        }
        return pd.DataFrame(adj_close)


In [2]:
# Tickers to analyse; the last entry (^GSPC) is the market benchmark.
stocks = ['AAPL', 'AMZN', 'FB', 'TSLA', '^GSPC']

# Fixed historical window used for this exploratory run.
start_date, end_date = '2010-01-01', '2017-01-01'

# One adjusted-close series per ticker, keyed by symbol. The adjusted close
# accounts for dividends, stock splits, etc., so it reflects the stock's value
# more accurately than the raw close.
data = {}
for stock in stocks:
    ticker = yf.download(stock, start_date, end_date)
    data[stock] = ticker['Adj Close']

pd.DataFrame(data)

[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed


Unnamed: 0_level_0,AAPL,AMZN,FB,TSLA,^GSPC
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2010-01-04,6.562591,133.899994,,,1132.989990
2010-01-05,6.573935,134.690002,,,1136.520020
2010-01-06,6.469369,132.250000,,,1137.140015
2010-01-07,6.457407,130.000000,,,1141.689941
2010-01-08,6.500339,133.520004,,,1144.979980
...,...,...,...,...,...
2016-12-23,27.459557,760.590027,117.269997,42.667999,2263.790039
2016-12-27,27.633949,771.400024,118.010002,43.905998,2268.879883
2016-12-28,27.516117,772.130005,116.919998,43.948002,2249.919922
2016-12-29,27.509048,765.150024,116.349998,42.936001,2249.260010


In [3]:
# Keep the last available adjusted close of each calendar month.
# An explicit MonthEnd offset replaces the 'M' string alias, which pandas
# deprecated in 2.2 in favour of 'ME'; the offset object produces the same
# month-end bins regardless of which alias the installed pandas accepts.
stocks_data = pd.DataFrame(data).resample(pd.offsets.MonthEnd()).last()
stocks_data

Unnamed: 0_level_0,AAPL,AMZN,FB,TSLA,^GSPC
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2010-01-31,5.889495,125.410004,,,1073.869995
2010-02-28,6.274645,118.400002,,,1104.489990
2010-03-31,7.206243,135.770004,,,1169.430054
2010-04-30,8.006289,137.100006,,,1186.689941
2010-05-31,7.877193,125.459999,,,1089.410034
...,...,...,...,...,...
2016-08-31,24.876219,769.159973,126.120003,42.402000,2170.949951
2016-09-30,26.505720,837.309998,128.270004,40.806000,2168.270020
2016-10-31,26.620598,789.820007,130.990005,39.546001,2126.149902
2016-11-30,26.045574,750.570007,118.419998,37.880001,2198.810059


In [4]:
# Fill each missing value with the mean of the (up to) six most recent rows of
# the same column, then discard any row that still contains a NaN.
data_cleaned = (
    stocks_data
    .fillna(stocks_data.rolling(6, min_periods=1).mean())
    .dropna()
)

In [5]:
# The benchmark is the last column of the cleaned frame; keep it as a
# one-column DataFrame so further columns can be added to it.
sp500 = data_cleaned.iloc[:, -1].to_frame()

In [6]:
# Preview the first rows of the benchmark frame.
sp500.head()

Unnamed: 0_level_0,^GSPC
Date,Unnamed: 1_level_1
2012-05-31,1310.329956
2012-06-30,1362.160034
2012-07-31,1379.319946
2012-08-31,1406.579956
2012-09-30,1440.670044


In [7]:
# Take the benchmark price from the first column instead of summing every
# column: a row-wise sum would also include 'market_adjclose' once it exists,
# doubling the value each time this cell is re-run.
sp500['market_adjclose'] = sp500.iloc[:, 0]
sp500.head()

Unnamed: 0_level_0,^GSPC,market_adjclose
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2012-05-31,1310.329956,1310.329956
2012-06-30,1362.160034,1362.160034
2012-07-31,1379.319946,1379.319946
2012-08-31,1406.579956,1406.579956
2012-09-30,1440.670044,1440.670044


In [8]:
# Every column except the last one (the benchmark) is a sector constituent.
stocks = pd.DataFrame(data_cleaned.iloc[:, :-1])
stocks.head()

Unnamed: 0_level_0,AAPL,AMZN,FB,TSLA
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2012-05-31,17.716019,212.910004,29.6,5.9
2012-06-30,17.908285,228.350006,31.1,6.258
2012-07-31,18.728878,233.300003,21.709999,5.484
2012-08-31,20.487083,248.270004,18.059999,5.704
2012-09-30,20.544365,254.320007,21.66,5.856


In [9]:
# We need a single sector series to compare against the market, so we add up
# the adjusted closes of all stocks in the sector for each (monthly) row.
# 'sector_adjclose' itself is excluded from the sum so that re-running this
# cell does not fold the previous total back in and double it.
stocks['sector_adjclose'] = (
    stocks
    .drop(columns='sector_adjclose', errors='ignore')
    .sum(axis=1)
)
stocks.head()

Unnamed: 0_level_0,AAPL,AMZN,FB,TSLA,sector_adjclose
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2012-05-31,17.716019,212.910004,29.6,5.9,266.126023
2012-06-30,17.908285,228.350006,31.1,6.258,283.616292
2012-07-31,18.728878,233.300003,21.709999,5.484,279.22288
2012-08-31,20.487083,248.270004,18.059999,5.704,292.521087
2012-09-30,20.544365,254.320007,21.66,5.856,302.380372


In [12]:
# Put the sector total and the benchmark side by side, aligned on the date index.
data = pd.DataFrame(
    dict(
        stock_adjclose=stocks['sector_adjclose'],
        market_adjclose=sp500['market_adjclose'],
    )
)
data.head()

Unnamed: 0_level_0,stock_adjclose,market_adjclose
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2012-05-31,266.126023,1310.329956
2012-06-30,283.616292,1362.160034
2012-07-31,279.22288,1379.319946
2012-08-31,292.521087,1406.579956
2012-09-30,302.380372,1440.670044


In [None]:
    def initialize(self):
        """Populate ``self.data`` with monthly prices and log returns, then print it.

        The first entry of ``self.stocks`` is used as the stock and the last
        entry as the market. ``self.data`` ends up with four columns:
        ``stock_adjclose``, ``market_adjclose``, ``stock_returns`` and
        ``market_returns``. Rows containing any NaN are removed.
        """
        stocks_data = self.download_data()

        # Use monthly instead of daily observations: keep the last value of
        # each calendar month. An explicit MonthEnd offset replaces the 'M'
        # alias, which pandas deprecated in 2.2 in favour of 'ME'.
        stocks_data = stocks_data.resample(pd.offsets.MonthEnd()).last()

        prices = pd.DataFrame({
            'stock_adjclose': stocks_data[self.stocks[0]],
            'market_adjclose': stocks_data[self.stocks[-1]],
        })

        # Logarithmic monthly returns: ln(P_t / P_{t-1}).
        log_returns = np.log(prices / prices.shift(1))

        # Drop every incomplete row, not just the first one: the shift leaves a
        # NaN in row 0, and a ticker with a shorter history than the market
        # leaves further NaN rows that slicing off row 0 would not remove.
        self.data = prices.assign(
            stock_returns=log_returns['stock_adjclose'],
            market_returns=log_returns['market_adjclose'],
        ).dropna()

        print(self.data)