In [1]:
import numpy as np
import pandas as pd
import statsmodels.regression.linear_model as lm
import statsmodels.tools.tools as ct
from sklearn.metrics import mean_squared_error, r2_score

import yfinance as yf
from pandas_datareader import data
import pandas_datareader as pdr
import datetime

## APT

$$E(R_p)=R_f+\beta_1f_1+\beta_2f_2+...+\beta_nf_n$$

In [98]:
def apt(stock, rf, country):
    
    #Import Damodaran's database
    Damodaran=pd.ExcelFile('ctryprem.xlsx')
    RWA_tab= pd.read_excel(Damodaran, 'Regional Weighted Averages',header=0,index_col=0)
    
    #load RF data
    if rf=='30y': #3oy T-Bonds
        rf_t = '^TYX'
    if rf=='10y': #10y T-Bonds
        rf_t = '^TNX'
    if rf=='3m': #3 months T-bills
        rf_t = '^IRX'
    if (rf!='30y')&(rf!='10y')&(rf!='3m'):
        return print('ArgumentError: rf can only accept 30y, 10y, or 3m.')
    
    rf = yf.download(tickers=rf_t, period='5y', interval='1d')[['Close']]
    rf = rf[['Close']].resample('M').mean()
    rf.columns = ['rf']
    
    #load factors data
    indicator_list = ['CPIEALL', 'INDPRO']
    indicator = pdr.DataReader(indicator_list, 'fred')
    indicator = indicator.resample('M').mean().pct_change()
    indicator.columns = ['Inflation', 'IndustrialProduction']
    
    #prepare stock data
    print('Downloading Data')
    close = yf.download(tickers=stock, period='5y', interval='1d')[['Close']]
    print()
    
    #calculate return and excess return
    close = close.resample('M').mean().pct_change().dropna()
    data = close[['Close']].merge(rf, how='left', left_index=True, right_index=True)
    data['excessReturn'] = data['Close'] - data['rf']
    data = data.merge(indicator, how='left', left_index=True, right_index=True)
    
    #country risk
    if country not in RWA_tab.index:
        country = 'Global'
    data['CR'] = RWA_tab[RWA_tab.index==country]['Country Risk Premium'][0]
    data = data[['Inflation', 'IndustrialProduction', 'CR', 'excessReturn']].dropna()
    
    #split data
    train, val = data.iloc[:-12], data.iloc[-12:]
    
    print('\nData:')
    print(data.head())
    print()
    
    #fit data
    formula =  "excessReturn~ Inflation+IndustrialProduction+CR"
    model = lm.OLS.from_formula(formula, data = train).fit()
    print('\nModel Summary:')
    print(model.summary())
    
    #evaluate model
    pred = model.predict(val.iloc[:,:-1])
    print()
    print('R2 Score: ', r2_score(val.iloc[:,-1], pred))
    print('MSE Score: ', mean_squared_error(val.iloc[:,-1], pred))
    return model

In [99]:
model = apt('NFLX', '30y', 'Japan')

[*********************100%***********************]  1 of 1 completed
Downloading Data
[*********************100%***********************]  1 of 1 completed


Data:
            Inflation  IndustrialProduction        CR  excessReturn
Date                                                               
2017-03-31   0.001049              0.007512  0.006959     -3.076054
2017-04-30   0.002536              0.009382  0.006959     -2.926476
2017-05-31   0.001045              0.001107  0.006959     -2.872059
2017-06-30   0.001341              0.001570  0.006959     -2.807983
2017-07-31  -0.000315             -0.000947  0.006959     -2.802803


Model Summary:
                            OLS Regression Results                            
Dep. Variable:           excessReturn   R-squared:                       0.021
Model:                            OLS   Adj. R-squared:                 -0.025
Method:                 Least Squares   F-statistic:                    0.4591
Date:                Tue, 25

## Fama French

$$R_{it}-R_{ft}=\alpha_{it}+\beta_1(R_{Mt}-R_{ft})+\beta_2SMB_t+\beta_3HML_t+\epsilon_{it}$$

In [77]:
def famafrench(stock, factor=3):
    '''Return a 3 or 5 factor Fama French model for the chosen stock along with the evaluation metrics.
    
    Arguments
    stock: ticker of the stock
    factor: 3 (default), 5
    '''
    #check factors, return error message if it is not 3 or 5
    if factor==5:
        print('5 Factor Fama-French\n\n')
        FF = pd.read_csv('F-F_Research_Data_5_Factors_2x3_daily.csv', header=2, index_col=0).dropna()
        FF.index = pd.to_datetime(FF.index, format='%Y%m%d')
        FF.columns = ['Mkt_RF', 'SMB', 'HML', 'RMW', 'CMA', 'RF']
    if factor==3:  
        print('3 Factor Fama-French\n\n')
        FF=pd.read_csv('F-F_Research_Data_Factors_Daily.csv', header=3, index_col=0).dropna()
        FF.index = pd.to_datetime(FF.index, format='%Y%m%d')
        FF.columns = ['Mkt_RF', 'SMB', 'HML', 'RF']
    
    if (factor!=3) & (factor!=5):
        return print('ArgumentError: Factor must be either 3 or 5.')
    
    #prepare stock data
    print('Downloading Data')
    close = yf.download(tickers=stock, period='5y', interval='1d')[['Close']]
    print()
    
    #calculate return and excess return
    close = close.pct_change().dropna()
    data = close[['Close']].merge(FF, how='left', left_index=True, right_index=True)
    data['excessReturn'] = data['Close'] - data['RF']
    data = data.drop(['Close', 'RF'], axis=1).dropna()
    print('\nData:')
    print(data)
    print()
    
    #split data for training and validation
    train, val = data.iloc[:-300], data.iloc[-300:]
    #fit model
    formula =  "excessReturn~ {}".format('+'.join(data.columns[:-1]))
    model = lm.OLS.from_formula(formula, data = train).fit()
    print('\nModel Summary:')
    print(model.summary()) #print model summary
    
    pred = model.predict(val.iloc[:,:-1])
    print()
    #print evaluation metrics
    print('R2 Score: ', r2_score(val.iloc[:,-1], pred))
    print('MSE Score: ', mean_squared_error(val.iloc[:,-1], pred))
    return model

In [79]:
model = famafrench('NFLX')

3 Factor Fama-French


Downloading Data
[*********************100%***********************]  1 of 1 completed


Data:
            Mkt_RF   SMB   HML  excessReturn
Date                                        
2017-01-26   -0.10 -0.62  0.41     -0.006014
2017-01-27   -0.12  0.00 -0.68      0.023115
2017-01-30   -0.68 -0.73 -0.46     -0.010635
2017-01-31    0.00  0.88 -0.57     -0.005611
2017-02-01    0.04 -0.08  0.04     -0.001503
...            ...   ...   ...           ...
2021-10-25    0.58  0.49 -0.16      0.010349
2021-10-26    0.04 -0.71 -0.32     -0.004675
2021-10-27   -0.76 -0.74 -1.19     -0.008377
2021-10-28    1.14  0.87 -0.47      0.016789
2021-10-29    0.22  0.19 -0.82      0.024123

[1200 rows x 4 columns]


Model Summary:
                            OLS Regression Results                            
Dep. Variable:           excessReturn   R-squared:                       0.392
Model:                            OLS   Adj. R-squared:                  0.390
Method:            

In [80]:
model = famafrench('NFLX', 5)

5 Factor Fama-French


Downloading Data
[*********************100%***********************]  1 of 1 completed


Data:
            Mkt_RF   SMB   HML   RMW   CMA  excessReturn
Date                                                    
2017-01-26   -0.10 -0.58  0.41 -0.05 -0.16     -0.006014
2017-01-27   -0.12 -0.06 -0.68  0.13  0.05      0.023115
2017-01-30   -0.68 -0.73 -0.46  0.66 -0.16     -0.010635
2017-01-31    0.00  0.81 -0.57 -0.41 -0.25     -0.005611
2017-02-01    0.04 -0.08  0.04  0.21 -0.23     -0.001503
...            ...   ...   ...   ...   ...           ...
2021-11-23   -0.03 -0.17  1.74  0.23  1.16     -0.007797
2021-11-24    0.30 -0.15 -0.86 -0.52 -0.20      0.006467
2021-11-26   -2.22 -1.30 -0.93  0.26 -0.59      0.011165
2021-11-29    1.11 -1.31 -0.87  0.40 -0.33     -0.002704
2021-11-30   -1.95  0.16 -1.12 -0.16  0.40     -0.033050

[1221 rows x 6 columns]


Model Summary:
                            OLS Regression Results                            
Dep. Variable:       