In [1]:
# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Ask the inline backend to emit figures in the 'retina' format.
%config InlineBackend.figure_format = 'retina'

In [2]:
import matplotlib.pyplot as plt
import warnings

# Notebook-wide plot styling and warning configuration.

# Matplotlib 3.6 renamed its bundled seaborn style sheets to 'seaborn-v0_8*'
# and later releases dropped the old names, so use whichever name this
# installation actually provides.
seaborn_style = 'seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn'
plt.style.use(seaborn_style)
# Alternative style: 'seaborn-v0_8-colorblind'
# ('seaborn-colorblind' on Matplotlib versions that still ship the old names).
plt.rcParams['figure.figsize'] = [8, 4.5]
plt.rcParams['figure.dpi'] = 300
# FutureWarning is silenced for the whole session; deprecation notices from
# the libraries used below will therefore not be shown.
warnings.simplefilter(action='ignore', category=FutureWarning)

In [3]:
import pandas as pd
import yfinance as yf
import statsmodels.formula.api as smf
import numpy as np

In [4]:
# Ticker passed to the price download for the single-asset regression.
RISKY_ASSET = 'TSLA'
# Sample window as date strings; used both to slice the factor table and to
# bound the price download.
START_DATE, END_DATE = '2015-12-31', '2020-12-31'

In [5]:
# Load the factor table from CSV, skipping the first 3 lines of the file.
factor_df = pd.read_csv('F-F_Research_Data_Factors.csv', skiprows=3)

# Locate the row that marks the start of the annual section. The comparison
# is done on whitespace-stripped text so that a change in the padding around
# the marker does not silently break the match.
STR_TO_MATCH = ' Annual Factors: January-December '
first_column = factor_df.iloc[:, 0].astype(str).str.strip()
indices = first_column == STR_TO_MATCH.strip()
if not indices.any():
    raise ValueError(
        f'Marker row {STR_TO_MATCH.strip()!r} not found in the first column; '
        'cannot separate the monthly rows from the annual ones.'
    )
start_of_annual = factor_df[indices].index[0]

# Keep only the rows above the marker (the monthly data). The explicit copy
# makes the result an independent frame, so later column assignments do not
# raise SettingWithCopyWarning.
factor_df = factor_df[factor_df.index < start_of_annual].copy()

In [6]:
# Work on an independent copy: the incoming frame may be a slice of the raw
# CSV frame, and assigning a column into a slice triggers
# SettingWithCopyWarning.
factor_df = factor_df.copy()

# Rename the five columns to short lowercase names.
factor_df.columns = ['date', 'mkt', 'smb', 'hml', 'rf']

# Parse the YYYYMM values and store them back as 'YYYY-MM' strings.
factor_df['date'] = (
    pd.to_datetime(factor_df['date'], format='%Y%m')
    .dt.strftime('%Y-%m')
)

# Use the month label as the index.
factor_df = factor_df.set_index('date')

# Keep only the requested window. The index holds strings, so this slice is
# a lexicographic comparison: 'YYYY-MM' sorts before 'YYYY-MM-DD' for the
# same month, which means the START_DATE month itself is excluded while the
# END_DATE month is included.
factor_df = factor_df.loc[START_DATE:END_DATE]

In [7]:
# Coerce every column to a numeric dtype (unparseable cells become NaN), then
# divide by 100 -- presumably converting percentages to decimals.
# NOTE: re-running this cell divides the values by 100 again.
numeric_factors = factor_df.apply(pd.to_numeric, errors='coerce')
factor_df = numeric_factors / 100
factor_df.head()

Unnamed: 0_level_0,mkt,smb,hml,rf
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2016-01,-0.0577,-0.0336,0.0201,0.0001
2016-02,-0.0008,0.0083,-0.0059,0.0002
2016-03,0.0696,0.0082,0.0111,0.0002
2016-04,0.0092,0.0069,0.0328,0.0001
2016-05,0.0178,-0.0027,-0.016,0.0001


In [8]:
# Download the price history of RISKY_ASSET for the sample window.
# `adjusted=` is not a yfinance parameter: older releases swallowed it via
# **kwargs and newer ones reject it with a TypeError. `auto_adjust=False`
# is passed explicitly so the frame keeps the separate 'Adj Close' column
# that the return calculation relies on, regardless of the library default.
asset_df = yf.download(RISKY_ASSET,
                       start=START_DATE,
                       end=END_DATE,
                       auto_adjust=False,
                       progress=False)

print(f'Downloaded {asset_df.shape[0]} rows of data.')

Downloaded 1260 rows of data.


In [9]:
# Build the monthly simple-return series of the asset.
adj_close = asset_df['Adj Close']
# Recent yfinance releases return MultiIndex columns even for a single
# ticker, in which case the selection above is a one-column DataFrame;
# collapse it to a Series so that `y.name` below becomes the column name
# used by the later join.
if isinstance(adj_close, pd.DataFrame):
    adj_close = adj_close.squeeze(axis=1)

# Last observation of each month -> month-over-month percentage change.
# The first month has no predecessor and is dropped.
# ('M' is kept for compatibility; pandas >= 2.2 prefers the alias 'ME'.)
y = (
    adj_close
    .resample('M')
    .last()
    .pct_change()
    .dropna()
)

# Use 'YYYY-MM' string labels so the series can be joined on the factor index.
y.index = y.index.strftime('%Y-%m')
y.name = 'rtn'
y.head()

Date
2016-01   -0.203366
2016-02    0.003818
2016-03    0.197155
2016-04    0.047830
2016-05   -0.072811
Name: rtn, dtype: float64

In [10]:
# Attach the asset returns to the factor table (left join on the month
# label) and derive the excess return as the return minus the rf column.
ff_data = (
    factor_df
    .join(y)
    .assign(excess_rtn=lambda frame: frame['rtn'] - frame['rf'])
)

In [11]:
# Specify the regression of the excess return on the three factor columns,
# then estimate it by ordinary least squares.
ols_spec = smf.ols(formula='excess_rtn ~ mkt + smb + hml', data=ff_data)
ff_model = ols_spec.fit()

# Show the full estimation report.
print(ff_model.summary())

                            OLS Regression Results                            
Dep. Variable:             excess_rtn   R-squared:                       0.291
Model:                            OLS   Adj. R-squared:                  0.253
Method:                 Least Squares   F-statistic:                     7.663
Date:                Thu, 22 Apr 2021   Prob (F-statistic):           0.000223
Time:                        17:46:20   Log-Likelihood:                 24.294
No. Observations:                  60   AIC:                            -40.59
Df Residuals:                      56   BIC:                            -32.21
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept      0.0231      0.023      0.991      0.3

In [12]:
from pandas_datareader.famafrench import get_available_datasets
import pandas_datareader.data as web

In [13]:
# Show the first five dataset names reported by the Fama-French reader.
get_available_datasets()[:5]

['F-F_Research_Data_Factors',
 'F-F_Research_Data_Factors_weekly',
 'F-F_Research_Data_Factors_daily',
 'F-F_Research_Data_5_Factors_2x3',
 'F-F_Research_Data_5_Factors_2x3_daily']

In [14]:
# Fetch the three-factor dataset through the 'famafrench' data source,
# starting at 2015-01-01; the result is stored as ff_dict.
ff_dict = web.DataReader(
    name='F-F_Research_Data_Factors',
    data_source='famafrench',
    start='2015-01-01',
)

In [15]:
# Inspect which entries the reader returned.
ff_dict.keys()

dict_keys([0, 1, 'DESCR'])

In [16]:
# Print the dataset description stored under the 'DESCR' key.
print(ff_dict['DESCR'])

F-F Research Data Factors
-------------------------

This file was created by CMPT_ME_BEME_RETS using the 202102 CRSP database. The 1-month TBill return is from Ibbotson and Associates, Inc. Copyright 2021 Kenneth R. French

  0 : (74 rows x 4 cols)
  1 : Annual Factors: January-December (6 rows x 4 cols)


In [17]:
# Preview the first rows of the table stored under key 0.
ff_dict[0].head()

Unnamed: 0_level_0,Mkt-RF,SMB,HML,RF
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2015-01,-3.11,-0.51,-3.56,0.0
2015-02,6.14,0.52,-1.81,0.0
2015-03,-1.12,3.02,-0.41,0.0
2015-04,0.59,-3.04,1.88,0.0
2015-05,1.36,0.89,-1.1,0.0


In [18]:
# Print the short description of each of the three factors.
# The original cell assigned these strings to the name `print`, which showed
# nothing and replaced the builtin, breaking every later print(...) call.
# NOTE(review): 'CAMP' in the first message looks like a typo for 'CAPM' --
# confirm before changing the text.
print('시장팩터(MKT): CAMP과 유사한 시장의 초과 수익률을 측정한다.')
print('크기팩터(SMB): 시가 총액이 큰 주식 대비 시가 총액이 작은 주식의 초과 수익률을 측정한다.')
print('가치팩터(HML): 성장 주식 대비 초과 가치 주식의 초과 수익률을 측정한다.')

In [19]:
import pandas as pd
import yfinance as yf
import statsmodels.formula.api as smf
import pandas_datareader.data as web

In [20]:
# Tickers passed to the portfolio price download.
ASSETS = ['AMZN', 'GOOG', 'AAPL', 'MSFT']
# One weight per ticker, in the same order as ASSETS.
WEIGHTS = [0.25, 0.25, 0.25, 0.25]
# Sample window for the portfolio analysis. These names rebind the
# START_DATE / END_DATE defined for the single-asset example.
START_DATE, END_DATE = '2009-12-31', '2018-12-31'

In [21]:
# Fetch the three-factor dataset from START_DATE onward and keep the table
# stored under key 0 of the returned dict.
df_three_factor = web.DataReader('F-F_Research_Data_Factors', 'famafrench',
                                 start=START_DATE)[0]
# Divide by 100 -- presumably converting percentages to decimals.
df_three_factor = df_three_factor.div(100)
# Replace the index with 'YYYY-MM' string labels. strftime is used instead of
# Index.format(), which is deprecated in pandas 2.2, and it matches how the
# return series' index is formatted elsewhere in this notebook.
df_three_factor.index = df_three_factor.index.strftime('%Y-%m')

In [22]:
# Download the price history of every ticker in ASSETS for the sample window.
# `adjusted=` is not a yfinance parameter: older releases swallowed it via
# **kwargs and newer ones reject it with a TypeError. `auto_adjust=False`
# is passed explicitly so the frame keeps the separate 'Adj Close' columns
# that the return calculation relies on, regardless of the library default.
asset_df = yf.download(ASSETS,
                       start=START_DATE,
                       end=END_DATE,
                       auto_adjust=False,
                       progress=False)


In [23]:
# Turn the adjusted closes into monthly simple returns: take the last
# observation of each month, compute the month-over-month percentage change
# and drop rows containing NaN (such as the first month).
# NOTE: this rebinds asset_df from prices to returns, so the cell cannot be
# re-run without re-running the download cell first.
asset_df = (
    asset_df['Adj Close']
    .resample('M')
    .last()
    .pct_change()
    .dropna()
)
# Switch to 'YYYY-MM' string labels so the frame can be joined on the
# factor table's index.
asset_df.index = asset_df.index.strftime('%Y-%m')