In [7]:
import datetime as dt
import os

import yfinance as yf
import pandas as pd
import statsmodels.api as sm


def pull_stock_data(tickers:str, start:str, end:str, interval:str, stripdateindex:str, column:str) -> pd.DataFrame:
    """
    Returns a dataframe with chosen stock information for last day of the month.

    Parameters:
    -------------
    tickers (str): Yahoo finance tickers for companies divided ONLY by single space
    start (str): start date of the period (format yyyy-mm-dd)
    end (str): end date of the period (format yyyy-mm-dd)
    interval (str): wanted interval (1d, 1m, 1y)
    stripdateindex (str): argument for stripping datetime index down (d - day, m - month, y- year)
    columns (list): list of wanted values, args same as in yahoo finance
    """
    # data download
    df = yf.download(tickers = tickers, start = start, end = end, interval = interval, groupby = 'ticker')
    # changing index from datetime to just year and month
    df['Date'] = df.index
    dfg = df.groupby([df.index.year, df.index.month], as_index=False).last()
    dfg.reset_index(inplace=True, drop=True)
    dfg.set_index('Date', inplace=True)
    dfg.index = pd.to_datetime(dfg.index).to_period(stripdateindex)
    # dropping na rows
    dfg.dropna(inplace = True)
    return dfg[column]

    
def save_to_desktop(dataframe:pd.DataFrame, file_name:str):
    """
    Saves dataframe to desktop in csv format under filename provided

    Parameters:
    ------------
    dataframe (pd.DataFrame): dataframe that is to be saved
    file_name (str): name of the file
    """
    # defining path to desktop on running unit
    desktop = os.path.join(os.path.join(os.environ['USERPROFILE']), 'Desktop')
    # saving file under chosen name on desktop
    dataframe.to_csv(desktop+'/'+file_name+'.csv', sep=';')



In [2]:
tick = "NKE MSFT XOM INTC CAT WMT JPM F UPS MKC"
st = '2000-12-01'
en = '2020-12-31'
intv = '1d'
strpdt = 'm'
col = ['Close', 'Volume']

dftest = pull_stock_data(tick, st, en, intv, strpdt, col)
dftest.head(5)

[*********************100%***********************]  10 of 10 completed




Unnamed: 0_level_0,Close,Close,Close,Close,Close,Close,Close,Close,Close,Close,Volume,Volume,Volume,Volume,Volume,Volume,Volume,Volume,Volume,Volume
Unnamed: 0_level_1,CAT,F,INTC,JPM,MKC,MSFT,NKE,UPS,WMT,XOM,CAT,F,INTC,JPM,MKC,MSFT,NKE,UPS,WMT,XOM
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2
2000-12,23.65625,23.4375,30.0625,45.4375,9.015625,21.6875,6.976563,58.75,53.125,43.46875,3051800,4746800,54053600,9657000,998000,99977600,8772800,1096000,6582300,9564000
2001-01,22.110001,28.190001,37.0,54.990002,9.1125,30.53125,6.87625,61.900002,56.799999,42.075001,3889000,4553600,71894200,13130800,264000,81898800,7408000,2782200,10848100,17017600
2001-02,20.799999,27.809999,28.5625,46.66,9.825,29.5,4.87875,56.529999,50.09,40.525002,3025200,4803100,48601400,10510600,962800,84608400,23287200,850200,7748200,11591200
2001-03,22.190001,28.120001,26.3125,44.900002,10.4975,27.34375,5.04375,56.900002,50.5,40.5,3046800,3807000,47297100,8463000,1899600,91201600,4391200,925700,8308400,13186600
2001-04,25.1,29.48,30.91,47.98,9.825,33.875,5.22625,57.450001,51.740002,44.299999,5055000,5593100,41266300,7737700,956400,74368200,9381600,1041500,6597800,15066600


### Class 2 Problem set

In [3]:
# We only care about returns -
df1 = dftest['Close']
df1 = df1.pct_change()
df1.head()

Unnamed: 0_level_0,CAT,F,INTC,JPM,MKC,MSFT,NKE,UPS,WMT,XOM
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2000-12,,,,,,,,,,
2001-01,-0.065363,0.202773,0.230769,0.210234,0.010745,0.407781,-0.014379,0.053617,0.069176,-0.032063
2001-02,-0.059249,-0.01348,-0.228041,-0.151482,0.078189,-0.033777,-0.290493,-0.086753,-0.118134,-0.036839
2001-03,0.066827,0.011147,-0.078775,-0.03772,0.068448,-0.073093,0.03382,0.006545,0.008185,-0.000617
2001-04,0.13114,0.048364,0.174727,0.068597,-0.064063,0.238857,0.036183,0.009666,0.024554,0.093827


In [None]:
#save_to_desktop(dfsave, 'data')

#### Process F-F risk free rates downloaded previously

In [5]:
fama = pd.read_csv(r"F-F_Research_Data_Factors.CSV", sep=';')
fama['Date'] = pd.to_datetime(fama['Date'].astype('string'), format='%Y%m')
fama.set_index('Date', inplace=True)
fama.index = fama.index.to_period('m')
# Values given in CSV are plain percentages - division by 100
fama = fama.divide(100)
fama.head()

Unnamed: 0_level_0,Mkt-RF,SMB,HML,RF
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1926-07,0.0296,-0.0256,-0.0243,0.0022
1926-08,0.0264,-0.0117,0.0382,0.0025
1926-09,0.0036,-0.014,0.0013,0.0023
1926-10,-0.0324,-0.0009,0.007,0.0032
1926-11,0.0253,-0.001,-0.0051,0.0031


In [20]:
# Join risk free rates to stock returns in periods
dffull = pd.merge(df1,fama, how='left', left_index=True, right_index=True)
# drop first NULL row which is causing regression to fail
dffull = dffull.dropna()

In [50]:
# Calculate excess stock return for each company
for i in dffull.columns.tolist()[:10]:
    dffull[i+'_re'] = dffull[i]-dffull['RF']
# Calculate excess Market return
dffull['reM'] = dffull['Mkt-RF']-dffull['RF']
dffull.info()

<class 'pandas.core.frame.DataFrame'>
PeriodIndex: 240 entries, 2001-01 to 2020-12
Freq: M
Data columns (total 25 columns):
 #   Column   Non-Null Count  Dtype  
---  ------   --------------  -----  
 0   CAT      240 non-null    float64
 1   F        240 non-null    float64
 2   INTC     240 non-null    float64
 3   JPM      240 non-null    float64
 4   MKC      240 non-null    float64
 5   MSFT     240 non-null    float64
 6   NKE      240 non-null    float64
 7   UPS      240 non-null    float64
 8   WMT      240 non-null    float64
 9   XOM      240 non-null    float64
 10  Mkt-RF   240 non-null    float64
 11  SMB      240 non-null    float64
 12  HML      240 non-null    float64
 13  RF       240 non-null    float64
 14  CAT_re   240 non-null    float64
 15  F_re     240 non-null    float64
 16  INTC_re  240 non-null    float64
 17  JPM_re   240 non-null    float64
 18  MKC_re   240 non-null    float64
 19  MSFT_re  240 non-null    float64
 20  NKE_re   240 non-null    float64
 2

#### Regress Excess Stock returns (<STOCK_NAME>_re) on Excess Market returns (reM)
* intercept is the Alpha 
* coefficient for reM is the company's Beta

**TODO**: funkcje z tego jakąś elegancką, bo potem będziemy dzielić te 20 lat (nasz `dffull`) na 4 periody i w kadym puszczać regresję.

In [44]:
def get_significance(pvalue):
    '''
    Helper function to make p-value visual with use of stars
    '''
    if pvalue > 0.1: return ''
    elif pvalue > 0.05: return '*'
    elif pvalue> 0.01: return '**'
    else: return '***'


In [51]:
colsreg = [x for x in dffull.columns.tolist() if '_re' in x]
# Exog variable is constant - it's always Excess Market return
X = sm.add_constant(dffull['reM'])
# Store results in dict
reg_results = {}
for stock in colsreg:
    y = dffull[stock]
    reg_model = sm.OLS(y, X).fit()
    CAPM_params = {'AVG_excess_return' : y.mean(), 
        "alpha" : reg_model.params[0], # In Excel it's in %, but I'd rather keep it as fraction here for further calculations 
        "beta" : reg_model.params[1],
        "beta_pvalue" : get_significance(reg_model.pvalues[1]),
        "R2adj" : reg_model.rsquared_adj,
    }
    reg_results[stock[:-3]] = CAPM_params

results_df = pd.DataFrame.from_dict(reg_results, orient='index')
# Save it to .csv in our folder (you can copy from this output dataframe and paste to excel directly tough...)
# results_df.to_csv('Q1_results.csv', sep=';')
results_df


Unnamed: 0,AVG_excess_return,alpha,beta,beta_pvalue,R2adj
CAT,0.011259,0.004257,1.318228,***,0.461642
F,0.003513,-0.0054,1.678019,***,0.289176
INTC,0.005094,-0.001265,1.197149,***,0.353397
JPM,0.006734,-0.000202,1.305788,***,0.489124
MKC,0.009868,0.008069,0.338575,***,0.097466
MSFT,0.011207,0.006089,0.963456,***,0.34971
NKE,0.013655,0.009598,0.763648,***,0.277536
UPS,0.005042,0.000963,0.76804,***,0.327946
WMT,0.004354,0.00229,0.3886,***,0.114213
XOM,0.000399,-0.003497,0.733508,***,0.323427


#### Estimate SML
* regress Average Excess Stock returns on estimated market Betas for each stock
* we obtain lambda_0 (constant) and lambda_1 (price of risk)

In [56]:
SML_model = sm.OLS(results_df['AVG_excess_return'], sm.add_constant(results_df['beta'])).fit()
print(f'''
    lambda_0 : {SML_model.params[0]},
    lambda_1 : {SML_model.params[1]},
    R2_adj : {SML_model.rsquared_adj}
'''
)



    lambda_0 : 0.007795225145643754,
    lambda_1 : -0.0007221373275028279,
    R2_adj : -0.11886624814672841

