In [6]:
import yfinance as yf
import pandas as pd
import datetime as dt
import os

def pull_stock_data(tickers:str, start:str, end:str, interval:str, stripdateindex:str, column:'str | list[str]') -> pd.DataFrame:
    """
    Returns a dataframe with chosen stock information for the last downloaded row of each month.

    Parameters:
    -------------
    tickers (str): Yahoo finance tickers for companies divided ONLY by single space
    start (str): start date of the period (format yyyy-mm-dd)
    end (str): end date of the period (format yyyy-mm-dd)
    interval (str): sampling interval as understood by yfinance (e.g. 1d, 1wk, 1mo)
    stripdateindex (str): argument for stripping datetime index down (d - day, m - month, y - year)
    column (str or list): wanted value(s), e.g. 'Close' or ['Close', 'Volume'], args same as in yahoo finance

    Returns:
    -------------
    One row per calendar month holding the last non-null value of every
    downloaded column, indexed by a PeriodIndex named 'Date'. Months in which
    any downloaded column has no value at all are dropped before the
    requested column(s) are selected.
    """
    # data download; the layout option is spelled `group_by` in yfinance, and
    # 'column' keeps the field names ('Close', 'Volume', ...) on the top column
    # level so that `column` can be selected at the end
    df = yf.download(tickers = tickers, start = start, end = end, interval = interval, group_by = 'column')
    # one group per calendar month
    month_keys = [df.index.year, df.index.month]
    # last non-null value of every column within each month
    dfg = df.groupby(month_keys).last()
    # timestamp of the last downloaded row of each month, in the same group order
    last_stamp = df.index.to_series().groupby(month_keys).last()
    # changing index from datetime to the requested resolution; the documented
    # lower-case codes are mapped to pandas' upper-case period aliases, anything
    # else is passed through untouched
    period_alias = {'d': 'D', 'm': 'M', 'y': 'Y'}.get(stripdateindex, stripdateindex)
    dfg.index = pd.DatetimeIndex(last_stamp, name='Date').to_period(period_alias)
    # dropping na rows
    dfg = dfg.dropna()
    return dfg[column]
    
def save_to_desktop(dataframe:pd.DataFrame, file_name:str):
    """
    Saves dataframe to the current user's desktop in csv format under filename provided

    Parameters:
    ------------
    dataframe (pd.DataFrame): dataframe that is to be saved
    file_name (str): name of the file without extension ('.csv' is appended here)
    """
    # defining path to desktop on running unit; USERPROFILE is normally only set
    # on Windows, so fall back to the home directory resolved by expanduser
    # instead of raising KeyError on other platforms
    home = os.environ.get('USERPROFILE') or os.path.expanduser('~')
    desktop = os.path.join(home, 'Desktop')
    # saving file under chosen name on desktop (';' separated, UTF-8 encoded)
    dataframe.to_csv(os.path.join(desktop, file_name + '.csv'), sep=';', encoding='UTF-8')



In [7]:
# Download parameters for the month-end price/volume panel.
# NOTE(review): the output recorded under this cell reports 11 downloads
# including ^IRX, which this ticker string does not contain - the cell
# source was presumably edited after its last run; re-run to refresh.
tick = "NKE MSFT XOM INTC CAT WMT JPM F UPS MKC"
st, en = '2000-12-01', '2020-12-31'
intv = '1d'
strpdt = 'm'
col = ['Close', 'Volume']

dftest = pull_stock_data(
    tickers=tick,
    start=st,
    end=en,
    interval=intv,
    stripdateindex=strpdt,
    column=col,
)
dftest.head(20)

[*********************100%***********************]  11 of 11 completed


  df.index = pd.to_datetime(df.index).to_period(stripdateindex)


Unnamed: 0_level_0,Adj Close,Adj Close,Adj Close,Adj Close,Adj Close,Adj Close,Adj Close,Adj Close,Adj Close,Adj Close,...,Volume,Volume,Volume,Volume,Volume,Volume,Volume,Volume,Volume,Volume
Unnamed: 0_level_1,CAT,F,INTC,JPM,MKC,MSFT,NKE,UPS,WMT,XOM,...,F,INTC,JPM,MKC,MSFT,NKE,UPS,WMT,XOM,^IRX
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
2000-12,13.426954,12.51059,17.972366,23.754143,5.809334,13.602913,5.512078,33.709515,35.416012,21.869669,...,116791300.0,1120692000.0,200159000.0,9706000.0,2056668000.0,242557600.0,31549600.0,172787800.0,246466000.0,0.0
2001-01,12.549323,15.047402,22.119844,28.748064,5.904675,19.149916,5.440065,35.516918,37.913372,21.168455,...,126191800.0,1211435000.0,247102800.0,11913600.0,2005531000.0,307727200.0,32679300.0,179925700.0,311749600.0,0.0
2001-02,11.895669,15.004977,17.075615,24.57196,6.366357,18.503101,3.85977,32.435726,33.434525,20.388624,...,90557300.0,909408600.0,154547500.0,11599600.0,1536896000.0,310156800.0,18528800.0,136950200.0,202850800.0,0.0
2001-03,12.690614,15.172235,15.739306,23.645111,6.802119,17.150644,3.990307,32.753685,33.708187,20.482918,...,131030200.0,1421495000.0,204257600.0,18193600.0,1895350000.0,227849600.0,23005000.0,169447700.0,277832200.0,0.0
2001-04,14.35487,15.906034,18.489395,25.267097,6.397518,21.247196,4.141493,33.070267,34.586437,22.404755,...,113711700.0,1232602000.0,167154700.0,12639200.0,2075807000.0,198206400.0,24133300.0,149743600.0,265479800.0,0.0
2001-05,15.59669,13.272593,16.15654,26.096527,6.576585,21.695667,4.071163,34.221554,34.59314,22.442692,...,145986800.0,966714700.0,149322200.0,17928400.0,1777306000.0,202687200.0,27980500.0,146569900.0,256474800.0,0.0
2001-06,14.413116,13.381607,17.507395,23.61165,6.840298,22.893667,4.159323,33.378872,32.621155,22.197529,...,118509600.0,1008249000.0,157719500.0,12656400.0,1449178000.0,221994400.0,21233100.0,122370700.0,249846400.0,0.0
2001-07,15.86739,13.883074,17.842579,22.990427,6.982382,20.757959,4.716828,32.818714,37.420879,21.229017,...,84425600.0,965577600.0,131864500.0,12342400.0,1455201000.0,198777600.0,15583900.0,154027100.0,225712600.0,0.0
2001-08,14.493451,10.830654,16.73527,21.076153,7.392922,17.891554,4.959862,31.894722,32.16589,20.410551,...,131204300.0,951559900.0,135245300.0,11084800.0,1153896000.0,186447200.0,15824400.0,151455300.0,190175200.0,0.0
2001-09,12.986133,9.569782,12.241856,18.26779,7.491058,16.047514,4.643422,30.120956,33.136551,20.1416,...,129333500.0,1066765000.0,175067700.0,15087200.0,1510824000.0,242140800.0,32051500.0,174327700.0,194900700.0,0.0


In [38]:
# Keep only the 'Close' block of the panel: one column per ticker
df1 = dftest.loc[:, 'Close']
df1.head()

Unnamed: 0_level_0,CAT,F,INTC,JPM,MKC,MSFT,NKE,UPS,WMT,XOM,^IRX
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2000-12,13.426954,12.51059,17.972366,23.754143,5.809334,13.602913,5.512078,33.709515,35.416012,21.869669,5.73
2001-01,12.549323,15.047402,22.119844,28.748064,5.904675,19.149916,5.440065,35.516918,37.913372,21.168455,4.84
2001-02,11.895669,15.004977,17.075615,24.57196,6.366357,18.503101,3.85977,32.435726,33.434525,20.388624,4.72
2001-03,12.690614,15.172235,15.739306,23.645111,6.802119,17.150644,3.990307,32.753685,33.708187,20.482918,4.18
2001-04,14.35487,15.906034,18.489395,25.267097,6.397518,21.247196,4.141493,33.070267,34.586437,22.404755,3.83


In [39]:
# Period-over-period simple returns. Computed from the price panel itself
# rather than from `df1`, so re-running this cell cannot turn the returns
# into returns-of-returns.
df1 = dftest['Close'].pct_change()

In [10]:
# Export the downloaded panel as 'data.csv' on the desktop
save_to_desktop(dataframe=dftest, file_name='data')

In [42]:
# Load the Fama-French factor file and index it by month.
# The 'Date' column is parsed as year+month (format '%Y%m') after being
# cast to string, then moved into the index.
fama = (
    pd.read_csv(r"F-F_Research_Data_Factors.CSV", sep=';')
    .assign(Date=lambda frame: pd.to_datetime(frame['Date'].astype('string'), format='%Y%m'))
    .set_index('Date')
)
# Month-resolution PeriodIndex
fama.index = fama.index.to_period('m')
# Scale every factor by 1/100 (the file presumably stores percentages - confirm)
fama = fama.divide(100)
fama.head()

Unnamed: 0_level_0,Mkt-RF,SMB,HML,RF
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1926-07,0.0296,-0.0256,-0.0243,0.0022
1926-08,0.0264,-0.0117,0.0382,0.0025
1926-09,0.0036,-0.014,0.0013,0.0023
1926-10,-0.0324,-0.0009,0.007,0.0032
1926-11,0.0253,-0.001,-0.0051,0.0031


In [34]:

#fama.head()

In [47]:
# Left-join the Fama-French factors onto the stock returns via the index of both frames
dffull = pd.merge(df1,fama, how='left', left_index=True, right_index=True)
# '^IRX' is only present when that ticker was part of the download; drop it
# when it exists instead of failing with a KeyError when it does not
dffull = dffull.drop(columns='^IRX', errors='ignore')
dffull.head()

Unnamed: 0_level_0,CAT,F,INTC,JPM,MKC,MSFT,NKE,UPS,WMT,XOM,Mkt-RF,SMB,HML,RF
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
2000-12,,,,,,,,,,,0.0119,0.0072,0.0761,0.005
2001-01,-0.065363,0.202773,0.23077,0.210234,0.016412,0.407781,-0.013064,0.053617,0.070515,-0.032063,0.0313,0.0669,-0.0509,0.0054
2001-02,-0.052087,-0.002819,-0.228041,-0.145266,0.078189,-0.033776,-0.290492,-0.086753,-0.118134,-0.036839,-0.1005,-0.0079,0.1248,0.0038
2001-03,0.066826,0.011147,-0.078258,-0.03772,0.068448,-0.073094,0.03382,0.009803,0.008185,0.004625,-0.0726,0.0024,0.0642,0.0042
2001-04,0.131141,0.048365,0.174727,0.068597,-0.059482,0.238857,0.037888,0.009666,0.026055,0.093826,0.0794,0.0055,-0.0468,0.0039


In [50]:

# Factor columns brought in by the merge; every other original column is a stock return
factor_cols = ['Mkt-RF', 'SMB', 'HML', 'RF']
cols = dffull.columns.tolist()
# Derive the stock columns by name instead of assuming they are the first ten;
# columns created by a previous run of this cell are skipped so it stays re-runnable
stock_cols = [c for c in cols if c not in factor_cols and not c.endswith('_re') and c != 'reM']
# Calculate excess stock return for each company
for i in stock_cols:
    dffull[i+'_re'] = dffull[i]-dffull['RF']
# Excess market return: the 'Mkt-RF' factor is already the market return net of
# the risk-free rate, so RF must not be subtracted a second time
dffull['reM'] = dffull['Mkt-RF']
dffull.info()

<class 'pandas.core.frame.DataFrame'>
PeriodIndex: 241 entries, 2000-12 to 2020-12
Freq: M
Data columns (total 25 columns):
 #   Column   Non-Null Count  Dtype  
---  ------   --------------  -----  
 0   CAT      240 non-null    float64
 1   F        240 non-null    float64
 2   INTC     240 non-null    float64
 3   JPM      240 non-null    float64
 4   MKC      240 non-null    float64
 5   MSFT     240 non-null    float64
 6   NKE      240 non-null    float64
 7   UPS      240 non-null    float64
 8   WMT      240 non-null    float64
 9   XOM      240 non-null    float64
 10  Mkt-RF   241 non-null    float64
 11  SMB      241 non-null    float64
 12  HML      241 non-null    float64
 13  RF       241 non-null    float64
 14  CAT_re   240 non-null    float64
 15  F_re     240 non-null    float64
 16  INTC_re  240 non-null    float64
 17  JPM_re   240 non-null    float64
 18  MKC_re   240 non-null    float64
 19  MSFT_re  240 non-null    float64
 20  NKE_re   240 non-null    float64
 2

In [52]:
import statsmodels.api as sm

In [51]:
# Every column label of the merged frame
cols1 = list(dffull.columns)

# Regression targets: the labels that carry the '_re' marker
colsreg = [label for label in cols1 if '_re' in label]

In [56]:
# Regressor matrix: the 'reM' column plus the constant column added by statsmodels
X = sm.add_constant(dffull['reM'])
# Only the first two excess-return columns are visited for now
for col in colsreg[:2]:
    y = dffull[col]
    # TODO: regress y on X here
    # NOTE(review): the first row of each return series is NaN in the outputs
    # shown earlier, so the fit will presumably need missing-value handling - confirm
    


         const    CAT_re
Date                    
2000-12    1.0       NaN
2001-01    1.0 -0.070763
2001-02    1.0 -0.055887
2001-03    1.0  0.062626
2001-04    1.0  0.127241
         const      F_re
Date                    
2000-12    1.0       NaN
2001-01    1.0  0.197373
2001-02    1.0 -0.006619
2001-03    1.0  0.006947
2001-04    1.0  0.044465
