In [1]:
import yfinance as yf
import pandas as pd
import datetime as dt
import os

def pull_stock_data(tickers:str, start:str, end:str, interval:str, stripdateindex:str, column:"str | list[str]") -> pd.DataFrame:
    """
    Returns a dataframe with the chosen stock information, one row per calendar month.

    Rows are grouped by (year, month); each row holds the last non-null value
    of every column within that month and is labelled with the last date seen
    in the month, converted to a period.

    Parameters:
    -------------
    tickers (str): Yahoo finance tickers for companies divided ONLY by single space
    start (str): start date of the period (format yyyy-mm-dd)
    end (str): end date of the period (format yyyy-mm-dd)
    interval (str): sampling interval understood by yfinance (e.g. 1d, 1wk, 1mo;
        note that in yfinance '1m' means one minute, not one month)
    stripdateindex (str): period frequency the datetime index is reduced to
        (d - day, m - month, y - year; any other pandas period alias is passed through)
    column (str or list): wanted value(s), names same as in yahoo finance
        (e.g. 'Close' or ['Close', 'Volume'])

    Returns:
    -------------
    pd.DataFrame: the selected column(s) indexed by a PeriodIndex named 'Date'
    """
    # data download
    # The keyword is `group_by` (not `groupby`); 'column' keeps the price field
    # on the outer column level, which is what `dfg[column]`-style selection needs.
    prices = yf.download(tickers=tickers, start=start, end=end, interval=interval, group_by='column')

    # The documented lowercase shortcuts are mapped to the canonical pandas
    # period aliases; anything else is handed to pandas untouched.
    freq = {'d': 'D', 'm': 'M', 'y': 'Y'}.get(stripdateindex, stripdateindex)

    # changing index from datetime to just the requested period
    month_keys = [prices.index.year.rename('year'), prices.index.month.rename('month')]
    # last date present in the data for every (year, month) group
    last_dates = prices.index.to_series().groupby(month_keys).last()
    monthly = prices.groupby(month_keys).last()
    # both group-bys are sorted by the same keys, so they line up positionally
    monthly.index = pd.DatetimeIndex(last_dates.array, name='Date').to_period(freq)

    # dropping na rows (done before the column selection, so a gap in any
    # downloaded field removes the whole month)
    monthly = monthly.dropna()
    return monthly[column]
    
def save_to_desktop(dataframe:pd.DataFrame, file_name:str):
    """
    Saves dataframe to desktop in csv format under filename provided

    The file is written as '<home>/Desktop/<file_name>.csv' with ';' as the
    field separator and UTF-8 encoding. The Desktop folder is created when it
    does not exist yet.

    Parameters:
    ------------
    dataframe (pd.DataFrame): dataframe that is to be saved
    file_name (str): name of the file (without the '.csv' extension)
    """
    # defining path to desktop on running unit; expanduser resolves the home
    # directory on every platform instead of relying on the Windows-only
    # USERPROFILE environment variable
    desktop = os.path.join(os.path.expanduser('~'), 'Desktop')
    os.makedirs(desktop, exist_ok=True)
    # saving file under chosen name on desktop
    dataframe.to_csv(os.path.join(desktop, file_name + '.csv'), sep=';', encoding='UTF-8')



In [2]:
# Download settings: ten tickers, daily bars for Dec 2000 - Dec 2020,
# index reduced to monthly periods, keeping the close price and volume.
tick = " ".join(["NKE", "MSFT", "XOM", "INTC", "CAT", "WMT", "JPM", "F", "UPS", "MKC"])
st, en = '2000-12-01', '2020-12-31'
intv, strpdt = '1d', 'm'
col = ['Close', 'Volume']

dftest = pull_stock_data(
    tickers=tick,
    start=st,
    end=en,
    interval=intv,
    stripdateindex=strpdt,
    column=col,
)
dftest.head(20)

[*********************100%***********************]  10 of 10 completed


  dfg.index = pd.to_datetime(dfg.index).to_period(stripdateindex)


Unnamed: 0_level_0,Close,Close,Close,Close,Close,Close,Close,Close,Close,Close,Volume,Volume,Volume,Volume,Volume,Volume,Volume,Volume,Volume,Volume
Unnamed: 0_level_1,CAT,F,INTC,JPM,MKC,MSFT,NKE,UPS,WMT,XOM,CAT,F,INTC,JPM,MKC,MSFT,NKE,UPS,WMT,XOM
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2
2000-12,23.65625,23.4375,30.0625,45.4375,9.015625,21.6875,6.976563,58.75,53.125,43.46875,3051800,4746800,54053600,9657000,998000,99977600,8772800,1096000,6582300,9564000
2001-01,22.110001,28.190001,37.0,54.990002,9.1125,30.53125,6.87625,61.900002,56.799999,42.075001,3889000,4553600,71894200,13130800,264000,81898800,7408000,2782200,10848100,17017600
2001-02,20.799999,27.809999,28.5625,46.66,9.825,29.5,4.87875,56.529999,50.09,40.525002,3025200,4803100,48601400,10510600,962800,84608400,23287200,850200,7748200,11591200
2001-03,22.190001,28.120001,26.3125,44.900002,10.4975,27.34375,5.04375,56.900002,50.5,40.5,3046800,3807000,47297100,8463000,1899600,91201600,4391200,925700,8308400,13186600
2001-04,25.1,29.48,30.91,47.98,9.825,33.875,5.22625,57.450001,51.740002,44.299999,5055000,5593100,41266300,7737700,956400,74368200,9381600,1041500,6597800,15066600
2001-05,27.08,24.35,27.01,49.150002,10.1,34.59,5.1375,59.450001,51.75,44.375,2933600,5977700,38761100,5875300,413200,70682600,11191200,1716100,5880700,11855800
2001-06,25.025,24.549999,29.25,44.470001,10.505,36.5,5.24875,57.799999,48.799999,43.674999,8198400,6331100,49436600,6180700,584800,94283800,23972800,2619300,9306500,15372000
2001-07,27.549999,25.469999,29.809999,43.299999,10.6725,33.095001,5.94375,56.830002,55.900002,41.759998,5094800,3841800,50354000,8469900,462400,59031600,11809600,660600,7725600,11069300
2001-08,25.0,19.870001,27.959999,39.400002,11.3,28.525,6.25,55.23,48.049999,40.150002,3360200,4713100,38127600,4918600,281600,57900800,8666400,1157100,7555100,7608200
2001-09,22.4,17.35,20.440001,34.150002,11.45,25.584999,5.85125,51.98,49.5,39.400002,4313000,6198300,72640600,10302300,1039600,116641200,10322400,2781500,9560200,14064200


In [3]:
# Keep only the close prices (one column per ticker)
df1 = dftest.loc[:, 'Close']
df1.head()

Unnamed: 0_level_0,CAT,F,INTC,JPM,MKC,MSFT,NKE,UPS,WMT,XOM
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2000-12,23.65625,23.4375,30.0625,45.4375,9.015625,21.6875,6.976563,58.75,53.125,43.46875
2001-01,22.110001,28.190001,37.0,54.990002,9.1125,30.53125,6.87625,61.900002,56.799999,42.075001
2001-02,20.799999,27.809999,28.5625,46.66,9.825,29.5,4.87875,56.529999,50.09,40.525002
2001-03,22.190001,28.120001,26.3125,44.900002,10.4975,27.34375,5.04375,56.900002,50.5,40.5
2001-04,25.1,29.48,30.91,47.98,9.825,33.875,5.22625,57.450001,51.740002,44.299999


In [4]:
# Period-over-period simple returns. Computed straight from the close prices
# (not from df1 itself) so re-running this cell does not produce returns of returns.
df1 = dftest['Close'].pct_change()

In [5]:
# Export the downloaded close/volume table as 'data.csv' on the desktop
save_to_desktop(dataframe=dftest, file_name='data')

In [6]:
# Load the factor file and bring it to the same shape as the stock returns:
# a monthly PeriodIndex named 'Date' and values expressed as fractions.
fama = (
    pd.read_csv(r"F-F_Research_Data_Factors.CSV", sep=';')
    # 'Date' is stored as yyyymm; go through string so '%Y%m' can parse it
    .assign(Date=lambda raw: pd.to_datetime(raw['Date'].astype('string'), format='%Y%m'))
    .set_index('Date')
    # canonical uppercase monthly alias (the lowercase 'm' spelling is not the documented one)
    .to_period('M')
    # the file reports percentages; divide to get fractional returns
    .divide(100)
)
fama.head()

Unnamed: 0_level_0,Mkt-RF,SMB,HML,RF
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1926-07,0.0296,-0.0256,-0.0243,0.0022
1926-08,0.0264,-0.0117,0.0382,0.0025
1926-09,0.0036,-0.014,0.0013,0.0023
1926-10,-0.0324,-0.0009,0.007,0.0032
1926-11,0.0253,-0.001,-0.0051,0.0031


In [7]:

#fama.head()

In [9]:
# Left-join the factor table onto the stock returns using their shared period index
dffull = df1.join(fama, how='left')
dffull.head()

Unnamed: 0_level_0,CAT,F,INTC,JPM,MKC,MSFT,NKE,UPS,WMT,XOM,Mkt-RF,SMB,HML,RF
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
2000-12,,,,,,,,,,,0.0119,0.0072,0.0761,0.005
2001-01,-0.065363,0.202773,0.230769,0.210234,0.010745,0.407781,-0.014379,0.053617,0.069176,-0.032063,0.0313,0.0669,-0.0509,0.0054
2001-02,-0.059249,-0.01348,-0.228041,-0.151482,0.078189,-0.033777,-0.290493,-0.086753,-0.118134,-0.036839,-0.1005,-0.0079,0.1248,0.0038
2001-03,0.066827,0.011147,-0.078775,-0.03772,0.068448,-0.073093,0.03382,0.006545,0.008185,-0.000617,-0.0726,0.0024,0.0642,0.0042
2001-04,0.13114,0.048364,0.174727,0.068597,-0.064063,0.238857,0.036183,0.009666,0.024554,0.093827,0.0794,0.0055,-0.0468,0.0039


In [10]:

# Snapshot of the column names before the derived columns are appended
cols = dffull.columns.tolist()
# Ticker columns are taken from the returns frame instead of a hard-coded
# "first 10 columns" slice, so the cell keeps working if the ticker list changes.
stock_cols = df1.columns.tolist()
# Calculate excess stock return for each company
for ticker in stock_cols:
    dffull[ticker + '_re'] = dffull[ticker] - dffull['RF']
# Excess market return: the 'Mkt-RF' factor is already the market return minus
# the risk-free rate, so RF must not be subtracted a second time.
dffull['reM'] = dffull['Mkt-RF']
dffull.info()

<class 'pandas.core.frame.DataFrame'>
PeriodIndex: 241 entries, 2000-12 to 2020-12
Freq: M
Data columns (total 25 columns):
 #   Column   Non-Null Count  Dtype  
---  ------   --------------  -----  
 0   CAT      240 non-null    float64
 1   F        240 non-null    float64
 2   INTC     240 non-null    float64
 3   JPM      240 non-null    float64
 4   MKC      240 non-null    float64
 5   MSFT     240 non-null    float64
 6   NKE      240 non-null    float64
 7   UPS      240 non-null    float64
 8   WMT      240 non-null    float64
 9   XOM      240 non-null    float64
 10  Mkt-RF   241 non-null    float64
 11  SMB      241 non-null    float64
 12  HML      241 non-null    float64
 13  RF       241 non-null    float64
 14  CAT_re   240 non-null    float64
 15  F_re     240 non-null    float64
 16  INTC_re  240 non-null    float64
 17  JPM_re   240 non-null    float64
 18  MKC_re   240 non-null    float64
 19  MSFT_re  240 non-null    float64
 20  NKE_re   240 non-null    float64
 2

In [11]:
import statsmodels.api as sm

In [12]:
cols1 = dffull.columns.tolist()

# Excess-return columns are created with the '_re' suffix; match the suffix
# rather than a substring so unrelated names containing '_re' are not picked up.
colsreg = [name for name in cols1 if name.endswith('_re')]

In [56]:
# Regressor matrix shared by every regression: the 'reM' column with a constant added.
# NOTE(review): the saved output under this cell shows 'const' next to each *_re
# column, which this code does not print — presumably left over from an earlier
# version of the cell; confirm and re-run.
X = sm.add_constant(dffull['reM'])
# Only the first two excess-return columns are visited for now.
for col in colsreg[:2]:
    y = dffull[col]
    # TODO: the actual regression of y on X goes here
    


         const    CAT_re
Date                    
2000-12    1.0       NaN
2001-01    1.0 -0.070763
2001-02    1.0 -0.055887
2001-03    1.0  0.062626
2001-04    1.0  0.127241
         const      F_re
Date                    
2000-12    1.0       NaN
2001-01    1.0  0.197373
2001-02    1.0 -0.006619
2001-03    1.0  0.006947
2001-04    1.0  0.044465
