In [94]:
import pandas as pd
import numpy as np
import datetime as dt


import pandas_datareader.data as web
from statsmodels.regression.rolling import RollingOLS
import statsmodels.api as sm
import seaborn as sns

In [3]:
# Path of the HDF5 asset store and the year bounds used to slice the date level.
DATA_STORE = '../data/assets.h5'
START = 2000
END = 2018
idx = pd.IndexSlice

# Open read-only: this cell never writes, and the default append mode would
# silently create an empty file if DATA_STORE pointed at the wrong location.
with pd.HDFStore(DATA_STORE, mode='r') as store:
    # Adjusted close prices for START..END, pivoted so each ticker is a column.
    prices = (store['quandl/wiki/prices']
              .loc[idx[str(START):str(END), :], 'adj_close']
              .unstack('ticker'))
    # Only the metadata columns that are kept for the stock universe.
    stocks = store['us_equities/stocks'].loc[:, ['marketcap', 'ipoyear', 'sector']]

In [4]:
def normalized_historical_returns(input_df,data_frequency="D",return_intervals=(1,4)):
    """Compute per-period (geometrically normalized) returns over several lookbacks.

    For every lookback ``i`` in ``return_intervals`` and every column of
    ``input_df`` the ``i``-period percentage change is converted to an
    equivalent single-period rate: ``(1 + pct_change(i)) ** (1 / i) - 1``.

    Parameters
    ----------
    input_df : pandas.DataFrame
        Price data, one column per asset, rows in chronological order.
    data_frequency : str, default "D"
        Label for the row frequency; only used to build the output column names.
    return_intervals : iterable of int, default (1, 4)
        Lookback lengths, expressed in rows of ``input_df``.

    Returns
    -------
    pandas.DataFrame
        Indexed like ``input_df`` with columns named
        ``'{col}_{i}{data_frequency}_return'``, ordered by interval and then by
        the original column order. If ``return_intervals`` is empty the result
        has no columns.
    """
    # One vectorised frame per lookback instead of inserting columns one at a
    # time, which fragments the result frame and is slow for wide inputs.
    per_interval = [
        input_df.pct_change(i).add(1).pow(1/i).sub(1)
                .add_suffix(f'_{i}{data_frequency}_return')
        for i in return_intervals
    ]

    if not per_interval:
        # pd.concat rejects an empty list; keep the input's index for alignment.
        return pd.DataFrame(index=input_df.index)

    return pd.concat(per_interval, axis=1)

In [None]:
# For this function to work, x_df and y_df need datetime (or period) indexes that cover the same dates, because the two frames are joined on their index.

def rolling_OLS(x_df,y_df,rolling_window):
    """Fit a rolling OLS of the first column of ``y_df`` on the columns of ``x_df``.

    Parameters
    ----------
    x_df : pandas.DataFrame
        Regressors. The index must be a ``DatetimeIndex`` or expose
        ``to_timestamp()`` (e.g. a ``PeriodIndex``).
    y_df : pandas.DataFrame
        Its first column is the dependent variable. Same index requirements as
        ``x_df``. Any further columns are joined as well and therefore end up
        among the regressors.
    rolling_window : int
        Requested window length; capped at ``len(joined frame) - 1``.

    Returns
    -------
    pandas.DataFrame
        The rolling ``params`` of the fit with the ``'const'`` column removed.

    Notes
    -----
    The inputs are not modified: index conversion and renaming are applied to
    re-indexed copies, so calling this repeatedly on the same frames is safe.
    """

    def _dated(frame):
        # Return ``frame`` on a DatetimeIndex named 'date' without touching the original.
        index = frame.index
        if not isinstance(index, pd.DatetimeIndex):
            index = index.to_timestamp()
        # Index.rename and set_axis both return new objects.
        return frame.set_axis(index.rename('date'), axis=0)

    x_dated = _dated(x_df)
    y_dated = _dated(y_df)

    y_name = y_dated.columns[0]
    # Left join on the shared 'date' index: the rows of x_df drive the sample.
    regression_df = x_dated.join(y_dated).sort_index()

    regression = RollingOLS(endog=regression_df[y_name],
                            exog=sm.add_constant(regression_df.drop(y_name,axis=1)),
                            # Never ask for a window longer than the data allows.
                            window=min(rolling_window,len(regression_df)-1)
                            ).fit(params_only=True)

    # Only the slope coefficients are of interest; discard the intercept.
    return regression.params.drop('const',axis=1)

In [202]:
# Download the Fama-French 5-factor (2x3) research data set from 2000 onwards.
# Element [0] of the reader's result is the table used here. The 'RF' column is
# dropped, the period index is converted to timestamps, the values are divided
# by 100 and the result is resampled to 'M' frequency using the mean.
factor_data = (
    web.DataReader('F-F_Research_Data_5_Factors_2x3', 'famafrench', start='2000')[0]
    .drop(columns='RF')
    .to_timestamp()
    .div(100)
    .resample('M')
    .mean()
)