In [34]:
import pandas as pd
import numpy as np

import pickle
import datetime

import yfinance as yf
import pandas_datareader as pdr

from sklearn.decomposition import PCA
from statsmodels.regression.rolling import RollingOLS
import statsmodels.api as sm

In [5]:
def get_data_from_ken_french(url):
    """Read a zipped CSV into a DataFrame indexed by its first column.

    Args:
        url: Location of the zip archive (anything ``pd.read_csv`` accepts).

    Returns:
        DataFrame whose index is the first CSV column, parsed as dates.
    """
    read_options = {
        'compression': 'zip',
        # The first six lines of the file are skipped before the header row.
        'skiprows': 6,
        'parse_dates': [0],
        'index_col': [0],
    }
    return pd.read_csv(url, **read_options)

def get_data_from_aqr(url, factor_name):
    """Read the 'Global' series of one factor sheet from an Excel workbook.

    Args:
        url: Location of the workbook (anything ``pd.read_excel`` accepts).
        factor_name: Factor label; selects the sheet named
            ``'<factor_name> Factors'`` and becomes the output column name.

    Returns:
        Single-column DataFrame indexed by the sheet's 'DATE' column, with
        the 'Global' column renamed to ``factor_name``.
    """
    sheet = f'{factor_name} Factors'
    raw = pd.read_excel(
        url,
        sheet_name=sheet,
        skiprows=18,  # rows skipped before the header row
        usecols=['DATE', 'Global'],
        parse_dates=[0],
        index_col=[0],
    )
    renamed = raw.rename(columns={'Global': factor_name})
    return renamed

In [7]:
def get_factor_data():
    """Download and combine the Ken French and AQR daily factor tables.

    The two Ken French tables are inner-joined and divided by 100; the two
    AQR series are inner-joined and left unscaled. Both results are then
    inner-joined, so only dates present in every source survive.

    Returns:
        DataFrame of the combined factor columns with its index named 'Dates'.
    """
    ken_french_urls = (
        'http://mba.tuck.dartmouth.edu/pages/faculty/ken.french/ftp/Developed_5_Factors_Daily_CSV.zip',
        'http://mba.tuck.dartmouth.edu/pages/faculty/ken.french/ftp/Developed_Mom_Factor_Daily_CSV.zip',
    )
    aqr_sources = (
        ('https://images.aqr.com/-/media/AQR/Documents/Insights/Data-Sets/Quality-Minus-Junk-Factors-Daily.xlsx', 'QMJ'),
        ('https://images.aqr.com/-/media/AQR/Documents/Insights/Data-Sets/Betting-Against-Beta-Equity-Factors-Daily.xlsx', 'BAB'),
    )

    # Downloads happen in the order listed: five factors, momentum, QMJ, BAB.
    five_factors, momentum = (get_data_from_ken_french(link) for link in ken_french_urls)
    quality, low_beta = (get_data_from_aqr(link, label) for link, label in aqr_sources)

    # Only the Ken French block is rescaled
    # (presumably published in percent -- confirm against the source files).
    fama_french = five_factors.join(momentum, how='inner') / 100
    aqr = quality.join(low_beta, how='inner')

    combined = fama_french.join(aqr, how='inner')
    return combined.rename_axis('Dates')

# Build the combined factor table when this cell runs; the regression and PCA
# helpers defined in later cells read this module-level name directly.
factor_data = get_factor_data()

In [8]:
def get_macro_data():
    """Fetch three FRED series over a fixed date range.

    The series identifiers and the 1980-05-01 to 2023-06-01 window are
    hard-coded here.

    Returns:
        Whatever ``pdr.DataReader`` returns for the 'fred' source
        (presumably a date-indexed DataFrame with one column per series
        -- confirm).
    """
    fred_series = ['DFII10', 'T10YIE', 'DGS10']
    date_range = {
        'start': datetime.datetime(1980, 5, 1),
        'end': datetime.datetime(2023, 6, 1),
    }
    return pdr.DataReader(fred_series, 'fred', **date_range)
    
# Fetch the FRED series when this cell runs. No other code in the visible part
# of this notebook reads `macro_data`.
macro_data = get_macro_data()

def get_stock_returns(tickers):
    """Download full price history and convert it to simple period returns.

    Args:
        tickers: Ticker symbols in any form ``yf.download`` accepts
            (this notebook passes a space-separated string).

    Returns:
        Row-over-row percentage changes of the 'Adj Close' prices.
    """
    # Request unadjusted columns explicitly: recent yfinance releases default
    # to auto_adjust=True, which removes the 'Adj Close' column and would make
    # the lookup below raise KeyError.
    prices = yf.download(tickers=tickers, period='max', auto_adjust=False)['Adj Close']
    return prices.pct_change()

# Space-separated symbols; the run_* helpers upper-case and split this string
# themselves before looking up columns.
ticker_list = 'tsla msft aapl ttek blk c ko gm'
# Module-level return table read directly by the regression helpers below.
stock_return = get_stock_returns(ticker_list)

def get_stock_metadata(tickers):
    """Collect the yfinance ``info`` payload for each requested ticker.

    Args:
        tickers: Ticker symbols in any form ``yf.Tickers`` accepts
            (this notebook passes a space-separated string).

    Returns:
        Dict mapping each Ticker object's ``.ticker`` attribute to its
        ``.info`` value. The result is only returned, not written to disk.
    """
    ticker_objects = yf.Tickers(tickers).tickers
    return {obj.ticker: obj.info for obj in ticker_objects.values()}

# Fetch per-ticker metadata when this cell runs; get_stock_long_name expects a
# mapping shaped like this one (ticker -> info dict).
stock_metadata = get_stock_metadata(ticker_list)

In [32]:
#Data preparation, model setup
def get_stock_return(ticker,stock_return):
    """Extract one ticker's returns as a single-column frame without NaNs.

    Args:
        ticker: Column label to select.
        stock_return: DataFrame of returns with one column per ticker.

    Returns:
        DataFrame containing only the ``ticker`` column, with rows holding
        missing values removed.
    """
    single_column = stock_return.loc[:, [ticker]]
    return single_column.dropna()

def get_stock_long_name(ticker, stock_metadata):
    """Look up the 'longName' entry stored for a ticker.

    Args:
        ticker: Key into ``stock_metadata``.
        stock_metadata: Mapping of ticker -> info mapping.

    Returns:
        The value stored under 'longName' for ``ticker``.

    Raises:
        KeyError: If the ticker or its 'longName' entry is missing.
    """
    info = stock_metadata[ticker]
    return info['longName']

def prep_data_for_regression(ticker, stock, factor_data):
    """Align one stock's returns with the factor table and build (Y, X).

    Args:
        ticker: Column label of the stock inside ``stock``.
        stock: DataFrame of returns containing a ``ticker`` column.
        factor_data: Factor DataFrame; must contain an 'RF' column.

    Returns:
        Tuple ``(Y, X)``: ``Y`` is the stock return minus 'RF'; ``X`` is every
        remaining factor column with a constant column added.
    """
    # Use the returns handed in by the caller. Previously this argument was
    # ignored and the notebook-global `stock_return` was read instead, so the
    # function silently depended on kernel state.
    returns = get_stock_return(ticker, stock)
    df = factor_data.join(returns, how='inner')

    # 'RF' and the stock's own column are not regressors.
    X = sm.add_constant(df.drop(['RF', ticker], axis=1))
    Y = df[ticker] - df['RF']
    return Y, X

def get_whole_sample_factor_loadings(ticker):
    """Fit one OLS regression of a stock on the factors over the full sample.

    Reads the module-level ``stock_return`` and ``factor_data`` tables.

    Args:
        ticker: Column label of the stock in ``stock_return``.

    Returns:
        DataFrame with one row per fitted parameter and columns
        'index' (parameter name), 'params', 'ticker', 'min_year', 'max_year'.
        The last two hold the first and last index value of the regression
        sample (full index values, not just years).
    """
    stock = get_stock_return(ticker, stock_return)
    excess_ret, regressors = prep_data_for_regression(ticker, stock, factor_data)

    fitted = sm.OLS(excess_ret, regressors).fit(cov_type='HAC', cov_kwds={'maxlags': 1})

    loadings = fitted.params.to_frame().reset_index()
    loadings.columns = ['index', 'params']
    return loadings.assign(
        ticker=ticker,
        min_year=min(regressors.index),
        max_year=max(regressors.index),
    )

def get_rolling_factor_loadings(ticker, rolling_window):
    """Fit rolling-window OLS regressions of a stock on the factors.

    Reads the module-level ``stock_return`` and ``factor_data`` tables.

    Args:
        ticker: Column label of the stock in ``stock_return``.
        rolling_window: Window length passed to ``RollingOLS``.

    Returns:
        Long-format DataFrame with columns 'index', 'variable', 'value',
        'ticker' and 'window_size'.
    """
    stock = get_stock_return(ticker, stock_return)
    excess_ret, regressors = prep_data_for_regression(ticker, stock, factor_data)

    # NOTE(review): only `.params` is used below, so these covariance options
    # may have no effect on the output -- confirm against RollingOLS.fit docs.
    fit_options = {'cov_type': 'HAC', 'cov_kwds': {'maxlags': 1}}
    rolling_fit = RollingOLS(excess_ret, regressors, window=rolling_window).fit(**fit_options)

    # Rows containing NaN parameters are dropped before reshaping.
    wide = rolling_fit.params.reset_index().dropna()
    # NOTE(review): this relies on reset_index() producing a column literally
    # named 'index', i.e. on the regression index being unnamed -- confirm.
    long_format = wide.melt(id_vars=['index'])
    return long_format.assign(ticker=ticker, window_size=rolling_window)

In [37]:
# run regression, PCA
def run_whole_sample_regressions(ticker_list):
    """Run the full-sample regression for every ticker and save one CSV.

    Args:
        ticker_list: Space-separated ticker symbols; upper-cased before use.

    Side effects:
        Writes 'whole_sample_regressions_output.csv' in the working directory.
        Each ticker's rows keep their own 0..k index, as before.
    """
    tickers = ticker_list.upper().split()

    # Collect per-ticker frames and concatenate once. This replaces growing a
    # frame inside the loop from a placeholder-column empty DataFrame.
    frames = [get_whole_sample_factor_loadings(ticker) for ticker in tickers]

    if frames:
        out_df = pd.concat(frames)
    else:
        # No tickers: still emit a CSV, with the real column names as header.
        out_df = pd.DataFrame(
            columns=['index', 'params', 'ticker', 'min_year', 'max_year'])
    out_df.to_csv('whole_sample_regressions_output.csv')

def run_rolling_regressions(ticker_list, rolling_window_list):
    """Run rolling regressions for every ticker/window pair and save one CSV.

    Args:
        ticker_list: Space-separated ticker symbols; upper-cased before use.
        rolling_window_list: Iterable of window lengths to fit per ticker.

    Side effects:
        Writes 'rolling_regressions_output.csv' in the working directory.
        Rows are ordered by ticker, then by window, as before.
    """
    tickers = ticker_list.upper().split()

    # Collect every (ticker, window) frame and concatenate once. This replaces
    # repeated concat onto a placeholder-column empty DataFrame.
    frames = [
        get_rolling_factor_loadings(ticker, rolling_window)
        for ticker in tickers
        for rolling_window in rolling_window_list
    ]

    if frames:
        out_df = pd.concat(frames)
    else:
        # Nothing to fit: still emit a CSV, with the real column names as header.
        out_df = pd.DataFrame(
            columns=['index', 'variable', 'value', 'ticker', 'window_size'])
    out_df.to_csv('rolling_regressions_output.csv')

def run_rolling_PCA(n_components, rolling_window):
    """Fit PCA on rolling windows of ``factor_data`` and save variance shares.

    Reads the module-level ``factor_data`` table.

    Args:
        n_components: Number of principal components kept per window.
        rolling_window: Number of consecutive rows in each window.

    Side effects:
        Writes 'rolling_pca_var_explained.csv' in long format: one row per
        (date, component), plus an 'other' entry holding the variance share
        not covered by the kept components.
    """
    n_windows = factor_data.shape[0] - rolling_window
    pca = PCA(n_components=n_components)

    # NOTE(review): `factor_data` is used as-is, so every column it carries
    # (including 'RF', which the regressions drop) enters the PCA -- confirm.
    # Accumulate rows in a list and stack once, instead of re-allocating the
    # whole result array with np.concatenate on every iteration.
    ratios = []
    for start in range(n_windows):
        window = factor_data.iloc[start:(start + rolling_window), :]
        pca.fit(window)
        ratios.append(pca.explained_variance_ratio_.copy())

    if ratios:
        array = np.vstack(ratios)
    else:
        array = np.empty([0, n_components])

    # NOTE(review): the window over rows i..i+w-1 is labelled with the date at
    # row i+w (the observation after the window), and the window ending on the
    # final row is never fitted -- confirm this one-step lag is intended.
    component_names = [f'PCA{i}' for i in range(1, n_components + 1)]
    var_explained_df = pd.DataFrame(
        array, columns=component_names, index=factor_data.index[rolling_window:])

    var_explained_df['other'] = 1 - var_explained_df.sum(axis=1)
    var_explained_df.index.name = 'Dates'
    var_explained_df = pd.melt(var_explained_df.reset_index(), id_vars=['Dates'])
    var_explained_df.to_csv('rolling_pca_var_explained.csv')

# Driver: only the whole-sample regressions run when this cell executes.
run_whole_sample_regressions(ticker_list)
# The rolling regressions and rolling PCA are left disabled; uncomment the
# lines below to produce their CSV outputs as well.
#rolling_window_list = [60, 120, 250]
#run_rolling_regressions(ticker_list, rolling_window_list)
#run_rolling_PCA(5, 250)