In [16]:
import numpy as np
import pandas as pd
import statsmodels.api as sm
from finance_byu.regtables import Regtable
import warnings

In [None]:
# Load the BoQ dataset from its parquet file into a pandas DataFrame
# and preview the first rows.
df = pd.read_parquet('Data/BoQ_Data.parquet')
df.head()

In [None]:
# Read the reduced variable list from "Sheet2" of the workbook. The sheet is
# read without a header row, and column 0 is converted to a plain Python list.
reduced_variables = pd.read_excel('Data/Reduced Variables.xlsx',sheet_name="Sheet2", header=None)[0].to_list()
# Display the list for inspection.
reduced_variables

In [43]:
# Assemble the regression sample: the reduced predictor set plus the return
# column, keeping only rows with no missing values.
dataframe = df[reduced_variables + ['RET']].dropna().reset_index(drop=True)

# Split off the dependent variable; `dataframe` is left holding predictors only.
y = dataframe.pop('RET')

# Regress RET on every predictor at once, with an intercept term.
X = sm.add_constant(dataframe)
model = sm.OLS(y, X).fit()

# Collect fitted models in a list so they can be tabulated together.
reg_list = [model]

In [None]:
# Build a regression table from the fitted models collected in `reg_list`
# (configured with stat='tstat' and sig='coeff'), render it, and display it.
tbl = Regtable(reg_list,stat='tstat',sig='coeff')
results = tbl.render()
results

In [None]:
# For calculating momentum portfolios - ignore for now
variables_of_interest = ['PERMNO', 'DATE', 'RET', 'beta', 'idio_vol', 'BEME']
# Work on an explicit copy so the column assignments below modify `subset`
# only and do not raise SettingWithCopyWarning; no blanket warning filter
# is needed.
subset = df[variables_of_interest].copy()

# Calendar-month label in 'M-YYYY' form, used as the monthly grouping key.
dates = pd.DatetimeIndex(subset['DATE'])
subset['MONTHYEAR'] = dates.month.astype(str) + '-' + dates.year.astype(str)

# Mean RET over all rows sharing a calendar month, broadcast back to each row.
# `transform` keeps the original row index; assigning the plain groupby mean
# (indexed by MONTHYEAR) would not align and would leave the column all-NaN.
subset['monthly_ret'] = subset.groupby('MONTHYEAR')['RET'].transform('mean')

# Collapse to one row per (month, PERMNO): mean of each variable and the
# earliest DATE observed in that month.
# NOTE(review): RET is averaged within the month, not compounded - confirm
# this is the intended definition of the monthly return.
by_firm_month = subset.groupby(['MONTHYEAR', 'PERMNO'])
monthly_data = by_firm_month[['RET', 'beta', 'idio_vol', 'BEME']].mean()
monthly_data['DATE'] = by_firm_month['DATE'].min()

# MONTHYEAR is a string, so the grouped output is ordered lexicographically
# ('1-2019', '1-2020', '10-2019', ...). Sort chronologically within each
# PERMNO so the rolling window below covers consecutive observations.
monthly_data = (
    monthly_data.reset_index()
    .sort_values(['PERMNO', 'DATE'])
    .reset_index(drop=True)
)

# Calculate momentum
monthly_data['logret'] = np.log(1 + monthly_data['RET'])
# Sum of the 11 most recent log returns per PERMNO. Dropping only the PERMNO
# level of the result keeps the original row labels, so values are assigned
# back to the rows they were computed from.
# NOTE(review): the window counts rows, not calendar months; a PERMNO with
# missing months gets a window spanning more than 11 calendar months.
monthly_data['mom'] = (
    monthly_data.groupby('PERMNO')['logret']
    .rolling(11, min_periods=11)
    .sum()
    .reset_index(level=0, drop=True)
)
# Lag by two observations so the window ends two rows before the current one.
monthly_data['mom'] = monthly_data.groupby('PERMNO')['mom'].shift(2)

# Keep only rows where every column (including momentum) is available.
monthly_data = monthly_data.dropna().reset_index(drop=True)
monthly_data

In [None]:
from itertools import combinations

def RobustnessCheck(dataframe: pd.DataFrame, vars_list=None):
    """Fit OLS of RET on every non-empty subset of the explanatory variables.

    Parameters
    ----------
    dataframe : pd.DataFrame
        Must contain a 'RET' column and every column named in ``vars_list``.
        The frame passed in is not modified.
    vars_list : sequence of str, optional
        Explanatory variables to combine. Defaults to
        ``['beta', 'idio_vol', 'BEME']``. If the data is monthly, 'mom'
        can be included as well.

    Returns
    -------
    Regtable
        Table (stat='tstat', sig='coeff') with one fitted model per variable
        combination, ordered by combination size and then by the order of
        ``vars_list``.

    Raises
    ------
    ValueError
        If ``vars_list`` is supplied but empty.
    """
    if vars_list is None:
        vars_list = ['beta', 'idio_vol', 'BEME']
    else:
        vars_list = list(vars_list)
        if not vars_list:
            raise ValueError("vars_list must contain at least one variable")

    # Use one common sample for every specification: rows complete in all
    # candidate variables and RET, so the models are comparable.
    sample = dataframe[vars_list + ['RET']].dropna().reset_index(drop=True)
    y = sample['RET']  # same dependent variable for every model

    # Robustness check: one regression (with intercept) per variable subset.
    reg_list = []
    for size in range(1, len(vars_list) + 1):
        for combo in combinations(vars_list, size):
            X = sm.add_constant(sample[list(combo)])
            reg_list.append(sm.OLS(y, X).fit())

    return Regtable(reg_list, stat='tstat', sig='coeff')

# Run the robustness regressions on the full dataset `df` and display the
# rendered table. Note: this rebinds `tbl`, which an earlier cell also assigns.
tbl = RobustnessCheck(df)

tbl.render()

In [None]:
# Repeat the robustness regressions on the month-level frame `monthly_data`
# and display the rendered table.
tbl2 = RobustnessCheck(monthly_data)
tbl2.render()

In [None]:
# Sharpe-ratio style statistic: mean excess return divided by its volatility.
def SharpeRatio(returns: pd.Series, risk_free: float = 0.0, ddof: int = 0) -> float:
    """Return mean(returns - risk_free) / std(returns - risk_free).

    Parameters
    ----------
    returns : pd.Series or array-like
        Per-period returns. Missing values are ignored.
    risk_free : float, default 0.0
        Per-period risk-free rate subtracted from ``returns``. With the
        default of 0 this isn't exactly the Sharpe Ratio, because no
        risk-free rate is removed.
    ddof : int, default 0
        Delta degrees of freedom for the standard deviation (0 = population,
        1 = sample).

    Returns
    -------
    float
        The ratio, or NaN when there are no valid observations or the
        volatility is zero or undefined (the ratio is not defined there).
    """
    excess = (pd.Series(returns, dtype='float64') - risk_free).dropna()
    if excess.empty:
        return float('nan')

    volatility = excess.std(ddof=ddof)
    # Guard the division: constant returns (std == 0) or too few points for
    # the requested ddof (std is NaN) have no meaningful ratio.
    if not np.isfinite(volatility) or volatility == 0:
        return float('nan')

    return float(excess.mean() / volatility)

# Evaluate the ratio on `y`, the RET series used as the dependent variable
# in the baseline regression cell.
SharpeRatio(y)