### Import Packages and Set Directories

In [2]:
import os
import sys
import json
import inspect
import warnings
import numpy as np
import pandas as pd
import seaborn as sns
import statsmodels.api as sm
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
from matplotlib.patches import Patch
from pypfopt import EfficientFrontier
from pypfopt import risk_models
from pypfopt import expected_returns
from scipy.interpolate import make_interp_spline


# Resolve the project folders relative to the notebook's working directory
notebook_dir = os.getcwd()
base_dir = os.path.join(notebook_dir, '..')  # parent of the working directory

# Make modules under src/ importable from this notebook
src_dir = os.path.join(base_dir, 'src')
sys.path.append(src_dir)

# Data inputs and graph outputs
data_dir = os.path.join(base_dir, 'data')
stock_dir = os.path.join(data_dir, 'stocks')
graph_dir = os.path.join(base_dir, 'results', 'graphs')

# Import optimal portfolio
from optimal_portfolios import OptimalPortfolios
warnings.simplefilter(action='ignore', category=FutureWarning)

### Clean Factor Data

In [4]:
# # Read Fama-French 5 factors and momentum factors
# ff6_weekly_df = pd.read_csv(os.path.join(data_dir, 'factors/ff6_weekly.csv'))
# ff6_monthly_df = pd.read_csv(os.path.join(data_dir, 'factors/ff6_monthly.csv'))
# ff6_yearly_df = pd.read_csv(os.path.join(data_dir, 'factors/ff6_yearly.csv'))

# # Read HXZ 4 factors
# hxz_weekly_df = pd.read_csv(os.path.join(data_dir, 'factors/hxz_weekly.csv'))
# hxz_monthly_df = pd.read_csv(os.path.join(data_dir, 'factors/hxz_monthly.csv'))
# hxz_yearly_df = pd.read_csv(os.path.join(data_dir, 'factors/hxz_yearly.csv'))

# # Combine the weekly factors dataframes
# factors_weekly_df = pd.merge(ff6_weekly_df, hxz_weekly_df, on='Date', how='inner')
# factors_weekly_df.drop(columns=['RF_HXZ', 'Mkt-RF_HXZ'], inplace=True)
# factors_weekly_df.to_csv(os.path.join(data_dir, 'factors/factors_weekly.csv'), index=False)

# # Combine the monthly factors dataframes
# factors_monthly_df = pd.merge(ff6_monthly_df, hxz_monthly_df, on='Year_Month', how='inner')
# factors_monthly_df['Year_Month'] = pd.to_datetime(factors_monthly_df['Year_Month']).dt.to_period('M')
# factors_monthly_df.drop(columns=['RF_HXZ', 'Mkt-RF_HXZ'], inplace=True)
# factors_monthly_df.to_csv(os.path.join(data_dir, 'factors/factors_monthly.csv'), index=False)

# # Combine the yearly factors dataframes
# factors_yearly_df = pd.merge(ff6_yearly_df, hxz_yearly_df, on='Year', how='inner')
# factors_yearly_df.drop(columns=['RF_HXZ', 'Mkt-RF_HXZ'], inplace=True)
# factors_yearly_df.to_csv(os.path.join(data_dir, 'factors/factors_yearly.csv'), index=False)

### Read in data

In [14]:
# Read efficient portfolios data (one CSV per sampling frequency, under data_dir)
pfo_weekly_df, pfo_monthly_df, pfo_yearly_df = [
    pd.read_csv(os.path.join(data_dir, rel_path))
    for rel_path in (
        'stocks/pfo_weekly.csv',
        'stocks/pfo_monthly.csv',
        'stocks/pfo_yearly.csv',
    )
]

# Read factor data (only the weekly and monthly files are loaded here)
factors_weekly_df, factors_monthly_df = [
    pd.read_csv(os.path.join(data_dir, rel_path))
    for rel_path in (
        'factors/factors_weekly.csv',
        'factors/factors_monthly.csv',
    )
]

### Clean Data

In [21]:
def create_week_column(df_orginal):
    """
    Return a copy of ``df_orginal`` keyed by ISO week instead of calendar date.

    The 'Date' column is parsed with ``pd.to_datetime`` and converted into a
    'Year_Week' string of the form '<ISO year>-<ISO week zero-padded to 2>'
    (e.g. '2020-53'). The input frame itself is not modified. In the returned
    copy the 'Date' column is removed, and so are any columns named 'Year'
    or 'Week'.
    """
    result = df_orginal.copy()

    # A single isocalendar() call provides both the ISO year and the ISO week
    iso = pd.to_datetime(result['Date']).dt.isocalendar()
    result['Year_Week'] = iso['year'].astype(str) + '-' + iso['week'].astype(str).str.zfill(2)

    # 'Date' always exists here; 'Year'/'Week' only when the input carried them
    return result.drop(columns=['Year', 'Week', 'Date'], errors='ignore')

# Create Year_Week column for weekly factor
num_pfo = 5  # number of portfolio_<i> columns processed below (portfolio_1 .. portfolio_5)
factors_weekly_tmp = create_week_column(factors_weekly_df)

# Merge stock df with factor df; the inner join keeps only periods present in both
covariates_weekly = pd.merge(pfo_weekly_df, factors_weekly_tmp, how='inner', on='Year_Week')
covariates_monthly = pd.merge(pfo_monthly_df, factors_monthly_df, how='inner', on='Year_Month')

# Calculate excess return: in BOTH frames, replace each raw portfolio_<i>
# column with R<i>-RF = portfolio_<i> - RF
pfo_col_names = []  # excess-return column names, in portfolio order
for pfo in range(1, num_pfo + 1):
    raw_col = f'portfolio_{pfo}'
    excess_col = f'R{pfo}-RF'
    for frame in (covariates_weekly, covariates_monthly):
        frame[excess_col] = frame[raw_col] - frame['RF']
        frame.drop(columns=[raw_col], inplace=True)
    pfo_col_names.append(excess_col)

# Reorder columns: period key, excess returns, then every other column.
# Each frame's trailing columns are derived from that frame's own columns, so
# the monthly frame no longer depends on the weekly frame's column set.
# Index.difference returns the remaining names sorted.
cols_remaining = covariates_weekly.columns.difference(['Year_Week'] + pfo_col_names).tolist()
cols_remaining_monthly = covariates_monthly.columns.difference(['Year_Month'] + pfo_col_names).tolist()

# RF has served its purpose once the excess returns exist, so drop it from the result
covariates_weekly = covariates_weekly[['Year_Week'] + pfo_col_names + cols_remaining].drop(columns=['RF'])
covariates_monthly = covariates_monthly[['Year_Month'] + pfo_col_names + cols_remaining_monthly].drop(columns=['RF'])

# Save to csv
# covariates_weekly.to_csv(os.path.join(data_dir, 'covariates_weekly.csv'), index=False)
# covariates_monthly.to_csv(os.path.join(data_dir, 'covariates_monthly.csv'), index=False)