author: Łukasz Szymczyk (szymczyklp@gmail.com) <br>
date: 2021-09-02 <br>
version: 0.2

# Packages

In [1]:
import re
import warnings

import numpy as np
import pandas as pd
import statsmodels.api as sm
from dateutil.relativedelta import relativedelta
from IPython.display import clear_output
from statsmodels.formula.api import ols
from statsmodels.tsa.arima.model import ARIMA
from statsmodels.tsa.arima_model import ARMA
from statsmodels.tsa.arima_process import ArmaProcess

# DataIn

In [2]:
# Input location and workbook selection.
# NOTE(review): `path` is an absolute, machine-specific directory; it has to be
# adjusted before the notebook can run on another machine.
path = r'D:/MSI_BACKUP/SCIENCE/PhD/ART/4_liq_risk/data/'
# Variant (1) is active; variant (2) is kept as a commented-out alternative.
# The `file` line and the `parse` line carrying the same marker belong together.
file = r'US high yield bond MFs 2000-2021.07_only surviving.xlsx' #(1)
#file = r'US high yield bond MFs 2000-2021.07_non surviving included2.xlsx' #(2)

# Open the workbook inside a context manager so the underlying file handle is
# released as soon as the sheet has been read (an open ExcelFile otherwise
# stays open for the lifetime of the kernel).
with pd.ExcelFile(path + 'in/' + file) as data_excel:
    #print(data_excel.sheet_names)
    returns_funds = data_excel.parse('US HY bond MF only surv') #(1)
    #returns_funds = data_excel.parse('US HY bond MF oldest share') #(2)

In [3]:
# Benchmark quotes read from the 'in/' sub-folder of `path`
# (presumably monthly S&P 500 data, judging by the file name - confirm).
spx = pd.read_csv(
    path + 'in/' + '^spx_m.csv',
    sep=',',
    decimal='.',
)

# RegEx

In [4]:
# Matches headers of the exact form "Monthly Return YYYY-MM Base Currency";
# capture group 2 holds the "YYYY-MM" part.
pattern_period = re.compile(
    r'^(Monthly Return) ([0-9]{4}-[0-9]{2}) (Base Currency)$'
)

# Functions

In [5]:
def period_extraction(kol_in,pattern):
    '''
    Extract the reporting period from a column description.

    Parameters
    ----------
    kol_in : str
        Column header, e.g. ``'Monthly Return 2021-03 Base Currency'``.
    pattern : str or compiled regular expression
        Pattern whose second capture group holds the period as ``YYYY-MM``.

    Returns
    -------
    pandas.Timestamp or None
        The last calendar day of the captured month, or ``None`` when
        ``kol_in`` is not a string or does not match ``pattern``.
    '''
    # Non-string headers (e.g. NaN) cannot match; treat them like a non-match
    # instead of letting re.search raise TypeError.
    if not isinstance(kol_in, str):
        return None

    match = re.search(pattern, kol_in)
    if match is None:
        return None

    # 'YYYY-MM' parses to the first day of that month; MonthEnd(0) rolls it
    # forward to the last day of the same month.
    return pd.to_datetime(match.group(2)) + pd.offsets.MonthEnd(0)


# Hyperparameters

In [6]:
# Number of consecutive monthly observations in each rolling estimation window.
window = 36
# First period kept in the benchmark series.
date_start = pd.to_datetime('2000-01-31', errors='raise')

# DataPrep

In [7]:
# Keep the ticker plus every monthly-return column, then flip the sheet so
# that rows are periods and columns are tickers.
my_column = ['Ticker'] + [name for name in returns_funds.columns.values if 'Monthly Return' in name]
returns_funds = (
    returns_funds.loc[:, my_column]
    .set_index('Ticker')
    .transpose()
    .reset_index()
    .rename(columns={'index': 'Period'})
)
# Convert each "Monthly Return YYYY-MM Base Currency" header into a month-end
# timestamp and use it as the row index.
returns_funds['Period'] = np.vectorize(period_extraction)(returns_funds['Period'], pattern_period)
returns_funds = returns_funds.set_index('Period')
returns_funds.tail(5)

Ticker,TNHIX,AGDAX,HIYYX,BJBHX,FYAIX,CHIIX,SHOIX,NPHIX,ABHIX,AHGNX,...,EKHAX,_NT03,SSTHX,SHYOX,WAHYX,SHIAX,LMECX,WHGHX,WPINX,ZEOIX
Period,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2021-03-31,0.33751,-0.36801,0.20891,0.24433,0.36823,0.78071,1.02358,0.53048,0.16166,0.58865,...,-0.04385,,0.31467,-0.37157,0.30722,0.60823,-1.80893,0.62769,9.21739,0.04164
2021-04-30,0.76689,1.79531,1.46004,1.0243,1.2943,1.62384,0.90915,1.27836,0.87799,1.34436,...,0.85464,,0.66633,1.4861,1.23443,1.13809,2.95376,1.70453,5.25478,0.35818
2021-05-31,0.21088,0.58806,0.48256,0.34816,0.33465,-0.04927,1.24616,0.28846,0.48772,0.46316,...,-0.32143,,0.30525,0.68303,0.28036,0.39009,2.21011,1.13976,1.96672,0.58489
2021-06-30,0.9213,1.27223,1.31594,1.00831,0.47027,2.03663,1.46521,1.42533,1.20523,1.46833,...,1.14939,,0.54679,0.97664,1.50087,0.90982,0.36977,1.53892,0.14837,0.52219
2021-07-31,0.12526,0.38836,0.41769,-0.16393,0.0304,0.11954,,,,,...,1.14246,,-0.06594,,0.30565,-0.00786,0.10834,-0.07469,-0.88889,0.26056


In [8]:
# Find tickers without a single return observation and remove them.
dodrop = []
B = len(returns_funds.columns) -1
b = 0

for col_name in returns_funds.columns:
    # Overwrite the previous progress line instead of stacking new ones.
    clear_output(wait = True)
    print(col_name + f' :     {b} / {B}')

    # count() is the number of non-missing values in the column.
    if returns_funds.loc[:, col_name].count() == 0:
        dodrop.append(col_name)

    b += 1

returns_funds = returns_funds.drop(columns = dodrop)
print(dodrop)

ZEOIX :     179 / 179
['BSIHX', 'FIFJX', 'HNRZX', 'MWOPX', '_NT03']


In [9]:
# Build the benchmark return table: the contemporaneous month-over-month return
# (returns0) plus its first three lags (returns1..returns3), indexed by Period.
# 'Zamkniecie' / 'Data' are the column names of the source CSV.
# NOTE(review): these returns are fractions (pct_change), whereas the fund
# returns displayed earlier in the notebook look like percentages - confirm the
# units before interpreting regression betas.
spx = (
    spx.assign(returns0=spx['Zamkniecie'].pct_change())
    .rename(columns={'Data': 'Period'})
    .assign(Period=lambda frame: pd.to_datetime(frame['Period']))
    .set_index('Period')
)

# Keep three extra months ahead of date_start so the lags of the first kept
# rows can be filled from real observations.
spx = spx.loc[spx.index >= date_start + relativedelta(months=-3)]

# assign() returns a new frame, so no column is written onto a filtered slice
# (which is what triggers pandas' SettingWithCopyWarning).
spx = spx.assign(**{
    'returns' + str(lag): spx['returns0'].shift(periods=lag) for lag in (1, 2, 3)
})

spx = spx.loc[spx.index >= date_start, ['returns0', 'returns1', 'returns2', 'returns3']]
spx

Unnamed: 0_level_0,returns0,returns1,returns2,returns3
Period,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2000-01-31,-0.050904,0.057722,0.019179,0.062539
2000-02-29,-0.020108,-0.050904,0.057722,0.019179
2000-03-31,0.096720,-0.020108,-0.050904,0.057722
2000-04-30,-0.030796,0.096720,-0.020108,-0.050904
2000-05-31,-0.021887,-0.030796,0.096720,-0.020108
...,...,...,...,...
2021-03-31,0.042439,0.026091,-0.011137,0.037121
2021-04-30,0.052425,0.042439,0.026091,-0.011137
2021-05-31,0.005487,0.052425,0.042439,0.026091
2021-06-30,0.022214,0.005487,0.052425,0.042439


# AR(1)

In [10]:
# Result tables for the rolling AR(1) estimates: one row per window start
# (every period except the last `window` ones) and one column per fund.
# Each name gets its OWN DataFrame. Binding several names to one frame would
# make every statistic overwrite the others, because assignment does not copy.
funds_AR_const, funds_AR_betas, funds_AR_const_pvalue, funds_AR_betas_pvalue = (
    pd.DataFrame(
        np.nan,
        index=returns_funds.index[:max(len(returns_funds) - window, 0)],
        columns=returns_funds.columns,
    )
    for _ in range(4)
)

In [None]:
%%time

b = 0
B = len(returns_funds.columns) -1

with warnings.catch_warnings():
    warnings.simplefilter("ignore")
    
    for col_name in returns_funds.columns:
        clear_output(wait = True)
        print(col_name + ' :     ' + str(b) + ' / ' + str(B))
        
        for j in range(len(returns_funds) - window):
            current_period = returns_funds.index[j]
            
            if not (pd.isna(returns_funds.loc[:,col_name].iloc[j])) and not(pd.isna(returns_funds.loc[:,col_name].iloc[j + window])):
                simulated_data = returns_funds.loc[:,col_name].iloc[j:(j+window)]
                mod = ARMA(simulated_data, order=(1,0), freq = 'M')
                result = mod.fit()
                funds_AR_const.loc[current_period,col_name] = result.params[0]
                funds_AR_betas.loc[current_period,col_name] = result.params[1]
                funds_AR_const_pvalue.loc[current_period,col_name] = result.pvalues[0]
                funds_AR_betas_pvalue.loc[current_period,col_name] = result.pvalues[1]              
            else:
                funds_AR_const.loc[current_period,col_name] = float('NaN')
                funds_AR_betas.loc[current_period,col_name] = float('NaN')
                funds_AR_const_pvalue.loc[current_period,col_name] = float('NaN')
                funds_AR_betas_pvalue.loc[current_period,col_name] = float('NaN')
                
        b = b+1


WRHIX :     36 / 174


In [None]:
# Rolling AR(1) coefficients for the funds in column positions 45..57.
# `funds_AR_coef` is not defined anywhere in this notebook; the AR coefficient
# table is `funds_AR_betas`.
funds_AR_betas.iloc[:,45:58].plot()

In [None]:
# Rolling AR(1) coefficients for the funds in column positions 0..29.
# `funds_AR_coef` is not defined anywhere in this notebook; the AR coefficient
# table is `funds_AR_betas`.
funds_AR_betas.iloc[:,0:30].plot()

# LagEffects

In [None]:
# Place the benchmark returns (returns0..returns3) next to the fund returns,
# aligned on the Period index. join() defaults to a left join, so the rows
# follow the benchmark's periods.
benchmark = pd.DataFrame(data=spx)
#benchmark.plot()
dataset = benchmark.join(other=returns_funds)
dataset.head(n=5)

In [None]:
#b = 0
#B = len(returns_funds.columns) -1
#
#for col_name in dataset.columns[4:]:
#    clear_output(wait = True)
#    print(col_name + ' :     ' + str(b) + ' / ' + str(B))
#    new_col_name = col_name.replace(" ", "_")
#    dataset = dataset.rename(columns={col_name: new_col_name})    

In [None]:
# Empty result table with the columns Ticker / Lagg / CAMP: a single all-NaN
# placeholder row is created and immediately removed again by dropna().
beta = pd.DataFrame(
    data={'Ticker' : [np.nan], 'Lagg' : [np.nan], 'CAMP' : [np.nan]},
).dropna(axis=0, how='any')

In [None]:
# Per-fund market betas from two OLS regressions on the benchmark returns:
#   CAMP : fund ~ returns0                                   (slope on returns0)
#   Lagg : fund ~ returns0 + returns1 + returns2 + returns3  (sum of the slopes)
# The result is one row per fund in `beta`, rebuilt from scratch on every run
# so that re-executing the cell does not duplicate rows.
b = 0
B = len(returns_funds.columns) -1

lag_terms = ['returns0', 'returns1', 'returns2', 'returns3']
beta_rows = []

# Iterate over the fund columns by name. Slicing `dataset.columns` by a fixed
# position is fragile: with four benchmark columns in front, `[5:]` silently
# skips the first fund.
for col_name in returns_funds.columns:
    clear_output(wait = True)
    print(str(col_name) + ' :     ' + str(b) + ' / ' + str(B))

    # Q("...") quotes the column name for the formula parser, so tickers that
    # are not valid Python identifiers (spaces, leading digits, ...) still work.
    response = 'Q("' + str(col_name) + '")'

    camp = ols(response + ' ~ returns0', data = dataset).fit()
    # Look coefficients up by name; positional indexing on a label-indexed
    # Series is deprecated in recent pandas.
    beta_camp = camp.params['returns0']

    lagg = ols(response + ' ~ ' + ' + '.join(lag_terms), data = dataset).fit()
    beta_lagg = sum(lagg.params[term] for term in lag_terms)

    beta_rows.append({'Ticker' : col_name, 'Lagg': beta_lagg, 'CAMP' : beta_camp})

    b = b+1

# Build the frame once; DataFrame.append was removed in pandas 2.0 and growing
# a frame row by row is quadratic anyway.
beta = pd.DataFrame(beta_rows, columns=['Ticker', 'Lagg', 'CAMP'])

In [None]:
# Difference between the summed lagged beta and the single-factor beta.
beta['diff'] = beta['Lagg'].sub(beta['CAMP'])
beta.head(n=10)

# DataOut

In [None]:
# Write every result table to the 'out/' sub-folder of `path` as a
# semicolon-separated CSV encoded in windows-1250.
# NOTE(review): 'funds_AR_betas_pvalue.csv' lacks the 'AR1' infix used by the
# other three AR outputs - confirm whether that is intentional.
outputs = {
    r'funds_AR1_const.csv': funds_AR_const,
    r'funds_AR1_betas.csv': funds_AR_betas,
    r'funds_AR1_const_pvalue.csv': funds_AR_const_pvalue,
    r'funds_AR_betas_pvalue.csv': funds_AR_betas_pvalue,
    r'funds_lagg_betas.csv': beta,
}
for out_name, out_frame in outputs.items():
    out_frame.to_csv(path + 'out/' + out_name, sep=';', encoding = 'windows-1250')