In [1]:
import pandas as pd
import numpy as np
import datetime as dt
import math
import matplotlib.pyplot as plt
from pandas.tseries.offsets import BDay
from tqdm import tqdm
from sklearn.linear_model import LinearRegression
import statsmodels.api as sm

pd.set_option("display.max_columns", None)

PATH = "C:/Users/jackl/OneDrive/Documents/finance_research/japan_qe/"

In [2]:
topix_df = pd.read_csv(PATH+'raw_data/datastream/topix_daily_returns.csv', parse_dates=['valuedate'])
topix_df['ret'] = topix_df['pi_'].pct_change()

In [3]:
weight_df = pd.read_pickle(PATH+'checkpoint_data/index_weights.pkl')
topix_weight_df = weight_df[weight_df['indexlistmnem']=='LTOKYOSE']

In [4]:
ret_nk_df = pd.read_pickle(PATH+'checkpoint_data/returns_all_stocks.pkl')

In [5]:
boj_df = pd.read_pickle(PATH+'checkpoint_data/boj_purchases_stock_level_v2.pkl')
boj_dates_df = boj_df[['date']].drop_duplicates()
boj_dates_df['boj_flag'] = [1 for i in range(len(boj_dates_df))]

In [6]:
rfr_df = pd.read_pickle(PATH+'checkpoint_data/jgb_1d.pkl')

# Data Organization

In [7]:
ret_nk_df = pd.merge_asof(ret_nk_df.sort_values('datadate'), 
                          rfr_df.sort_values('date'), 
                          left_on=['datadate'],
                          right_on=['date'],
                          direction='backward').sort_values(['sedol', 'datadate'])

In [8]:
ret_nk_df = pd.merge(ret_nk_df, 
                     boj_dates_df, 
                     left_on=['datadate'], 
                     right_on=['date'], 
                     how='left')

In [9]:
len_ones = []
full_list = []
for index, df in tqdm(ret_nk_df.groupby(['sedol', 'yr_qtr_index'])): 
    len_ = len(df[df['boj_flag']==1])
    full_ = len(df)
    len_ones.append(len_)
    full_list.append(full_)

100%|███████████████████████████████████████████████████████████████████████████| 11661/11661 [00:15<00:00, 751.57it/s]


In [10]:
ret_nk_df['mod_ret'] = ret_nk_df['mod_ret'].apply(lambda x: math.nan if x==0 else x)

In [11]:
ret_nk_df = ret_nk_df[(ret_nk_df['datadate'] >= pd.to_datetime('2005-01-01')) & 
                      (ret_nk_df['datadate'] <= pd.to_datetime('2021-01-01'))]

In [12]:
ret_nk_df_0 = pd.merge_asof(ret_nk_df.sort_values('datadate'), 
                            topix_weight_df[['date_', 'sedol', 'weight']].sort_values('date_'), 
                            by='sedol', 
                            left_on='datadate', 
                            right_on='date_', 
                            direction='backward', 
                            tolerance=pd.Timedelta(days=31)).drop(columns='date_')

In [13]:
ret_nk_df_1 = pd.merge(ret_nk_df_0, 
                       topix_df, 
                       left_on=['datadate'], 
                       right_on=['valuedate'], 
                       how='left').drop(columns=['dsindexmnem', 'valuedate', 'pi_'])

In [14]:
cols = list(ret_nk_df_1.columns)
cols[-1] = 'topix_ret'
ret_nk_df_1.columns = cols

In [15]:
ret_nk_df_1['qtr'] = ret_nk_df_1['datadate'].dt.quarter
ret_nk_df_1['yr'] = ret_nk_df_1['datadate'].dt.year
ret_nk_df_1['qtr_yr_index'] = list(zip(list(ret_nk_df_1['yr']), list(ret_nk_df_1['qtr'])))

qtr_yr_index_list = list(ret_nk_df_1['qtr_yr_index'].sort_values().unique())
qtr_index = [i for i in range(len(qtr_yr_index_list))]
qtr_conv_dict = dict(zip(qtr_yr_index_list, qtr_index))

In [16]:
ret_nk_df_1 = ret_nk_df_1[['sedol', 'datadate', 'qtr_yr_index', 'mod_ret', 'topix_ret', 'weight', 'boj_flag', 'rfr']]

In [17]:
ret_nk_df_1['topix_ret_adj'] = (ret_nk_df_1['topix_ret'] - (ret_nk_df_1['weight'] * ret_nk_df_1['mod_ret'])) * (1 / 1 - (ret_nk_df_1['weight']))

In [18]:
ret_nk_df_1['mod_ret'] = ret_nk_df_1['mod_ret'] - ret_nk_df_1['rfr']
ret_nk_df_1['topix_ret'] = ret_nk_df_1['topix_ret'] - ret_nk_df_1['rfr']
ret_nk_df_1['topix_ret_adj'] = ret_nk_df_1['topix_ret_adj'] - ret_nk_df_1['rfr']

In [19]:
ret_nk_df_1 = ret_nk_df_1.drop(columns=['rfr'])

# Quarterly $R^2$

In [19]:
#quarterly
qtr_yr_index_list = []
sedol_list = []
fin_date_list = [] 

r2_list = []
idio_list = []
nobs_list = []

r2_exc_boj_list = []
idio_list_exc_boj = []
nobs_exc_boj_list = []

for index, df in tqdm(ret_nk_df_1.groupby(['qtr_yr_index', 'sedol'])): 
    df = df.reset_index(drop=True)
    qtr_yr_index, sedol = index
    fin_date = df.iat[-1, 1]
    try: 
        df = df.dropna(subset=['topix_ret_adj'])
        x = sm.add_constant(df['topix_ret_adj'])
        y = df[['mod_ret']]
        res = sm.OLS(y, x).fit()

        r2 = res.rsquared
        nobs = res.nobs
        idio = res.resid.std()

    except: 
        r2 = math.nan
        nobs = math.nan
        idio = math.nan
    
    # removes BOJ days
    try: 
        df_exc_boj = df.dropna(subset=['topix_ret_adj'])
        df_exc_boj = df_exc_boj[~(df_exc_boj['boj_flag']==1)]
        
        x_exc_boj = sm.add_constant(df_exc_boj['topix_ret_adj'])
        y_exc_boj = df_exc_boj[['mod_ret']]
        res_exc_boj = sm.OLS(y_exc_boj, x_exc_boj).fit()

        r2_exc_boj = res_exc_boj.rsquared
        nobs_exc_boj = res_exc_boj.nobs
        idio_exc_boj = res_exc_boj.resid.std()
        
    except: 
        r2_exc_boj = math.nan
        nobs_exc_boj = math.nan
        idio_exc_boj = math.nan
        
    qtr_yr_index_list.append(qtr_yr_index)
    sedol_list.append(sedol)
    fin_date_list.append(fin_date)
    
    r2_list.append(r2)
    nobs_list.append(nobs)
    idio_list.append(idio)
    
    r2_exc_boj_list.append(r2_exc_boj)
    nobs_exc_boj_list.append(nobs_exc_boj)
    idio_list_exc_boj.append(idio_exc_boj)
    
r2_qtr_df = pd.DataFrame({
                        'qtr_yr_index': qtr_yr_index_list, 
                        'sedol': sedol_list, 
                        'fin_date': fin_date_list, 
                        'r2': r2_list, 
                        'idio': idio_list,
                        'nobs': nobs_list,
                        'r2_exc_boj': r2_exc_boj_list,
                        'idio_exc_boj': idio_list_exc_boj,
                        'nobs_exc_boj': nobs_exc_boj_list, 
                     })

r2_qtr_df['qtr_index'] = r2_qtr_df['qtr_yr_index'].apply(lambda x: qtr_conv_dict[x])

100%|██████████████████████████████████████████████████████████████████████████████| 9786/9786 [04:40<00:00, 34.85it/s]


# Quarterly Beta

In [30]:
ret_nk_df_1['qtr_index'] = ret_nk_df_1['qtr_yr_index'].apply(lambda x: qtr_conv_dict[x])

In [32]:
#quarterly betas estimated over past year
qtr_yr_index_list = []
sedol_list = []
fin_date_list = [] 

beta_list = []
nobs_list = []

beta_exc_boj_list = []
nobs_exc_boj_list = []

for sedol, sedol_df in tqdm(ret_nk_df_1.groupby(['sedol'])):
    for window in window_list:
        df = sedol_df[sedol_df['qtr_index'].isin(window)]
        df = df.reset_index(drop=True)
        
        try:
            qtr_yr_index = df['qtr_yr_index'].iloc[-1]
            fin_date = df.iat[-1, 1]
        except: 
            qtr_yr_index = math.nan
            fin_date = math.nan
        
        try: 
            df = df.dropna(subset=['topix_ret', 'mod_ret'])
            x = sm.add_constant(df['topix_ret'])
            y = df[['mod_ret']]
            res = sm.OLS(y, x).fit()

            beta = res.params[1]
            nobs = res.nobs

        except: 
            beta = math.nan
            nobs = math.nan

        # removes BOJ days
        try: 
            df_exc_boj = df.dropna(subset=['topix_ret', 'mod_ret'])
            df_exc_boj = df_exc_boj[~(df_exc_boj['boj_flag']==1)]

            x_exc_boj = sm.add_constant(df_exc_boj['topix_ret'])
            y_exc_boj = df_exc_boj[['mod_ret']]
            res_exc_boj = sm.OLS(y_exc_boj, x_exc_boj).fit()

            beta_exc_boj = res_exc_boj.params[1]
            nobs_exc_boj = res_exc_boj.nobs

        except: 
            beta_exc_boj = math.nan
            nobs_exc_boj = math.nan

        sedol_list.append(sedol)
        qtr_yr_index_list.append(qtr_yr_index)
        fin_date_list.append(fin_date)

        beta_list.append(beta)
        nobs_list.append(nobs)

        beta_exc_boj_list.append(beta_exc_boj)
        nobs_exc_boj_list.append(nobs_exc_boj)

100%|███████████████████████████████████████████████████████████████████████████████| 266/266 [00:00<00:00, 617.45it/s]


In [131]:
beta_qtr_df = pd.DataFrame({
                        'qtr_yr_index': qtr_yr_index_list, 
                        'sedol': sedol_list, 
                        'beta_date': fin_date_list, 
                        'beta': beta_list, 
                        'nobs': nobs_list,
                        'beta_exc_boj': beta_exc_boj_list,
                        'nobs_exc_boj': nobs_exc_boj_list, 
                     }).sort_values(['sedol', 'beta_date'])

beta_qtr_df = beta_qtr_df.dropna(subset=['sedol', 'qtr_yr_index'])

In [132]:
beta_qtr_df['beta_date_l1'] = beta_qtr_df['beta_date'] - pd.DateOffset(months=3)
beta_qtr_df['qtr_yr_index_l1'] = beta_qtr_df['beta_date_l1'].apply(lambda x: (x.year, x.month // 3) if x.month != 8 else (x.year, 3))

In [133]:
beta_qtr_df = pd.merge(beta_qtr_df,
                       beta_qtr_df[['qtr_yr_index', 'sedol', 'beta', 'nobs', 'beta_exc_boj', 'nobs_exc_boj']],
                       left_on=['sedol', 'qtr_yr_index_l1'], 
                       right_on=['sedol', 'qtr_yr_index'],
                       how='left',
                       suffixes=[None, '_l1']).drop(columns=['qtr_yr_index_l1'])

In [134]:
beta_qtr_df.to_pickle(PATH+'checkpoint_data/beta_qtr_df.pkl')

## Yearly

In [24]:
ret_nk_df_1_merge = ret_nk_df_1[['datadate', 'sedol', 'boj_flag']]
ret_nk_df_1_merge['datadate_f1'] = ret_nk_df_1_merge['datadate'] + pd.DateOffset(days=1)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ret_nk_df_1_merge['datadate_f1'] = ret_nk_df_1_merge['datadate'] + pd.DateOffset(days=1)


In [25]:
ret_nk_df_1 = pd.merge(
         ret_nk_df_1, 
         ret_nk_df_1_merge, 
         left_on=['datadate', 'sedol'], 
         right_on=['datadate_f1', 'sedol'], 
         suffixes=[None, '_l1']).drop(columns=['datadate_l1', 'datadate_f1']
)

In [26]:
ret_nk_df_1['boj_flag'] = ret_nk_df_1['boj_flag'].apply(lambda x: 1 if x==1 else 0)
ret_nk_df_1['boj_flag_l1'] = ret_nk_df_1['boj_flag_l1'].apply(lambda x: 1 if x==1 else 0)
ret_nk_df_1['boj_flag_master'] = ret_nk_df_1['boj_flag'] + ret_nk_df_1['boj_flag_l1']

In [27]:
ret_nk_df_1['yr'] = ret_nk_df_1['datadate'].dt.year

In [31]:
#yearly
qtr_yr_index_list = []
sedol_list = []
fin_date_list = [] 

r2_list = []
idio_list = []
nobs_list = []

r2_exc_boj_list = []
idio_list_exc_boj = []
nobs_exc_boj_list = []

for index, df in tqdm(ret_nk_df_1.groupby(['yr', 'sedol'])): 
    qtr_yr_index, sedol = index
    fin_date = df.iat[-1, 1]
    try: 
        df = df.dropna(subset=['topix_ret_adj'])
        x = sm.add_constant(df['topix_ret_adj'])
        y = df[['mod_ret']]
        res = sm.OLS(y, x).fit()

        r2 = res.rsquared
        nobs = res.nobs
        idio = res.resid.std()

    except: 
        r2 = math.nan
        nobs = math.nan
        idio = math.nan

    try: 
        df_exc_boj = df.dropna(subset=['topix_ret_adj'])
        df_exc_boj = df_exc_boj[~(df_exc_boj['boj_flag']==1)]
        
        x_exc_boj = sm.add_constant(df_exc_boj['topix_ret_adj'])
        y_exc_boj = df_exc_boj[['mod_ret']]
        res_exc_boj = sm.OLS(y_exc_boj, x_exc_boj).fit()

        r2_exc_boj = res_exc_boj.rsquared
        nobs_exc_boj = res_exc_boj.nobs
        idio_exc_boj = res_exc_boj.resid.std()
        
    except: 
        r2_exc_boj = math.nan
        nobs_exc_boj = math.nan
        idio_exc_boj = math.nan
        
    qtr_yr_index_list.append(qtr_yr_index)
    sedol_list.append(sedol)
    fin_date_list.append(fin_date)
    
    r2_list.append(r2)
    nobs_list.append(nobs)
    idio_list.append(idio)
    
    r2_exc_boj_list.append(r2_exc_boj)
    nobs_exc_boj_list.append(nobs_exc_boj)
    idio_list_exc_boj.append(idio_exc_boj)

100%|██████████████████████████████████████████████████████████████████████████████| 2632/2632 [01:13<00:00, 35.67it/s]


In [33]:
r2_yr_df = pd.DataFrame({
                        'yr_index': qtr_yr_index_list, 
                        'sedol': sedol_list, 
                        'fin_date': fin_date_list, 
                        'r2': r2_list, 
                        'idio': idio_list,
                        'nobs': nobs_list,
                        'r2_exc_boj': r2_exc_boj_list,
                        'idio_exc_boj': idio_list_exc_boj,
                        'nobs_exc_boj': nobs_exc_boj_list, 
                     })

In [34]:
r2_yr_df.to_pickle(PATH+'checkpoint_data/r2_yr_df.pkl')