In [42]:
import pandas as pd
import numpy as np
import datetime as dt
import math
import matplotlib.pyplot as plt

from pandas.tseries.offsets import BDay, Day
from tqdm import tqdm

PATH = "C:/Users/jackl/OneDrive/Documents/finance_research/japan_qe/"

In [43]:
def get_ffill(sedol_df):
    sedol_df_copy = sedol_df.copy()
    for col in sedol_df_copy.columns: 
        sedol_df_copy[col] = sedol_df_copy.ffill()
    return sedol_df_copy

# Load Data

In [44]:
weight_df = pd.read_pickle(PATH+'checkpoint_data/index_weights.pkl')
weight_df.columns = ['date_', 'weight_within_index', 'constintcode', 'indexlistmnem', 'month_yr_index',
                     'merged_date', 'sedol', 'month', 'yr', 'conm']

const_df_full = pd.read_csv(PATH+'raw_data/datastream/topix_nikkei_constituents_v2.csv', parse_dates=['date_'])
ff_df = const_df_full[(const_df_full['indexlistmnem']=='LTOKYOSE') & 
                      (const_df_full['fffactor']!=0)][['constintcode', 'fffactor', 'date_', 'ffmktval', 'mktval']]

In [45]:
#get SEDOLs
merge_df = pd.read_pickle(PATH+'checkpoint_data/sedol_constintcode_match.pkl') 
ff_df_merged = pd.merge_asof(ff_df.sort_values('date_'), 
                             merge_df.sort_values('merged_date'), 
                             by=['constintcode'], 
                             left_on=['date_'],
                             right_on=['merged_date'],
                             direction='nearest').dropna(subset=['sedol']).drop_duplicates(['date_', 'sedol'])

In [46]:
nk_stocks = weight_df[weight_df['indexlistmnem']=='LJAPDOWA'].drop_duplicates(['sedol'])[['sedol']]
nk_stocks['nk_flag'] = [1 for i in range(len(nk_stocks))]

In [47]:
boj_df = pd.read_pickle(PATH+'checkpoint_data/boj_purchases.pkl')
boj_df = boj_df[['date', 'etf']]
boj_df = boj_df[boj_df['etf'] > 0]

boj_df['month'] = boj_df['date'].dt.month
boj_df['yr'] = boj_df['date'].dt.year
boj_df['etf'] = boj_df['etf'] * 10**8
boj_df.columns = ['date', 'boj_total_purchases', 'month', 'yr']

In [48]:
ret_df = pd.read_pickle(PATH+'checkpoint_data/returns_all_stocks.pkl')
ret_df = ret_df[['sedol', 'datadate', 'cshoc', 'prccd', 'ajexdi']]
ret_df['mkt_cap'] = ret_df['prccd'] * ret_df['cshoc']
ret_df['sedol'] = ret_df['sedol'].astype(str)
ret_df = ret_df.dropna(subset=['cshoc'])

In [86]:
indus_df = pd.read_pickle(PATH+'checkpoint_data/industries.pkl')
ret_df = pd.merge_asof(ret_df.sort_values(['datadate']), 
                       indus_df[['sedol', 'datadate', 'sic_2d']].sort_values(['datadate']), 
                       by='sedol', 
                       on='datadate',
                       direction='backward').sort_values(['sedol', 'datadate'])

In [50]:
ret_df_1 = pd.merge_asof(ret_df.sort_values('datadate'), 
                         ff_df_merged[['sedol', 'date_', 'fffactor', 'ffmktval', 'mktval']].sort_values('date_'), 
                         by='sedol', 
                         left_on=['datadate'], 
                         right_on=['date_'], 
                         direction='nearest', 
                         tolerance=pd.Timedelta(weeks=26)).sort_values(['sedol', 'datadate'])

In [51]:
comn_df = pd.read_csv(PATH+'raw_data/comp_global/company_names.csv')
comn_df = comn_df.drop_duplicates(['sedol'])[['sedol', 'conm']]
comn_df['sedol'] = comn_df['sedol'].astype(str)

# Get Template

In [52]:
date_ = pd.to_datetime('2010-01-01')
date_list = []
topix_weights, nikkei_weights = [], []
topix_name, nikkei_name = [], []
while date_ <= pd.to_datetime('2021-09-21'):
    if date_ <= pd.to_datetime('2014-11-19'): 
        topix_weights.append(0.45)
        nikkei_weights.append(0.55)
    elif (date_ > pd.to_datetime('2014-11-19')) & (date_ < pd.to_datetime('2016-09-21')): 
        topix_weights.append(0.50)
        nikkei_weights.append(0.50)
    elif (date_ >= pd.to_datetime('2016-09-21')): 
        topix_weights.append(0.75)
        nikkei_weights.append(0.25)    
    date_list.append(date_)
    topix_name.append('LTOKYOSE')
    nikkei_name.append('LJAPDOWA')
    date_ += Day(1)

weights = topix_weights + nikkei_weights 
dates = date_list + date_list
names = topix_name + nikkei_name

purchase_weights_df = pd.DataFrame({
                                   'date': dates, 
                                   'indexlistmnem': names,
                                   'index_weight': weights
})

# Start Merge

In [53]:
boj_df_merged = pd.merge(purchase_weights_df,
                         boj_df,
                         on=['date'], 
                         how='left')
boj_df_merged = boj_df_merged.dropna(subset=['boj_total_purchases'])

In [54]:
boj_df_merged = pd.merge(boj_df_merged, 
                         weight_df[['month', 'yr', 'indexlistmnem', 'conm', 'constintcode', 'sedol', 'weight_within_index']], 
                         on=['indexlistmnem', 'month', 'yr'], 
                         how='left')

boj_df_merged = boj_df_merged[['sedol', 'date', 'boj_total_purchases', 
                               'indexlistmnem', 'index_weight', 
                               'weight_within_index']]

boj_df_merged

Unnamed: 0,sedol,date,boj_total_purchases,indexlistmnem,index_weight,weight_within_index
0,6985361,2010-12-15,1.420000e+10,LTOKYOSE,0.45,0.000183
1,6740582,2010-12-15,1.420000e+10,LTOKYOSE,0.45,0.000696
2,6572644,2010-12-15,1.420000e+10,LTOKYOSE,0.45,0.000093
3,6597045,2010-12-15,1.420000e+10,LTOKYOSE,0.45,0.007098
4,6572440,2010-12-15,1.420000e+10,LTOKYOSE,0.45,0.000032
...,...,...,...,...,...,...
1474625,6896548,2021-06-21,7.010000e+10,LJAPDOWA,0.25,0.000945
1474626,6250724,2021-06-21,7.010000e+10,LJAPDOWA,0.25,0.025878
1474627,6642666,2021-06-21,7.010000e+10,LJAPDOWA,0.25,0.000979
1474628,6895169,2021-06-21,7.010000e+10,LJAPDOWA,0.25,0.000718


In [55]:
boj_df_merged = pd.merge_asof(boj_df_merged.sort_values('date'), 
                              ret_df_1.sort_values('datadate'), 
                              by='sedol',
                              left_on=['date'], 
                              right_on=['datadate'],
                              direction='nearest').drop(columns=['datadate']).sort_values(['sedol', 'date'])

boj_df_merged = boj_df_merged.drop(columns=['ffmktval', 'mktval', 'date_'])

In [56]:
boj_df_merged['ffshoc'] = boj_df_merged['fffactor'] * boj_df_merged['cshoc']
boj_df_merged['amt_purchased_yen'] = boj_df_merged['boj_total_purchases'] * boj_df_merged['index_weight'] * boj_df_merged['weight_within_index']
boj_df_merged['shares_purchased'] = boj_df_merged['amt_purchased_yen'] / boj_df_merged['prccd']

In [57]:
boj_df_merged = boj_df_merged[(boj_df_merged['date'] >= pd.to_datetime('2010-12-15')) & 
                              (boj_df_merged['date'] <= pd.to_datetime('2021-01-01'))]

In [58]:
boj_df_merged = boj_df_merged[['date', 'indexlistmnem', 'index_weight', 'boj_total_purchases', 
                               'sedol', 'weight_within_index', 'ajexdi', 'cshoc', "ffshoc", 'shares_purchased', 'sic_2d']]

In [59]:
boj_df_merged.to_pickle(PATH+'checkpoint_data/boj_purchases_stock_level.pkl')

### Add On, Passiveness Measured by Shares 

In [65]:
boj_df = pd.read_pickle(PATH+'checkpoint_data/boj_purchases_stock_level.pkl')

In [66]:
boj_df = boj_df.sort_values(['sedol', 'date']).dropna(subset=['sedol'])

In [67]:
boj_df = boj_df.dropna(subset=['shares_purchased'])

In [68]:
shares_agged_by_date_df = boj_df[['date', 'sedol', 'shares_purchased']].groupby(['date', 'sedol']).sum().reset_index()
shares_info_df = boj_df[['date', 'sedol', 'ajexdi', 'cshoc', 'ffshoc', 'sic_2d']].drop_duplicates(['date', 'sedol'])
boj_df = pd.merge(shares_info_df, 
                  shares_agged_by_date_df, 
                  on=['date', 'sedol'], 
                  how='left')
boj_df

Unnamed: 0,date,sedol,ajexdi,cshoc,ffshoc,sic_2d,shares_purchased
0,2010-12-15,6001342,1.0,13334640.0,7.334051e+06,,254.890000
1,2010-12-30,6001342,1.0,13334640.0,7.334051e+06,,242.495772
2,2011-01-21,6001342,1.0,13334640.0,7.334052e+06,,253.801957
3,2011-01-28,6001342,1.0,13334640.0,7.334052e+06,,244.756604
4,2011-01-31,6001342,1.0,13334640.0,7.334052e+06,,245.270798
...,...,...,...,...,...,...,...
1297088,2020-11-13,BZCRNM6,1.0,37149400.0,2.600458e+07,28.0,3636.897301
1297089,2020-11-18,BZCRNM6,1.0,37149400.0,2.600458e+07,28.0,3466.681672
1297090,2020-12-21,BZCRNM6,1.0,37149400.0,2.600458e+07,28.0,3348.097473
1297091,2020-12-22,BZCRNM6,1.0,37149400.0,2.600458e+07,28.0,3397.153846


In [69]:
counter = 0
total = 0
sedol_df_list = []
problem_dfs = []

for sedol, sedol_df in tqdm(boj_df.groupby(['sedol'])): 
    sedol_df = sedol_df.sort_values('date')
    shares_purchased_list = list(sedol_df['shares_purchased'])
    ajexdi_list = list(sedol_df['ajexdi'])
    try:
        cumsum_list = []
        for i in range(len(sedol_df)):
            most_recent_ajexdi = ajexdi_list[i]
            cumsum = np.nansum([shares_purchased_list[j] * ajexdi_list[j] / most_recent_ajexdi for j in range(i+1)])
            cumsum_list.append(cumsum)
        sedol_df['boj_total_shares_purchased'] = cumsum_list
        sedol_df_list.append(sedol_df)
    except: 
        problem_dfs.append(sedol_df)

100%|██████████████████████████████████████████████████████████████████████████████| 2405/2405 [03:50<00:00, 10.45it/s]


In [70]:
boj_df_w_shares = pd.concat(sedol_df_list)

In [71]:
boj_df_w_shares['boj_share_shares'] = boj_df_w_shares['boj_total_shares_purchased']/boj_df_w_shares['ffshoc']
boj_df_w_shares['boj_share_shares_no_ff'] = boj_df_w_shares['boj_total_shares_purchased']/boj_df_w_shares['cshoc']

In [72]:
boj_df_w_shares['shares_purchased_frac'] = boj_df_w_shares['shares_purchased']/boj_df_w_shares['ffshoc']
boj_df_w_shares['shares_purchased_frac_no_ff'] = boj_df_w_shares['shares_purchased']/boj_df_w_shares['cshoc']

In [73]:
# top_df = boj_df_w_shares.sort_values(['date']).drop_duplicates('sedol', keep='first')
# top_df['date'] = [pd.to_datetime('2004-12-10') for i in range(len(top_df))]
# top_df['boj_share_shares'] = [0 for i in range(len(top_df))]
# top_df['boj_total_shares_purchased'] = [0 for i in range(len(top_df))]
# top_df['shares_purchased'] = [0 for i in range(len(top_df))]
# top_df['cshoc'] = [math.nan for i in range(len(top_df))]
# top_df['ajexdi'] = [math.nan for i in range(len(top_df))]

# boj_df_w_shares = pd.concat([top_df, boj_df_w_shares])
# boj_df_w_shares = boj_df_w_shares[boj_df_w_shares['date'] > pd.to_datetime('2004-12-10')]

## Make it Panel

In [74]:
top_row_df = boj_df_w_shares.drop_duplicates(['sedol'])
top_row_df['date'] = pd.to_datetime('2010-12-14')
top_row_df[['ajexdi', 'cshoc', 'shares_purchased']] = math.nan
top_row_df[['boj_share_shares', 'boj_share_shares_no_ff', 'boj_total_shares_purchased', 'shares_purchased_frac', 'shares_purchased_frac_no_ff']] = 0

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  top_row_df['date'] = pd.to_datetime('2010-12-14')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  top_row_df[['ajexdi', 'cshoc', 'shares_purchased']] = math.nan
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_column(loc, value, pi)
A value is trying to be set on a copy of a slice

In [75]:
boj_df_w_shares = pd.concat([top_row_df, boj_df_w_shares])

In [76]:
date_list = sorted(boj_df_w_shares['date'].unique())

In [77]:
reindex_df_list = []
for sedol, df in tqdm(boj_df_w_shares.groupby('sedol')): 
    reindex_df = df.set_index('date').reindex(date_list)
    reindex_df['sedol'] = reindex_df['sedol'].ffill()
    reindex_df['boj_total_shares_purchased'] = reindex_df['boj_total_shares_purchased'].ffill()
    reindex_df['boj_share_shares'] = reindex_df['boj_share_shares'].ffill()
    reindex_df['boj_share_shares_no_ff'] = reindex_df['boj_share_shares_no_ff'].ffill()
    reindex_df_list.append(reindex_df)

boj_df_w_shares_reindex = pd.concat(reindex_df_list).reset_index()

100%|█████████████████████████████████████████████████████████████████████████████| 2404/2404 [00:13<00:00, 171.81it/s]


In [78]:
boj_df_w_shares_reindex = pd.merge(boj_df_w_shares_reindex, 
                                   comn_df, 
                                   on='sedol', 
                                   how='left')

In [79]:
weight_df = pd.read_pickle(PATH+'checkpoint_data/index_weights.pkl')
nk_stocks = weight_df[weight_df['indexlistmnem']=='LJAPDOWA'].drop_duplicates(['sedol'])[['sedol']]
nk_stocks['nk_flag'] = [1 for i in range(len(nk_stocks))]
boj_df_w_shares_reindex = pd.merge(boj_df_w_shares_reindex, 
                                   nk_stocks, 
                                   on='sedol', 
                                   how='left')

In [102]:
boj_df_w_shares_reindex.to_pickle(PATH+'checkpoint_data/boj_purchases_stock_level_v2.pkl')

In [106]:
boj_df_w_shares_reindex

Unnamed: 0,date,sedol,ajexdi,cshoc,ffshoc,shares_purchased,boj_total_shares_purchased,boj_share_shares,boj_share_shares_no_ff,shares_purchased_frac,shares_purchased_frac_no_ff,conm,nk_flag,sic_2d
0,2010-12-14,6001342,,,7.334051e+06,,0.000000,0.000000,0.000000,0.000000,0.000000,AOI PRO INC,,73.0
4306,2010-12-15,6001342,1.0,13334640.0,7.334051e+06,254.890000,254.890000,0.000035,0.000019,0.000035,0.000019,AOI PRO INC,,73.0
6969,2010-12-30,6001342,1.0,13334640.0,7.334051e+06,242.495772,497.385772,0.000068,0.000037,0.000033,0.000018,AOI PRO INC,,73.0
9140,2011-01-21,6001342,1.0,13334640.0,7.334052e+06,253.801957,751.187728,0.000102,0.000056,0.000035,0.000019,AOI PRO INC,,73.0
11034,2011-01-28,6001342,1.0,13334640.0,7.334052e+06,244.756604,995.944332,0.000136,0.000075,0.000033,0.000018,AOI PRO INC,,73.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
844358,2016-09-21,BZCRNM6,,,,,0.000000,0.000000,0.000000,,,KH NEOCHEM CO LTD,,
848130,2016-09-26,BZCRNM6,,,,,0.000000,0.000000,0.000000,,,KH NEOCHEM CO LTD,,
850025,2016-09-27,BZCRNM6,,,,,0.000000,0.000000,0.000000,,,KH NEOCHEM CO LTD,,
851872,2016-09-28,BZCRNM6,,,,,0.000000,0.000000,0.000000,,,KH NEOCHEM CO LTD,,


In [35]:
# boj_df_w_shares_reindex = pd.read_pickle(PATH+'checkpoint_data/boj_purchases_stock_level_v2.pkl')