In [3]:
import datetime as dt
import math
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from pandas.tseries.offsets import BDay
from tqdm import tqdm

# Show every column when a wide DataFrame is displayed.
pd.set_option("display.max_columns", None)

# Root data directory. The default is the original local location; set the
# JAPAN_QE_PATH environment variable to run the notebook on another machine.
# The value is normalised to end in exactly one "/" because later cells build
# file names by plain string concatenation (PATH + 'checkpoint_data/...').
_DEFAULT_PATH = "C:/Users/jackl/OneDrive/Documents/finance_research/japan_qe/"
PATH = (os.environ.get("JAPAN_QE_PATH") or _DEFAULT_PATH).rstrip("/\\") + "/"

def get_plot_data(bottom_quintile_end, mean=True):
    """Build the per-event-day `abret_cum` series used for plotting.

    Parameters
    ----------
    bottom_quintile_end : pandas.DataFrame
        Must contain the columns 'event_day' and 'abret_cum'. Other columns
        (numeric or not) are ignored.
    mean : bool, default True
        If truthy, aggregate each event day with the mean; otherwise use the
        median.

    Returns
    -------
    pandas.DataFrame
        Indexed by 'event_day' with a single 'abret_cum' column. The first row
        is a fixed anchor at event_day = -23 with abret_cum = 1 (presumably the
        start of the event window, where the cumulative return is normalised
        to 1 — TODO confirm), followed by one row per event day.
    """
    # Select the column *before* aggregating: aggregating the whole frame first
    # raises on non-numeric columns in pandas >= 2.0 and wastes work otherwise.
    per_day = bottom_quintile_end.groupby('event_day')['abret_cum']
    per_day = per_day.mean() if mean else per_day.median()

    anchor = pd.DataFrame({'event_day': [-23], 'abret_cum': [1]})
    return pd.concat([anchor, per_day.reset_index()]).set_index('event_day')
    
def get_plot_data_turnover(bottom_quintile_end, mean=True):
    """Aggregate `ab_turnover` by event day for plotting.

    Parameters
    ----------
    bottom_quintile_end : pandas.DataFrame
        Must contain the columns 'event_day' and 'ab_turnover'. Other columns
        are ignored.
    mean : bool, default True
        If truthy, aggregate each event day with the mean; otherwise use the
        median. (Previously both branches computed the mean, so `mean=False`
        silently had no effect.)

    Returns
    -------
    pandas.DataFrame
        Indexed by 'event_day' with a single 'ab_turnover' column.
    """
    # Select the column before aggregating so unrelated (possibly non-numeric)
    # columns cannot break or slow down the reduction.
    per_day = bottom_quintile_end.groupby('event_day')['ab_turnover']
    per_day = per_day.mean() if mean else per_day.median()
    return per_day.to_frame()
    
def get_boj_quintiles_conditional(sue_df_boj_merged, suffix=None, quantiles=5):
    """Attach within-quarter quantile ranks of `boj_share_shares` to every row.

    The distinct ('sedol', 'qtr_yr_index', 'boj_share_shares') triples are
    ranked separately inside each `qtr_yr_index` group with `pd.qcut`, and the
    ranks are merged back onto the input on those three columns.

    Parameters
    ----------
    sue_df_boj_merged : pandas.DataFrame
        Must contain 'sedol', 'qtr_yr_index' and 'boj_share_shares'.
    suffix : str, optional
        Appended to the new column name as
        'boj_share_shares_quantiles_<suffix>'. When omitted the column is named
        'boj_share_shares_quantiles' (the old default of None always raised a
        TypeError while building the name).
    quantiles : int or sequence of float, default 5
        Passed to `pd.qcut` as the number of quantiles / quantile edges.

    Returns
    -------
    pandas.DataFrame
        Inner merge of the input with the rank table. The rank column holds
        0-based bin numbers, or NaN for every row of a quarter that `pd.qcut`
        could not bin (e.g. non-unique bin edges).
    """
    quantile_col = 'boj_share_shares_quantiles'
    if suffix is not None:
        quantile_col = quantile_col + '_' + str(suffix)

    merge_keys = ['sedol', 'qtr_yr_index', 'boj_share_shares']
    unique_holdings = sue_df_boj_merged[merge_keys].drop_duplicates()

    ranked_groups = []
    for _, quarter_df in tqdm(unique_holdings.groupby('qtr_yr_index')):
        try:
            ranks = pd.qcut(quarter_df['boj_share_shares'], quantiles, labels=False)
        except ValueError:
            # qcut raises ValueError when the quarter cannot be split into the
            # requested bins; keep the rows and mark the rank as missing.
            # Only ValueError is caught so that Ctrl-C and genuine programming
            # errors are no longer swallowed.
            ranks = math.nan
        # .assign returns a new frame, so the groupby slice is never mutated.
        ranked_groups.append(quarter_df.assign(**{quantile_col: ranks}))

    ranked = pd.concat(ranked_groups)

    return pd.merge(sue_df_boj_merged, ranked, on=merge_keys)

def get_firm_quarter_count(df):
    """Return `df` reduced to one row per ('SEDOL', 'eff_anndats') pair.

    The first occurrence of each pair is kept and the original index labels
    are preserved. Despite the name, the deduplicated frame is returned, not a
    number; take `len()` of the result to get the count.
    """
    firm_quarter_keys = ['SEDOL', 'eff_anndats']
    is_repeat = df.duplicated(subset=firm_quarter_keys, keep='first')
    return df[~is_repeat]

In [4]:
def get_truncated_df(merge_df, columns, year_column_name=None, low=0.01, high=0.99):
    """Replace tail values of the given columns with NaN (truncation).

    Parameters
    ----------
    merge_df : pandas.DataFrame
        Input frame; it is copied and never modified.
    columns : iterable of str
        Columns to truncate. Each column is handled independently.
    year_column_name : str, optional
        If given, the `low`/`high` quantiles are computed separately inside
        each group of this column; otherwise they are computed on the pooled
        column.
    low, high : float
        Lower and upper quantile levels (defaults: 1st and 99th percentile).

    Returns
    -------
    pandas.DataFrame
        Copy of the input with out-of-range values set to NaN.

        * Pooled mode keeps values strictly between the two cut-offs, so
          values equal to a cut-off are also set to NaN.
        * Grouped mode keeps values inside the closed interval, so values
          equal to a cut-off are kept. Rows are returned group by group (the
          concatenation of the per-group frames), which is not necessarily the
          input row order.

    Notes
    -----
    The earlier implementation used `frame[column].where(..., inplace=True)`.
    That mutates a temporary Series and leaves the frame untouched when pandas
    Copy-on-Write is active, so the result is now assigned back explicitly.
    """
    merge_df_copy = merge_df.copy()
    if year_column_name is None:
        for column in columns:
            values = merge_df_copy[column]
            lower, upper = values.quantile(q=low), values.quantile(q=high)
            # Strict inequalities: boundary values are truncated as well.
            merge_df_copy[column] = values.where((values < upper) & (values > lower), math.nan)
    else:
        merge_df_list = []
        # Materialise the groups first so tqdm knows the total for its bar.
        year_groups = list(merge_df_copy.groupby(year_column_name))
        for _, merge_year_df in tqdm(year_groups):
            # Work on an independent copy so the groupby slice is not mutated.
            merge_year_df = merge_year_df.copy()
            for column in columns:
                values = merge_year_df[column]
                lower, upper = values.quantile(q=low), values.quantile(q=high)
                # Inclusive bounds: only values strictly outside are dropped.
                merge_year_df[column] = values.where(~((values > upper) | (values < lower)), math.nan)
            merge_df_list.append(merge_year_df)
        merge_df_copy = pd.concat(merge_df_list)

    return merge_df_copy

In [86]:
# Load the quarterly R² checkpoint; it is merged on 'qtr_yr_index' and 'sedol' below.
r2_df = pd.read_pickle(PATH+'checkpoint_data/r2_df.pkl')

In [106]:
# Load the yearly R² checkpoint; it is merged on 'yr_index' and 'sedol' in the Yearly section.
r2_yr_df = pd.read_pickle(PATH+'checkpoint_data/r2_yr_df.pkl')

In [88]:
weight_df = pd.read_pickle(PATH+'checkpoint_data/index_weights.pkl')

# Rows belonging to the index list with mnemonic 'LJAPDOWA' (presumably the
# Nikkei, given the nk_ prefix — TODO confirm). Filtered once and reused below.
nk_weights = weight_df[weight_df['indexlistmnem']=='LJAPDOWA']

# One row per sedol that appears in the index at any time, flagged with
# nk_flag = 1. .assign builds a new frame, which avoids the
# SettingWithCopyWarning raised when a column is added to a column subset.
nk_stocks = nk_weights.drop_duplicates(['sedol'])[['sedol']].assign(nk_flag=1)

# One row per (sedol, month, yr) index membership, flagged with nk_flag_ref = 1.
nk_stocks_ref = (
    nk_weights
    .drop_duplicates(['sedol', 'month', 'yr'])[['sedol', 'month', 'yr']]
    .assign(nk_flag_ref=1)
)

In [89]:
# Load the stock-level BoJ purchases checkpoint (reloaded again in the Yearly section).
boj_df = pd.read_pickle(PATH+'checkpoint_data/boj_purchases_stock_level_v2.pkl')

In [5]:
# Load the quarterly control-variable checkpoint (keyed by 'sedol' and 'yr_qtr_index' in the merge below).
control_df = pd.read_pickle(PATH+'checkpoint_data/control.pkl')

In [8]:
# Load the yearly control-variable checkpoint (keyed by 'sedol' and 'yr' in the yearly merge).
control_yr_df = pd.read_pickle(PATH+'checkpoint_data/control_yearly.pkl')

In [11]:
# mc_raw = exp(mc). Presumably 'mc' is stored as log market capitalisation and
# this recovers the level — TODO confirm.
control_yr_df['mc_raw'] = np.exp(control_yr_df['mc'])

## $R^2$ Better Version

### Quarterly

In [92]:
# Keep one row per (sedol, yr_qtr_index): after sorting by 'date', keep='last'
# retains the row with the latest date in each quarter.
# NOTE: this overwrites boj_df; the Yearly section reloads it from disk.
boj_df = boj_df.sort_values(['date']).drop_duplicates(['sedol', 'yr_qtr_index'], keep='last')

In [96]:
# Inner merge (pd.merge default) of the quarterly R² rows with the BoJ rows,
# matching r2_df.qtr_yr_index to boj_df.yr_qtr_index_f1 for the same sedol.
# Presumably yr_qtr_index_f1 is the BoJ quarter shifted one quarter forward, so
# each R² quarter is paired with the previous quarter's BoJ data — TODO confirm.
# The BoJ-side key columns and 'ajexdi' are dropped afterwards.
r2_df_1 = pd.merge(r2_df, 
                   boj_df, 
                   left_on=['qtr_yr_index', 'sedol'], 
                   right_on=['yr_qtr_index_f1', 'sedol']).drop(columns=['yr_qtr_index', 'yr_qtr_index_f1', 'ajexdi'])

In [97]:
# Left merge: every row of r2_df_1 is kept and the selected control columns are
# attached where (sedol, quarter) matches; unmatched rows get NaN controls.
# The duplicate key column from the control side is dropped afterwards.
r2_df_2 = pd.merge(r2_df_1, 
                   control_df[['sedol', 'yr_qtr_index', 'mc', 'prc', 'mb', 'vlt_l1', 'ret_l1', 'turnover_l1', 'turnover']], 
                   left_on=['sedol', 'qtr_yr_index'], 
                   right_on=['sedol', 'yr_qtr_index'], 
                   how='left').drop(columns=['yr_qtr_index'])

In [98]:
# Cast the quarter key to str before export (presumably its original dtype is
# not writable to Stata — TODO confirm), then write the quarterly panel.
# NOTE: the cast mutates r2_df_2 in place, so 'qtr_yr_index' is a string from here on.
r2_df_2['qtr_yr_index'] = r2_df_2['qtr_yr_index'].astype(str) 
r2_df_2.to_stata(PATH+'regression_files/dta_files/r2_df.dta')

In [99]:
# Optional: uncomment to reload the exported quarterly panel from disk instead of rebuilding it.
# r2_df_2 = pd.read_stata(PATH+'regression_files/dta_files/r2_df.dta')

### Yearly

In [108]:
# Reload the raw BoJ purchases: the Quarterly section overwrote boj_df with a
# one-row-per-quarter version, and the yearly dedup needs the full data.
boj_df = pd.read_pickle(PATH+'checkpoint_data/boj_purchases_stock_level_v2.pkl')

In [109]:
# Calendar year of each observation ('date' must be a datetime column for .dt to work).
boj_df['yr'] = boj_df['date'].dt.year
# Keep one row per (sedol, yr): the row with the latest date in that year.
boj_df = boj_df.sort_values(['date']).drop_duplicates(['sedol', 'yr'], keep='last')

In [110]:
# Following year; used as the merge key so year-t BoJ data is paired with year-(t+1) R².
boj_df['yr_f1'] = boj_df['yr'] + 1

In [111]:
# Inner merge of the yearly R² rows with the BoJ rows, matching
# r2_yr_df.yr_index to boj_df.yr_f1 (= BoJ year + 1) for the same sedol, i.e.
# each R² year is paired with the previous year's last BoJ observation.
# BoJ-side key and date columns are dropped afterwards.
# NOTE: this reuses the name r2_df_1 from the Quarterly section.
r2_df_1 = pd.merge(r2_yr_df, 
                   boj_df, 
                   left_on=['yr_index', 'sedol'], 
                   right_on=['yr_f1', 'sedol']).drop(columns=['yr_qtr_index', 'yr_qtr_index_f1', 'yr', 'yr_f1', 'fin_date', 'date'])

In [112]:
# Inner merge (pd.merge default) with all yearly control columns for the same
# sedol and year; rows without a matching control record are dropped. This
# differs from the quarterly merge, which uses how='left'.
# NOTE: this reuses the name r2_df_2 from the Quarterly section.
r2_df_2 = pd.merge(r2_df_1, 
                   control_yr_df, 
                   left_on=['yr_index', 'sedol'], 
                   right_on=['yr', 'sedol'])

In [113]:
# Write the yearly panel to a Stata .dta file.
r2_df_2.to_stata(PATH+'regression_files/dta_files/r2_yr_df.dta')

# Other: within-firm dispersion of $R^2$

In [5]:
# NOTE(review): r2_df_3 is not defined in any cell visible in this notebook, so
# this raises NameError on a fresh kernel. Confirm which frame is meant
# (possibly the quarterly r2_df_2) or restore the cell that builds it.
# Keep rows with at least 38 observations in 'nobs_exc_boj'; the reason for the
# threshold of 38 is not documented here — TODO confirm.
sum_stats = r2_df_3[r2_df_3['nobs_exc_boj'] >= 38]

In [8]:
# Narrow to the firm/quarter keys and the two R² measures used in the summary below.
sum_stats = sum_stats[['sedol', 'qtr_yr_index', 'r2_exc_boj', 'r2']]

In [13]:
# Within-firm standard deviation of each R² measure across quarters, followed by
# the cross-sectional distribution of those standard deviations.
# The R² columns are selected explicitly: 'qtr_yr_index' is a key, not a
# measure, and reducing it with std() raises in pandas >= 2.0 when it is
# non-numeric (older versions dropped it silently, as in the output shown).
sum_stats.groupby(['sedol'])[['r2_exc_boj', 'r2']].std().describe()

Unnamed: 0,r2_exc_boj,r2
count,243.0,243.0
mean,0.170101,0.172558
std,0.022861,0.02568
min,0.084482,0.025814
25%,0.156785,0.158117
50%,0.169908,0.174266
75%,0.186454,0.187264
max,0.230768,0.24322
