In [1]:
import pandas as pd
import numpy as np
import datetime as dt
import math
import matplotlib.pyplot as plt
from pandas.tseries.offsets import BDay
from tqdm import tqdm

# Notebook-wide configuration.
# Never truncate the column list when a DataFrame is displayed.
pd.options.display.max_columns = None
# Root directory of the project data; later cells append relative file names to it,
# so it must keep its trailing slash.
PATH = "C:/Users/jackl/OneDrive/Documents/finance_research/japan_qe/"

In [2]:
def get_truncated_df(merge_df, columns, year_column_name=None, low=0.01, high=0.99):
    """Return a copy of ``merge_df`` with the tails of ``columns`` replaced by NaN.

    Rows are never dropped because of their values; only the offending cells
    are blanked out. The input frame is left untouched.

    Parameters
    ----------
    merge_df : pandas.DataFrame
        Frame holding the columns to trim.
    columns : iterable of str
        Names of the numeric columns whose tails are set to NaN.
    year_column_name : str or None, default None
        If None, the quantile cut-offs are computed once over the whole
        column. Otherwise they are computed separately inside every group of
        ``merge_df.groupby(year_column_name)``.
    low, high : float, default 0.01 / 0.99
        Lower and upper quantile levels used as cut-offs.

    Returns
    -------
    pandas.DataFrame
        Trimmed copy. In the grouped mode the rows come back ordered group by
        group (sorted group keys, original order inside a group), and rows
        whose group key is missing are absent, because ``groupby`` skips them.

    Notes
    -----
    The two modes treat values exactly equal to a cut-off differently, as the
    original implementation did: the pooled mode blanks them (strict
    inequalities), the grouped mode keeps them.
    NOTE(review): confirm whether that asymmetry is intended.
    """
    merge_df_copy = merge_df.copy()

    if year_column_name is None:
        for column in columns:
            values = merge_df_copy[column]
            lower, upper = values.quantile(q=low), values.quantile(q=high)
            # Strictly inside the cut-offs; NaN compares False and stays NaN.
            keep = (values < upper) & (values > lower)
            # Assign the result back instead of a chained ``inplace=True``
            # call, which does not reach the parent frame under pandas
            # Copy-on-Write.
            merge_df_copy[column] = values.where(keep, math.nan)
        return merge_df_copy

    merge_df_list = []
    # Materialise the groups so that tqdm knows the total for its progress bar.
    year_groups = list(merge_df_copy.groupby(year_column_name))
    for _, merge_year_df in tqdm(year_groups):
        # Work on an explicit copy: the group frame is a slice of the parent.
        merge_year_df = merge_year_df.copy()
        for column in columns:
            values = merge_year_df[column]
            lower, upper = values.quantile(q=low), values.quantile(q=high)
            # Inclusive bounds: only values strictly outside are blanked.
            keep = ~((values > upper) | (values < lower))
            merge_year_df[column] = values.where(keep, math.nan)
        merge_df_list.append(merge_year_df)

    if not merge_df_list:
        # No groups (empty frame or every key missing): pd.concat([]) would
        # raise, so return an empty frame with the same columns instead.
        return merge_df_copy.iloc[0:0]

    return pd.concat(merge_df_list)

def get_boj_quintiles_conditional(sue_df_boj_merged, suffix=None, quantiles=5):
    """Add a per-quarter quantile label of ``boj_share_shares`` to the frame.

    The distinct (``SEDOL``, ``qtr_yr_index``, ``boj_share_shares``) triples
    are bucketed with ``pd.qcut`` separately inside every ``qtr_yr_index``
    group, and the integer bucket labels (0 .. ``quantiles`` - 1) are merged
    back onto the input rows.

    Parameters
    ----------
    sue_df_boj_merged : pandas.DataFrame
        Must contain the columns ``SEDOL``, ``qtr_yr_index`` and
        ``boj_share_shares``.
    suffix : str or None, default None
        Appended to the output column name as
        ``boj_share_shares_quantiles_<suffix>``. With None the column is
        called ``boj_share_shares_quantiles`` (the previous code raised a
        TypeError for the default value).
    quantiles : int, default 5
        Number of buckets passed to ``pd.qcut``.

    Returns
    -------
    pandas.DataFrame
        Inner merge of the input with the labels. Rows whose ``qtr_yr_index``
        is missing do not survive, because ``groupby`` skips those keys. The
        input frame is not modified.
    """
    quantile_column = 'boj_share_shares_quantiles'
    if suffix is not None:
        quantile_column = quantile_column + '_' + str(suffix)

    key_columns = ['SEDOL', 'qtr_yr_index', 'boj_share_shares']
    sue_df_boj_merged_nd = sue_df_boj_merged[key_columns].drop_duplicates()

    sue_df_boj_merged_nd_list = []
    for _, quarter_df in tqdm(sue_df_boj_merged_nd.groupby('qtr_yr_index')):
        try:
            labels = pd.qcut(quarter_df['boj_share_shares'], quantiles, labels=False)
        except Exception:
            # Best effort: qcut fails e.g. when the bin edges are not unique
            # (too few distinct values in the quarter); label the whole
            # quarter NaN. Unlike a bare ``except`` this lets
            # KeyboardInterrupt / SystemExit propagate.
            labels = math.nan
        # ``assign`` builds a new frame rather than writing into a groupby slice.
        sue_df_boj_merged_nd_list.append(quarter_df.assign(**{quantile_column: labels}))

    if sue_df_boj_merged_nd_list:
        sue_df_boj_merged_nd = pd.concat(sue_df_boj_merged_nd_list)
    else:
        # No quarter groups at all: pd.concat([]) would raise, so merge
        # against an empty label table that still has the output column.
        sue_df_boj_merged_nd = sue_df_boj_merged_nd.iloc[0:0].assign(
            **{quantile_column: math.nan}
        )

    return pd.merge(sue_df_boj_merged, sue_df_boj_merged_nd, on=key_columns)

In [3]:
# Load the checkpointed frame from the project data directory.
# NOTE: unpickling can execute arbitrary code; only load files you produced yourself.
sue_df = pd.read_pickle(f"{PATH}checkpoint_data/sue_df_final.pkl")

In [5]:
# Swap the two elements of every 'qtr_yr_index' pair.
# Presumably this turns (qtr, yr) into (yr, qtr) so that sorting is chronological
# -- TODO confirm against the checkpoint data.
# NOTE: not idempotent -- running this cell a second time swaps the pairs back.
sue_df['qtr_yr_index'] = sue_df['qtr_yr_index'].apply(lambda pair: (pair[1], pair[0]))

# Number the distinct swapped pairs 0, 1, 2, ... in sorted order.
qtr_index = {
    pair: position
    for position, pair in enumerate(sorted(sue_df['qtr_yr_index'].unique()))
}
sue_df['qtr_index'] = sue_df['qtr_yr_index'].apply(lambda pair: qtr_index[pair])

In [6]:
# Keep only the rows whose 'event_day' equals 0.
ed_df = sue_df.loc[sue_df['event_day'].eq(0)]

In [7]:
# Label 'boj_share_shares' with within-quarter deciles (0-9); the labels land in
# the column 'boj_share_shares_quantiles_nk'.
# NOTE: this rebinds ed_df, so the cell is not safe to run twice in a row.
ed_df = get_boj_quintiles_conditional(
    ed_df,
    quantiles=10,
    suffix='nk',
)

100%|█████████████████████████████████████████████████████████████████████████████████| 63/63 [00:00<00:00, 166.67it/s]


In [8]:
# Narrow the frame to the columns that are exported further down.
ed_df = ed_df.loc[:, [
    'SEDOL', 'fpedat', 'eff_anndats', 'sue_nm', 'qtr_index',
    'sue_nm_quintiles', 'sue_nm_deciles', 'mod_ret_mkt_adj',
    'boj_share_shares', 'boj_share_shares_quantiles_nk', 'yr', 'nk_flag',
]]

In [9]:
# Set the pooled 1% / 99% tails of 'sue_nm' and 'mod_ret_mkt_adj' to NaN
# (year_column_name=None means the cut-offs are computed over the whole column).
ed_df_trimmed = get_truncated_df(
    ed_df, columns=['sue_nm', 'mod_ret_mkt_adj'], year_column_name=None, low=0.01, high=0.99
)

In [10]:
# Write the trimmed event-day frame to a Stata .dta file under PATH.
# Forward slashes are used on purpose: in a normal (non-raw) string literal
# '\d' and '\e' are invalid escape sequences, which newer Python versions warn
# about and plan to reject. PATH itself already uses forward slashes.
ed_df_trimmed.to_stata(PATH + 'regression_files/dta_files/event_day_test.dta')

In [17]:
# Read a spreadsheet from a machine-specific absolute path into `df`.
# NOTE(review): nothing in the cells above uses this file -- confirm it belongs
# in this notebook.
df = pd.read_excel(io=r'C:\Users\jackl\Downloads\Democracy.xls')

In [19]:
# Save the spreadsheet contents as a Stata file in the current working directory.
df.to_stata(path='democracy.dta')