In [38]:
import datetime
import math
import os
import time

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from matplotlib.dates import date2num
from scipy.stats.mstats import winsorize
from tqdm import tqdm

# Switch off pandas' chained-assignment check for the whole notebook, so that
# column assignments on filtered frames in later cells do not emit that warning.
pd.options.mode.chained_assignment = None  # default='warn'

# Root directory that holds the project's input and checkpoint files.
# Set the JAPAN_QE_PATH environment variable to run the notebook on another
# machine; when it is unset (or empty) the original location is used.
PATH = os.environ.get("JAPAN_QE_PATH") or "C:/Users/jackl/OneDrive/Documents/finance_research/japan_qe/"
# File names are built as PATH + 'relative/name', so make sure an override
# that omits the trailing separator still produces valid paths.
if not PATH.endswith(("/", "\\")):
    PATH += "/"

In [39]:
def change_orientation(df):
    """Add a ``mod_orientation`` column derived from ``orientation`` and ``holder_type``.

    Row by row:
      * an ``orientation`` of ``'Active'`` is relabelled ``'Active Informed'``;
      * a ``holder_type`` of ``'VC/PE Firm'`` forces ``'Active Informed'``
        whatever the orientation is;
      * every other orientation value is carried over unchanged.

    Args:
        df: DataFrame with ``'orientation'`` and ``'holder_type'`` columns.

    Returns:
        The same DataFrame object. It is modified in place; the new column is
        assigned positionally, and the existing columns are left untouched.
    """
    def _relabel(orientation, holder):
        # Resolve the orientation first, then let the holder type override it.
        relabelled = 'Active Informed' if orientation == 'Active' else orientation
        return 'Active Informed' if holder == 'VC/PE Firm' else relabelled

    df['mod_orientation'] = [
        _relabel(orientation, holder)
        for orientation, holder in zip(df['orientation'], df['holder_type'])
    ]
    return df

In [43]:
# Load the pickled ownership frame from the checkpoint directory under PATH.
# Later cells read its 'sedol', 'date', 'asof_date', 'holdings', 'pct',
# 'ticker', 'ciq', 'orientation' and 'holder_type' columns.
own_df1 = pd.read_pickle(PATH+'checkpoint_data/ownership_jan03.pkl')

In [44]:
# Load the pickled frame stored as boj_purchases_stock_level_v2.pkl.
# NOTE(review): boj_df is not referenced by any of the cells shown here —
# confirm it is used further down before keeping this load.
boj_df = pd.read_pickle(PATH+'checkpoint_data/boj_purchases_stock_level_v2.pkl')

In [45]:
# Build the shares-outstanding lookup: one (sedol, datadate, cshoc) row per
# observation, limited to the sedols listed in nk_df_v2.csv.
# sedol is cast to str on both frames so the membership test compares like with like.
nk_df = pd.read_csv(PATH+'nk_df_v2.csv').assign(
    sedol=lambda frame: frame['sedol'].astype(str)
)
ret_df = (
    pd.read_pickle(PATH+'checkpoint_data/returns_all_stocks.pkl')
    .assign(sedol=lambda frame: frame['sedol'].astype(str))
    .loc[lambda frame: frame['sedol'].isin(nk_df['sedol'])]
    [['sedol', 'datadate', 'cshoc']]
    # drop rows missing any of the three fields
    .dropna()
)

# Attach shares outstanding (cshoc) to every ownership row.
# merge_asof needs both inputs sorted on their date keys. Within each sedol it
# takes the ret_df row whose datadate is closest to the ownership 'date'
# (earlier or later; no tolerance is set), then the helper key is dropped.
own_df1 = (
    pd.merge_asof(
        own_df1.sort_values('date'),
        ret_df.sort_values('datadate'),
        left_on='date',
        right_on='datadate',
        by='sedol',
        direction='nearest',
    )
    .drop(columns='datadate')
)

# Short positions: remember the sign in a 0/1 flag (1 when holdings < 0),
# then keep only magnitudes for holdings and pct.
own_df1['short_dummy'] = own_df1['holdings'].lt(0).astype('int64')
own_df1['holdings'] = own_df1['holdings'].abs()
own_df1['pct'] = own_df1['pct'].abs()

# Recompute pct from holdings and shares outstanding for rows whose holdings
# and pct disagree. The four cases are:
#   pt1: holdings non-zero, pct == 0
#   pt2: holdings non-zero, pct missing
#   pt3: holdings == 0,     pct non-zero
#   pt4: holdings == 0,     pct missing
# NOTE(review): for pt3 the recomputed value is 0 / cshoc * 100, so the
# reported pct is overwritten rather than holdings being repaired — confirm
# this is intended.
zero_holdings = own_df1['holdings'] == 0
zero_pct = own_df1['pct'] == 0
missing_pct = own_df1['pct'].isna()

pt1 = own_df1[~zero_holdings & zero_pct].copy()
pt2 = own_df1[~zero_holdings & missing_pct].copy()
pt3 = own_df1[zero_holdings & ~zero_pct].copy()
pt4 = own_df1[zero_holdings & missing_pct].copy()

wrong_df = pd.concat([pt1, pt2, pt3, pt4])
wrong_df['pct'] = wrong_df['holdings'].div(wrong_df['cshoc']).mul(100)

# The corrected rows are stacked first so that keep='first' retains them over
# their uncorrected originals. Any other rows sharing the same
# (ticker, asof_date, ciq) key are collapsed to one as well.
own_df1 = (
    pd.concat([wrong_df, own_df1])
    .drop_duplicates(subset=['ticker', 'asof_date', 'ciq'], keep='first')
)

# Date sanity check: keep a row only when its asof_date and date are strictly
# less than 365 days apart, measured in either direction.
own_df1['date_diff'] = (own_df1['asof_date'] - own_df1['date']).abs()
own_df1 = own_df1.loc[own_df1['date_diff'].lt(pd.Timedelta(days=365))]

In [46]:
# Add the 'mod_orientation' column: 'Active' and every 'VC/PE Firm' holder
# become 'Active Informed'; other orientation values are copied as they are.
own_df1 = change_orientation(own_df1)

In [47]:
# Keep only the output columns, then order rows by sedol and asof_date
# (ascending) with the largest pct first inside each group, and renumber the index.
own_df1 = (
    own_df1[[
        'sedol', 'comn', 'ticker', 'asof_date', 'cshoc', 'name', 'holdings',
        'pct', 'short_dummy', 'ciq', 'holder_type', 'date', 'date_diff',
        'mod_orientation', 'orientation',
    ]]
    .sort_values(by=['sedol', 'asof_date', 'pct'], ascending=[True, True, False])
    .reset_index(drop=True)
)

In [48]:
# Persist the cleaned ownership frame as a new checkpoint (v2) next to the
# input pickle; the original ownership_jan03.pkl is left untouched.
own_df1.to_pickle(PATH+'checkpoint_data/ownership_jan03_v2.pkl')