In [1]:
"""
Collect all IBES' EA dates and process file into finalized/standardized 
format. 

"""
# Import Libraries
import pandas as pd
import numpy as np
import wrds
from datetime import datetime, timedelta
import datetime

"""
Connect to WRDS!

"""
conn = wrds.Connection()

Enter your WRDS username [clj585]:cindylu
Enter your password:········
WRDS recommends setting up a .pgpass file.
Create .pgpass file now [y/n]?: y
Created .pgpass file successfully.
Loading library list...
Done


In [2]:
"""
Get the table called act_epsus 

"""
collist = ['ticker', 'oftic', 'cusip', 'pdicity', 'anndats', 'anntims', "pends"]
ibes_raw1 = conn.get_table(library='ibes', table='act_epsus', columns = collist)

# Rename columns 
colmap = {'cusip' : "cusip8", "pdicity" : "q/a", "oftic" : "hticker",
          "ticker" : "ibes_id",  "anndats" : "ea_date", "anntims": "ea_time", "pends": "datadate"}
ibes_raw1 = ibes_raw1.rename(columns=colmap)
ibes_raw1

Unnamed: 0,ibes_id,hticker,cusip8,q/a,ea_date,ea_time,datadate
0,0000,,,ANN,2014-02-14,67260.0,2012-12-31
1,0000,,,QTR,2014-02-14,67260.0,2012-12-31
2,0000,,,QTR,2014-02-14,67260.0,2013-01-31
3,0000,,,QTR,2014-02-14,67260.0,2013-03-31
4,0000,,,QTR,2014-02-14,67260.0,2013-04-30
...,...,...,...,...,...,...,...
204467,ZYNX,ZYXI,98986M10,QTR,2021-04-29,57660.0,2021-03-31
204468,ZYNX,ZYXI,98986M10,QTR,2021-07-29,57660.0,2021-06-30
204469,ZYNX,ZYXI,98986M10,QTR,2021-11-02,57660.0,2021-09-30
204470,ZYNX,ZYXI,98986M10,ANN,2022-02-24,57660.0,2021-12-31


In [3]:
# Adjust column data types (the ea_time stamp is converted in the next step).
# Identifier and periodicity columns use the pandas "string" dtype.
text_cols = ['hticker', 'cusip8', 'q/a', 'ibes_id']
ibes_raw1[text_cols] = ibes_raw1[text_cols].astype("string")

# Parse both date columns to datetime64, going through "string" first so the
# parse does not depend on the object type the database driver returned.
for date_col in ('ea_date', 'datadate'):
    ibes_raw1[date_col] = pd.to_datetime(ibes_raw1[date_col].astype("string"))

# Collapse the periodicity code to a single letter: "ANN" marks annual rows,
# every other value (including a missing one) is labelled "Q".
# The already-collapsed "A" is accepted too, so re-running this cell does not
# silently relabel every annual row as quarterly.
is_annual = ibes_raw1['q/a'].isin(["ANN", "A"])
ibes_raw1['q/a'] = np.where(is_annual, "A", "Q")
ibes_raw1

Unnamed: 0,ibes_id,hticker,cusip8,q/a,ea_date,ea_time,datadate
0,0000,,,A,2014-02-14,67260.0,2012-12-31
1,0000,,,Q,2014-02-14,67260.0,2012-12-31
2,0000,,,Q,2014-02-14,67260.0,2013-01-31
3,0000,,,Q,2014-02-14,67260.0,2013-03-31
4,0000,,,Q,2014-02-14,67260.0,2013-04-30
...,...,...,...,...,...,...,...
204467,ZYNX,ZYXI,98986M10,Q,2021-04-29,57660.0,2021-03-31
204468,ZYNX,ZYXI,98986M10,Q,2021-07-29,57660.0,2021-06-30
204469,ZYNX,ZYXI,98986M10,Q,2021-11-02,57660.0,2021-09-30
204470,ZYNX,ZYXI,98986M10,A,2022-02-24,57660.0,2021-12-31


In [4]:
# Turn the announcement time, stored as a count of seconds, into "HH:MM:SS".
# The seconds are read as an offset from the epoch and only the time-of-day
# part of the resulting timestamp is kept.
ea_clock = pd.to_datetime(ibes_raw1["ea_time"], unit='s')
ibes_raw1['ea_time'] = ea_clock.dt.strftime("%H:%M:%S")
ibes_raw1

Unnamed: 0,ibes_id,hticker,cusip8,q/a,ea_date,ea_time,datadate
0,0000,,,A,2014-02-14,18:41:00,2012-12-31
1,0000,,,Q,2014-02-14,18:41:00,2012-12-31
2,0000,,,Q,2014-02-14,18:41:00,2013-01-31
3,0000,,,Q,2014-02-14,18:41:00,2013-03-31
4,0000,,,Q,2014-02-14,18:41:00,2013-04-30
...,...,...,...,...,...,...,...
204467,ZYNX,ZYXI,98986M10,Q,2021-04-29,16:01:00,2021-03-31
204468,ZYNX,ZYXI,98986M10,Q,2021-07-29,16:01:00,2021-06-30
204469,ZYNX,ZYXI,98986M10,Q,2021-11-02,16:01:00,2021-09-30
204470,ZYNX,ZYXI,98986M10,A,2022-02-24,16:01:00,2021-12-31


In [6]:
# Split the ticker into a root and an optional suffix at the first space,
# slash or dot (e.g. "BRK.A" -> root "BRK", suffix "A").
# Tickers without a separator, and missing tickers, get '' for the absent part.
ticker_parts = ibes_raw1['hticker'].str.split('[ /.]', n=1, expand=True).fillna('')
ibes_raw1[['root', 'suffix']] = ticker_parts
ibes_raw1

Unnamed: 0,ibes_id,hticker,cusip8,q/a,ea_date,ea_time,datadate,root,suffix
0,0000,,,A,2014-02-14,18:41:00,2012-12-31,,
1,0000,,,Q,2014-02-14,18:41:00,2012-12-31,,
2,0000,,,Q,2014-02-14,18:41:00,2013-01-31,,
3,0000,,,Q,2014-02-14,18:41:00,2013-03-31,,
4,0000,,,Q,2014-02-14,18:41:00,2013-04-30,,
...,...,...,...,...,...,...,...,...,...
204467,ZYNX,ZYXI,98986M10,Q,2021-04-29,16:01:00,2021-03-31,ZYXI,
204468,ZYNX,ZYXI,98986M10,Q,2021-07-29,16:01:00,2021-06-30,ZYXI,
204469,ZYNX,ZYXI,98986M10,Q,2021-11-02,16:01:00,2021-09-30,ZYXI,
204470,ZYNX,ZYXI,98986M10,A,2022-02-24,16:01:00,2021-12-31,ZYXI,


In [8]:
# Spot-check the root/suffix split on a ticker that carries a class suffix.
is_brk_a = ibes_raw1['hticker'].eq("BRK.A")
ibes_raw1.loc[is_brk_a]

Unnamed: 0,ibes_id,hticker,cusip8,q/a,ea_date,ea_time,datadate,root,suffix
152402,BKHT,BRK.A,08467010,Q,2009-11-06,17:05:00,2009-09-30,BRK,A
152403,BKHT,BRK.A,08467010,A,2010-02-27,08:00:00,2009-12-31,BRK,A
152404,BKHT,BRK.A,08467010,Q,2010-02-27,08:00:00,2009-12-31,BRK,A
152405,BKHT,BRK.A,08467010,Q,2010-05-07,17:15:00,2010-03-31,BRK,A
152406,BKHT,BRK.A,08467010,Q,2010-08-06,17:00:00,2010-06-30,BRK,A
...,...,...,...,...,...,...,...,...,...
152461,BKHT,BRK.A,08467010,Q,2021-05-01,08:05:00,2021-03-31,BRK,A
152462,BKHT,BRK.A,08467010,Q,2021-08-07,08:09:00,2021-06-30,BRK,A
152463,BKHT,BRK.A,08467010,Q,2021-11-06,08:31:00,2021-09-30,BRK,A
152464,BKHT,BRK.A,08467010,A,2022-02-28,06:32:00,2021-12-31,BRK,A


In [10]:
# Remove repeated announcement records.
# Missing tickers/CUSIPs are blanked first; rows that agree on every
# identifying field are then collapsed to a single row.
blank_cols = ['hticker', 'cusip8']
dedupe_keys = ['ibes_id', "hticker", "cusip8", "q/a", "ea_date", "ea_time", "datadate"]

ibes_raw1[blank_cols] = ibes_raw1[blank_cols].fillna('')
checkdupes = ibes_raw1.drop_duplicates(subset=dedupe_keys)

# Row count after deduplication, followed by the frame itself.
print(len(checkdupes))
checkdupes

1203767


Unnamed: 0,ibes_id,hticker,cusip8,q/a,ea_date,ea_time,datadate,root,suffix
0,0000,,,A,2014-02-14,18:41:00,2012-12-31,,
1,0000,,,Q,2014-02-14,18:41:00,2012-12-31,,
2,0000,,,Q,2014-02-14,18:41:00,2013-01-31,,
3,0000,,,Q,2014-02-14,18:41:00,2013-03-31,,
4,0000,,,Q,2014-02-14,18:41:00,2013-04-30,,
...,...,...,...,...,...,...,...,...,...
204467,ZYNX,ZYXI,98986M10,Q,2021-04-29,16:01:00,2021-03-31,ZYXI,
204468,ZYNX,ZYXI,98986M10,Q,2021-07-29,16:01:00,2021-06-30,ZYXI,
204469,ZYNX,ZYXI,98986M10,Q,2021-11-02,16:01:00,2021-09-30,ZYXI,
204470,ZYNX,ZYXI,98986M10,A,2022-02-24,16:01:00,2021-12-31,ZYXI,


In [11]:
# Write the deduplicated IBES EA timestamps to a parquet file.
# NOTE(review): savepth is an absolute path on one user's machine - confirm or
# adjust it before running this cell anywhere else.
savepth = "C://Users/clj585/OneDrive - Northwestern University/data_feeds/IBES/Data"

# Goodbye, KIS! About half of my storage problem is figured out!
checkdupes.to_parquet(f"{savepth}/ibes_eadates.parquet")