In [1]:
import pandas as pd 
import numpy as np
import pycatch22 as tsfe

# Get Earnings Number to Homogenize Dates across Tickers

In [2]:
# Load the reported earnings and index them by ticker. Within each ticker
# the rows run from the most recent announcement date to the oldest.
df_actual_est = (
    pd.read_csv('ActualEarningsData.csv')
    .sort_values(by=['TICKER', 'ANNDATS'], ascending=[True, False])
    .set_index('TICKER')
)
df_actual_est.head()

Unnamed: 0_level_0,CUSIP,OFTIC,CNAME,PENDS,MEASURE,PDICITY,ANNDATS,VALUE
TICKER,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
0001,26878510,EPEGQ,EP ENGR CORP,20191231,EPS,QTR,20200325,
000V,28249U10,EIGR,EIGER,20220630,EPS,QTR,20220804,-0.51
000V,28249U10,EIGR,EIGER,20220331,EPS,QTR,20220505,-0.64
000V,28249U10,EIGR,EIGER,20211231,EPS,QTR,20220310,-0.64
000V,28249U10,EIGR,EIGER,20210930,EPS,QTR,20211104,-0.65


In [3]:
# Number each ticker's announcements chronologically: the oldest distinct
# announcement date gets 1, the next one 2, and so on. A dense rank gives
# rows that share an announcement date the same number, which reversing a
# cumulative counter does not guarantee when dates repeat.
# NOTE(review): assumes ANNDATS has no missing values (the integer cast
# below raises otherwise) -- confirm against the raw file.
ernum_within_ticker = (
    df_actual_est.groupby('TICKER')['ANNDATS']
    .rank(method='dense')
    .astype('int64')
)

# Largest number of distinct announcements observed for any single ticker.
max_ernum = ernum_within_ticker.max()

# Shift every ticker so that its most recent announcement carries
# `max_ernum`; tickers with a shorter history then start above 1.
latest_per_ticker = ernum_within_ticker.groupby(level=0).transform('max')

# Assign positionally: the TICKER index contains duplicate labels, so
# label-based alignment is not something to rely on here.
df_actual_est['ERNUM'] = (
    ernum_within_ticker + (max_ernum - latest_per_ticker)
).to_numpy()

# Try Homogenizing Dates for Estimates

In [4]:
# Add the announcement date as a second index level next to the ticker.
actual_est = df_actual_est.set_index(
    keys='ANNDATS',
    append=True,
)

In [5]:
# Load the estimates, keep only the quarterly rows and cast the fiscal
# period end date to an integer. Everything is done in one chain that
# returns new frames, so no column is assigned on a filtered slice
# (which would trigger pandas' SettingWithCopyWarning).
df_estimates = (
    pd.read_csv('InitialEstimatesData.csv')
    .loc[lambda frame: frame['FISCALP'] == 'QTR']
    .assign(FPEDATS=lambda frame: frame['FPEDATS'].astype('int'))
)

# Index by (ticker, fiscal period end date) for the label slicing done later.
estimates = df_estimates.set_index(['TICKER', 'FPEDATS'])

In [6]:
# Keep only the estimates whose ticker also appears in the actual earnings
# data, sort the index, and start every row with a placeholder ERNUM of 0.
estimate_tickers = estimates.index.get_level_values(0)
actual_tickers = actual_est.index.get_level_values(0)
has_actuals = estimate_tickers.isin(actual_tickers)

estimates = estimates.loc[has_actuals].sort_index()
estimates['ERNUM'] = 0

In [7]:
# Copy each announcement's ERNUM onto the estimates of the same ticker whose
# FPEDATS is at or before the announcement date. `actual_est` is ordered
# from the most recent announcement to the oldest within a ticker, so later
# iterations overwrite progressively smaller (older) slices. Rows that no
# announcement date covers keep the placeholder value 0.
#
# Iterating over the ERNUM column alone avoids building a full row Series
# per announcement, which is all `iterrows()` was being used for.
for (ticker, ann_date), ernum in actual_est['ERNUM'].items():
    try:
        # Label-based slice on the sorted (TICKER, FPEDATS) index:
        # every row of `ticker` with FPEDATS <= ann_date.
        estimates.loc[(ticker, slice(ann_date)), 'ERNUM'] = ernum
    except KeyError:
        # The ticker has announcements but no estimates at all. Only this
        # lookup failure is skipped; any other error (or a keyboard
        # interrupt) now propagates instead of being silently swallowed.
        continue

In [8]:
# Swap the second index level (FPEDATS) for ERNUM, so the estimates are
# keyed by (ticker, earnings number), then sort on that new index.
estimates = (
    estimates
    .droplevel(1)
    .set_index('ERNUM', append=True)
    .sort_index()
)

# Time Series Feature Extraction for All Tickers

In [9]:
# Compute the catch22 feature set of the `col` series for every
# (ticker, earnings number) pair. `ret` collects one single-row DataFrame
# per pair, indexed by (ticker, ernum) with one column per feature name.
ret, col = [], 'MEDEST'

# A single groupby pass over the first two index levels replaces one `.loc`
# lookup per key. It also always yields a Series, whereas `.loc` with a full
# key returns a bare scalar when that key has exactly one row.
for (ticker, ernum), series in estimates.groupby(level=[0, 1])[col]:
    # Turn the feature result into one row whose columns are the entries of
    # its 'names' field.
    ftrs = pd.DataFrame(tsfe.catch22_all(series)).set_index('names').T
    ftrs.columns.name = None
    # Build the (ticker, ernum) index without mutating the frame in place.
    ftrs = ftrs.assign(ticker=ticker, ernum=ernum).set_index(['ticker', 'ernum'])
    ret.append(ftrs)

In [10]:
# Stack the per-key feature rows into one table, then prepend the source
# column name to every feature column (no separator is inserted).
stacked_ftrs = pd.concat(ret)
all_ftrs = stacked_ftrs.add_prefix(col)
all_ftrs.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,MEDESTDN_HistogramMode_5,MEDESTDN_HistogramMode_10,MEDESTCO_f1ecac,MEDESTCO_FirstMin_ac,MEDESTCO_HistogramAMI_even_2_5,MEDESTCO_trev_1_num,MEDESTMD_hrv_classic_pnn40,MEDESTSB_BinaryStats_mean_longstretch1,MEDESTSB_TransitionMatrix_3ac_sumdiagcov,MEDESTPD_PeriodicityWang_th0_01,...,MEDESTFC_LocalSimple_mean1_tauresrat,MEDESTDN_OutlierInclude_p_001_mdrmd,MEDESTDN_OutlierInclude_n_001_mdrmd,MEDESTSP_Summaries_welch_rect_area_5_1,MEDESTSB_BinaryStats_diff_longstretch0,MEDESTSB_MotifThree_quantile_hh,MEDESTSC_FluctAnal_2_rsrangefit_50_1_logi_prop_r1,MEDESTSC_FluctAnal_2_dfa_50_1_2_logi_prop_r1,MEDESTSP_Summaries_welch_rect_centroid,MEDESTFC_LocalSimple_mean3_stderr
ticker,ernum,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
0045,12,0.442759,0.442759,1.812903,4.0,0.356548,0.077495,0.862745,8.0,0.074074,9.0,...,0.076923,0.615385,-0.769231,0.450123,4.0,1.966248,0.318182,0.5,0.589049,0.8945
00AI,5,-0.264912,-1.142641,5.798794,7.0,0.324564,0.141152,0.5,10.0,0.166667,7.0,...,0.052632,0.72549,-0.509804,0.724173,3.0,1.809463,0.285714,0.571429,0.098175,0.624293
00AO,5,-1.237429,0.051727,1.504236,6.0,0.412515,-1.382388,0.54902,10.0,0.008074,10.0,...,0.333333,-0.5,0.384615,0.321774,3.0,1.785271,0.5,0.727273,0.589049,1.064959
00Q7,5,0.244146,0.465983,1.416789,3.0,0.362395,-0.367887,0.818182,10.0,0.0625,3.0,...,0.142857,0.411765,-0.794118,0.380338,3.0,2.00742,0.461538,0.538462,0.687223,1.008803
01AF,5,0.794838,0.505333,0.775114,3.0,0.187127,-0.014208,0.784314,7.0,0.005867,6.0,...,0.5,-0.788462,-0.240385,0.03552,4.0,2.072794,0.636364,0.272727,1.178097,1.254924
