In [16]:
import sklearn
from sklearn.ensemble import RandomForestRegressor

from datetime import datetime
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.collections import LineCollection

import statsmodels as sms
import statsmodels.formula.api as smf

import seaborn as sns # for data visualization
sns.set_style("whitegrid")

from dateutil.relativedelta import *
from pandas.tseries.offsets import *

pd.set_option('display.max_columns', None)

In [17]:
%%time
mdf2 = pd.read_csv("~/misp_data/lagged_comp_fundr_for_val_1976-2019.csv", index_col=0)
mdf2 = mdf2.replace([np.inf, -np.inf], np.nan)
# mdf2 = mdf2.dropna(thresh=int(mdf2.shape[1]/2))
mdf2 = mdf2.fillna(0)

CPU times: user 11.6 s, sys: 1.57 s, total: 13.2 s
Wall time: 13.2 s


In [18]:
mdf2.shape

(380884, 215)

In [19]:
# train val split: 1975-2005 train, 2005-2010 val, 2010-2015 test

dftr = mdf2.loc[(1970 <= mdf2['year']) & (mdf2['year'] <= 2009)]
dfts = mdf2.loc[(2010 <= mdf2['year']) & (mdf2['year'] <= 2015)]

# x_vars=['at', 'pstkl', 'txditc', 'pstkrv', 'seq', 'pstk', 'ni', 'epspi', 'revt', 'capx', 'ajex', 'sic2', 'sic', 'naics', 'sale', 'cogs', 'xsga', 'xrd', 'xad', 'ib', 'ebitda', 'ebit', 'nopi', 'spi', 'pi', 'txp', 'txfed', 'txfo', 'txt', 'xint', 'oancf', 'dvt', 'ob', 'gdwlia', 'gdwlip', 'gwo', 'rect', 'act', 'che', 'ppegt', 'invt', 'aco', 'intan', 'ao', 'ppent', 'gdwl', 'fatb', 'fatl', 'lct', 'dlc', 'dltt', 'lt', 'dm', 'dcvt', 'cshrc', 'dcpstk', 'ap', 'lco', 'lo', 'drc', 'drlt', 'txdi', 'ceq', 'scstkc', 'emp', 'csho', 'prcc_f', 'mve_f', 'am', 'txdb', 'dvc', 'dvp', 'dp', 'dvpsx_f', 'mib', 'ivao', 'ivst', 'sstk', 'prstkc', 'dv', 'dltis', 'dltr', 'dlcch', 'oibdp', 'dvpa', 'tstkp', 'oiadp', 'xpp', 'xacc', 're', 'ppenb', 'ppenls', 'capxv', 'fopt', 'wcap', 'be', 'ni_-5', 'at_-5', 'epspi_-5', 'revt_-5', 'capx_-5', 'naics_-5', 'cogs_-5', 'xsga_-5', 'xrd_-5', 'xad_-5', 'ib_-5', 'ebitda_-5', 'ebit_-5', 'nopi_-5', 'pi_-5', 'dvt_-5', 'be_-5', 'ni_-4', 'at_-4', 'epspi_-4', 'revt_-4', 'capx_-4', 'naics_-4', 'cogs_-4', 'xsga_-4', 'xrd_-4', 'xad_-4', 'ib_-4', 'ebitda_-4', 'ebit_-4', 'nopi_-4', 'pi_-4', 'dvt_-4', 'be_-4', 'ni_-3', 'at_-3', 'epspi_-3', 'revt_-3', 'capx_-3', 'naics_-3', 'cogs_-3', 'xsga_-3', 'xrd_-3', 'xad_-3', 'ib_-3', 'ebitda_-3', 'ebit_-3', 'nopi_-3', 'pi_-3', 'dvt_-3', 'be_-3', 'ni_-2', 'at_-2', 'epspi_-2', 'revt_-2', 'capx_-2', 'naics_-2', 'cogs_-2', 'xsga_-2', 'xrd_-2', 'xad_-2', 'ib_-2', 'ebitda_-2', 'ebit_-2', 'nopi_-2', 'pi_-2', 'dvt_-2', 'be_-2', 'ni_-1', 'at_-1', 'epspi_-1', 'revt_-1', 'capx_-1', 'naics_-1', 'cogs_-1', 'xsga_-1', 'xrd_-1', 'xad_-1', 'ib_-1', 'ebitda_-1', 'ebit_-1', 'nopi_-1', 'pi_-1', 'dvt_-1', 'be_-1', 'at_yoy1', 'ni_yoy1', 'epspi_yoy1', 'revt_yoy1', 'capx_yoy1', 'naics_yoy1', 'cogs_yoy1', 'xsga_yoy1', 'xrd_yoy1', 'xad_yoy1', 'ib_yoy1', 'ebitda_yoy1', 'ebit_yoy1', 'nopi_yoy1', 'pi_yoy1', 'dvt_yoy1', 'be_yoy1', 'at_yoy2', 'ni_yoy2', 'epspi_yoy2', 'revt_yoy2', 'capx_yoy2', 'naics_yoy2', 'cogs_yoy2', 'xsga_yoy2', 'xrd_yoy2', 'xad_yoy2', 'ib_yoy2', 'ebitda_yoy2', 'ebit_yoy2', 'nopi_yoy2', 'pi_yoy2', 'dvt_yoy2', 'be_yoy2', 'at_yoy3', 'ni_yoy3', 'epspi_yoy3', 'revt_yoy3', 'capx_yoy3', 'naics_yoy3', 'cogs_yoy3', 'xsga_yoy3', 'xrd_yoy3', 'xad_yoy3', 'ib_yoy3', 'ebitda_yoy3', 'ebit_yoy3', 'nopi_yoy3', 'pi_yoy3', 'dvt_yoy3', 'be_yoy3', 'at_yoy4', 'ni_yoy4', 'epspi_yoy4', 'revt_yoy4', 'capx_yoy4', 'naics_yoy4', 'cogs_yoy4', 'xsga_yoy4', 'xrd_yoy4', 'xad_yoy4', 'ib_yoy4', 'ebitda_yoy4', 'ebit_yoy4', 'nopi_yoy4', 'pi_yoy4', 'dvt_yoy4', 'be_yoy4', 'at_yoy5', 'ni_yoy5', 'epspi_yoy5', 'revt_yoy5', 'capx_yoy5', 'naics_yoy5', 'cogs_yoy5', 'xsga_yoy5', 'xrd_yoy5', 'xad_yoy5', 'ib_yoy5', 'ebitda_yoy5', 'ebit_yoy5', 'nopi_yoy5', 'pi_yoy5', 'dvt_yoy5', 'be_yoy5', 'CAPEI', 'bm', 'evm', 'pe_op_basic', 'pe_op_dil', 'pe_exi', 'pe_inc', 'ps_y', 'pcf', 'dpr', 'npm', 'opmbd', 'opmad', 'gpm', 'ptpm', 'cfm', 'roa', 'roe', 'roce', 'efftax', 'aftret_eq', 'aftret_invcapx', 'aftret_equity', 'pretret_noa', 'pretret_earnat', 'GProf', 'equity_invcap', 'debt_invcap', 'totdebt_invcap', 'capital_ratio', 'int_debt', 'int_totdebt', 'cash_lt', 'invt_act', 'rect_act', 'debt_at', 'debt_ebitda', 'short_debt', 'curr_debt', 'lt_debt', 'profit_lct', 'ocf_lct', 'cash_debt', 'fcf_ocf', 'lt_ppent', 'dltt_be', 'debt_assets', 'debt_capital', 'de_ratio', 'intcov', 'intcov_ratio', 'cash_ratio', 'quick_ratio', 'curr_ratio', 'cash_conversion', 'inv_turn', 'at_turn', 'rect_turn', 'pay_turn', 'sale_invcap', 'sale_equity', 'sale_nwc', 'rd_sale', 'adv_sale', 'staff_sale', 'accrual', 'ptb', 'PEG_trailing', 'PEG_1yrforward', 'PEG_ltgforward']

x_vars = ['siccd',
 'sic2',
 'sic',
 'me',
 'at',
 'pstkl',
 'txditc',
 'pstkrv',
 'seq',
 'pstk',
 'ni',
 'epspi',
 'revt',
 'capx',
 'ajex',
 'naics',
 'sale',
 'cogs',
 'xsga',
 'xrd',
 'xad',
 'ib',
 'ebitda',
 'ebit',
 'nopi',
 'spi',
 'pi',
 'txp',
 'txfed',
 'txfo',
 'txt',
 'xint',
 'oancf',
 'dvt',
 'ob',
 'gdwlia',
 'gdwlip',
 'gwo',
 'rect',
 'act',
 'che',
 'ppegt',
 'invt',
 'aco',
 'intan',
 'ao',
 'ppent',
 'gdwl',
 'fatb',
 'fatl',
 'lct',
 'dlc',
 'dltt',
 'lt',
 'dm',
 'dcvt',
 'cshrc',
 'dcpstk',
 'ap',
 'lco',
 'lo',
 'drc',
 'drlt',
 'txdi',
 'ceq',
 'scstkc',
 'emp',
 'csho',
 'prcc_f',
 'mve_f',
 'am',
 'txdb',
 'dvc',
 'dvp',
 'dp',
 'dvpsx_f',
 'mib',
 'ivao',
 'ivst',
 'sstk',
 'prstkc',
 'dv',
 'dltis',
 'dltr',
 'dlcch',
 'oibdp',
 'dvpa',
 'tstkp',
 'oiadp',
 'xpp',
 'xacc',
 're',
 'ppenb',
 'ppenls',
 'capxv',
 'fopt',
 'wcap',
 'be',
 'at_-5',
 'revt_-5',
 'capx_-5',
 'ni_-5',
 'at_-4',
 'revt_-4',
 'capx_-4',
 'ni_-4',
 'at_-3',
 'revt_-3',
 'capx_-3',
 'ni_-3',
 'at_-2',
 'revt_-2',
 'capx_-2',
 'ni_-2',
 'at_-1',
 'revt_-1',
 'capx_-1',
 'ni_-1',
 'ni',
 'ni_yoy1',
 'revt_yoy1',
 'at_yoy1',
 'capx_yoy1',
 'ni_yoy3',
 'revt_yoy3',
 'at_yoy3',
 'capx_yoy3',
 'ni_yoy2',
 'revt_yoy2',
 'at_yoy2',
 'capx_yoy2',
 'CAPEI',
 'bm',
 'evm',
 'pe_op_basic',
 'pe_op_dil',
 'pe_exi',
 'pe_inc',
 'ps_y',
 'pcf',
 'dpr',
 'npm',
 'opmbd',
 'opmad',
 'gpm',
 'ptpm',
 'cfm',
 'roa',
 'roe',
 'roce',
 'efftax',
 'aftret_eq',
 'aftret_invcapx',
 'aftret_equity',
 'pretret_noa',
 'pretret_earnat',
 'GProf',
 'equity_invcap',
 'debt_invcap',
 'totdebt_invcap',
 'capital_ratio',
 'int_debt',
 'int_totdebt',
 'cash_lt',
 'invt_act',
 'rect_act',
 'debt_at',
 'debt_ebitda',
 'short_debt',
 'curr_debt',
 'lt_debt',
 'profit_lct',
 'ocf_lct',
 'cash_debt',
 'fcf_ocf',
 'lt_ppent',
 'dltt_be',
 'debt_assets',
 'debt_capital',
 'de_ratio',
 'intcov',
 'intcov_ratio',
 'cash_ratio',
 'quick_ratio',
 'curr_ratio',
 'cash_conversion',
 'inv_turn',
 'at_turn',
 'rect_turn',
 'pay_turn',
 'sale_invcap',
 'sale_equity',
 'sale_nwc',
 'rd_sale',
 'adv_sale',
 'staff_sale',
 'accrual',
 'ptb',
 'PEG_trailing',
 'PEG_1yrforward',
 'PEG_ltgforward']

In [20]:
X_tr = dftr[x_vars].astype(float)
y_tr1 = dftr['ni_1'].astype(float)
y_tr2 = dftr['ni_2'].astype(float)
y_tr3 = dftr['ni_3'].astype(float)
y_tr4 = dftr['ni_4'].astype(float)
y_tr5 = dftr['ni_5'].astype(float)

X_ts = dfts[x_vars].astype(float)
y_ts1 = dfts['ni_1'].astype(float)
y_ts2 = dfts['ni_2'].astype(float)
y_ts3 = dfts['ni_3'].astype(float)
y_ts4 = dfts['ni_4'].astype(float)
y_ts5 = dfts['ni_5'].astype(float)

X_tr1 = X_tr[~(y_tr1==0)]
y_tr1 = y_tr1[~(y_tr1==0)]
print(1, X_tr1.shape, y_tr1.shape)

X_tr2 = X_tr[~(y_tr2==0)]
y_tr2 = y_tr2[~(y_tr2==0)]
print(2, X_tr2.shape, y_tr2.shape)

X_tr3 = X_tr[~(y_tr3==0)]
y_tr3 = y_tr3[~(y_tr3==0)]
print(3, X_tr3.shape, y_tr3.shape)

X_tr4 = X_tr[~(y_tr4==0)]
y_tr4 = y_tr4[~(y_tr4==0)]
print(4, X_tr4.shape, y_tr4.shape)

X_tr5 = X_tr[~(y_tr5==0)]
y_tr5 = y_tr5[~(y_tr5==0)]
print(5, X_tr5.shape, y_tr5.shape)

X_ts1 = X_ts
print(1, X_ts1.shape, y_ts1.shape)

X_ts2 = X_ts
print(2, X_ts2.shape, y_ts2.shape)

X_ts3 = X_ts
print(3, X_ts3.shape, y_ts3.shape)

X_ts4 = X_ts
print(4, X_ts4.shape, y_ts4.shape)

X_ts5 = X_ts
print(5, X_ts5.shape, y_ts5.shape)

1 (239155, 201) (239155,)
2 (227659, 201) (227659,)
3 (214968, 201) (214968,)
4 (201398, 201) (201398,)
5 (186951, 201) (186951,)
1 (45143, 201) (45143,)
2 (45143, 201) (45143,)
3 (45143, 201) (45143,)
4 (45143, 201) (45143,)
5 (45143, 201) (45143,)


In [23]:
import pickle 

prefixpath = ''
with open(f"{prefixpath}RF_regr1.pickle", "rb") as input_file:
    regr1 = pickle.load(input_file)
with open(f"{prefixpath}RF_regr2.pickle", "rb") as input_file:
    regr2 = pickle.load(input_file)
with open(f"{prefixpath}RF_regr3.pickle", "rb") as input_file:
    regr3 = pickle.load(input_file)
with open(f"{prefixpath}RF_regr4.pickle", "rb") as input_file:
    regr4 = pickle.load(input_file)
with open(f"{prefixpath}RF_regr5.pickle", "rb") as input_file:
    regr5 = pickle.load(input_file)



In [24]:
X_all = mdf2[x_vars].astype(float)
X_all.head(3)

Unnamed: 0,siccd,sic2,sic,me,at,pstkl,txditc,pstkrv,seq,pstk,ni,epspi,revt,capx,ajex,naics,sale,cogs,xsga,xrd,xad,ib,ebitda,ebit,nopi,spi,pi,txp,txfed,txfo,txt,xint,oancf,dvt,ob,gdwlia,gdwlip,gwo,rect,act,che,ppegt,invt,aco,intan,ao,ppent,gdwl,fatb,fatl,lct,dlc,dltt,lt,dm,dcvt,cshrc,dcpstk,ap,lco,lo,drc,drlt,txdi,ceq,scstkc,emp,csho,prcc_f,mve_f,am,txdb,dvc,dvp,dp,dvpsx_f,mib,ivao,ivst,sstk,prstkc,dv,dltis,dltr,dlcch,oibdp,dvpa,tstkp,oiadp,xpp,xacc,re,ppenb,ppenls,capxv,fopt,wcap,be,at_-5,revt_-5,capx_-5,ni_-5,at_-4,revt_-4,capx_-4,ni_-4,at_-3,revt_-3,capx_-3,ni_-3,at_-2,revt_-2,capx_-2,ni_-2,at_-1,revt_-1,capx_-1,ni_-1,ni.1,ni_yoy1,revt_yoy1,at_yoy1,capx_yoy1,ni_yoy3,revt_yoy3,at_yoy3,capx_yoy3,ni_yoy2,revt_yoy2,at_yoy2,capx_yoy2,CAPEI,bm,evm,pe_op_basic,pe_op_dil,pe_exi,pe_inc,ps_y,pcf,dpr,npm,opmbd,opmad,gpm,ptpm,cfm,roa,roe,roce,efftax,aftret_eq,aftret_invcapx,aftret_equity,pretret_noa,pretret_earnat,GProf,equity_invcap,debt_invcap,totdebt_invcap,capital_ratio,int_debt,int_totdebt,cash_lt,invt_act,rect_act,debt_at,debt_ebitda,short_debt,curr_debt,lt_debt,profit_lct,ocf_lct,cash_debt,fcf_ocf,lt_ppent,dltt_be,debt_assets,debt_capital,de_ratio,intcov,intcov_ratio,cash_ratio,quick_ratio,curr_ratio,cash_conversion,inv_turn,at_turn,rect_turn,pay_turn,sale_invcap,sale_equity,sale_nwc,rd_sale,adv_sale,staff_sale,accrual,ptb,PEG_trailing,PEG_1yrforward,PEG_ltgforward
2,3990.0,3942.0,3942.0,3002.34375,2.115,0.0,0.0,0.0,0.418,0.0,-0.73,-0.2,1.026,0.24,1.0,339931.0,1.026,0.511,1.1,0.039,0.113,-0.73,-0.585,-0.686,0.024,0.0,-0.73,0.0,0.0,0.0,0.0,0.068,0.0,0.0,0.0,0.0,0.0,0.0,0.7,1.63,0.348,0.328,0.512,0.07,0.252,0.044,0.189,0.0,0.0,0.0,1.639,0.968,0.058,1.697,0.0,0.0,0.0,0.0,0.41,0.261,0.0,0.0,0.0,0.0,0.418,0.0,0.053,3.843,0.75,2.88225,0.05,0.0,0.0,0.0,0.101,0.0,0.0,0.0,0.0,1.743,0.0,0.0,0.057,0.0,-0.372,-0.585,0.0,0.0,-0.686,0.07,0.0,-1.599,0.0,0.0,0.24,-0.507,-0.009,0.418,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.72,0.354,0.026,-0.511,-0.73,0.3,0.654971,0.659574,0.891667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.145,-6.681,0.0,0.0,-1.875,-1.875,1.541,-1.172,0.0,-0.712,-0.57,-0.669,0.502,-0.712,-0.567,-0.413,0.0,-0.808,0.0,-19.467,-0.102,-19.467,3.049,-0.609,0.243,0.878,0.122,2.155,0.122,2.345,0.084,0.205,0.314,0.429,0.485,-1.754,0.943,0.966,0.034,-0.357,-0.823,-0.795,0.0,8.979,0.139,0.802,0.775,4.06,-9.735,-10.088,0.212,0.682,0.995,148.069,1.237,0.724,2.842,1.839,2.155,2.455,0.0,0.038,0.11,0.0,-0.437,3.784,0.0,0.0,0.0
3,0.0,3942.0,3942.0,0.0,0.72,0.0,0.0,0.0,-0.343,0.0,-0.511,-0.18,0.354,0.026,1.0,339931.0,0.354,0.361,0.366,0.0,0.067,-0.511,-0.373,-0.419,0.001,0.0,-0.511,0.0,0.0,0.0,0.0,0.093,0.0,0.0,0.0,0.0,0.0,0.0,0.022,0.388,0.03,0.086,0.314,0.022,0.113,0.174,0.045,0.0,0.0,0.0,1.063,0.596,0.0,1.063,0.0,0.0,0.0,0.0,0.361,0.106,0.0,0.0,0.0,0.0,-0.343,0.0,0.005,2.892,0.0,0.0,0.022,0.0,0.0,0.0,0.046,0.0,0.0,0.0,0.0,0.07,0.0,0.0,0.0,0.0,-0.201,-0.373,0.0,0.0,-0.419,0.022,0.106,-0.869,0.0,0.0,0.026,-0.442,-0.675,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-0.511,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,3942.0,3942.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,339931.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [25]:
ni_1_pred = regr1.predict(X_all)
ni_2_pred = regr2.predict(X_all)
ni_3_pred = regr3.predict(X_all)
ni_4_pred = regr4.predict(X_all)
ni_5_pred = regr5.predict(X_all)

[Parallel(n_jobs=100)]: Using backend ThreadingBackend with 100 concurrent workers.
[Parallel(n_jobs=100)]: Done   3 out of 100 | elapsed:    0.1s remaining:    3.8s
[Parallel(n_jobs=100)]: Done  54 out of 100 | elapsed:    0.2s remaining:    0.2s
[Parallel(n_jobs=100)]: Done 100 out of 100 | elapsed:    0.2s finished
[Parallel(n_jobs=100)]: Using backend ThreadingBackend with 100 concurrent workers.
[Parallel(n_jobs=100)]: Done   2 out of 100 | elapsed:    0.0s remaining:    2.1s
[Parallel(n_jobs=100)]: Done 100 out of 100 | elapsed:    0.1s finished
[Parallel(n_jobs=100)]: Using backend ThreadingBackend with 100 concurrent workers.
[Parallel(n_jobs=100)]: Done   2 out of 100 | elapsed:    0.0s remaining:    2.3s
[Parallel(n_jobs=100)]: Done 100 out of 100 | elapsed:    0.1s finished
[Parallel(n_jobs=100)]: Using backend ThreadingBackend with 100 concurrent workers.
[Parallel(n_jobs=100)]: Done   2 out of 100 | elapsed:    0.0s remaining:    1.7s
[Parallel(n_jobs=100)]: Done 100 out o

In [26]:
mdf_rf = mdf2[['ticker', 'permno', 'year', 'jdate']].copy()
mdf_rf['ni_1'] = ni_1_pred
mdf_rf['ni_2'] = ni_2_pred
mdf_rf['ni_3'] = ni_3_pred
mdf_rf['ni_4'] = ni_4_pred
mdf_rf['ni_5'] = ni_5_pred
for i in range(6,11):
    mdf_rf[f'ni_{i}'] = ni_5_pred
mdf_rf.to_csv('RF_ni_pred_for_val.csv')

In [27]:
print(np.asarray(list(X_tr1.keys()))[(regr1.feature_importances_.argsort()[-15:][::-1])])
regr1.score(X_ts1, y_ts1)
sns.set(rc={'figure.figsize':(300,12)})
ax = sns.barplot(x=list(X_tr2.keys()), y=regr2.feature_importances_)

['ib' 'drlt' 'pi' 'intan' 'dltr' 'xint' 're' 'mve_f' 'dlcch' 'ni' 'ni'
 'am' 'dltt' 'spi' 'ebit']


[Parallel(n_jobs=100)]: Using backend ThreadingBackend with 100 concurrent workers.
[Parallel(n_jobs=100)]: Done   3 out of 100 | elapsed:    0.1s remaining:    1.6s
[Parallel(n_jobs=100)]: Done  54 out of 100 | elapsed:    0.1s remaining:    0.1s
[Parallel(n_jobs=100)]: Done 100 out of 100 | elapsed:    0.1s finished


0.4601909813158691

In [28]:
print(np.asarray(list(X_tr2.keys()))[(regr2.feature_importances_.argsort()[-15:][::-1])])
regr2.score(X_ts2, y_ts2)
# sns.set(rc={'figure.figsize':(300,12)})
# ax = sns.barplot(x=list(X_tr2.keys()), y=regr2.feature_importances_)

['pi' 'ib' 'drlt' 'oiadp' 'dltt' 'mve_f' 'me' 'ebit' 're' 'ppent' 'ni'
 'nopi' 'dltis' 'pcf' 'capx_yoy1']


[Parallel(n_jobs=100)]: Using backend ThreadingBackend with 100 concurrent workers.
[Parallel(n_jobs=100)]: Done   2 out of 100 | elapsed:    0.1s remaining:    2.7s
[Parallel(n_jobs=100)]: Done 100 out of 100 | elapsed:    0.1s finished


0.22869138046373938

In [29]:
print(np.asarray(list(X_tr3.keys()))[(regr3.feature_importances_.argsort()[-15:][::-1])])
regr3.score(X_ts3, y_ts3)
# sns.set(rc={'figure.figsize':(300,12)})
# ax = sns.barplot(x=list(X_tr2.keys()), y=regr2.feature_importances_)

['pi' 'ib' 'oiadp' 'drlt' 'mve_f' 'dltt' 'me' 'cash_conversion' 'ajex'
 'ni' 'ni' 're' 'xint' 'dlcch' 'at_yoy2']


[Parallel(n_jobs=100)]: Using backend ThreadingBackend with 100 concurrent workers.
[Parallel(n_jobs=100)]: Done   2 out of 100 | elapsed:    0.0s remaining:    2.1s
[Parallel(n_jobs=100)]: Done 100 out of 100 | elapsed:    0.1s finished


0.2315737812533707

In [30]:
print(np.asarray(list(X_tr4.keys()))[(regr4.feature_importances_.argsort()[-15:][::-1])])
regr4.score(X_ts4, y_ts4)
# sns.set(rc={'figure.figsize':(300,12)})
# ax = sns.barplot(x=list(X_tr2.keys()), y=regr2.feature_importances_)

['mve_f' 'ib' 'dltt' 'oiadp' 'drlt' 'ajex' 'dlcch' 'pi' 'me' 'nopi' 'dltr'
 'txfo' 'seq' 'ni_-4' 'pcf']


[Parallel(n_jobs=100)]: Using backend ThreadingBackend with 100 concurrent workers.
[Parallel(n_jobs=100)]: Done   2 out of 100 | elapsed:    0.0s remaining:    1.1s
[Parallel(n_jobs=100)]: Done 100 out of 100 | elapsed:    0.1s finished


0.40809137749590374

In [31]:
print(np.asarray(list(X_tr5.keys()))[(regr5.feature_importances_.argsort()[-15:][::-1])])
regr5.score(X_ts5, y_ts5)
# sns.set(rc={'figure.figsize':(300,12)})
# ax = sns.barplot(x=list(X_tr2.keys()), y=regr2.feature_importances_)

['mve_f' 'dltt' 'dltr' 'ajex' 'oiadp' 'ib' 'dm' 'lo' 'ni_-1' 'me' 'oancf'
 'evm' 'pi' 'ni_-3' 'capx_yoy1']


[Parallel(n_jobs=100)]: Using backend ThreadingBackend with 100 concurrent workers.
[Parallel(n_jobs=100)]: Done   2 out of 100 | elapsed:    0.1s remaining:    2.5s
[Parallel(n_jobs=100)]: Done 100 out of 100 | elapsed:    0.1s finished


0.20864420033489695