### Train Model

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pickle
import os

In [2]:
from collections import Counter

In [3]:
from sklearn.compose import ColumnTransformer

In [4]:
from sklearn.feature_selection import SelectKBest, chi2
from sklearn.feature_selection import VarianceThreshold

In [5]:
from sklearn.model_selection import cross_validate
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import train_test_split
from sklearn.model_selection import RepeatedStratifiedKFold
from sklearn.model_selection import RandomizedSearchCV
from sklearn.model_selection import GridSearchCV

In [6]:
from sklearn.utils import compute_sample_weight
from sklearn.utils import class_weight

In [7]:
from sklearn.metrics import balanced_accuracy_score, roc_auc_score, make_scorer, accuracy_score, f1_score
from sklearn.metrics import ConfusionMatrixDisplay,confusion_matrix
from sklearn.metrics import classification_report

In [8]:
from imblearn.over_sampling import SMOTE
from imblearn.under_sampling import RandomUnderSampler
from imblearn.pipeline import Pipeline, make_pipeline

In [9]:
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import OrdinalEncoder
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import RobustScaler

In [10]:
from sklearn.metrics import balanced_accuracy_score, roc_auc_score, make_scorer, accuracy_score
from sklearn.metrics import ConfusionMatrixDisplay,confusion_matrix
from sklearn.metrics import classification_report

In [11]:
from sklearn.compose import ColumnTransformer
from sklearn.impute import SimpleImputer

In [12]:
# Models
import xgboost as xgb
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression

In [13]:
# Set display and format settings

pd.set_option('display.max_rows', 800)
pd.set_option('display.max_columns', 500)
pd.set_option('display.max_colwidth', 100)
pd.options.display.float_format = '{:.0f}'.format

In [14]:
DATA_ROOT_DIR='/mnt/data/projects/MD7'
PROJ_ROOT_DIR='/home/priyesh/projects/MD7'

### Load Data

In [33]:
filepath=os.path.join(PROJ_ROOT_DIR,'pickle','yahoo_complete_flat.pkl')
df_complete_flat = pd.read_pickle(filepath)

In [34]:
filepath=os.path.join(PROJ_ROOT_DIR,'pickle','yahoo_complete.pkl')
df_complete = pd.read_pickle(filepath)

In [35]:
# Take a copy so that we can always refer back to original 

df = df_complete_flat.copy()

In [36]:
df.head()

Unnamed: 0,company,0_inc_totalRevenue,0_inc_costOfRevenue,0_inc_grossProfit,0_inc_netIncome,0_inc_operatingIncome,0_inc_netIncomeContinuousOperations,0_inc_netInterestIncome,0_inc_interestIncome,0_inc_otherIncomeExpense,0_inc_operatingExpense,0_inc_totalExpenses,0_inc_taxProvision,0_inc_interestExpense,0_inc_SGA,0_inc_researchAndDevelopment,0_inc_ebit,0_inc_dilutedEPS,0_inc_basicEPS,0_bal_cashEquivalent,0_bal_receivables,0_bal_finishedGoods,0_bal_workInProcess,0_bal_rawMaterials,0_bal_otherCurrentAssets,0_bal_inventory,0_bal_currentAssets,0_bal_netPPE,0_bal_otherNonCurrentAssets,0_bal_financialAssets,0_bal_goodwill,0_bal_goodwillAndOtherIntangibleAssets,0_bal_otherIntangibleAssets,0_bal_nonCurrentAccountsReceivable,0_bal_totalNonCurrentAssets,0_bal_currentDebt,0_bal_payablesAndAccruedExpenses,0_bal_otherCurrentLiabilities,0_bal_currentLiabilities,0_bal_longTermDebt,0_bal_otherNonCurrentLiabilities,0_bal_nonCurrentDeferredLiabilities,0_bal_nonCurrentDeferredTaxesLiabilities,0_bal_longTermProvisions,0_bal_totalNonCurrentLiabilities,0_bal_retainedEarnings,0_bal_stockholdersEquity,0_bal_totalDebt,0_bal_totalAssets,0_bal_totalLiabilities,0_bal_workingCapital,0_bal_quickRatio,0_bal_currentRatio,0_bal_debtCapitalRatio,0_bal_debtAssetRatio,0_cas_depreciation,0_cas_stockBasedCompensation,0_cas_cashFlowOperatingActivities,0_cas_capitalExpenditure,0_cas_saleOfBusiness,0_cas_purchaseOfPPE,0_cas_cashFlowInvestingActivities,0_cas_cashDividendsPaid,0_cas_repurchaseOfCapitalStock,0_cas_cashFlowFinancingActivities,0_cas_freeCashFlow,1_inc_totalRevenue,1_inc_costOfRevenue,1_inc_grossProfit,1_inc_netIncome,1_inc_operatingIncome,1_inc_netIncomeContinuousOperations,1_inc_netInterestIncome,1_inc_interestIncome,1_inc_otherIncomeExpense,1_inc_operatingExpense,1_inc_totalExpenses,1_inc_taxProvision,1_inc_interestExpense,1_inc_SGA,1_inc_researchAndDevelopment,1_inc_ebit,1_inc_dilutedEPS,1_inc_basicEPS,1_bal_cashEquivalent,1_bal_receivables,1_bal_finishedGoods,1_bal_workInProcess,1_bal_rawMaterials,1_bal_otherCurrentAssets,1_bal_inventory,1_bal_currentAssets,1_bal_netPPE,1_bal_otherNonCurrentAssets,1_bal_financialAssets,1_bal_goodwill,1_bal_goodwillAndOtherIntangibleAssets,1_bal_otherIntangibleAssets,1_bal_nonCurrentAccountsReceivable,1_bal_totalNonCurrentAssets,1_bal_currentDebt,1_bal_payablesAndAccruedExpenses,1_bal_otherCurrentLiabilities,1_bal_currentLiabilities,1_bal_longTermDebt,1_bal_otherNonCurrentLiabilities,1_bal_nonCurrentDeferredLiabilities,1_bal_nonCurrentDeferredTaxesLiabilities,1_bal_longTermProvisions,1_bal_totalNonCurrentLiabilities,1_bal_retainedEarnings,1_bal_stockholdersEquity,1_bal_totalDebt,1_bal_totalAssets,1_bal_totalLiabilities,1_bal_workingCapital,1_bal_quickRatio,1_bal_currentRatio,1_bal_debtCapitalRatio,1_bal_debtAssetRatio,1_cas_depreciation,1_cas_stockBasedCompensation,1_cas_cashFlowOperatingActivities,1_cas_capitalExpenditure,1_cas_saleOfBusiness,1_cas_purchaseOfPPE,1_cas_cashFlowInvestingActivities,1_cas_cashDividendsPaid,1_cas_repurchaseOfCapitalStock,1_cas_cashFlowFinancingActivities,1_cas_freeCashFlow,2_inc_totalRevenue,2_inc_costOfRevenue,2_inc_grossProfit,2_inc_netIncome,2_inc_operatingIncome,2_inc_netIncomeContinuousOperations,2_inc_netInterestIncome,2_inc_interestIncome,2_inc_otherIncomeExpense,2_inc_operatingExpense,2_inc_totalExpenses,2_inc_taxProvision,2_inc_interestExpense,2_inc_SGA,2_inc_researchAndDevelopment,2_inc_ebit,2_inc_dilutedEPS,2_inc_basicEPS,2_bal_cashEquivalent,2_bal_receivables,2_bal_finishedGoods,2_bal_workInProcess,2_bal_rawMaterials,2_bal_otherCurrentAssets,2_bal_inventory,2_bal_currentAssets,2_bal_netPPE,2_bal_otherNonCurrentAssets,2_bal_financialAssets,2_bal_goodwill,2_bal_goodwillAndOtherIntangibleAssets,2_bal_otherIntangibleAssets,2_bal_nonCurrentAccountsReceivable,2_bal_totalNonCurrentAssets,2_bal_currentDebt,2_bal_payablesAndAccruedExpenses,2_bal_otherCurrentLiabilities,2_bal_currentLiabilities,2_bal_longTermDebt,2_bal_otherNonCurrentLiabilities,2_bal_nonCurrentDeferredLiabilities,2_bal_nonCurrentDeferredTaxesLiabilities,2_bal_longTermProvisions,2_bal_totalNonCurrentLiabilities,2_bal_retainedEarnings,2_bal_stockholdersEquity,2_bal_totalDebt,2_bal_totalAssets,2_bal_totalLiabilities,2_bal_workingCapital,2_bal_quickRatio,2_bal_currentRatio,2_bal_debtCapitalRatio,2_bal_debtAssetRatio,2_cas_depreciation,2_cas_stockBasedCompensation,2_cas_cashFlowOperatingActivities,2_cas_capitalExpenditure,2_cas_saleOfBusiness,2_cas_purchaseOfPPE,2_cas_cashFlowInvestingActivities,2_cas_cashDividendsPaid,2_cas_repurchaseOfCapitalStock,2_cas_cashFlowFinancingActivities,2_cas_freeCashFlow,3_inc_totalRevenue,3_inc_costOfRevenue,3_inc_grossProfit,3_inc_netIncome,3_inc_operatingIncome,3_inc_netIncomeContinuousOperations,3_inc_netInterestIncome,3_inc_interestIncome,3_inc_otherIncomeExpense,3_inc_operatingExpense,3_inc_totalExpenses,3_inc_taxProvision,3_inc_interestExpense,3_inc_SGA,3_inc_researchAndDevelopment,3_inc_ebit,3_inc_dilutedEPS,3_inc_basicEPS,3_bal_cashEquivalent,3_bal_receivables,3_bal_finishedGoods,3_bal_workInProcess,3_bal_rawMaterials,3_bal_otherCurrentAssets,3_bal_inventory,3_bal_currentAssets,3_bal_netPPE,3_bal_otherNonCurrentAssets,3_bal_financialAssets,3_bal_goodwill,3_bal_goodwillAndOtherIntangibleAssets,3_bal_otherIntangibleAssets,3_bal_nonCurrentAccountsReceivable,3_bal_totalNonCurrentAssets,3_bal_currentDebt,3_bal_payablesAndAccruedExpenses,3_bal_otherCurrentLiabilities,3_bal_currentLiabilities,3_bal_longTermDebt,3_bal_otherNonCurrentLiabilities,3_bal_nonCurrentDeferredLiabilities,3_bal_nonCurrentDeferredTaxesLiabilities,3_bal_longTermProvisions,3_bal_totalNonCurrentLiabilities,3_bal_retainedEarnings,3_bal_stockholdersEquity,3_bal_totalDebt,3_bal_totalAssets,3_bal_totalLiabilities,3_bal_workingCapital,3_bal_quickRatio,3_bal_currentRatio,3_bal_debtCapitalRatio,3_bal_debtAssetRatio,3_cas_depreciation,3_cas_stockBasedCompensation,3_cas_cashFlowOperatingActivities,3_cas_capitalExpenditure,3_cas_saleOfBusiness,3_cas_purchaseOfPPE,3_cas_cashFlowInvestingActivities,3_cas_cashDividendsPaid,3_cas_repurchaseOfCapitalStock,3_cas_cashFlowFinancingActivities,3_cas_freeCashFlow,FinalScore
0,A,5163000000,2358000000,2805000000,1071000000,941000000,1071000000,-38000000,36000000,16000000,1864000000,4222000000,-152000000,74000000,1460000000,404000000,993000000,3,3,1382000000,930000000,416000000,0,263000000,198000000,679000000,3189000000,850000000,611000000,0,4700000000,4700000000,1107000000,0,6263000000,616000000,794000000,0,2080000000,1791000000,473000000,0,0,0,2624000000,-18000000,4748000000,2407000000,9452000000,4704000000,1109000000,1,1,33,25,238000000,72000000,1021000000,156000000,0,155000000,1590000000,206000000,723000000,299000000,865000000,5339000000,2502000000,2837000000,719000000,846000000,719000000,-70000000,8000000,66000000,1991000000,4493000000,123000000,78000000,1496000000,495000000,920000000,2,2,1441000000,1038000000,417000000,0,303000000,216000000,720000000,3415000000,845000000,776000000,0,4433000000,4433000000,831000000,0,6212000000,75000000,639000000,0,1467000000,2284000000,614000000,0,0,0,3287000000,81000000,4873000000,2359000000,9627000000,4754000000,1948000000,1,2,32,24,308000000,83000000,921000000,119000000,0,119000000,147000000,222000000,469000000,717000000,802000000,6319000000,2912000000,3407000000,1210000000,1347000000,1210000000,-79000000,2000000,92000000,2060000000,4972000000,150000000,81000000,1619000000,441000000,1441000000,3,3,1575000000,1172000000,463000000,0,367000000,222000000,830000000,3799000000,945000000,820000000,0,4956000000,4956000000,981000000,0,6906000000,0,774000000,0,1708000000,2729000000,659000000,0,0,0,3608000000,348000000,5389000000,2729000000,10705000000,5316000000,2091000000,1,2,33,25,321000000,110000000,1485000000,189000000,0,188000000,749000000,236000000,788000000,696000000,1296000000,6848000000,3126000000,3722000000,1254000000,1618000000,1254000000,-75000000,9000000,-39000000,2104000000,5230000000,250000000,84000000,1637000000,467000000,1588000000,4,4,1053000000,1405000000,555000000,0,483000000,282000000,1038000000,3778000000,1100000000,670000000,0,4773000000,4773000000,821000000,0,6738000000,36000000,909000000,0,1861000000,2733000000,536000000,0,0,0,3366000000,324000000,5289000000,2769000000,10516000000,5227000000,1917000000,1,2,34,26,317000000,125000000,1312000000,291000000,0,291000000,338000000,250000000,1139000000,1372000000,1021000000,3
1,AA,10433000000,8537000000,1896000000,-1125000000,876000000,-853000000,-121000000,0,-1193000000,1020000000,9557000000,415000000,121000000,280000000,27000000,-317000000,-6,-6,879000000,660000000,305000000,282000000,611000000,288000000,1644000000,3530000000,7916000000,1414000000,18000000,150000000,202000000,52000000,179000000,11110000000,1000000,1588000000,561000000,2563000000,1799000000,371000000,102000000,0,902000000,6221000000,-555000000,4082000000,1800000000,14640000000,8784000000,967000000,0,1,30,12,713000000,30000000,686000000,379000000,0,0,468000000,0,0,444000000,307000000,9286000000,7969000000,1317000000,-170000000,431000000,-14000000,-146000000,0,-112000000,886000000,8855000000,187000000,146000000,206000000,27000000,319000000,0,0,1607000000,556000000,321000000,112000000,553000000,290000000,1398000000,4520000000,7190000000,1444000000,0,145000000,190000000,45000000,134000000,10344000000,2000000,1494000000,870000000,2761000000,2463000000,515000000,101000000,0,918000000,7112000000,-725000000,3287000000,2465000000,14864000000,9873000000,1759000000,1,1,42,16,653000000,25000000,394000000,353000000,0,0,167000000,0,0,-514000000,41000000,12152000000,9153000000,2999000000,429000000,2077000000,570000000,-195000000,0,-683000000,922000000,10075000000,629000000,195000000,227000000,31000000,1394000000,2,2,1814000000,884000000,538000000,85000000,794000000,358000000,1956000000,5026000000,6623000000,1644000000,7000000,144000000,180000000,36000000,215000000,9977000000,1000000,2048000000,791000000,3223000000,1726000000,599000000,90000000,0,887000000,5531000000,-315000000,4638000000,1727000000,15003000000,8754000000,1803000000,0,1,27,11,664000000,39000000,920000000,390000000,0,0,-565000000,19000000,150000000,1158000000,530000000,12451000000,10212000000,2239000000,-102000000,1331000000,38000000,-106000000,0,-523000000,908000000,11120000000,664000000,106000000,204000000,32000000,808000000,0,0,1363000000,909000000,385000000,350000000,1108000000,417000000,2427000000,5250000000,6493000000,1587000000,2000000,145000000,174000000,29000000,366000000,9533000000,1000000,1987000000,681000000,3004000000,1806000000,486000000,65000000,0,937000000,5207000000,-549000000,5058000000,1807000000,14783000000,8211000000,2246000000,0,1,26,12,617000000,40000000,822000000,480000000,0,0,495000000,72000000,500000000,768000000,342000000,3
2,AAL,45768000000,35379000000,10389000000,1686000000,3700000000,1686000000,-968000000,127000000,-476000000,6689000000,42068000000,570000000,1095000000,1602000000,0,3351000000,3,3,3826000000,1750000000,0,0,0,0,1851000000,8206000000,43732000000,1237000000,0,4091000000,6175000000,2084000000,0,51789000000,4569000000,5741000000,4808000000,18311000000,28875000000,1453000000,5422000000,0,0,41802000000,2264000000,-118000000,33444000000,59995000000,60113000000,-10105000000,0,0,100,55,2318000000,94000000,3815000000,4268000000,0,0,2243000000,178000000,1097000000,1568000000,-453000000,17337000000,24933000000,-7596000000,-8885000000,-11078000000,-8885000000,-1186000000,41000000,811000000,3482000000,28415000000,-2568000000,1227000000,513000000,0,-10226000000,-18,-18,6864000000,1342000000,0,0,0,0,1614000000,11095000000,39738000000,1816000000,0,4091000000,6120000000,2029000000,0,50913000000,4448000000,5331000000,0,16569000000,36573000000,1502000000,7162000000,0,0,52306000000,-6664000000,-6867000000,41021000000,62008000000,68875000000,-5474000000,0,0,120,66,2370000000,91000000,-6543000000,1958000000,0,0,4342000000,43000000,173000000,-10994000000,-8501000000,29882000000,29855000000,27000000,-1993000000,-5065000000,-1993000000,-1782000000,18000000,4299000000,5092000000,34947000000,-555000000,1800000000,1098000000,0,-748000000,-3,-3,12431000000,1505000000,0,0,0,0,1795000000,17336000000,37362000000,2109000000,0,4091000000,6079000000,1988000000,0,49106000000,3995000000,6027000000,0,19005000000,42157000000,1328000000,6239000000,0,0,54777000000,-8638000000,-7340000000,46152000000,66442000000,73782000000,-1669000000,0,0,118,69,2335000000,98000000,704000000,208000000,5000000,204000000,5983000000,0,18000000,-5288000000,496000000,48971000000,39934000000,9037000000,127000000,1763000000,127000000,-1746000000,216000000,169000000,7274000000,47208000000,59000000,1962000000,1815000000,0,2148000000,0,0,8965000000,2138000000,0,0,0,892000000,2279000000,15269000000,38294000000,1904000000,0,4091000000,6150000000,2059000000,0,49447000000,4739000000,6843000000,0,21496000000,38948000000,1258000000,5976000000,0,0,49019000000,-8511000000,-5799000000,43687000000,64716000000,70515000000,-6227000000,0,0,115,67,2298000000,78000000,2173000000,2906000000,0,360000000,-636000000,0,21000000,2631000000,-733000000,1
3,AAP,9709003000,5454257000,4254746000,486896000,677180000,486896000,-39898000,0,464000,3577566000,9031823000,150850000,39898000,3577566000,0,677644000,6,6,418665000,689469000,0,0,0,155241000,4432168000,5695543000,3798538000,52448000,0,992240000,1701996000,709756000,0,5552982000,0,3957850000,519852000,4477702000,2764479000,123250000,334013000,334013000,0,3221742000,3772848000,3549081000,2764479000,11248525000,7699444000,1217841000,0,1,43,24,238371000,37438000,866909000,471648000,0,270129000,462939000,17185000,498435000,882153000,395261000,10106321000,5624707000,4481614000,493021000,749907000,493021000,-46886000,0,-52006000,3731707000,9356414000,157994000,46886000,3731707000,0,697901000,7,7,834992000,749999000,0,0,0,146811000,4538199000,6270001000,3842589000,52329000,0,993590000,1674717000,681127000,0,5569635000,0,4247443000,496472000,4743915000,3047483000,146281000,342445000,342445000,0,3536209000,4196634000,3559512000,3047483000,11839636000,8280124000,1526086000,0,1,46,25,250081000,45271000,969688000,267806000,0,267576000,266897000,56347000,469691000,285997000,701882000,10997989000,6069241000,4928748000,616108000,838717000,616108000,-37791000,0,4999000,4090031000,10159272000,189817000,37791000,4090031000,0,843716000,9,9,601428000,782785000,0,0,0,232245000,4659018000,6275476000,4200121000,73651000,0,993744000,1644961000,651217000,0,5918733000,0,4699058000,481249000,5180307000,3371971000,103034000,410606000,410606000,0,3885611000,4605791000,3128291000,3371971000,12194209000,9065918000,1095169000,0,1,51,27,259933000,63067000,1112262000,289639000,0,289639000,287314000,160925000,906208000,1064112000,822623000,11154722000,6192622000,4962100000,501872000,714151000,501872000,-51060000,0,-14404000,4247949000,10440571000,146815000,51060000,4247949000,0,699747000,8,8,269282000,698613000,0,0,0,163695000,4915262000,6046852000,4297829000,62429000,0,990471000,1611372000,620901000,0,5971630000,185000000,4757909000,427480000,5370389000,3466601000,87214000,415997000,415997000,0,3969812000,4744624000,2678281000,3651601000,12018482000,9340201000,676463000,0,1,57,30,283800000,50978000,722222000,425961000,0,424061000,424448000,336230000,618480000,620704000,296261000,2
4,AAPL,260174000000,161782000000,98392000000,55256000000,63930000000,55256000000,1385000000,4961000000,1807000000,34462000000,196244000000,10481000000,3576000000,18245000000,16217000000,63930000000,2,2,100557000000,45804000000,0,0,0,12352000000,4106000000,162819000000,37378000000,32978000000,0,0,0,0,0,175697000000,16240000000,46236000000,37720000000,105718000000,91807000000,50503000000,0,0,0,142310000000,45898000000,90488000000,108047000000,338516000000,248028000000,57101000000,1,1,54,31,12547000000,6068000000,69391000000,10495000000,0,10495000000,-45896000000,14119000000,66897000000,90976000000,58896000000,274515000000,169559000000,104956000000,57411000000,66288000000,57411000000,890000000,3763000000,803000000,38668000000,208227000000,9680000000,2873000000,19916000000,18752000000,66288000000,3,3,90943000000,37445000000,0,0,0,11264000000,4061000000,143713000000,36766000000,42522000000,0,0,0,0,0,180175000000,13769000000,42296000000,42684000000,105392000000,98667000000,54490000000,0,0,0,153157000000,14966000000,65339000000,112436000000,323888000000,258549000000,38321000000,1,1,63,34,11056000000,6829000000,80674000000,7309000000,0,7309000000,4289000000,14081000000,72358000000,86820000000,73365000000,365817000000,212981000000,152836000000,94680000000,108949000000,94680000000,198000000,2843000000,258000000,43887000000,256868000000,14527000000,2645000000,21973000000,21914000000,108949000000,5,5,62639000000,51506000000,0,0,0,14111000000,6580000000,134836000000,39440000000,48849000000,0,0,0,0,0,216166000000,15613000000,54763000000,47493000000,125481000000,109106000000,53325000000,0,0,0,162431000000,5562000000,63090000000,124719000000,351002000000,287912000000,9355000000,1,1,66,35,11284000000,7906000000,104038000000,11085000000,0,11085000000,14545000000,14467000000,85971000000,93353000000,92953000000,394328000000,223546000000,170782000000,99803000000,119437000000,99803000000,-106000000,2825000000,-334000000,51345000000,274891000000,19300000000,2931000000,25094000000,26251000000,119437000000,6,6,48304000000,60932000000,0,0,0,21223000000,4946000000,135405000000,42117000000,54428000000,0,0,0,0,0,217350000000,21110000000,64115000000,60845000000,153982000000,98959000000,49142000000,0,0,0,148101000000,-3068000000,50672000000,120069000000,352755000000,302083000000,-18577000000,0,0,70,34,11104000000,9038000000,122151000000,10708000000,0,10708000000,22354000000,14841000000,89402000000,110749000000,111443000000,3


In [37]:
# Rename final score to label

df.rename(columns={'FinalScore':'label'},inplace=True)

In [38]:
# Drop column company

df.drop('company',axis=1,inplace=True)

In [39]:
df.head()

Unnamed: 0,0_inc_totalRevenue,0_inc_costOfRevenue,0_inc_grossProfit,0_inc_netIncome,0_inc_operatingIncome,0_inc_netIncomeContinuousOperations,0_inc_netInterestIncome,0_inc_interestIncome,0_inc_otherIncomeExpense,0_inc_operatingExpense,0_inc_totalExpenses,0_inc_taxProvision,0_inc_interestExpense,0_inc_SGA,0_inc_researchAndDevelopment,0_inc_ebit,0_inc_dilutedEPS,0_inc_basicEPS,0_bal_cashEquivalent,0_bal_receivables,0_bal_finishedGoods,0_bal_workInProcess,0_bal_rawMaterials,0_bal_otherCurrentAssets,0_bal_inventory,0_bal_currentAssets,0_bal_netPPE,0_bal_otherNonCurrentAssets,0_bal_financialAssets,0_bal_goodwill,0_bal_goodwillAndOtherIntangibleAssets,0_bal_otherIntangibleAssets,0_bal_nonCurrentAccountsReceivable,0_bal_totalNonCurrentAssets,0_bal_currentDebt,0_bal_payablesAndAccruedExpenses,0_bal_otherCurrentLiabilities,0_bal_currentLiabilities,0_bal_longTermDebt,0_bal_otherNonCurrentLiabilities,0_bal_nonCurrentDeferredLiabilities,0_bal_nonCurrentDeferredTaxesLiabilities,0_bal_longTermProvisions,0_bal_totalNonCurrentLiabilities,0_bal_retainedEarnings,0_bal_stockholdersEquity,0_bal_totalDebt,0_bal_totalAssets,0_bal_totalLiabilities,0_bal_workingCapital,0_bal_quickRatio,0_bal_currentRatio,0_bal_debtCapitalRatio,0_bal_debtAssetRatio,0_cas_depreciation,0_cas_stockBasedCompensation,0_cas_cashFlowOperatingActivities,0_cas_capitalExpenditure,0_cas_saleOfBusiness,0_cas_purchaseOfPPE,0_cas_cashFlowInvestingActivities,0_cas_cashDividendsPaid,0_cas_repurchaseOfCapitalStock,0_cas_cashFlowFinancingActivities,0_cas_freeCashFlow,1_inc_totalRevenue,1_inc_costOfRevenue,1_inc_grossProfit,1_inc_netIncome,1_inc_operatingIncome,1_inc_netIncomeContinuousOperations,1_inc_netInterestIncome,1_inc_interestIncome,1_inc_otherIncomeExpense,1_inc_operatingExpense,1_inc_totalExpenses,1_inc_taxProvision,1_inc_interestExpense,1_inc_SGA,1_inc_researchAndDevelopment,1_inc_ebit,1_inc_dilutedEPS,1_inc_basicEPS,1_bal_cashEquivalent,1_bal_receivables,1_bal_finishedGoods,1_bal_workInProcess,1_bal_rawMaterials,1_bal_otherCurrentAssets,1_bal_inventory,1_bal_currentAssets,1_bal_netPPE,1_bal_otherNonCurrentAssets,1_bal_financialAssets,1_bal_goodwill,1_bal_goodwillAndOtherIntangibleAssets,1_bal_otherIntangibleAssets,1_bal_nonCurrentAccountsReceivable,1_bal_totalNonCurrentAssets,1_bal_currentDebt,1_bal_payablesAndAccruedExpenses,1_bal_otherCurrentLiabilities,1_bal_currentLiabilities,1_bal_longTermDebt,1_bal_otherNonCurrentLiabilities,1_bal_nonCurrentDeferredLiabilities,1_bal_nonCurrentDeferredTaxesLiabilities,1_bal_longTermProvisions,1_bal_totalNonCurrentLiabilities,1_bal_retainedEarnings,1_bal_stockholdersEquity,1_bal_totalDebt,1_bal_totalAssets,1_bal_totalLiabilities,1_bal_workingCapital,1_bal_quickRatio,1_bal_currentRatio,1_bal_debtCapitalRatio,1_bal_debtAssetRatio,1_cas_depreciation,1_cas_stockBasedCompensation,1_cas_cashFlowOperatingActivities,1_cas_capitalExpenditure,1_cas_saleOfBusiness,1_cas_purchaseOfPPE,1_cas_cashFlowInvestingActivities,1_cas_cashDividendsPaid,1_cas_repurchaseOfCapitalStock,1_cas_cashFlowFinancingActivities,1_cas_freeCashFlow,2_inc_totalRevenue,2_inc_costOfRevenue,2_inc_grossProfit,2_inc_netIncome,2_inc_operatingIncome,2_inc_netIncomeContinuousOperations,2_inc_netInterestIncome,2_inc_interestIncome,2_inc_otherIncomeExpense,2_inc_operatingExpense,2_inc_totalExpenses,2_inc_taxProvision,2_inc_interestExpense,2_inc_SGA,2_inc_researchAndDevelopment,2_inc_ebit,2_inc_dilutedEPS,2_inc_basicEPS,2_bal_cashEquivalent,2_bal_receivables,2_bal_finishedGoods,2_bal_workInProcess,2_bal_rawMaterials,2_bal_otherCurrentAssets,2_bal_inventory,2_bal_currentAssets,2_bal_netPPE,2_bal_otherNonCurrentAssets,2_bal_financialAssets,2_bal_goodwill,2_bal_goodwillAndOtherIntangibleAssets,2_bal_otherIntangibleAssets,2_bal_nonCurrentAccountsReceivable,2_bal_totalNonCurrentAssets,2_bal_currentDebt,2_bal_payablesAndAccruedExpenses,2_bal_otherCurrentLiabilities,2_bal_currentLiabilities,2_bal_longTermDebt,2_bal_otherNonCurrentLiabilities,2_bal_nonCurrentDeferredLiabilities,2_bal_nonCurrentDeferredTaxesLiabilities,2_bal_longTermProvisions,2_bal_totalNonCurrentLiabilities,2_bal_retainedEarnings,2_bal_stockholdersEquity,2_bal_totalDebt,2_bal_totalAssets,2_bal_totalLiabilities,2_bal_workingCapital,2_bal_quickRatio,2_bal_currentRatio,2_bal_debtCapitalRatio,2_bal_debtAssetRatio,2_cas_depreciation,2_cas_stockBasedCompensation,2_cas_cashFlowOperatingActivities,2_cas_capitalExpenditure,2_cas_saleOfBusiness,2_cas_purchaseOfPPE,2_cas_cashFlowInvestingActivities,2_cas_cashDividendsPaid,2_cas_repurchaseOfCapitalStock,2_cas_cashFlowFinancingActivities,2_cas_freeCashFlow,3_inc_totalRevenue,3_inc_costOfRevenue,3_inc_grossProfit,3_inc_netIncome,3_inc_operatingIncome,3_inc_netIncomeContinuousOperations,3_inc_netInterestIncome,3_inc_interestIncome,3_inc_otherIncomeExpense,3_inc_operatingExpense,3_inc_totalExpenses,3_inc_taxProvision,3_inc_interestExpense,3_inc_SGA,3_inc_researchAndDevelopment,3_inc_ebit,3_inc_dilutedEPS,3_inc_basicEPS,3_bal_cashEquivalent,3_bal_receivables,3_bal_finishedGoods,3_bal_workInProcess,3_bal_rawMaterials,3_bal_otherCurrentAssets,3_bal_inventory,3_bal_currentAssets,3_bal_netPPE,3_bal_otherNonCurrentAssets,3_bal_financialAssets,3_bal_goodwill,3_bal_goodwillAndOtherIntangibleAssets,3_bal_otherIntangibleAssets,3_bal_nonCurrentAccountsReceivable,3_bal_totalNonCurrentAssets,3_bal_currentDebt,3_bal_payablesAndAccruedExpenses,3_bal_otherCurrentLiabilities,3_bal_currentLiabilities,3_bal_longTermDebt,3_bal_otherNonCurrentLiabilities,3_bal_nonCurrentDeferredLiabilities,3_bal_nonCurrentDeferredTaxesLiabilities,3_bal_longTermProvisions,3_bal_totalNonCurrentLiabilities,3_bal_retainedEarnings,3_bal_stockholdersEquity,3_bal_totalDebt,3_bal_totalAssets,3_bal_totalLiabilities,3_bal_workingCapital,3_bal_quickRatio,3_bal_currentRatio,3_bal_debtCapitalRatio,3_bal_debtAssetRatio,3_cas_depreciation,3_cas_stockBasedCompensation,3_cas_cashFlowOperatingActivities,3_cas_capitalExpenditure,3_cas_saleOfBusiness,3_cas_purchaseOfPPE,3_cas_cashFlowInvestingActivities,3_cas_cashDividendsPaid,3_cas_repurchaseOfCapitalStock,3_cas_cashFlowFinancingActivities,3_cas_freeCashFlow,label
0,5163000000,2358000000,2805000000,1071000000,941000000,1071000000,-38000000,36000000,16000000,1864000000,4222000000,-152000000,74000000,1460000000,404000000,993000000,3,3,1382000000,930000000,416000000,0,263000000,198000000,679000000,3189000000,850000000,611000000,0,4700000000,4700000000,1107000000,0,6263000000,616000000,794000000,0,2080000000,1791000000,473000000,0,0,0,2624000000,-18000000,4748000000,2407000000,9452000000,4704000000,1109000000,1,1,33,25,238000000,72000000,1021000000,156000000,0,155000000,1590000000,206000000,723000000,299000000,865000000,5339000000,2502000000,2837000000,719000000,846000000,719000000,-70000000,8000000,66000000,1991000000,4493000000,123000000,78000000,1496000000,495000000,920000000,2,2,1441000000,1038000000,417000000,0,303000000,216000000,720000000,3415000000,845000000,776000000,0,4433000000,4433000000,831000000,0,6212000000,75000000,639000000,0,1467000000,2284000000,614000000,0,0,0,3287000000,81000000,4873000000,2359000000,9627000000,4754000000,1948000000,1,2,32,24,308000000,83000000,921000000,119000000,0,119000000,147000000,222000000,469000000,717000000,802000000,6319000000,2912000000,3407000000,1210000000,1347000000,1210000000,-79000000,2000000,92000000,2060000000,4972000000,150000000,81000000,1619000000,441000000,1441000000,3,3,1575000000,1172000000,463000000,0,367000000,222000000,830000000,3799000000,945000000,820000000,0,4956000000,4956000000,981000000,0,6906000000,0,774000000,0,1708000000,2729000000,659000000,0,0,0,3608000000,348000000,5389000000,2729000000,10705000000,5316000000,2091000000,1,2,33,25,321000000,110000000,1485000000,189000000,0,188000000,749000000,236000000,788000000,696000000,1296000000,6848000000,3126000000,3722000000,1254000000,1618000000,1254000000,-75000000,9000000,-39000000,2104000000,5230000000,250000000,84000000,1637000000,467000000,1588000000,4,4,1053000000,1405000000,555000000,0,483000000,282000000,1038000000,3778000000,1100000000,670000000,0,4773000000,4773000000,821000000,0,6738000000,36000000,909000000,0,1861000000,2733000000,536000000,0,0,0,3366000000,324000000,5289000000,2769000000,10516000000,5227000000,1917000000,1,2,34,26,317000000,125000000,1312000000,291000000,0,291000000,338000000,250000000,1139000000,1372000000,1021000000,3
1,10433000000,8537000000,1896000000,-1125000000,876000000,-853000000,-121000000,0,-1193000000,1020000000,9557000000,415000000,121000000,280000000,27000000,-317000000,-6,-6,879000000,660000000,305000000,282000000,611000000,288000000,1644000000,3530000000,7916000000,1414000000,18000000,150000000,202000000,52000000,179000000,11110000000,1000000,1588000000,561000000,2563000000,1799000000,371000000,102000000,0,902000000,6221000000,-555000000,4082000000,1800000000,14640000000,8784000000,967000000,0,1,30,12,713000000,30000000,686000000,379000000,0,0,468000000,0,0,444000000,307000000,9286000000,7969000000,1317000000,-170000000,431000000,-14000000,-146000000,0,-112000000,886000000,8855000000,187000000,146000000,206000000,27000000,319000000,0,0,1607000000,556000000,321000000,112000000,553000000,290000000,1398000000,4520000000,7190000000,1444000000,0,145000000,190000000,45000000,134000000,10344000000,2000000,1494000000,870000000,2761000000,2463000000,515000000,101000000,0,918000000,7112000000,-725000000,3287000000,2465000000,14864000000,9873000000,1759000000,1,1,42,16,653000000,25000000,394000000,353000000,0,0,167000000,0,0,-514000000,41000000,12152000000,9153000000,2999000000,429000000,2077000000,570000000,-195000000,0,-683000000,922000000,10075000000,629000000,195000000,227000000,31000000,1394000000,2,2,1814000000,884000000,538000000,85000000,794000000,358000000,1956000000,5026000000,6623000000,1644000000,7000000,144000000,180000000,36000000,215000000,9977000000,1000000,2048000000,791000000,3223000000,1726000000,599000000,90000000,0,887000000,5531000000,-315000000,4638000000,1727000000,15003000000,8754000000,1803000000,0,1,27,11,664000000,39000000,920000000,390000000,0,0,-565000000,19000000,150000000,1158000000,530000000,12451000000,10212000000,2239000000,-102000000,1331000000,38000000,-106000000,0,-523000000,908000000,11120000000,664000000,106000000,204000000,32000000,808000000,0,0,1363000000,909000000,385000000,350000000,1108000000,417000000,2427000000,5250000000,6493000000,1587000000,2000000,145000000,174000000,29000000,366000000,9533000000,1000000,1987000000,681000000,3004000000,1806000000,486000000,65000000,0,937000000,5207000000,-549000000,5058000000,1807000000,14783000000,8211000000,2246000000,0,1,26,12,617000000,40000000,822000000,480000000,0,0,495000000,72000000,500000000,768000000,342000000,3
2,45768000000,35379000000,10389000000,1686000000,3700000000,1686000000,-968000000,127000000,-476000000,6689000000,42068000000,570000000,1095000000,1602000000,0,3351000000,3,3,3826000000,1750000000,0,0,0,0,1851000000,8206000000,43732000000,1237000000,0,4091000000,6175000000,2084000000,0,51789000000,4569000000,5741000000,4808000000,18311000000,28875000000,1453000000,5422000000,0,0,41802000000,2264000000,-118000000,33444000000,59995000000,60113000000,-10105000000,0,0,100,55,2318000000,94000000,3815000000,4268000000,0,0,2243000000,178000000,1097000000,1568000000,-453000000,17337000000,24933000000,-7596000000,-8885000000,-11078000000,-8885000000,-1186000000,41000000,811000000,3482000000,28415000000,-2568000000,1227000000,513000000,0,-10226000000,-18,-18,6864000000,1342000000,0,0,0,0,1614000000,11095000000,39738000000,1816000000,0,4091000000,6120000000,2029000000,0,50913000000,4448000000,5331000000,0,16569000000,36573000000,1502000000,7162000000,0,0,52306000000,-6664000000,-6867000000,41021000000,62008000000,68875000000,-5474000000,0,0,120,66,2370000000,91000000,-6543000000,1958000000,0,0,4342000000,43000000,173000000,-10994000000,-8501000000,29882000000,29855000000,27000000,-1993000000,-5065000000,-1993000000,-1782000000,18000000,4299000000,5092000000,34947000000,-555000000,1800000000,1098000000,0,-748000000,-3,-3,12431000000,1505000000,0,0,0,0,1795000000,17336000000,37362000000,2109000000,0,4091000000,6079000000,1988000000,0,49106000000,3995000000,6027000000,0,19005000000,42157000000,1328000000,6239000000,0,0,54777000000,-8638000000,-7340000000,46152000000,66442000000,73782000000,-1669000000,0,0,118,69,2335000000,98000000,704000000,208000000,5000000,204000000,5983000000,0,18000000,-5288000000,496000000,48971000000,39934000000,9037000000,127000000,1763000000,127000000,-1746000000,216000000,169000000,7274000000,47208000000,59000000,1962000000,1815000000,0,2148000000,0,0,8965000000,2138000000,0,0,0,892000000,2279000000,15269000000,38294000000,1904000000,0,4091000000,6150000000,2059000000,0,49447000000,4739000000,6843000000,0,21496000000,38948000000,1258000000,5976000000,0,0,49019000000,-8511000000,-5799000000,43687000000,64716000000,70515000000,-6227000000,0,0,115,67,2298000000,78000000,2173000000,2906000000,0,360000000,-636000000,0,21000000,2631000000,-733000000,1
3,9709003000,5454257000,4254746000,486896000,677180000,486896000,-39898000,0,464000,3577566000,9031823000,150850000,39898000,3577566000,0,677644000,6,6,418665000,689469000,0,0,0,155241000,4432168000,5695543000,3798538000,52448000,0,992240000,1701996000,709756000,0,5552982000,0,3957850000,519852000,4477702000,2764479000,123250000,334013000,334013000,0,3221742000,3772848000,3549081000,2764479000,11248525000,7699444000,1217841000,0,1,43,24,238371000,37438000,866909000,471648000,0,270129000,462939000,17185000,498435000,882153000,395261000,10106321000,5624707000,4481614000,493021000,749907000,493021000,-46886000,0,-52006000,3731707000,9356414000,157994000,46886000,3731707000,0,697901000,7,7,834992000,749999000,0,0,0,146811000,4538199000,6270001000,3842589000,52329000,0,993590000,1674717000,681127000,0,5569635000,0,4247443000,496472000,4743915000,3047483000,146281000,342445000,342445000,0,3536209000,4196634000,3559512000,3047483000,11839636000,8280124000,1526086000,0,1,46,25,250081000,45271000,969688000,267806000,0,267576000,266897000,56347000,469691000,285997000,701882000,10997989000,6069241000,4928748000,616108000,838717000,616108000,-37791000,0,4999000,4090031000,10159272000,189817000,37791000,4090031000,0,843716000,9,9,601428000,782785000,0,0,0,232245000,4659018000,6275476000,4200121000,73651000,0,993744000,1644961000,651217000,0,5918733000,0,4699058000,481249000,5180307000,3371971000,103034000,410606000,410606000,0,3885611000,4605791000,3128291000,3371971000,12194209000,9065918000,1095169000,0,1,51,27,259933000,63067000,1112262000,289639000,0,289639000,287314000,160925000,906208000,1064112000,822623000,11154722000,6192622000,4962100000,501872000,714151000,501872000,-51060000,0,-14404000,4247949000,10440571000,146815000,51060000,4247949000,0,699747000,8,8,269282000,698613000,0,0,0,163695000,4915262000,6046852000,4297829000,62429000,0,990471000,1611372000,620901000,0,5971630000,185000000,4757909000,427480000,5370389000,3466601000,87214000,415997000,415997000,0,3969812000,4744624000,2678281000,3651601000,12018482000,9340201000,676463000,0,1,57,30,283800000,50978000,722222000,425961000,0,424061000,424448000,336230000,618480000,620704000,296261000,2
4,260174000000,161782000000,98392000000,55256000000,63930000000,55256000000,1385000000,4961000000,1807000000,34462000000,196244000000,10481000000,3576000000,18245000000,16217000000,63930000000,2,2,100557000000,45804000000,0,0,0,12352000000,4106000000,162819000000,37378000000,32978000000,0,0,0,0,0,175697000000,16240000000,46236000000,37720000000,105718000000,91807000000,50503000000,0,0,0,142310000000,45898000000,90488000000,108047000000,338516000000,248028000000,57101000000,1,1,54,31,12547000000,6068000000,69391000000,10495000000,0,10495000000,-45896000000,14119000000,66897000000,90976000000,58896000000,274515000000,169559000000,104956000000,57411000000,66288000000,57411000000,890000000,3763000000,803000000,38668000000,208227000000,9680000000,2873000000,19916000000,18752000000,66288000000,3,3,90943000000,37445000000,0,0,0,11264000000,4061000000,143713000000,36766000000,42522000000,0,0,0,0,0,180175000000,13769000000,42296000000,42684000000,105392000000,98667000000,54490000000,0,0,0,153157000000,14966000000,65339000000,112436000000,323888000000,258549000000,38321000000,1,1,63,34,11056000000,6829000000,80674000000,7309000000,0,7309000000,4289000000,14081000000,72358000000,86820000000,73365000000,365817000000,212981000000,152836000000,94680000000,108949000000,94680000000,198000000,2843000000,258000000,43887000000,256868000000,14527000000,2645000000,21973000000,21914000000,108949000000,5,5,62639000000,51506000000,0,0,0,14111000000,6580000000,134836000000,39440000000,48849000000,0,0,0,0,0,216166000000,15613000000,54763000000,47493000000,125481000000,109106000000,53325000000,0,0,0,162431000000,5562000000,63090000000,124719000000,351002000000,287912000000,9355000000,1,1,66,35,11284000000,7906000000,104038000000,11085000000,0,11085000000,14545000000,14467000000,85971000000,93353000000,92953000000,394328000000,223546000000,170782000000,99803000000,119437000000,99803000000,-106000000,2825000000,-334000000,51345000000,274891000000,19300000000,2931000000,25094000000,26251000000,119437000000,6,6,48304000000,60932000000,0,0,0,21223000000,4946000000,135405000000,42117000000,54428000000,0,0,0,0,0,217350000000,21110000000,64115000000,60845000000,153982000000,98959000000,49142000000,0,0,0,148101000000,-3068000000,50672000000,120069000000,352755000000,302083000000,-18577000000,0,0,70,34,11104000000,9038000000,122151000000,10708000000,0,10708000000,22354000000,14841000000,89402000000,110749000000,111443000000,3


In [40]:
df['label'].value_counts()

label
3    315
2    273
1     83
4     29
0     28
Name: count, dtype: int64

### Split Data into Independent and Dependent Columns

In [41]:
X = df.drop('label',axis=1)
y = df['label']

In [42]:
print(X.shape,y.shape)

(728, 260) (728,)


## Train Test Split

In [43]:
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.2,random_state=66,shuffle=True,stratify=y)

In [44]:
X_train.head()

Unnamed: 0,0_inc_totalRevenue,0_inc_costOfRevenue,0_inc_grossProfit,0_inc_netIncome,0_inc_operatingIncome,0_inc_netIncomeContinuousOperations,0_inc_netInterestIncome,0_inc_interestIncome,0_inc_otherIncomeExpense,0_inc_operatingExpense,0_inc_totalExpenses,0_inc_taxProvision,0_inc_interestExpense,0_inc_SGA,0_inc_researchAndDevelopment,0_inc_ebit,0_inc_dilutedEPS,0_inc_basicEPS,0_bal_cashEquivalent,0_bal_receivables,0_bal_finishedGoods,0_bal_workInProcess,0_bal_rawMaterials,0_bal_otherCurrentAssets,0_bal_inventory,0_bal_currentAssets,0_bal_netPPE,0_bal_otherNonCurrentAssets,0_bal_financialAssets,0_bal_goodwill,0_bal_goodwillAndOtherIntangibleAssets,0_bal_otherIntangibleAssets,0_bal_nonCurrentAccountsReceivable,0_bal_totalNonCurrentAssets,0_bal_currentDebt,0_bal_payablesAndAccruedExpenses,0_bal_otherCurrentLiabilities,0_bal_currentLiabilities,0_bal_longTermDebt,0_bal_otherNonCurrentLiabilities,0_bal_nonCurrentDeferredLiabilities,0_bal_nonCurrentDeferredTaxesLiabilities,0_bal_longTermProvisions,0_bal_totalNonCurrentLiabilities,0_bal_retainedEarnings,0_bal_stockholdersEquity,0_bal_totalDebt,0_bal_totalAssets,0_bal_totalLiabilities,0_bal_workingCapital,0_bal_quickRatio,0_bal_currentRatio,0_bal_debtCapitalRatio,0_bal_debtAssetRatio,0_cas_depreciation,0_cas_stockBasedCompensation,0_cas_cashFlowOperatingActivities,0_cas_capitalExpenditure,0_cas_saleOfBusiness,0_cas_purchaseOfPPE,0_cas_cashFlowInvestingActivities,0_cas_cashDividendsPaid,0_cas_repurchaseOfCapitalStock,0_cas_cashFlowFinancingActivities,0_cas_freeCashFlow,1_inc_totalRevenue,1_inc_costOfRevenue,1_inc_grossProfit,1_inc_netIncome,1_inc_operatingIncome,1_inc_netIncomeContinuousOperations,1_inc_netInterestIncome,1_inc_interestIncome,1_inc_otherIncomeExpense,1_inc_operatingExpense,1_inc_totalExpenses,1_inc_taxProvision,1_inc_interestExpense,1_inc_SGA,1_inc_researchAndDevelopment,1_inc_ebit,1_inc_dilutedEPS,1_inc_basicEPS,1_bal_cashEquivalent,1_bal_receivables,1_bal_finishedGoods,1_bal_workInProcess,1_bal_rawMaterials,1_bal_otherCurrentAssets,1_bal_inventory,1_bal_currentAssets,1_bal_netPPE,1_bal_otherNonCurrentAssets,1_bal_financialAssets,1_bal_goodwill,1_bal_goodwillAndOtherIntangibleAssets,1_bal_otherIntangibleAssets,1_bal_nonCurrentAccountsReceivable,1_bal_totalNonCurrentAssets,1_bal_currentDebt,1_bal_payablesAndAccruedExpenses,1_bal_otherCurrentLiabilities,1_bal_currentLiabilities,1_bal_longTermDebt,1_bal_otherNonCurrentLiabilities,1_bal_nonCurrentDeferredLiabilities,1_bal_nonCurrentDeferredTaxesLiabilities,1_bal_longTermProvisions,1_bal_totalNonCurrentLiabilities,1_bal_retainedEarnings,1_bal_stockholdersEquity,1_bal_totalDebt,1_bal_totalAssets,1_bal_totalLiabilities,1_bal_workingCapital,1_bal_quickRatio,1_bal_currentRatio,1_bal_debtCapitalRatio,1_bal_debtAssetRatio,1_cas_depreciation,1_cas_stockBasedCompensation,1_cas_cashFlowOperatingActivities,1_cas_capitalExpenditure,1_cas_saleOfBusiness,1_cas_purchaseOfPPE,1_cas_cashFlowInvestingActivities,1_cas_cashDividendsPaid,1_cas_repurchaseOfCapitalStock,1_cas_cashFlowFinancingActivities,1_cas_freeCashFlow,2_inc_totalRevenue,2_inc_costOfRevenue,2_inc_grossProfit,2_inc_netIncome,2_inc_operatingIncome,2_inc_netIncomeContinuousOperations,2_inc_netInterestIncome,2_inc_interestIncome,2_inc_otherIncomeExpense,2_inc_operatingExpense,2_inc_totalExpenses,2_inc_taxProvision,2_inc_interestExpense,2_inc_SGA,2_inc_researchAndDevelopment,2_inc_ebit,2_inc_dilutedEPS,2_inc_basicEPS,2_bal_cashEquivalent,2_bal_receivables,2_bal_finishedGoods,2_bal_workInProcess,2_bal_rawMaterials,2_bal_otherCurrentAssets,2_bal_inventory,2_bal_currentAssets,2_bal_netPPE,2_bal_otherNonCurrentAssets,2_bal_financialAssets,2_bal_goodwill,2_bal_goodwillAndOtherIntangibleAssets,2_bal_otherIntangibleAssets,2_bal_nonCurrentAccountsReceivable,2_bal_totalNonCurrentAssets,2_bal_currentDebt,2_bal_payablesAndAccruedExpenses,2_bal_otherCurrentLiabilities,2_bal_currentLiabilities,2_bal_longTermDebt,2_bal_otherNonCurrentLiabilities,2_bal_nonCurrentDeferredLiabilities,2_bal_nonCurrentDeferredTaxesLiabilities,2_bal_longTermProvisions,2_bal_totalNonCurrentLiabilities,2_bal_retainedEarnings,2_bal_stockholdersEquity,2_bal_totalDebt,2_bal_totalAssets,2_bal_totalLiabilities,2_bal_workingCapital,2_bal_quickRatio,2_bal_currentRatio,2_bal_debtCapitalRatio,2_bal_debtAssetRatio,2_cas_depreciation,2_cas_stockBasedCompensation,2_cas_cashFlowOperatingActivities,2_cas_capitalExpenditure,2_cas_saleOfBusiness,2_cas_purchaseOfPPE,2_cas_cashFlowInvestingActivities,2_cas_cashDividendsPaid,2_cas_repurchaseOfCapitalStock,2_cas_cashFlowFinancingActivities,2_cas_freeCashFlow,3_inc_totalRevenue,3_inc_costOfRevenue,3_inc_grossProfit,3_inc_netIncome,3_inc_operatingIncome,3_inc_netIncomeContinuousOperations,3_inc_netInterestIncome,3_inc_interestIncome,3_inc_otherIncomeExpense,3_inc_operatingExpense,3_inc_totalExpenses,3_inc_taxProvision,3_inc_interestExpense,3_inc_SGA,3_inc_researchAndDevelopment,3_inc_ebit,3_inc_dilutedEPS,3_inc_basicEPS,3_bal_cashEquivalent,3_bal_receivables,3_bal_finishedGoods,3_bal_workInProcess,3_bal_rawMaterials,3_bal_otherCurrentAssets,3_bal_inventory,3_bal_currentAssets,3_bal_netPPE,3_bal_otherNonCurrentAssets,3_bal_financialAssets,3_bal_goodwill,3_bal_goodwillAndOtherIntangibleAssets,3_bal_otherIntangibleAssets,3_bal_nonCurrentAccountsReceivable,3_bal_totalNonCurrentAssets,3_bal_currentDebt,3_bal_payablesAndAccruedExpenses,3_bal_otherCurrentLiabilities,3_bal_currentLiabilities,3_bal_longTermDebt,3_bal_otherNonCurrentLiabilities,3_bal_nonCurrentDeferredLiabilities,3_bal_nonCurrentDeferredTaxesLiabilities,3_bal_longTermProvisions,3_bal_totalNonCurrentLiabilities,3_bal_retainedEarnings,3_bal_stockholdersEquity,3_bal_totalDebt,3_bal_totalAssets,3_bal_totalLiabilities,3_bal_workingCapital,3_bal_quickRatio,3_bal_currentRatio,3_bal_debtCapitalRatio,3_bal_debtAssetRatio,3_cas_depreciation,3_cas_stockBasedCompensation,3_cas_cashFlowOperatingActivities,3_cas_capitalExpenditure,3_cas_saleOfBusiness,3_cas_purchaseOfPPE,3_cas_cashFlowInvestingActivities,3_cas_cashDividendsPaid,3_cas_repurchaseOfCapitalStock,3_cas_cashFlowFinancingActivities,3_cas_freeCashFlow
228,5333700000,2818300000,2515400000,790900000,1056000000,790900000,-13500000,400000,1200000,1459400000,4277700000,252800000,13900000,1459400000,0,1057600000,1,1,174900000,741800000,0,0,0,157400000,1366400000,2457200000,1266400000,76300000,0,0,0,0,0,1342700000,100400000,444300000,0,544700000,490200000,0,99400000,99400000,0,589600000,2633900000,2665600000,590600000,3799900000,1134300000,1912500000,2,4,18,15,148700000,5700000,842700000,246400000,0,246400000,239700000,498600000,0,595100000,596300000,5647300000,3079500000,2567800000,859100000,1140400000,859100000,-9100000,600000,1400000,1427400000,4506900000,273600000,9700000,1427400000,0,1142400000,1,1,245700000,769400000,0,0,0,140300000,1337500000,2499600000,1273700000,191400000,0,0,0,0,0,1465100000,133600000,479100000,0,612700000,516500000,0,102300000,102300000,0,618800000,2689600000,2733200000,650100000,3964700000,1231500000,1886900000,1,4,19,16,162400000,5700000,1101800000,168100000,0,168100000,281700000,803400000,52000000,754400000,933700000,6010900000,3233700000,2777200000,925000000,1217400000,925000000,-9600000,100000,0,1559800000,4793500000,282800000,9700000,1559800000,0,1217500000,1,1,236200000,900200000,0,0,0,188100000,1523600000,2856600000,1261500000,180900000,0,0,0,0,0,1442400000,150800000,531400000,0,682200000,486000000,0,88600000,88600000,0,574600000,2970900000,3042200000,636800000,4299000000,1256800000,2174400000,1,4,17,14,170700000,5600000,770100000,156600000,0,156600000,148500000,643700000,0,627100000,613500000,6980600000,3764800000,3215800000,1086900000,1453600000,1086900000,-13600000,700000,0,1762200000,5527000000,353100000,14300000,1762200000,0,1454300000,1,1,230100000,1013200000,0,0,0,165400000,1708000000,3124800000,1253000000,170800000,0,0,0,0,0,1423800000,293700000,496100000,0,789800000,508400000,3500000,83700000,83700000,0,595600000,3218700000,3163200000,802100000,4548600000,1385400000,2335000000,1,3,20,17,176600000,7200000,941000000,173800000,0,173800000,163000000,711300000,237800000,774900000,767200000
399,4200819000,1682234000,2518585000,1107835000,1402939000,1107835000,13023000,13023000,13023000,1115646000,2797880000,308127000,0,1115646000,0,1402939000,1,1,1331020000,540330000,225846000,0,134885000,0,360731000,2316309000,298640000,53973000,0,1331643000,2383748000,1052105000,0,2834043000,0,402837000,0,661097000,0,30505000,287469000,0,0,317974000,5022480000,4171281000,0,5150352000,979071000,1655212000,2,3,0,0,64814000,63356000,1113762000,110398000,0,101661000,326724000,0,707300000,628506000,1003364000,4598638000,1874758000,2723880000,1409594000,1633153000,1409594000,-6996000,0,-6996000,1090727000,2965485000,216563000,6996000,1090727000,0,1633153000,1,1,2061767000,666012000,177919000,0,155166000,0,333085000,3140955000,314656000,70475000,0,1331643000,2390689000,1059046000,0,3061761000,0,462886000,0,749988000,0,27432000,264436000,0,0,291868000,6432074000,5160860000,0,6202716000,1041856000,2390967000,3,4,0,0,60973000,70289000,1364163000,67272000,0,48722000,472487000,0,595918000,526068000,1296891000,5541352000,2432839000,3108513000,1377475000,1797467000,1377475000,0,0,3952000,1311046000,3743885000,423944000,0,1311046000,0,1797467000,1,1,3076189000,896658000,243492000,0,349865000,0,593357000,4682110000,313753000,80252000,0,1331643000,2404029000,1072386000,0,3122674000,0,645626000,0,965076000,0,29508000,243249000,0,0,272757000,7809549000,6566951000,0,7804784000,1237833000,3717034000,4,4,0,0,50155000,70483000,1155741000,57453000,0,43868000,992022000,0,13830000,-34821000,1098288000,6311050000,3136483000,3174567000,1191624000,1584721000,1191624000,0,0,-12757000,1589846000,4726329000,380340000,0,1589846000,0,1584721000,1,1,2669455000,1016203000,466551000,1688000,467392000,109823000,935631000,4764897000,516897000,134478000,0,1417941000,2638351000,1220410000,0,3528208000,0,886204000,0,1001978000,0,42286000,223800000,0,0,266086000,9001173000,7025041000,0,8293105000,1268064000,3762919000,3,4,0,0,61241000,64109000,887699000,212153000,0,188726000,161367000,0,771028000,706938000,675546000
249,2988100000,1026800000,1961300000,137000000,211300000,138400000,-92100000,0,7200000,1750000000,2776800000,-12000000,92100000,707700000,492600000,218500000,0,0,1086400000,30200000,0,0,0,48900000,0,1575300000,455200000,17200000,0,2976500000,4074200000,1097700000,0,4725900000,18400000,438300000,93800000,2001100000,2569700000,17700000,755300000,100900000,0,3518000000,-153500000,772000000,2588100000,6301200000,5519100000,-425800000,0,0,77,41,209700000,147000000,723400000,92300000,0,87600000,135300000,0,458600000,456900000,631100000,3316700000,1158600000,2158100000,-495100000,-358900000,-494100000,-91300000,0,-45200000,2517000000,3675600000,-1300000,91300000,762300000,560400000,-404100000,-2,-2,765200000,41800000,0,0,0,60800000,0,1291300000,399300000,36000000,0,3275100000,4530200000,1255100000,0,5141600000,24300000,578400000,200000,2314200000,3256800000,56400000,817100000,92000000,0,4130500000,-1190900000,-12900000,3281100000,6432900000,6444700000,-1022900000,0,0,100,51,202700000,191500000,764600000,81500000,0,66500000,482300000,0,541700000,581700000,683100000,3815700000,1372200000,2443500000,242300000,381800000,242800000,-126000000,0,-2200000,2061700000,3433900000,10800000,126000000,849700000,706300000,379600000,1,1,1255700000,63600000,0,0,0,109900000,0,1889800000,329200000,91200000,0,3540800000,4925500000,1384700000,0,5527300000,24100000,522500000,170000000,2436700000,4000900000,77700000,818600000,75300000,0,4897200000,-1474600000,81700000,4025000000,7417100000,7333900000,-546900000,0,0,98,54,199600000,207900000,829300000,253200000,0,51100000,635600000,0,526000000,-298100000,576100000,4091300000,1484500000,2606800000,352200000,514500000,352900000,-146300000,0,-11700000,2092300000,3576800000,3600000,146300000,797800000,794000000,502800000,2,2,774000000,60100000,0,0,0,271800000,0,1582600000,309700000,95000000,0,3536900000,4789100000,1252200000,0,5390900000,18200000,487600000,90200000,2459800000,3929400000,87100000,826500000,56200000,0,4843000000,-2422600000,-331800000,3947600000,6973500000,7302800000,-877200000,0,0,109,56,194600000,264400000,979700000,60100000,0,59700000,132000000,0,1294600000,1326700000,919600000
328,25282320000,23368919000,1913401000,287111000,727270000,289474000,-221020000,21460000,-55546000,1186131000,24555050000,161230000,221020000,1111347000,42861000,727270000,1,1,1163343000,3657166000,314258000,468217000,2310081000,0,3023003000,8345085000,3333750000,213705000,0,622255000,879108000,256853000,0,4625390000,375181000,8156924000,0,8532105000,2121284000,163821000,115818000,115818000,0,2537612000,2037037000,1887443000,2496465000,12970475000,11069717000,-187020000,0,0,56,19,771833000,61346000,1193066000,1005480000,0,1005480000,872454000,52004000,350323000,415772000,187586000,27266438000,25335625000,1930813000,53912000,656432000,56779000,-190483000,14559000,-205211000,1274381000,26610006000,203959000,173877000,1174694000,44143000,656432000,0,0,1393557000,3952443000,376542000,450781000,2389719000,0,3131783000,9134885000,4028159000,162242000,0,696853000,906723000,209870000,0,5262531000,160917000,8898566000,0,9059483000,2980323000,268925000,114657000,114657000,0,3512534000,2040922000,1811384000,3141240000,14397416000,12572017000,75402000,0,1,63,21,794581000,83084000,1257275000,983035000,0,983035000,921113000,50462000,214510000,65123000,274240000,29285000000,26926000000,2359000000,696000000,1065000000,698000000,-113000000,6000000,-8000000,1294000000,28220000000,246000000,130000000,1213000000,34000000,1065000000,4,4,1567000000,4139000000,680000000,677000000,3142000000,0,4414000000,10877000000,4465000000,239000000,0,715000000,897000000,182000000,0,5777000000,108000000,10575000000,0,10683000000,3211000000,334000000,111000000,111000000,0,3834000000,2688000000,2136000000,3319000000,16654000000,14517000000,194000000,0,1,60,19,876000000,102000000,1433000000,1159000000,0,1159000000,851000000,50000000,428000000,413000000,274000000,33478000000,30846000000,2632000000,996000000,1411000000,996000000,-158000000,5000000,-22000000,1221000000,32067000000,235000000,151000000,1154000000,33000000,1411000000,6,7,1478000000,5191000000,605000000,687000000,4918000000,0,6128000000,13908000000,4454000000,294000000,0,704000000,862000000,158000000,0,5809000000,419000000,13278000000,0,13697000000,2992000000,272000000,122000000,122000000,0,3568000000,3638000000,2451000000,3411000000,19717000000,17265000000,211000000,0,1,58,17,925000000,81000000,1651000000,1385000000,0,1385000000,858000000,48000000,696000000,888000000,266000000
233,2242447000,356577000,1885870000,427734000,518463000,427734000,0,0,22648000,1367407000,1723984000,113377000,0,959349000,408058000,518463000,7,7,972282000,322029000,22441000,0,11960000,182874000,34401000,1511586000,223426000,203781000,0,1065379000,1065379000,108903000,53085000,1878689000,0,298496000,0,1105526000,0,131853000,391399000,313000,0,523252000,1638090000,1761497000,0,3390275000,1628778000,406060000,1,1,0,0,68507000,162914000,747841000,103542000,0,103542000,414634000,0,201045000,155447000,644299000,2350822000,407887000,1942935000,307441000,400067000,307441000,0,0,-3670000,1542868000,1950755000,88956000,0,1101544000,441324000,400067000,5,5,1209889000,296183000,17096000,0,10802000,259506000,27898000,1793476000,529919000,347447000,0,1858966000,1858966000,225900000,62377000,2884444000,19275000,385870000,0,1288279000,707762000,59511000,390100000,602000,0,1157373000,1945531000,2232268000,727037000,4677920000,2445652000,505197000,1,1,24,15,95857000,201948000,660898000,59940000,0,59940000,747002000,0,100016000,-337243000,600958000,2603416000,493146000,2110270000,331241000,394025000,331241000,0,0,-7088000,1716245000,2209391000,55696000,0,1203618000,512627000,394025000,5,5,910607000,340536000,13081000,0,8974000,337902000,22055000,1611100000,436098000,472558000,0,2216553000,2216553000,237178000,158885000,3386180000,19275000,403583000,0,1391527000,646717000,75236000,523587000,2414000,0,1245540000,2187828000,2360213000,665992000,4997280000,2637067000,219573000,1,1,22,13,115424000,243279000,645196000,30651000,0,30651000,445335000,0,500000000,468280000,614545000,2695845000,539627000,2156218000,322160000,411701000,322160000,0,0,-26308000,1744517000,2284144000,63233000,0,1201149000,543368000,411701000,5,5,884566000,469979000,10164000,0,58201000,489314000,68365000,1912224000,395657000,516122000,0,2259282000,2259282000,200288000,224780000,3363970000,349772000,422997000,0,1839951000,272376000,67710000,627179000,2781000,0,967265000,2404106000,2468978000,622148000,5276194000,2807216000,72273000,1,1,20,11,115609000,249216000,442631000,33624000,0,33624000,-218116000,0,500023000,476508000,409007000


In [45]:
X_test.head()

Unnamed: 0,0_inc_totalRevenue,0_inc_costOfRevenue,0_inc_grossProfit,0_inc_netIncome,0_inc_operatingIncome,0_inc_netIncomeContinuousOperations,0_inc_netInterestIncome,0_inc_interestIncome,0_inc_otherIncomeExpense,0_inc_operatingExpense,0_inc_totalExpenses,0_inc_taxProvision,0_inc_interestExpense,0_inc_SGA,0_inc_researchAndDevelopment,0_inc_ebit,0_inc_dilutedEPS,0_inc_basicEPS,0_bal_cashEquivalent,0_bal_receivables,0_bal_finishedGoods,0_bal_workInProcess,0_bal_rawMaterials,0_bal_otherCurrentAssets,0_bal_inventory,0_bal_currentAssets,0_bal_netPPE,0_bal_otherNonCurrentAssets,0_bal_financialAssets,0_bal_goodwill,0_bal_goodwillAndOtherIntangibleAssets,0_bal_otherIntangibleAssets,0_bal_nonCurrentAccountsReceivable,0_bal_totalNonCurrentAssets,0_bal_currentDebt,0_bal_payablesAndAccruedExpenses,0_bal_otherCurrentLiabilities,0_bal_currentLiabilities,0_bal_longTermDebt,0_bal_otherNonCurrentLiabilities,0_bal_nonCurrentDeferredLiabilities,0_bal_nonCurrentDeferredTaxesLiabilities,0_bal_longTermProvisions,0_bal_totalNonCurrentLiabilities,0_bal_retainedEarnings,0_bal_stockholdersEquity,0_bal_totalDebt,0_bal_totalAssets,0_bal_totalLiabilities,0_bal_workingCapital,0_bal_quickRatio,0_bal_currentRatio,0_bal_debtCapitalRatio,0_bal_debtAssetRatio,0_cas_depreciation,0_cas_stockBasedCompensation,0_cas_cashFlowOperatingActivities,0_cas_capitalExpenditure,0_cas_saleOfBusiness,0_cas_purchaseOfPPE,0_cas_cashFlowInvestingActivities,0_cas_cashDividendsPaid,0_cas_repurchaseOfCapitalStock,0_cas_cashFlowFinancingActivities,0_cas_freeCashFlow,1_inc_totalRevenue,1_inc_costOfRevenue,1_inc_grossProfit,1_inc_netIncome,1_inc_operatingIncome,1_inc_netIncomeContinuousOperations,1_inc_netInterestIncome,1_inc_interestIncome,1_inc_otherIncomeExpense,1_inc_operatingExpense,1_inc_totalExpenses,1_inc_taxProvision,1_inc_interestExpense,1_inc_SGA,1_inc_researchAndDevelopment,1_inc_ebit,1_inc_dilutedEPS,1_inc_basicEPS,1_bal_cashEquivalent,1_bal_receivables,1_bal_finishedGoods,1_bal_workInProcess,1_bal_rawMaterials,1_bal_otherCurrentAssets,1_bal_inventory,1_bal_currentAssets,1_bal_netPPE,1_bal_otherNonCurrentAssets,1_bal_financialAssets,1_bal_goodwill,1_bal_goodwillAndOtherIntangibleAssets,1_bal_otherIntangibleAssets,1_bal_nonCurrentAccountsReceivable,1_bal_totalNonCurrentAssets,1_bal_currentDebt,1_bal_payablesAndAccruedExpenses,1_bal_otherCurrentLiabilities,1_bal_currentLiabilities,1_bal_longTermDebt,1_bal_otherNonCurrentLiabilities,1_bal_nonCurrentDeferredLiabilities,1_bal_nonCurrentDeferredTaxesLiabilities,1_bal_longTermProvisions,1_bal_totalNonCurrentLiabilities,1_bal_retainedEarnings,1_bal_stockholdersEquity,1_bal_totalDebt,1_bal_totalAssets,1_bal_totalLiabilities,1_bal_workingCapital,1_bal_quickRatio,1_bal_currentRatio,1_bal_debtCapitalRatio,1_bal_debtAssetRatio,1_cas_depreciation,1_cas_stockBasedCompensation,1_cas_cashFlowOperatingActivities,1_cas_capitalExpenditure,1_cas_saleOfBusiness,1_cas_purchaseOfPPE,1_cas_cashFlowInvestingActivities,1_cas_cashDividendsPaid,1_cas_repurchaseOfCapitalStock,1_cas_cashFlowFinancingActivities,1_cas_freeCashFlow,2_inc_totalRevenue,2_inc_costOfRevenue,2_inc_grossProfit,2_inc_netIncome,2_inc_operatingIncome,2_inc_netIncomeContinuousOperations,2_inc_netInterestIncome,2_inc_interestIncome,2_inc_otherIncomeExpense,2_inc_operatingExpense,2_inc_totalExpenses,2_inc_taxProvision,2_inc_interestExpense,2_inc_SGA,2_inc_researchAndDevelopment,2_inc_ebit,2_inc_dilutedEPS,2_inc_basicEPS,2_bal_cashEquivalent,2_bal_receivables,2_bal_finishedGoods,2_bal_workInProcess,2_bal_rawMaterials,2_bal_otherCurrentAssets,2_bal_inventory,2_bal_currentAssets,2_bal_netPPE,2_bal_otherNonCurrentAssets,2_bal_financialAssets,2_bal_goodwill,2_bal_goodwillAndOtherIntangibleAssets,2_bal_otherIntangibleAssets,2_bal_nonCurrentAccountsReceivable,2_bal_totalNonCurrentAssets,2_bal_currentDebt,2_bal_payablesAndAccruedExpenses,2_bal_otherCurrentLiabilities,2_bal_currentLiabilities,2_bal_longTermDebt,2_bal_otherNonCurrentLiabilities,2_bal_nonCurrentDeferredLiabilities,2_bal_nonCurrentDeferredTaxesLiabilities,2_bal_longTermProvisions,2_bal_totalNonCurrentLiabilities,2_bal_retainedEarnings,2_bal_stockholdersEquity,2_bal_totalDebt,2_bal_totalAssets,2_bal_totalLiabilities,2_bal_workingCapital,2_bal_quickRatio,2_bal_currentRatio,2_bal_debtCapitalRatio,2_bal_debtAssetRatio,2_cas_depreciation,2_cas_stockBasedCompensation,2_cas_cashFlowOperatingActivities,2_cas_capitalExpenditure,2_cas_saleOfBusiness,2_cas_purchaseOfPPE,2_cas_cashFlowInvestingActivities,2_cas_cashDividendsPaid,2_cas_repurchaseOfCapitalStock,2_cas_cashFlowFinancingActivities,2_cas_freeCashFlow,3_inc_totalRevenue,3_inc_costOfRevenue,3_inc_grossProfit,3_inc_netIncome,3_inc_operatingIncome,3_inc_netIncomeContinuousOperations,3_inc_netInterestIncome,3_inc_interestIncome,3_inc_otherIncomeExpense,3_inc_operatingExpense,3_inc_totalExpenses,3_inc_taxProvision,3_inc_interestExpense,3_inc_SGA,3_inc_researchAndDevelopment,3_inc_ebit,3_inc_dilutedEPS,3_inc_basicEPS,3_bal_cashEquivalent,3_bal_receivables,3_bal_finishedGoods,3_bal_workInProcess,3_bal_rawMaterials,3_bal_otherCurrentAssets,3_bal_inventory,3_bal_currentAssets,3_bal_netPPE,3_bal_otherNonCurrentAssets,3_bal_financialAssets,3_bal_goodwill,3_bal_goodwillAndOtherIntangibleAssets,3_bal_otherIntangibleAssets,3_bal_nonCurrentAccountsReceivable,3_bal_totalNonCurrentAssets,3_bal_currentDebt,3_bal_payablesAndAccruedExpenses,3_bal_otherCurrentLiabilities,3_bal_currentLiabilities,3_bal_longTermDebt,3_bal_otherNonCurrentLiabilities,3_bal_nonCurrentDeferredLiabilities,3_bal_nonCurrentDeferredTaxesLiabilities,3_bal_longTermProvisions,3_bal_totalNonCurrentLiabilities,3_bal_retainedEarnings,3_bal_stockholdersEquity,3_bal_totalDebt,3_bal_totalAssets,3_bal_totalLiabilities,3_bal_workingCapital,3_bal_quickRatio,3_bal_currentRatio,3_bal_debtCapitalRatio,3_bal_debtAssetRatio,3_cas_depreciation,3_cas_stockBasedCompensation,3_cas_cashFlowOperatingActivities,3_cas_capitalExpenditure,3_cas_saleOfBusiness,3_cas_purchaseOfPPE,3_cas_cashFlowInvestingActivities,3_cas_cashDividendsPaid,3_cas_repurchaseOfCapitalStock,3_cas_cashFlowFinancingActivities,3_cas_freeCashFlow
633,14383000000,5219000000,9164000000,5017000000,5975000000,5017000000,-170000000,0,-77000000,3189000000,8408000000,711000000,170000000,1645000000,1544000000,5898000000,5,5,5387000000,1074000000,909000000,916000000,176000000,299000000,2001000000,8761000000,3303000000,468000000,0,4362000000,4771000000,409000000,0,9257000000,500000000,909000000,0,2123000000,5303000000,1514000000,78000000,78000000,0,6988000000,39898000000,8907000000,5803000000,18018000000,9111000000,6638000000,3,4,39,32,1050000000,217000000,6649000000,847000000,0,0,1920000000,3008000000,2960000000,4730000000,5802000000,14461000000,5192000000,9269000000,5595000000,6116000000,5595000000,-190000000,0,91000000,3153000000,8345000000,422000000,190000000,1623000000,1530000000,6207000000,5,6,6568000000,1414000000,811000000,964000000,180000000,302000000,1955000000,10239000000,3269000000,569000000,0,4362000000,4636000000,274000000,0,9112000000,550000000,1073000000,0,2390000000,6248000000,1305000000,90000000,90000000,0,7774000000,42051000000,9187000000,6798000000,19351000000,10164000000,7849000000,3,4,42,35,992000000,224000000,6139000000,649000000,0,0,922000000,3426000000,2553000000,4547000000,5490000000,18344000000,5968000000,12376000000,7769000000,9156000000,7769000000,-184000000,0,-53000000,3220000000,9188000000,1150000000,184000000,1666000000,1554000000,9103000000,8,8,9739000000,1701000000,598000000,1067000000,245000000,335000000,1910000000,13685000000,5141000000,748000000,0,4362000000,4447000000,85000000,0,10991000000,500000000,1294000000,0,2569000000,7241000000,1367000000,87000000,87000000,0,8774000000,45919000000,13333000000,7741000000,24676000000,11343000000,11116000000,4,5,36,31,954000000,230000000,8756000000,2462000000,0,0,4095000000,3886000000,527000000,3137000000,6294000000,20028000000,6257000000,13771000000,8749000000,10397000000,8749000000,-214000000,0,-151000000,3374000000,9631000000,1283000000,214000000,1704000000,1670000000,10246000000,9,9,9067000000,1895000000,858000000,1546000000,353000000,302000000,2757000000,14021000000,6876000000,1135000000,0,4362000000,4514000000,152000000,0,13186000000,500000000,1686000000,497000000,2985000000,8235000000,1226000000,66000000,66000000,0,9645000000,50353000000,14577000000,8735000000,27207000000,12630000000,11036000000,3,4,37,32,979000000,289000000,8720000000,2797000000,0,0,3583000000,4297000000,3615000000,6718000000,5923000000
384,21076500000,9961200000,11115300000,6025400000,8885900000,6025400000,-1085200000,36700000,217400000,2229400000,12190600000,1992700000,1121900000,2229400000,0,9140000000,7,7,898500000,2224200000,0,0,0,435200000,50200000,3557900000,37421200000,6531700000,0,2677400000,2677400000,0,0,43952900000,680100000,2940900000,0,3621000000,46875900000,3906100000,1318100000,1318100000,0,52100100000,52930500000,-8210300000,47556000000,47510800000,55721100000,-63100000,0,0,120,100,1617900000,109600000,8122100000,2393700000,340800000,0,3071100000,3581900000,4976200000,4994800000,5728400000,19207800000,9455700000,9752100000,4730500000,7206500000,4730500000,-1200400000,17700000,134600000,2545600000,12001300000,1410200000,1218100000,2245000000,0,7358800000,6,6,3449100000,2110300000,0,0,0,683800000,51100000,6243200000,38785900000,7597700000,0,2773100000,2773100000,0,0,46383600000,2945100000,3236100000,0,6181200000,48518100000,3726800000,2025600000,2025600000,0,54270500000,53908100000,-7824900000,51463200000,52626800000,60451700000,62000000,1,1,117,97,1751400000,92400000,6265200000,1640800000,76300000,0,1545800000,3752900000,907800000,2249000000,4624400000,23222900000,10642700000,12580200000,7545200000,9872700000,7545200000,-1176600000,9200000,431800000,2707500000,13350200000,1582700000,1185800000,2377800000,0,10313700000,10,10,4709200000,1872400000,0,0,0,566900000,55600000,7148500000,38272600000,8185300000,0,2782500000,2782500000,0,0,46457900000,705500000,3314500000,0,4020000000,48643600000,3716100000,1827700000,1827700000,0,54187400000,57534700000,-4601000000,49349100000,53606400000,58207400000,3128500000,1,1,110,92,1868100000,139200000,9141500000,2040000000,196200000,0,2165700000,3918600000,845500000,5595600000,7101500000,23182600000,9975400000,13207200000,6177400000,10344600000,6177400000,-1163000000,44000000,-1356600000,2862600000,12838000000,1648000000,1207000000,2492200000,0,9032400000,8,8,2583800000,2115000000,0,0,0,725400000,52000000,5424200000,36339300000,8672100000,0,2900400000,2900400000,0,0,45011400000,661100000,3141000000,0,3802100000,48037900000,2601500000,1997500000,1997500000,0,52636900000,59543900000,-6003400000,48699000000,50435600000,56439000000,1622100000,1,1,114,96,1870600000,166700000,7386700000,1899200000,445900000,0,2678100000,4168200000,3896000000,6580200000,5487500000
611,4043000000,1940000000,2103000000,507000000,825000000,489000000,-155000000,7000000,10000000,1278000000,3218000000,191000000,162000000,1157000000,0,842000000,5,5,400000000,3380000000,805000000,24000000,0,30000000,1199000000,5362000000,816000000,30000000,0,970000000,1113000000,143000000,0,2091000000,0,752000000,122000000,1313000000,5755000000,50000000,859000000,815000000,0,6664000000,1785000000,-530000000,5755000000,7453000000,7977000000,4049000000,3,4,110,77,121000000,24000000,453000000,108000000,106000000,108000000,44000000,166000000,340000000,289000000,344000000,2160000000,1233000000,927000000,-255000000,74000000,-253000000,-185000000,7000000,-165000000,853000000,2086000000,-23000000,192000000,727000000,0,-84000000,-2,-2,1231000000,2694000000,1050000000,30000000,0,26000000,1347000000,5617000000,758000000,26000000,0,964000000,1095000000,131000000,0,1996000000,0,705000000,204000000,715000000,6575000000,98000000,1193000000,725000000,0,7866000000,1390000000,-975000000,6575000000,7613000000,8581000000,4902000000,5,7,117,86,126000000,20000000,374000000,69000000,0,69000000,60000000,138000000,128000000,-502000000,305000000,3134000000,1597000000,1537000000,308000000,616000000,313000000,-195000000,3000000,8000000,921000000,2518000000,116000000,198000000,797000000,0,627000000,3,3,396000000,2502000000,999000000,13000000,0,0,1216000000,4488000000,768000000,25000000,0,961000000,1180000000,219000000,0,2100000000,0,767000000,0,1159000000,5449000000,36000000,738000000,686000000,0,6223000000,1587000000,-801000000,5449000000,6588000000,7382000000,3329000000,2,3,117,82,124000000,32000000,568000000,57000000,0,57000000,93000000,109000000,25000000,1288000000,511000000,3567000000,1839000000,1728000000,357000000,679000000,356000000,-189000000,6000000,-4000000,1049000000,2888000000,130000000,195000000,930000000,0,681000000,4,4,562000000,2640000000,983000000,14000000,0,0,1193000000,4757000000,720000000,16000000,12000000,955000000,1162000000,207000000,0,2000000000,0,725000000,0,1136000000,5753000000,56000000,703000000,679000000,0,6525000000,1808000000,-913000000,5753000000,6757000000,7661000000,3621000000,3,4,118,85,119000000,45000000,442000000,52000000,0,52000000,45000000,135000000,351000000,196000000,390000000
528,3032000000,2152000000,880000000,225000000,510000000,225000000,-209000000,33000000,0,370000000,2522000000,76000000,209000000,305000000,0,510000000,1,1,102000000,35000000,217000000,47000000,154000000,15000000,418000000,570000000,579000000,9000000,0,1879000000,3002000000,1123000000,0,3590000000,21000000,311000000,46000000,378000000,4239000000,19000000,294000000,294000000,0,4600000000,-823000000,-818000000,4260000000,4160000000,4978000000,192000000,0,1,123,102,91000000,0,403000000,109000000,0,109000000,128000000,0,0,196000000,294000000,3263000000,2290000000,973000000,363000000,586000000,363000000,-70000000,0,-31000000,387000000,2677000000,153000000,70000000,358000000,0,586000000,1,1,312000000,309000000,194000000,54000000,171000000,13000000,419000000,1053000000,673000000,25000000,0,1879000000,2971000000,1092000000,0,3669000000,25000000,407000000,83000000,432000000,2259000000,37000000,326000000,326000000,0,2675000000,233000000,1615000000,2284000000,4722000000,3107000000,621000000,1,2,58,48,99000000,5000000,319000000,143000000,0,143000000,143000000,124000000,0,-34000000,176000000,3556000000,2745000000,811000000,324000000,478000000,324000000,-48000000,0,-14000000,333000000,3078000000,106000000,48000000,320000000,0,478000000,1,1,164000000,338000000,276000000,63000000,244000000,19000000,583000000,1104000000,732000000,36000000,0,1879000000,2940000000,1061000000,0,3708000000,25000000,459000000,86000000,484000000,2133000000,38000000,351000000,351000000,0,2572000000,365000000,1756000000,2158000000,4812000000,3056000000,620000000,1,2,55,44,109000000,4000000,310000000,141000000,0,141000000,141000000,192000000,0,317000000,169000000,3817000000,3041000000,776000000,258000000,414000000,258000000,-76000000,6000000,-12000000,350000000,3403000000,80000000,78000000,340000000,0,414000000,1,1,38000000,370000000,383000000,81000000,258000000,41000000,722000000,1171000000,787000000,61000000,0,1879000000,2910000000,1031000000,0,3758000000,39000000,457000000,145000000,496000000,2119000000,47000000,365000000,365000000,0,2565000000,431000000,1868000000,2158000000,4929000000,3061000000,675000000,0,2,53,43,117000000,5000000,219000000,128000000,0,128000000,128000000,192000000,0,217000000,91000000
7,4805239000,1196313000,3608926000,-674339000,-501543000,-674339000,75934000,85902000,13906000,4110469000,5306782000,262636000,9968000,2318700000,976695000,-401735000,-1,-1,3074273000,3145457000,0,0,0,341598000,0,6561443000,686867000,306809000,0,652088000,755000000,102912000,0,1748676000,38022000,3598564000,1081419000,5233764000,381374000,271164000,0,0,0,3884040000,-1420991000,-807685000,419396000,8310119000,9117804000,1327679000,1,1,-108,5,114162000,97547000,222727000,125452000,0,125452000,347155000,0,0,-854579000,97275000,3378199000,876042000,2502157000,-4584716000,-3438792000,-4584716000,-144571000,27117000,-1098575000,5940949000,6816991000,-97222000,171688000,2310176000,2752872000,-4510250000,-16,-16,6391257000,2181329000,0,0,0,309954000,0,8916386000,654262000,189164000,0,655801000,731687000,75886000,0,1575113000,56586000,4675298000,407895000,5139779000,2246467000,203470000,0,0,0,2449937000,-6005707000,2901783000,2303053000,10491499000,7589716000,3776607000,1,1,44,21,125876000,3001948000,-629732000,37371000,0,37371000,-79590000,0,0,-2940814000,-667103000,5991760000,1155833000,4835927000,-352034000,542166000,-352034000,-424865000,12734000,-417508000,4293761000,5449594000,51827000,437599000,2021656000,1425048000,137392000,0,0,8322476000,3715471000,0,0,0,333669000,0,12386380000,428621000,188563000,0,652602000,704910000,52308000,0,1322094000,63479000,5392075000,903728000,6359282000,2355020000,218459000,0,0,0,2573479000,-6357741000,4775713000,2418499000,13708474000,8932761000,6027098000,1,1,33,17,138319000,898830000,2189694000,25322000,0,25322000,1351955000,0,0,-1431159000,2164372000,8399000000,1499000000,6900000000,1893000000,1891000000,1893000000,162000000,186000000,-64000000,5009000000,6508000000,96000000,24000000,2466000000,1502000000,2013000000,2,2,9622000000,4783000000,0,0,0,456000000,0,14861000000,259000000,234000000,0,650000000,684000000,34000000,0,1177000000,59000000,6737000000,672000000,7978000000,2282000000,218000000,0,0,0,2500000000,-5965000000,5560000000,2341000000,16038000000,10478000000,6883000000,1,1,29,14,81000000,930000000,3430000000,25000000,0,25000000,28000000,0,1500000000,689000000,3405000000


## Prepare Data

### Class Imbalance

In [28]:
df = pd.concat([X_train,y_train],axis=1)
df.head()

Unnamed: 0,0_inc_totalRevenue,0_inc_costOfRevenue,0_inc_grossProfit,0_inc_netIncome,0_inc_operatingIncome,0_inc_netIncomeContinuousOperations,0_inc_netInterestIncome,0_inc_interestIncome,0_inc_otherIncomeExpense,0_inc_operatingExpense,0_inc_totalExpenses,0_inc_taxProvision,0_inc_interestExpense,0_inc_SGA,0_inc_researchAndDevelopment,0_inc_ebit,0_inc_dilutedEPS,0_inc_basicEPS,0_bal_cashEquivalent,0_bal_receivables,0_bal_finishedGoods,0_bal_workInProcess,0_bal_rawMaterials,0_bal_otherCurrentAssets,0_bal_inventory,0_bal_currentAssets,0_bal_netPPE,0_bal_otherNonCurrentAssets,0_bal_financialAssets,0_bal_goodwill,0_bal_goodwillAndOtherIntangibleAssets,0_bal_otherIntangibleAssets,0_bal_nonCurrentAccountsReceivable,0_bal_totalNonCurrentAssets,0_bal_currentDebt,0_bal_payablesAndAccruedExpenses,0_bal_otherCurrentLiabilities,0_bal_currentLiabilities,0_bal_longTermDebt,0_bal_otherNonCurrentLiabilities,0_bal_nonCurrentDeferredLiabilities,0_bal_nonCurrentDeferredTaxesLiabilities,0_bal_longTermProvisions,0_bal_totalNonCurrentLiabilities,0_bal_retainedEarnings,0_bal_stockholdersEquity,0_bal_totalDebt,0_bal_totalAssets,0_bal_totalLiabilities,0_bal_workingCapital,0_bal_quickRatio,0_bal_currentRatio,0_bal_debtCapitalRatio,0_bal_debtAssetRatio,0_cas_depreciation,0_cas_stockBasedCompensation,0_cas_cashFlowOperatingActivities,0_cas_capitalExpenditure,0_cas_saleOfBusiness,0_cas_purchaseOfPPE,0_cas_cashFlowInvestingActivities,0_cas_cashDividendsPaid,0_cas_repurchaseOfCapitalStock,0_cas_cashFlowFinancingActivities,0_cas_freeCashFlow,1_inc_totalRevenue,1_inc_costOfRevenue,1_inc_grossProfit,1_inc_netIncome,1_inc_operatingIncome,1_inc_netIncomeContinuousOperations,1_inc_netInterestIncome,1_inc_interestIncome,1_inc_otherIncomeExpense,1_inc_operatingExpense,1_inc_totalExpenses,1_inc_taxProvision,1_inc_interestExpense,1_inc_SGA,1_inc_researchAndDevelopment,1_inc_ebit,1_inc_dilutedEPS,1_inc_basicEPS,1_bal_cashEquivalent,1_bal_receivables,1_bal_finishedGoods,1_bal_workInProcess,1_bal_rawMaterials,1_bal_otherCurrentAssets,1_bal_inventory,1_bal_currentAssets,1_bal_netPPE,1_bal_otherNonCurrentAssets,1_bal_financialAssets,1_bal_goodwill,1_bal_goodwillAndOtherIntangibleAssets,1_bal_otherIntangibleAssets,1_bal_nonCurrentAccountsReceivable,1_bal_totalNonCurrentAssets,1_bal_currentDebt,1_bal_payablesAndAccruedExpenses,1_bal_otherCurrentLiabilities,1_bal_currentLiabilities,1_bal_longTermDebt,1_bal_otherNonCurrentLiabilities,1_bal_nonCurrentDeferredLiabilities,1_bal_nonCurrentDeferredTaxesLiabilities,1_bal_longTermProvisions,1_bal_totalNonCurrentLiabilities,1_bal_retainedEarnings,1_bal_stockholdersEquity,1_bal_totalDebt,1_bal_totalAssets,1_bal_totalLiabilities,1_bal_workingCapital,1_bal_quickRatio,1_bal_currentRatio,1_bal_debtCapitalRatio,1_bal_debtAssetRatio,1_cas_depreciation,1_cas_stockBasedCompensation,1_cas_cashFlowOperatingActivities,1_cas_capitalExpenditure,1_cas_saleOfBusiness,1_cas_purchaseOfPPE,1_cas_cashFlowInvestingActivities,1_cas_cashDividendsPaid,1_cas_repurchaseOfCapitalStock,1_cas_cashFlowFinancingActivities,1_cas_freeCashFlow,2_inc_totalRevenue,2_inc_costOfRevenue,2_inc_grossProfit,2_inc_netIncome,2_inc_operatingIncome,2_inc_netIncomeContinuousOperations,2_inc_netInterestIncome,2_inc_interestIncome,2_inc_otherIncomeExpense,2_inc_operatingExpense,2_inc_totalExpenses,2_inc_taxProvision,2_inc_interestExpense,2_inc_SGA,2_inc_researchAndDevelopment,2_inc_ebit,2_inc_dilutedEPS,2_inc_basicEPS,2_bal_cashEquivalent,2_bal_receivables,2_bal_finishedGoods,2_bal_workInProcess,2_bal_rawMaterials,2_bal_otherCurrentAssets,2_bal_inventory,2_bal_currentAssets,2_bal_netPPE,2_bal_otherNonCurrentAssets,2_bal_financialAssets,2_bal_goodwill,2_bal_goodwillAndOtherIntangibleAssets,2_bal_otherIntangibleAssets,2_bal_nonCurrentAccountsReceivable,2_bal_totalNonCurrentAssets,2_bal_currentDebt,2_bal_payablesAndAccruedExpenses,2_bal_otherCurrentLiabilities,2_bal_currentLiabilities,2_bal_longTermDebt,2_bal_otherNonCurrentLiabilities,2_bal_nonCurrentDeferredLiabilities,2_bal_nonCurrentDeferredTaxesLiabilities,2_bal_longTermProvisions,2_bal_totalNonCurrentLiabilities,2_bal_retainedEarnings,2_bal_stockholdersEquity,2_bal_totalDebt,2_bal_totalAssets,2_bal_totalLiabilities,2_bal_workingCapital,2_bal_quickRatio,2_bal_currentRatio,2_bal_debtCapitalRatio,2_bal_debtAssetRatio,2_cas_depreciation,2_cas_stockBasedCompensation,2_cas_cashFlowOperatingActivities,2_cas_capitalExpenditure,2_cas_saleOfBusiness,2_cas_purchaseOfPPE,2_cas_cashFlowInvestingActivities,2_cas_cashDividendsPaid,2_cas_repurchaseOfCapitalStock,2_cas_cashFlowFinancingActivities,2_cas_freeCashFlow,3_inc_totalRevenue,3_inc_costOfRevenue,3_inc_grossProfit,3_inc_netIncome,3_inc_operatingIncome,3_inc_netIncomeContinuousOperations,3_inc_netInterestIncome,3_inc_interestIncome,3_inc_otherIncomeExpense,3_inc_operatingExpense,3_inc_totalExpenses,3_inc_taxProvision,3_inc_interestExpense,3_inc_SGA,3_inc_researchAndDevelopment,3_inc_ebit,3_inc_dilutedEPS,3_inc_basicEPS,3_bal_cashEquivalent,3_bal_receivables,3_bal_finishedGoods,3_bal_workInProcess,3_bal_rawMaterials,3_bal_otherCurrentAssets,3_bal_inventory,3_bal_currentAssets,3_bal_netPPE,3_bal_otherNonCurrentAssets,3_bal_financialAssets,3_bal_goodwill,3_bal_goodwillAndOtherIntangibleAssets,3_bal_otherIntangibleAssets,3_bal_nonCurrentAccountsReceivable,3_bal_totalNonCurrentAssets,3_bal_currentDebt,3_bal_payablesAndAccruedExpenses,3_bal_otherCurrentLiabilities,3_bal_currentLiabilities,3_bal_longTermDebt,3_bal_otherNonCurrentLiabilities,3_bal_nonCurrentDeferredLiabilities,3_bal_nonCurrentDeferredTaxesLiabilities,3_bal_longTermProvisions,3_bal_totalNonCurrentLiabilities,3_bal_retainedEarnings,3_bal_stockholdersEquity,3_bal_totalDebt,3_bal_totalAssets,3_bal_totalLiabilities,3_bal_workingCapital,3_bal_quickRatio,3_bal_currentRatio,3_bal_debtCapitalRatio,3_bal_debtAssetRatio,3_cas_depreciation,3_cas_stockBasedCompensation,3_cas_cashFlowOperatingActivities,3_cas_capitalExpenditure,3_cas_saleOfBusiness,3_cas_purchaseOfPPE,3_cas_cashFlowInvestingActivities,3_cas_cashDividendsPaid,3_cas_repurchaseOfCapitalStock,3_cas_cashFlowFinancingActivities,3_cas_freeCashFlow,label
228,5333700000,2818300000,2515400000,790900000,1056000000,790900000,-13500000,400000,1200000,1459400000,4277700000,252800000,13900000,1459400000,0,1057600000,1,1,174900000,741800000,0,0,0,157400000,1366400000,2457200000,1266400000,76300000,0,0,0,0,0,1342700000,100400000,444300000,0,544700000,490200000,0,99400000,99400000,0,589600000,2633900000,2665600000,590600000,3799900000,1134300000,1912500000,2,4,18,15,148700000,5700000,842700000,246400000,0,246400000,239700000,498600000,0,595100000,596300000,5647300000,3079500000,2567800000,859100000,1140400000,859100000,-9100000,600000,1400000,1427400000,4506900000,273600000,9700000,1427400000,0,1142400000,1,1,245700000,769400000,0,0,0,140300000,1337500000,2499600000,1273700000,191400000,0,0,0,0,0,1465100000,133600000,479100000,0,612700000,516500000,0,102300000,102300000,0,618800000,2689600000,2733200000,650100000,3964700000,1231500000,1886900000,1,4,19,16,162400000,5700000,1101800000,168100000,0,168100000,281700000,803400000,52000000,754400000,933700000,6010900000,3233700000,2777200000,925000000,1217400000,925000000,-9600000,100000,0,1559800000,4793500000,282800000,9700000,1559800000,0,1217500000,1,1,236200000,900200000,0,0,0,188100000,1523600000,2856600000,1261500000,180900000,0,0,0,0,0,1442400000,150800000,531400000,0,682200000,486000000,0,88600000,88600000,0,574600000,2970900000,3042200000,636800000,4299000000,1256800000,2174400000,1,4,17,14,170700000,5600000,770100000,156600000,0,156600000,148500000,643700000,0,627100000,613500000,6980600000,3764800000,3215800000,1086900000,1453600000,1086900000,-13600000,700000,0,1762200000,5527000000,353100000,14300000,1762200000,0,1454300000,1,1,230100000,1013200000,0,0,0,165400000,1708000000,3124800000,1253000000,170800000,0,0,0,0,0,1423800000,293700000,496100000,0,789800000,508400000,3500000,83700000,83700000,0,595600000,3218700000,3163200000,802100000,4548600000,1385400000,2335000000,1,3,20,17,176600000,7200000,941000000,173800000,0,173800000,163000000,711300000,237800000,774900000,767200000,3
399,4200819000,1682234000,2518585000,1107835000,1402939000,1107835000,13023000,13023000,13023000,1115646000,2797880000,308127000,0,1115646000,0,1402939000,1,1,1331020000,540330000,225846000,0,134885000,0,360731000,2316309000,298640000,53973000,0,1331643000,2383748000,1052105000,0,2834043000,0,402837000,0,661097000,0,30505000,287469000,0,0,317974000,5022480000,4171281000,0,5150352000,979071000,1655212000,2,3,0,0,64814000,63356000,1113762000,110398000,0,101661000,326724000,0,707300000,628506000,1003364000,4598638000,1874758000,2723880000,1409594000,1633153000,1409594000,-6996000,0,-6996000,1090727000,2965485000,216563000,6996000,1090727000,0,1633153000,1,1,2061767000,666012000,177919000,0,155166000,0,333085000,3140955000,314656000,70475000,0,1331643000,2390689000,1059046000,0,3061761000,0,462886000,0,749988000,0,27432000,264436000,0,0,291868000,6432074000,5160860000,0,6202716000,1041856000,2390967000,3,4,0,0,60973000,70289000,1364163000,67272000,0,48722000,472487000,0,595918000,526068000,1296891000,5541352000,2432839000,3108513000,1377475000,1797467000,1377475000,0,0,3952000,1311046000,3743885000,423944000,0,1311046000,0,1797467000,1,1,3076189000,896658000,243492000,0,349865000,0,593357000,4682110000,313753000,80252000,0,1331643000,2404029000,1072386000,0,3122674000,0,645626000,0,965076000,0,29508000,243249000,0,0,272757000,7809549000,6566951000,0,7804784000,1237833000,3717034000,4,4,0,0,50155000,70483000,1155741000,57453000,0,43868000,992022000,0,13830000,-34821000,1098288000,6311050000,3136483000,3174567000,1191624000,1584721000,1191624000,0,0,-12757000,1589846000,4726329000,380340000,0,1589846000,0,1584721000,1,1,2669455000,1016203000,466551000,1688000,467392000,109823000,935631000,4764897000,516897000,134478000,0,1417941000,2638351000,1220410000,0,3528208000,0,886204000,0,1001978000,0,42286000,223800000,0,0,266086000,9001173000,7025041000,0,8293105000,1268064000,3762919000,3,4,0,0,61241000,64109000,887699000,212153000,0,188726000,161367000,0,771028000,706938000,675546000,3
249,2988100000,1026800000,1961300000,137000000,211300000,138400000,-92100000,0,7200000,1750000000,2776800000,-12000000,92100000,707700000,492600000,218500000,0,0,1086400000,30200000,0,0,0,48900000,0,1575300000,455200000,17200000,0,2976500000,4074200000,1097700000,0,4725900000,18400000,438300000,93800000,2001100000,2569700000,17700000,755300000,100900000,0,3518000000,-153500000,772000000,2588100000,6301200000,5519100000,-425800000,0,0,77,41,209700000,147000000,723400000,92300000,0,87600000,135300000,0,458600000,456900000,631100000,3316700000,1158600000,2158100000,-495100000,-358900000,-494100000,-91300000,0,-45200000,2517000000,3675600000,-1300000,91300000,762300000,560400000,-404100000,-2,-2,765200000,41800000,0,0,0,60800000,0,1291300000,399300000,36000000,0,3275100000,4530200000,1255100000,0,5141600000,24300000,578400000,200000,2314200000,3256800000,56400000,817100000,92000000,0,4130500000,-1190900000,-12900000,3281100000,6432900000,6444700000,-1022900000,0,0,100,51,202700000,191500000,764600000,81500000,0,66500000,482300000,0,541700000,581700000,683100000,3815700000,1372200000,2443500000,242300000,381800000,242800000,-126000000,0,-2200000,2061700000,3433900000,10800000,126000000,849700000,706300000,379600000,1,1,1255700000,63600000,0,0,0,109900000,0,1889800000,329200000,91200000,0,3540800000,4925500000,1384700000,0,5527300000,24100000,522500000,170000000,2436700000,4000900000,77700000,818600000,75300000,0,4897200000,-1474600000,81700000,4025000000,7417100000,7333900000,-546900000,0,0,98,54,199600000,207900000,829300000,253200000,0,51100000,635600000,0,526000000,-298100000,576100000,4091300000,1484500000,2606800000,352200000,514500000,352900000,-146300000,0,-11700000,2092300000,3576800000,3600000,146300000,797800000,794000000,502800000,2,2,774000000,60100000,0,0,0,271800000,0,1582600000,309700000,95000000,0,3536900000,4789100000,1252200000,0,5390900000,18200000,487600000,90200000,2459800000,3929400000,87100000,826500000,56200000,0,4843000000,-2422600000,-331800000,3947600000,6973500000,7302800000,-877200000,0,0,109,56,194600000,264400000,979700000,60100000,0,59700000,132000000,0,1294600000,1326700000,919600000,2
328,25282320000,23368919000,1913401000,287111000,727270000,289474000,-221020000,21460000,-55546000,1186131000,24555050000,161230000,221020000,1111347000,42861000,727270000,1,1,1163343000,3657166000,314258000,468217000,2310081000,0,3023003000,8345085000,3333750000,213705000,0,622255000,879108000,256853000,0,4625390000,375181000,8156924000,0,8532105000,2121284000,163821000,115818000,115818000,0,2537612000,2037037000,1887443000,2496465000,12970475000,11069717000,-187020000,0,0,56,19,771833000,61346000,1193066000,1005480000,0,1005480000,872454000,52004000,350323000,415772000,187586000,27266438000,25335625000,1930813000,53912000,656432000,56779000,-190483000,14559000,-205211000,1274381000,26610006000,203959000,173877000,1174694000,44143000,656432000,0,0,1393557000,3952443000,376542000,450781000,2389719000,0,3131783000,9134885000,4028159000,162242000,0,696853000,906723000,209870000,0,5262531000,160917000,8898566000,0,9059483000,2980323000,268925000,114657000,114657000,0,3512534000,2040922000,1811384000,3141240000,14397416000,12572017000,75402000,0,1,63,21,794581000,83084000,1257275000,983035000,0,983035000,921113000,50462000,214510000,65123000,274240000,29285000000,26926000000,2359000000,696000000,1065000000,698000000,-113000000,6000000,-8000000,1294000000,28220000000,246000000,130000000,1213000000,34000000,1065000000,4,4,1567000000,4139000000,680000000,677000000,3142000000,0,4414000000,10877000000,4465000000,239000000,0,715000000,897000000,182000000,0,5777000000,108000000,10575000000,0,10683000000,3211000000,334000000,111000000,111000000,0,3834000000,2688000000,2136000000,3319000000,16654000000,14517000000,194000000,0,1,60,19,876000000,102000000,1433000000,1159000000,0,1159000000,851000000,50000000,428000000,413000000,274000000,33478000000,30846000000,2632000000,996000000,1411000000,996000000,-158000000,5000000,-22000000,1221000000,32067000000,235000000,151000000,1154000000,33000000,1411000000,6,7,1478000000,5191000000,605000000,687000000,4918000000,0,6128000000,13908000000,4454000000,294000000,0,704000000,862000000,158000000,0,5809000000,419000000,13278000000,0,13697000000,2992000000,272000000,122000000,122000000,0,3568000000,3638000000,2451000000,3411000000,19717000000,17265000000,211000000,0,1,58,17,925000000,81000000,1651000000,1385000000,0,1385000000,858000000,48000000,696000000,888000000,266000000,3
233,2242447000,356577000,1885870000,427734000,518463000,427734000,0,0,22648000,1367407000,1723984000,113377000,0,959349000,408058000,518463000,7,7,972282000,322029000,22441000,0,11960000,182874000,34401000,1511586000,223426000,203781000,0,1065379000,1065379000,108903000,53085000,1878689000,0,298496000,0,1105526000,0,131853000,391399000,313000,0,523252000,1638090000,1761497000,0,3390275000,1628778000,406060000,1,1,0,0,68507000,162914000,747841000,103542000,0,103542000,414634000,0,201045000,155447000,644299000,2350822000,407887000,1942935000,307441000,400067000,307441000,0,0,-3670000,1542868000,1950755000,88956000,0,1101544000,441324000,400067000,5,5,1209889000,296183000,17096000,0,10802000,259506000,27898000,1793476000,529919000,347447000,0,1858966000,1858966000,225900000,62377000,2884444000,19275000,385870000,0,1288279000,707762000,59511000,390100000,602000,0,1157373000,1945531000,2232268000,727037000,4677920000,2445652000,505197000,1,1,24,15,95857000,201948000,660898000,59940000,0,59940000,747002000,0,100016000,-337243000,600958000,2603416000,493146000,2110270000,331241000,394025000,331241000,0,0,-7088000,1716245000,2209391000,55696000,0,1203618000,512627000,394025000,5,5,910607000,340536000,13081000,0,8974000,337902000,22055000,1611100000,436098000,472558000,0,2216553000,2216553000,237178000,158885000,3386180000,19275000,403583000,0,1391527000,646717000,75236000,523587000,2414000,0,1245540000,2187828000,2360213000,665992000,4997280000,2637067000,219573000,1,1,22,13,115424000,243279000,645196000,30651000,0,30651000,445335000,0,500000000,468280000,614545000,2695845000,539627000,2156218000,322160000,411701000,322160000,0,0,-26308000,1744517000,2284144000,63233000,0,1201149000,543368000,411701000,5,5,884566000,469979000,10164000,0,58201000,489314000,68365000,1912224000,395657000,516122000,0,2259282000,2259282000,200288000,224780000,3363970000,349772000,422997000,0,1839951000,272376000,67710000,627179000,2781000,0,967265000,2404106000,2468978000,622148000,5276194000,2807216000,72273000,1,1,20,11,115609000,249216000,442631000,33624000,0,33624000,-218116000,0,500023000,476508000,409007000,2


In [29]:
df = pd.concat([X_train,y_train],axis=1)

df_class_0 = df[df['label'] == 0]
df_class_1 = df[df['label'] == 1]
df_class_2 = df[df['label'] == 2]
df_class_3 = df[df['label'] == 3]
df_class_4 = df[df['label'] == 4]

df_class_0_over = df_class_0.sample(252, replace=True)
df_class_1_over = df_class_1.sample(252, replace=True)
df_class_2_over = df_class_2.sample(252, replace=True)
df_class_4_over = df_class_4.sample(252, replace=True)

df_temp = pd.concat([df_class_0_over,df_class_1_over,df_class_2_over,df_class_3,df_class_4_over],axis=0)

X_train = df_temp.drop('label',axis=1)
y_train = df_temp['label']

In [30]:
y_train.value_counts()

label
0    252
1    252
2    252
3    252
4    252
Name: count, dtype: int64

### One Hot Encoding

In [None]:
ohe = OneHotEncoder(sparse_output=False, handle_unknown='ignore')
ct = ColumnTransformer(
    [("ohe", ohe, [0]),
    ],remainder='passthrough')    

In [None]:
#X_train = ct.fit_transform(X_train)
#X_test = ct.transform(X_test)

### Robust Scaler

In [54]:
robustScaler=RobustScaler()

In [55]:
l = X_train.shape[1]
ct1 = ColumnTransformer(
          [('robustScaler',robustScaler, slice(0,l))
          ],remainder='passthrough')   

In [56]:
# Apply StandardScaler to All Columns

X_train_robustScale = ct1.fit_transform(X_train)
X_test_robustScale = ct1.transform(X_test)

In [57]:
X_test_robustScale

array([[ 8.76428650e-01,  3.81611179e-01,  2.19186663e+00, ...,
         4.55612207e+00,  4.99056572e+00,  4.70436810e+00],
       [ 1.50441935e+00,  1.08057087e+00,  2.77031890e+00, ...,
         4.92978719e+00,  4.88236848e+00,  4.33376450e+00],
       [-9.36802448e-02, -1.01685355e-01,  9.86715726e-02, ...,
         2.15755703e-01, -1.30351366e-01, -4.12727319e-03],
       ...,
       [ 7.15966634e-01,  1.19226410e+00,  5.44420439e-02, ...,
         6.64286823e-01, -1.88278541e+00, -8.55196535e-01],
       [-4.10607503e-01, -2.98747711e-01, -5.06371779e-01, ...,
        -2.50993254e-01, -3.14082522e-01, -4.96847104e-01],
       [-4.40037454e-01, -3.74564983e-01, -4.46871798e-01, ...,
        -2.50993254e-01, -2.96408998e-01, -3.27986316e-01]])

### Outliers - IQR

In [None]:
x = list(X_train.columns)
x.sort
x

In [59]:
def cap_ext_outliers(train_X,test_X):

  df_train = train_X.copy()
  df_test = test_X.copy()
    
  cols = df_train.columns
  cols = list(cols)

  for feature in cols:
    IQR = X_train[feature].quantile(0.75)- X_train[feature].quantile(0.25)
    ext_lower_bound = df_train[feature].quantile(0.75) - (IQR*3)
    ext_upper_bound = df_train[feature].quantile(0.75) + (IQR*3) 
      
    df_train[feature] = np.where(df_train[feature] < ext_lower_bound, ext_lower_bound, df_train[feature])
    df_train[feature] = np.where(df_train[feature] > ext_upper_bound, ext_upper_bound, df_train[feature])  

    df_test[feature] = np.where(df_test[feature] < ext_lower_bound, ext_lower_bound, df_test[feature])
    df_test[feature] = np.where(df_test[feature] > ext_upper_bound, ext_upper_bound, df_test[feature])  

  return df_train,df_test


In [60]:
def cap_outliers(train_X,test_X):

  df_train = train_X.copy()
  df_test = test_X.copy()
    
  cols = df_train.columns
  cols = list(cols)

  for feature in cols:
    IQR = X_train[feature].quantile(0.75)- X_train[feature].quantile(0.25)
    ext_lower_bound = df_train[feature].quantile(0.75) - (IQR*1.5)
    ext_upper_bound = df_train[feature].quantile(0.75) + (IQR*1.5) 
      
    df_train[feature] = np.where(df_train[feature] < ext_lower_bound, ext_lower_bound, df_train[feature])
    df_train[feature] = np.where(df_train[feature] > ext_upper_bound, ext_upper_bound, df_train[feature])  

    df_test[feature] = np.where(df_test[feature] < ext_lower_bound, ext_lower_bound, df_test[feature])
    df_test[feature] = np.where(df_test[feature] > ext_upper_bound, ext_upper_bound, df_test[feature])  

  return df_train,df_test

In [61]:
X_train_cap, X_test_cap = cap_outliers(X_train,X_test)

In [None]:
#X_train_ext_cap, X_test_ext_cap = cap_ext_outliers(X_train,X_test)

In [122]:
X_train_cap.shape

(1260, 260)

Unnamed: 0,0_inc_totalRevenue,0_inc_costOfRevenue,0_inc_grossProfit,0_inc_netIncome,0_inc_operatingIncome,0_inc_netIncomeContinuousOperations,0_inc_netInterestIncome,0_inc_interestIncome,0_inc_otherIncomeExpense,0_inc_operatingExpense,0_inc_totalExpenses,0_inc_taxProvision,0_inc_interestExpense,0_inc_SGA,0_inc_researchAndDevelopment,0_inc_ebit,0_inc_dilutedEPS,0_inc_basicEPS,0_bal_cashEquivalent,0_bal_receivables,0_bal_finishedGoods,0_bal_workInProcess,0_bal_rawMaterials,0_bal_otherCurrentAssets,0_bal_inventory,0_bal_currentAssets,0_bal_netPPE,0_bal_otherNonCurrentAssets,0_bal_financialAssets,0_bal_goodwill,0_bal_goodwillAndOtherIntangibleAssets,0_bal_otherIntangibleAssets,0_bal_nonCurrentAccountsReceivable,0_bal_totalNonCurrentAssets,0_bal_currentDebt,0_bal_payablesAndAccruedExpenses,0_bal_otherCurrentLiabilities,0_bal_currentLiabilities,0_bal_longTermDebt,0_bal_otherNonCurrentLiabilities,0_bal_nonCurrentDeferredLiabilities,0_bal_nonCurrentDeferredTaxesLiabilities,0_bal_longTermProvisions,0_bal_totalNonCurrentLiabilities,0_bal_retainedEarnings,0_bal_stockholdersEquity,0_bal_totalDebt,0_bal_totalAssets,0_bal_totalLiabilities,0_bal_workingCapital,0_bal_quickRatio,0_bal_currentRatio,0_bal_debtCapitalRatio,0_bal_debtAssetRatio,0_cas_depreciation,0_cas_stockBasedCompensation,0_cas_cashFlowOperatingActivities,0_cas_capitalExpenditure,0_cas_saleOfBusiness,0_cas_purchaseOfPPE,0_cas_cashFlowInvestingActivities,0_cas_cashDividendsPaid,0_cas_repurchaseOfCapitalStock,0_cas_cashFlowFinancingActivities,0_cas_freeCashFlow,1_inc_totalRevenue,1_inc_costOfRevenue,1_inc_grossProfit,1_inc_netIncome,1_inc_operatingIncome,1_inc_netIncomeContinuousOperations,1_inc_netInterestIncome,1_inc_interestIncome,1_inc_otherIncomeExpense,1_inc_operatingExpense,1_inc_totalExpenses,1_inc_taxProvision,1_inc_interestExpense,1_inc_SGA,1_inc_researchAndDevelopment,1_inc_ebit,1_inc_dilutedEPS,1_inc_basicEPS,1_bal_cashEquivalent,1_bal_receivables,1_bal_finishedGoods,1_bal_workInProcess,1_bal_rawMaterials,1_bal_otherCurrentAssets,1_bal_inventory,1_bal_currentAssets,1_bal_netPPE,1_bal_otherNonCurrentAssets,1_bal_financialAssets,1_bal_goodwill,1_bal_goodwillAndOtherIntangibleAssets,1_bal_otherIntangibleAssets,1_bal_nonCurrentAccountsReceivable,1_bal_totalNonCurrentAssets,1_bal_currentDebt,1_bal_payablesAndAccruedExpenses,1_bal_otherCurrentLiabilities,1_bal_currentLiabilities,1_bal_longTermDebt,1_bal_otherNonCurrentLiabilities,1_bal_nonCurrentDeferredLiabilities,1_bal_nonCurrentDeferredTaxesLiabilities,1_bal_longTermProvisions,1_bal_totalNonCurrentLiabilities,1_bal_retainedEarnings,1_bal_stockholdersEquity,1_bal_totalDebt,1_bal_totalAssets,1_bal_totalLiabilities,1_bal_workingCapital,1_bal_quickRatio,1_bal_currentRatio,1_bal_debtCapitalRatio,1_bal_debtAssetRatio,1_cas_depreciation,1_cas_stockBasedCompensation,1_cas_cashFlowOperatingActivities,1_cas_capitalExpenditure,1_cas_saleOfBusiness,1_cas_purchaseOfPPE,1_cas_cashFlowInvestingActivities,1_cas_cashDividendsPaid,1_cas_repurchaseOfCapitalStock,1_cas_cashFlowFinancingActivities,1_cas_freeCashFlow,2_inc_totalRevenue,2_inc_costOfRevenue,2_inc_grossProfit,2_inc_netIncome,2_inc_operatingIncome,2_inc_netIncomeContinuousOperations,2_inc_netInterestIncome,2_inc_interestIncome,2_inc_otherIncomeExpense,2_inc_operatingExpense,2_inc_totalExpenses,2_inc_taxProvision,2_inc_interestExpense,2_inc_SGA,2_inc_researchAndDevelopment,2_inc_ebit,2_inc_dilutedEPS,2_inc_basicEPS,2_bal_cashEquivalent,2_bal_receivables,2_bal_finishedGoods,2_bal_workInProcess,2_bal_rawMaterials,2_bal_otherCurrentAssets,2_bal_inventory,2_bal_currentAssets,2_bal_netPPE,2_bal_otherNonCurrentAssets,2_bal_financialAssets,2_bal_goodwill,2_bal_goodwillAndOtherIntangibleAssets,2_bal_otherIntangibleAssets,2_bal_nonCurrentAccountsReceivable,2_bal_totalNonCurrentAssets,2_bal_currentDebt,2_bal_payablesAndAccruedExpenses,2_bal_otherCurrentLiabilities,2_bal_currentLiabilities,2_bal_longTermDebt,2_bal_otherNonCurrentLiabilities,2_bal_nonCurrentDeferredLiabilities,2_bal_nonCurrentDeferredTaxesLiabilities,2_bal_longTermProvisions,2_bal_totalNonCurrentLiabilities,2_bal_retainedEarnings,2_bal_stockholdersEquity,2_bal_totalDebt,2_bal_totalAssets,2_bal_totalLiabilities,2_bal_workingCapital,2_bal_quickRatio,2_bal_currentRatio,2_bal_debtCapitalRatio,2_bal_debtAssetRatio,2_cas_depreciation,2_cas_stockBasedCompensation,2_cas_cashFlowOperatingActivities,2_cas_capitalExpenditure,2_cas_saleOfBusiness,2_cas_purchaseOfPPE,2_cas_cashFlowInvestingActivities,2_cas_cashDividendsPaid,2_cas_repurchaseOfCapitalStock,2_cas_cashFlowFinancingActivities,2_cas_freeCashFlow,3_inc_totalRevenue,3_inc_costOfRevenue,3_inc_grossProfit,3_inc_netIncome,3_inc_operatingIncome,3_inc_netIncomeContinuousOperations,3_inc_netInterestIncome,3_inc_interestIncome,3_inc_otherIncomeExpense,3_inc_operatingExpense,3_inc_totalExpenses,3_inc_taxProvision,3_inc_interestExpense,3_inc_SGA,3_inc_researchAndDevelopment,3_inc_ebit,3_inc_dilutedEPS,3_inc_basicEPS,3_bal_cashEquivalent,3_bal_receivables,3_bal_finishedGoods,3_bal_workInProcess,3_bal_rawMaterials,3_bal_otherCurrentAssets,3_bal_inventory,3_bal_currentAssets,3_bal_netPPE,3_bal_otherNonCurrentAssets,3_bal_financialAssets,3_bal_goodwill,3_bal_goodwillAndOtherIntangibleAssets,3_bal_otherIntangibleAssets,3_bal_nonCurrentAccountsReceivable,3_bal_totalNonCurrentAssets,3_bal_currentDebt,3_bal_payablesAndAccruedExpenses,3_bal_otherCurrentLiabilities,3_bal_currentLiabilities,3_bal_longTermDebt,3_bal_otherNonCurrentLiabilities,3_bal_nonCurrentDeferredLiabilities,3_bal_nonCurrentDeferredTaxesLiabilities,3_bal_longTermProvisions,3_bal_totalNonCurrentLiabilities,3_bal_retainedEarnings,3_bal_stockholdersEquity,3_bal_totalDebt,3_bal_totalAssets,3_bal_totalLiabilities,3_bal_workingCapital,3_bal_quickRatio,3_bal_currentRatio,3_bal_debtCapitalRatio,3_bal_debtAssetRatio,3_cas_depreciation,3_cas_stockBasedCompensation,3_cas_cashFlowOperatingActivities,3_cas_capitalExpenditure,3_cas_saleOfBusiness,3_cas_purchaseOfPPE,3_cas_cashFlowInvestingActivities,3_cas_cashDividendsPaid,3_cas_repurchaseOfCapitalStock,3_cas_cashFlowFinancingActivities,3_cas_freeCashFlow
633,14383000000,5219000000,8092335000,1802418000,2694300000,1811750000,-170000000,0,-55458000,3189000000,8408000000,402268750,170000000,1645000000,410250000,2539300000,5,5,2516144875,1074000000,717235000,82673138,176000000,299000000,2001000000,8534091250,3303000000,468000000,0,4362000000,4771000000,409000000,0,9257000000,500000000,909000000,0,2123000000,5303000000,1292000500,78000000,78000000,0,6988000000,8494794500,8907000000,5803000000,18018000000,9111000000,3007015000,3,4,39,32,1050000000,184988000,3628898875,847000000,0,0,1920000000,500677500,828678480,2085290000,1956993250,14461000000,5192000000,5258058000,1796992500,2236915500,1953497500,-190000000,0,91000000,3153000000,8345000000,259455375,190000000,1623000000,464715625,2137068500,5,6,4136805500,1414000000,615000000,74505000,180000000,302000000,1955000000,9237878000,3269000000,569000000,0,4362000000,4636000000,274000000,0,9112000000,550000000,1073000000,0,2390000000,6248000000,1305000000,90000000,90000000,0,7774000000,8112528750,9187000000,6798000000,19351000000,10164000000,4255117500,3,4,42,35,992000000,224000000,2637175500,649000000,0,0,922000000,523120000,410500000,1869244375,2144500000,18344000000,5968000000,6861245625,2621000000,3730399000,2727559500,-184000000,0,-53000000,3220000000,9188000000,463595625,184000000,1666000000,660520000,3450000000,8,8,4286540625,1701000000,598000000,145852500,245000000,335000000,1910000000,11036080000,5141000000,748000000,0,4362000000,4447000000,85000000,0,10991000000,500000000,1294000000,0,2569000000,7241000000,1367000000,87000000,87000000,0,8774000000,8872007125,11438468500,7741000000,24676000000,11343000000,4519695000,4,5,36,31,954000000,230000000,4101382375,1502436250,0,0,2310321500,349498125,500000000,2811763750,2544062500,20028000000,6257000000,8555592250,3240396500,4287150000,3185500000,-214000000,0,-108000000,3374000000,9631000000,617225500,214000000,1704000000,796922500,4019745000,9,9,4659959616,1895000000,840281250,197500000,353000000,302000000,2757000000,12262843875,6876000000,1135000000,0,4362000000,4514000000,152000000,0,13186000000,500000000,1686000000,497000000,2985000000,8235000000,1226000000,66000000,66000000,0,9645000000,11707309375,12542007125,8735000000,27207000000,12630000000,5068500000,3,4,37,32,979000000,289000000,4229284000,1833990000,0,0,2492940750,462500000,922500000,2071620500,2558008500
384,21076500000,9961200000,8092335000,1802418000,2694300000,1811750000,-290389500,36700000,85290000,2229400000,12190600000,402268750,509613125,2229400000,0,2539300000,7,7,898500000,2224200000,0,0,0,435200000,50200000,3557900000,12218918125,1485225000,0,2677400000,2677400000,0,0,27942876500,680100000,2940900000,0,3621000000,11703150000,1292000500,1253579500,776741875,0,18462092125,8494794500,-1749797000,15026863250,36448092750,28799159000,-63100000,0,0,120,98,1475982500,109600000,3628898875,1594016750,0,0,2129675755,500677500,828678480,2085290000,1956993250,18974453750,9455700000,5258058000,1796992500,2236915500,1953497500,-298722000,17700000,134600000,2545600000,12001300000,259455375,530286500,2245000000,0,2137068500,6,6,3449100000,2110300000,0,0,0,455747500,51100000,6243200000,12806020500,1572923000,0,2773100000,2773100000,0,0,25705669500,965445625,3236100000,0,6181200000,14134596625,1596205000,1322936875,609401875,0,22891353000,8112528750,-1068444500,14891462000,38070169500,30613300000,62000000,1,1,117,97,1482690500,92400000,2637175500,1431622422,0,0,1545800000,523120000,410500000,1869244375,2144500000,23222900000,10642700000,6861245625,2621000000,3730399000,2727559500,-338562500,9200000,100172500,2707500000,13350200000,463595625,611888500,2377800000,0,3450000000,10,10,4286540625,1872400000,0,0,0,566900000,55600000,7148500000,13540672500,1699950000,0,2782500000,2782500000,0,0,29471341000,705500000,3314500000,0,4020000000,15519732500,1554029125,1475946000,658882500,0,20631776375,8872007125,-1267356500,17434718375,41276325000,29833075000,3128500000,1,1,110,92,1458051875,139200000,4101382375,1502436250,0,0,2165700000,349498125,500000000,2811763750,2544062500,23182600000,9975400000,8555592250,3240396500,4287150000,3185500000,-335434000,44000000,-108000000,2862600000,12838000000,617225500,612888500,2492200000,0,4019745000,8,8,2583800000,2115000000,0,0,0,725400000,52000000,5424200000,13233216000,2000530000,0,2900400000,2900400000,0,0,28624380500,661100000,3141000000,0,3802100000,15525858500,1820413750,1527772000,836750000,0,20519944500,11707309375,-1456170625,16333866125,42057839625,28370460000,1622100000,1,1,114,96,1561281250,166700000,4229284000,1833990000,0,0,2492940750,462500000,922500000,2071620500,2558008500
611,4043000000,1940000000,2103000000,507000000,825000000,489000000,-155000000,7000000,10000000,1278000000,3218000000,191000000,162000000,1157000000,0,842000000,5,5,400000000,2536060000,717235000,24000000,0,30000000,1199000000,5362000000,816000000,30000000,0,970000000,1113000000,143000000,0,2091000000,0,752000000,122000000,1313000000,5755000000,50000000,859000000,776741875,0,6664000000,1785000000,-530000000,5755000000,7453000000,7977000000,3007015000,3,4,110,77,121000000,24000000,453000000,108000000,0,108000000,44000000,166000000,340000000,289000000,344000000,2160000000,1233000000,927000000,-255000000,74000000,-253000000,-185000000,7000000,-120750000,853000000,2086000000,-23000000,192000000,727000000,0,-84000000,-2,-2,1231000000,2436500000,615000000,30000000,0,26000000,1347000000,5617000000,758000000,26000000,0,964000000,1095000000,131000000,0,1996000000,0,705000000,204000000,715000000,6575000000,98000000,1193000000,609401875,0,7866000000,1390000000,-975000000,6575000000,7613000000,8581000000,4255117500,5,6,117,86,126000000,20000000,374000000,69000000,0,69000000,60000000,138000000,128000000,-502000000,305000000,3134000000,1597000000,1537000000,308000000,616000000,313000000,-195000000,3000000,8000000,921000000,2518000000,116000000,198000000,797000000,0,627000000,3,3,396000000,2502000000,791000000,13000000,0,0,1216000000,4488000000,768000000,25000000,0,961000000,1180000000,219000000,0,2100000000,0,767000000,0,1159000000,5449000000,36000000,738000000,658882500,0,6223000000,1587000000,-801000000,5449000000,6588000000,7382000000,3329000000,2,3,117,82,124000000,32000000,568000000,57000000,0,57000000,93000000,109000000,25000000,1288000000,511000000,3567000000,1839000000,1728000000,357000000,679000000,356000000,-189000000,6000000,-4000000,1049000000,2888000000,130000000,195000000,930000000,0,681000000,4,4,562000000,2640000000,840281250,14000000,0,0,1193000000,4757000000,720000000,16000000,0,955000000,1162000000,207000000,0,2000000000,0,725000000,0,1136000000,5753000000,56000000,703000000,679000000,0,6525000000,1808000000,-913000000,5753000000,6757000000,7661000000,3621000000,3,4,118,85,119000000,45000000,442000000,52000000,0,52000000,45000000,135000000,351000000,196000000,390000000
528,3032000000,2152000000,880000000,225000000,510000000,225000000,-209000000,33000000,0,370000000,2522000000,76000000,209000000,305000000,0,510000000,1,1,102000000,35000000,217000000,47000000,154000000,15000000,418000000,570000000,579000000,9000000,0,1879000000,3002000000,1123000000,0,3590000000,21000000,311000000,46000000,378000000,4239000000,19000000,294000000,294000000,0,4600000000,-823000000,-818000000,4260000000,4160000000,4978000000,192000000,0,1,123,98,91000000,0,403000000,109000000,0,109000000,128000000,0,0,196000000,294000000,3263000000,2290000000,973000000,363000000,586000000,363000000,-70000000,0,-31000000,387000000,2677000000,153000000,70000000,358000000,0,586000000,1,1,312000000,309000000,194000000,54000000,171000000,13000000,419000000,1053000000,673000000,25000000,0,1879000000,2971000000,1092000000,0,3669000000,25000000,407000000,83000000,432000000,2259000000,37000000,326000000,326000000,0,2675000000,233000000,1615000000,2284000000,4722000000,3107000000,621000000,1,2,58,48,99000000,5000000,319000000,143000000,0,143000000,143000000,124000000,0,-34000000,176000000,3556000000,2745000000,811000000,324000000,478000000,324000000,-48000000,0,-14000000,333000000,3078000000,106000000,48000000,320000000,0,478000000,1,1,164000000,338000000,276000000,63000000,244000000,19000000,583000000,1104000000,732000000,36000000,0,1879000000,2940000000,1061000000,0,3708000000,25000000,459000000,86000000,484000000,2133000000,38000000,351000000,351000000,0,2572000000,365000000,1756000000,2158000000,4812000000,3056000000,620000000,1,2,55,44,109000000,4000000,310000000,141000000,0,141000000,141000000,192000000,0,317000000,169000000,3817000000,3041000000,776000000,258000000,414000000,258000000,-76000000,6000000,-12000000,350000000,3403000000,80000000,78000000,340000000,0,414000000,1,1,38000000,370000000,383000000,81000000,258000000,41000000,722000000,1171000000,787000000,61000000,0,1879000000,2910000000,1031000000,0,3758000000,39000000,457000000,145000000,496000000,2119000000,47000000,365000000,365000000,0,2565000000,431000000,1868000000,2158000000,4929000000,3061000000,675000000,0,2,53,43,117000000,5000000,219000000,128000000,0,128000000,128000000,192000000,0,217000000,91000000
7,4805239000,1196313000,3608926000,-460212000,-501543000,-455950000,75934000,49480000,13906000,4110469000,5306782000,262636000,9968000,2318700000,410250000,-401735000,-1,-1,2516144875,2536060000,0,0,0,341598000,0,6561443000,686867000,306809000,0,652088000,755000000,102912000,0,1748676000,38022000,3587925000,313091875,5233764000,381374000,271164000,0,0,0,3884040000,-1420991000,-807685000,419396000,8310119000,9117804000,1327679000,1,1,-2,5,114162000,97547000,222727000,125452000,0,125452000,347155000,0,0,-807130000,97275000,3378199000,876042000,2502157000,-900598500,-706915500,-975099500,-144571000,25080000,-120750000,4420387000,6816991000,-57591375,171688000,2310176000,464715625,-824552500,-4,-4,4136805500,2181329000,0,0,0,309954000,0,8916386000,654262000,189164000,0,655801000,731687000,75886000,0,1575113000,56586000,3328525000,356023125,5139779000,2246467000,203470000,0,0,0,2449937000,-2438505750,2901783000,2303053000,10491499000,7589716000,3776607000,1,1,44,21,125876000,232411500,-567325500,37371000,0,37371000,-79590000,0,0,-1297248875,-600500000,5991760000,1155833000,4835927000,-352034000,542166000,-352034000,-338562500,10100000,-64914500,4293761000,5449594000,51827000,437599000,2021656000,660520000,137392000,0,0,4286540625,3191057000,0,0,0,333669000,0,11036080000,428621000,188563000,0,652602000,704910000,52308000,0,1322094000,63479000,4392548500,450187500,6359282000,2355020000,218459000,0,0,0,2573479000,-2840458625,4775713000,2418499000,13708474000,8932761000,4519695000,1,1,33,17,138319000,293795000,2189694000,25322000,0,25322000,1351955000,0,0,-1384352750,2164372000,8399000000,1499000000,6900000000,1893000000,1891000000,1893000000,162000000,62020000,-64000000,5009000000,6508000000,96000000,24000000,2466000000,796922500,2013000000,2,2,4659959616,3675288625,0,0,0,456000000,0,12262843875,259000000,234000000,0,650000000,684000000,34000000,0,1177000000,59000000,4594806500,549130625,7978000000,2282000000,218000000,0,0,0,2500000000,-4110861875,5560000000,2341000000,16038000000,10478000000,5068500000,1,1,29,14,81000000,407387500,3430000000,25000000,0,25000000,28000000,0,922500000,689000000,2558008500


### PCA

In [None]:
from sklearn.decomposition import PCA

In [None]:
pca = PCA(n_components=75)

In [None]:
X_train_pca = pca.fit_transform(X_train_scale)
X_test_pca = pca.transform(X_test_scale)

In [None]:
X_train_pca.shape

### Smote

In [None]:
l = X_train_fsel_mic.shape[1]
stdScale=stdScale=StandardScaler()
trfScaleStd = ColumnTransformer(
                  [('stdScale',stdScale, slice(0,l))
                  ],remainder='passthrough')   

In [None]:
trfSmote = SMOTE()

In [None]:
pipe1 = Pipeline([
     ('trf1',trfScaleStd),
])

In [None]:
pipe2 = Pipeline([
     ('trf2',trfSmote)
])

In [None]:
X_train_temp = pipe1.fit_transform(X_train_fsel_mic)

In [None]:
X_train_smote, y_train_smote = pipe2.fit_resample(X_train_temp,y_train)

In [None]:
X_train_smote.shape

In [None]:
counter = Counter(y_train_smote)
for k, v in counter.items():
  dist = v / len(y_train_smote) * 100
  print(f"Class={k}, n={v} ({dist}%)")

In [None]:
y_train_smote

In [None]:
### Feature Selection - Variance

In [124]:
X_train_sel1.shape

(1260, 244)

In [126]:
X_train_sel1

array([[ 2.45862054e+08,  1.31664713e+08,  1.14197341e+08, ...,
         0.00000000e+00, -3.36538000e+08, -2.73298000e+08],
       [ 5.47100000e+09,  1.97780000e+09,  3.49320000e+09, ...,
         0.00000000e+00,  9.13000000e+07, -8.48300000e+08],
       [ 2.45862054e+08,  1.31664713e+08,  1.14197341e+08, ...,
         0.00000000e+00, -3.36538000e+08, -2.73298000e+08],
       ...,
       [ 7.14006000e+08,  2.40515000e+08,  4.73491000e+08, ...,
         1.60950000e+08,  2.42853000e+08,  2.80364000e+08],
       [ 5.51790000e+09,  3.54430000e+09,  1.97360000e+09, ...,
         2.59800000e+08,  3.70000000e+08,  1.59710000e+09],
       [ 4.59000000e+09,  3.41600000e+09,  1.17400000e+09, ...,
         9.22500000e+08,  2.07162050e+09,  2.55800850e+09]])

### Feature Selection - KBest

In [None]:
trfFSel = SelectKBest(score_func=chi2,k=200)
trfScaleMinMax = MinMaxScaler()

In [None]:
pipe3 = Pipeline([
     ('trf1',trfScaleMinMax),
     ('trf2',trfFSel),   
])

In [None]:
X_train_selKBest = pipe3.fit_transform(X_train,y_train)
X_test_selKBest  = pipe3.fit(X_test,y_test)

### Feature Selection - Correlation

In [62]:
X_train_cap.head()

Unnamed: 0,0_inc_totalRevenue,0_inc_costOfRevenue,0_inc_grossProfit,0_inc_netIncome,0_inc_operatingIncome,0_inc_netIncomeContinuousOperations,0_inc_netInterestIncome,0_inc_interestIncome,0_inc_otherIncomeExpense,0_inc_operatingExpense,0_inc_totalExpenses,0_inc_taxProvision,0_inc_interestExpense,0_inc_SGA,0_inc_researchAndDevelopment,0_inc_ebit,0_inc_dilutedEPS,0_inc_basicEPS,0_bal_cashEquivalent,0_bal_receivables,0_bal_finishedGoods,0_bal_workInProcess,0_bal_rawMaterials,0_bal_otherCurrentAssets,0_bal_inventory,0_bal_currentAssets,0_bal_netPPE,0_bal_otherNonCurrentAssets,0_bal_financialAssets,0_bal_goodwill,0_bal_goodwillAndOtherIntangibleAssets,0_bal_otherIntangibleAssets,0_bal_nonCurrentAccountsReceivable,0_bal_totalNonCurrentAssets,0_bal_currentDebt,0_bal_payablesAndAccruedExpenses,0_bal_otherCurrentLiabilities,0_bal_currentLiabilities,0_bal_longTermDebt,0_bal_otherNonCurrentLiabilities,0_bal_nonCurrentDeferredLiabilities,0_bal_nonCurrentDeferredTaxesLiabilities,0_bal_longTermProvisions,0_bal_totalNonCurrentLiabilities,0_bal_retainedEarnings,0_bal_stockholdersEquity,0_bal_totalDebt,0_bal_totalAssets,0_bal_totalLiabilities,0_bal_workingCapital,0_bal_quickRatio,0_bal_currentRatio,0_bal_debtCapitalRatio,0_bal_debtAssetRatio,0_cas_depreciation,0_cas_stockBasedCompensation,0_cas_cashFlowOperatingActivities,0_cas_capitalExpenditure,0_cas_saleOfBusiness,0_cas_purchaseOfPPE,0_cas_cashFlowInvestingActivities,0_cas_cashDividendsPaid,0_cas_repurchaseOfCapitalStock,0_cas_cashFlowFinancingActivities,0_cas_freeCashFlow,1_inc_totalRevenue,1_inc_costOfRevenue,1_inc_grossProfit,1_inc_netIncome,1_inc_operatingIncome,1_inc_netIncomeContinuousOperations,1_inc_netInterestIncome,1_inc_interestIncome,1_inc_otherIncomeExpense,1_inc_operatingExpense,1_inc_totalExpenses,1_inc_taxProvision,1_inc_interestExpense,1_inc_SGA,1_inc_researchAndDevelopment,1_inc_ebit,1_inc_dilutedEPS,1_inc_basicEPS,1_bal_cashEquivalent,1_bal_receivables,1_bal_finishedGoods,1_bal_workInProcess,1_bal_rawMaterials,1_bal_otherCurrentAssets,1_bal_inventory,1_bal_currentAssets,1_bal_netPPE,1_bal_otherNonCurrentAssets,1_bal_financialAssets,1_bal_goodwill,1_bal_goodwillAndOtherIntangibleAssets,1_bal_otherIntangibleAssets,1_bal_nonCurrentAccountsReceivable,1_bal_totalNonCurrentAssets,1_bal_currentDebt,1_bal_payablesAndAccruedExpenses,1_bal_otherCurrentLiabilities,1_bal_currentLiabilities,1_bal_longTermDebt,1_bal_otherNonCurrentLiabilities,1_bal_nonCurrentDeferredLiabilities,1_bal_nonCurrentDeferredTaxesLiabilities,1_bal_longTermProvisions,1_bal_totalNonCurrentLiabilities,1_bal_retainedEarnings,1_bal_stockholdersEquity,1_bal_totalDebt,1_bal_totalAssets,1_bal_totalLiabilities,1_bal_workingCapital,1_bal_quickRatio,1_bal_currentRatio,1_bal_debtCapitalRatio,1_bal_debtAssetRatio,1_cas_depreciation,1_cas_stockBasedCompensation,1_cas_cashFlowOperatingActivities,1_cas_capitalExpenditure,1_cas_saleOfBusiness,1_cas_purchaseOfPPE,1_cas_cashFlowInvestingActivities,1_cas_cashDividendsPaid,1_cas_repurchaseOfCapitalStock,1_cas_cashFlowFinancingActivities,1_cas_freeCashFlow,2_inc_totalRevenue,2_inc_costOfRevenue,2_inc_grossProfit,2_inc_netIncome,2_inc_operatingIncome,2_inc_netIncomeContinuousOperations,2_inc_netInterestIncome,2_inc_interestIncome,2_inc_otherIncomeExpense,2_inc_operatingExpense,2_inc_totalExpenses,2_inc_taxProvision,2_inc_interestExpense,2_inc_SGA,2_inc_researchAndDevelopment,2_inc_ebit,2_inc_dilutedEPS,2_inc_basicEPS,2_bal_cashEquivalent,2_bal_receivables,2_bal_finishedGoods,2_bal_workInProcess,2_bal_rawMaterials,2_bal_otherCurrentAssets,2_bal_inventory,2_bal_currentAssets,2_bal_netPPE,2_bal_otherNonCurrentAssets,2_bal_financialAssets,2_bal_goodwill,2_bal_goodwillAndOtherIntangibleAssets,2_bal_otherIntangibleAssets,2_bal_nonCurrentAccountsReceivable,2_bal_totalNonCurrentAssets,2_bal_currentDebt,2_bal_payablesAndAccruedExpenses,2_bal_otherCurrentLiabilities,2_bal_currentLiabilities,2_bal_longTermDebt,2_bal_otherNonCurrentLiabilities,2_bal_nonCurrentDeferredLiabilities,2_bal_nonCurrentDeferredTaxesLiabilities,2_bal_longTermProvisions,2_bal_totalNonCurrentLiabilities,2_bal_retainedEarnings,2_bal_stockholdersEquity,2_bal_totalDebt,2_bal_totalAssets,2_bal_totalLiabilities,2_bal_workingCapital,2_bal_quickRatio,2_bal_currentRatio,2_bal_debtCapitalRatio,2_bal_debtAssetRatio,2_cas_depreciation,2_cas_stockBasedCompensation,2_cas_cashFlowOperatingActivities,2_cas_capitalExpenditure,2_cas_saleOfBusiness,2_cas_purchaseOfPPE,2_cas_cashFlowInvestingActivities,2_cas_cashDividendsPaid,2_cas_repurchaseOfCapitalStock,2_cas_cashFlowFinancingActivities,2_cas_freeCashFlow,3_inc_totalRevenue,3_inc_costOfRevenue,3_inc_grossProfit,3_inc_netIncome,3_inc_operatingIncome,3_inc_netIncomeContinuousOperations,3_inc_netInterestIncome,3_inc_interestIncome,3_inc_otherIncomeExpense,3_inc_operatingExpense,3_inc_totalExpenses,3_inc_taxProvision,3_inc_interestExpense,3_inc_SGA,3_inc_researchAndDevelopment,3_inc_ebit,3_inc_dilutedEPS,3_inc_basicEPS,3_bal_cashEquivalent,3_bal_receivables,3_bal_finishedGoods,3_bal_workInProcess,3_bal_rawMaterials,3_bal_otherCurrentAssets,3_bal_inventory,3_bal_currentAssets,3_bal_netPPE,3_bal_otherNonCurrentAssets,3_bal_financialAssets,3_bal_goodwill,3_bal_goodwillAndOtherIntangibleAssets,3_bal_otherIntangibleAssets,3_bal_nonCurrentAccountsReceivable,3_bal_totalNonCurrentAssets,3_bal_currentDebt,3_bal_payablesAndAccruedExpenses,3_bal_otherCurrentLiabilities,3_bal_currentLiabilities,3_bal_longTermDebt,3_bal_otherNonCurrentLiabilities,3_bal_nonCurrentDeferredLiabilities,3_bal_nonCurrentDeferredTaxesLiabilities,3_bal_longTermProvisions,3_bal_totalNonCurrentLiabilities,3_bal_retainedEarnings,3_bal_stockholdersEquity,3_bal_totalDebt,3_bal_totalAssets,3_bal_totalLiabilities,3_bal_workingCapital,3_bal_quickRatio,3_bal_currentRatio,3_bal_debtCapitalRatio,3_bal_debtAssetRatio,3_cas_depreciation,3_cas_stockBasedCompensation,3_cas_cashFlowOperatingActivities,3_cas_capitalExpenditure,3_cas_saleOfBusiness,3_cas_purchaseOfPPE,3_cas_cashFlowInvestingActivities,3_cas_cashDividendsPaid,3_cas_repurchaseOfCapitalStock,3_cas_cashFlowFinancingActivities,3_cas_freeCashFlow
228,5333700000,2818300000,2515400000,790900000,1056000000,790900000,-13500000,400000,1200000,1459400000,4277700000,252800000,13900000,1459400000,0,1057600000,1,1,174900000,741800000,0,0,0,157400000,1366400000,2457200000,1266400000,76300000,0,0,0,0,0,1342700000,100400000,444300000,0,544700000,490200000,0,99400000,99400000,0,589600000,2633900000,2665600000,590600000,3799900000,1134300000,1912500000,2,4,18,15,148700000,5700000,842700000,246400000,0,246400000,239700000,498600000,0,595100000,596300000,5647300000,3079500000,2567800000,859100000,1140400000,859100000,-9100000,600000,1400000,1427400000,4506900000,273600000,9700000,1427400000,0,1142400000,1,1,245700000,769400000,0,0,0,140300000,1337500000,2499600000,1273700000,191400000,0,0,0,0,0,1465100000,133600000,479100000,0,612700000,516500000,0,102300000,102300000,0,618800000,2689600000,2733200000,650100000,3964700000,1231500000,1886900000,1,4,19,16,162400000,5700000,1101800000,168100000,0,168100000,281700000,803400000,52000000,754400000,933700000,6010900000,3233700000,2777200000,925000000,1217400000,925000000,-9600000,100000,0,1559800000,4793500000,282800000,9700000,1559800000,0,1217500000,1,1,236200000,900200000,0,0,0,188100000,1523600000,2856600000,1261500000,180900000,0,0,0,0,0,1442400000,150800000,531400000,0,682200000,486000000,0,88600000,88600000,0,574600000,2970900000,3042200000,636800000,4299000000,1256800000,2174400000,1,4,17,14,170700000,5600000,770100000,156600000,0,156600000,148500000,643700000,0,627100000,613500000,6980600000,3764800000,3215800000,1086900000,1453600000,1086900000,-13600000,700000,0,1762200000,5527000000,353100000,14300000,1762200000,0,1454300000,1,1,230100000,1013200000,0,0,0,165400000,1708000000,3124800000,1253000000,170800000,0,0,0,0,0,1423800000,293700000,496100000,0,789800000,508400000,3500000,83700000,83700000,0,595600000,3218700000,3163200000,802100000,4548600000,1385400000,2335000000,1,3,20,17,176600000,7200000,941000000,173800000,0,173800000,163000000,711300000,237800000,774900000,767200000
399,4200819000,1682234000,2518585000,1107835000,1402939000,1107835000,13023000,13023000,13023000,1115646000,2797880000,308127000,0,1115646000,0,1402939000,1,1,1331020000,540330000,225846000,0,134885000,0,360731000,2316309000,298640000,53973000,0,1331643000,2383748000,1052105000,0,2834043000,0,402837000,0,661097000,0,30505000,287469000,0,0,317974000,5022480000,4171281000,0,5150352000,979071000,1655212000,2,3,11,6,64814000,63356000,1113762000,110398000,0,101661000,326724000,0,707300000,628506000,1003364000,4598638000,1874758000,2723880000,1409594000,1633153000,1409594000,-6996000,0,-6996000,1090727000,2965485000,216563000,6996000,1090727000,0,1633153000,1,1,2061767000,666012000,177919000,0,155166000,0,333085000,3140955000,314656000,70475000,0,1331643000,2390689000,1059046000,0,3061761000,0,462886000,0,749988000,0,27432000,264436000,0,0,291868000,6432074000,5160860000,0,6202716000,1041856000,2390967000,3,4,12,6,60973000,70289000,1364163000,67272000,0,48722000,472487000,0,542631250,526068000,1296891000,5541352000,2432839000,3108513000,1377475000,1797467000,1377475000,0,0,3952000,1311046000,3743885000,423944000,0,1311046000,0,1797467000,1,1,3076189000,896658000,243492000,0,349865000,0,593357000,4682110000,313753000,80252000,0,1331643000,2404029000,1072386000,0,3122674000,0,645626000,0,965076000,0,29508000,243249000,0,0,272757000,7809549000,6566951000,0,7804784000,1237833000,3717034000,2,4,11,5,50155000,70483000,1155741000,57453000,0,43868000,992022000,0,13830000,-34821000,1098288000,6311050000,3136483000,3174567000,1191624000,1584721000,1191624000,0,0,-12757000,1589846000,4726329000,380340000,0,1589846000,0,1584721000,1,1,2669455000,1016203000,466551000,1688000,467392000,109823000,935631000,4764897000,516897000,134478000,0,1417941000,2638351000,1220410000,0,3528208000,0,886204000,0,1001978000,0,42286000,223800000,0,0,266086000,9001173000,7025041000,0,8293105000,1268064000,3762919000,2,4,10,7,61241000,64109000,887699000,212153000,0,188726000,161367000,0,771028000,706938000,675546000
249,2988100000,1026800000,1961300000,137000000,211300000,138400000,-92100000,0,7200000,1750000000,2776800000,-12000000,92100000,707700000,367643750,218500000,0,0,1086400000,30200000,0,0,0,48900000,0,1575300000,455200000,17200000,0,2976500000,4074200000,1097700000,0,4725900000,18400000,438300000,93800000,2001100000,2569700000,17700000,755300000,100900000,0,3518000000,-153500000,772000000,2588100000,6301200000,5519100000,-425800000,0,0,77,41,209700000,147000000,723400000,92300000,0,87600000,135300000,0,458600000,456900000,631100000,3316700000,1158600000,2158100000,-495100000,-358900000,-494100000,-91300000,0,-45200000,2517000000,3675600000,-1300000,91300000,762300000,451272500,-404100000,-2,-2,765200000,41800000,0,0,0,60800000,0,1291300000,399300000,36000000,0,3275100000,4530200000,1255100000,0,5141600000,24300000,578400000,200000,2314200000,3256800000,56400000,817100000,92000000,0,4130500000,-1190900000,-12900000,3281100000,6432900000,6444700000,-687840000,0,0,100,51,202700000,191500000,764600000,81500000,0,66500000,482300000,0,541700000,581700000,683100000,3815700000,1372200000,2443500000,242300000,381800000,242800000,-126000000,0,-2200000,2061700000,3433900000,10800000,126000000,849700000,630821875,379600000,1,1,1255700000,63600000,0,0,0,109900000,0,1889800000,329200000,91200000,0,3540800000,4925500000,1384700000,0,5527300000,24100000,522500000,170000000,2436700000,4000900000,77700000,818600000,75300000,0,4897200000,-1474600000,81700000,4025000000,7417100000,7333900000,-546900000,0,0,98,54,199600000,207900000,829300000,253200000,0,51100000,635600000,0,526000000,-298100000,576100000,4091300000,1484500000,2606800000,352200000,514500000,352900000,-146300000,0,-11700000,2092300000,3576800000,3600000,146300000,797800000,741658750,502800000,2,2,774000000,60100000,0,0,0,271800000,0,1582600000,309700000,95000000,0,3536900000,4789100000,1252200000,0,5390900000,18200000,487600000,90200000,2459800000,3929400000,87100000,826500000,56200000,0,4843000000,-2422600000,-331800000,3947600000,6973500000,7302800000,-877200000,0,0,109,56,194600000,264400000,979700000,60100000,0,59700000,132000000,0,1294600000,1326700000,919600000
328,25282320000,17805649000,1913401000,287111000,727270000,289474000,-221020000,21460000,-55546000,1186131000,23474781750,161230000,221020000,1111347000,42861000,727270000,1,1,1163343000,3509260250,314258000,94790000,423812500,0,2854387500,8345085000,3333750000,213705000,0,622255000,879108000,256853000,0,4625390000,375181000,5003853875,0,8532105000,2121284000,163821000,115818000,115818000,0,2537612000,2037037000,1887443000,2496465000,12970475000,11069717000,-187020000,0,0,56,19,771833000,61346000,1193066000,1005480000,0,582000000,872454000,52004000,350323000,415772000,187586000,26178953750,17556525000,1930813000,53912000,656432000,56779000,-190483000,14559000,-184792250,1274381000,22978230125,203959000,173877000,1174694000,44143000,656432000,0,0,1393557000,3444016625,376542000,100029375,431715000,0,2951125000,9134885000,4028159000,162242000,0,696853000,906723000,209870000,0,5262531000,160917000,4823258750,0,9059483000,2980323000,268925000,114657000,114657000,0,3512534000,2040922000,1811384000,3141240000,14397416000,12572017000,75402000,0,1,63,21,794581000,83084000,1257275000,983035000,0,540946250,921113000,50462000,214510000,65123000,274240000,29285000000,21202428875,2359000000,696000000,1065000000,698000000,-113000000,6000000,-8000000,1294000000,27058157500,246000000,130000000,1213000000,34000000,1065000000,4,4,1567000000,4139000000,680000000,145852500,573167500,0,3467073500,10877000000,4465000000,239000000,0,715000000,897000000,182000000,0,5777000000,108000000,6001749500,0,10084858500,3211000000,334000000,111000000,111000000,0,3834000000,2688000000,2136000000,3319000000,16654000000,14517000000,194000000,0,1,60,19,876000000,102000000,1433000000,1159000000,0,696250000,851000000,50000000,428000000,413000000,274000000,33478000000,23664981750,2632000000,996000000,1411000000,996000000,-158000000,5000000,-22000000,1221000000,31601350125,235000000,151000000,1154000000,33000000,1411000000,6,7,1478000000,4681406875,605000000,158151875,752812500,0,4098149875,13724392875,4454000000,294000000,0,704000000,862000000,158000000,0,5809000000,419000000,6412158250,0,11073231375,2992000000,272000000,122000000,122000000,0,3568000000,3638000000,2451000000,3411000000,19717000000,17265000000,211000000,0,1,58,17,925000000,81000000,1651000000,1385000000,0,767250000,858000000,48000000,696000000,888000000,266000000
233,2242447000,356577000,1885870000,427734000,518463000,427734000,0,0,22648000,1367407000,1723984000,113377000,0,959349000,367643750,518463000,7,7,972282000,322029000,22441000,0,11960000,182874000,34401000,1511586000,223426000,203781000,0,1065379000,1065379000,108903000,0,1878689000,0,298496000,0,1105526000,0,131853000,391399000,313000,0,523252000,1638090000,1761497000,0,3390275000,1628778000,406060000,1,1,11,6,68507000,162914000,747841000,103542000,0,103542000,414634000,0,201045000,155447000,644299000,2350822000,407887000,1942935000,307441000,400067000,307441000,0,0,-3670000,1542868000,1950755000,88956000,0,1101544000,441324000,400067000,5,5,1209889000,296183000,17096000,0,10802000,259506000,27898000,1793476000,529919000,347447000,0,1858966000,1858966000,225900000,0,2884444000,19275000,385870000,0,1288279000,707762000,59511000,390100000,602000,0,1157373000,1945531000,2232268000,727037000,4677920000,2445652000,505197000,1,1,24,15,95857000,201948000,660898000,59940000,0,59940000,747002000,0,100016000,-337243000,600958000,2603416000,493146000,2110270000,331241000,394025000,331241000,0,0,-7088000,1716245000,2209391000,55696000,0,1203618000,512627000,394025000,5,5,910607000,340536000,13081000,0,8974000,337902000,22055000,1611100000,436098000,472558000,0,2216553000,2216553000,237178000,0,3386180000,19275000,403583000,0,1391527000,646717000,75236000,523587000,2414000,0,1245540000,2187828000,2360213000,665992000,4997280000,2637067000,219573000,1,1,22,13,115424000,243279000,645196000,30651000,0,30651000,445335000,0,500000000,468280000,614545000,2695845000,539627000,2156218000,322160000,411701000,322160000,0,0,-26308000,1744517000,2284144000,63233000,0,1201149000,543368000,411701000,5,5,884566000,469979000,10164000,0,58201000,489314000,68365000,1912224000,395657000,516122000,0,2259282000,2259282000,200288000,0,3363970000,349772000,422997000,0,1839951000,272376000,67710000,627179000,2781000,0,967265000,2404106000,2468978000,622148000,5276194000,2807216000,72273000,1,1,20,11,115609000,249216000,442631000,33624000,0,33624000,-218116000,0,500023000,476508000,409007000


In [63]:
pd.options.display.float_format = '{:.2f}'.format
df_temp = X_train_cap.corr()
df_temp[(df_temp > 0.95) & (df_temp != 1)].stack()

0_inc_totalRevenue                        0_inc_costOfRevenue                        0.95
                                          0_inc_totalExpenses                        0.99
                                          1_inc_totalRevenue                         0.97
                                          1_inc_totalExpenses                        0.98
                                          2_inc_totalRevenue                         0.96
                                          2_inc_totalExpenses                        0.96
                                          3_inc_totalRevenue                         0.95
                                          3_inc_totalExpenses                        0.95
0_inc_costOfRevenue                       0_inc_totalRevenue                         0.95
                                          0_inc_totalExpenses                        0.96
                                          1_inc_costOfRevenue                        0.98
          

In [64]:
# Select highly correlated features

def correlation(dataset, threshold):
  col_corr = set()
  corr_matrix = dataset.corr()
  for i in range(len(corr_matrix.columns)):
    for j in range(i):
        if corr_matrix.iloc[i,j] > threshold:
          colname = corr_matrix.columns[i]
          col_corr.add(colname)
  return col_corr

In [65]:
corr_features = correlation(X_train_cap,0.90)
len(set(corr_features))

152

In [66]:
X_train_sel2 = X_train.drop(corr_features,axis=1)
X_test_sel2 = X_test.drop(corr_features,axis=1)

In [67]:
X_train_sel2.shape

(582, 108)

In [68]:
X_test_sel2.shape

(146, 108)

### Feature Selection - Mutual Info Classification

In [69]:
from sklearn.feature_selection import mutual_info_classif

mutual_info = mutual_info_classif(X_train_sel2, y_train)
#mutual_info

In [70]:
pd.options.display.float_format = '{:.2f}'.format
mutual_info = pd.Series(mutual_info)
mutual_info.index = X_train_sel2.columns
mutual_info.sort_values(ascending=False)
df_temp = pd.DataFrame(mutual_info)
df_temp = df_temp.reset_index()
df_temp.columns = ['feature','value']
df_temp.sort_values(by='value',ascending=False).reset_index(drop=True)


Unnamed: 0,feature,value
0,2_inc_netIncome,0.23
1,2_inc_dilutedEPS,0.23
2,1_cas_cashFlowOperatingActivities,0.23
3,3_cas_freeCashFlow,0.21
4,3_inc_taxProvision,0.2
5,2_cas_freeCashFlow,0.19
6,3_inc_dilutedEPS,0.19
7,3_inc_netIncome,0.19
8,2_inc_taxProvision,0.16
9,1_inc_netIncome,0.15


In [71]:
from sklearn.feature_selection import SelectKBest
# Select Top 100 features

sel_top_cols = SelectKBest(mutual_info_classif, k=100)
sel_top_cols.fit(X_train_sel2,y_train)
cols = X_train_sel2.columns[sel_top_cols.get_support()]

In [72]:
cols

Index(['0_inc_totalRevenue', '0_inc_grossProfit', '0_inc_netIncome',
       '0_inc_operatingIncome', '0_inc_netInterestIncome',
       '0_inc_otherIncomeExpense', '0_inc_taxProvision',
       '0_inc_interestExpense', '0_inc_researchAndDevelopment',
       '0_inc_dilutedEPS', '0_bal_cashEquivalent', '0_bal_receivables',
       '0_bal_finishedGoods', '0_bal_workInProcess', '0_bal_rawMaterials',
       '0_bal_otherCurrentAssets', '0_bal_inventory', '0_bal_netPPE',
       '0_bal_otherNonCurrentAssets', '0_bal_goodwill',
       '0_bal_otherIntangibleAssets', '0_bal_nonCurrentAccountsReceivable',
       '0_bal_totalNonCurrentAssets', '0_bal_otherCurrentLiabilities',
       '0_bal_otherNonCurrentLiabilities',
       '0_bal_nonCurrentDeferredLiabilities',
       '0_bal_nonCurrentDeferredTaxesLiabilities', '0_bal_longTermProvisions',
       '0_bal_retainedEarnings', '0_bal_stockholdersEquity',
       '0_bal_workingCapital', '0_bal_quickRatio', '0_bal_currentRatio',
       '0_bal_debtCapitalRati

In [73]:
X_train_sel2.shape

(582, 108)

In [74]:
X_train_fsel_mic = X_train_sel2[cols]
X_test_fsel_mic = X_test_sel2[cols]

In [75]:
X_train_fsel_mic.shape

(582, 100)

In [76]:
X_test_fsel_mic.shape

(146, 100)

In [77]:
X_train_fsel_mic.head()

Unnamed: 0,0_inc_totalRevenue,0_inc_grossProfit,0_inc_netIncome,0_inc_operatingIncome,0_inc_netInterestIncome,0_inc_otherIncomeExpense,0_inc_taxProvision,0_inc_interestExpense,0_inc_researchAndDevelopment,0_inc_dilutedEPS,0_bal_cashEquivalent,0_bal_receivables,0_bal_finishedGoods,0_bal_workInProcess,0_bal_rawMaterials,0_bal_otherCurrentAssets,0_bal_inventory,0_bal_netPPE,0_bal_otherNonCurrentAssets,0_bal_goodwill,0_bal_otherIntangibleAssets,0_bal_nonCurrentAccountsReceivable,0_bal_totalNonCurrentAssets,0_bal_otherCurrentLiabilities,0_bal_otherNonCurrentLiabilities,0_bal_nonCurrentDeferredLiabilities,0_bal_nonCurrentDeferredTaxesLiabilities,0_bal_longTermProvisions,0_bal_retainedEarnings,0_bal_stockholdersEquity,0_bal_workingCapital,0_bal_quickRatio,0_bal_currentRatio,0_bal_debtCapitalRatio,0_bal_debtAssetRatio,0_cas_stockBasedCompensation,0_cas_saleOfBusiness,0_cas_purchaseOfPPE,0_cas_cashDividendsPaid,0_cas_repurchaseOfCapitalStock,0_cas_cashFlowFinancingActivities,0_cas_freeCashFlow,1_inc_netIncome,1_inc_operatingIncome,1_inc_interestIncome,1_inc_otherIncomeExpense,1_inc_taxProvision,1_inc_dilutedEPS,1_bal_cashEquivalent,1_bal_nonCurrentAccountsReceivable,1_bal_currentDebt,1_bal_longTermProvisions,1_bal_workingCapital,1_bal_quickRatio,1_bal_currentRatio,1_bal_debtCapitalRatio,1_bal_debtAssetRatio,1_cas_stockBasedCompensation,1_cas_cashFlowOperatingActivities,1_cas_repurchaseOfCapitalStock,1_cas_cashFlowFinancingActivities,1_cas_freeCashFlow,2_inc_netIncome,2_inc_otherIncomeExpense,2_inc_taxProvision,2_inc_dilutedEPS,2_bal_financialAssets,2_bal_nonCurrentAccountsReceivable,2_bal_currentDebt,2_bal_otherCurrentLiabilities,2_bal_longTermProvisions,2_bal_workingCapital,2_bal_quickRatio,2_bal_currentRatio,2_bal_debtCapitalRatio,2_bal_debtAssetRatio,2_cas_saleOfBusiness,2_cas_cashFlowInvestingActivities,2_cas_repurchaseOfCapitalStock,2_cas_cashFlowFinancingActivities,2_cas_freeCashFlow,3_inc_netIncome,3_inc_netInterestIncome,3_inc_interestIncome,3_inc_otherIncomeExpense,3_inc_taxProvision,3_inc_dilutedEPS,3_bal_otherCurrentAssets,3_bal_financialAssets,3_bal_nonCurrentAccountsReceivable,3_bal_currentDebt,3_bal_longTermProvisions,3_bal_workingCapital,3_bal_quickRatio,3_bal_currentRatio,3_cas_saleOfBusiness,3_cas_cashFlowInvestingActivities,3_cas_repurchaseOfCapitalStock,3_cas_cashFlowFinancingActivities,3_cas_freeCashFlow
228,5333700000,2515400000,790900000,1056000000,-13500000,1200000,252800000,13900000,0,1,174900000,741800000,0,0,0,157400000,1366400000,1266400000,76300000,0,0,0,1342700000,0,0,99400000,99400000,0,2633900000,2665600000,1912500000,2,4,18,15,5700000,0,246400000,498600000,0,595100000,596300000,859100000,1140400000,600000,1400000,273600000,1,245700000,0,133600000,0,1886900000,1,4,19,16,5700000,1101800000,52000000,754400000,933700000,925000000,0,282800000,1,0,0,150800000,0,0,2174400000,1,4,17,14,0,148500000,0,627100000,613500000,1086900000,-13600000,700000,0,353100000,1,165400000,0,0,293700000,0,2335000000,1,3,0,163000000,237800000,774900000,767200000
399,4200819000,2518585000,1107835000,1402939000,13023000,13023000,308127000,0,0,1,1331020000,540330000,225846000,0,134885000,0,360731000,298640000,53973000,1331643000,1052105000,0,2834043000,0,30505000,287469000,0,0,5022480000,4171281000,1655212000,2,3,0,0,63356000,0,101661000,0,707300000,628506000,1003364000,1409594000,1633153000,0,-6996000,216563000,1,2061767000,0,0,0,2390967000,3,4,0,0,70289000,1364163000,595918000,526068000,1296891000,1377475000,3952000,423944000,1,0,0,0,0,0,3717034000,4,4,0,0,0,992022000,13830000,-34821000,1098288000,1191624000,0,0,-12757000,380340000,1,109823000,0,0,0,0,3762919000,3,4,0,161367000,771028000,706938000,675546000
249,2988100000,1961300000,137000000,211300000,-92100000,7200000,-12000000,92100000,492600000,0,1086400000,30200000,0,0,0,48900000,0,455200000,17200000,2976500000,1097700000,0,4725900000,93800000,17700000,755300000,100900000,0,-153500000,772000000,-425800000,0,0,77,41,147000000,0,87600000,0,458600000,456900000,631100000,-495100000,-358900000,0,-45200000,-1300000,-2,765200000,0,24300000,0,-1022900000,0,0,100,51,191500000,764600000,541700000,581700000,683100000,242300000,-2200000,10800000,1,0,0,24100000,170000000,0,-546900000,0,0,98,54,0,635600000,526000000,-298100000,576100000,352200000,-146300000,0,-11700000,3600000,2,271800000,0,0,18200000,0,-877200000,0,0,0,132000000,1294600000,1326700000,919600000
328,25282320000,1913401000,287111000,727270000,-221020000,-55546000,161230000,221020000,42861000,1,1163343000,3657166000,314258000,468217000,2310081000,0,3023003000,3333750000,213705000,622255000,256853000,0,4625390000,0,163821000,115818000,115818000,0,2037037000,1887443000,-187020000,0,0,56,19,61346000,0,1005480000,52004000,350323000,415772000,187586000,53912000,656432000,14559000,-205211000,203959000,0,1393557000,0,160917000,0,75402000,0,1,63,21,83084000,1257275000,214510000,65123000,274240000,696000000,-8000000,246000000,4,0,0,108000000,0,0,194000000,0,1,60,19,0,851000000,428000000,413000000,274000000,996000000,-158000000,5000000,-22000000,235000000,6,0,0,0,419000000,0,211000000,0,1,0,858000000,696000000,888000000,266000000
233,2242447000,1885870000,427734000,518463000,0,22648000,113377000,0,408058000,7,972282000,322029000,22441000,0,11960000,182874000,34401000,223426000,203781000,1065379000,108903000,53085000,1878689000,0,131853000,391399000,313000,0,1638090000,1761497000,406060000,1,1,0,0,162914000,0,103542000,0,201045000,155447000,644299000,307441000,400067000,0,-3670000,88956000,5,1209889000,62377000,19275000,0,505197000,1,1,24,15,201948000,660898000,100016000,-337243000,600958000,331241000,-7088000,55696000,5,0,158885000,19275000,0,0,219573000,1,1,22,13,0,445335000,500000000,468280000,614545000,322160000,0,0,-26308000,63233000,5,489314000,0,224780000,349772000,0,72273000,1,1,0,-218116000,500023000,476508000,409007000


### Feature Selection - Variance

In [156]:
#var_thres=VarianceThreshold(threshold=0)
#X_train_fsel_var = var_thres.fit_transform(X_train_fsel_mic)
#X_test_fsel_var = var_thres.transform(X_test_fsel_mic)

In [157]:
#X_train_fsel_var.shape

(1260, 100)

### Standard Scaler

In [159]:
stdScaler=StandardScaler()
l = X_train.shape[1]
ct1 = ColumnTransformer(
          [('stdScaler',stdScaler, slice(0,l))
          ],remainder='passthrough')   

In [160]:
# Apply StandardScaler to All Columns

X_train_fsel_scale = ct1.fit_transform(X_train_fsel_mic)
X_test_fsel_scale = ct1.transform(X_test_fsel_mic)

## Model Selection

In [46]:
def train_eval_model(model,train_X,train_y,test_X,test_y,use_sample_weights=False):

  if use_sample_weights:
    class_weights = class_weight.compute_sample_weight(
            class_weight='balanced',
            y=train_y)
    model.fit(train_X, train_y,sample_weight = class_weights)
  else:
    model.fit(train_X, train_y)  
      
  y_pred = model.predict(test_X)
  y_train_predict = model.predict(train_X)

  train_accuracy = round(accuracy_score(train_y, y_train_predict),3)
  test_accuracy = round(accuracy_score(test_y, y_pred),3)

  train_f1 = round(f1_score(train_y, y_train_predict,zero_division=np.nan, average='weighted'),3)
  test_f1 = round(f1_score(test_y, y_pred,zero_division=np.nan, average='weighted'),3)
    
  print('Train accuracy',train_accuracy)
  print('Test accuracy',test_accuracy) 
  print
    
  print('Train F1 Score',train_f1)
  print('Test F1 Score',test_f1) 
  print("")

  cross_val_accuracy = round(np.mean(cross_val_score(model,train_X,train_y,scoring='accuracy',cv=5)),3)
 
  cross_val_f1 = round(np.mean(cross_val_score(model,train_X,train_y,
                                         scoring=make_scorer(f1_score, average='weighted'),
                                         cv=5)),3)
  
  print('Cross val accuracy:',cross_val_accuracy)
  print('Cross val f1:',cross_val_f1)
  print("")
  print(pd.crosstab(test_y,y_pred))
  print("")
  print(classification_report(test_y, y_pred))


### Basesline Evaluation

Default parameters, no modifications to dataset.

#### XGBoost

In [47]:
model = xgb.XGBClassifier(
                       random_state=0)      

train_eval_model(model, X_train, y_train, X_test, y_test)


Train accuracy 1.0
Test accuracy 0.685
Train F1 Score 1.0
Test F1 Score 0.696

Cross val accuracy: 0.649
Cross val f1: 0.632

col_0  0   1   2   3  4
label                  
0      5   0   0   0  0
1      1  10   6   0  0
2      0   4  32  19  0
3      0   1   8  53  1
4      0   0   1   5  0

              precision    recall  f1-score   support

           0       0.83      1.00      0.91         5
           1       0.67      0.59      0.62        17
           2       0.68      0.58      0.63        55
           3       0.69      0.84      0.76        63
           4       0.00      0.00      0.00         6

    accuracy                           0.68       146
   macro avg       0.57      0.60      0.58       146
weighted avg       0.66      0.68      0.67       146



In [1]:
model = xgb.XGBClassifier(
                       random_state=50)      

train_eval_model(model, X_train, y_train, X_test, y_test,use_sample_weights=True)

NameError: name 'xgb' is not defined

### Adaboost

In [49]:
model = AdaBoostClassifier(n_estimators=200,algorithm='SAMME')
train_eval_model(model, X_train, y_train, X_test, y_test)

Train accuracy 0.603
Test accuracy 0.527
Train F1 Score 0.579
Test F1 Score 0.55

Cross val accuracy: 0.551
Cross val f1: 0.536

col_0  0   1   2   3
label               
0      0   5   0   0
1      1  11   5   0
2      0   5  35  15
3      0   1  31  31
4      0   0   2   4

              precision    recall  f1-score   support

           0       0.00      0.00      0.00         5
           1       0.50      0.65      0.56        17
           2       0.48      0.64      0.55        55
           3       0.62      0.49      0.55        63
           4       0.00      0.00      0.00         6

    accuracy                           0.53       146
   macro avg       0.32      0.36      0.33       146
weighted avg       0.51      0.53      0.51       146



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [50]:
model = AdaBoostClassifier(n_estimators=200,algorithm='SAMME.R')
train_eval_model(model, X_train, y_train, X_test, y_test)

Train accuracy 0.607
Test accuracy 0.555
Train F1 Score 0.584
Test F1 Score 0.53

Cross val accuracy: 0.521
Cross val f1: 0.467

col_0  0  1   2   3
label              
0      1  4   0   0
1      0  8   7   2
2      0  1  16  38
3      0  0   7  56
4      0  0   0   6

              precision    recall  f1-score   support

           0       1.00      0.20      0.33         5
           1       0.62      0.47      0.53        17
           2       0.53      0.29      0.38        55
           3       0.55      0.89      0.68        63
           4       0.00      0.00      0.00         6

    accuracy                           0.55       146
   macro avg       0.54      0.37      0.38       146
weighted avg       0.54      0.55      0.51       146



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


### Random Forest

In [51]:
model = RandomForestClassifier(random_state=0)
train_eval_model(model, X_train, y_train, X_test, y_test)

Train accuracy 1.0
Test accuracy 0.671
Train F1 Score 1.0
Test F1 Score 0.684

Cross val accuracy: 0.663
Cross val f1: 0.645

col_0  0   1   2   3
label               
0      5   0   0   0
1      2  10   5   0
2      0   2  35  18
3      0   0  15  48
4      0   0   0   6

              precision    recall  f1-score   support

           0       0.71      1.00      0.83         5
           1       0.83      0.59      0.69        17
           2       0.64      0.64      0.64        55
           3       0.67      0.76      0.71        63
           4       0.00      0.00      0.00         6

    accuracy                           0.67       146
   macro avg       0.57      0.60      0.57       146
weighted avg       0.65      0.67      0.66       146



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [52]:
model = RandomForestClassifier(random_state=0)
train_eval_model(model, X_train, y_train, X_test, y_test,use_sample_weights=True)

Train accuracy 1.0
Test accuracy 0.685
Train F1 Score 1.0
Test F1 Score 0.69

Cross val accuracy: 0.663
Cross val f1: 0.645

col_0  0   1   2   3
label               
0      5   0   0   0
1      2  10   5   0
2      0   3  30  22
3      0   0   8  55
4      0   0   0   6

              precision    recall  f1-score   support

           0       0.71      1.00      0.83         5
           1       0.77      0.59      0.67        17
           2       0.70      0.55      0.61        55
           3       0.66      0.87      0.75        63
           4       0.00      0.00      0.00         6

    accuracy                           0.68       146
   macro avg       0.57      0.60      0.57       146
weighted avg       0.66      0.68      0.66       146



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


### Gradient Boosting

In [53]:
model=GradientBoostingClassifier()
train_eval_model(model, X_train, y_train, X_test, y_test)

Train accuracy 1.0
Test accuracy 0.719
Train F1 Score 1.0
Test F1 Score 0.707

Cross val accuracy: 0.655
Cross val f1: 0.641

col_0  0  1   2   3  4
label                 
0      5  0   0   0  0
1      1  9   7   0  0
2      0  4  37  14  0
3      1  0   9  53  0
4      0  0   0   5  1

              precision    recall  f1-score   support

           0       0.71      1.00      0.83         5
           1       0.69      0.53      0.60        17
           2       0.70      0.67      0.69        55
           3       0.74      0.84      0.79        63
           4       1.00      0.17      0.29         6

    accuracy                           0.72       146
   macro avg       0.77      0.64      0.64       146
weighted avg       0.73      0.72      0.71       146



In [66]:
model=GradientBoostingClassifier()
train_eval_model(model, X_train, y_train, X_test, y_test)

Train accuracy 1.0
Test accuracy 0.658
Train F1 Score 1.0
Test F1 Score 0.656

Cross val accuracy: 0.919
Cross val f1: 0.917

col_0  0   1   2   3
label               
0      5   0   0   0
1      2  10   5   0
2      2   6  25  22
3      1   1   5  56
4      0   0   0   6

              precision    recall  f1-score   support

           0       0.50      1.00      0.67         5
           1       0.59      0.59      0.59        17
           2       0.71      0.45      0.56        55
           3       0.67      0.89      0.76        63
           4       0.00      0.00      0.00         6

    accuracy                           0.66       146
   macro avg       0.49      0.59      0.51       146
weighted avg       0.64      0.66      0.63       146



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [38]:
model=GradientBoostingClassifier()
train_eval_model(model, X_train, y_train, X_test, y_test,use_sample_weights=True)

Train accuracy 1.0
Test accuracy 0.658
Train F1 Score 1.0
Test F1 Score 0.653

Cross val accuracy: 0.656
Cross val f1: 0.635

col_0  0  1   2   3  4
label                 
0      5  0   0   0  0
1      3  9   5   0  0
2      0  5  33  16  1
3      1  0  15  47  0
4      0  0   0   4  2

              precision    recall  f1-score   support

           0       0.56      1.00      0.71         5
           1       0.64      0.53      0.58        17
           2       0.62      0.60      0.61        55
           3       0.70      0.75      0.72        63
           4       0.67      0.33      0.44         6

    accuracy                           0.66       146
   macro avg       0.64      0.64      0.61       146
weighted avg       0.66      0.66      0.65       146



## After Scaling Data with RobustScaler

### KNN

In [58]:
model = KNeighborsClassifier(n_neighbors=5)
train_eval_model(model, X_train_robustScale, y_train, X_test_robustScale, y_test)

Train accuracy 0.634
Test accuracy 0.425
Train F1 Score 0.618
Test F1 Score 0.429

Cross val accuracy: 0.435
Cross val f1: 0.421

col_0  0  1   2   3
label              
0      3  1   1   0
1      0  2  10   5
2      1  4  26  24
3      0  3  29  31
4      0  1   3   2

              precision    recall  f1-score   support

           0       0.75      0.60      0.67         5
           1       0.18      0.12      0.14        17
           2       0.38      0.47      0.42        55
           3       0.50      0.49      0.50        63
           4       0.00      0.00      0.00         6

    accuracy                           0.42       146
   macro avg       0.36      0.34      0.34       146
weighted avg       0.40      0.42      0.41       146



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [59]:
model=KNeighborsClassifier()
param_grid = {'n_neighbors': np.arange(1, 200)}

gridSearch = GridSearchCV(
    estimator=model,
    param_grid=param_grid,
    scoring='accuracy',
    verbose=10,
    cv=5
)
gridSearch.fit(X_train_robustScale,y_train)

Fitting 5 folds for each of 199 candidates, totalling 995 fits
[CV 1/5; 1/199] START n_neighbors=1.............................................
[CV 1/5; 1/199] END ..............n_neighbors=1;, score=0.342 total time=   0.0s
[CV 2/5; 1/199] START n_neighbors=1.............................................
[CV 2/5; 1/199] END ..............n_neighbors=1;, score=0.368 total time=   0.0s
[CV 3/5; 1/199] START n_neighbors=1.............................................
[CV 3/5; 1/199] END ..............n_neighbors=1;, score=0.371 total time=   0.0s
[CV 4/5; 1/199] START n_neighbors=1.............................................
[CV 4/5; 1/199] END ..............n_neighbors=1;, score=0.457 total time=   0.0s
[CV 5/5; 1/199] START n_neighbors=1.............................................
[CV 5/5; 1/199] END ..............n_neighbors=1;, score=0.405 total time=   0.0s
[CV 1/5; 2/199] START n_neighbors=2.............................................
[CV 1/5; 2/199] END ..............n_neighbors=

In [60]:
gridSearch.best_params_

{'n_neighbors': 46}

In [61]:
gridSearch.best_score_

0.47422634836427935

In [62]:
model = KNeighborsClassifier(n_neighbors=46)
train_eval_model(model, X_train_robustScale, y_train, X_test_robustScale, y_test)

Train accuracy 0.515
Test accuracy 0.418
Train F1 Score 0.541
Test F1 Score 0.417

Cross val accuracy: 0.474
Cross val f1: 0.391

col_0   2   3
label        
0       4   1
1       9   8
2      10  45
3      12  51
4       0   6

              precision    recall  f1-score   support

           0       0.00      0.00      0.00         5
           1       0.00      0.00      0.00        17
           2       0.29      0.18      0.22        55
           3       0.46      0.81      0.59        63
           4       0.00      0.00      0.00         6

    accuracy                           0.42       146
   macro avg       0.15      0.20      0.16       146
weighted avg       0.31      0.42      0.34       146



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


### Cap Outliers

In [None]:
# XGBoost

model = xgb.XGBClassifier(
                       objective='multi:softmax',
                       random_state=50)      

train_eval_model(model, X_train_ext_cap, y_train, X_test_ext_cap, y_test)

In [None]:
# Random Forest

model = RandomForestClassifier(random_state=0)
train_eval_model(model, X_train_ext_cap, y_train, X_test_ext_cap, y_test)

### Remove Corelated Features

In [None]:
# XGBoost

model = xgb.XGBClassifier(
                       objective='multi:softmax',
                       random_state=50)      

train_eval_model(model, X_train_sel2, y_train, X_test_sel2, y_test)

In [None]:
# Random Forest

model = RandomForestClassifier(random_state=0)
train_eval_model(model, X_train_sel2, y_train, X_test_sel2, y_test)

### Mutial Info Classification

In [None]:
# XGBoost

model = xgb.XGBClassifier(
                       objective='multi:softmax',
                       random_state=50)      

train_eval_model(model, X_train_fsel_mic, y_train, X_test_fsel_mic, y_test)

In [None]:
# Random Forest

model = RandomForestClassifier(random_state=0)
train_eval_model(model, X_train_fsel_mic, y_train, X_test_fsel_mic, y_test)

### Optimised Models

### XGBoost

In [91]:
model = xgb.XGBClassifier(
                       max_depth = 3,
                       n_estimators = 120,
                       learning_rate = 0.14,
                       gamma = 0.01,
                       reg_lambda = 0.5,
                       reg_alpha = 0.6,
                       min_child_weight=5,
                       random_state=100)      

train_eval_model(model, X_train_fsel_mic, y_train, X_test_fsel_mic, y_test)

Train accuracy 1.0
Test accuracy 0.74
Train F1 Score 1.0
Test F1 Score 0.73

Cross val accuracy: 0.636
Cross val f1: 0.618

col_0  0   1   2   3  4
label                  
0      5   0   0   0  0
1      0  13   4   0  0
2      0   3  37  15  0
3      0   1  10  52  0
4      0   0   0   5  1

              precision    recall  f1-score   support

           0       1.00      1.00      1.00         5
           1       0.76      0.76      0.76        17
           2       0.73      0.67      0.70        55
           3       0.72      0.83      0.77        63
           4       1.00      0.17      0.29         6

    accuracy                           0.74       146
   macro avg       0.84      0.69      0.70       146
weighted avg       0.75      0.74      0.73       146



### Gradient Boosting

In [95]:
model=GradientBoostingClassifier( n_estimators = 75,
                                  max_depth = 3,
                                  learning_rate = 0.16 )
    
train_eval_model(model, X_train_fsel_mic, y_train, X_test_fsel_mic, y_test)

Train accuracy 1.0
Test accuracy 0.705
Train F1 Score 1.0
Test F1 Score 0.722

Cross val accuracy: 0.662
Cross val f1: 0.665

col_0  0   1   2   3  4
label                  
0      5   0   0   0  0
1      1  11   5   0  0
2      0   2  40  13  0
3      1   1  13  47  1
4      0   0   0   6  0

              precision    recall  f1-score   support

           0       0.71      1.00      0.83         5
           1       0.79      0.65      0.71        17
           2       0.69      0.73      0.71        55
           3       0.71      0.75      0.73        63
           4       0.00      0.00      0.00         6

    accuracy                           0.71       146
   macro avg       0.58      0.62      0.60       146
weighted avg       0.68      0.71      0.69       146



### Random Forest

In [128]:
model=RandomForestClassifier(n_estimators = 100,
                             max_depth = 6,
                             random_state=0)
train_eval_model(model, X_train_fsel_mic, y_train, X_test_fsel_mic, y_test)

Train accuracy 0.897
Test accuracy 0.699
Train F1 Score 0.882
Test F1 Score 0.708

Cross val accuracy: 0.663
Cross val f1: 0.643

col_0  0   1   2   3
label               
0      5   0   0   0
1      1  10   6   0
2      0   2  33  20
3      0   0   9  54
4      0   0   0   6

              precision    recall  f1-score   support

           0       0.83      1.00      0.91         5
           1       0.83      0.59      0.69        17
           2       0.69      0.60      0.64        55
           3       0.68      0.86      0.76        63
           4       0.00      0.00      0.00         6

    accuracy                           0.70       146
   macro avg       0.61      0.61      0.60       146
weighted avg       0.68      0.70      0.68       146



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
