## XGBoost

Notes:
    
1. Missing data should be set to 0. A unique feature of XGBoost is that it can handle missing data and simply
requires it to set to 0.

2. Only allows int, float and boolean data datatypes. So any categorical fields would need to converted.


In [115]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from os import listdir
import re

import csv
import datetime
import pickle
import os

import xgboost as xgb

from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from sklearn.metrics import balanced_accuracy_score, roc_auc_score, make_scorer
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import ConfusionMatrixDisplay,confusion_matrix

from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import MinMaxScaler

In [124]:
from sklearn.feature_selection import SelectKBest, chi2
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import cross_validate
from sklearn.metrics import f1_score

In [3]:
from imblearn.over_sampling import SMOTE
from imblearn.under_sampling import RandomUnderSampler
from imblearn.pipeline import Pipeline, make_pipeline
from collections import Counter

In [12]:
from sklearn.compose import ColumnTransformer
from sklearn.impute import SimpleImputer


In [151]:
from sklearn import set_config
set_config(display='diagran')

In [5]:
pd.set_option('display.max_rows', 800)
pd.set_option('display.max_columns', 500)
pd.set_option('display.max_colwidth', 100)
pd.options.display.float_format = '{:.0f}'.format

In [6]:
DATA_ROOT_DIR='/mnt/data/projects/MD7'
PROJ_ROOT_DIR='/home/priyesh/projects/MD7'

In [7]:
filepath=os.path.join(PROJ_ROOT_DIR,'pickle','yahoo_complete_flat.pkl')
df_complete_flat = pd.read_pickle(filepath)

In [8]:
df_complete_flat.head()

Unnamed: 0,company,industry,0_inc_totalRevenue,0_inc_costOfRevenue,0_inc_grossProfit,0_inc_netIncome,0_inc_operatingIncome,0_inc_netIncomeContinuousOperations,0_inc_netInterestIncome,0_inc_interestIncome,0_inc_otherIncomeExpense,0_inc_operatingExpense,0_inc_totalExpenses,0_inc_taxProvision,0_inc_interestExpense,0_inc_SGA,0_inc_researchAndDevelopment,0_inc_ebit,0_inc_dilutedEPS,0_inc_basicEPS,0_bal_cashEquivalent,0_bal_receivables,0_bal_finishedGoods,0_bal_workInProcess,0_bal_rawMaterials,0_bal_otherCurrentAssets,0_bal_inventory,0_bal_currentAssets,0_bal_netPPE,0_bal_otherNonCurrentAssets,0_bal_financialAssets,0_bal_goodwill,0_bal_goodwillAndOtherIntangibleAssets,0_bal_otherIntangibleAssets,0_bal_nonCurrentAccountsReceivable,0_bal_totalNonCurrentAssets,0_bal_currentDebt,0_bal_payablesAndAccruedExpenses,0_bal_otherCurrentLiabilities,0_bal_currentLiabilities,0_bal_longTermDebt,0_bal_otherNonCurrentLiabilities,0_bal_nonCurrentDeferredLiabilities,0_bal_nonCurrentDeferredTaxesLiabilities,0_bal_longTermProvisions,0_bal_totalNonCurrentLiabilities,0_bal_retainedEarnings,0_bal_stockholdersEquity,0_cas_netIncome,0_cas_netIncomeFromContinuingOperations,0_cas_depreciation,0_cas_stockBasedCompensation,0_cas_cashFlowOperatingActivities,0_cas_capitalExpenditure,0_cas_saleOfBusiness,0_cas_purchaseOfBusiness,0_cas_netBusinessPurchaseAndSale,0_cas_saleOfPPE,0_cas_purchaseOfPPE,0_cas_netPPEPurchaseAndSale,0_cas_saleOfInvestment,0_cas_purchaseOfInvestment,0_cas_netInvestmentPurchaseAndSale,0_cas_saleOfInvestmentProperties,0_cas_purchaseOfInvestmentProperties,0_cas_netInvestmentPropertiesPurchaseAndSale,0_cas_saleOfIntangibles,0_cas_purchaseOfIntangibles,0_cas_netIntangiblesPurchaseAndSale,0_cas_netOtherInvestingChanges,0_cas_cashFlowInvestingActivities,0_cas_netIssuancePaymentsOfDebt,0_cas_netLongTermDebtIssuance,0_cas_netShortTermDebtIssuance,0_cas_commonStockDividendPaid,0_cas_preferredStockDividendPaid,0_cas_cashDividendsPaid,0_cas_netCommonStockIssuance,0_cas_netPreferredStockIssuance,0_cas_repurchaseOfCapitalStock,0_cas_netOtherFinancingCharges,0_cas_cashFlowFinancingActivities,0_cas_freeCashFlow,1_inc_totalRevenue,1_inc_costOfRevenue,1_inc_grossProfit,1_inc_netIncome,1_inc_operatingIncome,1_inc_netIncomeContinuousOperations,1_inc_netInterestIncome,1_inc_interestIncome,1_inc_otherIncomeExpense,1_inc_operatingExpense,1_inc_totalExpenses,1_inc_taxProvision,1_inc_interestExpense,1_inc_SGA,1_inc_researchAndDevelopment,1_inc_ebit,1_inc_dilutedEPS,1_inc_basicEPS,1_bal_cashEquivalent,1_bal_receivables,1_bal_finishedGoods,1_bal_workInProcess,1_bal_rawMaterials,1_bal_otherCurrentAssets,1_bal_inventory,1_bal_currentAssets,1_bal_netPPE,1_bal_otherNonCurrentAssets,1_bal_financialAssets,1_bal_goodwill,1_bal_goodwillAndOtherIntangibleAssets,1_bal_otherIntangibleAssets,1_bal_nonCurrentAccountsReceivable,1_bal_totalNonCurrentAssets,1_bal_currentDebt,1_bal_payablesAndAccruedExpenses,1_bal_otherCurrentLiabilities,1_bal_currentLiabilities,1_bal_longTermDebt,1_bal_otherNonCurrentLiabilities,1_bal_nonCurrentDeferredLiabilities,1_bal_nonCurrentDeferredTaxesLiabilities,1_bal_longTermProvisions,1_bal_totalNonCurrentLiabilities,1_bal_retainedEarnings,1_bal_stockholdersEquity,1_cas_netIncome,1_cas_netIncomeFromContinuingOperations,1_cas_depreciation,1_cas_stockBasedCompensation,1_cas_cashFlowOperatingActivities,1_cas_capitalExpenditure,1_cas_saleOfBusiness,1_cas_purchaseOfBusiness,1_cas_netBusinessPurchaseAndSale,1_cas_saleOfPPE,1_cas_purchaseOfPPE,1_cas_netPPEPurchaseAndSale,1_cas_saleOfInvestment,1_cas_purchaseOfInvestment,1_cas_netInvestmentPurchaseAndSale,1_cas_saleOfInvestmentProperties,1_cas_purchaseOfInvestmentProperties,1_cas_netInvestmentPropertiesPurchaseAndSale,1_cas_saleOfIntangibles,1_cas_purchaseOfIntangibles,1_cas_netIntangiblesPurchaseAndSale,1_cas_netOtherInvestingChanges,1_cas_cashFlowInvestingActivities,1_cas_netIssuancePaymentsOfDebt,1_cas_netLongTermDebtIssuance,1_cas_netShortTermDebtIssuance,1_cas_commonStockDividendPaid,1_cas_preferredStockDividendPaid,1_cas_cashDividendsPaid,1_cas_netCommonStockIssuance,1_cas_netPreferredStockIssuance,1_cas_repurchaseOfCapitalStock,1_cas_netOtherFinancingCharges,1_cas_cashFlowFinancingActivities,1_cas_freeCashFlow,2_inc_totalRevenue,2_inc_costOfRevenue,2_inc_grossProfit,2_inc_netIncome,2_inc_operatingIncome,2_inc_netIncomeContinuousOperations,2_inc_netInterestIncome,2_inc_interestIncome,2_inc_otherIncomeExpense,2_inc_operatingExpense,2_inc_totalExpenses,2_inc_taxProvision,2_inc_interestExpense,2_inc_SGA,2_inc_researchAndDevelopment,2_inc_ebit,2_inc_dilutedEPS,2_inc_basicEPS,2_bal_cashEquivalent,2_bal_receivables,2_bal_finishedGoods,2_bal_workInProcess,2_bal_rawMaterials,2_bal_otherCurrentAssets,2_bal_inventory,2_bal_currentAssets,2_bal_netPPE,2_bal_otherNonCurrentAssets,2_bal_financialAssets,2_bal_goodwill,2_bal_goodwillAndOtherIntangibleAssets,2_bal_otherIntangibleAssets,2_bal_nonCurrentAccountsReceivable,2_bal_totalNonCurrentAssets,2_bal_currentDebt,2_bal_payablesAndAccruedExpenses,2_bal_otherCurrentLiabilities,2_bal_currentLiabilities,2_bal_longTermDebt,2_bal_otherNonCurrentLiabilities,2_bal_nonCurrentDeferredLiabilities,2_bal_nonCurrentDeferredTaxesLiabilities,2_bal_longTermProvisions,2_bal_totalNonCurrentLiabilities,2_bal_retainedEarnings,2_bal_stockholdersEquity,2_cas_netIncome,2_cas_netIncomeFromContinuingOperations,2_cas_depreciation,2_cas_stockBasedCompensation,2_cas_cashFlowOperatingActivities,2_cas_capitalExpenditure,2_cas_saleOfBusiness,2_cas_purchaseOfBusiness,2_cas_netBusinessPurchaseAndSale,2_cas_saleOfPPE,2_cas_purchaseOfPPE,2_cas_netPPEPurchaseAndSale,2_cas_saleOfInvestment,2_cas_purchaseOfInvestment,2_cas_netInvestmentPurchaseAndSale,2_cas_saleOfInvestmentProperties,2_cas_purchaseOfInvestmentProperties,2_cas_netInvestmentPropertiesPurchaseAndSale,2_cas_saleOfIntangibles,2_cas_purchaseOfIntangibles,2_cas_netIntangiblesPurchaseAndSale,2_cas_netOtherInvestingChanges,2_cas_cashFlowInvestingActivities,2_cas_netIssuancePaymentsOfDebt,2_cas_netLongTermDebtIssuance,2_cas_netShortTermDebtIssuance,2_cas_commonStockDividendPaid,2_cas_preferredStockDividendPaid,2_cas_cashDividendsPaid,2_cas_netCommonStockIssuance,2_cas_netPreferredStockIssuance,2_cas_repurchaseOfCapitalStock,2_cas_netOtherFinancingCharges,2_cas_cashFlowFinancingActivities,2_cas_freeCashFlow,3_inc_totalRevenue,3_inc_costOfRevenue,3_inc_grossProfit,3_inc_netIncome,3_inc_operatingIncome,3_inc_netIncomeContinuousOperations,3_inc_netInterestIncome,3_inc_interestIncome,3_inc_otherIncomeExpense,3_inc_operatingExpense,3_inc_totalExpenses,3_inc_taxProvision,3_inc_interestExpense,3_inc_SGA,3_inc_researchAndDevelopment,3_inc_ebit,3_inc_dilutedEPS,3_inc_basicEPS,3_bal_cashEquivalent,3_bal_receivables,3_bal_finishedGoods,3_bal_workInProcess,3_bal_rawMaterials,3_bal_otherCurrentAssets,3_bal_inventory,3_bal_currentAssets,3_bal_netPPE,3_bal_otherNonCurrentAssets,3_bal_financialAssets,3_bal_goodwill,3_bal_goodwillAndOtherIntangibleAssets,3_bal_otherIntangibleAssets,3_bal_nonCurrentAccountsReceivable,3_bal_totalNonCurrentAssets,3_bal_currentDebt,3_bal_payablesAndAccruedExpenses,3_bal_otherCurrentLiabilities,3_bal_currentLiabilities,3_bal_longTermDebt,3_bal_otherNonCurrentLiabilities,3_bal_nonCurrentDeferredLiabilities,3_bal_nonCurrentDeferredTaxesLiabilities,3_bal_longTermProvisions,3_bal_totalNonCurrentLiabilities,3_bal_retainedEarnings,3_bal_stockholdersEquity,3_cas_netIncome,3_cas_netIncomeFromContinuingOperations,3_cas_depreciation,3_cas_stockBasedCompensation,3_cas_cashFlowOperatingActivities,3_cas_capitalExpenditure,3_cas_saleOfBusiness,3_cas_purchaseOfBusiness,3_cas_netBusinessPurchaseAndSale,3_cas_saleOfPPE,3_cas_purchaseOfPPE,3_cas_netPPEPurchaseAndSale,3_cas_saleOfInvestment,3_cas_purchaseOfInvestment,3_cas_netInvestmentPurchaseAndSale,3_cas_saleOfInvestmentProperties,3_cas_purchaseOfInvestmentProperties,3_cas_netInvestmentPropertiesPurchaseAndSale,3_cas_saleOfIntangibles,3_cas_purchaseOfIntangibles,3_cas_netIntangiblesPurchaseAndSale,3_cas_netOtherInvestingChanges,3_cas_cashFlowInvestingActivities,3_cas_netIssuancePaymentsOfDebt,3_cas_netLongTermDebtIssuance,3_cas_netShortTermDebtIssuance,3_cas_commonStockDividendPaid,3_cas_preferredStockDividendPaid,3_cas_cashDividendsPaid,3_cas_netCommonStockIssuance,3_cas_netPreferredStockIssuance,3_cas_repurchaseOfCapitalStock,3_cas_netOtherFinancingCharges,3_cas_cashFlowFinancingActivities,3_cas_freeCashFlow,FinalScore
0,A,Diagnostics & Research,5163000000,2358000000,2805000000,1071000000,941000000,1071000000,-38000000,36000000,16000000,1864000000,4222000000,-152000000,74000000,1460000000,404000000,993000000,3,3,1382000000,930000000,416000000,0,263000000,198000000,679000000,3189000000,850000000,611000000,0,4700000000,4700000000,1107000000,0,6263000000,616000000,794000000,0,2080000000,1791000000,473000000,0,0,0,2624000000,-18000000,4748000000,1071000000,1071000000,238000000,72000000,1021000000,-156000000,0,-1408000000,-1408000000,0,-155000000,-155000000,0,-23000000,-23000000,0,0,0,0,-1000000,-1000000,-3000000,-1590000000,600000000,-15000000,615000000,-206000000,0,-206000000,-723000000,0,-723000000,-24000000,-299000000,865000000,5339000000,2502000000,2837000000,719000000,846000000,719000000,-70000000,8000000,66000000,1991000000,4493000000,123000000,78000000,1496000000,495000000,920000000,2,2,1441000000,1038000000,417000000,0,303000000,216000000,720000000,3415000000,845000000,776000000,0,4433000000,4433000000,831000000,0,6212000000,75000000,639000000,0,1467000000,2284000000,614000000,0,0,0,3287000000,81000000,4873000000,719000000,719000000,308000000,83000000,921000000,-119000000,0,0,0,1000000,-119000000,-118000000,0,-20000000,-20000000,0,0,0,0,0,0,-9000000,-147000000,-45000000,-918000000,873000000,-222000000,0,-222000000,-469000000,0,-469000000,-41000000,-717000000,802000000,6319000000,2912000000,3407000000,1210000000,1347000000,1210000000,-79000000,2000000,92000000,2060000000,4972000000,150000000,81000000,1619000000,441000000,1441000000,3,3,1575000000,1172000000,463000000,0,367000000,222000000,830000000,3799000000,945000000,820000000,0,4956000000,4956000000,981000000,0,6906000000,0,774000000,0,1708000000,2729000000,659000000,0,0,0,3608000000,348000000,5389000000,1210000000,1210000000,321000000,110000000,1485000000,-189000000,0,-546000000,-546000000,1000000,-188000000,-187000000,12000000,-22000000,-10000000,0,0,0,0,-1000000,-1000000,-5000000,-749000000,356000000,431000000,-75000000,-236000000,0,-236000000,-788000000,0,-788000000,-83000000,-696000000,1296000000,6848000000,3126000000,3722000000,1254000000,1618000000,1254000000,-75000000,9000000,-39000000,2104000000,5230000000,250000000,84000000,1637000000,467000000,1588000000,4,4,1053000000,1405000000,555000000,0,483000000,282000000,1038000000,3778000000,1100000000,670000000,0,4773000000,4773000000,821000000,0,6738000000,36000000,909000000,0,1861000000,2733000000,536000000,0,0,0,3366000000,324000000,5289000000,1254000000,1254000000,317000000,125000000,1312000000,-291000000,0,-52000000,-52000000,0,-291000000,-291000000,22000000,-13000000,9000000,0,0,0,0,0,0,-4000000,-338000000,26000000,-9000000,35000000,-250000000,0,-250000000,-1139000000,0,-1139000000,-67000000,-1372000000,1021000000,3
1,AA,Aluminum,10433000000,8537000000,1896000000,-1125000000,876000000,-853000000,-121000000,0,-1193000000,1020000000,9557000000,415000000,121000000,280000000,27000000,-317000000,-6,-6,879000000,660000000,305000000,282000000,611000000,288000000,1644000000,3530000000,7916000000,1414000000,18000000,150000000,202000000,52000000,179000000,11110000000,1000000,1588000000,561000000,2563000000,1799000000,371000000,102000000,0,902000000,6221000000,-555000000,4082000000,-1125000000,-853000000,713000000,30000000,686000000,-379000000,0,0,0,0,0,0,0,-112000000,-112000000,0,0,0,0,0,0,23000000,-468000000,-7000000,-7000000,0,0,0,0,0,0,0,-439000000,-444000000,307000000,9286000000,7969000000,1317000000,-170000000,431000000,-14000000,-146000000,0,-112000000,886000000,8855000000,187000000,146000000,206000000,27000000,319000000,0,0,1607000000,556000000,321000000,112000000,553000000,290000000,1398000000,4520000000,7190000000,1444000000,0,145000000,190000000,45000000,134000000,10344000000,2000000,1494000000,870000000,2761000000,2463000000,515000000,101000000,0,918000000,7112000000,-725000000,3287000000,-170000000,-14000000,653000000,25000000,394000000,-353000000,0,0,0,0,0,0,0,-12000000,-12000000,0,0,0,0,0,0,198000000,-167000000,738000000,738000000,0,0,0,0,0,0,0,-225000000,514000000,41000000,12152000000,9153000000,2999000000,429000000,2077000000,570000000,-195000000,0,-683000000,922000000,10075000000,629000000,195000000,227000000,31000000,1394000000,2,2,1814000000,884000000,538000000,85000000,794000000,358000000,1956000000,5026000000,6623000000,1644000000,7000000,144000000,180000000,36000000,215000000,9977000000,1000000,2048000000,791000000,3223000000,1726000000,599000000,90000000,0,887000000,5531000000,-315000000,4638000000,429000000,570000000,664000000,39000000,920000000,-390000000,0,0,0,0,0,0,0,-11000000,-11000000,0,0,0,0,0,0,966000000,565000000,-799000000,-799000000,0,-19000000,0,-19000000,-150000000,0,-150000000,-215000000,-1158000000,530000000,12451000000,10212000000,2239000000,-102000000,1331000000,38000000,-106000000,0,-523000000,908000000,11120000000,664000000,106000000,204000000,32000000,808000000,0,0,1363000000,909000000,385000000,350000000,1108000000,417000000,2427000000,5250000000,6493000000,1587000000,2000000,145000000,174000000,29000000,366000000,9533000000,1000000,1987000000,681000000,3004000000,1806000000,486000000,65000000,0,937000000,5207000000,-549000000,5058000000,-102000000,38000000,617000000,40000000,822000000,-480000000,0,0,0,0,0,0,10000000,-32000000,-22000000,0,0,0,0,0,0,7000000,-495000000,3000000,3000000,0,-72000000,0,-72000000,-500000000,0,-500000000,-221000000,-768000000,342000000,2
2,AAL,Airlines,45768000000,35379000000,10389000000,1686000000,3700000000,1686000000,-968000000,127000000,-476000000,6689000000,42068000000,570000000,1095000000,1602000000,0,3351000000,3,3,3826000000,1750000000,0,0,0,0,1851000000,8206000000,43732000000,1237000000,0,4091000000,6175000000,2084000000,0,51789000000,4569000000,5741000000,4808000000,18311000000,28875000000,1453000000,5422000000,0,0,41802000000,2264000000,-118000000,1686000000,1686000000,2318000000,94000000,3815000000,-4268000000,0,0,0,904000000,0,904000000,4144000000,-3187000000,957000000,0,0,0,0,0,0,164000000,-2243000000,-230000000,-230000000,0,-178000000,0,-178000000,-1097000000,0,-1097000000,-63000000,-1568000000,-453000000,17337000000,24933000000,-7596000000,-8885000000,-11078000000,-8885000000,-1186000000,41000000,811000000,3482000000,28415000000,-2568000000,1227000000,513000000,0,-10226000000,-18,-18,6864000000,1342000000,0,0,0,0,1614000000,11095000000,39738000000,1816000000,0,4091000000,6120000000,2029000000,0,50913000000,4448000000,5331000000,0,16569000000,36573000000,1502000000,7162000000,0,0,52306000000,-6664000000,-6867000000,-8885000000,-8885000000,2370000000,91000000,-6543000000,-1958000000,0,0,0,1016000000,0,1016000000,2803000000,-5873000000,-3070000000,0,0,0,0,0,0,-330000000,-4342000000,8245000000,8245000000,0,-43000000,0,-43000000,2797000000,0,-173000000,-5000000,10994000000,-8501000000,29882000000,29855000000,27000000,-1993000000,-5065000000,-1993000000,-1782000000,18000000,4299000000,5092000000,34947000000,-555000000,1800000000,1098000000,0,-748000000,-3,-3,12431000000,1505000000,0,0,0,0,1795000000,17336000000,37362000000,2109000000,0,4091000000,6079000000,1988000000,0,49106000000,3995000000,6027000000,0,19005000000,42157000000,1328000000,6239000000,0,0,54777000000,-8638000000,-7340000000,-1993000000,-1993000000,2335000000,98000000,704000000,-208000000,5000000,-28000000,-23000000,374000000,-204000000,374000000,13923000000,-19658000000,-5735000000,0,0,0,0,0,0,-414000000,-5983000000,4847000000,4847000000,0,0,0,0,442000000,0,-18000000,-1000000,5288000000,496000000,48971000000,39934000000,9037000000,127000000,1763000000,127000000,-1746000000,216000000,169000000,7274000000,47208000000,59000000,1962000000,1815000000,0,2148000000,0,0,8965000000,2138000000,0,0,0,892000000,2279000000,15269000000,38294000000,1904000000,0,4091000000,6150000000,2059000000,0,49447000000,4739000000,6843000000,0,21496000000,38948000000,1258000000,5976000000,0,0,49019000000,-8511000000,-5799000000,127000000,127000000,2298000000,78000000,2173000000,-2906000000,0,-321000000,-321000000,61000000,-360000000,-213000000,14972000000,-11617000000,3355000000,0,0,0,0,0,0,87000000,636000000,-2683000000,-2683000000,0,0,0,0,0,0,-21000000,73000000,-2631000000,-733000000,0
3,AAP,Specialty Retail,9709003000,5454257000,4254746000,486896000,677180000,486896000,-39898000,0,464000,3577566000,9031823000,150850000,39898000,3577566000,0,677644000,6,6,418665000,689469000,0,0,0,155241000,4432168000,5695543000,3798538000,52448000,0,992240000,1701996000,709756000,0,5552982000,0,3957850000,519852000,4477702000,2764479000,123250000,334013000,334013000,0,3221742000,3772848000,3549081000,486896000,486896000,238371000,37438000,866909000,-471648000,0,0,0,8709000,-270129000,-261420000,0,0,0,0,0,0,0,-201519000,-201519000,0,-462939000,-369386000,-310047000,-59339000,-17185000,0,-17185000,-495101000,0,-498435000,-481000,-882153000,395261000,10106321000,5624707000,4481614000,493021000,749907000,493021000,-46886000,0,-52006000,3731707000,9356414000,157994000,46886000,3731707000,0,697901000,7,7,834992000,749999000,0,0,0,146811000,4538199000,6270001000,3842589000,52329000,0,993590000,1674717000,681127000,0,5569635000,0,4247443000,496472000,4743915000,3047483000,146281000,342445000,342445000,0,3536209000,4196634000,3559512000,493021000,493021000,250081000,45271000,969688000,-267806000,0,0,0,909000,-267576000,-266667000,0,0,0,0,0,0,0,-230000,-230000,0,-266897000,244524000,244524000,0,-56347000,0,-56347000,-466421000,0,-469691000,-7753000,-285997000,701882000,10997989000,6069241000,4928748000,616108000,838717000,616108000,-37791000,0,4999000,4090031000,10159272000,189817000,37791000,4090031000,0,843716000,9,9,601428000,782785000,0,0,0,232245000,4659018000,6275476000,4200121000,73651000,0,993744000,1644961000,651217000,0,5918733000,0,4699058000,481249000,5180307000,3371971000,103034000,410606000,410606000,0,3885611000,4605791000,3128291000,616108000,616108000,259933000,63067000,1112262000,-289639000,0,0,0,2325000,-289639000,-287314000,0,0,0,0,0,0,0,0,0,0,-287314000,0,0,0,0,0,-160925000,-903134000,0,-906208000,-53000,-1064112000,822623000,11154722000,6192622000,4962100000,501872000,714151000,501872000,-51060000,0,-14404000,4247949000,10440571000,146815000,51060000,4247949000,0,699747000,8,8,269282000,698613000,0,0,0,163695000,4915262000,6046852000,4297829000,62429000,0,990471000,1611372000,620901000,0,5971630000,185000000,4757909000,427480000,5370389000,3466601000,87214000,415997000,415997000,0,3969812000,4744624000,2678281000,501872000,501872000,283800000,50978000,722222000,-425961000,0,0,0,1513000,-424061000,-422548000,0,0,0,0,0,0,0,-1900000,-1900000,0,-424448000,332537000,147537000,185000000,-336230000,0,-336230000,-618480000,0,-618480000,1469000,-620704000,296261000,2
4,AAPL,Consumer Electronics,260174000000,161782000000,98392000000,55256000000,63930000000,55256000000,1385000000,4961000000,1807000000,34462000000,196244000000,10481000000,3576000000,18245000000,16217000000,63930000000,2,2,100557000000,45804000000,0,0,0,12352000000,4106000000,162819000000,37378000000,32978000000,0,0,0,0,0,175697000000,16240000000,46236000000,37720000000,105718000000,91807000000,50503000000,0,0,0,142310000000,45898000000,90488000000,55256000000,55256000000,12547000000,6068000000,69391000000,-10495000000,0,-624000000,-624000000,0,-10495000000,-10495000000,98724000000,-40631000000,58093000000,0,0,0,0,0,0,-1078000000,45896000000,-7819000000,-1842000000,-5977000000,-14119000000,0,-14119000000,-66116000000,0,-66897000000,-2922000000,-90976000000,58896000000,274515000000,169559000000,104956000000,57411000000,66288000000,57411000000,890000000,3763000000,803000000,38668000000,208227000000,9680000000,2873000000,19916000000,18752000000,66288000000,3,3,90943000000,37445000000,0,0,0,11264000000,4061000000,143713000000,36766000000,42522000000,0,0,0,0,0,180175000000,13769000000,42296000000,42684000000,105392000000,98667000000,54490000000,0,0,0,153157000000,14966000000,65339000000,57411000000,57411000000,11056000000,6829000000,80674000000,-7309000000,0,-1524000000,-1524000000,0,-7309000000,-7309000000,120483000000,-115148000000,5335000000,0,0,0,0,0,0,-791000000,-4289000000,2499000000,3462000000,-963000000,-14081000000,0,-14081000000,-71478000000,0,-72358000000,-3760000000,-86820000000,73365000000,365817000000,212981000000,152836000000,94680000000,108949000000,94680000000,198000000,2843000000,258000000,43887000000,256868000000,14527000000,2645000000,21973000000,21914000000,108949000000,5,5,62639000000,51506000000,0,0,0,14111000000,6580000000,134836000000,39440000000,48849000000,0,0,0,0,0,216166000000,15613000000,54763000000,47493000000,125481000000,109106000000,53325000000,0,0,0,162431000000,5562000000,63090000000,94680000000,94680000000,11284000000,7906000000,104038000000,-11085000000,0,-33000000,-33000000,0,-11085000000,-11085000000,106483000000,-109558000000,-3075000000,0,0,0,0,0,0,-352000000,-14545000000,12665000000,11643000000,1022000000,-14467000000,0,-14467000000,-84866000000,0,-85971000000,-6685000000,-93353000000,92953000000,394328000000,223546000000,170782000000,99803000000,119437000000,99803000000,-106000000,2825000000,-334000000,51345000000,274891000000,19300000000,2931000000,25094000000,26251000000,119437000000,6,6,48304000000,60932000000,0,0,0,21223000000,4946000000,135405000000,42117000000,54428000000,0,0,0,0,0,217350000000,21110000000,64115000000,60845000000,153982000000,98959000000,49142000000,0,0,0,148101000000,-3068000000,50672000000,99803000000,99803000000,11104000000,9038000000,122151000000,-10708000000,0,-306000000,-306000000,0,-10708000000,-10708000000,67363000000,-76923000000,-9560000000,0,0,0,0,0,0,-1780000000,-22354000000,-123000000,-4078000000,3955000000,-14841000000,0,-14841000000,-89402000000,0,-89402000000,-6383000000,-110749000000,111443000000,2


In [9]:
df_complete_flat.iloc[:,0:10].info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 711 entries, 0 to 710
Data columns (total 10 columns):
 #   Column                               Non-Null Count  Dtype 
---  ------                               --------------  ----- 
 0   company                              711 non-null    object
 1   industry                             711 non-null    object
 2   0_inc_totalRevenue                   711 non-null    int64 
 3   0_inc_costOfRevenue                  711 non-null    int64 
 4   0_inc_grossProfit                    711 non-null    int64 
 5   0_inc_netIncome                      711 non-null    int64 
 6   0_inc_operatingIncome                711 non-null    int64 
 7   0_inc_netIncomeContinuousOperations  711 non-null    int64 
 8   0_inc_netInterestIncome              711 non-null    int64 
 9   0_inc_interestIncome                 711 non-null    int64 
dtypes: int64(8), object(2)
memory usage: 55.7+ KB


In [19]:
# Take a copy so that we can always refer back to original 

df = df_complete_flat.copy()

In [22]:
df.head()

Unnamed: 0,company,industry,0_inc_totalRevenue,0_inc_costOfRevenue,0_inc_grossProfit,0_inc_netIncome,0_inc_operatingIncome,0_inc_netIncomeContinuousOperations,0_inc_netInterestIncome,0_inc_interestIncome,0_inc_otherIncomeExpense,0_inc_operatingExpense,0_inc_totalExpenses,0_inc_taxProvision,0_inc_interestExpense,0_inc_SGA,0_inc_researchAndDevelopment,0_inc_ebit,0_inc_dilutedEPS,0_inc_basicEPS,0_bal_cashEquivalent,0_bal_receivables,0_bal_finishedGoods,0_bal_workInProcess,0_bal_rawMaterials,0_bal_otherCurrentAssets,0_bal_inventory,0_bal_currentAssets,0_bal_netPPE,0_bal_otherNonCurrentAssets,0_bal_financialAssets,0_bal_goodwill,0_bal_goodwillAndOtherIntangibleAssets,0_bal_otherIntangibleAssets,0_bal_nonCurrentAccountsReceivable,0_bal_totalNonCurrentAssets,0_bal_currentDebt,0_bal_payablesAndAccruedExpenses,0_bal_otherCurrentLiabilities,0_bal_currentLiabilities,0_bal_longTermDebt,0_bal_otherNonCurrentLiabilities,0_bal_nonCurrentDeferredLiabilities,0_bal_nonCurrentDeferredTaxesLiabilities,0_bal_longTermProvisions,0_bal_totalNonCurrentLiabilities,0_bal_retainedEarnings,0_bal_stockholdersEquity,0_cas_netIncome,0_cas_netIncomeFromContinuingOperations,0_cas_depreciation,0_cas_stockBasedCompensation,0_cas_cashFlowOperatingActivities,0_cas_capitalExpenditure,0_cas_saleOfBusiness,0_cas_purchaseOfBusiness,0_cas_netBusinessPurchaseAndSale,0_cas_saleOfPPE,0_cas_purchaseOfPPE,0_cas_netPPEPurchaseAndSale,0_cas_saleOfInvestment,0_cas_purchaseOfInvestment,0_cas_netInvestmentPurchaseAndSale,0_cas_saleOfInvestmentProperties,0_cas_purchaseOfInvestmentProperties,0_cas_netInvestmentPropertiesPurchaseAndSale,0_cas_saleOfIntangibles,0_cas_purchaseOfIntangibles,0_cas_netIntangiblesPurchaseAndSale,0_cas_netOtherInvestingChanges,0_cas_cashFlowInvestingActivities,0_cas_netIssuancePaymentsOfDebt,0_cas_netLongTermDebtIssuance,0_cas_netShortTermDebtIssuance,0_cas_commonStockDividendPaid,0_cas_preferredStockDividendPaid,0_cas_cashDividendsPaid,0_cas_netCommonStockIssuance,0_cas_netPreferredStockIssuance,0_cas_repurchaseOfCapitalStock,0_cas_netOtherFinancingCharges,0_cas_cashFlowFinancingActivities,0_cas_freeCashFlow,1_inc_totalRevenue,1_inc_costOfRevenue,1_inc_grossProfit,1_inc_netIncome,1_inc_operatingIncome,1_inc_netIncomeContinuousOperations,1_inc_netInterestIncome,1_inc_interestIncome,1_inc_otherIncomeExpense,1_inc_operatingExpense,1_inc_totalExpenses,1_inc_taxProvision,1_inc_interestExpense,1_inc_SGA,1_inc_researchAndDevelopment,1_inc_ebit,1_inc_dilutedEPS,1_inc_basicEPS,1_bal_cashEquivalent,1_bal_receivables,1_bal_finishedGoods,1_bal_workInProcess,1_bal_rawMaterials,1_bal_otherCurrentAssets,1_bal_inventory,1_bal_currentAssets,1_bal_netPPE,1_bal_otherNonCurrentAssets,1_bal_financialAssets,1_bal_goodwill,1_bal_goodwillAndOtherIntangibleAssets,1_bal_otherIntangibleAssets,1_bal_nonCurrentAccountsReceivable,1_bal_totalNonCurrentAssets,1_bal_currentDebt,1_bal_payablesAndAccruedExpenses,1_bal_otherCurrentLiabilities,1_bal_currentLiabilities,1_bal_longTermDebt,1_bal_otherNonCurrentLiabilities,1_bal_nonCurrentDeferredLiabilities,1_bal_nonCurrentDeferredTaxesLiabilities,1_bal_longTermProvisions,1_bal_totalNonCurrentLiabilities,1_bal_retainedEarnings,1_bal_stockholdersEquity,1_cas_netIncome,1_cas_netIncomeFromContinuingOperations,1_cas_depreciation,1_cas_stockBasedCompensation,1_cas_cashFlowOperatingActivities,1_cas_capitalExpenditure,1_cas_saleOfBusiness,1_cas_purchaseOfBusiness,1_cas_netBusinessPurchaseAndSale,1_cas_saleOfPPE,1_cas_purchaseOfPPE,1_cas_netPPEPurchaseAndSale,1_cas_saleOfInvestment,1_cas_purchaseOfInvestment,1_cas_netInvestmentPurchaseAndSale,1_cas_saleOfInvestmentProperties,1_cas_purchaseOfInvestmentProperties,1_cas_netInvestmentPropertiesPurchaseAndSale,1_cas_saleOfIntangibles,1_cas_purchaseOfIntangibles,1_cas_netIntangiblesPurchaseAndSale,1_cas_netOtherInvestingChanges,1_cas_cashFlowInvestingActivities,1_cas_netIssuancePaymentsOfDebt,1_cas_netLongTermDebtIssuance,1_cas_netShortTermDebtIssuance,1_cas_commonStockDividendPaid,1_cas_preferredStockDividendPaid,1_cas_cashDividendsPaid,1_cas_netCommonStockIssuance,1_cas_netPreferredStockIssuance,1_cas_repurchaseOfCapitalStock,1_cas_netOtherFinancingCharges,1_cas_cashFlowFinancingActivities,1_cas_freeCashFlow,2_inc_totalRevenue,2_inc_costOfRevenue,2_inc_grossProfit,2_inc_netIncome,2_inc_operatingIncome,2_inc_netIncomeContinuousOperations,2_inc_netInterestIncome,2_inc_interestIncome,2_inc_otherIncomeExpense,2_inc_operatingExpense,2_inc_totalExpenses,2_inc_taxProvision,2_inc_interestExpense,2_inc_SGA,2_inc_researchAndDevelopment,2_inc_ebit,2_inc_dilutedEPS,2_inc_basicEPS,2_bal_cashEquivalent,2_bal_receivables,2_bal_finishedGoods,2_bal_workInProcess,2_bal_rawMaterials,2_bal_otherCurrentAssets,2_bal_inventory,2_bal_currentAssets,2_bal_netPPE,2_bal_otherNonCurrentAssets,2_bal_financialAssets,2_bal_goodwill,2_bal_goodwillAndOtherIntangibleAssets,2_bal_otherIntangibleAssets,2_bal_nonCurrentAccountsReceivable,2_bal_totalNonCurrentAssets,2_bal_currentDebt,2_bal_payablesAndAccruedExpenses,2_bal_otherCurrentLiabilities,2_bal_currentLiabilities,2_bal_longTermDebt,2_bal_otherNonCurrentLiabilities,2_bal_nonCurrentDeferredLiabilities,2_bal_nonCurrentDeferredTaxesLiabilities,2_bal_longTermProvisions,2_bal_totalNonCurrentLiabilities,2_bal_retainedEarnings,2_bal_stockholdersEquity,2_cas_netIncome,2_cas_netIncomeFromContinuingOperations,2_cas_depreciation,2_cas_stockBasedCompensation,2_cas_cashFlowOperatingActivities,2_cas_capitalExpenditure,2_cas_saleOfBusiness,2_cas_purchaseOfBusiness,2_cas_netBusinessPurchaseAndSale,2_cas_saleOfPPE,2_cas_purchaseOfPPE,2_cas_netPPEPurchaseAndSale,2_cas_saleOfInvestment,2_cas_purchaseOfInvestment,2_cas_netInvestmentPurchaseAndSale,2_cas_saleOfInvestmentProperties,2_cas_purchaseOfInvestmentProperties,2_cas_netInvestmentPropertiesPurchaseAndSale,2_cas_saleOfIntangibles,2_cas_purchaseOfIntangibles,2_cas_netIntangiblesPurchaseAndSale,2_cas_netOtherInvestingChanges,2_cas_cashFlowInvestingActivities,2_cas_netIssuancePaymentsOfDebt,2_cas_netLongTermDebtIssuance,2_cas_netShortTermDebtIssuance,2_cas_commonStockDividendPaid,2_cas_preferredStockDividendPaid,2_cas_cashDividendsPaid,2_cas_netCommonStockIssuance,2_cas_netPreferredStockIssuance,2_cas_repurchaseOfCapitalStock,2_cas_netOtherFinancingCharges,2_cas_cashFlowFinancingActivities,2_cas_freeCashFlow,3_inc_totalRevenue,3_inc_costOfRevenue,3_inc_grossProfit,3_inc_netIncome,3_inc_operatingIncome,3_inc_netIncomeContinuousOperations,3_inc_netInterestIncome,3_inc_interestIncome,3_inc_otherIncomeExpense,3_inc_operatingExpense,3_inc_totalExpenses,3_inc_taxProvision,3_inc_interestExpense,3_inc_SGA,3_inc_researchAndDevelopment,3_inc_ebit,3_inc_dilutedEPS,3_inc_basicEPS,3_bal_cashEquivalent,3_bal_receivables,3_bal_finishedGoods,3_bal_workInProcess,3_bal_rawMaterials,3_bal_otherCurrentAssets,3_bal_inventory,3_bal_currentAssets,3_bal_netPPE,3_bal_otherNonCurrentAssets,3_bal_financialAssets,3_bal_goodwill,3_bal_goodwillAndOtherIntangibleAssets,3_bal_otherIntangibleAssets,3_bal_nonCurrentAccountsReceivable,3_bal_totalNonCurrentAssets,3_bal_currentDebt,3_bal_payablesAndAccruedExpenses,3_bal_otherCurrentLiabilities,3_bal_currentLiabilities,3_bal_longTermDebt,3_bal_otherNonCurrentLiabilities,3_bal_nonCurrentDeferredLiabilities,3_bal_nonCurrentDeferredTaxesLiabilities,3_bal_longTermProvisions,3_bal_totalNonCurrentLiabilities,3_bal_retainedEarnings,3_bal_stockholdersEquity,3_cas_netIncome,3_cas_netIncomeFromContinuingOperations,3_cas_depreciation,3_cas_stockBasedCompensation,3_cas_cashFlowOperatingActivities,3_cas_capitalExpenditure,3_cas_saleOfBusiness,3_cas_purchaseOfBusiness,3_cas_netBusinessPurchaseAndSale,3_cas_saleOfPPE,3_cas_purchaseOfPPE,3_cas_netPPEPurchaseAndSale,3_cas_saleOfInvestment,3_cas_purchaseOfInvestment,3_cas_netInvestmentPurchaseAndSale,3_cas_saleOfInvestmentProperties,3_cas_purchaseOfInvestmentProperties,3_cas_netInvestmentPropertiesPurchaseAndSale,3_cas_saleOfIntangibles,3_cas_purchaseOfIntangibles,3_cas_netIntangiblesPurchaseAndSale,3_cas_netOtherInvestingChanges,3_cas_cashFlowInvestingActivities,3_cas_netIssuancePaymentsOfDebt,3_cas_netLongTermDebtIssuance,3_cas_netShortTermDebtIssuance,3_cas_commonStockDividendPaid,3_cas_preferredStockDividendPaid,3_cas_cashDividendsPaid,3_cas_netCommonStockIssuance,3_cas_netPreferredStockIssuance,3_cas_repurchaseOfCapitalStock,3_cas_netOtherFinancingCharges,3_cas_cashFlowFinancingActivities,3_cas_freeCashFlow,FinalScore
0,A,Diagnostics & Research,5163000000,2358000000,2805000000,1071000000,941000000,1071000000,-38000000,36000000,16000000,1864000000,4222000000,-152000000,74000000,1460000000,404000000,993000000,3,3,1382000000,930000000,416000000,0,263000000,198000000,679000000,3189000000,850000000,611000000,0,4700000000,4700000000,1107000000,0,6263000000,616000000,794000000,0,2080000000,1791000000,473000000,0,0,0,2624000000,-18000000,4748000000,1071000000,1071000000,238000000,72000000,1021000000,-156000000,0,-1408000000,-1408000000,0,-155000000,-155000000,0,-23000000,-23000000,0,0,0,0,-1000000,-1000000,-3000000,-1590000000,600000000,-15000000,615000000,-206000000,0,-206000000,-723000000,0,-723000000,-24000000,-299000000,865000000,5339000000,2502000000,2837000000,719000000,846000000,719000000,-70000000,8000000,66000000,1991000000,4493000000,123000000,78000000,1496000000,495000000,920000000,2,2,1441000000,1038000000,417000000,0,303000000,216000000,720000000,3415000000,845000000,776000000,0,4433000000,4433000000,831000000,0,6212000000,75000000,639000000,0,1467000000,2284000000,614000000,0,0,0,3287000000,81000000,4873000000,719000000,719000000,308000000,83000000,921000000,-119000000,0,0,0,1000000,-119000000,-118000000,0,-20000000,-20000000,0,0,0,0,0,0,-9000000,-147000000,-45000000,-918000000,873000000,-222000000,0,-222000000,-469000000,0,-469000000,-41000000,-717000000,802000000,6319000000,2912000000,3407000000,1210000000,1347000000,1210000000,-79000000,2000000,92000000,2060000000,4972000000,150000000,81000000,1619000000,441000000,1441000000,3,3,1575000000,1172000000,463000000,0,367000000,222000000,830000000,3799000000,945000000,820000000,0,4956000000,4956000000,981000000,0,6906000000,0,774000000,0,1708000000,2729000000,659000000,0,0,0,3608000000,348000000,5389000000,1210000000,1210000000,321000000,110000000,1485000000,-189000000,0,-546000000,-546000000,1000000,-188000000,-187000000,12000000,-22000000,-10000000,0,0,0,0,-1000000,-1000000,-5000000,-749000000,356000000,431000000,-75000000,-236000000,0,-236000000,-788000000,0,-788000000,-83000000,-696000000,1296000000,6848000000,3126000000,3722000000,1254000000,1618000000,1254000000,-75000000,9000000,-39000000,2104000000,5230000000,250000000,84000000,1637000000,467000000,1588000000,4,4,1053000000,1405000000,555000000,0,483000000,282000000,1038000000,3778000000,1100000000,670000000,0,4773000000,4773000000,821000000,0,6738000000,36000000,909000000,0,1861000000,2733000000,536000000,0,0,0,3366000000,324000000,5289000000,1254000000,1254000000,317000000,125000000,1312000000,-291000000,0,-52000000,-52000000,0,-291000000,-291000000,22000000,-13000000,9000000,0,0,0,0,0,0,-4000000,-338000000,26000000,-9000000,35000000,-250000000,0,-250000000,-1139000000,0,-1139000000,-67000000,-1372000000,1021000000,3
1,AA,Aluminum,10433000000,8537000000,1896000000,-1125000000,876000000,-853000000,-121000000,0,-1193000000,1020000000,9557000000,415000000,121000000,280000000,27000000,-317000000,-6,-6,879000000,660000000,305000000,282000000,611000000,288000000,1644000000,3530000000,7916000000,1414000000,18000000,150000000,202000000,52000000,179000000,11110000000,1000000,1588000000,561000000,2563000000,1799000000,371000000,102000000,0,902000000,6221000000,-555000000,4082000000,-1125000000,-853000000,713000000,30000000,686000000,-379000000,0,0,0,0,0,0,0,-112000000,-112000000,0,0,0,0,0,0,23000000,-468000000,-7000000,-7000000,0,0,0,0,0,0,0,-439000000,-444000000,307000000,9286000000,7969000000,1317000000,-170000000,431000000,-14000000,-146000000,0,-112000000,886000000,8855000000,187000000,146000000,206000000,27000000,319000000,0,0,1607000000,556000000,321000000,112000000,553000000,290000000,1398000000,4520000000,7190000000,1444000000,0,145000000,190000000,45000000,134000000,10344000000,2000000,1494000000,870000000,2761000000,2463000000,515000000,101000000,0,918000000,7112000000,-725000000,3287000000,-170000000,-14000000,653000000,25000000,394000000,-353000000,0,0,0,0,0,0,0,-12000000,-12000000,0,0,0,0,0,0,198000000,-167000000,738000000,738000000,0,0,0,0,0,0,0,-225000000,514000000,41000000,12152000000,9153000000,2999000000,429000000,2077000000,570000000,-195000000,0,-683000000,922000000,10075000000,629000000,195000000,227000000,31000000,1394000000,2,2,1814000000,884000000,538000000,85000000,794000000,358000000,1956000000,5026000000,6623000000,1644000000,7000000,144000000,180000000,36000000,215000000,9977000000,1000000,2048000000,791000000,3223000000,1726000000,599000000,90000000,0,887000000,5531000000,-315000000,4638000000,429000000,570000000,664000000,39000000,920000000,-390000000,0,0,0,0,0,0,0,-11000000,-11000000,0,0,0,0,0,0,966000000,565000000,-799000000,-799000000,0,-19000000,0,-19000000,-150000000,0,-150000000,-215000000,-1158000000,530000000,12451000000,10212000000,2239000000,-102000000,1331000000,38000000,-106000000,0,-523000000,908000000,11120000000,664000000,106000000,204000000,32000000,808000000,0,0,1363000000,909000000,385000000,350000000,1108000000,417000000,2427000000,5250000000,6493000000,1587000000,2000000,145000000,174000000,29000000,366000000,9533000000,1000000,1987000000,681000000,3004000000,1806000000,486000000,65000000,0,937000000,5207000000,-549000000,5058000000,-102000000,38000000,617000000,40000000,822000000,-480000000,0,0,0,0,0,0,10000000,-32000000,-22000000,0,0,0,0,0,0,7000000,-495000000,3000000,3000000,0,-72000000,0,-72000000,-500000000,0,-500000000,-221000000,-768000000,342000000,2
2,AAL,Airlines,45768000000,35379000000,10389000000,1686000000,3700000000,1686000000,-968000000,127000000,-476000000,6689000000,42068000000,570000000,1095000000,1602000000,0,3351000000,3,3,3826000000,1750000000,0,0,0,0,1851000000,8206000000,43732000000,1237000000,0,4091000000,6175000000,2084000000,0,51789000000,4569000000,5741000000,4808000000,18311000000,28875000000,1453000000,5422000000,0,0,41802000000,2264000000,-118000000,1686000000,1686000000,2318000000,94000000,3815000000,-4268000000,0,0,0,904000000,0,904000000,4144000000,-3187000000,957000000,0,0,0,0,0,0,164000000,-2243000000,-230000000,-230000000,0,-178000000,0,-178000000,-1097000000,0,-1097000000,-63000000,-1568000000,-453000000,17337000000,24933000000,-7596000000,-8885000000,-11078000000,-8885000000,-1186000000,41000000,811000000,3482000000,28415000000,-2568000000,1227000000,513000000,0,-10226000000,-18,-18,6864000000,1342000000,0,0,0,0,1614000000,11095000000,39738000000,1816000000,0,4091000000,6120000000,2029000000,0,50913000000,4448000000,5331000000,0,16569000000,36573000000,1502000000,7162000000,0,0,52306000000,-6664000000,-6867000000,-8885000000,-8885000000,2370000000,91000000,-6543000000,-1958000000,0,0,0,1016000000,0,1016000000,2803000000,-5873000000,-3070000000,0,0,0,0,0,0,-330000000,-4342000000,8245000000,8245000000,0,-43000000,0,-43000000,2797000000,0,-173000000,-5000000,10994000000,-8501000000,29882000000,29855000000,27000000,-1993000000,-5065000000,-1993000000,-1782000000,18000000,4299000000,5092000000,34947000000,-555000000,1800000000,1098000000,0,-748000000,-3,-3,12431000000,1505000000,0,0,0,0,1795000000,17336000000,37362000000,2109000000,0,4091000000,6079000000,1988000000,0,49106000000,3995000000,6027000000,0,19005000000,42157000000,1328000000,6239000000,0,0,54777000000,-8638000000,-7340000000,-1993000000,-1993000000,2335000000,98000000,704000000,-208000000,5000000,-28000000,-23000000,374000000,-204000000,374000000,13923000000,-19658000000,-5735000000,0,0,0,0,0,0,-414000000,-5983000000,4847000000,4847000000,0,0,0,0,442000000,0,-18000000,-1000000,5288000000,496000000,48971000000,39934000000,9037000000,127000000,1763000000,127000000,-1746000000,216000000,169000000,7274000000,47208000000,59000000,1962000000,1815000000,0,2148000000,0,0,8965000000,2138000000,0,0,0,892000000,2279000000,15269000000,38294000000,1904000000,0,4091000000,6150000000,2059000000,0,49447000000,4739000000,6843000000,0,21496000000,38948000000,1258000000,5976000000,0,0,49019000000,-8511000000,-5799000000,127000000,127000000,2298000000,78000000,2173000000,-2906000000,0,-321000000,-321000000,61000000,-360000000,-213000000,14972000000,-11617000000,3355000000,0,0,0,0,0,0,87000000,636000000,-2683000000,-2683000000,0,0,0,0,0,0,-21000000,73000000,-2631000000,-733000000,0
3,AAP,Specialty Retail,9709003000,5454257000,4254746000,486896000,677180000,486896000,-39898000,0,464000,3577566000,9031823000,150850000,39898000,3577566000,0,677644000,6,6,418665000,689469000,0,0,0,155241000,4432168000,5695543000,3798538000,52448000,0,992240000,1701996000,709756000,0,5552982000,0,3957850000,519852000,4477702000,2764479000,123250000,334013000,334013000,0,3221742000,3772848000,3549081000,486896000,486896000,238371000,37438000,866909000,-471648000,0,0,0,8709000,-270129000,-261420000,0,0,0,0,0,0,0,-201519000,-201519000,0,-462939000,-369386000,-310047000,-59339000,-17185000,0,-17185000,-495101000,0,-498435000,-481000,-882153000,395261000,10106321000,5624707000,4481614000,493021000,749907000,493021000,-46886000,0,-52006000,3731707000,9356414000,157994000,46886000,3731707000,0,697901000,7,7,834992000,749999000,0,0,0,146811000,4538199000,6270001000,3842589000,52329000,0,993590000,1674717000,681127000,0,5569635000,0,4247443000,496472000,4743915000,3047483000,146281000,342445000,342445000,0,3536209000,4196634000,3559512000,493021000,493021000,250081000,45271000,969688000,-267806000,0,0,0,909000,-267576000,-266667000,0,0,0,0,0,0,0,-230000,-230000,0,-266897000,244524000,244524000,0,-56347000,0,-56347000,-466421000,0,-469691000,-7753000,-285997000,701882000,10997989000,6069241000,4928748000,616108000,838717000,616108000,-37791000,0,4999000,4090031000,10159272000,189817000,37791000,4090031000,0,843716000,9,9,601428000,782785000,0,0,0,232245000,4659018000,6275476000,4200121000,73651000,0,993744000,1644961000,651217000,0,5918733000,0,4699058000,481249000,5180307000,3371971000,103034000,410606000,410606000,0,3885611000,4605791000,3128291000,616108000,616108000,259933000,63067000,1112262000,-289639000,0,0,0,2325000,-289639000,-287314000,0,0,0,0,0,0,0,0,0,0,-287314000,0,0,0,0,0,-160925000,-903134000,0,-906208000,-53000,-1064112000,822623000,11154722000,6192622000,4962100000,501872000,714151000,501872000,-51060000,0,-14404000,4247949000,10440571000,146815000,51060000,4247949000,0,699747000,8,8,269282000,698613000,0,0,0,163695000,4915262000,6046852000,4297829000,62429000,0,990471000,1611372000,620901000,0,5971630000,185000000,4757909000,427480000,5370389000,3466601000,87214000,415997000,415997000,0,3969812000,4744624000,2678281000,501872000,501872000,283800000,50978000,722222000,-425961000,0,0,0,1513000,-424061000,-422548000,0,0,0,0,0,0,0,-1900000,-1900000,0,-424448000,332537000,147537000,185000000,-336230000,0,-336230000,-618480000,0,-618480000,1469000,-620704000,296261000,2
4,AAPL,Consumer Electronics,260174000000,161782000000,98392000000,55256000000,63930000000,55256000000,1385000000,4961000000,1807000000,34462000000,196244000000,10481000000,3576000000,18245000000,16217000000,63930000000,2,2,100557000000,45804000000,0,0,0,12352000000,4106000000,162819000000,37378000000,32978000000,0,0,0,0,0,175697000000,16240000000,46236000000,37720000000,105718000000,91807000000,50503000000,0,0,0,142310000000,45898000000,90488000000,55256000000,55256000000,12547000000,6068000000,69391000000,-10495000000,0,-624000000,-624000000,0,-10495000000,-10495000000,98724000000,-40631000000,58093000000,0,0,0,0,0,0,-1078000000,45896000000,-7819000000,-1842000000,-5977000000,-14119000000,0,-14119000000,-66116000000,0,-66897000000,-2922000000,-90976000000,58896000000,274515000000,169559000000,104956000000,57411000000,66288000000,57411000000,890000000,3763000000,803000000,38668000000,208227000000,9680000000,2873000000,19916000000,18752000000,66288000000,3,3,90943000000,37445000000,0,0,0,11264000000,4061000000,143713000000,36766000000,42522000000,0,0,0,0,0,180175000000,13769000000,42296000000,42684000000,105392000000,98667000000,54490000000,0,0,0,153157000000,14966000000,65339000000,57411000000,57411000000,11056000000,6829000000,80674000000,-7309000000,0,-1524000000,-1524000000,0,-7309000000,-7309000000,120483000000,-115148000000,5335000000,0,0,0,0,0,0,-791000000,-4289000000,2499000000,3462000000,-963000000,-14081000000,0,-14081000000,-71478000000,0,-72358000000,-3760000000,-86820000000,73365000000,365817000000,212981000000,152836000000,94680000000,108949000000,94680000000,198000000,2843000000,258000000,43887000000,256868000000,14527000000,2645000000,21973000000,21914000000,108949000000,5,5,62639000000,51506000000,0,0,0,14111000000,6580000000,134836000000,39440000000,48849000000,0,0,0,0,0,216166000000,15613000000,54763000000,47493000000,125481000000,109106000000,53325000000,0,0,0,162431000000,5562000000,63090000000,94680000000,94680000000,11284000000,7906000000,104038000000,-11085000000,0,-33000000,-33000000,0,-11085000000,-11085000000,106483000000,-109558000000,-3075000000,0,0,0,0,0,0,-352000000,-14545000000,12665000000,11643000000,1022000000,-14467000000,0,-14467000000,-84866000000,0,-85971000000,-6685000000,-93353000000,92953000000,394328000000,223546000000,170782000000,99803000000,119437000000,99803000000,-106000000,2825000000,-334000000,51345000000,274891000000,19300000000,2931000000,25094000000,26251000000,119437000000,6,6,48304000000,60932000000,0,0,0,21223000000,4946000000,135405000000,42117000000,54428000000,0,0,0,0,0,217350000000,21110000000,64115000000,60845000000,153982000000,98959000000,49142000000,0,0,0,148101000000,-3068000000,50672000000,99803000000,99803000000,11104000000,9038000000,122151000000,-10708000000,0,-306000000,-306000000,0,-10708000000,-10708000000,67363000000,-76923000000,-9560000000,0,0,0,0,0,0,-1780000000,-22354000000,-123000000,-4078000000,3955000000,-14841000000,0,-14841000000,-89402000000,0,-89402000000,-6383000000,-110749000000,111443000000,2


In [23]:
df.rename(columns={'FinalScore':'label'},inplace=True)

In [24]:
# Drop column company

df.drop('company',axis=1,inplace=True)

## Split Data into Independent and Dependent Variables

In [25]:
X = df.drop('label',axis=1)
X.head()

Unnamed: 0,industry,0_inc_totalRevenue,0_inc_costOfRevenue,0_inc_grossProfit,0_inc_netIncome,0_inc_operatingIncome,0_inc_netIncomeContinuousOperations,0_inc_netInterestIncome,0_inc_interestIncome,0_inc_otherIncomeExpense,0_inc_operatingExpense,0_inc_totalExpenses,0_inc_taxProvision,0_inc_interestExpense,0_inc_SGA,0_inc_researchAndDevelopment,0_inc_ebit,0_inc_dilutedEPS,0_inc_basicEPS,0_bal_cashEquivalent,0_bal_receivables,0_bal_finishedGoods,0_bal_workInProcess,0_bal_rawMaterials,0_bal_otherCurrentAssets,0_bal_inventory,0_bal_currentAssets,0_bal_netPPE,0_bal_otherNonCurrentAssets,0_bal_financialAssets,0_bal_goodwill,0_bal_goodwillAndOtherIntangibleAssets,0_bal_otherIntangibleAssets,0_bal_nonCurrentAccountsReceivable,0_bal_totalNonCurrentAssets,0_bal_currentDebt,0_bal_payablesAndAccruedExpenses,0_bal_otherCurrentLiabilities,0_bal_currentLiabilities,0_bal_longTermDebt,0_bal_otherNonCurrentLiabilities,0_bal_nonCurrentDeferredLiabilities,0_bal_nonCurrentDeferredTaxesLiabilities,0_bal_longTermProvisions,0_bal_totalNonCurrentLiabilities,0_bal_retainedEarnings,0_bal_stockholdersEquity,0_cas_netIncome,0_cas_netIncomeFromContinuingOperations,0_cas_depreciation,0_cas_stockBasedCompensation,0_cas_cashFlowOperatingActivities,0_cas_capitalExpenditure,0_cas_saleOfBusiness,0_cas_purchaseOfBusiness,0_cas_netBusinessPurchaseAndSale,0_cas_saleOfPPE,0_cas_purchaseOfPPE,0_cas_netPPEPurchaseAndSale,0_cas_saleOfInvestment,0_cas_purchaseOfInvestment,0_cas_netInvestmentPurchaseAndSale,0_cas_saleOfInvestmentProperties,0_cas_purchaseOfInvestmentProperties,0_cas_netInvestmentPropertiesPurchaseAndSale,0_cas_saleOfIntangibles,0_cas_purchaseOfIntangibles,0_cas_netIntangiblesPurchaseAndSale,0_cas_netOtherInvestingChanges,0_cas_cashFlowInvestingActivities,0_cas_netIssuancePaymentsOfDebt,0_cas_netLongTermDebtIssuance,0_cas_netShortTermDebtIssuance,0_cas_commonStockDividendPaid,0_cas_preferredStockDividendPaid,0_cas_cashDividendsPaid,0_cas_netCommonStockIssuance,0_cas_netPreferredStockIssuance,0_cas_repurchaseOfCapitalStock,0_cas_netOtherFinancingCharges,0_cas_cashFlowFinancingActivities,0_cas_freeCashFlow,1_inc_totalRevenue,1_inc_costOfRevenue,1_inc_grossProfit,1_inc_netIncome,1_inc_operatingIncome,1_inc_netIncomeContinuousOperations,1_inc_netInterestIncome,1_inc_interestIncome,1_inc_otherIncomeExpense,1_inc_operatingExpense,1_inc_totalExpenses,1_inc_taxProvision,1_inc_interestExpense,1_inc_SGA,1_inc_researchAndDevelopment,1_inc_ebit,1_inc_dilutedEPS,1_inc_basicEPS,1_bal_cashEquivalent,1_bal_receivables,1_bal_finishedGoods,1_bal_workInProcess,1_bal_rawMaterials,1_bal_otherCurrentAssets,1_bal_inventory,1_bal_currentAssets,1_bal_netPPE,1_bal_otherNonCurrentAssets,1_bal_financialAssets,1_bal_goodwill,1_bal_goodwillAndOtherIntangibleAssets,1_bal_otherIntangibleAssets,1_bal_nonCurrentAccountsReceivable,1_bal_totalNonCurrentAssets,1_bal_currentDebt,1_bal_payablesAndAccruedExpenses,1_bal_otherCurrentLiabilities,1_bal_currentLiabilities,1_bal_longTermDebt,1_bal_otherNonCurrentLiabilities,1_bal_nonCurrentDeferredLiabilities,1_bal_nonCurrentDeferredTaxesLiabilities,1_bal_longTermProvisions,1_bal_totalNonCurrentLiabilities,1_bal_retainedEarnings,1_bal_stockholdersEquity,1_cas_netIncome,1_cas_netIncomeFromContinuingOperations,1_cas_depreciation,1_cas_stockBasedCompensation,1_cas_cashFlowOperatingActivities,1_cas_capitalExpenditure,1_cas_saleOfBusiness,1_cas_purchaseOfBusiness,1_cas_netBusinessPurchaseAndSale,1_cas_saleOfPPE,1_cas_purchaseOfPPE,1_cas_netPPEPurchaseAndSale,1_cas_saleOfInvestment,1_cas_purchaseOfInvestment,1_cas_netInvestmentPurchaseAndSale,1_cas_saleOfInvestmentProperties,1_cas_purchaseOfInvestmentProperties,1_cas_netInvestmentPropertiesPurchaseAndSale,1_cas_saleOfIntangibles,1_cas_purchaseOfIntangibles,1_cas_netIntangiblesPurchaseAndSale,1_cas_netOtherInvestingChanges,1_cas_cashFlowInvestingActivities,1_cas_netIssuancePaymentsOfDebt,1_cas_netLongTermDebtIssuance,1_cas_netShortTermDebtIssuance,1_cas_commonStockDividendPaid,1_cas_preferredStockDividendPaid,1_cas_cashDividendsPaid,1_cas_netCommonStockIssuance,1_cas_netPreferredStockIssuance,1_cas_repurchaseOfCapitalStock,1_cas_netOtherFinancingCharges,1_cas_cashFlowFinancingActivities,1_cas_freeCashFlow,2_inc_totalRevenue,2_inc_costOfRevenue,2_inc_grossProfit,2_inc_netIncome,2_inc_operatingIncome,2_inc_netIncomeContinuousOperations,2_inc_netInterestIncome,2_inc_interestIncome,2_inc_otherIncomeExpense,2_inc_operatingExpense,2_inc_totalExpenses,2_inc_taxProvision,2_inc_interestExpense,2_inc_SGA,2_inc_researchAndDevelopment,2_inc_ebit,2_inc_dilutedEPS,2_inc_basicEPS,2_bal_cashEquivalent,2_bal_receivables,2_bal_finishedGoods,2_bal_workInProcess,2_bal_rawMaterials,2_bal_otherCurrentAssets,2_bal_inventory,2_bal_currentAssets,2_bal_netPPE,2_bal_otherNonCurrentAssets,2_bal_financialAssets,2_bal_goodwill,2_bal_goodwillAndOtherIntangibleAssets,2_bal_otherIntangibleAssets,2_bal_nonCurrentAccountsReceivable,2_bal_totalNonCurrentAssets,2_bal_currentDebt,2_bal_payablesAndAccruedExpenses,2_bal_otherCurrentLiabilities,2_bal_currentLiabilities,2_bal_longTermDebt,2_bal_otherNonCurrentLiabilities,2_bal_nonCurrentDeferredLiabilities,2_bal_nonCurrentDeferredTaxesLiabilities,2_bal_longTermProvisions,2_bal_totalNonCurrentLiabilities,2_bal_retainedEarnings,2_bal_stockholdersEquity,2_cas_netIncome,2_cas_netIncomeFromContinuingOperations,2_cas_depreciation,2_cas_stockBasedCompensation,2_cas_cashFlowOperatingActivities,2_cas_capitalExpenditure,2_cas_saleOfBusiness,2_cas_purchaseOfBusiness,2_cas_netBusinessPurchaseAndSale,2_cas_saleOfPPE,2_cas_purchaseOfPPE,2_cas_netPPEPurchaseAndSale,2_cas_saleOfInvestment,2_cas_purchaseOfInvestment,2_cas_netInvestmentPurchaseAndSale,2_cas_saleOfInvestmentProperties,2_cas_purchaseOfInvestmentProperties,2_cas_netInvestmentPropertiesPurchaseAndSale,2_cas_saleOfIntangibles,2_cas_purchaseOfIntangibles,2_cas_netIntangiblesPurchaseAndSale,2_cas_netOtherInvestingChanges,2_cas_cashFlowInvestingActivities,2_cas_netIssuancePaymentsOfDebt,2_cas_netLongTermDebtIssuance,2_cas_netShortTermDebtIssuance,2_cas_commonStockDividendPaid,2_cas_preferredStockDividendPaid,2_cas_cashDividendsPaid,2_cas_netCommonStockIssuance,2_cas_netPreferredStockIssuance,2_cas_repurchaseOfCapitalStock,2_cas_netOtherFinancingCharges,2_cas_cashFlowFinancingActivities,2_cas_freeCashFlow,3_inc_totalRevenue,3_inc_costOfRevenue,3_inc_grossProfit,3_inc_netIncome,3_inc_operatingIncome,3_inc_netIncomeContinuousOperations,3_inc_netInterestIncome,3_inc_interestIncome,3_inc_otherIncomeExpense,3_inc_operatingExpense,3_inc_totalExpenses,3_inc_taxProvision,3_inc_interestExpense,3_inc_SGA,3_inc_researchAndDevelopment,3_inc_ebit,3_inc_dilutedEPS,3_inc_basicEPS,3_bal_cashEquivalent,3_bal_receivables,3_bal_finishedGoods,3_bal_workInProcess,3_bal_rawMaterials,3_bal_otherCurrentAssets,3_bal_inventory,3_bal_currentAssets,3_bal_netPPE,3_bal_otherNonCurrentAssets,3_bal_financialAssets,3_bal_goodwill,3_bal_goodwillAndOtherIntangibleAssets,3_bal_otherIntangibleAssets,3_bal_nonCurrentAccountsReceivable,3_bal_totalNonCurrentAssets,3_bal_currentDebt,3_bal_payablesAndAccruedExpenses,3_bal_otherCurrentLiabilities,3_bal_currentLiabilities,3_bal_longTermDebt,3_bal_otherNonCurrentLiabilities,3_bal_nonCurrentDeferredLiabilities,3_bal_nonCurrentDeferredTaxesLiabilities,3_bal_longTermProvisions,3_bal_totalNonCurrentLiabilities,3_bal_retainedEarnings,3_bal_stockholdersEquity,3_cas_netIncome,3_cas_netIncomeFromContinuingOperations,3_cas_depreciation,3_cas_stockBasedCompensation,3_cas_cashFlowOperatingActivities,3_cas_capitalExpenditure,3_cas_saleOfBusiness,3_cas_purchaseOfBusiness,3_cas_netBusinessPurchaseAndSale,3_cas_saleOfPPE,3_cas_purchaseOfPPE,3_cas_netPPEPurchaseAndSale,3_cas_saleOfInvestment,3_cas_purchaseOfInvestment,3_cas_netInvestmentPurchaseAndSale,3_cas_saleOfInvestmentProperties,3_cas_purchaseOfInvestmentProperties,3_cas_netInvestmentPropertiesPurchaseAndSale,3_cas_saleOfIntangibles,3_cas_purchaseOfIntangibles,3_cas_netIntangiblesPurchaseAndSale,3_cas_netOtherInvestingChanges,3_cas_cashFlowInvestingActivities,3_cas_netIssuancePaymentsOfDebt,3_cas_netLongTermDebtIssuance,3_cas_netShortTermDebtIssuance,3_cas_commonStockDividendPaid,3_cas_preferredStockDividendPaid,3_cas_cashDividendsPaid,3_cas_netCommonStockIssuance,3_cas_netPreferredStockIssuance,3_cas_repurchaseOfCapitalStock,3_cas_netOtherFinancingCharges,3_cas_cashFlowFinancingActivities,3_cas_freeCashFlow
0,Diagnostics & Research,5163000000,2358000000,2805000000,1071000000,941000000,1071000000,-38000000,36000000,16000000,1864000000,4222000000,-152000000,74000000,1460000000,404000000,993000000,3,3,1382000000,930000000,416000000,0,263000000,198000000,679000000,3189000000,850000000,611000000,0,4700000000,4700000000,1107000000,0,6263000000,616000000,794000000,0,2080000000,1791000000,473000000,0,0,0,2624000000,-18000000,4748000000,1071000000,1071000000,238000000,72000000,1021000000,-156000000,0,-1408000000,-1408000000,0,-155000000,-155000000,0,-23000000,-23000000,0,0,0,0,-1000000,-1000000,-3000000,-1590000000,600000000,-15000000,615000000,-206000000,0,-206000000,-723000000,0,-723000000,-24000000,-299000000,865000000,5339000000,2502000000,2837000000,719000000,846000000,719000000,-70000000,8000000,66000000,1991000000,4493000000,123000000,78000000,1496000000,495000000,920000000,2,2,1441000000,1038000000,417000000,0,303000000,216000000,720000000,3415000000,845000000,776000000,0,4433000000,4433000000,831000000,0,6212000000,75000000,639000000,0,1467000000,2284000000,614000000,0,0,0,3287000000,81000000,4873000000,719000000,719000000,308000000,83000000,921000000,-119000000,0,0,0,1000000,-119000000,-118000000,0,-20000000,-20000000,0,0,0,0,0,0,-9000000,-147000000,-45000000,-918000000,873000000,-222000000,0,-222000000,-469000000,0,-469000000,-41000000,-717000000,802000000,6319000000,2912000000,3407000000,1210000000,1347000000,1210000000,-79000000,2000000,92000000,2060000000,4972000000,150000000,81000000,1619000000,441000000,1441000000,3,3,1575000000,1172000000,463000000,0,367000000,222000000,830000000,3799000000,945000000,820000000,0,4956000000,4956000000,981000000,0,6906000000,0,774000000,0,1708000000,2729000000,659000000,0,0,0,3608000000,348000000,5389000000,1210000000,1210000000,321000000,110000000,1485000000,-189000000,0,-546000000,-546000000,1000000,-188000000,-187000000,12000000,-22000000,-10000000,0,0,0,0,-1000000,-1000000,-5000000,-749000000,356000000,431000000,-75000000,-236000000,0,-236000000,-788000000,0,-788000000,-83000000,-696000000,1296000000,6848000000,3126000000,3722000000,1254000000,1618000000,1254000000,-75000000,9000000,-39000000,2104000000,5230000000,250000000,84000000,1637000000,467000000,1588000000,4,4,1053000000,1405000000,555000000,0,483000000,282000000,1038000000,3778000000,1100000000,670000000,0,4773000000,4773000000,821000000,0,6738000000,36000000,909000000,0,1861000000,2733000000,536000000,0,0,0,3366000000,324000000,5289000000,1254000000,1254000000,317000000,125000000,1312000000,-291000000,0,-52000000,-52000000,0,-291000000,-291000000,22000000,-13000000,9000000,0,0,0,0,0,0,-4000000,-338000000,26000000,-9000000,35000000,-250000000,0,-250000000,-1139000000,0,-1139000000,-67000000,-1372000000,1021000000
1,Aluminum,10433000000,8537000000,1896000000,-1125000000,876000000,-853000000,-121000000,0,-1193000000,1020000000,9557000000,415000000,121000000,280000000,27000000,-317000000,-6,-6,879000000,660000000,305000000,282000000,611000000,288000000,1644000000,3530000000,7916000000,1414000000,18000000,150000000,202000000,52000000,179000000,11110000000,1000000,1588000000,561000000,2563000000,1799000000,371000000,102000000,0,902000000,6221000000,-555000000,4082000000,-1125000000,-853000000,713000000,30000000,686000000,-379000000,0,0,0,0,0,0,0,-112000000,-112000000,0,0,0,0,0,0,23000000,-468000000,-7000000,-7000000,0,0,0,0,0,0,0,-439000000,-444000000,307000000,9286000000,7969000000,1317000000,-170000000,431000000,-14000000,-146000000,0,-112000000,886000000,8855000000,187000000,146000000,206000000,27000000,319000000,0,0,1607000000,556000000,321000000,112000000,553000000,290000000,1398000000,4520000000,7190000000,1444000000,0,145000000,190000000,45000000,134000000,10344000000,2000000,1494000000,870000000,2761000000,2463000000,515000000,101000000,0,918000000,7112000000,-725000000,3287000000,-170000000,-14000000,653000000,25000000,394000000,-353000000,0,0,0,0,0,0,0,-12000000,-12000000,0,0,0,0,0,0,198000000,-167000000,738000000,738000000,0,0,0,0,0,0,0,-225000000,514000000,41000000,12152000000,9153000000,2999000000,429000000,2077000000,570000000,-195000000,0,-683000000,922000000,10075000000,629000000,195000000,227000000,31000000,1394000000,2,2,1814000000,884000000,538000000,85000000,794000000,358000000,1956000000,5026000000,6623000000,1644000000,7000000,144000000,180000000,36000000,215000000,9977000000,1000000,2048000000,791000000,3223000000,1726000000,599000000,90000000,0,887000000,5531000000,-315000000,4638000000,429000000,570000000,664000000,39000000,920000000,-390000000,0,0,0,0,0,0,0,-11000000,-11000000,0,0,0,0,0,0,966000000,565000000,-799000000,-799000000,0,-19000000,0,-19000000,-150000000,0,-150000000,-215000000,-1158000000,530000000,12451000000,10212000000,2239000000,-102000000,1331000000,38000000,-106000000,0,-523000000,908000000,11120000000,664000000,106000000,204000000,32000000,808000000,0,0,1363000000,909000000,385000000,350000000,1108000000,417000000,2427000000,5250000000,6493000000,1587000000,2000000,145000000,174000000,29000000,366000000,9533000000,1000000,1987000000,681000000,3004000000,1806000000,486000000,65000000,0,937000000,5207000000,-549000000,5058000000,-102000000,38000000,617000000,40000000,822000000,-480000000,0,0,0,0,0,0,10000000,-32000000,-22000000,0,0,0,0,0,0,7000000,-495000000,3000000,3000000,0,-72000000,0,-72000000,-500000000,0,-500000000,-221000000,-768000000,342000000
2,Airlines,45768000000,35379000000,10389000000,1686000000,3700000000,1686000000,-968000000,127000000,-476000000,6689000000,42068000000,570000000,1095000000,1602000000,0,3351000000,3,3,3826000000,1750000000,0,0,0,0,1851000000,8206000000,43732000000,1237000000,0,4091000000,6175000000,2084000000,0,51789000000,4569000000,5741000000,4808000000,18311000000,28875000000,1453000000,5422000000,0,0,41802000000,2264000000,-118000000,1686000000,1686000000,2318000000,94000000,3815000000,-4268000000,0,0,0,904000000,0,904000000,4144000000,-3187000000,957000000,0,0,0,0,0,0,164000000,-2243000000,-230000000,-230000000,0,-178000000,0,-178000000,-1097000000,0,-1097000000,-63000000,-1568000000,-453000000,17337000000,24933000000,-7596000000,-8885000000,-11078000000,-8885000000,-1186000000,41000000,811000000,3482000000,28415000000,-2568000000,1227000000,513000000,0,-10226000000,-18,-18,6864000000,1342000000,0,0,0,0,1614000000,11095000000,39738000000,1816000000,0,4091000000,6120000000,2029000000,0,50913000000,4448000000,5331000000,0,16569000000,36573000000,1502000000,7162000000,0,0,52306000000,-6664000000,-6867000000,-8885000000,-8885000000,2370000000,91000000,-6543000000,-1958000000,0,0,0,1016000000,0,1016000000,2803000000,-5873000000,-3070000000,0,0,0,0,0,0,-330000000,-4342000000,8245000000,8245000000,0,-43000000,0,-43000000,2797000000,0,-173000000,-5000000,10994000000,-8501000000,29882000000,29855000000,27000000,-1993000000,-5065000000,-1993000000,-1782000000,18000000,4299000000,5092000000,34947000000,-555000000,1800000000,1098000000,0,-748000000,-3,-3,12431000000,1505000000,0,0,0,0,1795000000,17336000000,37362000000,2109000000,0,4091000000,6079000000,1988000000,0,49106000000,3995000000,6027000000,0,19005000000,42157000000,1328000000,6239000000,0,0,54777000000,-8638000000,-7340000000,-1993000000,-1993000000,2335000000,98000000,704000000,-208000000,5000000,-28000000,-23000000,374000000,-204000000,374000000,13923000000,-19658000000,-5735000000,0,0,0,0,0,0,-414000000,-5983000000,4847000000,4847000000,0,0,0,0,442000000,0,-18000000,-1000000,5288000000,496000000,48971000000,39934000000,9037000000,127000000,1763000000,127000000,-1746000000,216000000,169000000,7274000000,47208000000,59000000,1962000000,1815000000,0,2148000000,0,0,8965000000,2138000000,0,0,0,892000000,2279000000,15269000000,38294000000,1904000000,0,4091000000,6150000000,2059000000,0,49447000000,4739000000,6843000000,0,21496000000,38948000000,1258000000,5976000000,0,0,49019000000,-8511000000,-5799000000,127000000,127000000,2298000000,78000000,2173000000,-2906000000,0,-321000000,-321000000,61000000,-360000000,-213000000,14972000000,-11617000000,3355000000,0,0,0,0,0,0,87000000,636000000,-2683000000,-2683000000,0,0,0,0,0,0,-21000000,73000000,-2631000000,-733000000
3,Specialty Retail,9709003000,5454257000,4254746000,486896000,677180000,486896000,-39898000,0,464000,3577566000,9031823000,150850000,39898000,3577566000,0,677644000,6,6,418665000,689469000,0,0,0,155241000,4432168000,5695543000,3798538000,52448000,0,992240000,1701996000,709756000,0,5552982000,0,3957850000,519852000,4477702000,2764479000,123250000,334013000,334013000,0,3221742000,3772848000,3549081000,486896000,486896000,238371000,37438000,866909000,-471648000,0,0,0,8709000,-270129000,-261420000,0,0,0,0,0,0,0,-201519000,-201519000,0,-462939000,-369386000,-310047000,-59339000,-17185000,0,-17185000,-495101000,0,-498435000,-481000,-882153000,395261000,10106321000,5624707000,4481614000,493021000,749907000,493021000,-46886000,0,-52006000,3731707000,9356414000,157994000,46886000,3731707000,0,697901000,7,7,834992000,749999000,0,0,0,146811000,4538199000,6270001000,3842589000,52329000,0,993590000,1674717000,681127000,0,5569635000,0,4247443000,496472000,4743915000,3047483000,146281000,342445000,342445000,0,3536209000,4196634000,3559512000,493021000,493021000,250081000,45271000,969688000,-267806000,0,0,0,909000,-267576000,-266667000,0,0,0,0,0,0,0,-230000,-230000,0,-266897000,244524000,244524000,0,-56347000,0,-56347000,-466421000,0,-469691000,-7753000,-285997000,701882000,10997989000,6069241000,4928748000,616108000,838717000,616108000,-37791000,0,4999000,4090031000,10159272000,189817000,37791000,4090031000,0,843716000,9,9,601428000,782785000,0,0,0,232245000,4659018000,6275476000,4200121000,73651000,0,993744000,1644961000,651217000,0,5918733000,0,4699058000,481249000,5180307000,3371971000,103034000,410606000,410606000,0,3885611000,4605791000,3128291000,616108000,616108000,259933000,63067000,1112262000,-289639000,0,0,0,2325000,-289639000,-287314000,0,0,0,0,0,0,0,0,0,0,-287314000,0,0,0,0,0,-160925000,-903134000,0,-906208000,-53000,-1064112000,822623000,11154722000,6192622000,4962100000,501872000,714151000,501872000,-51060000,0,-14404000,4247949000,10440571000,146815000,51060000,4247949000,0,699747000,8,8,269282000,698613000,0,0,0,163695000,4915262000,6046852000,4297829000,62429000,0,990471000,1611372000,620901000,0,5971630000,185000000,4757909000,427480000,5370389000,3466601000,87214000,415997000,415997000,0,3969812000,4744624000,2678281000,501872000,501872000,283800000,50978000,722222000,-425961000,0,0,0,1513000,-424061000,-422548000,0,0,0,0,0,0,0,-1900000,-1900000,0,-424448000,332537000,147537000,185000000,-336230000,0,-336230000,-618480000,0,-618480000,1469000,-620704000,296261000
4,Consumer Electronics,260174000000,161782000000,98392000000,55256000000,63930000000,55256000000,1385000000,4961000000,1807000000,34462000000,196244000000,10481000000,3576000000,18245000000,16217000000,63930000000,2,2,100557000000,45804000000,0,0,0,12352000000,4106000000,162819000000,37378000000,32978000000,0,0,0,0,0,175697000000,16240000000,46236000000,37720000000,105718000000,91807000000,50503000000,0,0,0,142310000000,45898000000,90488000000,55256000000,55256000000,12547000000,6068000000,69391000000,-10495000000,0,-624000000,-624000000,0,-10495000000,-10495000000,98724000000,-40631000000,58093000000,0,0,0,0,0,0,-1078000000,45896000000,-7819000000,-1842000000,-5977000000,-14119000000,0,-14119000000,-66116000000,0,-66897000000,-2922000000,-90976000000,58896000000,274515000000,169559000000,104956000000,57411000000,66288000000,57411000000,890000000,3763000000,803000000,38668000000,208227000000,9680000000,2873000000,19916000000,18752000000,66288000000,3,3,90943000000,37445000000,0,0,0,11264000000,4061000000,143713000000,36766000000,42522000000,0,0,0,0,0,180175000000,13769000000,42296000000,42684000000,105392000000,98667000000,54490000000,0,0,0,153157000000,14966000000,65339000000,57411000000,57411000000,11056000000,6829000000,80674000000,-7309000000,0,-1524000000,-1524000000,0,-7309000000,-7309000000,120483000000,-115148000000,5335000000,0,0,0,0,0,0,-791000000,-4289000000,2499000000,3462000000,-963000000,-14081000000,0,-14081000000,-71478000000,0,-72358000000,-3760000000,-86820000000,73365000000,365817000000,212981000000,152836000000,94680000000,108949000000,94680000000,198000000,2843000000,258000000,43887000000,256868000000,14527000000,2645000000,21973000000,21914000000,108949000000,5,5,62639000000,51506000000,0,0,0,14111000000,6580000000,134836000000,39440000000,48849000000,0,0,0,0,0,216166000000,15613000000,54763000000,47493000000,125481000000,109106000000,53325000000,0,0,0,162431000000,5562000000,63090000000,94680000000,94680000000,11284000000,7906000000,104038000000,-11085000000,0,-33000000,-33000000,0,-11085000000,-11085000000,106483000000,-109558000000,-3075000000,0,0,0,0,0,0,-352000000,-14545000000,12665000000,11643000000,1022000000,-14467000000,0,-14467000000,-84866000000,0,-85971000000,-6685000000,-93353000000,92953000000,394328000000,223546000000,170782000000,99803000000,119437000000,99803000000,-106000000,2825000000,-334000000,51345000000,274891000000,19300000000,2931000000,25094000000,26251000000,119437000000,6,6,48304000000,60932000000,0,0,0,21223000000,4946000000,135405000000,42117000000,54428000000,0,0,0,0,0,217350000000,21110000000,64115000000,60845000000,153982000000,98959000000,49142000000,0,0,0,148101000000,-3068000000,50672000000,99803000000,99803000000,11104000000,9038000000,122151000000,-10708000000,0,-306000000,-306000000,0,-10708000000,-10708000000,67363000000,-76923000000,-9560000000,0,0,0,0,0,0,-1780000000,-22354000000,-123000000,-4078000000,3955000000,-14841000000,0,-14841000000,-89402000000,0,-89402000000,-6383000000,-110749000000,111443000000


In [26]:
y = df['label']
y.head()

0    3
1    2
2    0
3    2
4    2
Name: label, dtype: int64

In [27]:
X.head()

Unnamed: 0,industry,0_inc_totalRevenue,0_inc_costOfRevenue,0_inc_grossProfit,0_inc_netIncome,0_inc_operatingIncome,0_inc_netIncomeContinuousOperations,0_inc_netInterestIncome,0_inc_interestIncome,0_inc_otherIncomeExpense,0_inc_operatingExpense,0_inc_totalExpenses,0_inc_taxProvision,0_inc_interestExpense,0_inc_SGA,0_inc_researchAndDevelopment,0_inc_ebit,0_inc_dilutedEPS,0_inc_basicEPS,0_bal_cashEquivalent,0_bal_receivables,0_bal_finishedGoods,0_bal_workInProcess,0_bal_rawMaterials,0_bal_otherCurrentAssets,0_bal_inventory,0_bal_currentAssets,0_bal_netPPE,0_bal_otherNonCurrentAssets,0_bal_financialAssets,0_bal_goodwill,0_bal_goodwillAndOtherIntangibleAssets,0_bal_otherIntangibleAssets,0_bal_nonCurrentAccountsReceivable,0_bal_totalNonCurrentAssets,0_bal_currentDebt,0_bal_payablesAndAccruedExpenses,0_bal_otherCurrentLiabilities,0_bal_currentLiabilities,0_bal_longTermDebt,0_bal_otherNonCurrentLiabilities,0_bal_nonCurrentDeferredLiabilities,0_bal_nonCurrentDeferredTaxesLiabilities,0_bal_longTermProvisions,0_bal_totalNonCurrentLiabilities,0_bal_retainedEarnings,0_bal_stockholdersEquity,0_cas_netIncome,0_cas_netIncomeFromContinuingOperations,0_cas_depreciation,0_cas_stockBasedCompensation,0_cas_cashFlowOperatingActivities,0_cas_capitalExpenditure,0_cas_saleOfBusiness,0_cas_purchaseOfBusiness,0_cas_netBusinessPurchaseAndSale,0_cas_saleOfPPE,0_cas_purchaseOfPPE,0_cas_netPPEPurchaseAndSale,0_cas_saleOfInvestment,0_cas_purchaseOfInvestment,0_cas_netInvestmentPurchaseAndSale,0_cas_saleOfInvestmentProperties,0_cas_purchaseOfInvestmentProperties,0_cas_netInvestmentPropertiesPurchaseAndSale,0_cas_saleOfIntangibles,0_cas_purchaseOfIntangibles,0_cas_netIntangiblesPurchaseAndSale,0_cas_netOtherInvestingChanges,0_cas_cashFlowInvestingActivities,0_cas_netIssuancePaymentsOfDebt,0_cas_netLongTermDebtIssuance,0_cas_netShortTermDebtIssuance,0_cas_commonStockDividendPaid,0_cas_preferredStockDividendPaid,0_cas_cashDividendsPaid,0_cas_netCommonStockIssuance,0_cas_netPreferredStockIssuance,0_cas_repurchaseOfCapitalStock,0_cas_netOtherFinancingCharges,0_cas_cashFlowFinancingActivities,0_cas_freeCashFlow,1_inc_totalRevenue,1_inc_costOfRevenue,1_inc_grossProfit,1_inc_netIncome,1_inc_operatingIncome,1_inc_netIncomeContinuousOperations,1_inc_netInterestIncome,1_inc_interestIncome,1_inc_otherIncomeExpense,1_inc_operatingExpense,1_inc_totalExpenses,1_inc_taxProvision,1_inc_interestExpense,1_inc_SGA,1_inc_researchAndDevelopment,1_inc_ebit,1_inc_dilutedEPS,1_inc_basicEPS,1_bal_cashEquivalent,1_bal_receivables,1_bal_finishedGoods,1_bal_workInProcess,1_bal_rawMaterials,1_bal_otherCurrentAssets,1_bal_inventory,1_bal_currentAssets,1_bal_netPPE,1_bal_otherNonCurrentAssets,1_bal_financialAssets,1_bal_goodwill,1_bal_goodwillAndOtherIntangibleAssets,1_bal_otherIntangibleAssets,1_bal_nonCurrentAccountsReceivable,1_bal_totalNonCurrentAssets,1_bal_currentDebt,1_bal_payablesAndAccruedExpenses,1_bal_otherCurrentLiabilities,1_bal_currentLiabilities,1_bal_longTermDebt,1_bal_otherNonCurrentLiabilities,1_bal_nonCurrentDeferredLiabilities,1_bal_nonCurrentDeferredTaxesLiabilities,1_bal_longTermProvisions,1_bal_totalNonCurrentLiabilities,1_bal_retainedEarnings,1_bal_stockholdersEquity,1_cas_netIncome,1_cas_netIncomeFromContinuingOperations,1_cas_depreciation,1_cas_stockBasedCompensation,1_cas_cashFlowOperatingActivities,1_cas_capitalExpenditure,1_cas_saleOfBusiness,1_cas_purchaseOfBusiness,1_cas_netBusinessPurchaseAndSale,1_cas_saleOfPPE,1_cas_purchaseOfPPE,1_cas_netPPEPurchaseAndSale,1_cas_saleOfInvestment,1_cas_purchaseOfInvestment,1_cas_netInvestmentPurchaseAndSale,1_cas_saleOfInvestmentProperties,1_cas_purchaseOfInvestmentProperties,1_cas_netInvestmentPropertiesPurchaseAndSale,1_cas_saleOfIntangibles,1_cas_purchaseOfIntangibles,1_cas_netIntangiblesPurchaseAndSale,1_cas_netOtherInvestingChanges,1_cas_cashFlowInvestingActivities,1_cas_netIssuancePaymentsOfDebt,1_cas_netLongTermDebtIssuance,1_cas_netShortTermDebtIssuance,1_cas_commonStockDividendPaid,1_cas_preferredStockDividendPaid,1_cas_cashDividendsPaid,1_cas_netCommonStockIssuance,1_cas_netPreferredStockIssuance,1_cas_repurchaseOfCapitalStock,1_cas_netOtherFinancingCharges,1_cas_cashFlowFinancingActivities,1_cas_freeCashFlow,2_inc_totalRevenue,2_inc_costOfRevenue,2_inc_grossProfit,2_inc_netIncome,2_inc_operatingIncome,2_inc_netIncomeContinuousOperations,2_inc_netInterestIncome,2_inc_interestIncome,2_inc_otherIncomeExpense,2_inc_operatingExpense,2_inc_totalExpenses,2_inc_taxProvision,2_inc_interestExpense,2_inc_SGA,2_inc_researchAndDevelopment,2_inc_ebit,2_inc_dilutedEPS,2_inc_basicEPS,2_bal_cashEquivalent,2_bal_receivables,2_bal_finishedGoods,2_bal_workInProcess,2_bal_rawMaterials,2_bal_otherCurrentAssets,2_bal_inventory,2_bal_currentAssets,2_bal_netPPE,2_bal_otherNonCurrentAssets,2_bal_financialAssets,2_bal_goodwill,2_bal_goodwillAndOtherIntangibleAssets,2_bal_otherIntangibleAssets,2_bal_nonCurrentAccountsReceivable,2_bal_totalNonCurrentAssets,2_bal_currentDebt,2_bal_payablesAndAccruedExpenses,2_bal_otherCurrentLiabilities,2_bal_currentLiabilities,2_bal_longTermDebt,2_bal_otherNonCurrentLiabilities,2_bal_nonCurrentDeferredLiabilities,2_bal_nonCurrentDeferredTaxesLiabilities,2_bal_longTermProvisions,2_bal_totalNonCurrentLiabilities,2_bal_retainedEarnings,2_bal_stockholdersEquity,2_cas_netIncome,2_cas_netIncomeFromContinuingOperations,2_cas_depreciation,2_cas_stockBasedCompensation,2_cas_cashFlowOperatingActivities,2_cas_capitalExpenditure,2_cas_saleOfBusiness,2_cas_purchaseOfBusiness,2_cas_netBusinessPurchaseAndSale,2_cas_saleOfPPE,2_cas_purchaseOfPPE,2_cas_netPPEPurchaseAndSale,2_cas_saleOfInvestment,2_cas_purchaseOfInvestment,2_cas_netInvestmentPurchaseAndSale,2_cas_saleOfInvestmentProperties,2_cas_purchaseOfInvestmentProperties,2_cas_netInvestmentPropertiesPurchaseAndSale,2_cas_saleOfIntangibles,2_cas_purchaseOfIntangibles,2_cas_netIntangiblesPurchaseAndSale,2_cas_netOtherInvestingChanges,2_cas_cashFlowInvestingActivities,2_cas_netIssuancePaymentsOfDebt,2_cas_netLongTermDebtIssuance,2_cas_netShortTermDebtIssuance,2_cas_commonStockDividendPaid,2_cas_preferredStockDividendPaid,2_cas_cashDividendsPaid,2_cas_netCommonStockIssuance,2_cas_netPreferredStockIssuance,2_cas_repurchaseOfCapitalStock,2_cas_netOtherFinancingCharges,2_cas_cashFlowFinancingActivities,2_cas_freeCashFlow,3_inc_totalRevenue,3_inc_costOfRevenue,3_inc_grossProfit,3_inc_netIncome,3_inc_operatingIncome,3_inc_netIncomeContinuousOperations,3_inc_netInterestIncome,3_inc_interestIncome,3_inc_otherIncomeExpense,3_inc_operatingExpense,3_inc_totalExpenses,3_inc_taxProvision,3_inc_interestExpense,3_inc_SGA,3_inc_researchAndDevelopment,3_inc_ebit,3_inc_dilutedEPS,3_inc_basicEPS,3_bal_cashEquivalent,3_bal_receivables,3_bal_finishedGoods,3_bal_workInProcess,3_bal_rawMaterials,3_bal_otherCurrentAssets,3_bal_inventory,3_bal_currentAssets,3_bal_netPPE,3_bal_otherNonCurrentAssets,3_bal_financialAssets,3_bal_goodwill,3_bal_goodwillAndOtherIntangibleAssets,3_bal_otherIntangibleAssets,3_bal_nonCurrentAccountsReceivable,3_bal_totalNonCurrentAssets,3_bal_currentDebt,3_bal_payablesAndAccruedExpenses,3_bal_otherCurrentLiabilities,3_bal_currentLiabilities,3_bal_longTermDebt,3_bal_otherNonCurrentLiabilities,3_bal_nonCurrentDeferredLiabilities,3_bal_nonCurrentDeferredTaxesLiabilities,3_bal_longTermProvisions,3_bal_totalNonCurrentLiabilities,3_bal_retainedEarnings,3_bal_stockholdersEquity,3_cas_netIncome,3_cas_netIncomeFromContinuingOperations,3_cas_depreciation,3_cas_stockBasedCompensation,3_cas_cashFlowOperatingActivities,3_cas_capitalExpenditure,3_cas_saleOfBusiness,3_cas_purchaseOfBusiness,3_cas_netBusinessPurchaseAndSale,3_cas_saleOfPPE,3_cas_purchaseOfPPE,3_cas_netPPEPurchaseAndSale,3_cas_saleOfInvestment,3_cas_purchaseOfInvestment,3_cas_netInvestmentPurchaseAndSale,3_cas_saleOfInvestmentProperties,3_cas_purchaseOfInvestmentProperties,3_cas_netInvestmentPropertiesPurchaseAndSale,3_cas_saleOfIntangibles,3_cas_purchaseOfIntangibles,3_cas_netIntangiblesPurchaseAndSale,3_cas_netOtherInvestingChanges,3_cas_cashFlowInvestingActivities,3_cas_netIssuancePaymentsOfDebt,3_cas_netLongTermDebtIssuance,3_cas_netShortTermDebtIssuance,3_cas_commonStockDividendPaid,3_cas_preferredStockDividendPaid,3_cas_cashDividendsPaid,3_cas_netCommonStockIssuance,3_cas_netPreferredStockIssuance,3_cas_repurchaseOfCapitalStock,3_cas_netOtherFinancingCharges,3_cas_cashFlowFinancingActivities,3_cas_freeCashFlow
0,Diagnostics & Research,5163000000,2358000000,2805000000,1071000000,941000000,1071000000,-38000000,36000000,16000000,1864000000,4222000000,-152000000,74000000,1460000000,404000000,993000000,3,3,1382000000,930000000,416000000,0,263000000,198000000,679000000,3189000000,850000000,611000000,0,4700000000,4700000000,1107000000,0,6263000000,616000000,794000000,0,2080000000,1791000000,473000000,0,0,0,2624000000,-18000000,4748000000,1071000000,1071000000,238000000,72000000,1021000000,-156000000,0,-1408000000,-1408000000,0,-155000000,-155000000,0,-23000000,-23000000,0,0,0,0,-1000000,-1000000,-3000000,-1590000000,600000000,-15000000,615000000,-206000000,0,-206000000,-723000000,0,-723000000,-24000000,-299000000,865000000,5339000000,2502000000,2837000000,719000000,846000000,719000000,-70000000,8000000,66000000,1991000000,4493000000,123000000,78000000,1496000000,495000000,920000000,2,2,1441000000,1038000000,417000000,0,303000000,216000000,720000000,3415000000,845000000,776000000,0,4433000000,4433000000,831000000,0,6212000000,75000000,639000000,0,1467000000,2284000000,614000000,0,0,0,3287000000,81000000,4873000000,719000000,719000000,308000000,83000000,921000000,-119000000,0,0,0,1000000,-119000000,-118000000,0,-20000000,-20000000,0,0,0,0,0,0,-9000000,-147000000,-45000000,-918000000,873000000,-222000000,0,-222000000,-469000000,0,-469000000,-41000000,-717000000,802000000,6319000000,2912000000,3407000000,1210000000,1347000000,1210000000,-79000000,2000000,92000000,2060000000,4972000000,150000000,81000000,1619000000,441000000,1441000000,3,3,1575000000,1172000000,463000000,0,367000000,222000000,830000000,3799000000,945000000,820000000,0,4956000000,4956000000,981000000,0,6906000000,0,774000000,0,1708000000,2729000000,659000000,0,0,0,3608000000,348000000,5389000000,1210000000,1210000000,321000000,110000000,1485000000,-189000000,0,-546000000,-546000000,1000000,-188000000,-187000000,12000000,-22000000,-10000000,0,0,0,0,-1000000,-1000000,-5000000,-749000000,356000000,431000000,-75000000,-236000000,0,-236000000,-788000000,0,-788000000,-83000000,-696000000,1296000000,6848000000,3126000000,3722000000,1254000000,1618000000,1254000000,-75000000,9000000,-39000000,2104000000,5230000000,250000000,84000000,1637000000,467000000,1588000000,4,4,1053000000,1405000000,555000000,0,483000000,282000000,1038000000,3778000000,1100000000,670000000,0,4773000000,4773000000,821000000,0,6738000000,36000000,909000000,0,1861000000,2733000000,536000000,0,0,0,3366000000,324000000,5289000000,1254000000,1254000000,317000000,125000000,1312000000,-291000000,0,-52000000,-52000000,0,-291000000,-291000000,22000000,-13000000,9000000,0,0,0,0,0,0,-4000000,-338000000,26000000,-9000000,35000000,-250000000,0,-250000000,-1139000000,0,-1139000000,-67000000,-1372000000,1021000000
1,Aluminum,10433000000,8537000000,1896000000,-1125000000,876000000,-853000000,-121000000,0,-1193000000,1020000000,9557000000,415000000,121000000,280000000,27000000,-317000000,-6,-6,879000000,660000000,305000000,282000000,611000000,288000000,1644000000,3530000000,7916000000,1414000000,18000000,150000000,202000000,52000000,179000000,11110000000,1000000,1588000000,561000000,2563000000,1799000000,371000000,102000000,0,902000000,6221000000,-555000000,4082000000,-1125000000,-853000000,713000000,30000000,686000000,-379000000,0,0,0,0,0,0,0,-112000000,-112000000,0,0,0,0,0,0,23000000,-468000000,-7000000,-7000000,0,0,0,0,0,0,0,-439000000,-444000000,307000000,9286000000,7969000000,1317000000,-170000000,431000000,-14000000,-146000000,0,-112000000,886000000,8855000000,187000000,146000000,206000000,27000000,319000000,0,0,1607000000,556000000,321000000,112000000,553000000,290000000,1398000000,4520000000,7190000000,1444000000,0,145000000,190000000,45000000,134000000,10344000000,2000000,1494000000,870000000,2761000000,2463000000,515000000,101000000,0,918000000,7112000000,-725000000,3287000000,-170000000,-14000000,653000000,25000000,394000000,-353000000,0,0,0,0,0,0,0,-12000000,-12000000,0,0,0,0,0,0,198000000,-167000000,738000000,738000000,0,0,0,0,0,0,0,-225000000,514000000,41000000,12152000000,9153000000,2999000000,429000000,2077000000,570000000,-195000000,0,-683000000,922000000,10075000000,629000000,195000000,227000000,31000000,1394000000,2,2,1814000000,884000000,538000000,85000000,794000000,358000000,1956000000,5026000000,6623000000,1644000000,7000000,144000000,180000000,36000000,215000000,9977000000,1000000,2048000000,791000000,3223000000,1726000000,599000000,90000000,0,887000000,5531000000,-315000000,4638000000,429000000,570000000,664000000,39000000,920000000,-390000000,0,0,0,0,0,0,0,-11000000,-11000000,0,0,0,0,0,0,966000000,565000000,-799000000,-799000000,0,-19000000,0,-19000000,-150000000,0,-150000000,-215000000,-1158000000,530000000,12451000000,10212000000,2239000000,-102000000,1331000000,38000000,-106000000,0,-523000000,908000000,11120000000,664000000,106000000,204000000,32000000,808000000,0,0,1363000000,909000000,385000000,350000000,1108000000,417000000,2427000000,5250000000,6493000000,1587000000,2000000,145000000,174000000,29000000,366000000,9533000000,1000000,1987000000,681000000,3004000000,1806000000,486000000,65000000,0,937000000,5207000000,-549000000,5058000000,-102000000,38000000,617000000,40000000,822000000,-480000000,0,0,0,0,0,0,10000000,-32000000,-22000000,0,0,0,0,0,0,7000000,-495000000,3000000,3000000,0,-72000000,0,-72000000,-500000000,0,-500000000,-221000000,-768000000,342000000
2,Airlines,45768000000,35379000000,10389000000,1686000000,3700000000,1686000000,-968000000,127000000,-476000000,6689000000,42068000000,570000000,1095000000,1602000000,0,3351000000,3,3,3826000000,1750000000,0,0,0,0,1851000000,8206000000,43732000000,1237000000,0,4091000000,6175000000,2084000000,0,51789000000,4569000000,5741000000,4808000000,18311000000,28875000000,1453000000,5422000000,0,0,41802000000,2264000000,-118000000,1686000000,1686000000,2318000000,94000000,3815000000,-4268000000,0,0,0,904000000,0,904000000,4144000000,-3187000000,957000000,0,0,0,0,0,0,164000000,-2243000000,-230000000,-230000000,0,-178000000,0,-178000000,-1097000000,0,-1097000000,-63000000,-1568000000,-453000000,17337000000,24933000000,-7596000000,-8885000000,-11078000000,-8885000000,-1186000000,41000000,811000000,3482000000,28415000000,-2568000000,1227000000,513000000,0,-10226000000,-18,-18,6864000000,1342000000,0,0,0,0,1614000000,11095000000,39738000000,1816000000,0,4091000000,6120000000,2029000000,0,50913000000,4448000000,5331000000,0,16569000000,36573000000,1502000000,7162000000,0,0,52306000000,-6664000000,-6867000000,-8885000000,-8885000000,2370000000,91000000,-6543000000,-1958000000,0,0,0,1016000000,0,1016000000,2803000000,-5873000000,-3070000000,0,0,0,0,0,0,-330000000,-4342000000,8245000000,8245000000,0,-43000000,0,-43000000,2797000000,0,-173000000,-5000000,10994000000,-8501000000,29882000000,29855000000,27000000,-1993000000,-5065000000,-1993000000,-1782000000,18000000,4299000000,5092000000,34947000000,-555000000,1800000000,1098000000,0,-748000000,-3,-3,12431000000,1505000000,0,0,0,0,1795000000,17336000000,37362000000,2109000000,0,4091000000,6079000000,1988000000,0,49106000000,3995000000,6027000000,0,19005000000,42157000000,1328000000,6239000000,0,0,54777000000,-8638000000,-7340000000,-1993000000,-1993000000,2335000000,98000000,704000000,-208000000,5000000,-28000000,-23000000,374000000,-204000000,374000000,13923000000,-19658000000,-5735000000,0,0,0,0,0,0,-414000000,-5983000000,4847000000,4847000000,0,0,0,0,442000000,0,-18000000,-1000000,5288000000,496000000,48971000000,39934000000,9037000000,127000000,1763000000,127000000,-1746000000,216000000,169000000,7274000000,47208000000,59000000,1962000000,1815000000,0,2148000000,0,0,8965000000,2138000000,0,0,0,892000000,2279000000,15269000000,38294000000,1904000000,0,4091000000,6150000000,2059000000,0,49447000000,4739000000,6843000000,0,21496000000,38948000000,1258000000,5976000000,0,0,49019000000,-8511000000,-5799000000,127000000,127000000,2298000000,78000000,2173000000,-2906000000,0,-321000000,-321000000,61000000,-360000000,-213000000,14972000000,-11617000000,3355000000,0,0,0,0,0,0,87000000,636000000,-2683000000,-2683000000,0,0,0,0,0,0,-21000000,73000000,-2631000000,-733000000
3,Specialty Retail,9709003000,5454257000,4254746000,486896000,677180000,486896000,-39898000,0,464000,3577566000,9031823000,150850000,39898000,3577566000,0,677644000,6,6,418665000,689469000,0,0,0,155241000,4432168000,5695543000,3798538000,52448000,0,992240000,1701996000,709756000,0,5552982000,0,3957850000,519852000,4477702000,2764479000,123250000,334013000,334013000,0,3221742000,3772848000,3549081000,486896000,486896000,238371000,37438000,866909000,-471648000,0,0,0,8709000,-270129000,-261420000,0,0,0,0,0,0,0,-201519000,-201519000,0,-462939000,-369386000,-310047000,-59339000,-17185000,0,-17185000,-495101000,0,-498435000,-481000,-882153000,395261000,10106321000,5624707000,4481614000,493021000,749907000,493021000,-46886000,0,-52006000,3731707000,9356414000,157994000,46886000,3731707000,0,697901000,7,7,834992000,749999000,0,0,0,146811000,4538199000,6270001000,3842589000,52329000,0,993590000,1674717000,681127000,0,5569635000,0,4247443000,496472000,4743915000,3047483000,146281000,342445000,342445000,0,3536209000,4196634000,3559512000,493021000,493021000,250081000,45271000,969688000,-267806000,0,0,0,909000,-267576000,-266667000,0,0,0,0,0,0,0,-230000,-230000,0,-266897000,244524000,244524000,0,-56347000,0,-56347000,-466421000,0,-469691000,-7753000,-285997000,701882000,10997989000,6069241000,4928748000,616108000,838717000,616108000,-37791000,0,4999000,4090031000,10159272000,189817000,37791000,4090031000,0,843716000,9,9,601428000,782785000,0,0,0,232245000,4659018000,6275476000,4200121000,73651000,0,993744000,1644961000,651217000,0,5918733000,0,4699058000,481249000,5180307000,3371971000,103034000,410606000,410606000,0,3885611000,4605791000,3128291000,616108000,616108000,259933000,63067000,1112262000,-289639000,0,0,0,2325000,-289639000,-287314000,0,0,0,0,0,0,0,0,0,0,-287314000,0,0,0,0,0,-160925000,-903134000,0,-906208000,-53000,-1064112000,822623000,11154722000,6192622000,4962100000,501872000,714151000,501872000,-51060000,0,-14404000,4247949000,10440571000,146815000,51060000,4247949000,0,699747000,8,8,269282000,698613000,0,0,0,163695000,4915262000,6046852000,4297829000,62429000,0,990471000,1611372000,620901000,0,5971630000,185000000,4757909000,427480000,5370389000,3466601000,87214000,415997000,415997000,0,3969812000,4744624000,2678281000,501872000,501872000,283800000,50978000,722222000,-425961000,0,0,0,1513000,-424061000,-422548000,0,0,0,0,0,0,0,-1900000,-1900000,0,-424448000,332537000,147537000,185000000,-336230000,0,-336230000,-618480000,0,-618480000,1469000,-620704000,296261000
4,Consumer Electronics,260174000000,161782000000,98392000000,55256000000,63930000000,55256000000,1385000000,4961000000,1807000000,34462000000,196244000000,10481000000,3576000000,18245000000,16217000000,63930000000,2,2,100557000000,45804000000,0,0,0,12352000000,4106000000,162819000000,37378000000,32978000000,0,0,0,0,0,175697000000,16240000000,46236000000,37720000000,105718000000,91807000000,50503000000,0,0,0,142310000000,45898000000,90488000000,55256000000,55256000000,12547000000,6068000000,69391000000,-10495000000,0,-624000000,-624000000,0,-10495000000,-10495000000,98724000000,-40631000000,58093000000,0,0,0,0,0,0,-1078000000,45896000000,-7819000000,-1842000000,-5977000000,-14119000000,0,-14119000000,-66116000000,0,-66897000000,-2922000000,-90976000000,58896000000,274515000000,169559000000,104956000000,57411000000,66288000000,57411000000,890000000,3763000000,803000000,38668000000,208227000000,9680000000,2873000000,19916000000,18752000000,66288000000,3,3,90943000000,37445000000,0,0,0,11264000000,4061000000,143713000000,36766000000,42522000000,0,0,0,0,0,180175000000,13769000000,42296000000,42684000000,105392000000,98667000000,54490000000,0,0,0,153157000000,14966000000,65339000000,57411000000,57411000000,11056000000,6829000000,80674000000,-7309000000,0,-1524000000,-1524000000,0,-7309000000,-7309000000,120483000000,-115148000000,5335000000,0,0,0,0,0,0,-791000000,-4289000000,2499000000,3462000000,-963000000,-14081000000,0,-14081000000,-71478000000,0,-72358000000,-3760000000,-86820000000,73365000000,365817000000,212981000000,152836000000,94680000000,108949000000,94680000000,198000000,2843000000,258000000,43887000000,256868000000,14527000000,2645000000,21973000000,21914000000,108949000000,5,5,62639000000,51506000000,0,0,0,14111000000,6580000000,134836000000,39440000000,48849000000,0,0,0,0,0,216166000000,15613000000,54763000000,47493000000,125481000000,109106000000,53325000000,0,0,0,162431000000,5562000000,63090000000,94680000000,94680000000,11284000000,7906000000,104038000000,-11085000000,0,-33000000,-33000000,0,-11085000000,-11085000000,106483000000,-109558000000,-3075000000,0,0,0,0,0,0,-352000000,-14545000000,12665000000,11643000000,1022000000,-14467000000,0,-14467000000,-84866000000,0,-85971000000,-6685000000,-93353000000,92953000000,394328000000,223546000000,170782000000,99803000000,119437000000,99803000000,-106000000,2825000000,-334000000,51345000000,274891000000,19300000000,2931000000,25094000000,26251000000,119437000000,6,6,48304000000,60932000000,0,0,0,21223000000,4946000000,135405000000,42117000000,54428000000,0,0,0,0,0,217350000000,21110000000,64115000000,60845000000,153982000000,98959000000,49142000000,0,0,0,148101000000,-3068000000,50672000000,99803000000,99803000000,11104000000,9038000000,122151000000,-10708000000,0,-306000000,-306000000,0,-10708000000,-10708000000,67363000000,-76923000000,-9560000000,0,0,0,0,0,0,-1780000000,-22354000000,-123000000,-4078000000,3955000000,-14841000000,0,-14841000000,-89402000000,0,-89402000000,-6383000000,-110749000000,111443000000


## Train Test Split

In [28]:
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.2)

## Column Transformer

In [38]:
numeric_cols = [feature for feature in df.columns if df[feature].dtype != 'O']
df[numeric_cols].dtypes

0_inc_totalRevenue                              int64
0_inc_costOfRevenue                             int64
0_inc_grossProfit                               int64
0_inc_netIncome                                 int64
0_inc_operatingIncome                           int64
0_inc_netIncomeContinuousOperations             int64
0_inc_netInterestIncome                         int64
0_inc_interestIncome                            int64
0_inc_otherIncomeExpense                        int64
0_inc_operatingExpense                          int64
0_inc_totalExpenses                             int64
0_inc_taxProvision                              int64
0_inc_interestExpense                           int64
0_inc_SGA                                       int64
0_inc_researchAndDevelopment                    int64
0_inc_ebit                                      int64
0_inc_dilutedEPS                                int64
0_inc_basicEPS                                  int64
0_bal_cashEquivalent        

In [47]:
# Create Transformer

transformer = ColumnTransformer(transformers=[
      ('tnf1',SimpleImputer(missing_values = np.nan,strategy='constant',fill_value=0),['3_cas_freeCashFlow']), 
      ('tnf3',OneHotEncoder(sparse_output=False,drop='first'),['industry']) 
],remainder='passthrough')

In [53]:
X_temp = transformer.fit_transform(X_train)
X_temp

array([[ 1.55000e+09,  0.00000e+00,  0.00000e+00, ..., -6.47000e+08,
        -1.53000e+08, -1.56200e+09],
       [ 5.83610e+08,  0.00000e+00,  0.00000e+00, ..., -7.28655e+08,
         0.00000e+00, -9.38781e+08],
       [ 7.01530e+08,  0.00000e+00,  0.00000e+00, ..., -1.27762e+08,
        -9.42750e+07, -3.11071e+08],
       ...,
       [ 7.67200e+08,  0.00000e+00,  0.00000e+00, ..., -2.37800e+08,
         0.00000e+00, -7.74900e+08],
       [ 4.06162e+08,  0.00000e+00,  0.00000e+00, ..., -3.38880e+08,
        -1.69093e+08, -5.26848e+08],
       [ 3.40500e+09,  0.00000e+00,  0.00000e+00, ..., -1.50000e+09,
         7.23000e+08, -6.89000e+08]])

In [52]:
transformer.fit_transform(X_test).shape

(143, 388)

In [11]:
# Drop column company

df.drop('company',axis=1,inplace=True)

In [None]:
# Replace space in industry column with _

df['industry'].replace(' ','_',regex=True, inplace=True)
df.head()

In [None]:
df.rename(columns={'FinalScore':'label'},inplace=True)

In [None]:
df.head()

In [None]:
df['label'].value_counts().plot(kind='bar')

In [None]:
X = df.drop('label',axis=1)
X.head()

In [None]:
df['label'].value_counts()

In [None]:
y = df['label']
y.head()

## One hot encoding

In [30]:
X_encoded = pd.get_dummies(X, columns=['industry'])

In [32]:
X_encoded.head()

Unnamed: 0,0_inc_totalRevenue,0_inc_costOfRevenue,0_inc_grossProfit,0_inc_netIncome,0_inc_operatingIncome,0_inc_netIncomeContinuousOperations,0_inc_netInterestIncome,0_inc_interestIncome,0_inc_otherIncomeExpense,0_inc_operatingExpense,0_inc_totalExpenses,0_inc_taxProvision,0_inc_interestExpense,0_inc_SGA,0_inc_researchAndDevelopment,0_inc_ebit,0_inc_dilutedEPS,0_inc_basicEPS,0_bal_cashEquivalent,0_bal_receivables,0_bal_finishedGoods,0_bal_workInProcess,0_bal_rawMaterials,0_bal_otherCurrentAssets,0_bal_inventory,0_bal_currentAssets,0_bal_netPPE,0_bal_otherNonCurrentAssets,0_bal_financialAssets,0_bal_goodwill,0_bal_goodwillAndOtherIntangibleAssets,0_bal_otherIntangibleAssets,0_bal_nonCurrentAccountsReceivable,0_bal_totalNonCurrentAssets,0_bal_currentDebt,0_bal_payablesAndAccruedExpenses,0_bal_otherCurrentLiabilities,0_bal_currentLiabilities,0_bal_longTermDebt,0_bal_otherNonCurrentLiabilities,0_bal_nonCurrentDeferredLiabilities,0_bal_nonCurrentDeferredTaxesLiabilities,0_bal_longTermProvisions,0_bal_totalNonCurrentLiabilities,0_bal_retainedEarnings,0_bal_stockholdersEquity,0_cas_netIncome,0_cas_netIncomeFromContinuingOperations,0_cas_depreciation,0_cas_stockBasedCompensation,0_cas_cashFlowOperatingActivities,0_cas_capitalExpenditure,0_cas_saleOfBusiness,0_cas_purchaseOfBusiness,0_cas_netBusinessPurchaseAndSale,0_cas_saleOfPPE,0_cas_purchaseOfPPE,0_cas_netPPEPurchaseAndSale,0_cas_saleOfInvestment,0_cas_purchaseOfInvestment,0_cas_netInvestmentPurchaseAndSale,0_cas_saleOfInvestmentProperties,0_cas_purchaseOfInvestmentProperties,0_cas_netInvestmentPropertiesPurchaseAndSale,0_cas_saleOfIntangibles,0_cas_purchaseOfIntangibles,0_cas_netIntangiblesPurchaseAndSale,0_cas_netOtherInvestingChanges,0_cas_cashFlowInvestingActivities,0_cas_netIssuancePaymentsOfDebt,0_cas_netLongTermDebtIssuance,0_cas_netShortTermDebtIssuance,0_cas_commonStockDividendPaid,0_cas_preferredStockDividendPaid,0_cas_cashDividendsPaid,0_cas_netCommonStockIssuance,0_cas_netPreferredStockIssuance,0_cas_repurchaseOfCapitalStock,0_cas_netOtherFinancingCharges,0_cas_cashFlowFinancingActivities,0_cas_freeCashFlow,1_inc_totalRevenue,1_inc_costOfRevenue,1_inc_grossProfit,1_inc_netIncome,1_inc_operatingIncome,1_inc_netIncomeContinuousOperations,1_inc_netInterestIncome,1_inc_interestIncome,1_inc_otherIncomeExpense,1_inc_operatingExpense,1_inc_totalExpenses,1_inc_taxProvision,1_inc_interestExpense,1_inc_SGA,1_inc_researchAndDevelopment,1_inc_ebit,1_inc_dilutedEPS,1_inc_basicEPS,1_bal_cashEquivalent,1_bal_receivables,1_bal_finishedGoods,1_bal_workInProcess,1_bal_rawMaterials,1_bal_otherCurrentAssets,1_bal_inventory,1_bal_currentAssets,1_bal_netPPE,1_bal_otherNonCurrentAssets,1_bal_financialAssets,1_bal_goodwill,1_bal_goodwillAndOtherIntangibleAssets,1_bal_otherIntangibleAssets,1_bal_nonCurrentAccountsReceivable,1_bal_totalNonCurrentAssets,1_bal_currentDebt,1_bal_payablesAndAccruedExpenses,1_bal_otherCurrentLiabilities,1_bal_currentLiabilities,1_bal_longTermDebt,1_bal_otherNonCurrentLiabilities,1_bal_nonCurrentDeferredLiabilities,1_bal_nonCurrentDeferredTaxesLiabilities,1_bal_longTermProvisions,1_bal_totalNonCurrentLiabilities,1_bal_retainedEarnings,1_bal_stockholdersEquity,1_cas_netIncome,1_cas_netIncomeFromContinuingOperations,1_cas_depreciation,1_cas_stockBasedCompensation,1_cas_cashFlowOperatingActivities,1_cas_capitalExpenditure,1_cas_saleOfBusiness,1_cas_purchaseOfBusiness,1_cas_netBusinessPurchaseAndSale,1_cas_saleOfPPE,1_cas_purchaseOfPPE,1_cas_netPPEPurchaseAndSale,1_cas_saleOfInvestment,1_cas_purchaseOfInvestment,1_cas_netInvestmentPurchaseAndSale,1_cas_saleOfInvestmentProperties,1_cas_purchaseOfInvestmentProperties,1_cas_netInvestmentPropertiesPurchaseAndSale,1_cas_saleOfIntangibles,1_cas_purchaseOfIntangibles,1_cas_netIntangiblesPurchaseAndSale,1_cas_netOtherInvestingChanges,1_cas_cashFlowInvestingActivities,1_cas_netIssuancePaymentsOfDebt,1_cas_netLongTermDebtIssuance,1_cas_netShortTermDebtIssuance,1_cas_commonStockDividendPaid,1_cas_preferredStockDividendPaid,1_cas_cashDividendsPaid,1_cas_netCommonStockIssuance,1_cas_netPreferredStockIssuance,1_cas_repurchaseOfCapitalStock,1_cas_netOtherFinancingCharges,1_cas_cashFlowFinancingActivities,1_cas_freeCashFlow,2_inc_totalRevenue,2_inc_costOfRevenue,2_inc_grossProfit,2_inc_netIncome,2_inc_operatingIncome,2_inc_netIncomeContinuousOperations,2_inc_netInterestIncome,2_inc_interestIncome,2_inc_otherIncomeExpense,2_inc_operatingExpense,2_inc_totalExpenses,2_inc_taxProvision,2_inc_interestExpense,2_inc_SGA,2_inc_researchAndDevelopment,2_inc_ebit,2_inc_dilutedEPS,2_inc_basicEPS,2_bal_cashEquivalent,2_bal_receivables,2_bal_finishedGoods,2_bal_workInProcess,2_bal_rawMaterials,2_bal_otherCurrentAssets,2_bal_inventory,2_bal_currentAssets,2_bal_netPPE,2_bal_otherNonCurrentAssets,2_bal_financialAssets,2_bal_goodwill,2_bal_goodwillAndOtherIntangibleAssets,2_bal_otherIntangibleAssets,2_bal_nonCurrentAccountsReceivable,2_bal_totalNonCurrentAssets,2_bal_currentDebt,2_bal_payablesAndAccruedExpenses,2_bal_otherCurrentLiabilities,2_bal_currentLiabilities,2_bal_longTermDebt,2_bal_otherNonCurrentLiabilities,2_bal_nonCurrentDeferredLiabilities,2_bal_nonCurrentDeferredTaxesLiabilities,2_bal_longTermProvisions,2_bal_totalNonCurrentLiabilities,2_bal_retainedEarnings,2_bal_stockholdersEquity,2_cas_netIncome,2_cas_netIncomeFromContinuingOperations,2_cas_depreciation,2_cas_stockBasedCompensation,2_cas_cashFlowOperatingActivities,2_cas_capitalExpenditure,2_cas_saleOfBusiness,2_cas_purchaseOfBusiness,2_cas_netBusinessPurchaseAndSale,2_cas_saleOfPPE,2_cas_purchaseOfPPE,2_cas_netPPEPurchaseAndSale,2_cas_saleOfInvestment,2_cas_purchaseOfInvestment,2_cas_netInvestmentPurchaseAndSale,2_cas_saleOfInvestmentProperties,2_cas_purchaseOfInvestmentProperties,2_cas_netInvestmentPropertiesPurchaseAndSale,2_cas_saleOfIntangibles,2_cas_purchaseOfIntangibles,2_cas_netIntangiblesPurchaseAndSale,2_cas_netOtherInvestingChanges,2_cas_cashFlowInvestingActivities,2_cas_netIssuancePaymentsOfDebt,2_cas_netLongTermDebtIssuance,2_cas_netShortTermDebtIssuance,2_cas_commonStockDividendPaid,2_cas_preferredStockDividendPaid,2_cas_cashDividendsPaid,2_cas_netCommonStockIssuance,2_cas_netPreferredStockIssuance,2_cas_repurchaseOfCapitalStock,2_cas_netOtherFinancingCharges,2_cas_cashFlowFinancingActivities,2_cas_freeCashFlow,3_inc_totalRevenue,3_inc_costOfRevenue,3_inc_grossProfit,3_inc_netIncome,3_inc_operatingIncome,3_inc_netIncomeContinuousOperations,3_inc_netInterestIncome,3_inc_interestIncome,3_inc_otherIncomeExpense,3_inc_operatingExpense,3_inc_totalExpenses,3_inc_taxProvision,3_inc_interestExpense,3_inc_SGA,3_inc_researchAndDevelopment,3_inc_ebit,3_inc_dilutedEPS,3_inc_basicEPS,3_bal_cashEquivalent,3_bal_receivables,3_bal_finishedGoods,3_bal_workInProcess,3_bal_rawMaterials,3_bal_otherCurrentAssets,3_bal_inventory,3_bal_currentAssets,3_bal_netPPE,3_bal_otherNonCurrentAssets,3_bal_financialAssets,3_bal_goodwill,3_bal_goodwillAndOtherIntangibleAssets,3_bal_otherIntangibleAssets,3_bal_nonCurrentAccountsReceivable,3_bal_totalNonCurrentAssets,3_bal_currentDebt,3_bal_payablesAndAccruedExpenses,3_bal_otherCurrentLiabilities,3_bal_currentLiabilities,3_bal_longTermDebt,3_bal_otherNonCurrentLiabilities,3_bal_nonCurrentDeferredLiabilities,3_bal_nonCurrentDeferredTaxesLiabilities,3_bal_longTermProvisions,3_bal_totalNonCurrentLiabilities,3_bal_retainedEarnings,3_bal_stockholdersEquity,3_cas_netIncome,3_cas_netIncomeFromContinuingOperations,3_cas_depreciation,3_cas_stockBasedCompensation,3_cas_cashFlowOperatingActivities,3_cas_capitalExpenditure,3_cas_saleOfBusiness,3_cas_purchaseOfBusiness,3_cas_netBusinessPurchaseAndSale,3_cas_saleOfPPE,3_cas_purchaseOfPPE,3_cas_netPPEPurchaseAndSale,3_cas_saleOfInvestment,3_cas_purchaseOfInvestment,3_cas_netInvestmentPurchaseAndSale,3_cas_saleOfInvestmentProperties,3_cas_purchaseOfInvestmentProperties,3_cas_netInvestmentPropertiesPurchaseAndSale,3_cas_saleOfIntangibles,3_cas_purchaseOfIntangibles,3_cas_netIntangiblesPurchaseAndSale,3_cas_netOtherInvestingChanges,3_cas_cashFlowInvestingActivities,3_cas_netIssuancePaymentsOfDebt,3_cas_netLongTermDebtIssuance,3_cas_netShortTermDebtIssuance,3_cas_commonStockDividendPaid,3_cas_preferredStockDividendPaid,3_cas_cashDividendsPaid,3_cas_netCommonStockIssuance,3_cas_netPreferredStockIssuance,3_cas_repurchaseOfCapitalStock,3_cas_netOtherFinancingCharges,3_cas_cashFlowFinancingActivities,3_cas_freeCashFlow,industry_Advertising Agencies,industry_Aerospace & Defense,industry_Agricultural Inputs,industry_Airlines,industry_Aluminum,industry_Apparel Manufacturing,industry_Apparel Retail,industry_Auto & Truck Dealerships,industry_Auto Manufacturers,industry_Auto Parts,industry_Beverages—Brewers,industry_Beverages—Non-Alcoholic,industry_Beverages—Wineries & Distilleries,industry_Biotechnology,industry_Building Materials,industry_Building Products & Equipment,industry_Chemicals,industry_Communication Equipment,industry_Computer Hardware,industry_Confectioners,industry_Conglomerates,industry_Consulting Services,industry_Consumer Electronics,industry_Copper,industry_Credit Services,industry_Department Stores,industry_Diagnostics & Research,industry_Discount Stores,industry_Drug Manufacturers—General,industry_Drug Manufacturers—Specialty & Generic,industry_Education & Training Services,industry_Electrical Equipment & Parts,industry_Electronic Components,industry_Electronic Gaming & Multimedia,industry_Electronics & Computer Distribution,industry_Engineering & Construction,industry_Entertainment,industry_Farm & Heavy Construction Machinery,industry_Farm Products,industry_Food Distribution,industry_Footwear & Accessories,"industry_Furnishings, Fixtures & Appliances",industry_Gambling,industry_Gold,industry_Grocery Stores,industry_Health Information Services,industry_Home Improvement Retail,industry_Household & Personal Products,industry_Industrial Distribution,industry_Information Technology Services,industry_Integrated Freight & Logistics,industry_Internet Content & Information,industry_Internet Retail,industry_Leisure,industry_Lodging,industry_Lumber & Wood Production,industry_Luxury Goods,industry_Marine Shipping,industry_Medical Care Facilities,industry_Medical Devices,industry_Medical Distribution,industry_Medical Instruments & Supplies,industry_Oil & Gas E&P,industry_Oil & Gas Equipment & Services,industry_Oil & Gas Integrated,industry_Oil & Gas Midstream,industry_Oil & Gas Refining & Marketing,industry_Other Industrial Metals & Mining,industry_Packaged Foods,industry_Packaging & Containers,industry_Personal Services,industry_Pharmaceutical Retailers,industry_Publishing,industry_Railroads,industry_Recreational Vehicles,industry_Rental & Leasing Services,industry_Residential Construction,industry_Resorts & Casinos,industry_Restaurants,industry_Scientific & Technical Instruments,industry_Security & Protection Services,industry_Semiconductor Equipment & Materials,industry_Semiconductors,industry_Software—Application,industry_Software—Infrastructure,industry_Solar,industry_Specialty Business Services,industry_Specialty Chemicals,industry_Specialty Industrial Machinery,industry_Specialty Retail,industry_Staffing & Employment Services,industry_Steel,industry_Telecom Services,industry_Tobacco,industry_Tools & Accessories,industry_Travel Services,industry_Trucking,industry_Utilities—Diversified,industry_Utilities—Independent Power Producers,industry_Utilities—Regulated Electric,industry_Utilities—Regulated Gas,industry_Utilities—Regulated Water,industry_Utilities—Renewable,industry_Waste Management
0,5163000000,2358000000,2805000000,1071000000,941000000,1071000000,-38000000,36000000,16000000,1864000000,4222000000,-152000000,74000000,1460000000,404000000,993000000,3,3,1382000000,930000000,416000000,0,263000000,198000000,679000000,3189000000,850000000,611000000,0,4700000000,4700000000,1107000000,0,6263000000,616000000,794000000,0,2080000000,1791000000,473000000,0,0,0,2624000000,-18000000,4748000000,1071000000,1071000000,238000000,72000000,1021000000,-156000000,0,-1408000000,-1408000000,0,-155000000,-155000000,0,-23000000,-23000000,0,0,0,0,-1000000,-1000000,-3000000,-1590000000,600000000,-15000000,615000000,-206000000,0,-206000000,-723000000,0,-723000000,-24000000,-299000000,865000000,5339000000,2502000000,2837000000,719000000,846000000,719000000,-70000000,8000000,66000000,1991000000,4493000000,123000000,78000000,1496000000,495000000,920000000,2,2,1441000000,1038000000,417000000,0,303000000,216000000,720000000,3415000000,845000000,776000000,0,4433000000,4433000000,831000000,0,6212000000,75000000,639000000,0,1467000000,2284000000,614000000,0,0,0,3287000000,81000000,4873000000,719000000,719000000,308000000,83000000,921000000,-119000000,0,0,0,1000000,-119000000,-118000000,0,-20000000,-20000000,0,0,0,0,0,0,-9000000,-147000000,-45000000,-918000000,873000000,-222000000,0,-222000000,-469000000,0,-469000000,-41000000,-717000000,802000000,6319000000,2912000000,3407000000,1210000000,1347000000,1210000000,-79000000,2000000,92000000,2060000000,4972000000,150000000,81000000,1619000000,441000000,1441000000,3,3,1575000000,1172000000,463000000,0,367000000,222000000,830000000,3799000000,945000000,820000000,0,4956000000,4956000000,981000000,0,6906000000,0,774000000,0,1708000000,2729000000,659000000,0,0,0,3608000000,348000000,5389000000,1210000000,1210000000,321000000,110000000,1485000000,-189000000,0,-546000000,-546000000,1000000,-188000000,-187000000,12000000,-22000000,-10000000,0,0,0,0,-1000000,-1000000,-5000000,-749000000,356000000,431000000,-75000000,-236000000,0,-236000000,-788000000,0,-788000000,-83000000,-696000000,1296000000,6848000000,3126000000,3722000000,1254000000,1618000000,1254000000,-75000000,9000000,-39000000,2104000000,5230000000,250000000,84000000,1637000000,467000000,1588000000,4,4,1053000000,1405000000,555000000,0,483000000,282000000,1038000000,3778000000,1100000000,670000000,0,4773000000,4773000000,821000000,0,6738000000,36000000,909000000,0,1861000000,2733000000,536000000,0,0,0,3366000000,324000000,5289000000,1254000000,1254000000,317000000,125000000,1312000000,-291000000,0,-52000000,-52000000,0,-291000000,-291000000,22000000,-13000000,9000000,0,0,0,0,0,0,-4000000,-338000000,26000000,-9000000,35000000,-250000000,0,-250000000,-1139000000,0,-1139000000,-67000000,-1372000000,1021000000,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
1,10433000000,8537000000,1896000000,-1125000000,876000000,-853000000,-121000000,0,-1193000000,1020000000,9557000000,415000000,121000000,280000000,27000000,-317000000,-6,-6,879000000,660000000,305000000,282000000,611000000,288000000,1644000000,3530000000,7916000000,1414000000,18000000,150000000,202000000,52000000,179000000,11110000000,1000000,1588000000,561000000,2563000000,1799000000,371000000,102000000,0,902000000,6221000000,-555000000,4082000000,-1125000000,-853000000,713000000,30000000,686000000,-379000000,0,0,0,0,0,0,0,-112000000,-112000000,0,0,0,0,0,0,23000000,-468000000,-7000000,-7000000,0,0,0,0,0,0,0,-439000000,-444000000,307000000,9286000000,7969000000,1317000000,-170000000,431000000,-14000000,-146000000,0,-112000000,886000000,8855000000,187000000,146000000,206000000,27000000,319000000,0,0,1607000000,556000000,321000000,112000000,553000000,290000000,1398000000,4520000000,7190000000,1444000000,0,145000000,190000000,45000000,134000000,10344000000,2000000,1494000000,870000000,2761000000,2463000000,515000000,101000000,0,918000000,7112000000,-725000000,3287000000,-170000000,-14000000,653000000,25000000,394000000,-353000000,0,0,0,0,0,0,0,-12000000,-12000000,0,0,0,0,0,0,198000000,-167000000,738000000,738000000,0,0,0,0,0,0,0,-225000000,514000000,41000000,12152000000,9153000000,2999000000,429000000,2077000000,570000000,-195000000,0,-683000000,922000000,10075000000,629000000,195000000,227000000,31000000,1394000000,2,2,1814000000,884000000,538000000,85000000,794000000,358000000,1956000000,5026000000,6623000000,1644000000,7000000,144000000,180000000,36000000,215000000,9977000000,1000000,2048000000,791000000,3223000000,1726000000,599000000,90000000,0,887000000,5531000000,-315000000,4638000000,429000000,570000000,664000000,39000000,920000000,-390000000,0,0,0,0,0,0,0,-11000000,-11000000,0,0,0,0,0,0,966000000,565000000,-799000000,-799000000,0,-19000000,0,-19000000,-150000000,0,-150000000,-215000000,-1158000000,530000000,12451000000,10212000000,2239000000,-102000000,1331000000,38000000,-106000000,0,-523000000,908000000,11120000000,664000000,106000000,204000000,32000000,808000000,0,0,1363000000,909000000,385000000,350000000,1108000000,417000000,2427000000,5250000000,6493000000,1587000000,2000000,145000000,174000000,29000000,366000000,9533000000,1000000,1987000000,681000000,3004000000,1806000000,486000000,65000000,0,937000000,5207000000,-549000000,5058000000,-102000000,38000000,617000000,40000000,822000000,-480000000,0,0,0,0,0,0,10000000,-32000000,-22000000,0,0,0,0,0,0,7000000,-495000000,3000000,3000000,0,-72000000,0,-72000000,-500000000,0,-500000000,-221000000,-768000000,342000000,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
2,45768000000,35379000000,10389000000,1686000000,3700000000,1686000000,-968000000,127000000,-476000000,6689000000,42068000000,570000000,1095000000,1602000000,0,3351000000,3,3,3826000000,1750000000,0,0,0,0,1851000000,8206000000,43732000000,1237000000,0,4091000000,6175000000,2084000000,0,51789000000,4569000000,5741000000,4808000000,18311000000,28875000000,1453000000,5422000000,0,0,41802000000,2264000000,-118000000,1686000000,1686000000,2318000000,94000000,3815000000,-4268000000,0,0,0,904000000,0,904000000,4144000000,-3187000000,957000000,0,0,0,0,0,0,164000000,-2243000000,-230000000,-230000000,0,-178000000,0,-178000000,-1097000000,0,-1097000000,-63000000,-1568000000,-453000000,17337000000,24933000000,-7596000000,-8885000000,-11078000000,-8885000000,-1186000000,41000000,811000000,3482000000,28415000000,-2568000000,1227000000,513000000,0,-10226000000,-18,-18,6864000000,1342000000,0,0,0,0,1614000000,11095000000,39738000000,1816000000,0,4091000000,6120000000,2029000000,0,50913000000,4448000000,5331000000,0,16569000000,36573000000,1502000000,7162000000,0,0,52306000000,-6664000000,-6867000000,-8885000000,-8885000000,2370000000,91000000,-6543000000,-1958000000,0,0,0,1016000000,0,1016000000,2803000000,-5873000000,-3070000000,0,0,0,0,0,0,-330000000,-4342000000,8245000000,8245000000,0,-43000000,0,-43000000,2797000000,0,-173000000,-5000000,10994000000,-8501000000,29882000000,29855000000,27000000,-1993000000,-5065000000,-1993000000,-1782000000,18000000,4299000000,5092000000,34947000000,-555000000,1800000000,1098000000,0,-748000000,-3,-3,12431000000,1505000000,0,0,0,0,1795000000,17336000000,37362000000,2109000000,0,4091000000,6079000000,1988000000,0,49106000000,3995000000,6027000000,0,19005000000,42157000000,1328000000,6239000000,0,0,54777000000,-8638000000,-7340000000,-1993000000,-1993000000,2335000000,98000000,704000000,-208000000,5000000,-28000000,-23000000,374000000,-204000000,374000000,13923000000,-19658000000,-5735000000,0,0,0,0,0,0,-414000000,-5983000000,4847000000,4847000000,0,0,0,0,442000000,0,-18000000,-1000000,5288000000,496000000,48971000000,39934000000,9037000000,127000000,1763000000,127000000,-1746000000,216000000,169000000,7274000000,47208000000,59000000,1962000000,1815000000,0,2148000000,0,0,8965000000,2138000000,0,0,0,892000000,2279000000,15269000000,38294000000,1904000000,0,4091000000,6150000000,2059000000,0,49447000000,4739000000,6843000000,0,21496000000,38948000000,1258000000,5976000000,0,0,49019000000,-8511000000,-5799000000,127000000,127000000,2298000000,78000000,2173000000,-2906000000,0,-321000000,-321000000,61000000,-360000000,-213000000,14972000000,-11617000000,3355000000,0,0,0,0,0,0,87000000,636000000,-2683000000,-2683000000,0,0,0,0,0,0,-21000000,73000000,-2631000000,-733000000,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
3,9709003000,5454257000,4254746000,486896000,677180000,486896000,-39898000,0,464000,3577566000,9031823000,150850000,39898000,3577566000,0,677644000,6,6,418665000,689469000,0,0,0,155241000,4432168000,5695543000,3798538000,52448000,0,992240000,1701996000,709756000,0,5552982000,0,3957850000,519852000,4477702000,2764479000,123250000,334013000,334013000,0,3221742000,3772848000,3549081000,486896000,486896000,238371000,37438000,866909000,-471648000,0,0,0,8709000,-270129000,-261420000,0,0,0,0,0,0,0,-201519000,-201519000,0,-462939000,-369386000,-310047000,-59339000,-17185000,0,-17185000,-495101000,0,-498435000,-481000,-882153000,395261000,10106321000,5624707000,4481614000,493021000,749907000,493021000,-46886000,0,-52006000,3731707000,9356414000,157994000,46886000,3731707000,0,697901000,7,7,834992000,749999000,0,0,0,146811000,4538199000,6270001000,3842589000,52329000,0,993590000,1674717000,681127000,0,5569635000,0,4247443000,496472000,4743915000,3047483000,146281000,342445000,342445000,0,3536209000,4196634000,3559512000,493021000,493021000,250081000,45271000,969688000,-267806000,0,0,0,909000,-267576000,-266667000,0,0,0,0,0,0,0,-230000,-230000,0,-266897000,244524000,244524000,0,-56347000,0,-56347000,-466421000,0,-469691000,-7753000,-285997000,701882000,10997989000,6069241000,4928748000,616108000,838717000,616108000,-37791000,0,4999000,4090031000,10159272000,189817000,37791000,4090031000,0,843716000,9,9,601428000,782785000,0,0,0,232245000,4659018000,6275476000,4200121000,73651000,0,993744000,1644961000,651217000,0,5918733000,0,4699058000,481249000,5180307000,3371971000,103034000,410606000,410606000,0,3885611000,4605791000,3128291000,616108000,616108000,259933000,63067000,1112262000,-289639000,0,0,0,2325000,-289639000,-287314000,0,0,0,0,0,0,0,0,0,0,-287314000,0,0,0,0,0,-160925000,-903134000,0,-906208000,-53000,-1064112000,822623000,11154722000,6192622000,4962100000,501872000,714151000,501872000,-51060000,0,-14404000,4247949000,10440571000,146815000,51060000,4247949000,0,699747000,8,8,269282000,698613000,0,0,0,163695000,4915262000,6046852000,4297829000,62429000,0,990471000,1611372000,620901000,0,5971630000,185000000,4757909000,427480000,5370389000,3466601000,87214000,415997000,415997000,0,3969812000,4744624000,2678281000,501872000,501872000,283800000,50978000,722222000,-425961000,0,0,0,1513000,-424061000,-422548000,0,0,0,0,0,0,0,-1900000,-1900000,0,-424448000,332537000,147537000,185000000,-336230000,0,-336230000,-618480000,0,-618480000,1469000,-620704000,296261000,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False
4,260174000000,161782000000,98392000000,55256000000,63930000000,55256000000,1385000000,4961000000,1807000000,34462000000,196244000000,10481000000,3576000000,18245000000,16217000000,63930000000,2,2,100557000000,45804000000,0,0,0,12352000000,4106000000,162819000000,37378000000,32978000000,0,0,0,0,0,175697000000,16240000000,46236000000,37720000000,105718000000,91807000000,50503000000,0,0,0,142310000000,45898000000,90488000000,55256000000,55256000000,12547000000,6068000000,69391000000,-10495000000,0,-624000000,-624000000,0,-10495000000,-10495000000,98724000000,-40631000000,58093000000,0,0,0,0,0,0,-1078000000,45896000000,-7819000000,-1842000000,-5977000000,-14119000000,0,-14119000000,-66116000000,0,-66897000000,-2922000000,-90976000000,58896000000,274515000000,169559000000,104956000000,57411000000,66288000000,57411000000,890000000,3763000000,803000000,38668000000,208227000000,9680000000,2873000000,19916000000,18752000000,66288000000,3,3,90943000000,37445000000,0,0,0,11264000000,4061000000,143713000000,36766000000,42522000000,0,0,0,0,0,180175000000,13769000000,42296000000,42684000000,105392000000,98667000000,54490000000,0,0,0,153157000000,14966000000,65339000000,57411000000,57411000000,11056000000,6829000000,80674000000,-7309000000,0,-1524000000,-1524000000,0,-7309000000,-7309000000,120483000000,-115148000000,5335000000,0,0,0,0,0,0,-791000000,-4289000000,2499000000,3462000000,-963000000,-14081000000,0,-14081000000,-71478000000,0,-72358000000,-3760000000,-86820000000,73365000000,365817000000,212981000000,152836000000,94680000000,108949000000,94680000000,198000000,2843000000,258000000,43887000000,256868000000,14527000000,2645000000,21973000000,21914000000,108949000000,5,5,62639000000,51506000000,0,0,0,14111000000,6580000000,134836000000,39440000000,48849000000,0,0,0,0,0,216166000000,15613000000,54763000000,47493000000,125481000000,109106000000,53325000000,0,0,0,162431000000,5562000000,63090000000,94680000000,94680000000,11284000000,7906000000,104038000000,-11085000000,0,-33000000,-33000000,0,-11085000000,-11085000000,106483000000,-109558000000,-3075000000,0,0,0,0,0,0,-352000000,-14545000000,12665000000,11643000000,1022000000,-14467000000,0,-14467000000,-84866000000,0,-85971000000,-6685000000,-93353000000,92953000000,394328000000,223546000000,170782000000,99803000000,119437000000,99803000000,-106000000,2825000000,-334000000,51345000000,274891000000,19300000000,2931000000,25094000000,26251000000,119437000000,6,6,48304000000,60932000000,0,0,0,21223000000,4946000000,135405000000,42117000000,54428000000,0,0,0,0,0,217350000000,21110000000,64115000000,60845000000,153982000000,98959000000,49142000000,0,0,0,148101000000,-3068000000,50672000000,99803000000,99803000000,11104000000,9038000000,122151000000,-10708000000,0,-306000000,-306000000,0,-10708000000,-10708000000,67363000000,-76923000000,-9560000000,0,0,0,0,0,0,-1780000000,-22354000000,-123000000,-4078000000,3955000000,-14841000000,0,-14841000000,-89402000000,0,-89402000000,-6383000000,-110749000000,111443000000,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False


In [31]:
oversample = SMOTE()
X_encoded, y = oversample.fit_resample(X_encoded,y)

In [33]:
oversample = SMOTE()
X_temp, y_temp = oversample.fit_resample(X_encoded,y)

In [34]:
X_encoded.head()

Unnamed: 0,0_inc_totalRevenue,0_inc_costOfRevenue,0_inc_grossProfit,0_inc_netIncome,0_inc_operatingIncome,0_inc_netIncomeContinuousOperations,0_inc_netInterestIncome,0_inc_interestIncome,0_inc_otherIncomeExpense,0_inc_operatingExpense,0_inc_totalExpenses,0_inc_taxProvision,0_inc_interestExpense,0_inc_SGA,0_inc_researchAndDevelopment,0_inc_ebit,0_inc_dilutedEPS,0_inc_basicEPS,0_bal_cashEquivalent,0_bal_receivables,0_bal_finishedGoods,0_bal_workInProcess,0_bal_rawMaterials,0_bal_otherCurrentAssets,0_bal_inventory,0_bal_currentAssets,0_bal_netPPE,0_bal_otherNonCurrentAssets,0_bal_financialAssets,0_bal_goodwill,0_bal_goodwillAndOtherIntangibleAssets,0_bal_otherIntangibleAssets,0_bal_nonCurrentAccountsReceivable,0_bal_totalNonCurrentAssets,0_bal_currentDebt,0_bal_payablesAndAccruedExpenses,0_bal_otherCurrentLiabilities,0_bal_currentLiabilities,0_bal_longTermDebt,0_bal_otherNonCurrentLiabilities,0_bal_nonCurrentDeferredLiabilities,0_bal_nonCurrentDeferredTaxesLiabilities,0_bal_longTermProvisions,0_bal_totalNonCurrentLiabilities,0_bal_retainedEarnings,0_bal_stockholdersEquity,0_cas_netIncome,0_cas_netIncomeFromContinuingOperations,0_cas_depreciation,0_cas_stockBasedCompensation,0_cas_cashFlowOperatingActivities,0_cas_capitalExpenditure,0_cas_saleOfBusiness,0_cas_purchaseOfBusiness,0_cas_netBusinessPurchaseAndSale,0_cas_saleOfPPE,0_cas_purchaseOfPPE,0_cas_netPPEPurchaseAndSale,0_cas_saleOfInvestment,0_cas_purchaseOfInvestment,0_cas_netInvestmentPurchaseAndSale,0_cas_saleOfInvestmentProperties,0_cas_purchaseOfInvestmentProperties,0_cas_netInvestmentPropertiesPurchaseAndSale,0_cas_saleOfIntangibles,0_cas_purchaseOfIntangibles,0_cas_netIntangiblesPurchaseAndSale,0_cas_netOtherInvestingChanges,0_cas_cashFlowInvestingActivities,0_cas_netIssuancePaymentsOfDebt,0_cas_netLongTermDebtIssuance,0_cas_netShortTermDebtIssuance,0_cas_commonStockDividendPaid,0_cas_preferredStockDividendPaid,0_cas_cashDividendsPaid,0_cas_netCommonStockIssuance,0_cas_netPreferredStockIssuance,0_cas_repurchaseOfCapitalStock,0_cas_netOtherFinancingCharges,0_cas_cashFlowFinancingActivities,0_cas_freeCashFlow,1_inc_totalRevenue,1_inc_costOfRevenue,1_inc_grossProfit,1_inc_netIncome,1_inc_operatingIncome,1_inc_netIncomeContinuousOperations,1_inc_netInterestIncome,1_inc_interestIncome,1_inc_otherIncomeExpense,1_inc_operatingExpense,1_inc_totalExpenses,1_inc_taxProvision,1_inc_interestExpense,1_inc_SGA,1_inc_researchAndDevelopment,1_inc_ebit,1_inc_dilutedEPS,1_inc_basicEPS,1_bal_cashEquivalent,1_bal_receivables,1_bal_finishedGoods,1_bal_workInProcess,1_bal_rawMaterials,1_bal_otherCurrentAssets,1_bal_inventory,1_bal_currentAssets,1_bal_netPPE,1_bal_otherNonCurrentAssets,1_bal_financialAssets,1_bal_goodwill,1_bal_goodwillAndOtherIntangibleAssets,1_bal_otherIntangibleAssets,1_bal_nonCurrentAccountsReceivable,1_bal_totalNonCurrentAssets,1_bal_currentDebt,1_bal_payablesAndAccruedExpenses,1_bal_otherCurrentLiabilities,1_bal_currentLiabilities,1_bal_longTermDebt,1_bal_otherNonCurrentLiabilities,1_bal_nonCurrentDeferredLiabilities,1_bal_nonCurrentDeferredTaxesLiabilities,1_bal_longTermProvisions,1_bal_totalNonCurrentLiabilities,1_bal_retainedEarnings,1_bal_stockholdersEquity,1_cas_netIncome,1_cas_netIncomeFromContinuingOperations,1_cas_depreciation,1_cas_stockBasedCompensation,1_cas_cashFlowOperatingActivities,1_cas_capitalExpenditure,1_cas_saleOfBusiness,1_cas_purchaseOfBusiness,1_cas_netBusinessPurchaseAndSale,1_cas_saleOfPPE,1_cas_purchaseOfPPE,1_cas_netPPEPurchaseAndSale,1_cas_saleOfInvestment,1_cas_purchaseOfInvestment,1_cas_netInvestmentPurchaseAndSale,1_cas_saleOfInvestmentProperties,1_cas_purchaseOfInvestmentProperties,1_cas_netInvestmentPropertiesPurchaseAndSale,1_cas_saleOfIntangibles,1_cas_purchaseOfIntangibles,1_cas_netIntangiblesPurchaseAndSale,1_cas_netOtherInvestingChanges,1_cas_cashFlowInvestingActivities,1_cas_netIssuancePaymentsOfDebt,1_cas_netLongTermDebtIssuance,1_cas_netShortTermDebtIssuance,1_cas_commonStockDividendPaid,1_cas_preferredStockDividendPaid,1_cas_cashDividendsPaid,1_cas_netCommonStockIssuance,1_cas_netPreferredStockIssuance,1_cas_repurchaseOfCapitalStock,1_cas_netOtherFinancingCharges,1_cas_cashFlowFinancingActivities,1_cas_freeCashFlow,2_inc_totalRevenue,2_inc_costOfRevenue,2_inc_grossProfit,2_inc_netIncome,2_inc_operatingIncome,2_inc_netIncomeContinuousOperations,2_inc_netInterestIncome,2_inc_interestIncome,2_inc_otherIncomeExpense,2_inc_operatingExpense,2_inc_totalExpenses,2_inc_taxProvision,2_inc_interestExpense,2_inc_SGA,2_inc_researchAndDevelopment,2_inc_ebit,2_inc_dilutedEPS,2_inc_basicEPS,2_bal_cashEquivalent,2_bal_receivables,2_bal_finishedGoods,2_bal_workInProcess,2_bal_rawMaterials,2_bal_otherCurrentAssets,2_bal_inventory,2_bal_currentAssets,2_bal_netPPE,2_bal_otherNonCurrentAssets,2_bal_financialAssets,2_bal_goodwill,2_bal_goodwillAndOtherIntangibleAssets,2_bal_otherIntangibleAssets,2_bal_nonCurrentAccountsReceivable,2_bal_totalNonCurrentAssets,2_bal_currentDebt,2_bal_payablesAndAccruedExpenses,2_bal_otherCurrentLiabilities,2_bal_currentLiabilities,2_bal_longTermDebt,2_bal_otherNonCurrentLiabilities,2_bal_nonCurrentDeferredLiabilities,2_bal_nonCurrentDeferredTaxesLiabilities,2_bal_longTermProvisions,2_bal_totalNonCurrentLiabilities,2_bal_retainedEarnings,2_bal_stockholdersEquity,2_cas_netIncome,2_cas_netIncomeFromContinuingOperations,2_cas_depreciation,2_cas_stockBasedCompensation,2_cas_cashFlowOperatingActivities,2_cas_capitalExpenditure,2_cas_saleOfBusiness,2_cas_purchaseOfBusiness,2_cas_netBusinessPurchaseAndSale,2_cas_saleOfPPE,2_cas_purchaseOfPPE,2_cas_netPPEPurchaseAndSale,2_cas_saleOfInvestment,2_cas_purchaseOfInvestment,2_cas_netInvestmentPurchaseAndSale,2_cas_saleOfInvestmentProperties,2_cas_purchaseOfInvestmentProperties,2_cas_netInvestmentPropertiesPurchaseAndSale,2_cas_saleOfIntangibles,2_cas_purchaseOfIntangibles,2_cas_netIntangiblesPurchaseAndSale,2_cas_netOtherInvestingChanges,2_cas_cashFlowInvestingActivities,2_cas_netIssuancePaymentsOfDebt,2_cas_netLongTermDebtIssuance,2_cas_netShortTermDebtIssuance,2_cas_commonStockDividendPaid,2_cas_preferredStockDividendPaid,2_cas_cashDividendsPaid,2_cas_netCommonStockIssuance,2_cas_netPreferredStockIssuance,2_cas_repurchaseOfCapitalStock,2_cas_netOtherFinancingCharges,2_cas_cashFlowFinancingActivities,2_cas_freeCashFlow,3_inc_totalRevenue,3_inc_costOfRevenue,3_inc_grossProfit,3_inc_netIncome,3_inc_operatingIncome,3_inc_netIncomeContinuousOperations,3_inc_netInterestIncome,3_inc_interestIncome,3_inc_otherIncomeExpense,3_inc_operatingExpense,3_inc_totalExpenses,3_inc_taxProvision,3_inc_interestExpense,3_inc_SGA,3_inc_researchAndDevelopment,3_inc_ebit,3_inc_dilutedEPS,3_inc_basicEPS,3_bal_cashEquivalent,3_bal_receivables,3_bal_finishedGoods,3_bal_workInProcess,3_bal_rawMaterials,3_bal_otherCurrentAssets,3_bal_inventory,3_bal_currentAssets,3_bal_netPPE,3_bal_otherNonCurrentAssets,3_bal_financialAssets,3_bal_goodwill,3_bal_goodwillAndOtherIntangibleAssets,3_bal_otherIntangibleAssets,3_bal_nonCurrentAccountsReceivable,3_bal_totalNonCurrentAssets,3_bal_currentDebt,3_bal_payablesAndAccruedExpenses,3_bal_otherCurrentLiabilities,3_bal_currentLiabilities,3_bal_longTermDebt,3_bal_otherNonCurrentLiabilities,3_bal_nonCurrentDeferredLiabilities,3_bal_nonCurrentDeferredTaxesLiabilities,3_bal_longTermProvisions,3_bal_totalNonCurrentLiabilities,3_bal_retainedEarnings,3_bal_stockholdersEquity,3_cas_netIncome,3_cas_netIncomeFromContinuingOperations,3_cas_depreciation,3_cas_stockBasedCompensation,3_cas_cashFlowOperatingActivities,3_cas_capitalExpenditure,3_cas_saleOfBusiness,3_cas_purchaseOfBusiness,3_cas_netBusinessPurchaseAndSale,3_cas_saleOfPPE,3_cas_purchaseOfPPE,3_cas_netPPEPurchaseAndSale,3_cas_saleOfInvestment,3_cas_purchaseOfInvestment,3_cas_netInvestmentPurchaseAndSale,3_cas_saleOfInvestmentProperties,3_cas_purchaseOfInvestmentProperties,3_cas_netInvestmentPropertiesPurchaseAndSale,3_cas_saleOfIntangibles,3_cas_purchaseOfIntangibles,3_cas_netIntangiblesPurchaseAndSale,3_cas_netOtherInvestingChanges,3_cas_cashFlowInvestingActivities,3_cas_netIssuancePaymentsOfDebt,3_cas_netLongTermDebtIssuance,3_cas_netShortTermDebtIssuance,3_cas_commonStockDividendPaid,3_cas_preferredStockDividendPaid,3_cas_cashDividendsPaid,3_cas_netCommonStockIssuance,3_cas_netPreferredStockIssuance,3_cas_repurchaseOfCapitalStock,3_cas_netOtherFinancingCharges,3_cas_cashFlowFinancingActivities,3_cas_freeCashFlow,industry_Advertising Agencies,industry_Aerospace & Defense,industry_Agricultural Inputs,industry_Airlines,industry_Aluminum,industry_Apparel Manufacturing,industry_Apparel Retail,industry_Auto & Truck Dealerships,industry_Auto Manufacturers,industry_Auto Parts,industry_Beverages—Brewers,industry_Beverages—Non-Alcoholic,industry_Beverages—Wineries & Distilleries,industry_Biotechnology,industry_Building Materials,industry_Building Products & Equipment,industry_Chemicals,industry_Communication Equipment,industry_Computer Hardware,industry_Confectioners,industry_Conglomerates,industry_Consulting Services,industry_Consumer Electronics,industry_Copper,industry_Credit Services,industry_Department Stores,industry_Diagnostics & Research,industry_Discount Stores,industry_Drug Manufacturers—General,industry_Drug Manufacturers—Specialty & Generic,industry_Education & Training Services,industry_Electrical Equipment & Parts,industry_Electronic Components,industry_Electronic Gaming & Multimedia,industry_Electronics & Computer Distribution,industry_Engineering & Construction,industry_Entertainment,industry_Farm & Heavy Construction Machinery,industry_Farm Products,industry_Food Distribution,industry_Footwear & Accessories,"industry_Furnishings, Fixtures & Appliances",industry_Gambling,industry_Gold,industry_Grocery Stores,industry_Health Information Services,industry_Home Improvement Retail,industry_Household & Personal Products,industry_Industrial Distribution,industry_Information Technology Services,industry_Integrated Freight & Logistics,industry_Internet Content & Information,industry_Internet Retail,industry_Leisure,industry_Lodging,industry_Lumber & Wood Production,industry_Luxury Goods,industry_Marine Shipping,industry_Medical Care Facilities,industry_Medical Devices,industry_Medical Distribution,industry_Medical Instruments & Supplies,industry_Oil & Gas E&P,industry_Oil & Gas Equipment & Services,industry_Oil & Gas Integrated,industry_Oil & Gas Midstream,industry_Oil & Gas Refining & Marketing,industry_Other Industrial Metals & Mining,industry_Packaged Foods,industry_Packaging & Containers,industry_Personal Services,industry_Pharmaceutical Retailers,industry_Publishing,industry_Railroads,industry_Recreational Vehicles,industry_Rental & Leasing Services,industry_Residential Construction,industry_Resorts & Casinos,industry_Restaurants,industry_Scientific & Technical Instruments,industry_Security & Protection Services,industry_Semiconductor Equipment & Materials,industry_Semiconductors,industry_Software—Application,industry_Software—Infrastructure,industry_Solar,industry_Specialty Business Services,industry_Specialty Chemicals,industry_Specialty Industrial Machinery,industry_Specialty Retail,industry_Staffing & Employment Services,industry_Steel,industry_Telecom Services,industry_Tobacco,industry_Tools & Accessories,industry_Travel Services,industry_Trucking,industry_Utilities—Diversified,industry_Utilities—Independent Power Producers,industry_Utilities—Regulated Electric,industry_Utilities—Regulated Gas,industry_Utilities—Regulated Water,industry_Utilities—Renewable,industry_Waste Management
0,5163000000,2358000000,2805000000,1071000000,941000000,1071000000,-38000000,36000000,16000000,1864000000,4222000000,-152000000,74000000,1460000000,404000000,993000000,3,3,1382000000,930000000,416000000,0,263000000,198000000,679000000,3189000000,850000000,611000000,0,4700000000,4700000000,1107000000,0,6263000000,616000000,794000000,0,2080000000,1791000000,473000000,0,0,0,2624000000,-18000000,4748000000,1071000000,1071000000,238000000,72000000,1021000000,-156000000,0,-1408000000,-1408000000,0,-155000000,-155000000,0,-23000000,-23000000,0,0,0,0,-1000000,-1000000,-3000000,-1590000000,600000000,-15000000,615000000,-206000000,0,-206000000,-723000000,0,-723000000,-24000000,-299000000,865000000,5339000000,2502000000,2837000000,719000000,846000000,719000000,-70000000,8000000,66000000,1991000000,4493000000,123000000,78000000,1496000000,495000000,920000000,2,2,1441000000,1038000000,417000000,0,303000000,216000000,720000000,3415000000,845000000,776000000,0,4433000000,4433000000,831000000,0,6212000000,75000000,639000000,0,1467000000,2284000000,614000000,0,0,0,3287000000,81000000,4873000000,719000000,719000000,308000000,83000000,921000000,-119000000,0,0,0,1000000,-119000000,-118000000,0,-20000000,-20000000,0,0,0,0,0,0,-9000000,-147000000,-45000000,-918000000,873000000,-222000000,0,-222000000,-469000000,0,-469000000,-41000000,-717000000,802000000,6319000000,2912000000,3407000000,1210000000,1347000000,1210000000,-79000000,2000000,92000000,2060000000,4972000000,150000000,81000000,1619000000,441000000,1441000000,3,3,1575000000,1172000000,463000000,0,367000000,222000000,830000000,3799000000,945000000,820000000,0,4956000000,4956000000,981000000,0,6906000000,0,774000000,0,1708000000,2729000000,659000000,0,0,0,3608000000,348000000,5389000000,1210000000,1210000000,321000000,110000000,1485000000,-189000000,0,-546000000,-546000000,1000000,-188000000,-187000000,12000000,-22000000,-10000000,0,0,0,0,-1000000,-1000000,-5000000,-749000000,356000000,431000000,-75000000,-236000000,0,-236000000,-788000000,0,-788000000,-83000000,-696000000,1296000000,6848000000,3126000000,3722000000,1254000000,1618000000,1254000000,-75000000,9000000,-39000000,2104000000,5230000000,250000000,84000000,1637000000,467000000,1588000000,4,4,1053000000,1405000000,555000000,0,483000000,282000000,1038000000,3778000000,1100000000,670000000,0,4773000000,4773000000,821000000,0,6738000000,36000000,909000000,0,1861000000,2733000000,536000000,0,0,0,3366000000,324000000,5289000000,1254000000,1254000000,317000000,125000000,1312000000,-291000000,0,-52000000,-52000000,0,-291000000,-291000000,22000000,-13000000,9000000,0,0,0,0,0,0,-4000000,-338000000,26000000,-9000000,35000000,-250000000,0,-250000000,-1139000000,0,-1139000000,-67000000,-1372000000,1021000000,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
1,10433000000,8537000000,1896000000,-1125000000,876000000,-853000000,-121000000,0,-1193000000,1020000000,9557000000,415000000,121000000,280000000,27000000,-317000000,-6,-6,879000000,660000000,305000000,282000000,611000000,288000000,1644000000,3530000000,7916000000,1414000000,18000000,150000000,202000000,52000000,179000000,11110000000,1000000,1588000000,561000000,2563000000,1799000000,371000000,102000000,0,902000000,6221000000,-555000000,4082000000,-1125000000,-853000000,713000000,30000000,686000000,-379000000,0,0,0,0,0,0,0,-112000000,-112000000,0,0,0,0,0,0,23000000,-468000000,-7000000,-7000000,0,0,0,0,0,0,0,-439000000,-444000000,307000000,9286000000,7969000000,1317000000,-170000000,431000000,-14000000,-146000000,0,-112000000,886000000,8855000000,187000000,146000000,206000000,27000000,319000000,0,0,1607000000,556000000,321000000,112000000,553000000,290000000,1398000000,4520000000,7190000000,1444000000,0,145000000,190000000,45000000,134000000,10344000000,2000000,1494000000,870000000,2761000000,2463000000,515000000,101000000,0,918000000,7112000000,-725000000,3287000000,-170000000,-14000000,653000000,25000000,394000000,-353000000,0,0,0,0,0,0,0,-12000000,-12000000,0,0,0,0,0,0,198000000,-167000000,738000000,738000000,0,0,0,0,0,0,0,-225000000,514000000,41000000,12152000000,9153000000,2999000000,429000000,2077000000,570000000,-195000000,0,-683000000,922000000,10075000000,629000000,195000000,227000000,31000000,1394000000,2,2,1814000000,884000000,538000000,85000000,794000000,358000000,1956000000,5026000000,6623000000,1644000000,7000000,144000000,180000000,36000000,215000000,9977000000,1000000,2048000000,791000000,3223000000,1726000000,599000000,90000000,0,887000000,5531000000,-315000000,4638000000,429000000,570000000,664000000,39000000,920000000,-390000000,0,0,0,0,0,0,0,-11000000,-11000000,0,0,0,0,0,0,966000000,565000000,-799000000,-799000000,0,-19000000,0,-19000000,-150000000,0,-150000000,-215000000,-1158000000,530000000,12451000000,10212000000,2239000000,-102000000,1331000000,38000000,-106000000,0,-523000000,908000000,11120000000,664000000,106000000,204000000,32000000,808000000,0,0,1363000000,909000000,385000000,350000000,1108000000,417000000,2427000000,5250000000,6493000000,1587000000,2000000,145000000,174000000,29000000,366000000,9533000000,1000000,1987000000,681000000,3004000000,1806000000,486000000,65000000,0,937000000,5207000000,-549000000,5058000000,-102000000,38000000,617000000,40000000,822000000,-480000000,0,0,0,0,0,0,10000000,-32000000,-22000000,0,0,0,0,0,0,7000000,-495000000,3000000,3000000,0,-72000000,0,-72000000,-500000000,0,-500000000,-221000000,-768000000,342000000,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
2,45768000000,35379000000,10389000000,1686000000,3700000000,1686000000,-968000000,127000000,-476000000,6689000000,42068000000,570000000,1095000000,1602000000,0,3351000000,3,3,3826000000,1750000000,0,0,0,0,1851000000,8206000000,43732000000,1237000000,0,4091000000,6175000000,2084000000,0,51789000000,4569000000,5741000000,4808000000,18311000000,28875000000,1453000000,5422000000,0,0,41802000000,2264000000,-118000000,1686000000,1686000000,2318000000,94000000,3815000000,-4268000000,0,0,0,904000000,0,904000000,4144000000,-3187000000,957000000,0,0,0,0,0,0,164000000,-2243000000,-230000000,-230000000,0,-178000000,0,-178000000,-1097000000,0,-1097000000,-63000000,-1568000000,-453000000,17337000000,24933000000,-7596000000,-8885000000,-11078000000,-8885000000,-1186000000,41000000,811000000,3482000000,28415000000,-2568000000,1227000000,513000000,0,-10226000000,-18,-18,6864000000,1342000000,0,0,0,0,1614000000,11095000000,39738000000,1816000000,0,4091000000,6120000000,2029000000,0,50913000000,4448000000,5331000000,0,16569000000,36573000000,1502000000,7162000000,0,0,52306000000,-6664000000,-6867000000,-8885000000,-8885000000,2370000000,91000000,-6543000000,-1958000000,0,0,0,1016000000,0,1016000000,2803000000,-5873000000,-3070000000,0,0,0,0,0,0,-330000000,-4342000000,8245000000,8245000000,0,-43000000,0,-43000000,2797000000,0,-173000000,-5000000,10994000000,-8501000000,29882000000,29855000000,27000000,-1993000000,-5065000000,-1993000000,-1782000000,18000000,4299000000,5092000000,34947000000,-555000000,1800000000,1098000000,0,-748000000,-3,-3,12431000000,1505000000,0,0,0,0,1795000000,17336000000,37362000000,2109000000,0,4091000000,6079000000,1988000000,0,49106000000,3995000000,6027000000,0,19005000000,42157000000,1328000000,6239000000,0,0,54777000000,-8638000000,-7340000000,-1993000000,-1993000000,2335000000,98000000,704000000,-208000000,5000000,-28000000,-23000000,374000000,-204000000,374000000,13923000000,-19658000000,-5735000000,0,0,0,0,0,0,-414000000,-5983000000,4847000000,4847000000,0,0,0,0,442000000,0,-18000000,-1000000,5288000000,496000000,48971000000,39934000000,9037000000,127000000,1763000000,127000000,-1746000000,216000000,169000000,7274000000,47208000000,59000000,1962000000,1815000000,0,2148000000,0,0,8965000000,2138000000,0,0,0,892000000,2279000000,15269000000,38294000000,1904000000,0,4091000000,6150000000,2059000000,0,49447000000,4739000000,6843000000,0,21496000000,38948000000,1258000000,5976000000,0,0,49019000000,-8511000000,-5799000000,127000000,127000000,2298000000,78000000,2173000000,-2906000000,0,-321000000,-321000000,61000000,-360000000,-213000000,14972000000,-11617000000,3355000000,0,0,0,0,0,0,87000000,636000000,-2683000000,-2683000000,0,0,0,0,0,0,-21000000,73000000,-2631000000,-733000000,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
3,9709003000,5454257000,4254746000,486896000,677180000,486896000,-39898000,0,464000,3577566000,9031823000,150850000,39898000,3577566000,0,677644000,6,6,418665000,689469000,0,0,0,155241000,4432168000,5695543000,3798538000,52448000,0,992240000,1701996000,709756000,0,5552982000,0,3957850000,519852000,4477702000,2764479000,123250000,334013000,334013000,0,3221742000,3772848000,3549081000,486896000,486896000,238371000,37438000,866909000,-471648000,0,0,0,8709000,-270129000,-261420000,0,0,0,0,0,0,0,-201519000,-201519000,0,-462939000,-369386000,-310047000,-59339000,-17185000,0,-17185000,-495101000,0,-498435000,-481000,-882153000,395261000,10106321000,5624707000,4481614000,493021000,749907000,493021000,-46886000,0,-52006000,3731707000,9356414000,157994000,46886000,3731707000,0,697901000,7,7,834992000,749999000,0,0,0,146811000,4538199000,6270001000,3842589000,52329000,0,993590000,1674717000,681127000,0,5569635000,0,4247443000,496472000,4743915000,3047483000,146281000,342445000,342445000,0,3536209000,4196634000,3559512000,493021000,493021000,250081000,45271000,969688000,-267806000,0,0,0,909000,-267576000,-266667000,0,0,0,0,0,0,0,-230000,-230000,0,-266897000,244524000,244524000,0,-56347000,0,-56347000,-466421000,0,-469691000,-7753000,-285997000,701882000,10997989000,6069241000,4928748000,616108000,838717000,616108000,-37791000,0,4999000,4090031000,10159272000,189817000,37791000,4090031000,0,843716000,9,9,601428000,782785000,0,0,0,232245000,4659018000,6275476000,4200121000,73651000,0,993744000,1644961000,651217000,0,5918733000,0,4699058000,481249000,5180307000,3371971000,103034000,410606000,410606000,0,3885611000,4605791000,3128291000,616108000,616108000,259933000,63067000,1112262000,-289639000,0,0,0,2325000,-289639000,-287314000,0,0,0,0,0,0,0,0,0,0,-287314000,0,0,0,0,0,-160925000,-903134000,0,-906208000,-53000,-1064112000,822623000,11154722000,6192622000,4962100000,501872000,714151000,501872000,-51060000,0,-14404000,4247949000,10440571000,146815000,51060000,4247949000,0,699747000,8,8,269282000,698613000,0,0,0,163695000,4915262000,6046852000,4297829000,62429000,0,990471000,1611372000,620901000,0,5971630000,185000000,4757909000,427480000,5370389000,3466601000,87214000,415997000,415997000,0,3969812000,4744624000,2678281000,501872000,501872000,283800000,50978000,722222000,-425961000,0,0,0,1513000,-424061000,-422548000,0,0,0,0,0,0,0,-1900000,-1900000,0,-424448000,332537000,147537000,185000000,-336230000,0,-336230000,-618480000,0,-618480000,1469000,-620704000,296261000,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False
4,260174000000,161782000000,98392000000,55256000000,63930000000,55256000000,1385000000,4961000000,1807000000,34462000000,196244000000,10481000000,3576000000,18245000000,16217000000,63930000000,2,2,100557000000,45804000000,0,0,0,12352000000,4106000000,162819000000,37378000000,32978000000,0,0,0,0,0,175697000000,16240000000,46236000000,37720000000,105718000000,91807000000,50503000000,0,0,0,142310000000,45898000000,90488000000,55256000000,55256000000,12547000000,6068000000,69391000000,-10495000000,0,-624000000,-624000000,0,-10495000000,-10495000000,98724000000,-40631000000,58093000000,0,0,0,0,0,0,-1078000000,45896000000,-7819000000,-1842000000,-5977000000,-14119000000,0,-14119000000,-66116000000,0,-66897000000,-2922000000,-90976000000,58896000000,274515000000,169559000000,104956000000,57411000000,66288000000,57411000000,890000000,3763000000,803000000,38668000000,208227000000,9680000000,2873000000,19916000000,18752000000,66288000000,3,3,90943000000,37445000000,0,0,0,11264000000,4061000000,143713000000,36766000000,42522000000,0,0,0,0,0,180175000000,13769000000,42296000000,42684000000,105392000000,98667000000,54490000000,0,0,0,153157000000,14966000000,65339000000,57411000000,57411000000,11056000000,6829000000,80674000000,-7309000000,0,-1524000000,-1524000000,0,-7309000000,-7309000000,120483000000,-115148000000,5335000000,0,0,0,0,0,0,-791000000,-4289000000,2499000000,3462000000,-963000000,-14081000000,0,-14081000000,-71478000000,0,-72358000000,-3760000000,-86820000000,73365000000,365817000000,212981000000,152836000000,94680000000,108949000000,94680000000,198000000,2843000000,258000000,43887000000,256868000000,14527000000,2645000000,21973000000,21914000000,108949000000,5,5,62639000000,51506000000,0,0,0,14111000000,6580000000,134836000000,39440000000,48849000000,0,0,0,0,0,216166000000,15613000000,54763000000,47493000000,125481000000,109106000000,53325000000,0,0,0,162431000000,5562000000,63090000000,94680000000,94680000000,11284000000,7906000000,104038000000,-11085000000,0,-33000000,-33000000,0,-11085000000,-11085000000,106483000000,-109558000000,-3075000000,0,0,0,0,0,0,-352000000,-14545000000,12665000000,11643000000,1022000000,-14467000000,0,-14467000000,-84866000000,0,-85971000000,-6685000000,-93353000000,92953000000,394328000000,223546000000,170782000000,99803000000,119437000000,99803000000,-106000000,2825000000,-334000000,51345000000,274891000000,19300000000,2931000000,25094000000,26251000000,119437000000,6,6,48304000000,60932000000,0,0,0,21223000000,4946000000,135405000000,42117000000,54428000000,0,0,0,0,0,217350000000,21110000000,64115000000,60845000000,153982000000,98959000000,49142000000,0,0,0,148101000000,-3068000000,50672000000,99803000000,99803000000,11104000000,9038000000,122151000000,-10708000000,0,-306000000,-306000000,0,-10708000000,-10708000000,67363000000,-76923000000,-9560000000,0,0,0,0,0,0,-1780000000,-22354000000,-123000000,-4078000000,3955000000,-14841000000,0,-14841000000,-89402000000,0,-89402000000,-6383000000,-110749000000,111443000000,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False


In [None]:
counter = Counter(y)
for k, v in counter.items():
  dist = v / len(y) * 100
  print(f"Class={k}, n={v} ({dist}%)")

In [None]:
oversample = SMOTE()
undersample = RandomUnderSampler()
#steps = [("o", oversample),("u",undersample)]
steps = [("o", oversample)]
pipeline = Pipeline(steps=steps)
# transform the dataset 
X_encoded, y = pipeline.fit_resample(X_encoded,y)

In [None]:
#model = xgb.XGBClassifier(random_state=666)
#scores = cross_validate(model, X_encoded, y, cv=6, n_jobs=-1, scoring="f1_macro")
#scores

In [None]:
XGB_dataset = {}
XGB_dataset['X'] = X_encoded
XGB_dataset['y'] = y

In [None]:
## Save Data

filepath=os.path.join(PROJ_ROOT_DIR,'pickle','XGB_dataset.pkl')
with open(filepath,'wb') as f:
    pickle.dump(XGB_dataset,f)

In [None]:
XGB_dataset['X']

In [None]:
# Instantiate XGBClassifier

xgb_clf = xgb.XGBClassifier(random_state=666)

#Inspect the parameters

xgb_clf.get_params()

In [None]:
# Set parameters 

xgb_clf.set_params(n_estimators=10)

xgb_clf.set_params(max_depth=3)

xgb_clf.set_params(eval_metric='error')



In [None]:
# Fit to training set

xgb_clf.fit(X_train, y_train)


In [None]:
# Predict the labels of the test set

preds = xgb_clf.predict(X_test)


In [None]:
# Calculate the accuracy

accuracy = float(np.sum(preds==y_test))/y_test.shape[0]

accuracy

In [None]:
## Plot Feature importance
import matplotlib

matplotlib.rcParams['figure.figsize'] = (10.0,8)

# Plot feature importance

xgb.plot_importance(xgb_clf)

In [None]:
## Plot a decision tree from model

matplotlib.rcParams['figure.figsize'] = (20.0,8)

# Plot the first tree

xgb.plot_tree(xgb_clf,num_trees=0)

In [None]:
# Try with different parameters

xgb_clf.set_params(n_estimators=100)

xgb_clf.set_params(max_depth=3)

# Fit to training set

xgb_clf.fit(X_train, y_train)

# Predict the labels of the test set

preds = xgb_clf.predict(X_test)

# Calculate the accuracy

accuracy = float(np.sum(preds==y_test))/y_test.shape[0]

accuracy


In [None]:
from sklearn.model_selection import RandomizedSearchCV

# Define a parameter grid

rs_param_grid = {
    # max_depth: values from 3 to 12
    
    'max_depth': [3,4,5,6,7,8,9,10,11,12],
    
    # alpha: values 0, .001, .01, .1
    
    'alpha': [0, .001, .01, .1],
    
    # subsample: values 0.25, 0.5, 0.75, 1
    
    'subsample': [0.25,0.5,0.75,1],
    
    # learning_rate: ten values between 0.01 = 0.5
    
    'learning_rate': np.linspace(0.01, 0.5, 10),
    
    # n_estimators: values 10, 25, 40
    
    'n_estimators':[30,40,50,60,70,80,90,100]
}


In [None]:
# Instantiate XGBoost Classifier

xgb_clf = xgb.XGBClassifier(eval_metric='error',random_state=123)

In [None]:
#Instantiate RandomizedSearch

xgb_rs = RandomizedSearchCV(estimator=xgb_clf, 
                            param_distributions=rs_param_grid, cv=5, n_iter=200, verbose=1, random_state=666)

In [None]:
# Train the mode on the training set

xgb_rs.fit(X_train, y_train)

In [None]:
# Print the best parameters and highest accuracy

print("Best parameters found:", xgb_rs.best_params_)
print("Best accuracy found:", xgb_rs.best_score_)

In [None]:
clf_xgb = xgb.XGBClassifier(objective='multi:softmax',num_class=4,max_depth=6, seed=666,early_stopping_rounds=10 )

clf_xgb.fit(X_train, y_train, verbose=True, 
            eval_metric='aucpr',
            early_stopping_rounds=10,
            eval_set=[(X_test, y_test)])

# Pipelines

In [None]:
Steps:
* Missing Values
* One hot encoding
* Scaling
* feature selection
* Train model

In [55]:
df.head()

Unnamed: 0,industry,0_inc_totalRevenue,0_inc_costOfRevenue,0_inc_grossProfit,0_inc_netIncome,0_inc_operatingIncome,0_inc_netIncomeContinuousOperations,0_inc_netInterestIncome,0_inc_interestIncome,0_inc_otherIncomeExpense,0_inc_operatingExpense,0_inc_totalExpenses,0_inc_taxProvision,0_inc_interestExpense,0_inc_SGA,0_inc_researchAndDevelopment,0_inc_ebit,0_inc_dilutedEPS,0_inc_basicEPS,0_bal_cashEquivalent,0_bal_receivables,0_bal_finishedGoods,0_bal_workInProcess,0_bal_rawMaterials,0_bal_otherCurrentAssets,0_bal_inventory,0_bal_currentAssets,0_bal_netPPE,0_bal_otherNonCurrentAssets,0_bal_financialAssets,0_bal_goodwill,0_bal_goodwillAndOtherIntangibleAssets,0_bal_otherIntangibleAssets,0_bal_nonCurrentAccountsReceivable,0_bal_totalNonCurrentAssets,0_bal_currentDebt,0_bal_payablesAndAccruedExpenses,0_bal_otherCurrentLiabilities,0_bal_currentLiabilities,0_bal_longTermDebt,0_bal_otherNonCurrentLiabilities,0_bal_nonCurrentDeferredLiabilities,0_bal_nonCurrentDeferredTaxesLiabilities,0_bal_longTermProvisions,0_bal_totalNonCurrentLiabilities,0_bal_retainedEarnings,0_bal_stockholdersEquity,0_cas_netIncome,0_cas_netIncomeFromContinuingOperations,0_cas_depreciation,0_cas_stockBasedCompensation,0_cas_cashFlowOperatingActivities,0_cas_capitalExpenditure,0_cas_saleOfBusiness,0_cas_purchaseOfBusiness,0_cas_netBusinessPurchaseAndSale,0_cas_saleOfPPE,0_cas_purchaseOfPPE,0_cas_netPPEPurchaseAndSale,0_cas_saleOfInvestment,0_cas_purchaseOfInvestment,0_cas_netInvestmentPurchaseAndSale,0_cas_saleOfInvestmentProperties,0_cas_purchaseOfInvestmentProperties,0_cas_netInvestmentPropertiesPurchaseAndSale,0_cas_saleOfIntangibles,0_cas_purchaseOfIntangibles,0_cas_netIntangiblesPurchaseAndSale,0_cas_netOtherInvestingChanges,0_cas_cashFlowInvestingActivities,0_cas_netIssuancePaymentsOfDebt,0_cas_netLongTermDebtIssuance,0_cas_netShortTermDebtIssuance,0_cas_commonStockDividendPaid,0_cas_preferredStockDividendPaid,0_cas_cashDividendsPaid,0_cas_netCommonStockIssuance,0_cas_netPreferredStockIssuance,0_cas_repurchaseOfCapitalStock,0_cas_netOtherFinancingCharges,0_cas_cashFlowFinancingActivities,0_cas_freeCashFlow,1_inc_totalRevenue,1_inc_costOfRevenue,1_inc_grossProfit,1_inc_netIncome,1_inc_operatingIncome,1_inc_netIncomeContinuousOperations,1_inc_netInterestIncome,1_inc_interestIncome,1_inc_otherIncomeExpense,1_inc_operatingExpense,1_inc_totalExpenses,1_inc_taxProvision,1_inc_interestExpense,1_inc_SGA,1_inc_researchAndDevelopment,1_inc_ebit,1_inc_dilutedEPS,1_inc_basicEPS,1_bal_cashEquivalent,1_bal_receivables,1_bal_finishedGoods,1_bal_workInProcess,1_bal_rawMaterials,1_bal_otherCurrentAssets,1_bal_inventory,1_bal_currentAssets,1_bal_netPPE,1_bal_otherNonCurrentAssets,1_bal_financialAssets,1_bal_goodwill,1_bal_goodwillAndOtherIntangibleAssets,1_bal_otherIntangibleAssets,1_bal_nonCurrentAccountsReceivable,1_bal_totalNonCurrentAssets,1_bal_currentDebt,1_bal_payablesAndAccruedExpenses,1_bal_otherCurrentLiabilities,1_bal_currentLiabilities,1_bal_longTermDebt,1_bal_otherNonCurrentLiabilities,1_bal_nonCurrentDeferredLiabilities,1_bal_nonCurrentDeferredTaxesLiabilities,1_bal_longTermProvisions,1_bal_totalNonCurrentLiabilities,1_bal_retainedEarnings,1_bal_stockholdersEquity,1_cas_netIncome,1_cas_netIncomeFromContinuingOperations,1_cas_depreciation,1_cas_stockBasedCompensation,1_cas_cashFlowOperatingActivities,1_cas_capitalExpenditure,1_cas_saleOfBusiness,1_cas_purchaseOfBusiness,1_cas_netBusinessPurchaseAndSale,1_cas_saleOfPPE,1_cas_purchaseOfPPE,1_cas_netPPEPurchaseAndSale,1_cas_saleOfInvestment,1_cas_purchaseOfInvestment,1_cas_netInvestmentPurchaseAndSale,1_cas_saleOfInvestmentProperties,1_cas_purchaseOfInvestmentProperties,1_cas_netInvestmentPropertiesPurchaseAndSale,1_cas_saleOfIntangibles,1_cas_purchaseOfIntangibles,1_cas_netIntangiblesPurchaseAndSale,1_cas_netOtherInvestingChanges,1_cas_cashFlowInvestingActivities,1_cas_netIssuancePaymentsOfDebt,1_cas_netLongTermDebtIssuance,1_cas_netShortTermDebtIssuance,1_cas_commonStockDividendPaid,1_cas_preferredStockDividendPaid,1_cas_cashDividendsPaid,1_cas_netCommonStockIssuance,1_cas_netPreferredStockIssuance,1_cas_repurchaseOfCapitalStock,1_cas_netOtherFinancingCharges,1_cas_cashFlowFinancingActivities,1_cas_freeCashFlow,2_inc_totalRevenue,2_inc_costOfRevenue,2_inc_grossProfit,2_inc_netIncome,2_inc_operatingIncome,2_inc_netIncomeContinuousOperations,2_inc_netInterestIncome,2_inc_interestIncome,2_inc_otherIncomeExpense,2_inc_operatingExpense,2_inc_totalExpenses,2_inc_taxProvision,2_inc_interestExpense,2_inc_SGA,2_inc_researchAndDevelopment,2_inc_ebit,2_inc_dilutedEPS,2_inc_basicEPS,2_bal_cashEquivalent,2_bal_receivables,2_bal_finishedGoods,2_bal_workInProcess,2_bal_rawMaterials,2_bal_otherCurrentAssets,2_bal_inventory,2_bal_currentAssets,2_bal_netPPE,2_bal_otherNonCurrentAssets,2_bal_financialAssets,2_bal_goodwill,2_bal_goodwillAndOtherIntangibleAssets,2_bal_otherIntangibleAssets,2_bal_nonCurrentAccountsReceivable,2_bal_totalNonCurrentAssets,2_bal_currentDebt,2_bal_payablesAndAccruedExpenses,2_bal_otherCurrentLiabilities,2_bal_currentLiabilities,2_bal_longTermDebt,2_bal_otherNonCurrentLiabilities,2_bal_nonCurrentDeferredLiabilities,2_bal_nonCurrentDeferredTaxesLiabilities,2_bal_longTermProvisions,2_bal_totalNonCurrentLiabilities,2_bal_retainedEarnings,2_bal_stockholdersEquity,2_cas_netIncome,2_cas_netIncomeFromContinuingOperations,2_cas_depreciation,2_cas_stockBasedCompensation,2_cas_cashFlowOperatingActivities,2_cas_capitalExpenditure,2_cas_saleOfBusiness,2_cas_purchaseOfBusiness,2_cas_netBusinessPurchaseAndSale,2_cas_saleOfPPE,2_cas_purchaseOfPPE,2_cas_netPPEPurchaseAndSale,2_cas_saleOfInvestment,2_cas_purchaseOfInvestment,2_cas_netInvestmentPurchaseAndSale,2_cas_saleOfInvestmentProperties,2_cas_purchaseOfInvestmentProperties,2_cas_netInvestmentPropertiesPurchaseAndSale,2_cas_saleOfIntangibles,2_cas_purchaseOfIntangibles,2_cas_netIntangiblesPurchaseAndSale,2_cas_netOtherInvestingChanges,2_cas_cashFlowInvestingActivities,2_cas_netIssuancePaymentsOfDebt,2_cas_netLongTermDebtIssuance,2_cas_netShortTermDebtIssuance,2_cas_commonStockDividendPaid,2_cas_preferredStockDividendPaid,2_cas_cashDividendsPaid,2_cas_netCommonStockIssuance,2_cas_netPreferredStockIssuance,2_cas_repurchaseOfCapitalStock,2_cas_netOtherFinancingCharges,2_cas_cashFlowFinancingActivities,2_cas_freeCashFlow,3_inc_totalRevenue,3_inc_costOfRevenue,3_inc_grossProfit,3_inc_netIncome,3_inc_operatingIncome,3_inc_netIncomeContinuousOperations,3_inc_netInterestIncome,3_inc_interestIncome,3_inc_otherIncomeExpense,3_inc_operatingExpense,3_inc_totalExpenses,3_inc_taxProvision,3_inc_interestExpense,3_inc_SGA,3_inc_researchAndDevelopment,3_inc_ebit,3_inc_dilutedEPS,3_inc_basicEPS,3_bal_cashEquivalent,3_bal_receivables,3_bal_finishedGoods,3_bal_workInProcess,3_bal_rawMaterials,3_bal_otherCurrentAssets,3_bal_inventory,3_bal_currentAssets,3_bal_netPPE,3_bal_otherNonCurrentAssets,3_bal_financialAssets,3_bal_goodwill,3_bal_goodwillAndOtherIntangibleAssets,3_bal_otherIntangibleAssets,3_bal_nonCurrentAccountsReceivable,3_bal_totalNonCurrentAssets,3_bal_currentDebt,3_bal_payablesAndAccruedExpenses,3_bal_otherCurrentLiabilities,3_bal_currentLiabilities,3_bal_longTermDebt,3_bal_otherNonCurrentLiabilities,3_bal_nonCurrentDeferredLiabilities,3_bal_nonCurrentDeferredTaxesLiabilities,3_bal_longTermProvisions,3_bal_totalNonCurrentLiabilities,3_bal_retainedEarnings,3_bal_stockholdersEquity,3_cas_netIncome,3_cas_netIncomeFromContinuingOperations,3_cas_depreciation,3_cas_stockBasedCompensation,3_cas_cashFlowOperatingActivities,3_cas_capitalExpenditure,3_cas_saleOfBusiness,3_cas_purchaseOfBusiness,3_cas_netBusinessPurchaseAndSale,3_cas_saleOfPPE,3_cas_purchaseOfPPE,3_cas_netPPEPurchaseAndSale,3_cas_saleOfInvestment,3_cas_purchaseOfInvestment,3_cas_netInvestmentPurchaseAndSale,3_cas_saleOfInvestmentProperties,3_cas_purchaseOfInvestmentProperties,3_cas_netInvestmentPropertiesPurchaseAndSale,3_cas_saleOfIntangibles,3_cas_purchaseOfIntangibles,3_cas_netIntangiblesPurchaseAndSale,3_cas_netOtherInvestingChanges,3_cas_cashFlowInvestingActivities,3_cas_netIssuancePaymentsOfDebt,3_cas_netLongTermDebtIssuance,3_cas_netShortTermDebtIssuance,3_cas_commonStockDividendPaid,3_cas_preferredStockDividendPaid,3_cas_cashDividendsPaid,3_cas_netCommonStockIssuance,3_cas_netPreferredStockIssuance,3_cas_repurchaseOfCapitalStock,3_cas_netOtherFinancingCharges,3_cas_cashFlowFinancingActivities,3_cas_freeCashFlow,label
0,Diagnostics & Research,5163000000,2358000000,2805000000,1071000000,941000000,1071000000,-38000000,36000000,16000000,1864000000,4222000000,-152000000,74000000,1460000000,404000000,993000000,3,3,1382000000,930000000,416000000,0,263000000,198000000,679000000,3189000000,850000000,611000000,0,4700000000,4700000000,1107000000,0,6263000000,616000000,794000000,0,2080000000,1791000000,473000000,0,0,0,2624000000,-18000000,4748000000,1071000000,1071000000,238000000,72000000,1021000000,-156000000,0,-1408000000,-1408000000,0,-155000000,-155000000,0,-23000000,-23000000,0,0,0,0,-1000000,-1000000,-3000000,-1590000000,600000000,-15000000,615000000,-206000000,0,-206000000,-723000000,0,-723000000,-24000000,-299000000,865000000,5339000000,2502000000,2837000000,719000000,846000000,719000000,-70000000,8000000,66000000,1991000000,4493000000,123000000,78000000,1496000000,495000000,920000000,2,2,1441000000,1038000000,417000000,0,303000000,216000000,720000000,3415000000,845000000,776000000,0,4433000000,4433000000,831000000,0,6212000000,75000000,639000000,0,1467000000,2284000000,614000000,0,0,0,3287000000,81000000,4873000000,719000000,719000000,308000000,83000000,921000000,-119000000,0,0,0,1000000,-119000000,-118000000,0,-20000000,-20000000,0,0,0,0,0,0,-9000000,-147000000,-45000000,-918000000,873000000,-222000000,0,-222000000,-469000000,0,-469000000,-41000000,-717000000,802000000,6319000000,2912000000,3407000000,1210000000,1347000000,1210000000,-79000000,2000000,92000000,2060000000,4972000000,150000000,81000000,1619000000,441000000,1441000000,3,3,1575000000,1172000000,463000000,0,367000000,222000000,830000000,3799000000,945000000,820000000,0,4956000000,4956000000,981000000,0,6906000000,0,774000000,0,1708000000,2729000000,659000000,0,0,0,3608000000,348000000,5389000000,1210000000,1210000000,321000000,110000000,1485000000,-189000000,0,-546000000,-546000000,1000000,-188000000,-187000000,12000000,-22000000,-10000000,0,0,0,0,-1000000,-1000000,-5000000,-749000000,356000000,431000000,-75000000,-236000000,0,-236000000,-788000000,0,-788000000,-83000000,-696000000,1296000000,6848000000,3126000000,3722000000,1254000000,1618000000,1254000000,-75000000,9000000,-39000000,2104000000,5230000000,250000000,84000000,1637000000,467000000,1588000000,4,4,1053000000,1405000000,555000000,0,483000000,282000000,1038000000,3778000000,1100000000,670000000,0,4773000000,4773000000,821000000,0,6738000000,36000000,909000000,0,1861000000,2733000000,536000000,0,0,0,3366000000,324000000,5289000000,1254000000,1254000000,317000000,125000000,1312000000,-291000000,0,-52000000,-52000000,0,-291000000,-291000000,22000000,-13000000,9000000,0,0,0,0,0,0,-4000000,-338000000,26000000,-9000000,35000000,-250000000,0,-250000000,-1139000000,0,-1139000000,-67000000,-1372000000,1021000000,3
1,Aluminum,10433000000,8537000000,1896000000,-1125000000,876000000,-853000000,-121000000,0,-1193000000,1020000000,9557000000,415000000,121000000,280000000,27000000,-317000000,-6,-6,879000000,660000000,305000000,282000000,611000000,288000000,1644000000,3530000000,7916000000,1414000000,18000000,150000000,202000000,52000000,179000000,11110000000,1000000,1588000000,561000000,2563000000,1799000000,371000000,102000000,0,902000000,6221000000,-555000000,4082000000,-1125000000,-853000000,713000000,30000000,686000000,-379000000,0,0,0,0,0,0,0,-112000000,-112000000,0,0,0,0,0,0,23000000,-468000000,-7000000,-7000000,0,0,0,0,0,0,0,-439000000,-444000000,307000000,9286000000,7969000000,1317000000,-170000000,431000000,-14000000,-146000000,0,-112000000,886000000,8855000000,187000000,146000000,206000000,27000000,319000000,0,0,1607000000,556000000,321000000,112000000,553000000,290000000,1398000000,4520000000,7190000000,1444000000,0,145000000,190000000,45000000,134000000,10344000000,2000000,1494000000,870000000,2761000000,2463000000,515000000,101000000,0,918000000,7112000000,-725000000,3287000000,-170000000,-14000000,653000000,25000000,394000000,-353000000,0,0,0,0,0,0,0,-12000000,-12000000,0,0,0,0,0,0,198000000,-167000000,738000000,738000000,0,0,0,0,0,0,0,-225000000,514000000,41000000,12152000000,9153000000,2999000000,429000000,2077000000,570000000,-195000000,0,-683000000,922000000,10075000000,629000000,195000000,227000000,31000000,1394000000,2,2,1814000000,884000000,538000000,85000000,794000000,358000000,1956000000,5026000000,6623000000,1644000000,7000000,144000000,180000000,36000000,215000000,9977000000,1000000,2048000000,791000000,3223000000,1726000000,599000000,90000000,0,887000000,5531000000,-315000000,4638000000,429000000,570000000,664000000,39000000,920000000,-390000000,0,0,0,0,0,0,0,-11000000,-11000000,0,0,0,0,0,0,966000000,565000000,-799000000,-799000000,0,-19000000,0,-19000000,-150000000,0,-150000000,-215000000,-1158000000,530000000,12451000000,10212000000,2239000000,-102000000,1331000000,38000000,-106000000,0,-523000000,908000000,11120000000,664000000,106000000,204000000,32000000,808000000,0,0,1363000000,909000000,385000000,350000000,1108000000,417000000,2427000000,5250000000,6493000000,1587000000,2000000,145000000,174000000,29000000,366000000,9533000000,1000000,1987000000,681000000,3004000000,1806000000,486000000,65000000,0,937000000,5207000000,-549000000,5058000000,-102000000,38000000,617000000,40000000,822000000,-480000000,0,0,0,0,0,0,10000000,-32000000,-22000000,0,0,0,0,0,0,7000000,-495000000,3000000,3000000,0,-72000000,0,-72000000,-500000000,0,-500000000,-221000000,-768000000,342000000,2
2,Airlines,45768000000,35379000000,10389000000,1686000000,3700000000,1686000000,-968000000,127000000,-476000000,6689000000,42068000000,570000000,1095000000,1602000000,0,3351000000,3,3,3826000000,1750000000,0,0,0,0,1851000000,8206000000,43732000000,1237000000,0,4091000000,6175000000,2084000000,0,51789000000,4569000000,5741000000,4808000000,18311000000,28875000000,1453000000,5422000000,0,0,41802000000,2264000000,-118000000,1686000000,1686000000,2318000000,94000000,3815000000,-4268000000,0,0,0,904000000,0,904000000,4144000000,-3187000000,957000000,0,0,0,0,0,0,164000000,-2243000000,-230000000,-230000000,0,-178000000,0,-178000000,-1097000000,0,-1097000000,-63000000,-1568000000,-453000000,17337000000,24933000000,-7596000000,-8885000000,-11078000000,-8885000000,-1186000000,41000000,811000000,3482000000,28415000000,-2568000000,1227000000,513000000,0,-10226000000,-18,-18,6864000000,1342000000,0,0,0,0,1614000000,11095000000,39738000000,1816000000,0,4091000000,6120000000,2029000000,0,50913000000,4448000000,5331000000,0,16569000000,36573000000,1502000000,7162000000,0,0,52306000000,-6664000000,-6867000000,-8885000000,-8885000000,2370000000,91000000,-6543000000,-1958000000,0,0,0,1016000000,0,1016000000,2803000000,-5873000000,-3070000000,0,0,0,0,0,0,-330000000,-4342000000,8245000000,8245000000,0,-43000000,0,-43000000,2797000000,0,-173000000,-5000000,10994000000,-8501000000,29882000000,29855000000,27000000,-1993000000,-5065000000,-1993000000,-1782000000,18000000,4299000000,5092000000,34947000000,-555000000,1800000000,1098000000,0,-748000000,-3,-3,12431000000,1505000000,0,0,0,0,1795000000,17336000000,37362000000,2109000000,0,4091000000,6079000000,1988000000,0,49106000000,3995000000,6027000000,0,19005000000,42157000000,1328000000,6239000000,0,0,54777000000,-8638000000,-7340000000,-1993000000,-1993000000,2335000000,98000000,704000000,-208000000,5000000,-28000000,-23000000,374000000,-204000000,374000000,13923000000,-19658000000,-5735000000,0,0,0,0,0,0,-414000000,-5983000000,4847000000,4847000000,0,0,0,0,442000000,0,-18000000,-1000000,5288000000,496000000,48971000000,39934000000,9037000000,127000000,1763000000,127000000,-1746000000,216000000,169000000,7274000000,47208000000,59000000,1962000000,1815000000,0,2148000000,0,0,8965000000,2138000000,0,0,0,892000000,2279000000,15269000000,38294000000,1904000000,0,4091000000,6150000000,2059000000,0,49447000000,4739000000,6843000000,0,21496000000,38948000000,1258000000,5976000000,0,0,49019000000,-8511000000,-5799000000,127000000,127000000,2298000000,78000000,2173000000,-2906000000,0,-321000000,-321000000,61000000,-360000000,-213000000,14972000000,-11617000000,3355000000,0,0,0,0,0,0,87000000,636000000,-2683000000,-2683000000,0,0,0,0,0,0,-21000000,73000000,-2631000000,-733000000,0
3,Specialty Retail,9709003000,5454257000,4254746000,486896000,677180000,486896000,-39898000,0,464000,3577566000,9031823000,150850000,39898000,3577566000,0,677644000,6,6,418665000,689469000,0,0,0,155241000,4432168000,5695543000,3798538000,52448000,0,992240000,1701996000,709756000,0,5552982000,0,3957850000,519852000,4477702000,2764479000,123250000,334013000,334013000,0,3221742000,3772848000,3549081000,486896000,486896000,238371000,37438000,866909000,-471648000,0,0,0,8709000,-270129000,-261420000,0,0,0,0,0,0,0,-201519000,-201519000,0,-462939000,-369386000,-310047000,-59339000,-17185000,0,-17185000,-495101000,0,-498435000,-481000,-882153000,395261000,10106321000,5624707000,4481614000,493021000,749907000,493021000,-46886000,0,-52006000,3731707000,9356414000,157994000,46886000,3731707000,0,697901000,7,7,834992000,749999000,0,0,0,146811000,4538199000,6270001000,3842589000,52329000,0,993590000,1674717000,681127000,0,5569635000,0,4247443000,496472000,4743915000,3047483000,146281000,342445000,342445000,0,3536209000,4196634000,3559512000,493021000,493021000,250081000,45271000,969688000,-267806000,0,0,0,909000,-267576000,-266667000,0,0,0,0,0,0,0,-230000,-230000,0,-266897000,244524000,244524000,0,-56347000,0,-56347000,-466421000,0,-469691000,-7753000,-285997000,701882000,10997989000,6069241000,4928748000,616108000,838717000,616108000,-37791000,0,4999000,4090031000,10159272000,189817000,37791000,4090031000,0,843716000,9,9,601428000,782785000,0,0,0,232245000,4659018000,6275476000,4200121000,73651000,0,993744000,1644961000,651217000,0,5918733000,0,4699058000,481249000,5180307000,3371971000,103034000,410606000,410606000,0,3885611000,4605791000,3128291000,616108000,616108000,259933000,63067000,1112262000,-289639000,0,0,0,2325000,-289639000,-287314000,0,0,0,0,0,0,0,0,0,0,-287314000,0,0,0,0,0,-160925000,-903134000,0,-906208000,-53000,-1064112000,822623000,11154722000,6192622000,4962100000,501872000,714151000,501872000,-51060000,0,-14404000,4247949000,10440571000,146815000,51060000,4247949000,0,699747000,8,8,269282000,698613000,0,0,0,163695000,4915262000,6046852000,4297829000,62429000,0,990471000,1611372000,620901000,0,5971630000,185000000,4757909000,427480000,5370389000,3466601000,87214000,415997000,415997000,0,3969812000,4744624000,2678281000,501872000,501872000,283800000,50978000,722222000,-425961000,0,0,0,1513000,-424061000,-422548000,0,0,0,0,0,0,0,-1900000,-1900000,0,-424448000,332537000,147537000,185000000,-336230000,0,-336230000,-618480000,0,-618480000,1469000,-620704000,296261000,2
4,Consumer Electronics,260174000000,161782000000,98392000000,55256000000,63930000000,55256000000,1385000000,4961000000,1807000000,34462000000,196244000000,10481000000,3576000000,18245000000,16217000000,63930000000,2,2,100557000000,45804000000,0,0,0,12352000000,4106000000,162819000000,37378000000,32978000000,0,0,0,0,0,175697000000,16240000000,46236000000,37720000000,105718000000,91807000000,50503000000,0,0,0,142310000000,45898000000,90488000000,55256000000,55256000000,12547000000,6068000000,69391000000,-10495000000,0,-624000000,-624000000,0,-10495000000,-10495000000,98724000000,-40631000000,58093000000,0,0,0,0,0,0,-1078000000,45896000000,-7819000000,-1842000000,-5977000000,-14119000000,0,-14119000000,-66116000000,0,-66897000000,-2922000000,-90976000000,58896000000,274515000000,169559000000,104956000000,57411000000,66288000000,57411000000,890000000,3763000000,803000000,38668000000,208227000000,9680000000,2873000000,19916000000,18752000000,66288000000,3,3,90943000000,37445000000,0,0,0,11264000000,4061000000,143713000000,36766000000,42522000000,0,0,0,0,0,180175000000,13769000000,42296000000,42684000000,105392000000,98667000000,54490000000,0,0,0,153157000000,14966000000,65339000000,57411000000,57411000000,11056000000,6829000000,80674000000,-7309000000,0,-1524000000,-1524000000,0,-7309000000,-7309000000,120483000000,-115148000000,5335000000,0,0,0,0,0,0,-791000000,-4289000000,2499000000,3462000000,-963000000,-14081000000,0,-14081000000,-71478000000,0,-72358000000,-3760000000,-86820000000,73365000000,365817000000,212981000000,152836000000,94680000000,108949000000,94680000000,198000000,2843000000,258000000,43887000000,256868000000,14527000000,2645000000,21973000000,21914000000,108949000000,5,5,62639000000,51506000000,0,0,0,14111000000,6580000000,134836000000,39440000000,48849000000,0,0,0,0,0,216166000000,15613000000,54763000000,47493000000,125481000000,109106000000,53325000000,0,0,0,162431000000,5562000000,63090000000,94680000000,94680000000,11284000000,7906000000,104038000000,-11085000000,0,-33000000,-33000000,0,-11085000000,-11085000000,106483000000,-109558000000,-3075000000,0,0,0,0,0,0,-352000000,-14545000000,12665000000,11643000000,1022000000,-14467000000,0,-14467000000,-84866000000,0,-85971000000,-6685000000,-93353000000,92953000000,394328000000,223546000000,170782000000,99803000000,119437000000,99803000000,-106000000,2825000000,-334000000,51345000000,274891000000,19300000000,2931000000,25094000000,26251000000,119437000000,6,6,48304000000,60932000000,0,0,0,21223000000,4946000000,135405000000,42117000000,54428000000,0,0,0,0,0,217350000000,21110000000,64115000000,60845000000,153982000000,98959000000,49142000000,0,0,0,148101000000,-3068000000,50672000000,99803000000,99803000000,11104000000,9038000000,122151000000,-10708000000,0,-306000000,-306000000,0,-10708000000,-10708000000,67363000000,-76923000000,-9560000000,0,0,0,0,0,0,-1780000000,-22354000000,-123000000,-4078000000,3955000000,-14841000000,0,-14841000000,-89402000000,0,-89402000000,-6383000000,-110749000000,111443000000,2


In [58]:
# Step 1 - Test - Test - Spit
X = df.drop('label',axis=1)
y = df['label']
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.2,random_state=99)


In [60]:
X_train.head()

Unnamed: 0,industry,0_inc_totalRevenue,0_inc_costOfRevenue,0_inc_grossProfit,0_inc_netIncome,0_inc_operatingIncome,0_inc_netIncomeContinuousOperations,0_inc_netInterestIncome,0_inc_interestIncome,0_inc_otherIncomeExpense,0_inc_operatingExpense,0_inc_totalExpenses,0_inc_taxProvision,0_inc_interestExpense,0_inc_SGA,0_inc_researchAndDevelopment,0_inc_ebit,0_inc_dilutedEPS,0_inc_basicEPS,0_bal_cashEquivalent,0_bal_receivables,0_bal_finishedGoods,0_bal_workInProcess,0_bal_rawMaterials,0_bal_otherCurrentAssets,0_bal_inventory,0_bal_currentAssets,0_bal_netPPE,0_bal_otherNonCurrentAssets,0_bal_financialAssets,0_bal_goodwill,0_bal_goodwillAndOtherIntangibleAssets,0_bal_otherIntangibleAssets,0_bal_nonCurrentAccountsReceivable,0_bal_totalNonCurrentAssets,0_bal_currentDebt,0_bal_payablesAndAccruedExpenses,0_bal_otherCurrentLiabilities,0_bal_currentLiabilities,0_bal_longTermDebt,0_bal_otherNonCurrentLiabilities,0_bal_nonCurrentDeferredLiabilities,0_bal_nonCurrentDeferredTaxesLiabilities,0_bal_longTermProvisions,0_bal_totalNonCurrentLiabilities,0_bal_retainedEarnings,0_bal_stockholdersEquity,0_cas_netIncome,0_cas_netIncomeFromContinuingOperations,0_cas_depreciation,0_cas_stockBasedCompensation,0_cas_cashFlowOperatingActivities,0_cas_capitalExpenditure,0_cas_saleOfBusiness,0_cas_purchaseOfBusiness,0_cas_netBusinessPurchaseAndSale,0_cas_saleOfPPE,0_cas_purchaseOfPPE,0_cas_netPPEPurchaseAndSale,0_cas_saleOfInvestment,0_cas_purchaseOfInvestment,0_cas_netInvestmentPurchaseAndSale,0_cas_saleOfInvestmentProperties,0_cas_purchaseOfInvestmentProperties,0_cas_netInvestmentPropertiesPurchaseAndSale,0_cas_saleOfIntangibles,0_cas_purchaseOfIntangibles,0_cas_netIntangiblesPurchaseAndSale,0_cas_netOtherInvestingChanges,0_cas_cashFlowInvestingActivities,0_cas_netIssuancePaymentsOfDebt,0_cas_netLongTermDebtIssuance,0_cas_netShortTermDebtIssuance,0_cas_commonStockDividendPaid,0_cas_preferredStockDividendPaid,0_cas_cashDividendsPaid,0_cas_netCommonStockIssuance,0_cas_netPreferredStockIssuance,0_cas_repurchaseOfCapitalStock,0_cas_netOtherFinancingCharges,0_cas_cashFlowFinancingActivities,0_cas_freeCashFlow,1_inc_totalRevenue,1_inc_costOfRevenue,1_inc_grossProfit,1_inc_netIncome,1_inc_operatingIncome,1_inc_netIncomeContinuousOperations,1_inc_netInterestIncome,1_inc_interestIncome,1_inc_otherIncomeExpense,1_inc_operatingExpense,1_inc_totalExpenses,1_inc_taxProvision,1_inc_interestExpense,1_inc_SGA,1_inc_researchAndDevelopment,1_inc_ebit,1_inc_dilutedEPS,1_inc_basicEPS,1_bal_cashEquivalent,1_bal_receivables,1_bal_finishedGoods,1_bal_workInProcess,1_bal_rawMaterials,1_bal_otherCurrentAssets,1_bal_inventory,1_bal_currentAssets,1_bal_netPPE,1_bal_otherNonCurrentAssets,1_bal_financialAssets,1_bal_goodwill,1_bal_goodwillAndOtherIntangibleAssets,1_bal_otherIntangibleAssets,1_bal_nonCurrentAccountsReceivable,1_bal_totalNonCurrentAssets,1_bal_currentDebt,1_bal_payablesAndAccruedExpenses,1_bal_otherCurrentLiabilities,1_bal_currentLiabilities,1_bal_longTermDebt,1_bal_otherNonCurrentLiabilities,1_bal_nonCurrentDeferredLiabilities,1_bal_nonCurrentDeferredTaxesLiabilities,1_bal_longTermProvisions,1_bal_totalNonCurrentLiabilities,1_bal_retainedEarnings,1_bal_stockholdersEquity,1_cas_netIncome,1_cas_netIncomeFromContinuingOperations,1_cas_depreciation,1_cas_stockBasedCompensation,1_cas_cashFlowOperatingActivities,1_cas_capitalExpenditure,1_cas_saleOfBusiness,1_cas_purchaseOfBusiness,1_cas_netBusinessPurchaseAndSale,1_cas_saleOfPPE,1_cas_purchaseOfPPE,1_cas_netPPEPurchaseAndSale,1_cas_saleOfInvestment,1_cas_purchaseOfInvestment,1_cas_netInvestmentPurchaseAndSale,1_cas_saleOfInvestmentProperties,1_cas_purchaseOfInvestmentProperties,1_cas_netInvestmentPropertiesPurchaseAndSale,1_cas_saleOfIntangibles,1_cas_purchaseOfIntangibles,1_cas_netIntangiblesPurchaseAndSale,1_cas_netOtherInvestingChanges,1_cas_cashFlowInvestingActivities,1_cas_netIssuancePaymentsOfDebt,1_cas_netLongTermDebtIssuance,1_cas_netShortTermDebtIssuance,1_cas_commonStockDividendPaid,1_cas_preferredStockDividendPaid,1_cas_cashDividendsPaid,1_cas_netCommonStockIssuance,1_cas_netPreferredStockIssuance,1_cas_repurchaseOfCapitalStock,1_cas_netOtherFinancingCharges,1_cas_cashFlowFinancingActivities,1_cas_freeCashFlow,2_inc_totalRevenue,2_inc_costOfRevenue,2_inc_grossProfit,2_inc_netIncome,2_inc_operatingIncome,2_inc_netIncomeContinuousOperations,2_inc_netInterestIncome,2_inc_interestIncome,2_inc_otherIncomeExpense,2_inc_operatingExpense,2_inc_totalExpenses,2_inc_taxProvision,2_inc_interestExpense,2_inc_SGA,2_inc_researchAndDevelopment,2_inc_ebit,2_inc_dilutedEPS,2_inc_basicEPS,2_bal_cashEquivalent,2_bal_receivables,2_bal_finishedGoods,2_bal_workInProcess,2_bal_rawMaterials,2_bal_otherCurrentAssets,2_bal_inventory,2_bal_currentAssets,2_bal_netPPE,2_bal_otherNonCurrentAssets,2_bal_financialAssets,2_bal_goodwill,2_bal_goodwillAndOtherIntangibleAssets,2_bal_otherIntangibleAssets,2_bal_nonCurrentAccountsReceivable,2_bal_totalNonCurrentAssets,2_bal_currentDebt,2_bal_payablesAndAccruedExpenses,2_bal_otherCurrentLiabilities,2_bal_currentLiabilities,2_bal_longTermDebt,2_bal_otherNonCurrentLiabilities,2_bal_nonCurrentDeferredLiabilities,2_bal_nonCurrentDeferredTaxesLiabilities,2_bal_longTermProvisions,2_bal_totalNonCurrentLiabilities,2_bal_retainedEarnings,2_bal_stockholdersEquity,2_cas_netIncome,2_cas_netIncomeFromContinuingOperations,2_cas_depreciation,2_cas_stockBasedCompensation,2_cas_cashFlowOperatingActivities,2_cas_capitalExpenditure,2_cas_saleOfBusiness,2_cas_purchaseOfBusiness,2_cas_netBusinessPurchaseAndSale,2_cas_saleOfPPE,2_cas_purchaseOfPPE,2_cas_netPPEPurchaseAndSale,2_cas_saleOfInvestment,2_cas_purchaseOfInvestment,2_cas_netInvestmentPurchaseAndSale,2_cas_saleOfInvestmentProperties,2_cas_purchaseOfInvestmentProperties,2_cas_netInvestmentPropertiesPurchaseAndSale,2_cas_saleOfIntangibles,2_cas_purchaseOfIntangibles,2_cas_netIntangiblesPurchaseAndSale,2_cas_netOtherInvestingChanges,2_cas_cashFlowInvestingActivities,2_cas_netIssuancePaymentsOfDebt,2_cas_netLongTermDebtIssuance,2_cas_netShortTermDebtIssuance,2_cas_commonStockDividendPaid,2_cas_preferredStockDividendPaid,2_cas_cashDividendsPaid,2_cas_netCommonStockIssuance,2_cas_netPreferredStockIssuance,2_cas_repurchaseOfCapitalStock,2_cas_netOtherFinancingCharges,2_cas_cashFlowFinancingActivities,2_cas_freeCashFlow,3_inc_totalRevenue,3_inc_costOfRevenue,3_inc_grossProfit,3_inc_netIncome,3_inc_operatingIncome,3_inc_netIncomeContinuousOperations,3_inc_netInterestIncome,3_inc_interestIncome,3_inc_otherIncomeExpense,3_inc_operatingExpense,3_inc_totalExpenses,3_inc_taxProvision,3_inc_interestExpense,3_inc_SGA,3_inc_researchAndDevelopment,3_inc_ebit,3_inc_dilutedEPS,3_inc_basicEPS,3_bal_cashEquivalent,3_bal_receivables,3_bal_finishedGoods,3_bal_workInProcess,3_bal_rawMaterials,3_bal_otherCurrentAssets,3_bal_inventory,3_bal_currentAssets,3_bal_netPPE,3_bal_otherNonCurrentAssets,3_bal_financialAssets,3_bal_goodwill,3_bal_goodwillAndOtherIntangibleAssets,3_bal_otherIntangibleAssets,3_bal_nonCurrentAccountsReceivable,3_bal_totalNonCurrentAssets,3_bal_currentDebt,3_bal_payablesAndAccruedExpenses,3_bal_otherCurrentLiabilities,3_bal_currentLiabilities,3_bal_longTermDebt,3_bal_otherNonCurrentLiabilities,3_bal_nonCurrentDeferredLiabilities,3_bal_nonCurrentDeferredTaxesLiabilities,3_bal_longTermProvisions,3_bal_totalNonCurrentLiabilities,3_bal_retainedEarnings,3_bal_stockholdersEquity,3_cas_netIncome,3_cas_netIncomeFromContinuingOperations,3_cas_depreciation,3_cas_stockBasedCompensation,3_cas_cashFlowOperatingActivities,3_cas_capitalExpenditure,3_cas_saleOfBusiness,3_cas_purchaseOfBusiness,3_cas_netBusinessPurchaseAndSale,3_cas_saleOfPPE,3_cas_purchaseOfPPE,3_cas_netPPEPurchaseAndSale,3_cas_saleOfInvestment,3_cas_purchaseOfInvestment,3_cas_netInvestmentPurchaseAndSale,3_cas_saleOfInvestmentProperties,3_cas_purchaseOfInvestmentProperties,3_cas_netInvestmentPropertiesPurchaseAndSale,3_cas_saleOfIntangibles,3_cas_purchaseOfIntangibles,3_cas_netIntangiblesPurchaseAndSale,3_cas_netOtherInvestingChanges,3_cas_cashFlowInvestingActivities,3_cas_netIssuancePaymentsOfDebt,3_cas_netLongTermDebtIssuance,3_cas_netShortTermDebtIssuance,3_cas_commonStockDividendPaid,3_cas_preferredStockDividendPaid,3_cas_cashDividendsPaid,3_cas_netCommonStockIssuance,3_cas_netPreferredStockIssuance,3_cas_repurchaseOfCapitalStock,3_cas_netOtherFinancingCharges,3_cas_cashFlowFinancingActivities,3_cas_freeCashFlow
319,Specialty Industrial Machinery,2451900000,1540200000,911700000,159100000,275300000,159100000,-88900000,0,4500000,636400000,2176600000,31800000,88900000,436400000,0,279800000,0,0,505500000,459100000,71400000,47600000,370500000,76800000,502500000,1543900000,326600000,212200000,0,1287700000,2542700000,1255000000,0,3084500000,7600000,567000000,0,574600000,1603800000,229400000,251000000,251000000,0,2183900000,-141400000,1869900000,159100000,159100000,178100000,19200000,343300000,-43200000,0,-12000000,-12000000,900000,0,900000,0,0,0,0,0,0,0,0,0,0,-54300000,-32800000,-32800000,0,0,0,0,-18600000,0,-18600000,-2800000,-11500000,300100000,4910200000,3296800000,1613400000,-33300000,105600000,-32400000,-111100000,0,-13900000,1507800000,4804600000,13000000,111100000,894800000,0,91700000,0,0,1750900000,966600000,258400000,88600000,587800000,201000000,943600000,3862100000,797300000,346900000,0,6303600000,11036200000,4732600000,0,12196500000,40400000,1458200000,0,1498600000,3859100000,360700000,875700000,875700000,0,5370500000,-175700000,9119700000,-33300000,-32400000,500900000,51300000,914300000,-48700000,0,0,9000000,1800000,0,1800000,0,0,0,0,0,0,0,0,0,0,-37900000,361000000,361000000,0,0,0,0,-2100000,0,-2100000,-52900000,328700000,865600000,5152400000,3163900000,1988500000,562500000,565700000,523400000,-87700000,0,35000000,1422800000,4586700000,-21800000,87700000,1028000000,0,600700000,1,1,2109600000,948600000,283400000,88600000,506600000,186900000,854200000,4114900000,648600000,468700000,0,5981600000,9894300000,3912700000,0,11039600000,38800000,1411800000,17100000,1467700000,3401800000,310100000,708600000,708600000,0,4615600000,378600000,9001500000,562500000,523400000,422100000,87200000,627800000,-64100000,0,0,-974800000,9500000,0,9500000,0,0,0,0,0,0,0,0,0,0,-1029400000,-435700000,-435700000,0,-8200000,0,-8200000,-736800000,0,-736800000,0,-1157000000,551400000,5916300000,3590700000,2325600000,604700000,817300000,593300000,-103200000,0,28100000,1508300000,5036900000,149600000,103200000,1095800000,0,845400000,1,1,1613000000,1122000000,338700000,122200000,625000000,206900000,1025400000,3967300000,624400000,509100000,0,6064200000,9642800000,3578600000,0,10798600000,36500000,1637500000,0,1674000000,2716100000,360800000,610600000,610600000,0,3834700000,950900000,9195800000,604700000,593300000,432800000,78900000,865400000,-94600000,0,-246800000,-246800000,0,0,0,0,0,0,0,0,0,0,0,0,4100000,-337300000,-655600000,-655600000,0,-32400000,0,-32400000,-261100000,0,-261100000,-24200000,-954000000,765700000
37,Semiconductors,6731000000,3863000000,2868000000,341000000,631000000,341000000,-94000000,15000000,-165000000,2237000000,6100000000,31000000,94000000,750000000,1547000000,466000000,0,0,1503000000,1859000000,197000000,691000000,94000000,233000000,982000000,4597000000,705000000,379000000,0,289000000,289000000,210000000,0,1431000000,43000000,2285000000,74000000,2359000000,685000000,157000000,0,0,0,842000000,-7095000000,2827000000,341000000,341000000,258000000,197000000,493000000,-217000000,0,0,0,0,-217000000,-217000000,325000000,-284000000,41000000,0,0,0,0,0,0,27000000,-149000000,-473000000,-403000000,-70000000,0,0,0,0,0,0,-7000000,43000000,493000000,9763000000,5416000000,4347000000,2490000000,1369000000,2490000000,-47000000,8000000,-47000000,2978000000,8394000000,-1210000000,47000000,995000000,1983000000,1322000000,2,2,2290000000,2076000000,167000000,1139000000,93000000,378000000,1399000000,6143000000,849000000,373000000,0,289000000,289000000,229000000,0,2819000000,0,2342000000,75000000,2417000000,531000000,177000000,0,0,0,708000000,-4605000000,5837000000,2490000000,2490000000,354000000,274000000,1071000000,-294000000,0,0,0,0,-294000000,-294000000,192000000,-850000000,-658000000,0,0,0,0,0,0,0,-952000000,0,-200000000,200000000,0,0,0,-78000000,0,-78000000,-1000000,6000000,1071000000,16434000000,8505000000,7929000000,3162000000,3648000000,3162000000,-34000000,8000000,55000000,4281000000,12786000000,513000000,34000000,1448000000,2845000000,3703000000,2,2,3608000000,2708000000,197000000,1676000000,82000000,312000000,1955000000,8583000000,1069000000,1478000000,0,289000000,289000000,328000000,0,3836000000,312000000,3830000000,98000000,4240000000,349000000,333000000,0,0,0,682000000,-1451000000,7497000000,3162000000,3162000000,463000000,379000000,3521000000,-301000000,0,0,0,0,-301000000,-301000000,1678000000,-2056000000,-378000000,0,0,0,0,0,0,-7000000,-686000000,0,0,0,0,0,0,-1999000000,0,-1999000000,0,-1895000000,3521000000,23601000000,12998000000,10603000000,1320000000,1264000000,1320000000,-88000000,65000000,8000000,9339000000,22337000000,-122000000,88000000,2336000000,5005000000,1272000000,0,0,5855000000,4128000000,892000000,2648000000,231000000,1265000000,3771000000,15019000000,1973000000,2152000000,0,24177000000,48295000000,24118000000,0,52561000000,0,6033000000,336000000,6369000000,2863000000,1664000000,1934000000,1934000000,0,6461000000,-131000000,54750000000,1320000000,1320000000,4262000000,1081000000,3565000000,-450000000,2366000000,-1544000000,822000000,0,-450000000,-450000000,4310000000,-2667000000,1643000000,0,0,0,0,0,0,-16000000,1999000000,679000000,679000000,0,0,0,0,-4108000000,0,-4108000000,-2000000,-3264000000,3565000000
57,Electronic Gaming & Multimedia,6489000000,2094000000,4395000000,1503000000,1739000000,1503000000,26000000,79000000,-132000000,2656000000,4750000000,130000000,90000000,1658000000,998000000,1739000000,1,1,5794000000,848000000,0,0,0,618000000,32000000,7292000000,253000000,658000000,0,9764000000,10349000000,585000000,0,12553000000,0,1540000000,0,2915000000,2675000000,945000000,505000000,505000000,0,4125000000,7813000000,12805000000,1503000000,1503000000,553000000,166000000,1831000000,-116000000,0,0,0,0,0,0,153000000,-65000000,88000000,0,0,0,0,0,0,6000000,-22000000,0,0,0,-283000000,0,-283000000,105000000,0,0,-59000000,-237000000,1715000000,8086000000,2260000000,5826000000,2197000000,2828000000,2197000000,-87000000,21000000,-125000000,2998000000,5258000000,419000000,99000000,1848000000,1150000000,2828000000,2,2,8647000000,1052000000,0,0,0,866000000,0,10565000000,209000000,641000000,0,9765000000,10376000000,611000000,0,12544000000,0,1411000000,0,3100000000,3605000000,949000000,418000000,418000000,0,4972000000,9691000000,15037000000,2197000000,2197000000,446000000,218000000,2252000000,-78000000,0,0,0,0,0,0,121000000,-221000000,-100000000,0,0,0,0,0,0,0,-178000000,944000000,944000000,0,-316000000,0,-316000000,170000000,0,0,-87000000,711000000,2174000000,8803000000,2317000000,6486000000,2699000000,3336000000,2699000000,-95000000,5000000,-77000000,3150000000,5467000000,465000000,108000000,1813000000,1337000000,3336000000,3,3,10423000000,972000000,0,0,0,1161000000,0,12556000000,169000000,497000000,0,9799000000,10457000000,658000000,0,12500000000,0,1293000000,0,2411000000,3608000000,932000000,506000000,506000000,0,5046000000,12025000000,17599000000,2699000000,2699000000,440000000,508000000,2414000000,-80000000,0,0,0,0,0,0,280000000,-248000000,32000000,0,0,0,0,0,0,-11000000,-59000000,0,0,0,0,0,-365000000,90000000,0,0,-246000000,-521000000,2334000000,7528000000,2222000000,5306000000,1513000000,1667000000,1513000000,-108000000,0,185000000,3639000000,5861000000,231000000,108000000,2218000000,1421000000,1852000000,1,1,11992000000,1204000000,0,0,0,1273000000,0,14469000000,193000000,508000000,0,9929000000,11012000000,1083000000,0,12914000000,0,1467000000,1143000000,3555000000,3611000000,816000000,158000000,158000000,0,4585000000,13171000000,19243000000,1513000000,1513000000,319000000,462000000,2220000000,-91000000,0,-135000000,-135000000,0,0,0,239000000,-5008000000,-4769000000,0,0,0,0,0,0,1000000,-4994000000,0,0,0,-367000000,0,-367000000,47000000,0,0,-214000000,-534000000,2129000000
605,Recreational Vehicles,7864758000,6891664000,973094000,133275000,361412000,132465000,-60032000,8080000,-116714000,611682000,7503346000,52201000,60032000,536044000,0,244698000,2,2,451262000,716227000,291076000,126636000,455820000,0,827988000,2037357000,1092471000,82418000,0,1358032000,2328843000,970811000,0,3623089000,17370000,1251301000,44094000,1448325000,1885253000,231640000,135703000,135703000,0,2116893000,2066674000,2095228000,133275000,132465000,148777000,18950000,508019000,-130224000,0,-1735854000,-1735854000,2732000,-130224000,-127492000,0,0,0,0,0,0,0,0,0,-2157000,-1865503000,1696647000,1696647000,0,-84139000,0,-84139000,0,0,0,-73435000,1539073000,377795000,8167933000,7049726000,1118207000,222974000,386854000,221384000,-104206000,3116000,-9752000,731353000,7781079000,51512000,104206000,634119000,0,377102000,4,4,541363000,814227000,197076000,128181000,438007000,0,716305000,2102277000,1107649000,91531000,0,1476541000,2391265000,914724000,0,3669183000,13817000,1305442000,35939000,1515281000,1652831000,257779000,123802000,123802000,0,1910610000,2201330000,2345569000,222974000,221384000,196167000,19889000,540941000,-106697000,0,0,0,27677000,-106697000,-79020000,0,0,0,0,0,0,0,0,0,-5229000,-84249000,-290662000,-289898000,-764000,-88318000,0,-88318000,0,0,0,-13936000,-392916000,434244000,12317380000,10422407000,1894973000,659872000,907874000,660870000,-93545000,797000,30252000,987099000,11409506000,183711000,94342000,869916000,0,938923000,11,11,445852000,949932000,172653000,376594000,895027000,0,1369384000,2803523000,1185131000,123792000,0,1563255000,2500426000,937171000,0,3850565000,12411000,1506893000,25720000,1794785000,1594821000,316376000,113598000,113598000,0,1911197000,2770401000,2921843000,659872000,660870000,230581000,30514000,526482000,-128835000,0,-310938000,-310938000,1950000,-128835000,-126885000,0,0,0,0,0,0,0,0,0,9330000,-428493000,-73559000,-74399000,840000,0,0,-90801000,0,0,0,-24078000,-188438000,526482000,16312525000,13506495000,2806030000,1137804000,1532622000,1138243000,-90092000,1130000,17334000,1273408000,14779903000,321621000,91222000,1116462000,0,1532622000,20,20,311690000,944181000,362881000,397495000,1131849000,0,1754773000,3062479000,1258159000,157901000,0,1804151000,2921643000,1117492000,0,4345653000,13190000,1466551000,21403000,1755916000,1754239000,297323000,115931000,115931000,0,2051562000,3813261000,3574391000,1137804000,1138243000,284453000,31421000,990253000,-242357000,0,-781967000,-781967000,16067000,-242357000,-226290000,0,0,0,0,0,0,0,0,0,-41000000,-1049257000,255527000,154474000,101053000,-94944000,0,-94944000,-165107000,0,-165107000,-43317000,-47841000,747896000
208,Semiconductor Equipment & Materials,1591066000,879413000,711653000,254860000,239278000,254860000,-42310000,4652000,121081000,472375000,1351788000,63189000,42310000,284807000,121140000,360359000,1,1,351911000,234409000,163393000,30856000,92849000,34427000,287098000,932397000,529704000,13744000,0,695044000,1028996000,333952000,0,1583689000,4000000,260433000,0,264433000,976311000,109453000,71586000,71586000,0,1085764000,366127000,1165889000,254860000,254860000,141403000,19629000,382298000,-112355000,0,-277369000,-277369000,0,-112355000,-112355000,0,0,0,0,0,0,0,0,0,3884000,-385840000,-4000000,-4000000,0,-40566000,0,-40566000,-80321000,0,-80321000,-9224000,-126820000,269943000,1859313000,1009591000,849722000,294969000,395445000,294969000,-47814000,786000,6656000,454277000,1463868000,59318000,47814000,265128000,136057000,402101000,2,2,580893000,264392000,194309000,32316000,97319000,43892000,323944000,1234257000,571291000,11960000,0,748037000,1085669000,337632000,0,1683439000,0,302626000,0,302626000,1125513000,110063000,73606000,73606000,0,1235576000,577833000,1379494000,294969000,294969000,136522000,22920000,446674000,-131752000,0,-111912000,-111912000,0,-131752000,-131752000,0,0,0,0,0,0,0,0,0,338000,-243326000,149000000,149000000,0,-43245000,0,-43245000,-44563000,0,-44563000,-47781000,22149000,314922000,2298893000,1239229000,1059664000,409126000,551768000,409126000,-40997000,243000,-31695000,507896000,1747125000,69950000,40997000,292408000,167632000,520073000,3,3,402565000,347413000,242970000,40257000,191986000,52867000,475213000,1313370000,720661000,11379000,0,793702000,1128815000,335113000,0,1878526000,0,379001000,0,379001000,997128000,101986000,64170000,64170000,0,1099114000,879776000,1713781000,409126000,409126000,138167000,29884000,400454000,-210626000,0,-91942000,-91942000,0,-210626000,-210626000,0,0,0,0,0,0,0,0,0,4450000,-298118000,-150000000,-150000000,0,-43545000,0,-43545000,-42365000,0,-67109000,-40587000,-276497000,189828000,3282033000,1885620000,1396413000,208920000,479981000,208920000,-208975000,3694000,-23926000,916432000,2802052000,38160000,208975000,543485000,228994000,456055000,1,1,563439000,535485000,415057000,60182000,337576000,129297000,812815000,2335185000,1488277000,36242000,0,4408331000,6250286000,1841955000,0,7803672000,151965000,599329000,10637000,761931000,5713644000,445282000,391192000,391192000,0,6158926000,1031391000,3218000000,208920000,208920000,279324000,66577000,352283000,-466192000,0,-4474925000,-4474925000,0,-466192000,-466192000,0,0,0,0,0,0,0,0,0,-4592000,-4945709000,4930753000,4930753000,135000000,-57309000,0,-57309000,0,0,0,-123409000,4766203000,-113909000


In [159]:
# Imputation transformer

trf1 = ColumnTransformer([
    ('Impute_age', SimpleImputer(strategy='constant',fill_value=0),slice(1,426)),
     ],remainder='passthrough')

In [88]:
# One hot encoding - Apply to first column (Industry)

trf2 = ColumnTransformer([
    ('ohe_sex_embarked',OneHotEncoder(sparse_output=False, handle_unknown='ignore'),[0]),
       ],remainder='passthrough')

In [116]:
# Scaling 
trf3 = ColumnTransformer([
    ('scale',MinMaxScaler(),slice(0,426)),
       ],remainder='passthrough')

In [198]:
# Resample

trf4 = SMOTE()

In [172]:
# Feature selection

trf5 = SelectKBest(score_func=chi2,k=50)

In [192]:
# train the model
trf6 = xgb.XGBClassifier(random_state=666)

In [199]:
from imblearn.pipeline import Pipeline
# Create Pipeline

pipe = Pipeline([
    ('trf1',trf2),
    ('trf2',trf1),
    ('trf3',trf3),
    ('trf4',trf4),
    ('trf5',trf5)
])

In [196]:
X_train.head()

Unnamed: 0,industry,0_inc_totalRevenue,0_inc_costOfRevenue,0_inc_grossProfit,0_inc_netIncome,0_inc_operatingIncome,0_inc_netIncomeContinuousOperations,0_inc_netInterestIncome,0_inc_interestIncome,0_inc_otherIncomeExpense,0_inc_operatingExpense,0_inc_totalExpenses,0_inc_taxProvision,0_inc_interestExpense,0_inc_SGA,0_inc_researchAndDevelopment,0_inc_ebit,0_inc_dilutedEPS,0_inc_basicEPS,0_bal_cashEquivalent,0_bal_receivables,0_bal_finishedGoods,0_bal_workInProcess,0_bal_rawMaterials,0_bal_otherCurrentAssets,0_bal_inventory,0_bal_currentAssets,0_bal_netPPE,0_bal_otherNonCurrentAssets,0_bal_financialAssets,0_bal_goodwill,0_bal_goodwillAndOtherIntangibleAssets,0_bal_otherIntangibleAssets,0_bal_nonCurrentAccountsReceivable,0_bal_totalNonCurrentAssets,0_bal_currentDebt,0_bal_payablesAndAccruedExpenses,0_bal_otherCurrentLiabilities,0_bal_currentLiabilities,0_bal_longTermDebt,0_bal_otherNonCurrentLiabilities,0_bal_nonCurrentDeferredLiabilities,0_bal_nonCurrentDeferredTaxesLiabilities,0_bal_longTermProvisions,0_bal_totalNonCurrentLiabilities,0_bal_retainedEarnings,0_bal_stockholdersEquity,0_cas_netIncome,0_cas_netIncomeFromContinuingOperations,0_cas_depreciation,0_cas_stockBasedCompensation,0_cas_cashFlowOperatingActivities,0_cas_capitalExpenditure,0_cas_saleOfBusiness,0_cas_purchaseOfBusiness,0_cas_netBusinessPurchaseAndSale,0_cas_saleOfPPE,0_cas_purchaseOfPPE,0_cas_netPPEPurchaseAndSale,0_cas_saleOfInvestment,0_cas_purchaseOfInvestment,0_cas_netInvestmentPurchaseAndSale,0_cas_saleOfInvestmentProperties,0_cas_purchaseOfInvestmentProperties,0_cas_netInvestmentPropertiesPurchaseAndSale,0_cas_saleOfIntangibles,0_cas_purchaseOfIntangibles,0_cas_netIntangiblesPurchaseAndSale,0_cas_netOtherInvestingChanges,0_cas_cashFlowInvestingActivities,0_cas_netIssuancePaymentsOfDebt,0_cas_netLongTermDebtIssuance,0_cas_netShortTermDebtIssuance,0_cas_commonStockDividendPaid,0_cas_preferredStockDividendPaid,0_cas_cashDividendsPaid,0_cas_netCommonStockIssuance,0_cas_netPreferredStockIssuance,0_cas_repurchaseOfCapitalStock,0_cas_netOtherFinancingCharges,0_cas_cashFlowFinancingActivities,0_cas_freeCashFlow,1_inc_totalRevenue,1_inc_costOfRevenue,1_inc_grossProfit,1_inc_netIncome,1_inc_operatingIncome,1_inc_netIncomeContinuousOperations,1_inc_netInterestIncome,1_inc_interestIncome,1_inc_otherIncomeExpense,1_inc_operatingExpense,1_inc_totalExpenses,1_inc_taxProvision,1_inc_interestExpense,1_inc_SGA,1_inc_researchAndDevelopment,1_inc_ebit,1_inc_dilutedEPS,1_inc_basicEPS,1_bal_cashEquivalent,1_bal_receivables,1_bal_finishedGoods,1_bal_workInProcess,1_bal_rawMaterials,1_bal_otherCurrentAssets,1_bal_inventory,1_bal_currentAssets,1_bal_netPPE,1_bal_otherNonCurrentAssets,1_bal_financialAssets,1_bal_goodwill,1_bal_goodwillAndOtherIntangibleAssets,1_bal_otherIntangibleAssets,1_bal_nonCurrentAccountsReceivable,1_bal_totalNonCurrentAssets,1_bal_currentDebt,1_bal_payablesAndAccruedExpenses,1_bal_otherCurrentLiabilities,1_bal_currentLiabilities,1_bal_longTermDebt,1_bal_otherNonCurrentLiabilities,1_bal_nonCurrentDeferredLiabilities,1_bal_nonCurrentDeferredTaxesLiabilities,1_bal_longTermProvisions,1_bal_totalNonCurrentLiabilities,1_bal_retainedEarnings,1_bal_stockholdersEquity,1_cas_netIncome,1_cas_netIncomeFromContinuingOperations,1_cas_depreciation,1_cas_stockBasedCompensation,1_cas_cashFlowOperatingActivities,1_cas_capitalExpenditure,1_cas_saleOfBusiness,1_cas_purchaseOfBusiness,1_cas_netBusinessPurchaseAndSale,1_cas_saleOfPPE,1_cas_purchaseOfPPE,1_cas_netPPEPurchaseAndSale,1_cas_saleOfInvestment,1_cas_purchaseOfInvestment,1_cas_netInvestmentPurchaseAndSale,1_cas_saleOfInvestmentProperties,1_cas_purchaseOfInvestmentProperties,1_cas_netInvestmentPropertiesPurchaseAndSale,1_cas_saleOfIntangibles,1_cas_purchaseOfIntangibles,1_cas_netIntangiblesPurchaseAndSale,1_cas_netOtherInvestingChanges,1_cas_cashFlowInvestingActivities,1_cas_netIssuancePaymentsOfDebt,1_cas_netLongTermDebtIssuance,1_cas_netShortTermDebtIssuance,1_cas_commonStockDividendPaid,1_cas_preferredStockDividendPaid,1_cas_cashDividendsPaid,1_cas_netCommonStockIssuance,1_cas_netPreferredStockIssuance,1_cas_repurchaseOfCapitalStock,1_cas_netOtherFinancingCharges,1_cas_cashFlowFinancingActivities,1_cas_freeCashFlow,2_inc_totalRevenue,2_inc_costOfRevenue,2_inc_grossProfit,2_inc_netIncome,2_inc_operatingIncome,2_inc_netIncomeContinuousOperations,2_inc_netInterestIncome,2_inc_interestIncome,2_inc_otherIncomeExpense,2_inc_operatingExpense,2_inc_totalExpenses,2_inc_taxProvision,2_inc_interestExpense,2_inc_SGA,2_inc_researchAndDevelopment,2_inc_ebit,2_inc_dilutedEPS,2_inc_basicEPS,2_bal_cashEquivalent,2_bal_receivables,2_bal_finishedGoods,2_bal_workInProcess,2_bal_rawMaterials,2_bal_otherCurrentAssets,2_bal_inventory,2_bal_currentAssets,2_bal_netPPE,2_bal_otherNonCurrentAssets,2_bal_financialAssets,2_bal_goodwill,2_bal_goodwillAndOtherIntangibleAssets,2_bal_otherIntangibleAssets,2_bal_nonCurrentAccountsReceivable,2_bal_totalNonCurrentAssets,2_bal_currentDebt,2_bal_payablesAndAccruedExpenses,2_bal_otherCurrentLiabilities,2_bal_currentLiabilities,2_bal_longTermDebt,2_bal_otherNonCurrentLiabilities,2_bal_nonCurrentDeferredLiabilities,2_bal_nonCurrentDeferredTaxesLiabilities,2_bal_longTermProvisions,2_bal_totalNonCurrentLiabilities,2_bal_retainedEarnings,2_bal_stockholdersEquity,2_cas_netIncome,2_cas_netIncomeFromContinuingOperations,2_cas_depreciation,2_cas_stockBasedCompensation,2_cas_cashFlowOperatingActivities,2_cas_capitalExpenditure,2_cas_saleOfBusiness,2_cas_purchaseOfBusiness,2_cas_netBusinessPurchaseAndSale,2_cas_saleOfPPE,2_cas_purchaseOfPPE,2_cas_netPPEPurchaseAndSale,2_cas_saleOfInvestment,2_cas_purchaseOfInvestment,2_cas_netInvestmentPurchaseAndSale,2_cas_saleOfInvestmentProperties,2_cas_purchaseOfInvestmentProperties,2_cas_netInvestmentPropertiesPurchaseAndSale,2_cas_saleOfIntangibles,2_cas_purchaseOfIntangibles,2_cas_netIntangiblesPurchaseAndSale,2_cas_netOtherInvestingChanges,2_cas_cashFlowInvestingActivities,2_cas_netIssuancePaymentsOfDebt,2_cas_netLongTermDebtIssuance,2_cas_netShortTermDebtIssuance,2_cas_commonStockDividendPaid,2_cas_preferredStockDividendPaid,2_cas_cashDividendsPaid,2_cas_netCommonStockIssuance,2_cas_netPreferredStockIssuance,2_cas_repurchaseOfCapitalStock,2_cas_netOtherFinancingCharges,2_cas_cashFlowFinancingActivities,2_cas_freeCashFlow,3_inc_totalRevenue,3_inc_costOfRevenue,3_inc_grossProfit,3_inc_netIncome,3_inc_operatingIncome,3_inc_netIncomeContinuousOperations,3_inc_netInterestIncome,3_inc_interestIncome,3_inc_otherIncomeExpense,3_inc_operatingExpense,3_inc_totalExpenses,3_inc_taxProvision,3_inc_interestExpense,3_inc_SGA,3_inc_researchAndDevelopment,3_inc_ebit,3_inc_dilutedEPS,3_inc_basicEPS,3_bal_cashEquivalent,3_bal_receivables,3_bal_finishedGoods,3_bal_workInProcess,3_bal_rawMaterials,3_bal_otherCurrentAssets,3_bal_inventory,3_bal_currentAssets,3_bal_netPPE,3_bal_otherNonCurrentAssets,3_bal_financialAssets,3_bal_goodwill,3_bal_goodwillAndOtherIntangibleAssets,3_bal_otherIntangibleAssets,3_bal_nonCurrentAccountsReceivable,3_bal_totalNonCurrentAssets,3_bal_currentDebt,3_bal_payablesAndAccruedExpenses,3_bal_otherCurrentLiabilities,3_bal_currentLiabilities,3_bal_longTermDebt,3_bal_otherNonCurrentLiabilities,3_bal_nonCurrentDeferredLiabilities,3_bal_nonCurrentDeferredTaxesLiabilities,3_bal_longTermProvisions,3_bal_totalNonCurrentLiabilities,3_bal_retainedEarnings,3_bal_stockholdersEquity,3_cas_netIncome,3_cas_netIncomeFromContinuingOperations,3_cas_depreciation,3_cas_stockBasedCompensation,3_cas_cashFlowOperatingActivities,3_cas_capitalExpenditure,3_cas_saleOfBusiness,3_cas_purchaseOfBusiness,3_cas_netBusinessPurchaseAndSale,3_cas_saleOfPPE,3_cas_purchaseOfPPE,3_cas_netPPEPurchaseAndSale,3_cas_saleOfInvestment,3_cas_purchaseOfInvestment,3_cas_netInvestmentPurchaseAndSale,3_cas_saleOfInvestmentProperties,3_cas_purchaseOfInvestmentProperties,3_cas_netInvestmentPropertiesPurchaseAndSale,3_cas_saleOfIntangibles,3_cas_purchaseOfIntangibles,3_cas_netIntangiblesPurchaseAndSale,3_cas_netOtherInvestingChanges,3_cas_cashFlowInvestingActivities,3_cas_netIssuancePaymentsOfDebt,3_cas_netLongTermDebtIssuance,3_cas_netShortTermDebtIssuance,3_cas_commonStockDividendPaid,3_cas_preferredStockDividendPaid,3_cas_cashDividendsPaid,3_cas_netCommonStockIssuance,3_cas_netPreferredStockIssuance,3_cas_repurchaseOfCapitalStock,3_cas_netOtherFinancingCharges,3_cas_cashFlowFinancingActivities,3_cas_freeCashFlow
319,Specialty Industrial Machinery,2451900000,1540200000,911700000,159100000,275300000,159100000,-88900000,0,4500000,636400000,2176600000,31800000,88900000,436400000,0,279800000,0,0,505500000,459100000,71400000,47600000,370500000,76800000,502500000,1543900000,326600000,212200000,0,1287700000,2542700000,1255000000,0,3084500000,7600000,567000000,0,574600000,1603800000,229400000,251000000,251000000,0,2183900000,-141400000,1869900000,159100000,159100000,178100000,19200000,343300000,-43200000,0,-12000000,-12000000,900000,0,900000,0,0,0,0,0,0,0,0,0,0,-54300000,-32800000,-32800000,0,0,0,0,-18600000,0,-18600000,-2800000,-11500000,300100000,4910200000,3296800000,1613400000,-33300000,105600000,-32400000,-111100000,0,-13900000,1507800000,4804600000,13000000,111100000,894800000,0,91700000,0,0,1750900000,966600000,258400000,88600000,587800000,201000000,943600000,3862100000,797300000,346900000,0,6303600000,11036200000,4732600000,0,12196500000,40400000,1458200000,0,1498600000,3859100000,360700000,875700000,875700000,0,5370500000,-175700000,9119700000,-33300000,-32400000,500900000,51300000,914300000,-48700000,0,0,9000000,1800000,0,1800000,0,0,0,0,0,0,0,0,0,0,-37900000,361000000,361000000,0,0,0,0,-2100000,0,-2100000,-52900000,328700000,865600000,5152400000,3163900000,1988500000,562500000,565700000,523400000,-87700000,0,35000000,1422800000,4586700000,-21800000,87700000,1028000000,0,600700000,1,1,2109600000,948600000,283400000,88600000,506600000,186900000,854200000,4114900000,648600000,468700000,0,5981600000,9894300000,3912700000,0,11039600000,38800000,1411800000,17100000,1467700000,3401800000,310100000,708600000,708600000,0,4615600000,378600000,9001500000,562500000,523400000,422100000,87200000,627800000,-64100000,0,0,-974800000,9500000,0,9500000,0,0,0,0,0,0,0,0,0,0,-1029400000,-435700000,-435700000,0,-8200000,0,-8200000,-736800000,0,-736800000,0,-1157000000,551400000,5916300000,3590700000,2325600000,604700000,817300000,593300000,-103200000,0,28100000,1508300000,5036900000,149600000,103200000,1095800000,0,845400000,1,1,1613000000,1122000000,338700000,122200000,625000000,206900000,1025400000,3967300000,624400000,509100000,0,6064200000,9642800000,3578600000,0,10798600000,36500000,1637500000,0,1674000000,2716100000,360800000,610600000,610600000,0,3834700000,950900000,9195800000,604700000,593300000,432800000,78900000,865400000,-94600000,0,-246800000,-246800000,0,0,0,0,0,0,0,0,0,0,0,0,4100000,-337300000,-655600000,-655600000,0,-32400000,0,-32400000,-261100000,0,-261100000,-24200000,-954000000,765700000
37,Semiconductors,6731000000,3863000000,2868000000,341000000,631000000,341000000,-94000000,15000000,-165000000,2237000000,6100000000,31000000,94000000,750000000,1547000000,466000000,0,0,1503000000,1859000000,197000000,691000000,94000000,233000000,982000000,4597000000,705000000,379000000,0,289000000,289000000,210000000,0,1431000000,43000000,2285000000,74000000,2359000000,685000000,157000000,0,0,0,842000000,-7095000000,2827000000,341000000,341000000,258000000,197000000,493000000,-217000000,0,0,0,0,-217000000,-217000000,325000000,-284000000,41000000,0,0,0,0,0,0,27000000,-149000000,-473000000,-403000000,-70000000,0,0,0,0,0,0,-7000000,43000000,493000000,9763000000,5416000000,4347000000,2490000000,1369000000,2490000000,-47000000,8000000,-47000000,2978000000,8394000000,-1210000000,47000000,995000000,1983000000,1322000000,2,2,2290000000,2076000000,167000000,1139000000,93000000,378000000,1399000000,6143000000,849000000,373000000,0,289000000,289000000,229000000,0,2819000000,0,2342000000,75000000,2417000000,531000000,177000000,0,0,0,708000000,-4605000000,5837000000,2490000000,2490000000,354000000,274000000,1071000000,-294000000,0,0,0,0,-294000000,-294000000,192000000,-850000000,-658000000,0,0,0,0,0,0,0,-952000000,0,-200000000,200000000,0,0,0,-78000000,0,-78000000,-1000000,6000000,1071000000,16434000000,8505000000,7929000000,3162000000,3648000000,3162000000,-34000000,8000000,55000000,4281000000,12786000000,513000000,34000000,1448000000,2845000000,3703000000,2,2,3608000000,2708000000,197000000,1676000000,82000000,312000000,1955000000,8583000000,1069000000,1478000000,0,289000000,289000000,328000000,0,3836000000,312000000,3830000000,98000000,4240000000,349000000,333000000,0,0,0,682000000,-1451000000,7497000000,3162000000,3162000000,463000000,379000000,3521000000,-301000000,0,0,0,0,-301000000,-301000000,1678000000,-2056000000,-378000000,0,0,0,0,0,0,-7000000,-686000000,0,0,0,0,0,0,-1999000000,0,-1999000000,0,-1895000000,3521000000,23601000000,12998000000,10603000000,1320000000,1264000000,1320000000,-88000000,65000000,8000000,9339000000,22337000000,-122000000,88000000,2336000000,5005000000,1272000000,0,0,5855000000,4128000000,892000000,2648000000,231000000,1265000000,3771000000,15019000000,1973000000,2152000000,0,24177000000,48295000000,24118000000,0,52561000000,0,6033000000,336000000,6369000000,2863000000,1664000000,1934000000,1934000000,0,6461000000,-131000000,54750000000,1320000000,1320000000,4262000000,1081000000,3565000000,-450000000,2366000000,-1544000000,822000000,0,-450000000,-450000000,4310000000,-2667000000,1643000000,0,0,0,0,0,0,-16000000,1999000000,679000000,679000000,0,0,0,0,-4108000000,0,-4108000000,-2000000,-3264000000,3565000000
57,Electronic Gaming & Multimedia,6489000000,2094000000,4395000000,1503000000,1739000000,1503000000,26000000,79000000,-132000000,2656000000,4750000000,130000000,90000000,1658000000,998000000,1739000000,1,1,5794000000,848000000,0,0,0,618000000,32000000,7292000000,253000000,658000000,0,9764000000,10349000000,585000000,0,12553000000,0,1540000000,0,2915000000,2675000000,945000000,505000000,505000000,0,4125000000,7813000000,12805000000,1503000000,1503000000,553000000,166000000,1831000000,-116000000,0,0,0,0,0,0,153000000,-65000000,88000000,0,0,0,0,0,0,6000000,-22000000,0,0,0,-283000000,0,-283000000,105000000,0,0,-59000000,-237000000,1715000000,8086000000,2260000000,5826000000,2197000000,2828000000,2197000000,-87000000,21000000,-125000000,2998000000,5258000000,419000000,99000000,1848000000,1150000000,2828000000,2,2,8647000000,1052000000,0,0,0,866000000,0,10565000000,209000000,641000000,0,9765000000,10376000000,611000000,0,12544000000,0,1411000000,0,3100000000,3605000000,949000000,418000000,418000000,0,4972000000,9691000000,15037000000,2197000000,2197000000,446000000,218000000,2252000000,-78000000,0,0,0,0,0,0,121000000,-221000000,-100000000,0,0,0,0,0,0,0,-178000000,944000000,944000000,0,-316000000,0,-316000000,170000000,0,0,-87000000,711000000,2174000000,8803000000,2317000000,6486000000,2699000000,3336000000,2699000000,-95000000,5000000,-77000000,3150000000,5467000000,465000000,108000000,1813000000,1337000000,3336000000,3,3,10423000000,972000000,0,0,0,1161000000,0,12556000000,169000000,497000000,0,9799000000,10457000000,658000000,0,12500000000,0,1293000000,0,2411000000,3608000000,932000000,506000000,506000000,0,5046000000,12025000000,17599000000,2699000000,2699000000,440000000,508000000,2414000000,-80000000,0,0,0,0,0,0,280000000,-248000000,32000000,0,0,0,0,0,0,-11000000,-59000000,0,0,0,0,0,-365000000,90000000,0,0,-246000000,-521000000,2334000000,7528000000,2222000000,5306000000,1513000000,1667000000,1513000000,-108000000,0,185000000,3639000000,5861000000,231000000,108000000,2218000000,1421000000,1852000000,1,1,11992000000,1204000000,0,0,0,1273000000,0,14469000000,193000000,508000000,0,9929000000,11012000000,1083000000,0,12914000000,0,1467000000,1143000000,3555000000,3611000000,816000000,158000000,158000000,0,4585000000,13171000000,19243000000,1513000000,1513000000,319000000,462000000,2220000000,-91000000,0,-135000000,-135000000,0,0,0,239000000,-5008000000,-4769000000,0,0,0,0,0,0,1000000,-4994000000,0,0,0,-367000000,0,-367000000,47000000,0,0,-214000000,-534000000,2129000000
605,Recreational Vehicles,7864758000,6891664000,973094000,133275000,361412000,132465000,-60032000,8080000,-116714000,611682000,7503346000,52201000,60032000,536044000,0,244698000,2,2,451262000,716227000,291076000,126636000,455820000,0,827988000,2037357000,1092471000,82418000,0,1358032000,2328843000,970811000,0,3623089000,17370000,1251301000,44094000,1448325000,1885253000,231640000,135703000,135703000,0,2116893000,2066674000,2095228000,133275000,132465000,148777000,18950000,508019000,-130224000,0,-1735854000,-1735854000,2732000,-130224000,-127492000,0,0,0,0,0,0,0,0,0,-2157000,-1865503000,1696647000,1696647000,0,-84139000,0,-84139000,0,0,0,-73435000,1539073000,377795000,8167933000,7049726000,1118207000,222974000,386854000,221384000,-104206000,3116000,-9752000,731353000,7781079000,51512000,104206000,634119000,0,377102000,4,4,541363000,814227000,197076000,128181000,438007000,0,716305000,2102277000,1107649000,91531000,0,1476541000,2391265000,914724000,0,3669183000,13817000,1305442000,35939000,1515281000,1652831000,257779000,123802000,123802000,0,1910610000,2201330000,2345569000,222974000,221384000,196167000,19889000,540941000,-106697000,0,0,0,27677000,-106697000,-79020000,0,0,0,0,0,0,0,0,0,-5229000,-84249000,-290662000,-289898000,-764000,-88318000,0,-88318000,0,0,0,-13936000,-392916000,434244000,12317380000,10422407000,1894973000,659872000,907874000,660870000,-93545000,797000,30252000,987099000,11409506000,183711000,94342000,869916000,0,938923000,11,11,445852000,949932000,172653000,376594000,895027000,0,1369384000,2803523000,1185131000,123792000,0,1563255000,2500426000,937171000,0,3850565000,12411000,1506893000,25720000,1794785000,1594821000,316376000,113598000,113598000,0,1911197000,2770401000,2921843000,659872000,660870000,230581000,30514000,526482000,-128835000,0,-310938000,-310938000,1950000,-128835000,-126885000,0,0,0,0,0,0,0,0,0,9330000,-428493000,-73559000,-74399000,840000,0,0,-90801000,0,0,0,-24078000,-188438000,526482000,16312525000,13506495000,2806030000,1137804000,1532622000,1138243000,-90092000,1130000,17334000,1273408000,14779903000,321621000,91222000,1116462000,0,1532622000,20,20,311690000,944181000,362881000,397495000,1131849000,0,1754773000,3062479000,1258159000,157901000,0,1804151000,2921643000,1117492000,0,4345653000,13190000,1466551000,21403000,1755916000,1754239000,297323000,115931000,115931000,0,2051562000,3813261000,3574391000,1137804000,1138243000,284453000,31421000,990253000,-242357000,0,-781967000,-781967000,16067000,-242357000,-226290000,0,0,0,0,0,0,0,0,0,-41000000,-1049257000,255527000,154474000,101053000,-94944000,0,-94944000,-165107000,0,-165107000,-43317000,-47841000,747896000
208,Semiconductor Equipment & Materials,1591066000,879413000,711653000,254860000,239278000,254860000,-42310000,4652000,121081000,472375000,1351788000,63189000,42310000,284807000,121140000,360359000,1,1,351911000,234409000,163393000,30856000,92849000,34427000,287098000,932397000,529704000,13744000,0,695044000,1028996000,333952000,0,1583689000,4000000,260433000,0,264433000,976311000,109453000,71586000,71586000,0,1085764000,366127000,1165889000,254860000,254860000,141403000,19629000,382298000,-112355000,0,-277369000,-277369000,0,-112355000,-112355000,0,0,0,0,0,0,0,0,0,3884000,-385840000,-4000000,-4000000,0,-40566000,0,-40566000,-80321000,0,-80321000,-9224000,-126820000,269943000,1859313000,1009591000,849722000,294969000,395445000,294969000,-47814000,786000,6656000,454277000,1463868000,59318000,47814000,265128000,136057000,402101000,2,2,580893000,264392000,194309000,32316000,97319000,43892000,323944000,1234257000,571291000,11960000,0,748037000,1085669000,337632000,0,1683439000,0,302626000,0,302626000,1125513000,110063000,73606000,73606000,0,1235576000,577833000,1379494000,294969000,294969000,136522000,22920000,446674000,-131752000,0,-111912000,-111912000,0,-131752000,-131752000,0,0,0,0,0,0,0,0,0,338000,-243326000,149000000,149000000,0,-43245000,0,-43245000,-44563000,0,-44563000,-47781000,22149000,314922000,2298893000,1239229000,1059664000,409126000,551768000,409126000,-40997000,243000,-31695000,507896000,1747125000,69950000,40997000,292408000,167632000,520073000,3,3,402565000,347413000,242970000,40257000,191986000,52867000,475213000,1313370000,720661000,11379000,0,793702000,1128815000,335113000,0,1878526000,0,379001000,0,379001000,997128000,101986000,64170000,64170000,0,1099114000,879776000,1713781000,409126000,409126000,138167000,29884000,400454000,-210626000,0,-91942000,-91942000,0,-210626000,-210626000,0,0,0,0,0,0,0,0,0,4450000,-298118000,-150000000,-150000000,0,-43545000,0,-43545000,-42365000,0,-67109000,-40587000,-276497000,189828000,3282033000,1885620000,1396413000,208920000,479981000,208920000,-208975000,3694000,-23926000,916432000,2802052000,38160000,208975000,543485000,228994000,456055000,1,1,563439000,535485000,415057000,60182000,337576000,129297000,812815000,2335185000,1488277000,36242000,0,4408331000,6250286000,1841955000,0,7803672000,151965000,599329000,10637000,761931000,5713644000,445282000,391192000,391192000,0,6158926000,1031391000,3218000000,208920000,208920000,279324000,66577000,352283000,-466192000,0,-4474925000,-4474925000,0,-466192000,-466192000,0,0,0,0,0,0,0,0,0,-4592000,-4945709000,4930753000,4930753000,135000000,-57309000,0,-57309000,0,0,0,-123409000,4766203000,-113909000


In [201]:
pipe.fit(X,y)

Pipeline(steps=[('trf1',
                 ColumnTransformer(remainder='passthrough',
                                   transformers=[('ohe_sex_embarked',
                                                  OneHotEncoder(handle_unknown='ignore',
                                                                sparse_output=False),
                                                  [0])])),
                ('trf2',
                 ColumnTransformer(remainder='passthrough',
                                   transformers=[('Impute_age',
                                                  SimpleImputer(fill_value=0,
                                                                strategy='constant'),
                                                  slice(1, 426, None))])),
                ('trf3',
                 ColumnTransformer(remainder='passthrough',
                                   transformers=[('scale', MinMaxScaler(),
                                                  slice(0, 426,

NameError: name 'RepeatedStratifiedKFold' is not defined

## Pipeline vs make_pipeline

Pipeline requires naming of steps, make_pipeline does not.

Same applies to Column_Transformer vs make_column_transformer)

In [130]:
from sklearn.pipeline import Pipeline, make_pipeline
# Alternate Syntax
pipe = make_pipeline(trf1,trf2,trf3,trf4,trf5)

In [174]:
# train
pipe.fit(X_train,y_train)

Pipeline(steps=[('trf1',
                 ColumnTransformer(remainder='passthrough',
                                   transformers=[('ohe_sex_embarked',
                                                  OneHotEncoder(handle_unknown='ignore',
                                                                sparse_output=False),
                                                  [0])])),
                ('trf2',
                 ColumnTransformer(remainder='passthrough',
                                   transformers=[('Impute_age',
                                                  SimpleImputer(fill_value=0,
                                                                strategy='constant'),
                                                  slice(1, 426, None))])),
                ('trf3',
                 ColumnTransformer(remainder='passthrough',
                                   transformers=[('scale', MinMaxScaler(),
                                                  slice(0, 426,

## Explore Pipeline

In [162]:
pipe.named_steps['trf1']

ColumnTransformer(remainder='passthrough',
                  transformers=[('Impute_age',
                                 SimpleImputer(fill_value=0,
                                               strategy='constant'),
                                 slice(1, 426, None))])

In [175]:
# Predict 
y_pred = pipe.predict(X_test)

In [170]:
x_pred

array([2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2,
       3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 1, 2, 2, 2, 2, 2, 2, 2, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 2, 2, 2, 2, 2, 2, 1, 2,
       2, 2, 2, 2, 2, 2, 0, 2, 1, 2, 2, 2, 1, 2, 2, 3, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3])

In [176]:
from sklearn.metrics import accuracy_score
accuracy_score(y_test,y_pred)

0.3916083916083916

## Imbalanced Data - Use Smote

## Cross Validation using Pipeline

In [204]:
from sklearn.model_selection import cross_val_score
cross_val_score(pipe, X, y, cv=3, scoring='accuracy').mean()

ValueError: 
All the 3 fits failed.
It is very likely that your model is misconfigured.
You can try to debug the error by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
2 fits failed with the following error:
Traceback (most recent call last):
  File "/home/priyesh/projects/MD7/venv/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 729, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/home/priyesh/projects/MD7/venv/lib/python3.9/site-packages/imblearn/pipeline.py", line 293, in fit
    Xt, yt = self._fit(X, y, **fit_params_steps)
  File "/home/priyesh/projects/MD7/venv/lib/python3.9/site-packages/imblearn/pipeline.py", line 250, in _fit
    X, y, fitted_transformer = fit_resample_one_cached(
  File "/home/priyesh/projects/MD7/venv/lib/python3.9/site-packages/joblib/memory.py", line 353, in __call__
    return self.func(*args, **kwargs)
  File "/home/priyesh/projects/MD7/venv/lib/python3.9/site-packages/imblearn/pipeline.py", line 422, in _fit_resample_one
    X_res, y_res = sampler.fit_resample(X, y, **fit_params)
  File "/home/priyesh/projects/MD7/venv/lib/python3.9/site-packages/imblearn/base.py", line 208, in fit_resample
    return super().fit_resample(X, y)
  File "/home/priyesh/projects/MD7/venv/lib/python3.9/site-packages/imblearn/base.py", line 112, in fit_resample
    output = self._fit_resample(X, y)
  File "/home/priyesh/projects/MD7/venv/lib/python3.9/site-packages/imblearn/over_sampling/_smote/base.py", line 364, in _fit_resample
    nns = self.nn_k_.kneighbors(X_class, return_distance=False)[:, 1:]
  File "/home/priyesh/projects/MD7/venv/lib/python3.9/site-packages/sklearn/neighbors/_base.py", line 808, in kneighbors
    raise ValueError(
ValueError: Expected n_neighbors <= n_samples,  but n_samples = 5, n_neighbors = 6

--------------------------------------------------------------------------------
1 fits failed with the following error:
Traceback (most recent call last):
  File "/home/priyesh/projects/MD7/venv/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 729, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/home/priyesh/projects/MD7/venv/lib/python3.9/site-packages/imblearn/pipeline.py", line 293, in fit
    Xt, yt = self._fit(X, y, **fit_params_steps)
  File "/home/priyesh/projects/MD7/venv/lib/python3.9/site-packages/imblearn/pipeline.py", line 250, in _fit
    X, y, fitted_transformer = fit_resample_one_cached(
  File "/home/priyesh/projects/MD7/venv/lib/python3.9/site-packages/joblib/memory.py", line 353, in __call__
    return self.func(*args, **kwargs)
  File "/home/priyesh/projects/MD7/venv/lib/python3.9/site-packages/imblearn/pipeline.py", line 422, in _fit_resample_one
    X_res, y_res = sampler.fit_resample(X, y, **fit_params)
  File "/home/priyesh/projects/MD7/venv/lib/python3.9/site-packages/imblearn/base.py", line 208, in fit_resample
    return super().fit_resample(X, y)
  File "/home/priyesh/projects/MD7/venv/lib/python3.9/site-packages/imblearn/base.py", line 112, in fit_resample
    output = self._fit_resample(X, y)
  File "/home/priyesh/projects/MD7/venv/lib/python3.9/site-packages/imblearn/over_sampling/_smote/base.py", line 364, in _fit_resample
    nns = self.nn_k_.kneighbors(X_class, return_distance=False)[:, 1:]
  File "/home/priyesh/projects/MD7/venv/lib/python3.9/site-packages/sklearn/neighbors/_base.py", line 808, in kneighbors
    raise ValueError(
ValueError: Expected n_neighbors <= n_samples,  but n_samples = 4, n_neighbors = 6


In [None]:
oversample = SMOTE()
undersample = RandomUnderSampler()
#steps = [("o", oversample),("u",undersample)]
steps = [("o", oversample)]
pipeline = Pipeline(steps=steps)
# transform the dataset 
X_encoded, y = pipeline.fit_resample(X_encoded,y)