In [1]:
import numpy as np
import pandas as pd
from catboost import CatBoostClassifier
from sklearn.model_selection import train_test_split, StratifiedKFold
from sklearn.feature_selection import SelectKBest, f_regression, f_classif
import gc
gc.enable()
import warnings
warnings.filterwarnings('ignore')
warnings.simplefilter(action='ignore', category=FutureWarning)
from sklearn.preprocessing import StandardScaler, LabelEncoder, MinMaxScaler
from sklearn.calibration import CalibratedClassifierCV
from sklearn.metrics import roc_auc_score, auc, roc_curve
from bayes_opt import BayesianOptimization
import lightgbm as lgb
from sklearn.calibration import calibration_curve,CalibratedClassifierCV
from pycaret.classification import *


file = "dataset/transformed_dataset.csv"
reg_cols = ['atlas_pct_diabetes_adults13',
 'atlas_pct_wic15',
 'total_physician_office_net_paid_pmpm_cost_9to12m_b4',
 'atlas_pct_laccess_hisp15',
 'atlas_pct_fmrkt_frveg16',
 'credit_hh_nonmtgcredit_60dpd',
 'atlas_dirsales_farms12',
 'rx_nonmaint_pmpm_ct',
 'zip_cd',
 'atlas_pct_laccess_white15',
 'credit_hh_bankcard_severederog',
 'atlas_pct_fmrkt_credit16',
 'credit_bal_autofinance_new',
 'rej_days_since_last_clm',
 'rx_generic_pmpm_ct_0to3m_b4',
 'rwjf_social_associate_rate',
 'med_physician_office_ds_clm_6to9m_b4',
 'atlas_totalocchu',
 'atlas_veg_acrespth12',
 'atlas_pct_loclsale12',
 'atlas_pct_fmrkt_anmlprod16',
 'atlas_freshveg_farms12',
 'rwjf_resident_seg_black_inx',
 'atlas_pct_loclfarm12',
 'total_outpatient_mbr_resp_pmpm_cost_6to9m_b4',
 'atlas_berry_acrespth12',
 'rx_maint_pmpm_ct_9to12m_b4',
 'rx_tier_2_pmpm_ct',
 'atlas_agritrsm_rct12',
 'atlas_pct_laccess_snap15',
 'atlas_deep_pov_all',
 'ccsp_227_pct',
 'bh_outpatient_net_paid_pmpm_cost',
 'atlas_veg_farms12',
 'rx_hum_16_pmpm_ct',
 'cms_risk_adjustment_factor_a_amt',
 'atlas_recfac14',
 'total_physician_office_copay_pmpm_cost',
 'atlas_pc_fsrsales12',
 'atlas_pct_fmrkt_baked16',
 'atlas_net_international_migration_rate',
 'rx_maint_mbr_resp_pmpm_cost_6to9m_b4',
 'rx_generic_pmpm_cost_6to9m_b4',
 'rx_gpi2_49_pmpm_cost_0to3m_b4',
 'atlas_pct_sbp15',
 'atlas_pct_laccess_child15',
 'met_obe_diag_pct',
 'atlas_orchard_acrespth12',
 'atlas_pct_laccess_hhnv15',
 'cnt_cp_webstatement_pmpm_ct',
 'atlas_pct_laccess_lowi15',
 'rx_gpi2_02_pmpm_cost',
 'cms_partd_ra_factor_amt',
 'atlas_pct_free_lunch14',
 'rx_tier_2_pmpm_ct_3to6m_b4',
 'cons_chva',
 'atlas_pct_fmrkt_wiccash16',
 'rx_overall_net_paid_pmpm_cost_6to9m_b4',
 'total_med_allowed_pmpm_cost_9to12m_b4',
 'bh_physician_office_copay_pmpm_cost_6to9m_b4',
 'atlas_pct_snap16',
 'atlas_ghveg_sqftpth12',
 'atlas_pc_dirsales12',
 'atlas_pct_reduced_lunch14',
 'ccsp_236_pct',
 'atlas_deep_pov_children',
 'atlas_pct_sfsp15',
 'rwjf_air_pollute_density',
 'rx_generic_pmpm_cost',
 'cms_tot_partd_payment_amt',
 'cons_nwperadult',
 'rx_days_since_last_script',
 'atlas_pct_laccess_nhasian15',
 'rx_nonbh_mbr_resp_pmpm_cost_6to9m_b4',
 'rx_days_since_last_script_6to9m_b4',
 'atlas_pct_obese_adults13',
 'credit_bal_consumerfinance',
 'atlas_pct_fmrkt_wic16',
 'atlas_orchard_farms12',
 'atlas_berry_farms12',
 'atlas_pct_laccess_multir15',
 'rx_bh_mbr_resp_pmpm_cost_9to12m_b4',
 'atlas_pc_wic_redemp12',
 'rwjf_mv_deaths_rate',
 'atlas_povertyunder18pct',
 'rx_gpi2_72_pmpm_cost_6to9m_b4',
 'atlas_pct_fmrkt_snap16',
 'atlas_medhhinc',
 'rx_nonbh_net_paid_pmpm_cost',
 'credit_bal_bankcard_severederog',
 'bh_ip_snf_net_paid_pmpm_cost',
 'atlas_pc_snapben15',
 'rx_nonbh_pmpm_ct_0to3m_b4',
 'rx_overall_mbr_resp_pmpm_cost_0to3m_b4',
 'auth_3mth_post_acute_mean_los',
 'rx_branded_mbr_resp_pmpm_cost',
 'rx_tier_1_pmpm_ct_0to3m_b4',
 'bh_ncdm_pct',
 'atlas_naturalchangerate1016',
 'rx_mail_mbr_resp_pmpm_cost_0to3m_b4',
 'credit_bal_autobank',
 'rx_nonotc_dist_gpi6_pmpm_ct',
 'cons_cgqs',
 'rx_overall_gpi_pmpm_ct_0to3m_b4',
 'credit_hh_bankcardcredit_60dpd',
 'rx_gpi2_01_pmpm_cost_0to3m_b4',
 'cci_dia_m_pmpm_ct',
 'atlas_pct_nslp15',
 'mcc_end_pct',
 'atlas_pct_laccess_black15',
 'credit_bal_mtgcredit_new',
 'credit_hh_1stmtgcredit',
 'cons_chmi',
 'rwjf_income_inequ_ratio',
 'atlas_pct_laccess_pop15',
 'atlas_pc_ffrsales12',
 'atlas_hh65plusalonepct',
 'atlas_pct_fmrkt_sfmnp16',
 'auth_3mth_acute_mean_los',
 'rx_hum_28_pmpm_cost',
 'atlas_pct_laccess_nhna15',
 'atlas_povertyallagespct',
 'rx_nonbh_mbr_resp_pmpm_cost',
 'rx_nonmaint_mbr_resp_pmpm_cost_9to12m_b4',
 'atlas_pct_fmrkt_otherfood16',
 'lab_dist_loinc_pmpm_ct',
 'rx_generic_mbr_resp_pmpm_cost',
 'atlas_pct_laccess_seniors15',
 'atlas_pct_cacfp15',
 'total_outpatient_allowed_pmpm_cost_6to9m_b4',
 'rx_nonmaint_mbr_resp_pmpm_cost',
 'credit_bal_nonmtgcredit_60dpd',
 'atlas_ownhomepct',
 'rx_overall_mbr_resp_pmpm_cost',
 'atlas_redemp_snaps16',
 'atlas_netmigrationrate1016',
 'atlas_percapitainc',
 'phy_em_px_pct',
 'rx_generic_mbr_resp_pmpm_cost_0to3m_b4']

cat_cols = ['bh_ncdm_ind',
 'auth_3mth_post_acute_inf',
 'rx_maint_net_paid_pmpm_cost_t_9-6-3m_b4',
 'ccsp_065_pmpm_ct',
 'auth_3mth_acute_vco',
 'rx_gpi2_72_pmpm_ct_6to9m_b4',
 'auth_3mth_post_acute_men',
 'rej_total_physician_office_visit_ct_pmpm_0to3m_b4',
 'total_physician_office_net_paid_pmpm_cost_t_9-6-3m_b4',
 'bh_ip_snf_net_paid_pmpm_cost_0to3m_b4',
 'mcc_ano_pmpm_ct_t_9-6-3m_b4',
 'atlas_type_2015_update',
 'atlas_retirement_destination_2015_upda',
 'auth_3mth_post_acute_sns',
 'atlas_hiamenity',
 'cons_ltmedicr',
 'auth_3mth_acute_ccs_086',
 'total_physician_office_mbr_resp_pmpm_cost_t_9-6-3m_b4',
 'auth_3mth_acute_cir',
 'atlas_csa12',
 'total_med_net_paid_pmpm_cost_t_6-3-0m_b4',
 'cons_n2pwh',
 'auth_3mth_snf_post_hsp',
 'auth_3mth_post_acute_inj',
 'med_outpatient_mbr_resp_pmpm_cost_t_9-6-3m_b4',
 'rx_gpi2_56_dist_gpi6_pmpm_ct_3to6m_b4',
 'atlas_low_employment_2015_update',
 'auth_3mth_acute_inf',
 'lab_albumin_loinc_pmpm_ct',
 'rx_gpi2_17_pmpm_cost_t_12-9-6m_b4',
 'cons_rxadhs',
 'cons_mobplus',
 'atlas_foodinsec_child_03_11',
 'lang_spoken_cd',
 'bh_ip_snf_mbr_resp_pmpm_cost_9to12m_b4',
 'auth_3mth_post_acute_gus',
 'auth_3mth_acute_cad',
 'rx_maint_pmpm_ct_t_6-3-0m_b4',
 'auth_3mth_acute_ccs_044',
 'cons_hxmioc',
 'med_outpatient_visit_ct_pmpm_t_12-9-6m_b4',
 'med_physician_office_allowed_pmpm_cost_t_9-6-3m_b4',
 'auth_3mth_acute_res',
 'auth_3mth_acute_chf',
 'auth_3mth_acute_ccs_030',
 'auth_3mth_dc_hospice',
 'auth_3mth_acute_neo',
 'atlas_type_2015_recreation_no',
 'hum_region',
 'atlas_ghveg_farms12',
 'rx_maint_net_paid_pmpm_cost_t_12-9-6m_b4',
 'auth_3mth_acute_ccs_048',
 'rx_overall_gpi_pmpm_ct_t_6-3-0m_b4',
 'rx_overall_gpi_pmpm_ct_t_12-9-6m_b4',
 'rx_nonbh_pmpm_ct_t_9-6-3m_b4',
 'mcc_chf_pmpm_ct_t_9-6-3m_b4',
 'auth_3mth_post_acute_chf',
 'auth_3mth_psychic',
 'rx_nonotc_pmpm_cost_t_6-3-0m_b4',
 'auth_3mth_acute_end',
 'atlas_low_education_2015_update',
 'src_div_id',
 'auth_3mth_bh_acute',
 'auth_3mth_acute_ccs_067',
 'atlas_type_2015_mining_no',
 'cons_n2pmr',
 'rx_mail_net_paid_pmpm_cost_t_6-3-0m_b4',
 'rej_med_er_net_paid_pmpm_cost_t_9-6-3m_b4',
 'med_outpatient_deduct_pmpm_cost_t_9-6-3m_b4',
 'rej_med_ip_snf_coins_pmpm_cost_t_9-6-3m_b4',
 'rx_generic_dist_gpi6_pmpm_ct_t_9-6-3m_b4',
 'auth_3mth_dc_home',
 'auth_3mth_acute_bld',
 'auth_3mth_acute_ner',
 'oontwk_mbr_resp_pmpm_cost_t_6-3-0m_b4',
 'rx_gpi2_90_dist_gpi6_pmpm_ct_9to12m_b4',
 'atlas_foodhub16',
 'rx_maint_pmpm_cost_t_6-3-0m_b4',
 'auth_3mth_post_acute_ben',
 'est_age',
 'auth_3mth_post_acute_cer',
 'auth_3mth_acute_ccs_153',
 'auth_3mth_acute_dig',
 'total_ip_maternity_net_paid_pmpm_cost_t_12-9-6m_b4',
 'auth_3mth_post_acute_cad',
 'rx_bh_pmpm_ct_0to3m_b4',
 'rx_nonmail_dist_gpi6_pmpm_ct_t_9-6-3m_b4',
 'atlas_persistentchildpoverty_1980_2011',
 'atlas_slhouse12',
 'atlas_population_loss_2015_update',
 'auth_3mth_acute_ccs_094',
 'auth_3mth_post_acute_ner',
 'auth_3mth_acute_ccs_227',
 'rx_overall_dist_gpi6_pmpm_ct_t_6-3-0m_b4',
 'auth_3mth_acute_trm',
 'auth_3mth_post_acute',
 'auth_3mth_acute_dia',
 'auth_3mth_acute_ccs_043',
 'rx_overall_mbr_resp_pmpm_cost_t_6-3-0m_b4',
 'cms_orig_reas_entitle_cd',
 'auth_3mth_post_acute_end',
 'auth_3mth_acute_can',
 'auth_3mth_acute_ccs_172',
 'auth_3mth_dc_home_health',
 'atlas_hipov_1115',
 'rx_phar_cat_cvs_pmpm_ct_t_9-6-3m_b4',
 'rx_gpi2_62_pmpm_cost_t_9-6-3m_b4',
 'cons_n2phi',
 'auth_3mth_post_acute_hdz',
 'auth_3mth_bh_acute_mean_los',
 'auth_3mth_post_acute_dig',
 'auth_3mth_transplant',
 'rx_mail_mbr_resp_pmpm_cost_t_9-6-3m_b4',
 'auth_3mth_acute_sns',
 'auth_3mth_post_acute_vco',
 'auth_3mth_home',
 'rx_nonbh_net_paid_pmpm_cost_t_6-3-0m_b4',
 'auth_3mth_post_acute_ckd',
 'rx_gpi2_34_dist_gpi6_pmpm_ct',
 'rx_gpi2_33_pmpm_ct_0to3m_b4',
 'auth_3mth_dc_ltac',
 'cons_estinv30_rc',
 'rx_phar_cat_humana_pmpm_ct_t_9-6-3m_b4',
 'auth_3mth_acute_men',
 'auth_3mth_dc_snf',
 'cons_hhcomp',
 'bh_ip_snf_mbr_resp_pmpm_cost_6to9m_b4',
 'auth_3mth_acute_inj',
 'total_physician_office_visit_ct_pmpm_t_6-3-0m_b4',
 'mabh_seg',
 'auth_3mth_post_acute_res',
 'auth_3mth_bh_acute_men',
 'auth_3mth_acute_hdz',
 'hedis_dia_hba1c_ge9',
 'auth_3mth_post_acute_trm',
 'auth_3mth_hospice',
 'rx_gpi2_39_pmpm_cost_t_6-3-0m_b4',
 'atlas_vlfoodsec_13_15',
 'auth_3mth_dc_acute_rehab',
 'rx_generic_pmpm_cost_t_6-3-0m_b4',
 'auth_3mth_acute_ccs_154',
 'cons_rxmaint',
 'total_bh_copay_pmpm_cost_t_9-6-3m_b4',
 'rx_nonmaint_dist_gpi6_pmpm_ct_t_12-9-6m_b4',
 'rej_med_outpatient_visit_ct_pmpm_t_6-3-0m_b4',
 'cons_rxadhm',
 'auth_3mth_acute_mus',
 'rx_nonbh_pmpm_cost_t_9-6-3m_b4',
 'rx_days_since_last_script_0to3m_b4',
 'auth_3mth_post_acute_cir',
 'auth_3mth_post_acute_dia',
 'auth_3mth_post_er',
 'auth_3mth_dc_no_ref',
 'bh_ip_snf_mbr_resp_pmpm_cost_3to6m_b4',
 'auth_3mth_acute',
 'rx_branded_pmpm_ct_t_6-3-0m_b4',
 'atlas_farm_to_school13',
 'auth_3mth_acute_cer',
 'med_ambulance_coins_pmpm_cost_t_9-6-3m_b4',
 'auth_3mth_acute_gus',
 'rx_gpi4_6110_pmpm_ct',
 'cons_hxwearbl',
 'auth_3mth_ltac',
 'auth_3mth_acute_ckd',
 'bh_ip_snf_net_paid_pmpm_cost_6to9m_b4',
 'sex_cd',
 'days_since_last_clm_0to3m_b4',
 'atlas_perpov_1980_0711',
 'auth_3mth_post_acute_mus',
 'auth_3mth_non_er',
 'bh_ncal_ind',
 'auth_3mth_facility',
 'atlas_foodinsec_13_15',
 'auth_3mth_dc_left_ama',
 'race_cd',
 'bh_ip_snf_admit_days_pmpm_t_9-6-3m_b4',
 'auth_3mth_dc_other',
 'cons_stlnindx',
 'auth_3mth_acute_skn',
 'total_allowed_pmpm_cost_t_9-6-3m_b4',
 'auth_3mth_rehab',
 'bh_urgent_care_copay_pmpm_cost_t_12-9-6m_b4',
 'auth_3mth_dc_custodial',
 'auth_3mth_snf_direct',
 'auth_3mth_acute_ccs_042',
 'bh_ip_snf_net_paid_pmpm_cost_9to12m_b4',
 'bh_ip_snf_net_paid_pmpm_cost_3to6m_b4',
 'rx_maint_pmpm_cost_t_12-9-6m_b4',
 'auth_3mth_post_acute_rsk',
 'rev_cms_ansth_pmpm_ct',
 'cons_cwht']

imp_features= ['cons_n2pwh',
 'days_since_last_clm_0to3m_b4',
 'cons_cwht',
 'rx_days_since_last_script_0to3m_b4',
 'cons_n2pmr',
 'est_age',
 'cons_n2phi',
 'atlas_foodinsec_13_15',
 'atlas_foodinsec_child_03_11',
 'atlas_vlfoodsec_13_15',
 'atlas_ghveg_farms12',
 'rx_gpi2_17_pmpm_cost_t_12-9-6m_b4',
 'cms_tot_partd_payment_amt',
 'race_cd',
 'src_div_id',
 'cms_risk_adjustment_factor_a_amt',
 'atlas_csa12',
 'rx_overall_mbr_resp_pmpm_cost_t_6-3-0m_b4',
 'rx_bh_pmpm_ct_0to3m_b4',
 'mabh_seg',
 'hum_region',
 'cons_estinv30_rc',
 'rx_generic_pmpm_ct_0to3m_b4',
 'credit_hh_bankcard_severederog',
 'rx_overall_gpi_pmpm_ct_0to3m_b4',
 'cnt_cp_webstatement_pmpm_ct',
 'credit_hh_bankcardcredit_60dpd',
 'rx_gpi2_39_pmpm_cost_t_6-3-0m_b4',
 'credit_hh_nonmtgcredit_60dpd',
 'rx_mail_net_paid_pmpm_cost_t_6-3-0m_b4',
 'cons_nwperadult',
 'cons_cgqs',
 'cons_rxadhm',
 'atlas_totalocchu',
 'cons_hhcomp',
 'rx_nonbh_mbr_resp_pmpm_cost',
 'lab_albumin_loinc_pmpm_ct',
 'rx_phar_cat_cvs_pmpm_ct_t_9-6-3m_b4',
 'rx_tier_2_pmpm_ct',
 'credit_bal_consumerfinance',
 'rx_maint_net_paid_pmpm_cost_t_12-9-6m_b4',
 'rx_generic_pmpm_cost',
 'rx_nonotc_pmpm_cost_t_6-3-0m_b4',
 'cms_orig_reas_entitle_cd',
 'atlas_recfac14',
 'cons_ltmedicr',
 'rx_overall_mbr_resp_pmpm_cost_0to3m_b4',
 'ccsp_227_pct',
 'cons_hxmioc',
 'rx_tier_1_pmpm_ct_0to3m_b4',
 'rx_generic_mbr_resp_pmpm_cost',
 'cons_stlnindx',
 'rx_tier_2_pmpm_ct_3to6m_b4',
 'mcc_chf_pmpm_ct_t_9-6-3m_b4',
 'rx_maint_pmpm_ct_9to12m_b4',
 'total_allowed_pmpm_cost_t_9-6-3m_b4',
 'rx_overall_mbr_resp_pmpm_cost',
 'cms_partd_ra_factor_amt',
 'rx_hum_16_pmpm_ct',
 'rx_generic_pmpm_cost_t_6-3-0m_b4',
 'atlas_pc_ffrsales12',
 'credit_bal_mtgcredit_new',
 'rx_mail_mbr_resp_pmpm_cost_t_9-6-3m_b4',
 'rx_overall_dist_gpi6_pmpm_ct_t_6-3-0m_b4',
 'rx_maint_pmpm_cost_t_6-3-0m_b4',
 'sex_cd',
 'lab_dist_loinc_pmpm_ct',
 'phy_em_px_pct',
 'rx_overall_gpi_pmpm_ct_t_12-9-6m_b4',
 'rx_nonbh_mbr_resp_pmpm_cost_6to9m_b4',
 'atlas_pc_dirsales12',
 'rx_maint_pmpm_cost_t_12-9-6m_b4',
 'rx_maint_net_paid_pmpm_cost_t_9-6-3m_b4',
 'rx_nonmaint_mbr_resp_pmpm_cost',
 'atlas_pct_snap16',
 'rx_generic_mbr_resp_pmpm_cost_0to3m_b4',
 'rx_generic_pmpm_cost_6to9m_b4',
 'credit_bal_nonmtgcredit_60dpd',
 'atlas_hipov_1115',
 'rx_maint_pmpm_ct_t_6-3-0m_b4',
 'cons_rxadhs',
 'med_outpatient_mbr_resp_pmpm_cost_t_9-6-3m_b4',
 'atlas_pct_sbp15',
 'atlas_pct_cacfp15',
 'total_med_net_paid_pmpm_cost_t_6-3-0m_b4',
 'rx_nonmaint_mbr_resp_pmpm_cost_9to12m_b4',
 'rx_overall_gpi_pmpm_ct_t_6-3-0m_b4',
 'atlas_pct_wic15',
 'lang_spoken_cd',
 'cci_dia_m_pmpm_ct',
 'atlas_pct_loclsale12',
 'met_obe_diag_pct',
 'rwjf_mv_deaths_rate',
 'atlas_pct_laccess_nhasian15',
 'rx_generic_dist_gpi6_pmpm_ct_t_9-6-3m_b4',
 'cons_rxmaint',
 'atlas_pct_reduced_lunch14',
 'cons_chva',
 'rx_nonotc_dist_gpi6_pmpm_ct',
 'atlas_netmigrationrate1016',
 'rx_nonbh_pmpm_cost_t_9-6-3m_b4',
 'cons_chmi',
 'rx_nonbh_pmpm_ct_t_9-6-3m_b4',
 'rx_phar_cat_humana_pmpm_ct_t_9-6-3m_b4',
 'rx_nonbh_pmpm_ct_0to3m_b4',
 'rx_nonbh_net_paid_pmpm_cost_t_6-3-0m_b4',
 'atlas_pct_fmrkt_otherfood16',
 'rx_gpi2_56_dist_gpi6_pmpm_ct_3to6m_b4',
 'atlas_pct_fmrkt_sfmnp16',
 'atlas_pct_nslp15',
 'auth_3mth_dc_home',
 'rej_med_outpatient_visit_ct_pmpm_t_6-3-0m_b4',
 'atlas_pct_laccess_hhnv15',
 'atlas_pct_laccess_lowi15',
 'atlas_net_international_migration_rate',
 'atlas_retirement_destination_2015_upda',
 'rx_days_since_last_script',
 'total_physician_office_mbr_resp_pmpm_cost_t_9-6-3m_b4',
 'rx_gpi2_49_pmpm_cost_0to3m_b4',
 'rx_gpi2_72_pmpm_ct_6to9m_b4',
 'atlas_pc_fsrsales12',
 'atlas_pct_sfsp15',
 'rx_nonmaint_dist_gpi6_pmpm_ct_t_12-9-6m_b4',
 'rx_gpi2_34_dist_gpi6_pmpm_ct',
 'auth_3mth_acute',
 'rx_nonmail_dist_gpi6_pmpm_ct_t_9-6-3m_b4',
 'atlas_orchard_farms12',
 'zip_cd',
 'credit_hh_1stmtgcredit',
 'atlas_pct_diabetes_adults13',
 'med_outpatient_visit_ct_pmpm_t_12-9-6m_b4',
 'atlas_pct_laccess_white15',
 'atlas_slhouse12',
 'rx_branded_pmpm_ct_t_6-3-0m_b4',
 'auth_3mth_home',
 'total_physician_office_net_paid_pmpm_cost_t_9-6-3m_b4',
 'atlas_pct_laccess_black15',
 'atlas_pct_fmrkt_anmlprod16',
 'oontwk_mbr_resp_pmpm_cost_t_6-3-0m_b4',
 'atlas_pct_laccess_nhna15',
 'cons_hxwearbl',
 'rx_mail_mbr_resp_pmpm_cost_0to3m_b4',
 'atlas_percapitainc',
 'med_physician_office_allowed_pmpm_cost_t_9-6-3m_b4',
 'atlas_pct_laccess_pop15',
 'atlas_pct_fmrkt_frveg16',
 'atlas_foodhub16',
 'rx_nonbh_net_paid_pmpm_cost',
 'atlas_pct_fmrkt_credit16',
 'total_physician_office_visit_ct_pmpm_t_6-3-0m_b4',
 'rej_days_since_last_clm',
 'rx_maint_mbr_resp_pmpm_cost_6to9m_b4',
 'atlas_agritrsm_rct12',
 'rx_nonmaint_pmpm_ct',
 'atlas_pc_wic_redemp12',
 'atlas_pct_free_lunch14',
 'rwjf_air_pollute_density',
 'atlas_redemp_snaps16',
 'credit_bal_bankcard_severederog',
 'auth_3mth_dc_snf',
 'atlas_orchard_acrespth12',
 'rx_gpi4_6110_pmpm_ct',
 'credit_bal_autobank',
 'atlas_type_2015_update',
 'rev_cms_ansth_pmpm_ct',
 'mcc_end_pct',
 'atlas_deep_pov_children',
 'rwjf_income_inequ_ratio',
 'ccsp_236_pct',
 'rwjf_social_associate_rate',
 'total_med_allowed_pmpm_cost_9to12m_b4',
 'atlas_pct_laccess_multir15',
 'credit_bal_autofinance_new',
 'atlas_medhhinc',
 'atlas_pct_obese_adults13',
 'total_outpatient_mbr_resp_pmpm_cost_6to9m_b4',
 'total_bh_copay_pmpm_cost_t_9-6-3m_b4',
 'rwjf_resident_seg_black_inx',
 'rx_gpi2_90_dist_gpi6_pmpm_ct_9to12m_b4',
 'atlas_hiamenity',
 'rx_overall_net_paid_pmpm_cost_6to9m_b4',
 'atlas_naturalchangerate1016',
 'atlas_pct_laccess_snap15',
 'rx_days_since_last_script_6to9m_b4',
 'rx_hum_28_pmpm_cost',
 'atlas_pct_loclfarm12',
 'atlas_dirsales_farms12',
 'atlas_freshveg_farms12',
 'atlas_ownhomepct',
 'cons_mobplus',
 'atlas_type_2015_mining_no',
 'auth_3mth_acute_mean_los',
 'atlas_berry_farms12',
 'atlas_pct_laccess_seniors15',
 'atlas_veg_farms12',
 'atlas_hh65plusalonepct',
 'rx_bh_mbr_resp_pmpm_cost_9to12m_b4',
 'atlas_povertyallagespct',
 'atlas_pct_fmrkt_baked16',
 'total_physician_office_copay_pmpm_cost',
 'atlas_pc_snapben15',
 'rx_gpi2_62_pmpm_cost_t_9-6-3m_b4',
 'bh_ip_snf_mbr_resp_pmpm_cost_3to6m_b4',
 'atlas_perpov_1980_0711',
 'rx_gpi2_33_pmpm_ct_0to3m_b4',
 'atlas_veg_acrespth12',
 'rx_branded_mbr_resp_pmpm_cost',
 'atlas_pct_laccess_hisp15',
 'bh_urgent_care_copay_pmpm_cost_t_12-9-6m_b4',
 'atlas_berry_acrespth12',
 'bh_ip_snf_mbr_resp_pmpm_cost_9to12m_b4',
 'atlas_ghveg_sqftpth12',
 'atlas_pct_fmrkt_snap16',
 'auth_3mth_facility',
 'atlas_deep_pov_all',
 'total_physician_office_net_paid_pmpm_cost_9to12m_b4',
 'total_ip_maternity_net_paid_pmpm_cost_t_12-9-6m_b4',
 'med_physician_office_ds_clm_6to9m_b4',
 'atlas_farm_to_school13',
 'bh_ip_snf_net_paid_pmpm_cost_9to12m_b4',
 'bh_physician_office_copay_pmpm_cost_6to9m_b4',
 'rx_gpi2_01_pmpm_cost_0to3m_b4',
 'atlas_persistentchildpoverty_1980_2011',
 'atlas_pct_laccess_child15',
 'atlas_povertyunder18pct',
 'atlas_pct_fmrkt_wiccash16',
 'rx_gpi2_72_pmpm_cost_6to9m_b4',
 'total_outpatient_allowed_pmpm_cost_6to9m_b4',
 'atlas_pct_fmrkt_wic16',
 'atlas_low_education_2015_update',
 'bh_ip_snf_net_paid_pmpm_cost_3to6m_b4',
 'atlas_type_2015_recreation_no',
 'auth_3mth_post_acute_mean_los',
 'bh_ncdm_pct',
 'auth_3mth_post_acute',
 'auth_3mth_acute_ccs_227',
 'auth_3mth_acute_ccs_153',
 'auth_3mth_acute_ccs_172',
 'auth_3mth_acute_ccs_086',
 'auth_3mth_acute_ccs_094',
 'auth_3mth_acute_ccs_154',
 'auth_3mth_post_acute_ner',
 'rx_gpi2_02_pmpm_cost',
 'auth_3mth_acute_ccs_067',
 'auth_3mth_acute_ccs_048',
 'auth_3mth_acute_ccs_044',
 'auth_3mth_acute_ccs_043',
 'auth_3mth_acute_ccs_042',
 'auth_3mth_acute_ccs_030',
 'auth_3mth_acute_can']
target = "covid_vaccination"


# reg_cols = [i for i in reg_cols if i in imp_features]
# cat_cols = [i for i in cat_cols if i in imp_features]

print(len(cat_cols), len(reg_cols))
student_id = 2000728661

id = "ID"
reg_scalar = MinMaxScaler()
reg_f, cat_f = fdf = SelectKBest(f_regression, k=50), SelectKBest(f_classif, k=70)
def scale_df(dataframe, train=True):
    
    if train: dataframe[reg_cols] = reg_scalar.fit_transform(dataframe[reg_cols])
    else: dataframe[reg_cols] = reg_scalar.transform(dataframe[reg_cols])
    return dataframe

def get_reduced_features(dataframe, train=True):
    rcat, rreg = [], []
    if train: 
        rcat = cat_f.fit(dataframe[cat_cols], dataframe[target]).get_support(indices=True)
        rreg = reg_f.fit(dataframe[reg_cols], dataframe[target]).get_support(indices=True)
    else:
        rcat = cat_f.get_support(indices=True)
        rreg = reg_f.get_support(indices=True)
    rcat = [i for idx, i in enumerate(dataframe[cat_cols].columns) if idx in rcat]
    rreg = [i for idx, i in enumerate(dataframe[reg_cols].columns) if idx in rreg]
    return rcat, rreg

def sc_df(dataframe):
    
    for col in dataframe.columns:
        if dataframe[col].dtype == np.int64:
            dataframe[col] = dataframe[col].astype(np.int32)

        if dataframe[col].dtype == np.float64:
            dataframe[col] = dataframe[col].astype(np.float32)
    return dataframe
df = pd.read_csv('dataset/transformed_dataset.csv')

# cat_cols, reg_cols = get_reduced_features(df)


df = df[cat_cols + reg_cols + [target]]
df=scale_df(df)
df = sc_df(df)
df.head()

191 139


Unnamed: 0,bh_ncdm_ind,auth_3mth_post_acute_inf,rx_maint_net_paid_pmpm_cost_t_9-6-3m_b4,ccsp_065_pmpm_ct,auth_3mth_acute_vco,rx_gpi2_72_pmpm_ct_6to9m_b4,auth_3mth_post_acute_men,rej_total_physician_office_visit_ct_pmpm_0to3m_b4,total_physician_office_net_paid_pmpm_cost_t_9-6-3m_b4,bh_ip_snf_net_paid_pmpm_cost_0to3m_b4,mcc_ano_pmpm_ct_t_9-6-3m_b4,atlas_type_2015_update,atlas_retirement_destination_2015_upda,auth_3mth_post_acute_sns,atlas_hiamenity,cons_ltmedicr,auth_3mth_acute_ccs_086,total_physician_office_mbr_resp_pmpm_cost_t_9-6-3m_b4,auth_3mth_acute_cir,atlas_csa12,total_med_net_paid_pmpm_cost_t_6-3-0m_b4,cons_n2pwh,auth_3mth_snf_post_hsp,auth_3mth_post_acute_inj,med_outpatient_mbr_resp_pmpm_cost_t_9-6-3m_b4,rx_gpi2_56_dist_gpi6_pmpm_ct_3to6m_b4,atlas_low_employment_2015_update,auth_3mth_acute_inf,lab_albumin_loinc_pmpm_ct,rx_gpi2_17_pmpm_cost_t_12-9-6m_b4,cons_rxadhs,cons_mobplus,atlas_foodinsec_child_03_11,lang_spoken_cd,bh_ip_snf_mbr_resp_pmpm_cost_9to12m_b4,auth_3mth_post_acute_gus,auth_3mth_acute_cad,rx_maint_pmpm_ct_t_6-3-0m_b4,auth_3mth_acute_ccs_044,cons_hxmioc,med_outpatient_visit_ct_pmpm_t_12-9-6m_b4,med_physician_office_allowed_pmpm_cost_t_9-6-3m_b4,auth_3mth_acute_res,auth_3mth_acute_chf,auth_3mth_acute_ccs_030,auth_3mth_dc_hospice,auth_3mth_acute_neo,atlas_type_2015_recreation_no,hum_region,atlas_ghveg_farms12,rx_maint_net_paid_pmpm_cost_t_12-9-6m_b4,auth_3mth_acute_ccs_048,rx_overall_gpi_pmpm_ct_t_6-3-0m_b4,rx_overall_gpi_pmpm_ct_t_12-9-6m_b4,rx_nonbh_pmpm_ct_t_9-6-3m_b4,mcc_chf_pmpm_ct_t_9-6-3m_b4,auth_3mth_post_acute_chf,auth_3mth_psychic,rx_nonotc_pmpm_cost_t_6-3-0m_b4,auth_3mth_acute_end,atlas_low_education_2015_update,src_div_id,auth_3mth_bh_acute,auth_3mth_acute_ccs_067,atlas_type_2015_mining_no,cons_n2pmr,rx_mail_net_paid_pmpm_cost_t_6-3-0m_b4,rej_med_er_net_paid_pmpm_cost_t_9-6-3m_b4,med_outpatient_deduct_pmpm_cost_t_9-6-3m_b4,rej_med_ip_snf_coins_pmpm_cost_t_9-6-3m_b4,rx_generic_dist_gpi6_pmpm_ct_t_9-6-3m_b4,auth_3mth_dc_home,auth_3mth_acute_bld,auth_3mth_acute_ner,oontwk_mbr_resp_pmpm_cost_t_6-3-0m_b4,rx_gpi2_90_dist_gpi6_pmpm_ct_9to12m_b4,atlas_foodhub16,rx_maint_pmpm_cost_t_6-3-0m_b4,auth_3mth_post_acute_ben,est_age,auth_3mth_post_acute_cer,auth_3mth_acute_ccs_153,auth_3mth_acute_dig,total_ip_maternity_net_paid_pmpm_cost_t_12-9-6m_b4,auth_3mth_post_acute_cad,rx_bh_pmpm_ct_0to3m_b4,rx_nonmail_dist_gpi6_pmpm_ct_t_9-6-3m_b4,atlas_persistentchildpoverty_1980_2011,atlas_slhouse12,atlas_population_loss_2015_update,auth_3mth_acute_ccs_094,auth_3mth_post_acute_ner,auth_3mth_acute_ccs_227,rx_overall_dist_gpi6_pmpm_ct_t_6-3-0m_b4,auth_3mth_acute_trm,auth_3mth_post_acute,auth_3mth_acute_dia,auth_3mth_acute_ccs_043,rx_overall_mbr_resp_pmpm_cost_t_6-3-0m_b4,cms_orig_reas_entitle_cd,auth_3mth_post_acute_end,auth_3mth_acute_can,auth_3mth_acute_ccs_172,auth_3mth_dc_home_health,atlas_hipov_1115,rx_phar_cat_cvs_pmpm_ct_t_9-6-3m_b4,rx_gpi2_62_pmpm_cost_t_9-6-3m_b4,cons_n2phi,auth_3mth_post_acute_hdz,auth_3mth_bh_acute_mean_los,auth_3mth_post_acute_dig,auth_3mth_transplant,rx_mail_mbr_resp_pmpm_cost_t_9-6-3m_b4,auth_3mth_acute_sns,auth_3mth_post_acute_vco,auth_3mth_home,rx_nonbh_net_paid_pmpm_cost_t_6-3-0m_b4,auth_3mth_post_acute_ckd,rx_gpi2_34_dist_gpi6_pmpm_ct,rx_gpi2_33_pmpm_ct_0to3m_b4,auth_3mth_dc_ltac,cons_estinv30_rc,rx_phar_cat_humana_pmpm_ct_t_9-6-3m_b4,auth_3mth_acute_men,auth_3mth_dc_snf,cons_hhcomp,bh_ip_snf_mbr_resp_pmpm_cost_6to9m_b4,auth_3mth_acute_inj,total_physician_office_visit_ct_pmpm_t_6-3-0m_b4,mabh_seg,auth_3mth_post_acute_res,auth_3mth_bh_acute_men,auth_3mth_acute_hdz,hedis_dia_hba1c_ge9,auth_3mth_post_acute_trm,auth_3mth_hospice,rx_gpi2_39_pmpm_cost_t_6-3-0m_b4,atlas_vlfoodsec_13_15,auth_3mth_dc_acute_rehab,rx_generic_pmpm_cost_t_6-3-0m_b4,auth_3mth_acute_ccs_154,cons_rxmaint,total_bh_copay_pmpm_cost_t_9-6-3m_b4,rx_nonmaint_dist_gpi6_pmpm_ct_t_12-9-6m_b4,rej_med_outpatient_visit_ct_pmpm_t_6-3-0m_b4,cons_rxadhm,auth_3mth_acute_mus,rx_nonbh_pmpm_cost_t_9-6-3m_b4,rx_days_since_last_script_0to3m_b4,auth_3mth_post_acute_cir,auth_3mth_post_acute_dia,auth_3mth_post_er,auth_3mth_dc_no_ref,bh_ip_snf_mbr_resp_pmpm_cost_3to6m_b4,auth_3mth_acute,rx_branded_pmpm_ct_t_6-3-0m_b4,atlas_farm_to_school13,auth_3mth_acute_cer,med_ambulance_coins_pmpm_cost_t_9-6-3m_b4,auth_3mth_acute_gus,rx_gpi4_6110_pmpm_ct,cons_hxwearbl,auth_3mth_ltac,auth_3mth_acute_ckd,bh_ip_snf_net_paid_pmpm_cost_6to9m_b4,sex_cd,days_since_last_clm_0to3m_b4,atlas_perpov_1980_0711,auth_3mth_post_acute_mus,auth_3mth_non_er,bh_ncal_ind,auth_3mth_facility,atlas_foodinsec_13_15,auth_3mth_dc_left_ama,race_cd,bh_ip_snf_admit_days_pmpm_t_9-6-3m_b4,auth_3mth_dc_other,cons_stlnindx,auth_3mth_acute_skn,total_allowed_pmpm_cost_t_9-6-3m_b4,auth_3mth_rehab,bh_urgent_care_copay_pmpm_cost_t_12-9-6m_b4,auth_3mth_dc_custodial,auth_3mth_snf_direct,auth_3mth_acute_ccs_042,bh_ip_snf_net_paid_pmpm_cost_9to12m_b4,bh_ip_snf_net_paid_pmpm_cost_3to6m_b4,rx_maint_pmpm_cost_t_12-9-6m_b4,auth_3mth_post_acute_rsk,rev_cms_ansth_pmpm_ct,cons_cwht,atlas_pct_diabetes_adults13,atlas_pct_wic15,total_physician_office_net_paid_pmpm_cost_9to12m_b4,atlas_pct_laccess_hisp15,atlas_pct_fmrkt_frveg16,credit_hh_nonmtgcredit_60dpd,atlas_dirsales_farms12,rx_nonmaint_pmpm_ct,zip_cd,atlas_pct_laccess_white15,credit_hh_bankcard_severederog,atlas_pct_fmrkt_credit16,credit_bal_autofinance_new,rej_days_since_last_clm,rx_generic_pmpm_ct_0to3m_b4,rwjf_social_associate_rate,med_physician_office_ds_clm_6to9m_b4,atlas_totalocchu,atlas_veg_acrespth12,atlas_pct_loclsale12,atlas_pct_fmrkt_anmlprod16,atlas_freshveg_farms12,rwjf_resident_seg_black_inx,atlas_pct_loclfarm12,total_outpatient_mbr_resp_pmpm_cost_6to9m_b4,atlas_berry_acrespth12,rx_maint_pmpm_ct_9to12m_b4,rx_tier_2_pmpm_ct,atlas_agritrsm_rct12,atlas_pct_laccess_snap15,atlas_deep_pov_all,ccsp_227_pct,bh_outpatient_net_paid_pmpm_cost,atlas_veg_farms12,rx_hum_16_pmpm_ct,cms_risk_adjustment_factor_a_amt,atlas_recfac14,total_physician_office_copay_pmpm_cost,atlas_pc_fsrsales12,atlas_pct_fmrkt_baked16,atlas_net_international_migration_rate,rx_maint_mbr_resp_pmpm_cost_6to9m_b4,rx_generic_pmpm_cost_6to9m_b4,rx_gpi2_49_pmpm_cost_0to3m_b4,atlas_pct_sbp15,atlas_pct_laccess_child15,met_obe_diag_pct,atlas_orchard_acrespth12,atlas_pct_laccess_hhnv15,cnt_cp_webstatement_pmpm_ct,atlas_pct_laccess_lowi15,rx_gpi2_02_pmpm_cost,cms_partd_ra_factor_amt,atlas_pct_free_lunch14,rx_tier_2_pmpm_ct_3to6m_b4,cons_chva,atlas_pct_fmrkt_wiccash16,rx_overall_net_paid_pmpm_cost_6to9m_b4,total_med_allowed_pmpm_cost_9to12m_b4,bh_physician_office_copay_pmpm_cost_6to9m_b4,atlas_pct_snap16,atlas_ghveg_sqftpth12,atlas_pc_dirsales12,atlas_pct_reduced_lunch14,ccsp_236_pct,atlas_deep_pov_children,atlas_pct_sfsp15,rwjf_air_pollute_density,rx_generic_pmpm_cost,cms_tot_partd_payment_amt,cons_nwperadult,rx_days_since_last_script,atlas_pct_laccess_nhasian15,rx_nonbh_mbr_resp_pmpm_cost_6to9m_b4,rx_days_since_last_script_6to9m_b4,atlas_pct_obese_adults13,credit_bal_consumerfinance,atlas_pct_fmrkt_wic16,atlas_orchard_farms12,atlas_berry_farms12,atlas_pct_laccess_multir15,rx_bh_mbr_resp_pmpm_cost_9to12m_b4,atlas_pc_wic_redemp12,rwjf_mv_deaths_rate,atlas_povertyunder18pct,rx_gpi2_72_pmpm_cost_6to9m_b4,atlas_pct_fmrkt_snap16,atlas_medhhinc,rx_nonbh_net_paid_pmpm_cost,credit_bal_bankcard_severederog,bh_ip_snf_net_paid_pmpm_cost,atlas_pc_snapben15,rx_nonbh_pmpm_ct_0to3m_b4,rx_overall_mbr_resp_pmpm_cost_0to3m_b4,auth_3mth_post_acute_mean_los,rx_branded_mbr_resp_pmpm_cost,rx_tier_1_pmpm_ct_0to3m_b4,bh_ncdm_pct,atlas_naturalchangerate1016,rx_mail_mbr_resp_pmpm_cost_0to3m_b4,credit_bal_autobank,rx_nonotc_dist_gpi6_pmpm_ct,cons_cgqs,rx_overall_gpi_pmpm_ct_0to3m_b4,credit_hh_bankcardcredit_60dpd,rx_gpi2_01_pmpm_cost_0to3m_b4,cci_dia_m_pmpm_ct,atlas_pct_nslp15,mcc_end_pct,atlas_pct_laccess_black15,credit_bal_mtgcredit_new,credit_hh_1stmtgcredit,cons_chmi,rwjf_income_inequ_ratio,atlas_pct_laccess_pop15,atlas_pc_ffrsales12,atlas_hh65plusalonepct,atlas_pct_fmrkt_sfmnp16,auth_3mth_acute_mean_los,rx_hum_28_pmpm_cost,atlas_pct_laccess_nhna15,atlas_povertyallagespct,rx_nonbh_mbr_resp_pmpm_cost,rx_nonmaint_mbr_resp_pmpm_cost_9to12m_b4,atlas_pct_fmrkt_otherfood16,lab_dist_loinc_pmpm_ct,rx_generic_mbr_resp_pmpm_cost,atlas_pct_laccess_seniors15,atlas_pct_cacfp15,total_outpatient_allowed_pmpm_cost_6to9m_b4,rx_nonmaint_mbr_resp_pmpm_cost,credit_bal_nonmtgcredit_60dpd,atlas_ownhomepct,rx_overall_mbr_resp_pmpm_cost,atlas_redemp_snaps16,atlas_netmigrationrate1016,atlas_percapitainc,phy_em_px_pct,rx_generic_mbr_resp_pmpm_cost_0to3m_b4,covid_vaccination
0,0,0,1,1,1,1,0,1,9,1,5,3,0,0,0,4,0,9,0,1,9,95,1,1,9,1,0,0,1,7,5,0,12,8,1,1,0,0,0,9,9,9,0,0,0,0,0,0,6,5,4,0,0,0,4,9,1,1,0,0,0,2,1,0,0,52,4,0,8,0,4,1,1,1,9,1,0,0,0,58,1,0,1,0,0,1,4,0,0,1,0,0,1,0,1,1,0,0,0,0,0,1,0,1,0,9,9,3,0,1,0,0,4,0,0,1,0,0,2,2,1,6,10,0,1,1,1,0,9,15,0,0,0,0,1,0,10,20,1,0,0,2,9,9,8,2,0,0,10,0,1,1,1,1,1,9,1,0,3,0,1,8,1,1,1,1,97,0,0,1,0,1,28,0,1,1,1,9,0,9,0,1,0,0,0,1,1,4,0,1,70,0.539171,0.628826,0.0,0.018989,0.0,0.341425,0.020262,0.009524,0.519617,0.291521,0.28103,0.0,0.18656,1.0,0.051282,0.489751,0.0,0.004907,0.001608,0.001193,0.2,0.006234,0.926925,0.041995,0.0,0.0,0.036697,0.04186,0.0,0.050736,0.095148,0.0,0.0,0.03681,0.072464,0.0,0.00355,0.0,0.24761,0.25,0.048195,0.007535,0.015646,0.006077,0.38546,0.253895,0.0,0.0,0.025031,0.034979,0.167266,0.005958,0.062831,0.468391,0.051724,0.158158,0.0,0.000918,0.0,0.0,0.530817,0.013162,0.044882,0.382581,0.0,0.126154,0.12518,0.519481,0.010847,0.035532,0.106401,0.033403,0.007184,0.022641,0.791971,0.717063,0.155966,0.0,0.0,0.005556,0.078515,0.0,0.079574,0.177085,0.276018,0.0,0.0,0.422416,0.000607,0.070319,0.0,0.100929,0.057143,0.00706,0.0,0.0,0.046154,0.0,0.420848,0.00104,0.379794,0.038462,0.517647,0.048,0.312792,0.0,0.0,0.950616,0.0,0.000826,0.058107,0.469713,0.262745,0.40558,0.268917,0.463103,0.420889,0.0,0.0,0.0,0.000632,0.252212,0.008587,0.0,0.0,0.0,0.025524,0.197572,0.598796,0.0,0.003288,0.159828,0.748533,0.008597,0.184427,0.434299,0.351813,0.0,0.018443,0
1,0,0,8,1,1,1,0,1,9,1,5,0,0,0,0,3,0,9,0,4,9,93,1,1,9,2,0,0,1,7,0,2,5,8,2,1,0,0,0,9,10,9,0,0,0,0,0,0,6,6,9,0,4,9,8,9,1,1,4,0,0,2,1,0,0,56,4,0,8,0,8,1,1,1,9,1,0,4,0,55,1,0,1,1,0,1,9,0,2,0,0,0,1,4,1,1,0,0,4,0,0,1,0,1,0,9,9,1,0,1,0,0,8,0,0,1,4,0,1,2,1,9,8,0,1,1,1,0,9,15,0,0,0,0,1,0,0,4,1,7,0,1,9,9,8,2,0,8,17,0,1,1,1,1,1,9,1,0,3,0,1,9,1,1,1,1,97,0,0,1,0,1,4,0,1,1,1,9,0,9,0,2,0,0,0,2,1,9,0,1,75,0.460829,0.735903,0.0,0.019962,0.6,0.296324,0.13826,0.035714,0.552411,0.123863,0.318221,0.4,0.241037,1.0,0.094017,0.258364,0.0,0.030274,0.000249,0.025965,0.43,0.086035,0.536876,0.128143,0.0,0.000712,0.0,0.083721,0.002029,0.037004,0.17708,0.0,0.0,0.077301,0.108696,0.0,0.033136,0.0,0.319611,0.5,0.086989,0.0,0.0,0.01197,0.479526,0.055536,0.0,0.000174,0.053255,0.037037,0.080074,0.008275,0.04644,0.367219,0.086207,0.254254,0.0,0.0,0.0,0.0,0.381209,0.007147,0.031279,0.239146,0.0,0.027279,0.225185,0.668831,0.014454,0.035434,0.438903,0.022965,0.028149,0.0,0.437956,0.710583,0.152581,0.0,0.005092,0.038889,0.019342,0.0,0.245493,0.181408,0.190045,0.0,0.4,0.414742,0.00024,0.072879,0.0,0.154541,0.104762,0.028131,0.0,0.0,0.123077,0.0,0.461106,0.012312,0.42403,0.096154,0.482353,0.088,0.321685,0.0,0.0,0.869788,0.0,0.01781,0.144749,0.592186,0.270588,0.496216,0.120191,0.503421,0.386122,0.7,0.0,0.0,0.000499,0.294248,0.028989,0.0,0.5,0.0,0.067735,0.090121,0.722516,0.0,0.008956,0.197643,0.677332,0.016252,0.282557,0.436958,0.382757,0.0,0.059102,0
2,0,0,0,1,1,1,0,1,1,1,5,3,0,0,0,0,0,1,0,2,4,26,1,1,1,1,0,0,1,7,9,2,21,8,1,1,0,10,0,3,2,2,0,0,0,0,0,0,14,1,1,0,4,10,8,9,1,1,4,0,0,2,1,0,0,48,8,0,8,0,4,1,1,1,0,1,0,10,0,21,1,0,1,0,0,1,4,0,0,0,0,0,1,4,1,1,0,0,4,1,0,1,0,1,0,9,9,6,0,1,0,0,10,0,0,1,5,0,1,1,1,3,9,0,1,0,1,0,4,15,0,0,0,0,1,0,9,8,1,4,0,7,9,9,8,6,0,8,16,0,1,1,1,1,1,9,1,0,3,0,1,2,1,1,1,1,33,0,0,1,0,1,21,0,1,1,1,7,0,2,0,1,0,0,0,1,1,1,0,11,55,0.603687,0.724047,0.231674,0.027363,0.3,0.54765,0.039333,0.02381,0.290733,0.018605,0.509306,0.0,0.267076,0.371069,0.034188,0.198225,0.614379,0.00716,0.002858,0.0021,0.0,0.017456,0.225003,0.064609,0.055546,0.000665,0.009174,0.0,0.0,0.087549,0.150336,0.163239,0.0,0.01227,0.0,0.072917,0.002367,0.091626,0.318561,0.0,0.078019,0.002765,0.001674,0.0,0.666367,0.16098,0.0,0.001153,0.048139,0.211934,0.148289,0.0,0.08127,0.39605,0.0,0.118118,0.0,2.8e-05,0.113048,0.0,0.679445,0.0,0.058853,0.300746,0.0,0.156648,0.318271,0.584416,0.00917,0.186739,0.003741,0.125261,4e-05,0.0,0.843066,0.75594,0.336455,0.0,0.002425,0.027778,0.048207,0.000314,0.202333,0.209952,0.260935,0.0,0.0,0.459873,0.00012,0.067764,0.0,0.177431,0.038095,0.005768,0.0,0.0,0.030769,0.0,0.472127,0.002052,0.292317,0.025641,0.576471,0.032,0.498123,0.0,0.0,0.767513,0.0,0.039028,0.074965,0.443088,0.298039,0.412459,0.19188,0.610587,0.328914,0.0,0.0,0.0,0.001499,0.227876,0.016262,0.0,0.0,0.040441,0.021612,0.086541,0.339392,0.159723,0.008518,0.305488,0.677281,0.014049,0.285884,0.36241,0.363665,0.130694,0.010014,0
3,0,0,0,1,1,2,0,1,9,1,5,0,0,0,0,8,0,9,0,4,9,38,1,1,9,1,0,0,1,7,4,0,27,3,1,1,0,0,0,6,9,9,0,0,0,0,0,0,14,0,5,0,0,4,0,9,1,1,2,0,0,2,1,0,0,30,11,0,8,0,0,1,1,1,9,1,0,1,0,47,1,0,1,0,0,1,2,0,0,0,0,0,1,10,1,1,0,0,1,0,0,1,0,1,0,9,9,15,0,1,0,0,4,0,0,1,3,0,1,1,1,6,9,0,1,11,1,0,9,15,0,0,0,2,1,0,9,17,1,8,0,2,9,9,8,3,0,0,34,0,1,1,1,1,1,9,0,0,3,0,1,6,1,1,1,1,97,0,0,1,0,1,29,0,1,1,1,9,0,11,0,1,0,0,0,1,1,4,0,1,66,0.529954,0.800654,0.0,0.029984,0.5,0.513857,0.003576,0.0,0.297365,0.128733,0.677128,0.0,0.246034,1.0,0.034188,0.233193,0.0,0.021282,1e-05,0.002796,0.0,0.012469,0.450943,0.050007,0.0,0.0,0.036697,0.04186,0.000725,0.175727,0.155001,0.0,0.0,0.018405,0.036232,0.0,0.021302,0.0,0.30078,0.0,0.054262,0.0111,0.002587,0.0,0.752115,0.129978,0.0,0.010105,0.033237,0.014403,0.195908,0.0,0.070685,0.936948,0.0,0.185185,0.0,7e-05,0.0,0.0,0.721102,0.0,0.001236,0.31064,0.0,0.199747,0.232035,0.694805,0.00215,0.102908,0.625935,0.079332,0.085211,0.012279,0.726277,0.801296,0.179966,0.0,0.00485,0.0,0.121523,0.0,0.296698,0.187989,0.508296,0.00059,0.0,0.309742,4.3e-05,0.088261,0.0,0.31548,0.038095,0.005143,0.0,0.0,0.061538,0.0,0.491433,0.006881,0.226688,0.032051,0.505882,0.032,0.72534,0.0,0.0,0.941477,0.0,0.143545,0.038533,0.341274,0.180392,0.473563,0.289676,0.643376,0.179785,0.0,0.0,0.0,0.002887,0.360619,0.004277,0.0,0.0,0.0,0.012725,0.122723,0.588038,0.0,0.0,0.269741,0.463229,0.003586,0.366044,0.303851,0.300465,0.0,0.012345,0
4,0,0,4,1,1,1,0,1,9,1,5,0,0,0,1,4,0,9,0,28,9,88,1,1,9,1,0,0,1,9,5,2,28,8,1,1,0,1,0,8,11,9,0,0,0,0,0,0,0,17,4,0,10,10,4,9,1,1,0,0,0,2,1,0,0,57,0,0,8,0,0,1,1,1,9,1,0,1,0,50,1,0,1,0,0,1,9,0,5,0,0,0,1,10,1,1,0,0,9,0,0,1,0,1,0,9,9,13,0,1,0,0,9,0,0,1,0,0,2,4,1,9,4,0,1,8,1,0,9,15,0,0,0,0,1,0,1,7,1,0,0,4,9,11,8,3,0,5,30,0,1,1,1,1,1,9,1,0,3,0,1,9,1,1,1,1,97,0,0,1,0,1,17,0,1,1,1,9,0,9,0,1,0,0,0,1,1,4,0,1,64,0.327189,1.0,0.0,0.042068,0.882725,0.176062,0.23242,0.02381,0.936931,0.367026,0.224092,0.666667,0.210083,1.0,0.042735,0.155582,0.0,0.032244,9.5e-05,0.021159,0.162781,0.203242,0.472557,0.223306,0.0,0.0053,0.036697,0.0,0.253225,0.000643,0.1758,0.5,0.0,0.347239,0.108696,0.029206,0.04497,0.0,0.347289,0.792714,0.084408,0.0,0.010135,0.0,0.52955,0.019855,0.0,0.000948,0.004538,0.00823,0.074498,0.0,0.046782,0.362983,0.017241,0.640641,0.0,0.000815,0.0,0.0,0.481544,0.042701,0.042984,0.360748,0.0,0.104165,0.076586,0.564935,0.00616,0.093659,1.0,0.091858,0.088863,0.0,0.99635,0.520518,0.101197,0.3,0.014064,0.177778,0.066152,0.0,0.111658,0.103585,0.248869,0.0,0.3,0.638991,0.001221,0.066094,0.0,0.026028,0.047619,0.0,0.0,0.0,0.061538,0.0,0.389035,0.0,0.365099,0.064103,0.729412,0.04,0.228938,0.0,0.0,0.650264,0.0,0.004147,0.221708,0.589191,0.435294,0.482374,0.052489,0.620526,0.311764,0.0,0.0,0.0,0.000621,0.25885,0.0,0.0,0.1945,0.0,0.0,0.061913,0.609702,0.0,0.0,0.090292,0.456923,0.0,0.249197,0.384314,0.536348,0.5,0.0,0


In [2]:


# X_T, y_T = df[reg_cols + cat_cols], df[target]
# X_T, X_t, y_T, y_t = train_test_split(X, y, test_size=.2, random_state=student_id, shuffle=True, stratify=y)


In [3]:
exp1 = setup(data=df[reg_cols + cat_cols + [target]], target=target, normalize=False, categorical_features=cat_cols, 
      data_split_stratify=True, 
                  ignore_low_variance = True, n_jobs=1,preprocess=False,
                  remove_multicollinearity = True, multicollinearity_threshold = 0.95, numeric_features=reg_cols, silent=True)

Unnamed: 0,Description,Value
0,session_id,4927
1,Target,covid_vaccination
2,Target Type,Binary
3,Label Encoded,
4,Original Data,"(974842, 331)"
5,Missing Values,False
6,Numeric Features,139
7,Categorical Features,191
8,Transformed Train Set,"(682389, 330)"
9,Transformed Test Set,"(292453, 330)"


In [4]:
compare_models(cross_validation=False, sort='AUC', 
               include=['dt', 'rf', 'et', 'ada', 'lightgbm', 'catboost', 'gbc'])

Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
catboost,CatBoost Classifier,0.826,0.6808,0.0107,0.4784,0.0209,0.0134,0.0502,431.96
gbc,Gradient Boosting Classifier,0.8262,0.6765,0.0014,0.5512,0.0027,0.0019,0.0208,1532.6
ada,Ada Boost Classifier,0.826,0.6678,0.0023,0.426,0.0046,0.0027,0.0205,734.43
et,Extra Trees Classifier,0.8262,0.6441,0.0003,0.4848,0.0006,0.0004,0.0087,252.53
rf,Random Forest Classifier,0.8262,0.6434,0.0008,0.5125,0.0016,0.0011,0.0148,466.49
dt,Decision Tree Classifier,0.7151,0.5248,0.2331,0.2109,0.2214,0.0476,0.0477,153.45


<catboost.core.CatBoostClassifier at 0x7fa604f85640>

In [5]:
catboost = create_model('catboost', fold=10)

Unnamed: 0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
0,0.8263,0.6751,0.0114,0.5233,0.0223,0.015,0.0568
1,0.8257,0.6795,0.0102,0.4368,0.0199,0.0121,0.0443
2,0.8259,0.6809,0.0108,0.4621,0.0211,0.0133,0.0486
3,0.826,0.6828,0.01,0.476,0.0196,0.0126,0.0484
4,0.8258,0.6751,0.0089,0.4449,0.0174,0.0106,0.0421
5,0.8261,0.6745,0.0109,0.4905,0.0213,0.0138,0.052
6,0.826,0.6762,0.0094,0.4805,0.0184,0.0118,0.0472
7,0.8261,0.6756,0.0108,0.483,0.0211,0.0136,0.0509
8,0.826,0.6762,0.0102,0.4708,0.02,0.0127,0.0482
9,0.8257,0.6772,0.0108,0.4399,0.0211,0.0128,0.0459


In [None]:
bagged_catboost = ensemble_model(catboost)

IntProgress(value=0, description='Processing: ', max=6)

Unnamed: 0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC


In [None]:
calibrated_cat = calibrate_model(bagged_catboost)

In [None]:
tuned_cat = tune_model(caliberated_cat)