In [1]:
import pandas as pd
import numpy as np
import missingno as msno
import matplotlib.pyplot as plt
import seaborn as sns
from xgboost import XGBClassifier
from catboost import CatBoostClassifier
from lightgbm import LGBMClassifier
from sklearn.experimental import enable_hist_gradient_boosting
from sklearn.ensemble import GradientBoostingClassifier,RandomForestClassifier,BaggingClassifier,AdaBoostClassifier,ExtraTreesClassifier,StackingClassifier, HistGradientBoostingClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import roc_auc_score
from sklearn.preprocessing import LabelEncoder, KBinsDiscretizer, RobustScaler
from sklearn.model_selection import KFold,StratifiedKFold, GroupKFold,train_test_split
import gc
import datetime
from tqdm.notebook import tqdm ,tnrange
import warnings
# Silence every warning from here on (this also hides deprecation and performance warnings).
warnings.filterwarnings("ignore")
# Seed NumPy's global random number generator.
np.random.seed(0)
# Allow up to 500 columns to be shown when a DataFrame is displayed.
pd.set_option('display.max_columns', 500)

In [2]:
## Read the raw train / test CSV files
df_train = pd.read_csv("train_s3TEQDk.csv")
df_test = pd.read_csv("test_mSzZ8RL.csv")

## Some rows agree on every value except the ID (target Is_Lead included): keep only the first of each
non_id_columns = [name for name in df_train.columns if 'ID' not in name]
df_train = df_train.drop_duplicates(subset=non_id_columns)

## Some remaining rows share all feature values but disagree on the target: drop every row of such a group
feature_columns = [name for name in df_train.columns if 'Is_Lead' not in name and 'ID' not in name]
conflicting_rows = df_train.duplicated(subset=feature_columns, keep=False)
df_train = df_train[~conflicting_rows]
df_train = df_train.sort_values(list(df_train.columns)).reset_index(drop=True)

## Stack train on top of test so feature engineering is applied to both at once
df_total = pd.concat([df_train, df_test], ignore_index=True, sort=False)
print("Shape of datasets: ", df_train.shape, df_test.shape, df_total.shape)

Shape of datasets:  (245700, 11) (105312, 10) (351012, 11)


In [3]:
## Null values in the Credit_Product column become their own explicit "Missing" category.
## (Predicting the missing values from the other features was tried and did not work.)
## The filled column is assigned back instead of calling fillna(..., inplace=True) on the
## selected column: that chained in-place form acts on an intermediate object and, with
## pandas Copy-on-Write (the default from pandas 3.0), leaves df_total unchanged.
df_total['Credit_Product'] = df_total['Credit_Product'].fillna("Missing")

## One idea that just struck me is converting this into a text classification problem,
## just so that we can use large BERT architectures. Let's see if I work on that.

## Statement will be: will a 23 year old, financially active, self employed female living in RG282,
## who is associated with the company for 14 years and already has a credit product with 1005068
## bank balance be interested in buying a credit card if contacted through X2 channel?

In [4]:
# Display the combined train + test frame as the cell output.
df_total

Unnamed: 0,ID,Gender,Age,Region_Code,Occupation,Channel_Code,Vintage,Credit_Product,Avg_Account_Balance,Is_Active,Is_Lead
0,222A8XWS,Male,66,RG280,Other,X2,50,No,819609,No,0.0
1,222HSZEH,Male,49,RG268,Self_Employed,X3,69,Yes,679666,No,0.0
2,222TDSNN,Male,32,RG262,Salaried,X1,32,No,761982,No,0.0
3,224FPNSD,Female,39,RG276,Self_Employed,X2,26,Missing,583519,Yes,1.0
4,224VSEND,Male,29,RG261,Salaried,X1,13,No,736866,Yes,0.0
...,...,...,...,...,...,...,...,...,...,...,...
351007,DBENJOYI,Male,52,RG268,Salaried,X2,86,Yes,4242558,Yes,
351008,CWQ72DWS,Male,55,RG277,Other,X2,86,Yes,1159153,No,
351009,HDESC8GU,Male,35,RG254,Salaried,X4,15,No,1703727,No,
351010,2PW4SFCA,Male,53,RG254,Other,X3,93,No,737178,Yes,


### Feature Engineering

In [5]:
## Quantile-based ordinal binning (n_bins=10) of each raw numeric column.
## The discretizer is re-fitted per column on the combined train + test values.
col_discretizer = KBinsDiscretizer(n_bins=10, encode='ordinal', strategy='quantile')
for numeric_col in ('Age', 'Vintage', 'Avg_Account_Balance'):
    raw_values = df_total[numeric_col].values.reshape(-1, 1)
    bin_index = col_discretizer.fit_transform(raw_values)
    df_total[numeric_col + '_Bins'] = bin_index.astype(int)

In [6]:
def add_interaction_features(frame, base_cols, partner_cols):
    """Add string interaction columns combining ``base_cols`` with each partner column.

    For every ``partner`` in ``partner_cols`` a column named
    ``'<base_1>_..._<base_k>_<partner>'`` is written into ``frame`` in place. Its values
    are the string forms of the base columns and of the partner column joined with ``'_'``.

    Parameters
    ----------
    frame : pandas.DataFrame
        Frame that holds the source columns and receives the new columns.
    base_cols : list of str
        One or more columns forming the fixed left-hand part of every interaction.
    partner_cols : list of str
        Columns to cross with the base; one new column is created per entry, in order.

    Returns
    -------
    pandas.DataFrame
        The same ``frame`` object, returned for convenience.
    """
    name_prefix = '_'.join(base_cols)
    # The base part is identical for every partner, so build it once instead of per iteration.
    base_key = frame[base_cols[0]].astype(str)
    for extra_base in base_cols[1:]:
        base_key = base_key + '_' + frame[extra_base].astype(str)
    for partner in partner_cols:
        frame[name_prefix + '_' + partner] = base_key + '_' + frame[partner].astype(str)
    return frame


## Binned numeric columns take part in every interaction family below.
BIN_COLS = ['Age_Bins', 'Vintage_Bins', 'Avg_Account_Balance_Bins']

## (base columns, partner columns) for each family. The order is kept as-is because it
## determines the order in which the new columns are appended to df_total.
INTERACTION_SPECS = [
    (['Credit_Product'], ['Gender', 'Region_Code', 'Occupation', 'Channel_Code', 'Is_Active'] + BIN_COLS),
    (['Occupation'], ['Gender', 'Region_Code', 'Channel_Code', 'Is_Active'] + BIN_COLS),
    (['Channel_Code'], ['Gender', 'Region_Code', 'Is_Active'] + BIN_COLS),
    (['Is_Active'], ['Gender', 'Region_Code'] + BIN_COLS),
    (['Region_Code'], ['Gender'] + BIN_COLS),
    (['Credit_Product', 'Occupation'], ['Gender', 'Region_Code', 'Channel_Code', 'Is_Active'] + BIN_COLS),
]

for base_cols, partner_cols in INTERACTION_SPECS:
    add_interaction_features(df_total, base_cols, partner_cols)

In [7]:
## Columns to frequency-encode: everything except the ID and the target
freq_encoding_features = []
for name in df_total.columns:
    if 'ID' in name or 'Is_Lead' in name:
        continue
    freq_encoding_features.append(name)
print(freq_encoding_features,'\n')

## Object-dtype columns (ID excluded), used as group-by keys for the aggregate features
categorical_agg_features = [
    name
    for name, column_dtype in df_total.dtypes.items()
    if column_dtype=='O' and 'ID' not in name
]
print(categorical_agg_features)

['Gender', 'Age', 'Region_Code', 'Occupation', 'Channel_Code', 'Vintage', 'Credit_Product', 'Avg_Account_Balance', 'Is_Active', 'Age_Bins', 'Vintage_Bins', 'Avg_Account_Balance_Bins', 'Credit_Product_Gender', 'Credit_Product_Region_Code', 'Credit_Product_Occupation', 'Credit_Product_Channel_Code', 'Credit_Product_Is_Active', 'Credit_Product_Age_Bins', 'Credit_Product_Vintage_Bins', 'Credit_Product_Avg_Account_Balance_Bins', 'Occupation_Gender', 'Occupation_Region_Code', 'Occupation_Channel_Code', 'Occupation_Is_Active', 'Occupation_Age_Bins', 'Occupation_Vintage_Bins', 'Occupation_Avg_Account_Balance_Bins', 'Channel_Code_Gender', 'Channel_Code_Region_Code', 'Channel_Code_Is_Active', 'Channel_Code_Age_Bins', 'Channel_Code_Vintage_Bins', 'Channel_Code_Avg_Account_Balance_Bins', 'Is_Active_Gender', 'Is_Active_Region_Code', 'Is_Active_Age_Bins', 'Is_Active_Vintage_Bins', 'Is_Active_Avg_Account_Balance_Bins', 'Region_Code_Gender', 'Region_Code_Age_Bins', 'Region_Code_Vintage_Bins', 'Region_

In [8]:
## Frequency encoding: for each feature, add a "<feature>_counts" column holding how often
## the row's value occurs in that feature across the combined frame
for feature in freq_encoding_features:
    occurrences = df_total[feature].value_counts()
    df_total[f"{feature}_counts"] = df_total[feature].map(occurrences)

In [9]:
## For every ordered pair of distinct categorical columns: the number of distinct values
## of one column within each group of the other
for group_col in categorical_agg_features:
    counted_cols = [name for name in categorical_agg_features if name != group_col]
    for counted_col in counted_cols:
        distinct_per_group = df_total.groupby([group_col])[counted_col].transform('nunique')
        df_total[f'unique_of_{counted_col}_per_{group_col}'] = distinct_per_group

## Mean / sum / standard deviation of each raw numeric column within each categorical group.
## A std that comes out as NaN is replaced by -1.
for group_col in categorical_agg_features:
    for value_col in ['Age','Vintage','Avg_Account_Balance']:
        values_by_group = df_total.groupby([group_col])[value_col]
        df_total[f'mean_of_{value_col}_per_{group_col}'] = values_by_group.transform('mean')
        df_total[f'sum_of_{value_col}_per_{group_col}'] = values_by_group.transform('sum')
        df_total[f'std_of_{value_col}_per_{group_col}'] = values_by_group.transform('std').fillna(-1)

In [10]:
# Preview the first rows of the frame after the feature-engineering cells above.
df_total.head()

Unnamed: 0,ID,Gender,Age,Region_Code,Occupation,Channel_Code,Vintage,Credit_Product,Avg_Account_Balance,Is_Active,Is_Lead,Age_Bins,Vintage_Bins,Avg_Account_Balance_Bins,Credit_Product_Gender,Credit_Product_Region_Code,Credit_Product_Occupation,Credit_Product_Channel_Code,Credit_Product_Is_Active,Credit_Product_Age_Bins,Credit_Product_Vintage_Bins,Credit_Product_Avg_Account_Balance_Bins,Occupation_Gender,Occupation_Region_Code,Occupation_Channel_Code,Occupation_Is_Active,Occupation_Age_Bins,Occupation_Vintage_Bins,Occupation_Avg_Account_Balance_Bins,Channel_Code_Gender,Channel_Code_Region_Code,Channel_Code_Is_Active,Channel_Code_Age_Bins,Channel_Code_Vintage_Bins,Channel_Code_Avg_Account_Balance_Bins,Is_Active_Gender,Is_Active_Region_Code,Is_Active_Age_Bins,Is_Active_Vintage_Bins,Is_Active_Avg_Account_Balance_Bins,Region_Code_Gender,Region_Code_Age_Bins,Region_Code_Vintage_Bins,Region_Code_Avg_Account_Balance_Bins,Credit_Product_Occupation_Gender,Credit_Product_Occupation_Region_Code,Credit_Product_Occupation_Channel_Code,Credit_Product_Occupation_Is_Active,Credit_Product_Occupation_Age_Bins,Credit_Product_Occupation_Vintage_Bins,Credit_Product_Occupation_Avg_Account_Balance_Bins,Gender_counts,Age_counts,Region_Code_counts,Occupation_counts,Channel_Code_counts,Vintage_counts,Credit_Product_counts,Avg_Account_Balance_counts,Is_Active_counts,Age_Bins_counts,Vintage_Bins_counts,Avg_Account_Balance_Bins_counts,Credit_Product_Gender_counts,Credit_Product_Region_Code_counts,Credit_Product_Occupation_counts,Credit_Product_Channel_Code_counts,Credit_Product_Is_Active_counts,Credit_Product_Age_Bins_counts,Credit_Product_Vintage_Bins_counts,Credit_Product_Avg_Account_Balance_Bins_counts,Occupation_Gender_counts,Occupation_Region_Code_counts,Occupation_Channel_Code_counts,Occupation_Is_Active_counts,Occupation_Age_Bins_counts,Occupation_Vintage_Bins_counts,Occupation_Avg_Account_Balance_Bins_counts,Channel_Code_Gender_counts,Channel_Code_Region_Code_counts,Channel_Code_Is_Activ
e_counts,Channel_Code_Age_Bins_counts,Channel_Code_Vintage_Bins_counts,Channel_Code_Avg_Account_Balance_Bins_counts,Is_Active_Gender_counts,Is_Active_Region_Code_counts,Is_Active_Age_Bins_counts,Is_Active_Vintage_Bins_counts,Is_Active_Avg_Account_Balance_Bins_counts,Region_Code_Gender_counts,Region_Code_Age_Bins_counts,Region_Code_Vintage_Bins_counts,Region_Code_Avg_Account_Balance_Bins_counts,Credit_Product_Occupation_Gender_counts,Credit_Product_Occupation_Region_Code_counts,Credit_Product_Occupation_Channel_Code_counts,Credit_Product_Occupation_Is_Active_counts,Credit_Product_Occupation_Age_Bins_counts,Credit_Product_Occupation_Vintage_Bins_counts,Credit_Product_Occupation_Avg_Account_Balance_Bins_counts,unique_of_Region_Code_per_Gender,unique_of_Occupation_per_Gender,unique_of_Channel_Code_per_Gender,unique_of_Credit_Product_per_Gender,unique_of_Is_Active_per_Gender,unique_of_Credit_Product_Gender_per_Gender,unique_of_Credit_Product_Region_Code_per_Gender,unique_of_Credit_Product_Occupation_per_Gender,unique_of_Credit_Product_Channel_Code_per_Gender,unique_of_Credit_Product_Is_Active_per_Gender,unique_of_Credit_Product_Age_Bins_per_Gender,unique_of_Credit_Product_Vintage_Bins_per_Gender,unique_of_Credit_Product_Avg_Account_Balance_Bins_per_Gender,unique_of_Occupation_Gender_per_Gender,unique_of_Occupation_Region_Code_per_Gender,unique_of_Occupation_Channel_Code_per_Gender,unique_of_Occupation_Is_Active_per_Gender,unique_of_Occupation_Age_Bins_per_Gender,unique_of_Occupation_Vintage_Bins_per_Gender,unique_of_Occupation_Avg_Account_Balance_Bins_per_Gender,unique_of_Channel_Code_Gender_per_Gender,unique_of_Channel_Code_Region_Code_per_Gender,unique_of_Channel_Code_Is_Active_per_Gender,unique_of_Channel_Code_Age_Bins_per_Gender,unique_of_Channel_Code_Vintage_Bins_per_Gender,unique_of_Channel_Code_Avg_Account_Balance_Bins_per_Gender,unique_of_Is_Active_Gender_per_Gender,unique_of_Is_Active_Region_Code_per_Gender,unique_of_Is_Active_Age_Bins_per_Gender,unique_of_Is_Ac
tive_Vintage_Bins_per_Gender,unique_of_Is_Active_Avg_Account_Balance_Bins_per_Gender,unique_of_Region_Code_Gender_per_Gender,unique_of_Region_Code_Age_Bins_per_Gender,unique_of_Region_Code_Vintage_Bins_per_Gender,unique_of_Region_Code_Avg_Account_Balance_Bins_per_Gender,unique_of_Credit_Product_Occupation_Gender_per_Gender,unique_of_Credit_Product_Occupation_Region_Code_per_Gender,unique_of_Credit_Product_Occupation_Channel_Code_per_Gender,unique_of_Credit_Product_Occupation_Is_Active_per_Gender,unique_of_Credit_Product_Occupation_Age_Bins_per_Gender,unique_of_Credit_Product_Occupation_Vintage_Bins_per_Gender,unique_of_Credit_Product_Occupation_Avg_Account_Balance_Bins_per_Gender,unique_of_Gender_per_Region_Code,unique_of_Occupation_per_Region_Code,unique_of_Channel_Code_per_Region_Code,unique_of_Credit_Product_per_Region_Code,unique_of_Is_Active_per_Region_Code,unique_of_Credit_Product_Gender_per_Region_Code,unique_of_Credit_Product_Region_Code_per_Region_Code,unique_of_Credit_Product_Occupation_per_Region_Code,unique_of_Credit_Product_Channel_Code_per_Region_Code,unique_of_Credit_Product_Is_Active_per_Region_Code,unique_of_Credit_Product_Age_Bins_per_Region_Code,unique_of_Credit_Product_Vintage_Bins_per_Region_Code,unique_of_Credit_Product_Avg_Account_Balance_Bins_per_Region_Code,unique_of_Occupation_Gender_per_Region_Code,unique_of_Occupation_Region_Code_per_Region_Code,unique_of_Occupation_Channel_Code_per_Region_Code,unique_of_Occupation_Is_Active_per_Region_Code,unique_of_Occupation_Age_Bins_per_Region_Code,unique_of_Occupation_Vintage_Bins_per_Region_Code,unique_of_Occupation_Avg_Account_Balance_Bins_per_Region_Code,unique_of_Channel_Code_Gender_per_Region_Code,unique_of_Channel_Code_Region_Code_per_Region_Code,unique_of_Channel_Code_Is_Active_per_Region_Code,unique_of_Channel_Code_Age_Bins_per_Region_Code,unique_of_Channel_Code_Vintage_Bins_per_Region_Code,unique_of_Channel_Code_Avg_Account_Balance_Bins_per_Region_Code,unique_of_Is_Active_Gender_per_Region_C
ode,unique_of_Is_Active_Region_Code_per_Region_Code,unique_of_Is_Active_Age_Bins_per_Region_Code,unique_of_Is_Active_Vintage_Bins_per_Region_Code,unique_of_Is_Active_Avg_Account_Balance_Bins_per_Region_Code,unique_of_Region_Code_Gender_per_Region_Code,unique_of_Region_Code_Age_Bins_per_Region_Code,unique_of_Region_Code_Vintage_Bins_per_Region_Code,unique_of_Region_Code_Avg_Account_Balance_Bins_per_Region_Code,unique_of_Credit_Product_Occupation_Gender_per_Region_Code,unique_of_Credit_Product_Occupation_Region_Code_per_Region_Code,unique_of_Credit_Product_Occupation_Channel_Code_per_Region_Code,unique_of_Credit_Product_Occupation_Is_Active_per_Region_Code,unique_of_Credit_Product_Occupation_Age_Bins_per_Region_Code,unique_of_Credit_Product_Occupation_Vintage_Bins_per_Region_Code,unique_of_Credit_Product_Occupation_Avg_Account_Balance_Bins_per_Region_Code,unique_of_Gender_per_Occupation,unique_of_Region_Code_per_Occupation,unique_of_Channel_Code_per_Occupation,unique_of_Credit_Product_per_Occupation,unique_of_Is_Active_per_Occupation,unique_of_Credit_Product_Gender_per_Occupation,unique_of_Credit_Product_Region_Code_per_Occupation,unique_of_Credit_Product_Occupation_per_Occupation,unique_of_Credit_Product_Channel_Code_per_Occupation,unique_of_Credit_Product_Is_Active_per_Occupation,unique_of_Credit_Product_Age_Bins_per_Occupation,unique_of_Credit_Product_Vintage_Bins_per_Occupation,unique_of_Credit_Product_Avg_Account_Balance_Bins_per_Occupation,unique_of_Occupation_Gender_per_Occupation,unique_of_Occupation_Region_Code_per_Occupation,unique_of_Occupation_Channel_Code_per_Occupation,unique_of_Occupation_Is_Active_per_Occupation,unique_of_Occupation_Age_Bins_per_Occupation,unique_of_Occupation_Vintage_Bins_per_Occupation,unique_of_Occupation_Avg_Account_Balance_Bins_per_Occupation,unique_of_Channel_Code_Gender_per_Occupation,unique_of_Channel_Code_Region_Code_per_Occupation,unique_of_Channel_Code_Is_Active_per_Occupation,unique_of_Channel_Code_Age_Bins_per_Occupation,u
nique_of_Channel_Code_Vintage_Bins_per_Occupation,unique_of_Channel_Code_Avg_Account_Balance_Bins_per_Occupation,unique_of_Is_Active_Gender_per_Occupation,unique_of_Is_Active_Region_Code_per_Occupation,unique_of_Is_Active_Age_Bins_per_Occupation,unique_of_Is_Active_Vintage_Bins_per_Occupation,unique_of_Is_Active_Avg_Account_Balance_Bins_per_Occupation,unique_of_Region_Code_Gender_per_Occupation,unique_of_Region_Code_Age_Bins_per_Occupation,unique_of_Region_Code_Vintage_Bins_per_Occupation,unique_of_Region_Code_Avg_Account_Balance_Bins_per_Occupation,unique_of_Credit_Product_Occupation_Gender_per_Occupation,unique_of_Credit_Product_Occupation_Region_Code_per_Occupation,unique_of_Credit_Product_Occupation_Channel_Code_per_Occupation,unique_of_Credit_Product_Occupation_Is_Active_per_Occupation,unique_of_Credit_Product_Occupation_Age_Bins_per_Occupation,unique_of_Credit_Product_Occupation_Vintage_Bins_per_Occupation,unique_of_Credit_Product_Occupation_Avg_Account_Balance_Bins_per_Occupation,unique_of_Gender_per_Channel_Code,unique_of_Region_Code_per_Channel_Code,unique_of_Occupation_per_Channel_Code,unique_of_Credit_Product_per_Channel_Code,unique_of_Is_Active_per_Channel_Code,unique_of_Credit_Product_Gender_per_Channel_Code,unique_of_Credit_Product_Region_Code_per_Channel_Code,unique_of_Credit_Product_Occupation_per_Channel_Code,unique_of_Credit_Product_Channel_Code_per_Channel_Code,unique_of_Credit_Product_Is_Active_per_Channel_Code,unique_of_Credit_Product_Age_Bins_per_Channel_Code,unique_of_Credit_Product_Vintage_Bins_per_Channel_Code,unique_of_Credit_Product_Avg_Account_Balance_Bins_per_Channel_Code,unique_of_Occupation_Gender_per_Channel_Code,unique_of_Occupation_Region_Code_per_Channel_Code,unique_of_Occupation_Channel_Code_per_Channel_Code,unique_of_Occupation_Is_Active_per_Channel_Code,unique_of_Occupation_Age_Bins_per_Channel_Code,unique_of_Occupation_Vintage_Bins_per_Channel_Code,unique_of_Occupation_Avg_Account_Balance_Bins_per_Channel_Code,unique_of_Channel
_Code_Gender_per_Channel_Code,unique_of_Channel_Code_Region_Code_per_Channel_Code,unique_of_Channel_Code_Is_Active_per_Channel_Code,unique_of_Channel_Code_Age_Bins_per_Channel_Code,...,std_of_Age_per_Occupation_Region_Code,mean_of_Vintage_per_Occupation_Region_Code,sum_of_Vintage_per_Occupation_Region_Code,std_of_Vintage_per_Occupation_Region_Code,mean_of_Avg_Account_Balance_per_Occupation_Region_Code,sum_of_Avg_Account_Balance_per_Occupation_Region_Code,std_of_Avg_Account_Balance_per_Occupation_Region_Code,mean_of_Age_per_Occupation_Channel_Code,sum_of_Age_per_Occupation_Channel_Code,std_of_Age_per_Occupation_Channel_Code,mean_of_Vintage_per_Occupation_Channel_Code,sum_of_Vintage_per_Occupation_Channel_Code,std_of_Vintage_per_Occupation_Channel_Code,mean_of_Avg_Account_Balance_per_Occupation_Channel_Code,sum_of_Avg_Account_Balance_per_Occupation_Channel_Code,std_of_Avg_Account_Balance_per_Occupation_Channel_Code,mean_of_Age_per_Occupation_Is_Active,sum_of_Age_per_Occupation_Is_Active,std_of_Age_per_Occupation_Is_Active,mean_of_Vintage_per_Occupation_Is_Active,sum_of_Vintage_per_Occupation_Is_Active,std_of_Vintage_per_Occupation_Is_Active,mean_of_Avg_Account_Balance_per_Occupation_Is_Active,sum_of_Avg_Account_Balance_per_Occupation_Is_Active,std_of_Avg_Account_Balance_per_Occupation_Is_Active,mean_of_Age_per_Occupation_Age_Bins,sum_of_Age_per_Occupation_Age_Bins,std_of_Age_per_Occupation_Age_Bins,mean_of_Vintage_per_Occupation_Age_Bins,sum_of_Vintage_per_Occupation_Age_Bins,std_of_Vintage_per_Occupation_Age_Bins,mean_of_Avg_Account_Balance_per_Occupation_Age_Bins,sum_of_Avg_Account_Balance_per_Occupation_Age_Bins,std_of_Avg_Account_Balance_per_Occupation_Age_Bins,mean_of_Age_per_Occupation_Vintage_Bins,sum_of_Age_per_Occupation_Vintage_Bins,std_of_Age_per_Occupation_Vintage_Bins,mean_of_Vintage_per_Occupation_Vintage_Bins,sum_of_Vintage_per_Occupation_Vintage_Bins,std_of_Vintage_per_Occupation_Vintage_Bins,mean_of_Avg_Account_Balance_per_Occupation_Vintage_Bins,sum_
of_Avg_Account_Balance_per_Occupation_Vintage_Bins,std_of_Avg_Account_Balance_per_Occupation_Vintage_Bins,mean_of_Age_per_Occupation_Avg_Account_Balance_Bins,sum_of_Age_per_Occupation_Avg_Account_Balance_Bins,std_of_Age_per_Occupation_Avg_Account_Balance_Bins,mean_of_Vintage_per_Occupation_Avg_Account_Balance_Bins,sum_of_Vintage_per_Occupation_Avg_Account_Balance_Bins,std_of_Vintage_per_Occupation_Avg_Account_Balance_Bins,mean_of_Avg_Account_Balance_per_Occupation_Avg_Account_Balance_Bins,sum_of_Avg_Account_Balance_per_Occupation_Avg_Account_Balance_Bins,std_of_Avg_Account_Balance_per_Occupation_Avg_Account_Balance_Bins,mean_of_Age_per_Channel_Code_Gender,sum_of_Age_per_Channel_Code_Gender,std_of_Age_per_Channel_Code_Gender,mean_of_Vintage_per_Channel_Code_Gender,sum_of_Vintage_per_Channel_Code_Gender,std_of_Vintage_per_Channel_Code_Gender,mean_of_Avg_Account_Balance_per_Channel_Code_Gender,sum_of_Avg_Account_Balance_per_Channel_Code_Gender,std_of_Avg_Account_Balance_per_Channel_Code_Gender,mean_of_Age_per_Channel_Code_Region_Code,sum_of_Age_per_Channel_Code_Region_Code,std_of_Age_per_Channel_Code_Region_Code,mean_of_Vintage_per_Channel_Code_Region_Code,sum_of_Vintage_per_Channel_Code_Region_Code,std_of_Vintage_per_Channel_Code_Region_Code,mean_of_Avg_Account_Balance_per_Channel_Code_Region_Code,sum_of_Avg_Account_Balance_per_Channel_Code_Region_Code,std_of_Avg_Account_Balance_per_Channel_Code_Region_Code,mean_of_Age_per_Channel_Code_Is_Active,sum_of_Age_per_Channel_Code_Is_Active,std_of_Age_per_Channel_Code_Is_Active,mean_of_Vintage_per_Channel_Code_Is_Active,sum_of_Vintage_per_Channel_Code_Is_Active,std_of_Vintage_per_Channel_Code_Is_Active,mean_of_Avg_Account_Balance_per_Channel_Code_Is_Active,sum_of_Avg_Account_Balance_per_Channel_Code_Is_Active,std_of_Avg_Account_Balance_per_Channel_Code_Is_Active,mean_of_Age_per_Channel_Code_Age_Bins,sum_of_Age_per_Channel_Code_Age_Bins,std_of_Age_per_Channel_Code_Age_Bins,mean_of_Vintage_per_Channel_Code_Age_Bins,sum_of_Vinta
ge_per_Channel_Code_Age_Bins,std_of_Vintage_per_Channel_Code_Age_Bins,mean_of_Avg_Account_Balance_per_Channel_Code_Age_Bins,sum_of_Avg_Account_Balance_per_Channel_Code_Age_Bins,std_of_Avg_Account_Balance_per_Channel_Code_Age_Bins,mean_of_Age_per_Channel_Code_Vintage_Bins,sum_of_Age_per_Channel_Code_Vintage_Bins,std_of_Age_per_Channel_Code_Vintage_Bins,mean_of_Vintage_per_Channel_Code_Vintage_Bins,sum_of_Vintage_per_Channel_Code_Vintage_Bins,std_of_Vintage_per_Channel_Code_Vintage_Bins,mean_of_Avg_Account_Balance_per_Channel_Code_Vintage_Bins,sum_of_Avg_Account_Balance_per_Channel_Code_Vintage_Bins,std_of_Avg_Account_Balance_per_Channel_Code_Vintage_Bins,mean_of_Age_per_Channel_Code_Avg_Account_Balance_Bins,sum_of_Age_per_Channel_Code_Avg_Account_Balance_Bins,std_of_Age_per_Channel_Code_Avg_Account_Balance_Bins,mean_of_Vintage_per_Channel_Code_Avg_Account_Balance_Bins,sum_of_Vintage_per_Channel_Code_Avg_Account_Balance_Bins,std_of_Vintage_per_Channel_Code_Avg_Account_Balance_Bins,mean_of_Avg_Account_Balance_per_Channel_Code_Avg_Account_Balance_Bins,sum_of_Avg_Account_Balance_per_Channel_Code_Avg_Account_Balance_Bins,std_of_Avg_Account_Balance_per_Channel_Code_Avg_Account_Balance_Bins,mean_of_Age_per_Is_Active_Gender,sum_of_Age_per_Is_Active_Gender,std_of_Age_per_Is_Active_Gender,mean_of_Vintage_per_Is_Active_Gender,sum_of_Vintage_per_Is_Active_Gender,std_of_Vintage_per_Is_Active_Gender,mean_of_Avg_Account_Balance_per_Is_Active_Gender,sum_of_Avg_Account_Balance_per_Is_Active_Gender,std_of_Avg_Account_Balance_per_Is_Active_Gender,mean_of_Age_per_Is_Active_Region_Code,sum_of_Age_per_Is_Active_Region_Code,std_of_Age_per_Is_Active_Region_Code,mean_of_Vintage_per_Is_Active_Region_Code,sum_of_Vintage_per_Is_Active_Region_Code,std_of_Vintage_per_Is_Active_Region_Code,mean_of_Avg_Account_Balance_per_Is_Active_Region_Code,sum_of_Avg_Account_Balance_per_Is_Active_Region_Code,std_of_Avg_Account_Balance_per_Is_Active_Region_Code,mean_of_Age_per_Is_Active_Age_Bins,sum_of_Age_per_I
s_Active_Age_Bins,std_of_Age_per_Is_Active_Age_Bins,mean_of_Vintage_per_Is_Active_Age_Bins,sum_of_Vintage_per_Is_Active_Age_Bins,std_of_Vintage_per_Is_Active_Age_Bins,mean_of_Avg_Account_Balance_per_Is_Active_Age_Bins,sum_of_Avg_Account_Balance_per_Is_Active_Age_Bins,std_of_Avg_Account_Balance_per_Is_Active_Age_Bins,mean_of_Age_per_Is_Active_Vintage_Bins,sum_of_Age_per_Is_Active_Vintage_Bins,std_of_Age_per_Is_Active_Vintage_Bins,mean_of_Vintage_per_Is_Active_Vintage_Bins,sum_of_Vintage_per_Is_Active_Vintage_Bins,std_of_Vintage_per_Is_Active_Vintage_Bins,mean_of_Avg_Account_Balance_per_Is_Active_Vintage_Bins,sum_of_Avg_Account_Balance_per_Is_Active_Vintage_Bins,std_of_Avg_Account_Balance_per_Is_Active_Vintage_Bins,mean_of_Age_per_Is_Active_Avg_Account_Balance_Bins,sum_of_Age_per_Is_Active_Avg_Account_Balance_Bins,std_of_Age_per_Is_Active_Avg_Account_Balance_Bins,mean_of_Vintage_per_Is_Active_Avg_Account_Balance_Bins,sum_of_Vintage_per_Is_Active_Avg_Account_Balance_Bins,std_of_Vintage_per_Is_Active_Avg_Account_Balance_Bins,mean_of_Avg_Account_Balance_per_Is_Active_Avg_Account_Balance_Bins,sum_of_Avg_Account_Balance_per_Is_Active_Avg_Account_Balance_Bins,std_of_Avg_Account_Balance_per_Is_Active_Avg_Account_Balance_Bins,mean_of_Age_per_Region_Code_Gender,sum_of_Age_per_Region_Code_Gender,std_of_Age_per_Region_Code_Gender,mean_of_Vintage_per_Region_Code_Gender,sum_of_Vintage_per_Region_Code_Gender,std_of_Vintage_per_Region_Code_Gender,mean_of_Avg_Account_Balance_per_Region_Code_Gender,sum_of_Avg_Account_Balance_per_Region_Code_Gender,std_of_Avg_Account_Balance_per_Region_Code_Gender,mean_of_Age_per_Region_Code_Age_Bins,sum_of_Age_per_Region_Code_Age_Bins,std_of_Age_per_Region_Code_Age_Bins,mean_of_Vintage_per_Region_Code_Age_Bins,sum_of_Vintage_per_Region_Code_Age_Bins,std_of_Vintage_per_Region_Code_Age_Bins,mean_of_Avg_Account_Balance_per_Region_Code_Age_Bins,sum_of_Avg_Account_Balance_per_Region_Code_Age_Bins,std_of_Avg_Account_Balance_per_Region_Code_Age_Bins,mean_of_
Age_per_Region_Code_Vintage_Bins,sum_of_Age_per_Region_Code_Vintage_Bins,std_of_Age_per_Region_Code_Vintage_Bins,mean_of_Vintage_per_Region_Code_Vintage_Bins,sum_of_Vintage_per_Region_Code_Vintage_Bins,std_of_Vintage_per_Region_Code_Vintage_Bins,mean_of_Avg_Account_Balance_per_Region_Code_Vintage_Bins,sum_of_Avg_Account_Balance_per_Region_Code_Vintage_Bins,std_of_Avg_Account_Balance_per_Region_Code_Vintage_Bins,mean_of_Age_per_Region_Code_Avg_Account_Balance_Bins,sum_of_Age_per_Region_Code_Avg_Account_Balance_Bins,std_of_Age_per_Region_Code_Avg_Account_Balance_Bins,mean_of_Vintage_per_Region_Code_Avg_Account_Balance_Bins,sum_of_Vintage_per_Region_Code_Avg_Account_Balance_Bins,std_of_Vintage_per_Region_Code_Avg_Account_Balance_Bins,mean_of_Avg_Account_Balance_per_Region_Code_Avg_Account_Balance_Bins,sum_of_Avg_Account_Balance_per_Region_Code_Avg_Account_Balance_Bins,std_of_Avg_Account_Balance_per_Region_Code_Avg_Account_Balance_Bins,mean_of_Age_per_Credit_Product_Occupation_Gender,sum_of_Age_per_Credit_Product_Occupation_Gender,std_of_Age_per_Credit_Product_Occupation_Gender,mean_of_Vintage_per_Credit_Product_Occupation_Gender,sum_of_Vintage_per_Credit_Product_Occupation_Gender,std_of_Vintage_per_Credit_Product_Occupation_Gender,mean_of_Avg_Account_Balance_per_Credit_Product_Occupation_Gender,sum_of_Avg_Account_Balance_per_Credit_Product_Occupation_Gender,std_of_Avg_Account_Balance_per_Credit_Product_Occupation_Gender,mean_of_Age_per_Credit_Product_Occupation_Region_Code,sum_of_Age_per_Credit_Product_Occupation_Region_Code,std_of_Age_per_Credit_Product_Occupation_Region_Code,mean_of_Vintage_per_Credit_Product_Occupation_Region_Code,sum_of_Vintage_per_Credit_Product_Occupation_Region_Code,std_of_Vintage_per_Credit_Product_Occupation_Region_Code,mean_of_Avg_Account_Balance_per_Credit_Product_Occupation_Region_Code,sum_of_Avg_Account_Balance_per_Credit_Product_Occupation_Region_Code,std_of_Avg_Account_Balance_per_Credit_Product_Occupation_Region_Code,mean_of_Age_per_Cre
dit_Product_Occupation_Channel_Code,sum_of_Age_per_Credit_Product_Occupation_Channel_Code,std_of_Age_per_Credit_Product_Occupation_Channel_Code,mean_of_Vintage_per_Credit_Product_Occupation_Channel_Code,sum_of_Vintage_per_Credit_Product_Occupation_Channel_Code,std_of_Vintage_per_Credit_Product_Occupation_Channel_Code,mean_of_Avg_Account_Balance_per_Credit_Product_Occupation_Channel_Code,sum_of_Avg_Account_Balance_per_Credit_Product_Occupation_Channel_Code,std_of_Avg_Account_Balance_per_Credit_Product_Occupation_Channel_Code,mean_of_Age_per_Credit_Product_Occupation_Is_Active,sum_of_Age_per_Credit_Product_Occupation_Is_Active,std_of_Age_per_Credit_Product_Occupation_Is_Active,mean_of_Vintage_per_Credit_Product_Occupation_Is_Active,sum_of_Vintage_per_Credit_Product_Occupation_Is_Active,std_of_Vintage_per_Credit_Product_Occupation_Is_Active,mean_of_Avg_Account_Balance_per_Credit_Product_Occupation_Is_Active,sum_of_Avg_Account_Balance_per_Credit_Product_Occupation_Is_Active,std_of_Avg_Account_Balance_per_Credit_Product_Occupation_Is_Active,mean_of_Age_per_Credit_Product_Occupation_Age_Bins,sum_of_Age_per_Credit_Product_Occupation_Age_Bins,std_of_Age_per_Credit_Product_Occupation_Age_Bins,mean_of_Vintage_per_Credit_Product_Occupation_Age_Bins,sum_of_Vintage_per_Credit_Product_Occupation_Age_Bins,std_of_Vintage_per_Credit_Product_Occupation_Age_Bins,mean_of_Avg_Account_Balance_per_Credit_Product_Occupation_Age_Bins,sum_of_Avg_Account_Balance_per_Credit_Product_Occupation_Age_Bins,std_of_Avg_Account_Balance_per_Credit_Product_Occupation_Age_Bins,mean_of_Age_per_Credit_Product_Occupation_Vintage_Bins,sum_of_Age_per_Credit_Product_Occupation_Vintage_Bins,std_of_Age_per_Credit_Product_Occupation_Vintage_Bins,mean_of_Vintage_per_Credit_Product_Occupation_Vintage_Bins,sum_of_Vintage_per_Credit_Product_Occupation_Vintage_Bins,std_of_Vintage_per_Credit_Product_Occupation_Vintage_Bins,mean_of_Avg_Account_Balance_per_Credit_Product_Occupation_Vintage_Bins,sum_of_Avg_Account_Balance
_per_Credit_Product_Occupation_Vintage_Bins,std_of_Avg_Account_Balance_per_Credit_Product_Occupation_Vintage_Bins,mean_of_Age_per_Credit_Product_Occupation_Avg_Account_Balance_Bins,sum_of_Age_per_Credit_Product_Occupation_Avg_Account_Balance_Bins,std_of_Age_per_Credit_Product_Occupation_Avg_Account_Balance_Bins,mean_of_Vintage_per_Credit_Product_Occupation_Avg_Account_Balance_Bins,sum_of_Vintage_per_Credit_Product_Occupation_Avg_Account_Balance_Bins,std_of_Vintage_per_Credit_Product_Occupation_Avg_Account_Balance_Bins,mean_of_Avg_Account_Balance_per_Credit_Product_Occupation_Avg_Account_Balance_Bins,sum_of_Avg_Account_Balance_per_Credit_Product_Occupation_Avg_Account_Balance_Bins,std_of_Avg_Account_Balance_per_Credit_Product_Occupation_Avg_Account_Balance_Bins
0,222A8XWS,Male,66,RG280,Other,X2,50,No,819609,No,0.0,9,6,4,No_Male,No_RG280,No_Other,No_X2,No_No,No_9,No_6,No_4,Other_Male,Other_RG280,Other_X2,Other_No,Other_9,Other_6,Other_4,X2_Male,X2_RG280,X2_No,X2_9,X2_6,X2_4,No_Male,No_RG280,No_9,No_6,No_4,RG280_Male,RG280_9,RG280_6,RG280_4,No_Other_Male,No_Other_RG280,No_Other_X2,No_Other_No,No_Other_9,No_Other_6,No_Other_4,191887,2496,18304,100302,96897,3807,205947,4,214069,38338,34021,35099,105826,11163,56262,46342,120510,19465,19458,20811,56896,5020,27796,58975,37987,11995,9701,61993,5717,46840,12541,16675,9825,111428,11342,19473,19016,21903,10316,1876,1763,1693,29944,2973,13410,30876,19412,7139,5572,35,4,4,3,2,3,105,11,12,6,30,30,30,4,140,16,8,37,40,40,4,140,8,40,32,40,2,70,20,20,20,35,350,350,350,11,385,44,22,102,110,110,2,4,4,3,2,6,3,11,12,6,30,30,30,8,4,15,8,35,40,40,8,4,8,40,31,40,4,2,20,20,20,2,10,10,10,22,11,41,22,97,110,110,2,35,4,3,2,6,105,3,12,6,30,30,30,2,35,4,2,10,10,10,8,140,8,40,32,40,4,70,20,20,20,70,350,350,350,6,105,12,6,30,30,30,2,35,4,3,2,6,105,11,3,6,30,30,30,8,140,4,8,37,40,40,2,35,2,10,...,18.845949,51.400398,258030,34.132114,829581.5,4164498985,556886.882409,59.612714,1656995,13.932329,58.557598,1627667,27.7327,1203478.0,33451880719,870494.350615,49.934294,2944875,19.148558,48.869945,2882105,32.471279,1160149.0,68419787843,876824.991807,72.568352,2756654,6.126454,71.750046,2725569,30.916791,1367496.0,51947062522,996371.17965,63.610921,763013,12.633312,54.706294,656202,5.107403,1296181.0,15547691530,942743.608881,51.808576,502595,18.576178,51.594062,500514,33.270456,830815.315638,8059739377,36172.883086,50.65759,3140416,11.048622,56.026971,3473280,28.79591,1137846.0,70538462181,836838.307422,50.23054,287168,11.218737,52.183138,298331,28.832378,764313.0,4369577392,488392.4,50.867464,2382632,11.491183,55.196734,2585415,27.580495,1140582.0,53424877753,820634.557019,72.301252,906730,6.079155,63.67985,798609,25.849493,1271066.0,15940433648,919759.825948,52.165157,869854,11.284396,55.253373,921350,5.13382
4,1160155.0,19345591961,818408.609137,50.25313,493737,11.12306,53.898524,529553,27.871458,831209.160305,8166630000,36234.299869,43.421698,4838393,14.843543,45.627921,5084228,31.396467,1103344.0,122943467666,826303.73124,40.257979,456606,14.396477,37.076353,420520,28.971957,772901.5,8766249087,514057.6,72.449289,1410805,6.120512,69.015971,1343948,30.478878,1358942.0,26462683119,974389.479832,54.038126,1027589,11.773012,54.438105,1035195,5.227152,1219921.0,23198014620,881968.47219,40.627311,889860,14.170129,39.77204,871127,28.256752,830360.108707,18187377461,36443.839273,45.725572,471705,14.607816,48.002617,495195,33.796688,785771.5,8106018419,506009.7,72.103945,135267,6.152495,70.121002,131547,30.229553,920950.7,1727703476,621870.20451,53.917187,95056,11.39747,54.543392,96160,5.008134,794200.1,1400174746,483780.209465,43.788541,74134,15.113211,44.502067,75342,32.582585,828277.23922,1402273366,35787.968631,52.424993,1569814,19.184805,52.858002,1582780,33.280882,1178313.0,35283406631,896517.820262,49.731248,147851,20.010161,46.800538,139138,32.327005,794119.1,2360915981,537657.79704,60.525876,811652,14.126094,57.725727,774102,26.172938,1182623.0,15858971240,867233.073676,44.15857,1363440,18.99772,41.280509,1274577,28.565579,1079271.0,33323559166,839627.608503,72.623326,1409764,6.101108,69.818514,1355317,29.756567,1328817.0,25795000279,975493.990768,64.834711,462855,12.500462,54.916515,392049,4.919957,1276723.0,9114527276,939625.388027,48.70262,271371,19.449017,46.113604,256945,31.215883,830362.991206,4626782587,35818.210685
1,222HSZEH,Male,49,RG268,Self_Employed,X3,69,Yes,679666,No,0.0,6,7,3,Yes_Male,Yes_RG268,Yes_Self_Employed,Yes_X3,Yes_No,Yes_6,Yes_7,Yes_3,Self_Employed_Male,Self_Employed_RG268,Self_Employed_X3,Self_Employed_No,Self_Employed_6,Self_Employed_7,Self_Employed_3,X3_Male,X3_RG268,X3_No,X3_6,X3_7,X3_3,No_Male,No_RG268,No_6,No_7,No_3,RG268_Male,RG268_6,RG268_7,RG268_3,Yes_Self_Employed_Male,Yes_Self_Employed_RG268,Yes_Self_Employed_X3,Yes_Self_Employed_No,Yes_Self_Employed_6,Yes_Self_Employed_7,Yes_Self_Employed_3,191887,9071,51054,144072,97979,3942,103218,3,214069,35633,33822,35105,59910,17657,48110,36757,71725,13015,11256,9940,86072,25206,52824,74009,27620,20248,14599,59887,20496,47840,15432,14095,8688,111428,25667,17414,17376,22336,28486,7161,7114,3630,29176,9078,19533,31125,9723,6727,4737,35,4,4,3,2,3,105,11,12,6,30,30,30,4,140,16,8,37,40,40,4,140,8,40,32,40,2,70,20,20,20,35,350,350,350,11,385,44,22,102,110,110,2,4,4,3,2,6,3,12,12,6,30,30,30,8,4,16,8,36,40,40,8,4,8,40,32,40,4,2,20,20,20,2,10,10,10,23,12,46,24,100,112,112,2,35,4,3,2,6,105,3,12,6,27,30,30,2,35,4,2,9,10,10,8,140,8,36,32,40,4,70,18,20,20,70,315,350,350,6,105,12,6,27,30,30,2,35,4,3,2,6,105,12,3,6,30,30,30,8,140,4,8,34,40,40,2,35,2,10,...,7.615248,63.642109,1604163,32.197924,1400144.0,35292017443,959321.15235,50.086514,2645770,6.684889,73.475466,3881268,32.246086,1223406.0,64625208152,876389.443094,45.698956,3382134,9.254768,52.088341,3855006,31.460214,1103589.0,81675532908,806710.510887,48.476322,1338916,1.108534,64.391021,1778480,30.258272,1150010.0,31763274651,824792.697902,48.976492,991676,6.082518,71.625049,1450264,5.399949,1176064.0,23812936503,840837.506068,46.033975,672050,8.631275,51.697582,754733,31.643414,712527.177409,10402184263,32835.526475,55.341794,3314254,11.794537,76.851003,4602376,32.528489,1261522.0,75548760791,916831.599042,55.253757,1132481,11.486482,80.756391,1655183,31.025344,1513315.0,31016901342,1006678.0,54.063608,2586403,12.739365,70.544712,3374859,33.119302,1253141.0,59950265110,
899203.55731,48.557089,749333,1.106467,76.106273,1174472,29.37947,1238197.0,19107853734,871719.374176,54.805747,772487,11.546176,72.125151,1016604,5.489358,1238454.0,17456009843,905922.892177,53.767035,467128,12.19912,70.246547,610302,32.865164,713084.273826,6195276171,32473.974482,43.421698,4838393,14.843543,45.627921,5084228,31.396467,1103344.0,122943467666,826303.73124,48.959715,1256649,14.416602,57.275763,1470097,32.76891,1468418.0,37689886352,1007287.0,48.482428,844273,1.111746,63.421443,1104421,29.935333,1144087.0,19923138614,814298.239413,54.435083,945864,11.400757,71.46921,1241849,5.397775,1224433.0,21275739386,871923.772575,40.015983,893797,13.969594,38.399355,857688,27.455938,711970.714004,15902577868,32620.261953,50.050305,1425733,13.729344,62.055747,1767720,34.178909,1477295.0,42082224334,1025807.0,48.48443,347197,1.102088,71.128474,509351,28.929166,1383352.0,9906183047,915863.719594,53.64366,381621,11.506046,71.949396,511848,5.378566,1426371.0,10147205416,946417.727002,47.795868,173499,13.93337,54.960606,199507,32.871701,713633.37438,2590489149,32851.966517,48.064951,1402343,7.835787,58.382712,1703374,33.871224,1149917.0,33549981677,824151.868117,48.397665,439354,7.218603,64.243225,583200,33.120621,1409597.0,12796321364,923834.603672,50.165515,979883,6.636251,72.917575,1424299,33.778063,1263693.0,24683706075,887735.482805,46.835695,1457761,8.447259,51.203823,1593719,32.233311,1129693.0,35161709309,805981.538993,48.471871,471292,1.111694,64.01913,622458,31.200001,1172228.0,11397569385,803727.177651,49.152817,330651,5.84942,71.65631,482032,5.387751,1204304.0,8101349744,845419.779136,46.816128,221768,8.066013,51.472873,243827,32.615494,712697.867004,3376049796,32930.67226
2,222TDSNN,Male,32,RG262,Salaried,X1,32,No,761982,No,0.0,3,5,3,No_Male,No_RG262,No_Salaried,No_X1,No_No,No_3,No_5,No_3,Salaried_Male,Salaried_RG262,Salaried_X1,Salaried_No,Salaried_3,Salaried_5,Salaried_3,X1_Male,X1_RG262,X1_No,X1_3,X1_5,X1_3,No_Male,No_RG262,No_3,No_5,No_3,RG262_Male,RG262_3,RG262_5,RG262_3,No_Salaried_Male,No_Salaried_RG262,No_Salaried_X1,No_Salaried_No,No_Salaried_3,No_Salaried_5,No_Salaried_3,191887,8806,2588,102895,148184,13599,205947,3,214069,39493,41860,35105,105826,1747,74842,114590,120510,29980,28499,21262,46525,903,90094,78996,24313,14984,10884,65131,1248,115143,31737,23153,15794,111428,1550,30218,27827,22336,1476,279,384,282,32105,720,72010,57381,18786,11593,8084,35,4,4,3,2,3,105,11,12,6,30,30,30,4,140,16,8,37,40,40,4,140,8,40,32,40,2,70,20,20,20,35,350,350,350,11,385,44,22,102,110,110,2,4,4,3,2,6,3,11,12,6,30,30,30,8,4,14,8,32,37,38,8,4,8,39,31,40,4,2,20,20,20,2,10,10,10,21,11,40,22,82,95,100,2,35,4,3,2,6,105,3,12,6,30,30,30,2,35,4,2,10,10,10,8,139,8,40,32,40,4,70,20,20,20,70,349,349,350,6,105,12,6,30,30,30,2,35,4,3,2,6,105,12,3,6,30,30,30,8,116,4,8,35,40,40,2,35,2,10,...,5.351236,25.911406,23398,13.953236,1304622.0,1178073492,930795.995431,29.342642,2643596,3.43107,22.875597,2060954,7.503347,1008459.0,90856115937,791269.446468,30.443136,2404886,5.646091,25.407185,2007066,13.952869,985229.8,77829213452,764194.143981,32.478509,789650,1.369337,25.671616,624154,7.777694,1001793.0,24356591929,787705.493735,31.38134,470218,4.060043,33.005739,494558,1.901315,1031118.0,15450271391,806730.501204,30.558618,332600,5.982421,25.294377,275304,14.672632,711338.779952,7742211281,32419.017392,32.804594,2136596,10.161793,26.006264,1693814,13.705466,1068990.0,69624366375,836214.142864,29.553686,36883,5.694748,24.431891,30491,7.752911,1283687.0,1602041011,898275.4,31.901453,3673229,8.808122,24.888634,2865752,10.789743,983893.1,113288408561,766389.7483,32.414406,1028736,1.345264,25.655071,814215,6.710235,986389.1,31305030138,775821.515104,33.599577,777931,8
.561328,33.111778,766637,2.128787,1029588.0,23838052747,807235.614291,31.843865,502942,8.606501,24.639737,389160,11.36069,711531.185767,11237923548,32473.988442,43.421698,4838393,14.843543,45.627921,5084228,31.396467,1103344.0,122943467666,826303.73124,38.02129,58933,13.750997,36.96,57288,26.411839,1227956.0,1903331233,926719.1,32.530247,982999,1.386121,26.095837,788564,8.270369,961114.3,29042950464,750425.257837,38.123585,1060865,11.948423,34.427139,958004,3.598553,1059799.0,29491023793,807333.419652,40.015983,893797,13.969594,38.399355,857688,27.455938,711970.714004,15902577868,32620.261953,43.908537,64809,14.619065,46.885501,69203,32.82068,1201371.0,1773223768,928521.4,32.394265,9038,1.391949,26.910394,7508,8.025759,1123005.0,313318270,809664.320869,38.757812,14883,13.025229,34.786458,13358,3.621985,1190612.0,457195029,834802.847652,41.446809,11688,13.875617,39.705674,11197,28.359824,713558.219858,201223418,32433.885364,29.786762,956304,4.325881,23.753527,762607,10.294966,1050716.0,33733222534,835136.293491,29.036111,20906,3.933922,24.734722,17809,9.690648,1299242.0,935454006,924509.242604,29.281003,2108525,3.020984,22.828357,1643870,6.893988,1007358.0,72539869256,802572.393593,29.63887,1700708,3.461184,23.796779,1365483,8.058724,958228.3,54984097514,755617.062531,32.391515,608507,1.333889,25.628447,481456,6.492552,988883.5,18577165306,787323.728015,30.987837,359242,2.962896,32.787285,380103,1.372027,1017710.0,11798316674,810156.172984,29.604527,239323,3.864856,23.445571,189534,9.192923,711027.850569,5747949144,32448.821638
3,224FPNSD,Female,39,RG276,Self_Employed,X2,26,Missing,583519,Yes,1.0,4,3,2,Missing_Female,Missing_RG276,Missing_Self_Employed,Missing_X2,Missing_Yes,Missing_4,Missing_3,Missing_2,Self_Employed_Female,Self_Employed_RG276,Self_Employed_X2,Self_Employed_Yes,Self_Employed_4,Self_Employed_3,Self_Employed_2,X2_Female,X2_RG276,X2_Yes,X2_4,X2_3,X2_2,Yes_Female,Yes_RG276,Yes_4,Yes_3,Yes_2,RG276_Female,RG276_4,RG276_3,RG276_2,Missing_Self_Employed_Female,Missing_Self_Employed_RG276,Missing_Self_Employed_X2,Missing_Self_Employed_Yes,Missing_Self_Employed_4,Missing_Self_Employed_3,Missing_Self_Employed_2,159125,4765,3949,144072,96897,15701,41847,6,136943,35856,48683,35097,15696,585,21121,17026,20013,5117,3117,3595,58000,1737,62422,70063,23786,12933,14296,34904,1444,50057,16371,8963,9416,56484,1510,15926,13077,12414,1717,447,479,568,8002,287,9610,10489,3209,1504,1954,35,4,4,3,2,3,105,12,12,6,30,30,30,4,139,16,8,37,40,40,4,140,8,40,32,40,2,70,20,20,20,35,350,350,350,12,381,46,24,103,112,112,2,4,4,3,2,6,3,11,12,6,30,30,30,8,4,15,8,34,40,39,8,4,8,40,32,40,4,2,20,20,20,2,10,10,10,22,11,41,22,91,109,107,2,35,4,3,2,6,105,3,12,6,27,30,30,2,35,4,2,9,10,10,8,140,8,36,32,40,4,70,18,20,20,70,315,350,350,6,105,12,6,27,30,30,2,35,4,3,2,6,105,11,3,6,30,30,30,8,140,4,8,37,40,40,2,35,2,10,...,8.642732,53.888313,93604,30.532169,1033328.0,1794890218,710338.991139,47.176172,2944831,7.112087,53.29166,3326572,27.971754,1103596.0,68888651493,813633.30389,47.486962,3327079,7.942158,59.228808,4149748,33.333732,1164185.0,81566274158,867493.868193,39.610906,942185,1.887301,38.004036,903964,23.279947,1047158.0,24907699040,808702.186483,40.977345,529960,9.358459,23.884714,308901,2.202077,996895.3,12892846910,749660.167897,45.468383,650016,8.787713,49.880946,713098,31.301771,603725.029449,8630853021,30721.344274,50.111477,1749091,11.927468,52.017333,1815613,26.889613,1149092.0,40107901180,845068.681851,51.196676,73928,11.985442,53.051247,76606,26.972487,1028065.0,1484525314,690391.6,50.080408,2506875,11.25
3944,54.007991,2703478,28.736603,1143127.0,57221485608,857400.199502,39.476758,646274,1.970644,38.844725,635927,22.692999,1058855.0,17334511912,830779.997784,45.195805,405090,10.841802,23.817249,213474,2.217796,1017094.0,9116214558,767111.784202,49.043649,461795,11.017579,50.050871,471279,27.777568,604104.074979,5688243970,30910.273325,45.90176,2592715,14.878577,50.394873,2846504,33.592097,1212149.0,68467024328,911012.295089,50.574834,76368,13.985059,55.645033,84024,31.74108,1073100.0,1620381073,747358.8,39.360291,626852,2.009782,38.857089,618838,24.054278,1085890.0,17293890404,846660.408524,38.348474,501483,12.062398,23.725472,310258,2.234878,1113836.0,14565628471,858054.021367,46.26744,574364,13.881262,50.007975,620799,33.06725,604190.113179,7500416065,30828.036513,44.010483,75566,15.317689,42.758299,73416,28.100165,1023649.0,1757604532,681970.2,39.06264,17461,1.973051,36.391499,16267,21.575111,1009290.0,451152488,707287.764606,36.192067,17336,11.384524,23.960334,11477,2.186037,1032015.0,494334970,736126.075174,45.021127,25572,14.642582,46.102113,26186,30.316673,605926.725352,344166380,30947.839117,48.1006,384901,6.890524,63.517996,508271,32.338453,1204783.0,9640672966,837274.636128,48.289199,13859,7.638007,63.543554,18237,31.628371,1023730.0,293810634,711423.695437,47.846722,459807,6.773849,59.953902,576157,30.170392,1118977.0,10753373286,784354.804189,48.926304,513188,6.729236,69.976642,733985,32.617249,1206961.0,12659813187,843294.021476,39.666563,127290,1.875091,39.629791,127172,25.721776,1051485.0,3374216640,773976.497804,44.469415,66882,7.602508,23.939495,36005,2.2089,1020655.0,1535064390,769475.49375,47.549642,92912,7.009674,58.830604,114955,32.558466,605232.279427,1182623874,30233.198293
4,224VSEND,Male,29,RG261,Salaried,X1,13,No,736866,Yes,0.0,2,0,3,No_Male,No_RG261,No_Salaried,No_X1,No_Yes,No_2,No_0,No_3,Salaried_Male,Salaried_RG261,Salaried_X1,Salaried_Yes,Salaried_2,Salaried_0,Salaried_3,X1_Male,X1_RG261,X1_Yes,X1_2,X1_0,X1_3,Yes_Male,Yes_RG261,Yes_2,Yes_0,Yes_3,RG261_Male,RG261_2,RG261_0,RG261_3,No_Salaried_Male,No_Salaried_RG261,No_Salaried_X1,No_Salaried_Yes,No_Salaried_2,No_Salaried_0,No_Salaried_3,191887,18981,10917,102895,148184,17712,205947,2,136943,34664,20456,35105,105826,6819,74842,114590,85437,27852,12726,21262,46525,4776,90094,23899,23343,8706,10884,65131,7141,33041,32171,12197,15794,80459,2724,6547,7624,12769,5498,1650,815,1192,32105,3248,72010,17461,18791,6318,8084,35,4,4,3,2,3,105,11,12,6,30,30,30,4,140,16,8,37,40,40,4,140,8,40,32,40,2,70,20,20,20,35,350,350,350,11,385,44,22,102,110,110,2,4,4,3,2,6,3,11,12,6,30,30,30,8,4,16,8,33,40,40,8,4,8,40,31,40,4,2,20,20,20,2,10,10,10,22,11,42,22,95,108,107,2,35,4,3,2,6,105,3,12,6,30,30,30,2,35,4,2,10,10,10,8,139,8,40,32,40,4,70,20,20,20,70,349,349,350,6,105,12,6,30,30,30,2,35,4,3,2,6,105,12,3,6,30,30,30,8,116,4,8,35,40,40,2,35,2,10,...,4.761197,24.43928,116722,11.462796,983956.9,4699378333,627635.184296,29.342642,2643596,3.43107,22.875597,2060954,7.503347,1008459.0,90856115937,791269.446468,32.277836,771408,8.800048,29.303276,700319,24.053804,1205750.0,28816217668,931235.376272,29.451998,687498,0.497701,24.816733,579297,6.402435,1028834.0,24016065493,832470.542605,28.368711,246978,4.388178,12.733287,110856,1.137159,1003267.0,8734445228,770352.269727,30.558618,332600,5.982421,25.294377,275304,14.672632,711338.779952,7742211281,32419.017392,32.804594,2136596,10.161793,26.006264,1693814,13.705466,1068990.0,69624366375,836214.142864,31.048873,221720,7.687962,23.645848,168855,8.239171,972614.7,6945441814,625461.0,33.864411,1118914,12.088168,26.788989,885135,17.436822,1206001.0,39847474353,935839.376311,29.452395,947513,0.497736,24.851668,799503,6.351584,1020495.0,32830352009,816654.643563,29.1528
24,355577,7.177453,12.824711,156423,0.929599,996670.4,12156388623,763356.574174,31.843865,502942,8.606501,24.639737,389160,11.36069,711531.185767,11237923548,32473.988442,49.358257,3971316,13.632059,58.990144,4746288,34.961381,1210948.0,97431670133,909707.643905,45.251836,123266,14.672921,47.145007,128423,31.671942,1008546.0,2747280338,643883.7,29.439438,192740,0.496357,23.141897,151510,7.113857,1187280.0,7773124665,953317.508684,37.974816,289520,12.983785,12.100603,92255,1.948902,1060431.0,8084727601,805853.937093,47.045657,600726,13.883884,51.691597,660050,33.422025,712771.49769,9101379254,32459.553387,40.291924,221525,14.052246,39.013459,214496,28.532276,996462.8,5478552572,621190.3,29.441818,48579,0.496754,25.72,42438,6.229521,982973.0,1621905430,622232.353681,33.695706,27462,11.59474,12.620859,10286,1.339187,997472.0,812939715,694305.382815,37.567953,44781,13.186211,33.957215,40477,24.609781,712337.034396,849105745,32086.123089,29.786762,956304,4.325881,23.753527,762607,10.294966,1050716.0,33733222534,835136.293491,29.436576,95610,3.658461,23.757389,77164,8.686775,974369.8,3164753220,627267.05921,29.281003,2108525,3.020984,22.828357,1643870,6.893988,1007358.0,72539869256,802572.393593,29.238589,510535,4.961628,22.015921,384420,12.182611,1182039.0,20639588632,924465.449195,29.454526,553480,0.497941,24.787771,465787,6.323594,1023231.0,19227538399,835485.73612,27.75182,175336,3.034467,12.79867,80862,1.001496,993239.8,6275289280,762668.394193,29.604527,239323,3.864856,23.445571,189534,9.192923,711027.850569,5747949144,32448.821638


In [11]:
# Collect every column of df_total that carries no variation: at most one
# distinct value (Series.nunique() ignores NaN by default, so an all-NaN
# column counts as 0 and is collected too).
remove_non_unique_cols = [
    name for name in df_total.columns if df_total[name].nunique() <= 1
]

# Report how many columns were flagged.
print(len(remove_non_unique_cols))

701


In [12]:
# Drop the columns collected in `remove_non_unique_cols` from df_total, in place.
# `columns=` already selects the axis, so the redundant `axis=1` is removed.
# errors="ignore" keeps this cell re-runnable: because the drop is in place, a
# second execution would otherwise raise KeyError (the labels are already gone).
df_total.drop(columns=remove_non_unique_cols, errors="ignore", inplace=True)

In [13]:
# Preview the first five rows of df_total (left as the last expression so the
# notebook renders it with the rich DataFrame display).
df_total.head(n=5)

Unnamed: 0,ID,Gender,Age,Region_Code,Occupation,Channel_Code,Vintage,Credit_Product,Avg_Account_Balance,Is_Active,Is_Lead,Age_Bins,Vintage_Bins,Avg_Account_Balance_Bins,Credit_Product_Gender,Credit_Product_Region_Code,Credit_Product_Occupation,Credit_Product_Channel_Code,Credit_Product_Is_Active,Credit_Product_Age_Bins,Credit_Product_Vintage_Bins,Credit_Product_Avg_Account_Balance_Bins,Occupation_Gender,Occupation_Region_Code,Occupation_Channel_Code,Occupation_Is_Active,Occupation_Age_Bins,Occupation_Vintage_Bins,Occupation_Avg_Account_Balance_Bins,Channel_Code_Gender,Channel_Code_Region_Code,Channel_Code_Is_Active,Channel_Code_Age_Bins,Channel_Code_Vintage_Bins,Channel_Code_Avg_Account_Balance_Bins,Is_Active_Gender,Is_Active_Region_Code,Is_Active_Age_Bins,Is_Active_Vintage_Bins,Is_Active_Avg_Account_Balance_Bins,Region_Code_Gender,Region_Code_Age_Bins,Region_Code_Vintage_Bins,Region_Code_Avg_Account_Balance_Bins,Credit_Product_Occupation_Gender,Credit_Product_Occupation_Region_Code,Credit_Product_Occupation_Channel_Code,Credit_Product_Occupation_Is_Active,Credit_Product_Occupation_Age_Bins,Credit_Product_Occupation_Vintage_Bins,Credit_Product_Occupation_Avg_Account_Balance_Bins,Gender_counts,Age_counts,Region_Code_counts,Occupation_counts,Channel_Code_counts,Vintage_counts,Credit_Product_counts,Avg_Account_Balance_counts,Is_Active_counts,Age_Bins_counts,Vintage_Bins_counts,Avg_Account_Balance_Bins_counts,Credit_Product_Gender_counts,Credit_Product_Region_Code_counts,Credit_Product_Occupation_counts,Credit_Product_Channel_Code_counts,Credit_Product_Is_Active_counts,Credit_Product_Age_Bins_counts,Credit_Product_Vintage_Bins_counts,Credit_Product_Avg_Account_Balance_Bins_counts,Occupation_Gender_counts,Occupation_Region_Code_counts,Occupation_Channel_Code_counts,Occupation_Is_Active_counts,Occupation_Age_Bins_counts,Occupation_Vintage_Bins_counts,Occupation_Avg_Account_Balance_Bins_counts,Channel_Code_Gender_counts,Channel_Code_Region_Code_counts,Channel_Code_Is_Activ
e_counts,Channel_Code_Age_Bins_counts,Channel_Code_Vintage_Bins_counts,Channel_Code_Avg_Account_Balance_Bins_counts,Is_Active_Gender_counts,Is_Active_Region_Code_counts,Is_Active_Age_Bins_counts,Is_Active_Vintage_Bins_counts,Is_Active_Avg_Account_Balance_Bins_counts,Region_Code_Gender_counts,Region_Code_Age_Bins_counts,Region_Code_Vintage_Bins_counts,Region_Code_Avg_Account_Balance_Bins_counts,Credit_Product_Occupation_Gender_counts,Credit_Product_Occupation_Region_Code_counts,Credit_Product_Occupation_Channel_Code_counts,Credit_Product_Occupation_Is_Active_counts,Credit_Product_Occupation_Age_Bins_counts,Credit_Product_Occupation_Vintage_Bins_counts,Credit_Product_Occupation_Avg_Account_Balance_Bins_counts,unique_of_Credit_Product_Occupation_per_Gender,unique_of_Occupation_Region_Code_per_Gender,unique_of_Credit_Product_Occupation_Gender_per_Gender,unique_of_Credit_Product_Occupation_Region_Code_per_Gender,unique_of_Credit_Product_Occupation_Channel_Code_per_Gender,unique_of_Credit_Product_Occupation_Is_Active_per_Gender,unique_of_Credit_Product_Occupation_Age_Bins_per_Gender,unique_of_Credit_Product_Occupation_Vintage_Bins_per_Gender,unique_of_Credit_Product_Occupation_Avg_Account_Balance_Bins_per_Gender,unique_of_Credit_Product_Occupation_per_Region_Code,unique_of_Occupation_Gender_per_Region_Code,unique_of_Occupation_Channel_Code_per_Region_Code,unique_of_Occupation_Is_Active_per_Region_Code,unique_of_Occupation_Age_Bins_per_Region_Code,unique_of_Occupation_Vintage_Bins_per_Region_Code,unique_of_Occupation_Avg_Account_Balance_Bins_per_Region_Code,unique_of_Channel_Code_Age_Bins_per_Region_Code,unique_of_Channel_Code_Vintage_Bins_per_Region_Code,unique_of_Channel_Code_Avg_Account_Balance_Bins_per_Region_Code,unique_of_Credit_Product_Occupation_Gender_per_Region_Code,unique_of_Credit_Product_Occupation_Region_Code_per_Region_Code,unique_of_Credit_Product_Occupation_Channel_Code_per_Region_Code,unique_of_Credit_Product_Occupation_Is_Active_per_Region_Code,unique_of
_Credit_Product_Occupation_Age_Bins_per_Region_Code,unique_of_Credit_Product_Occupation_Vintage_Bins_per_Region_Code,unique_of_Credit_Product_Occupation_Avg_Account_Balance_Bins_per_Region_Code,unique_of_Credit_Product_Gender_per_Occupation,unique_of_Credit_Product_Region_Code_per_Occupation,unique_of_Credit_Product_Channel_Code_per_Occupation,unique_of_Credit_Product_Age_Bins_per_Occupation,unique_of_Credit_Product_Vintage_Bins_per_Occupation,unique_of_Credit_Product_Avg_Account_Balance_Bins_per_Occupation,unique_of_Occupation_Age_Bins_per_Occupation,unique_of_Channel_Code_Region_Code_per_Occupation,unique_of_Channel_Code_Age_Bins_per_Occupation,unique_of_Is_Active_Region_Code_per_Occupation,unique_of_Is_Active_Age_Bins_per_Occupation,unique_of_Region_Code_Gender_per_Occupation,unique_of_Region_Code_Age_Bins_per_Occupation,unique_of_Region_Code_Vintage_Bins_per_Occupation,unique_of_Region_Code_Avg_Account_Balance_Bins_per_Occupation,unique_of_Credit_Product_Occupation_Gender_per_Occupation,unique_of_Credit_Product_Occupation_Region_Code_per_Occupation,unique_of_Credit_Product_Occupation_Channel_Code_per_Occupation,unique_of_Credit_Product_Occupation_Age_Bins_per_Occupation,unique_of_Credit_Product_Occupation_Vintage_Bins_per_Occupation,unique_of_Credit_Product_Occupation_Avg_Account_Balance_Bins_per_Occupation,unique_of_Credit_Product_Occupation_per_Channel_Code,unique_of_Credit_Product_Vintage_Bins_per_Channel_Code,unique_of_Occupation_Region_Code_per_Channel_Code,unique_of_Occupation_Age_Bins_per_Channel_Code,unique_of_Occupation_Vintage_Bins_per_Channel_Code,unique_of_Channel_Code_Vintage_Bins_per_Channel_Code,unique_of_Is_Active_Vintage_Bins_per_Channel_Code,unique_of_Region_Code_Age_Bins_per_Channel_Code,unique_of_Region_Code_Vintage_Bins_per_Channel_Code,unique_of_Region_Code_Avg_Account_Balance_Bins_per_Channel_Code,unique_of_Credit_Product_Occupation_Gender_per_Channel_Code,unique_of_Credit_Product_Occupation_Region_Code_per_Channel_Code,unique_of_Credit_Pr
oduct_Occupation_Channel_Code_per_Channel_Code,unique_of_Credit_Product_Occupation_Is_Active_per_Channel_Code,unique_of_Credit_Product_Occupation_Age_Bins_per_Channel_Code,unique_of_Credit_Product_Occupation_Vintage_Bins_per_Channel_Code,unique_of_Credit_Product_Occupation_Avg_Account_Balance_Bins_per_Channel_Code,unique_of_Occupation_Gender_per_Credit_Product,unique_of_Occupation_Region_Code_per_Credit_Product,unique_of_Occupation_Channel_Code_per_Credit_Product,unique_of_Occupation_Age_Bins_per_Credit_Product,unique_of_Occupation_Vintage_Bins_per_Credit_Product,unique_of_Occupation_Avg_Account_Balance_Bins_per_Credit_Product,unique_of_Credit_Product_Occupation_Gender_per_Credit_Product,unique_of_Credit_Product_Occupation_Region_Code_per_Credit_Product,unique_of_Credit_Product_Occupation_Channel_Code_per_Credit_Product,unique_of_Credit_Product_Occupation_Age_Bins_per_Credit_Product,unique_of_Credit_Product_Occupation_Vintage_Bins_per_Credit_Product,unique_of_Credit_Product_Occupation_Avg_Account_Balance_Bins_per_Credit_Product,unique_of_Occupation_Region_Code_per_Is_Active,unique_of_Occupation_Age_Bins_per_Is_Active,unique_of_Credit_Product_Occupation_Region_Code_per_Is_Active,unique_of_Credit_Product_Occupation_Age_Bins_per_Is_Active,unique_of_Occupation_per_Credit_Product_Gender,unique_of_Credit_Product_Occupation_per_Credit_Product_Gender,unique_of_Occupation_Gender_per_Credit_Product_Gender,unique_of_Occupation_Region_Code_per_Credit_Product_Gender,unique_of_Occupation_Channel_Code_per_Credit_Product_Gender,unique_of_Occupation_Is_Active_per_Credit_Product_Gender,unique_of_Occupation_Age_Bins_per_Credit_Product_Gender,unique_of_Occupation_Vintage_Bins_per_Credit_Product_Gender,unique_of_Occupation_Avg_Account_Balance_Bins_per_Credit_Product_Gender,unique_of_Channel_Code_Region_Code_per_Credit_Product_Gender,unique_of_Region_Code_Age_Bins_per_Credit_Product_Gender,unique_of_Region_Code_Avg_Account_Balance_Bins_per_Credit_Product_Gender,unique_of_Credit_Product_O
ccupation_Gender_per_Credit_Product_Gender,unique_of_Credit_Product_Occupation_Region_Code_per_Credit_Product_Gender,unique_of_Credit_Product_Occupation_Channel_Code_per_Credit_Product_Gender,unique_of_Credit_Product_Occupation_Is_Active_per_Credit_Product_Gender,unique_of_Credit_Product_Occupation_Age_Bins_per_Credit_Product_Gender,unique_of_Credit_Product_Occupation_Vintage_Bins_per_Credit_Product_Gender,unique_of_Credit_Product_Occupation_Avg_Account_Balance_Bins_per_Credit_Product_Gender,unique_of_Occupation_per_Credit_Product_Region_Code,unique_of_Credit_Product_Occupation_per_Credit_Product_Region_Code,unique_of_Occupation_Gender_per_Credit_Product_Region_Code,unique_of_Occupation_Region_Code_per_Credit_Product_Region_Code,unique_of_Occupation_Channel_Code_per_Credit_Product_Region_Code,unique_of_Occupation_Is_Active_per_Credit_Product_Region_Code,unique_of_Occupation_Age_Bins_per_Credit_Product_Region_Code,unique_of_Occupation_Vintage_Bins_per_Credit_Product_Region_Code,unique_of_Occupation_Avg_Account_Balance_Bins_per_Credit_Product_Region_Code,unique_of_Channel_Code_Gender_per_Credit_Product_Region_Code,unique_of_Channel_Code_Is_Active_per_Credit_Product_Region_Code,unique_of_Channel_Code_Age_Bins_per_Credit_Product_Region_Code,unique_of_Channel_Code_Vintage_Bins_per_Credit_Product_Region_Code,unique_of_Channel_Code_Avg_Account_Balance_Bins_per_Credit_Product_Region_Code,unique_of_Is_Active_Age_Bins_per_Credit_Product_Region_Code,unique_of_Is_Active_Vintage_Bins_per_Credit_Product_Region_Code,unique_of_Is_Active_Avg_Account_Balance_Bins_per_Credit_Product_Region_Code,unique_of_Credit_Product_Occupation_Gender_per_Credit_Product_Region_Code,unique_of_Credit_Product_Occupation_Region_Code_per_Credit_Product_Region_Code,unique_of_Credit_Product_Occupation_Channel_Code_per_Credit_Product_Region_Code,unique_of_Credit_Product_Occupation_Is_Active_per_Credit_Product_Region_Code,unique_of_Credit_Product_Occupation_Age_Bins_per_Credit_Product_Region_Code,unique_of_C
redit_Product_Occupation_Vintage_Bins_per_Credit_Product_Region_Code,unique_of_Credit_Product_Occupation_Avg_Account_Balance_Bins_per_Credit_Product_Region_Code,unique_of_Gender_per_Credit_Product_Occupation,unique_of_Region_Code_per_Credit_Product_Occupation,unique_of_Channel_Code_per_Credit_Product_Occupation,unique_of_Credit_Product_Gender_per_Credit_Product_Occupation,unique_of_Credit_Product_Region_Code_per_Credit_Product_Occupation,unique_of_Credit_Product_Channel_Code_per_Credit_Product_Occupation,unique_of_Credit_Product_Age_Bins_per_Credit_Product_Occupation,unique_of_Credit_Product_Vintage_Bins_per_Credit_Product_Occupation,unique_of_Credit_Product_Avg_Account_Balance_Bins_per_Credit_Product_Occupation,unique_of_Occupation_Gender_per_Credit_Product_Occupation,unique_of_Occupation_Region_Code_per_Credit_Product_Occupation,unique_of_Occupation_Channel_Code_per_Credit_Product_Occupation,unique_of_Occupation_Age_Bins_per_Credit_Product_Occupation,unique_of_Occupation_Vintage_Bins_per_Credit_Product_Occupation,unique_of_Occupation_Avg_Account_Balance_Bins_per_Credit_Product_Occupation,unique_of_Channel_Code_Gender_per_Credit_Product_Occupation,unique_of_Channel_Code_Region_Code_per_Credit_Product_Occupation,unique_of_Channel_Code_Is_Active_per_Credit_Product_Occupation,unique_of_Channel_Code_Age_Bins_per_Credit_Product_Occupation,unique_of_Channel_Code_Vintage_Bins_per_Credit_Product_Occupation,unique_of_Channel_Code_Avg_Account_Balance_Bins_per_Credit_Product_Occupation,unique_of_Is_Active_Gender_per_Credit_Product_Occupation,unique_of_Is_Active_Region_Code_per_Credit_Product_Occupation,unique_of_Is_Active_Age_Bins_per_Credit_Product_Occupation,unique_of_Is_Active_Vintage_Bins_per_Credit_Product_Occupation,unique_of_Is_Active_Avg_Account_Balance_Bins_per_Credit_Product_Occupation,unique_of_Region_Code_Gender_per_Credit_Product_Occupation,...,std_of_Age_per_Occupation_Region_Code,mean_of_Vintage_per_Occupation_Region_Code,sum_of_Vintage_per_Occupation_Region_Co
de,std_of_Vintage_per_Occupation_Region_Code,mean_of_Avg_Account_Balance_per_Occupation_Region_Code,sum_of_Avg_Account_Balance_per_Occupation_Region_Code,std_of_Avg_Account_Balance_per_Occupation_Region_Code,mean_of_Age_per_Occupation_Channel_Code,sum_of_Age_per_Occupation_Channel_Code,std_of_Age_per_Occupation_Channel_Code,mean_of_Vintage_per_Occupation_Channel_Code,sum_of_Vintage_per_Occupation_Channel_Code,std_of_Vintage_per_Occupation_Channel_Code,mean_of_Avg_Account_Balance_per_Occupation_Channel_Code,sum_of_Avg_Account_Balance_per_Occupation_Channel_Code,std_of_Avg_Account_Balance_per_Occupation_Channel_Code,mean_of_Age_per_Occupation_Is_Active,sum_of_Age_per_Occupation_Is_Active,std_of_Age_per_Occupation_Is_Active,mean_of_Vintage_per_Occupation_Is_Active,sum_of_Vintage_per_Occupation_Is_Active,std_of_Vintage_per_Occupation_Is_Active,mean_of_Avg_Account_Balance_per_Occupation_Is_Active,sum_of_Avg_Account_Balance_per_Occupation_Is_Active,std_of_Avg_Account_Balance_per_Occupation_Is_Active,mean_of_Age_per_Occupation_Age_Bins,sum_of_Age_per_Occupation_Age_Bins,std_of_Age_per_Occupation_Age_Bins,mean_of_Vintage_per_Occupation_Age_Bins,sum_of_Vintage_per_Occupation_Age_Bins,std_of_Vintage_per_Occupation_Age_Bins,mean_of_Avg_Account_Balance_per_Occupation_Age_Bins,sum_of_Avg_Account_Balance_per_Occupation_Age_Bins,std_of_Avg_Account_Balance_per_Occupation_Age_Bins,mean_of_Age_per_Occupation_Vintage_Bins,sum_of_Age_per_Occupation_Vintage_Bins,std_of_Age_per_Occupation_Vintage_Bins,mean_of_Vintage_per_Occupation_Vintage_Bins,sum_of_Vintage_per_Occupation_Vintage_Bins,std_of_Vintage_per_Occupation_Vintage_Bins,mean_of_Avg_Account_Balance_per_Occupation_Vintage_Bins,sum_of_Avg_Account_Balance_per_Occupation_Vintage_Bins,std_of_Avg_Account_Balance_per_Occupation_Vintage_Bins,mean_of_Age_per_Occupation_Avg_Account_Balance_Bins,sum_of_Age_per_Occupation_Avg_Account_Balance_Bins,std_of_Age_per_Occupation_Avg_Account_Balance_Bins,mean_of_Vintage_per_Occupation_Avg_Account_Ba
lance_Bins,sum_of_Vintage_per_Occupation_Avg_Account_Balance_Bins,std_of_Vintage_per_Occupation_Avg_Account_Balance_Bins,mean_of_Avg_Account_Balance_per_Occupation_Avg_Account_Balance_Bins,sum_of_Avg_Account_Balance_per_Occupation_Avg_Account_Balance_Bins,std_of_Avg_Account_Balance_per_Occupation_Avg_Account_Balance_Bins,mean_of_Age_per_Channel_Code_Gender,sum_of_Age_per_Channel_Code_Gender,std_of_Age_per_Channel_Code_Gender,mean_of_Vintage_per_Channel_Code_Gender,sum_of_Vintage_per_Channel_Code_Gender,std_of_Vintage_per_Channel_Code_Gender,mean_of_Avg_Account_Balance_per_Channel_Code_Gender,sum_of_Avg_Account_Balance_per_Channel_Code_Gender,std_of_Avg_Account_Balance_per_Channel_Code_Gender,mean_of_Age_per_Channel_Code_Region_Code,sum_of_Age_per_Channel_Code_Region_Code,std_of_Age_per_Channel_Code_Region_Code,mean_of_Vintage_per_Channel_Code_Region_Code,sum_of_Vintage_per_Channel_Code_Region_Code,std_of_Vintage_per_Channel_Code_Region_Code,mean_of_Avg_Account_Balance_per_Channel_Code_Region_Code,sum_of_Avg_Account_Balance_per_Channel_Code_Region_Code,std_of_Avg_Account_Balance_per_Channel_Code_Region_Code,mean_of_Age_per_Channel_Code_Is_Active,sum_of_Age_per_Channel_Code_Is_Active,std_of_Age_per_Channel_Code_Is_Active,mean_of_Vintage_per_Channel_Code_Is_Active,sum_of_Vintage_per_Channel_Code_Is_Active,std_of_Vintage_per_Channel_Code_Is_Active,mean_of_Avg_Account_Balance_per_Channel_Code_Is_Active,sum_of_Avg_Account_Balance_per_Channel_Code_Is_Active,std_of_Avg_Account_Balance_per_Channel_Code_Is_Active,mean_of_Age_per_Channel_Code_Age_Bins,sum_of_Age_per_Channel_Code_Age_Bins,std_of_Age_per_Channel_Code_Age_Bins,mean_of_Vintage_per_Channel_Code_Age_Bins,sum_of_Vintage_per_Channel_Code_Age_Bins,std_of_Vintage_per_Channel_Code_Age_Bins,mean_of_Avg_Account_Balance_per_Channel_Code_Age_Bins,sum_of_Avg_Account_Balance_per_Channel_Code_Age_Bins,std_of_Avg_Account_Balance_per_Channel_Code_Age_Bins,mean_of_Age_per_Channel_Code_Vintage_Bins,sum_of_Age_per_Channel_Code_Vinta
ge_Bins,std_of_Age_per_Channel_Code_Vintage_Bins,mean_of_Vintage_per_Channel_Code_Vintage_Bins,sum_of_Vintage_per_Channel_Code_Vintage_Bins,std_of_Vintage_per_Channel_Code_Vintage_Bins,mean_of_Avg_Account_Balance_per_Channel_Code_Vintage_Bins,sum_of_Avg_Account_Balance_per_Channel_Code_Vintage_Bins,std_of_Avg_Account_Balance_per_Channel_Code_Vintage_Bins,mean_of_Age_per_Channel_Code_Avg_Account_Balance_Bins,sum_of_Age_per_Channel_Code_Avg_Account_Balance_Bins,std_of_Age_per_Channel_Code_Avg_Account_Balance_Bins,mean_of_Vintage_per_Channel_Code_Avg_Account_Balance_Bins,sum_of_Vintage_per_Channel_Code_Avg_Account_Balance_Bins,std_of_Vintage_per_Channel_Code_Avg_Account_Balance_Bins,mean_of_Avg_Account_Balance_per_Channel_Code_Avg_Account_Balance_Bins,sum_of_Avg_Account_Balance_per_Channel_Code_Avg_Account_Balance_Bins,std_of_Avg_Account_Balance_per_Channel_Code_Avg_Account_Balance_Bins,mean_of_Age_per_Is_Active_Gender,sum_of_Age_per_Is_Active_Gender,std_of_Age_per_Is_Active_Gender,mean_of_Vintage_per_Is_Active_Gender,sum_of_Vintage_per_Is_Active_Gender,std_of_Vintage_per_Is_Active_Gender,mean_of_Avg_Account_Balance_per_Is_Active_Gender,sum_of_Avg_Account_Balance_per_Is_Active_Gender,std_of_Avg_Account_Balance_per_Is_Active_Gender,mean_of_Age_per_Is_Active_Region_Code,sum_of_Age_per_Is_Active_Region_Code,std_of_Age_per_Is_Active_Region_Code,mean_of_Vintage_per_Is_Active_Region_Code,sum_of_Vintage_per_Is_Active_Region_Code,std_of_Vintage_per_Is_Active_Region_Code,mean_of_Avg_Account_Balance_per_Is_Active_Region_Code,sum_of_Avg_Account_Balance_per_Is_Active_Region_Code,std_of_Avg_Account_Balance_per_Is_Active_Region_Code,mean_of_Age_per_Is_Active_Age_Bins,sum_of_Age_per_Is_Active_Age_Bins,std_of_Age_per_Is_Active_Age_Bins,mean_of_Vintage_per_Is_Active_Age_Bins,sum_of_Vintage_per_Is_Active_Age_Bins,std_of_Vintage_per_Is_Active_Age_Bins,mean_of_Avg_Account_Balance_per_Is_Active_Age_Bins,sum_of_Avg_Account_Balance_per_Is_Active_Age_Bins,std_of_Avg_Account_Balance_per_Is_Act
ive_Age_Bins,mean_of_Age_per_Is_Active_Vintage_Bins,sum_of_Age_per_Is_Active_Vintage_Bins,std_of_Age_per_Is_Active_Vintage_Bins,mean_of_Vintage_per_Is_Active_Vintage_Bins,sum_of_Vintage_per_Is_Active_Vintage_Bins,std_of_Vintage_per_Is_Active_Vintage_Bins,mean_of_Avg_Account_Balance_per_Is_Active_Vintage_Bins,sum_of_Avg_Account_Balance_per_Is_Active_Vintage_Bins,std_of_Avg_Account_Balance_per_Is_Active_Vintage_Bins,mean_of_Age_per_Is_Active_Avg_Account_Balance_Bins,sum_of_Age_per_Is_Active_Avg_Account_Balance_Bins,std_of_Age_per_Is_Active_Avg_Account_Balance_Bins,mean_of_Vintage_per_Is_Active_Avg_Account_Balance_Bins,sum_of_Vintage_per_Is_Active_Avg_Account_Balance_Bins,std_of_Vintage_per_Is_Active_Avg_Account_Balance_Bins,mean_of_Avg_Account_Balance_per_Is_Active_Avg_Account_Balance_Bins,sum_of_Avg_Account_Balance_per_Is_Active_Avg_Account_Balance_Bins,std_of_Avg_Account_Balance_per_Is_Active_Avg_Account_Balance_Bins,mean_of_Age_per_Region_Code_Gender,sum_of_Age_per_Region_Code_Gender,std_of_Age_per_Region_Code_Gender,mean_of_Vintage_per_Region_Code_Gender,sum_of_Vintage_per_Region_Code_Gender,std_of_Vintage_per_Region_Code_Gender,mean_of_Avg_Account_Balance_per_Region_Code_Gender,sum_of_Avg_Account_Balance_per_Region_Code_Gender,std_of_Avg_Account_Balance_per_Region_Code_Gender,mean_of_Age_per_Region_Code_Age_Bins,sum_of_Age_per_Region_Code_Age_Bins,std_of_Age_per_Region_Code_Age_Bins,mean_of_Vintage_per_Region_Code_Age_Bins,sum_of_Vintage_per_Region_Code_Age_Bins,std_of_Vintage_per_Region_Code_Age_Bins,mean_of_Avg_Account_Balance_per_Region_Code_Age_Bins,sum_of_Avg_Account_Balance_per_Region_Code_Age_Bins,std_of_Avg_Account_Balance_per_Region_Code_Age_Bins,mean_of_Age_per_Region_Code_Vintage_Bins,sum_of_Age_per_Region_Code_Vintage_Bins,std_of_Age_per_Region_Code_Vintage_Bins,mean_of_Vintage_per_Region_Code_Vintage_Bins,sum_of_Vintage_per_Region_Code_Vintage_Bins,std_of_Vintage_per_Region_Code_Vintage_Bins,mean_of_Avg_Account_Balance_per_Region_Code_Vintage_Bins,su
m_of_Avg_Account_Balance_per_Region_Code_Vintage_Bins,std_of_Avg_Account_Balance_per_Region_Code_Vintage_Bins,mean_of_Age_per_Region_Code_Avg_Account_Balance_Bins,sum_of_Age_per_Region_Code_Avg_Account_Balance_Bins,std_of_Age_per_Region_Code_Avg_Account_Balance_Bins,mean_of_Vintage_per_Region_Code_Avg_Account_Balance_Bins,sum_of_Vintage_per_Region_Code_Avg_Account_Balance_Bins,std_of_Vintage_per_Region_Code_Avg_Account_Balance_Bins,mean_of_Avg_Account_Balance_per_Region_Code_Avg_Account_Balance_Bins,sum_of_Avg_Account_Balance_per_Region_Code_Avg_Account_Balance_Bins,std_of_Avg_Account_Balance_per_Region_Code_Avg_Account_Balance_Bins,mean_of_Age_per_Credit_Product_Occupation_Gender,sum_of_Age_per_Credit_Product_Occupation_Gender,std_of_Age_per_Credit_Product_Occupation_Gender,mean_of_Vintage_per_Credit_Product_Occupation_Gender,sum_of_Vintage_per_Credit_Product_Occupation_Gender,std_of_Vintage_per_Credit_Product_Occupation_Gender,mean_of_Avg_Account_Balance_per_Credit_Product_Occupation_Gender,sum_of_Avg_Account_Balance_per_Credit_Product_Occupation_Gender,std_of_Avg_Account_Balance_per_Credit_Product_Occupation_Gender,mean_of_Age_per_Credit_Product_Occupation_Region_Code,sum_of_Age_per_Credit_Product_Occupation_Region_Code,std_of_Age_per_Credit_Product_Occupation_Region_Code,mean_of_Vintage_per_Credit_Product_Occupation_Region_Code,sum_of_Vintage_per_Credit_Product_Occupation_Region_Code,std_of_Vintage_per_Credit_Product_Occupation_Region_Code,mean_of_Avg_Account_Balance_per_Credit_Product_Occupation_Region_Code,sum_of_Avg_Account_Balance_per_Credit_Product_Occupation_Region_Code,std_of_Avg_Account_Balance_per_Credit_Product_Occupation_Region_Code,mean_of_Age_per_Credit_Product_Occupation_Channel_Code,sum_of_Age_per_Credit_Product_Occupation_Channel_Code,std_of_Age_per_Credit_Product_Occupation_Channel_Code,mean_of_Vintage_per_Credit_Product_Occupation_Channel_Code,sum_of_Vintage_per_Credit_Product_Occupation_Channel_Code,std_of_Vintage_per_Credit_Product_Occupation
_Channel_Code,mean_of_Avg_Account_Balance_per_Credit_Product_Occupation_Channel_Code,sum_of_Avg_Account_Balance_per_Credit_Product_Occupation_Channel_Code,std_of_Avg_Account_Balance_per_Credit_Product_Occupation_Channel_Code,mean_of_Age_per_Credit_Product_Occupation_Is_Active,sum_of_Age_per_Credit_Product_Occupation_Is_Active,std_of_Age_per_Credit_Product_Occupation_Is_Active,mean_of_Vintage_per_Credit_Product_Occupation_Is_Active,sum_of_Vintage_per_Credit_Product_Occupation_Is_Active,std_of_Vintage_per_Credit_Product_Occupation_Is_Active,mean_of_Avg_Account_Balance_per_Credit_Product_Occupation_Is_Active,sum_of_Avg_Account_Balance_per_Credit_Product_Occupation_Is_Active,std_of_Avg_Account_Balance_per_Credit_Product_Occupation_Is_Active,mean_of_Age_per_Credit_Product_Occupation_Age_Bins,sum_of_Age_per_Credit_Product_Occupation_Age_Bins,std_of_Age_per_Credit_Product_Occupation_Age_Bins,mean_of_Vintage_per_Credit_Product_Occupation_Age_Bins,sum_of_Vintage_per_Credit_Product_Occupation_Age_Bins,std_of_Vintage_per_Credit_Product_Occupation_Age_Bins,mean_of_Avg_Account_Balance_per_Credit_Product_Occupation_Age_Bins,sum_of_Avg_Account_Balance_per_Credit_Product_Occupation_Age_Bins,std_of_Avg_Account_Balance_per_Credit_Product_Occupation_Age_Bins,mean_of_Age_per_Credit_Product_Occupation_Vintage_Bins,sum_of_Age_per_Credit_Product_Occupation_Vintage_Bins,std_of_Age_per_Credit_Product_Occupation_Vintage_Bins,mean_of_Vintage_per_Credit_Product_Occupation_Vintage_Bins,sum_of_Vintage_per_Credit_Product_Occupation_Vintage_Bins,std_of_Vintage_per_Credit_Product_Occupation_Vintage_Bins,mean_of_Avg_Account_Balance_per_Credit_Product_Occupation_Vintage_Bins,sum_of_Avg_Account_Balance_per_Credit_Product_Occupation_Vintage_Bins,std_of_Avg_Account_Balance_per_Credit_Product_Occupation_Vintage_Bins,mean_of_Age_per_Credit_Product_Occupation_Avg_Account_Balance_Bins,sum_of_Age_per_Credit_Product_Occupation_Avg_Account_Balance_Bins,std_of_Age_per_Credit_Product_Occupation_Avg_Account_Balan
ce_Bins,mean_of_Vintage_per_Credit_Product_Occupation_Avg_Account_Balance_Bins,sum_of_Vintage_per_Credit_Product_Occupation_Avg_Account_Balance_Bins,std_of_Vintage_per_Credit_Product_Occupation_Avg_Account_Balance_Bins,mean_of_Avg_Account_Balance_per_Credit_Product_Occupation_Avg_Account_Balance_Bins,sum_of_Avg_Account_Balance_per_Credit_Product_Occupation_Avg_Account_Balance_Bins,std_of_Avg_Account_Balance_per_Credit_Product_Occupation_Avg_Account_Balance_Bins
0,222A8XWS,Male,66,RG280,Other,X2,50,No,819609,No,0.0,9,6,4,No_Male,No_RG280,No_Other,No_X2,No_No,No_9,No_6,No_4,Other_Male,Other_RG280,Other_X2,Other_No,Other_9,Other_6,Other_4,X2_Male,X2_RG280,X2_No,X2_9,X2_6,X2_4,No_Male,No_RG280,No_9,No_6,No_4,RG280_Male,RG280_9,RG280_6,RG280_4,No_Other_Male,No_Other_RG280,No_Other_X2,No_Other_No,No_Other_9,No_Other_6,No_Other_4,191887,2496,18304,100302,96897,3807,205947,4,214069,38338,34021,35099,105826,11163,56262,46342,120510,19465,19458,20811,56896,5020,27796,58975,37987,11995,9701,61993,5717,46840,12541,16675,9825,111428,11342,19473,19016,21903,10316,1876,1763,1693,29944,2973,13410,30876,19412,7139,5572,11,140,11,385,44,22,102,110,110,11,8,15,8,35,40,40,40,31,40,22,11,41,22,97,110,110,6,105,12,30,30,30,10,140,40,70,20,70,350,350,350,6,105,12,30,30,30,11,30,140,37,40,10,20,347,350,350,22,385,11,22,102,110,110,7,106,14,31,32,32,7,106,14,31,32,32,139,37,382,102,3,3,3,105,12,6,29,30,30,140,350,350,3,105,12,6,29,30,30,3,3,6,3,12,6,29,30,30,8,8,40,31,40,20,20,20,6,3,12,6,29,30,30,2,35,4,2,35,4,10,10,10,2,35,4,10,10,10,8,140,8,40,32,40,4,70,20,20,20,70,...,18.845949,51.400398,258030,34.132114,829581.5,4164498985,556886.882409,59.612714,1656995,13.932329,58.557598,1627667,27.7327,1203478.0,33451880719,870494.350615,49.934294,2944875,19.148558,48.869945,2882105,32.471279,1160149.0,68419787843,876824.991807,72.568352,2756654,6.126454,71.750046,2725569,30.916791,1367496.0,51947062522,996371.17965,63.610921,763013,12.633312,54.706294,656202,5.107403,1296181.0,15547691530,942743.608881,51.808576,502595,18.576178,51.594062,500514,33.270456,830815.315638,8059739377,36172.883086,50.65759,3140416,11.048622,56.026971,3473280,28.79591,1137846.0,70538462181,836838.307422,50.23054,287168,11.218737,52.183138,298331,28.832378,764313.0,4369577392,488392.4,50.867464,2382632,11.491183,55.196734,2585415,27.580495,1140582.0,53424877753,820634.557019,72.301252,906730,6.079155,63.67985,798609,25.849493,1271066.0,15940433648,919759.825948,52.165157,86985
4,11.284396,55.253373,921350,5.133824,1160155.0,19345591961,818408.609137,50.25313,493737,11.12306,53.898524,529553,27.871458,831209.160305,8166630000,36234.299869,43.421698,4838393,14.843543,45.627921,5084228,31.396467,1103344.0,122943467666,826303.73124,40.257979,456606,14.396477,37.076353,420520,28.971957,772901.5,8766249087,514057.6,72.449289,1410805,6.120512,69.015971,1343948,30.478878,1358942.0,26462683119,974389.479832,54.038126,1027589,11.773012,54.438105,1035195,5.227152,1219921.0,23198014620,881968.47219,40.627311,889860,14.170129,39.77204,871127,28.256752,830360.108707,18187377461,36443.839273,45.725572,471705,14.607816,48.002617,495195,33.796688,785771.5,8106018419,506009.7,72.103945,135267,6.152495,70.121002,131547,30.229553,920950.7,1727703476,621870.20451,53.917187,95056,11.39747,54.543392,96160,5.008134,794200.1,1400174746,483780.209465,43.788541,74134,15.113211,44.502067,75342,32.582585,828277.23922,1402273366,35787.968631,52.424993,1569814,19.184805,52.858002,1582780,33.280882,1178313.0,35283406631,896517.820262,49.731248,147851,20.010161,46.800538,139138,32.327005,794119.1,2360915981,537657.79704,60.525876,811652,14.126094,57.725727,774102,26.172938,1182623.0,15858971240,867233.073676,44.15857,1363440,18.99772,41.280509,1274577,28.565579,1079271.0,33323559166,839627.608503,72.623326,1409764,6.101108,69.818514,1355317,29.756567,1328817.0,25795000279,975493.990768,64.834711,462855,12.500462,54.916515,392049,4.919957,1276723.0,9114527276,939625.388027,48.70262,271371,19.449017,46.113604,256945,31.215883,830362.991206,4626782587,35818.210685
1,222HSZEH,Male,49,RG268,Self_Employed,X3,69,Yes,679666,No,0.0,6,7,3,Yes_Male,Yes_RG268,Yes_Self_Employed,Yes_X3,Yes_No,Yes_6,Yes_7,Yes_3,Self_Employed_Male,Self_Employed_RG268,Self_Employed_X3,Self_Employed_No,Self_Employed_6,Self_Employed_7,Self_Employed_3,X3_Male,X3_RG268,X3_No,X3_6,X3_7,X3_3,No_Male,No_RG268,No_6,No_7,No_3,RG268_Male,RG268_6,RG268_7,RG268_3,Yes_Self_Employed_Male,Yes_Self_Employed_RG268,Yes_Self_Employed_X3,Yes_Self_Employed_No,Yes_Self_Employed_6,Yes_Self_Employed_7,Yes_Self_Employed_3,191887,9071,51054,144072,97979,3942,103218,3,214069,35633,33822,35105,59910,17657,48110,36757,71725,13015,11256,9940,86072,25206,52824,74009,27620,20248,14599,59887,20496,47840,15432,14095,8688,111428,25667,17414,17376,22336,28486,7161,7114,3630,29176,9078,19533,31125,9723,6727,4737,11,140,11,385,44,22,102,110,110,12,8,16,8,36,40,40,40,32,40,23,12,46,24,100,112,112,6,105,12,27,30,30,9,140,36,70,18,70,315,350,350,6,105,12,27,30,30,12,30,140,34,40,10,20,348,350,350,23,383,12,23,98,111,111,8,140,16,37,40,40,8,140,16,37,40,40,139,37,382,102,4,4,4,140,16,8,37,40,40,140,350,350,4,140,16,8,37,40,40,4,4,8,4,16,8,35,40,40,8,8,40,32,40,20,20,20,8,4,16,8,35,40,40,2,35,4,2,35,4,9,10,10,2,35,4,9,10,10,8,140,8,36,32,40,4,70,18,20,20,70,...,7.615248,63.642109,1604163,32.197924,1400144.0,35292017443,959321.15235,50.086514,2645770,6.684889,73.475466,3881268,32.246086,1223406.0,64625208152,876389.443094,45.698956,3382134,9.254768,52.088341,3855006,31.460214,1103589.0,81675532908,806710.510887,48.476322,1338916,1.108534,64.391021,1778480,30.258272,1150010.0,31763274651,824792.697902,48.976492,991676,6.082518,71.625049,1450264,5.399949,1176064.0,23812936503,840837.506068,46.033975,672050,8.631275,51.697582,754733,31.643414,712527.177409,10402184263,32835.526475,55.341794,3314254,11.794537,76.851003,4602376,32.528489,1261522.0,75548760791,916831.599042,55.253757,1132481,11.486482,80.756391,1655183,31.025344,1513315.0,31016901342,1006678.0,54.063608,2586403,12.739365,70.544712,3374859
,33.119302,1253141.0,59950265110,899203.55731,48.557089,749333,1.106467,76.106273,1174472,29.37947,1238197.0,19107853734,871719.374176,54.805747,772487,11.546176,72.125151,1016604,5.489358,1238454.0,17456009843,905922.892177,53.767035,467128,12.19912,70.246547,610302,32.865164,713084.273826,6195276171,32473.974482,43.421698,4838393,14.843543,45.627921,5084228,31.396467,1103344.0,122943467666,826303.73124,48.959715,1256649,14.416602,57.275763,1470097,32.76891,1468418.0,37689886352,1007287.0,48.482428,844273,1.111746,63.421443,1104421,29.935333,1144087.0,19923138614,814298.239413,54.435083,945864,11.400757,71.46921,1241849,5.397775,1224433.0,21275739386,871923.772575,40.015983,893797,13.969594,38.399355,857688,27.455938,711970.714004,15902577868,32620.261953,50.050305,1425733,13.729344,62.055747,1767720,34.178909,1477295.0,42082224334,1025807.0,48.48443,347197,1.102088,71.128474,509351,28.929166,1383352.0,9906183047,915863.719594,53.64366,381621,11.506046,71.949396,511848,5.378566,1426371.0,10147205416,946417.727002,47.795868,173499,13.93337,54.960606,199507,32.871701,713633.37438,2590489149,32851.966517,48.064951,1402343,7.835787,58.382712,1703374,33.871224,1149917.0,33549981677,824151.868117,48.397665,439354,7.218603,64.243225,583200,33.120621,1409597.0,12796321364,923834.603672,50.165515,979883,6.636251,72.917575,1424299,33.778063,1263693.0,24683706075,887735.482805,46.835695,1457761,8.447259,51.203823,1593719,32.233311,1129693.0,35161709309,805981.538993,48.471871,471292,1.111694,64.01913,622458,31.200001,1172228.0,11397569385,803727.177651,49.152817,330651,5.84942,71.65631,482032,5.387751,1204304.0,8101349744,845419.779136,46.816128,221768,8.066013,51.472873,243827,32.615494,712697.867004,3376049796,32930.67226
2,222TDSNN,Male,32,RG262,Salaried,X1,32,No,761982,No,0.0,3,5,3,No_Male,No_RG262,No_Salaried,No_X1,No_No,No_3,No_5,No_3,Salaried_Male,Salaried_RG262,Salaried_X1,Salaried_No,Salaried_3,Salaried_5,Salaried_3,X1_Male,X1_RG262,X1_No,X1_3,X1_5,X1_3,No_Male,No_RG262,No_3,No_5,No_3,RG262_Male,RG262_3,RG262_5,RG262_3,No_Salaried_Male,No_Salaried_RG262,No_Salaried_X1,No_Salaried_No,No_Salaried_3,No_Salaried_5,No_Salaried_3,191887,8806,2588,102895,148184,13599,205947,3,214069,39493,41860,35105,105826,1747,74842,114590,120510,29980,28499,21262,46525,903,90094,78996,24313,14984,10884,65131,1248,115143,31737,23153,15794,111428,1550,30218,27827,22336,1476,279,384,282,32105,720,72010,57381,18786,11593,8084,11,140,11,385,44,22,102,110,110,11,8,14,8,32,37,38,39,31,40,21,11,40,22,82,95,100,6,105,12,30,30,30,10,139,40,70,20,70,349,349,350,6,105,12,30,30,30,12,30,116,35,40,10,20,350,322,350,23,332,12,23,99,111,110,7,106,14,31,32,32,7,106,14,31,32,32,139,37,382,102,3,3,3,105,12,6,29,30,30,140,350,350,3,105,12,6,29,30,30,3,3,6,3,12,6,27,28,30,8,8,39,30,40,20,20,20,6,3,12,6,27,28,30,2,35,4,2,35,4,10,10,10,2,35,4,10,10,10,8,137,8,39,32,40,4,70,20,20,20,70,...,5.351236,25.911406,23398,13.953236,1304622.0,1178073492,930795.995431,29.342642,2643596,3.43107,22.875597,2060954,7.503347,1008459.0,90856115937,791269.446468,30.443136,2404886,5.646091,25.407185,2007066,13.952869,985229.8,77829213452,764194.143981,32.478509,789650,1.369337,25.671616,624154,7.777694,1001793.0,24356591929,787705.493735,31.38134,470218,4.060043,33.005739,494558,1.901315,1031118.0,15450271391,806730.501204,30.558618,332600,5.982421,25.294377,275304,14.672632,711338.779952,7742211281,32419.017392,32.804594,2136596,10.161793,26.006264,1693814,13.705466,1068990.0,69624366375,836214.142864,29.553686,36883,5.694748,24.431891,30491,7.752911,1283687.0,1602041011,898275.4,31.901453,3673229,8.808122,24.888634,2865752,10.789743,983893.1,113288408561,766389.7483,32.414406,1028736,1.345264,25.655071,814215,6.710235,986389.1,313050301
38,775821.515104,33.599577,777931,8.561328,33.111778,766637,2.128787,1029588.0,23838052747,807235.614291,31.843865,502942,8.606501,24.639737,389160,11.36069,711531.185767,11237923548,32473.988442,43.421698,4838393,14.843543,45.627921,5084228,31.396467,1103344.0,122943467666,826303.73124,38.02129,58933,13.750997,36.96,57288,26.411839,1227956.0,1903331233,926719.1,32.530247,982999,1.386121,26.095837,788564,8.270369,961114.3,29042950464,750425.257837,38.123585,1060865,11.948423,34.427139,958004,3.598553,1059799.0,29491023793,807333.419652,40.015983,893797,13.969594,38.399355,857688,27.455938,711970.714004,15902577868,32620.261953,43.908537,64809,14.619065,46.885501,69203,32.82068,1201371.0,1773223768,928521.4,32.394265,9038,1.391949,26.910394,7508,8.025759,1123005.0,313318270,809664.320869,38.757812,14883,13.025229,34.786458,13358,3.621985,1190612.0,457195029,834802.847652,41.446809,11688,13.875617,39.705674,11197,28.359824,713558.219858,201223418,32433.885364,29.786762,956304,4.325881,23.753527,762607,10.294966,1050716.0,33733222534,835136.293491,29.036111,20906,3.933922,24.734722,17809,9.690648,1299242.0,935454006,924509.242604,29.281003,2108525,3.020984,22.828357,1643870,6.893988,1007358.0,72539869256,802572.393593,29.63887,1700708,3.461184,23.796779,1365483,8.058724,958228.3,54984097514,755617.062531,32.391515,608507,1.333889,25.628447,481456,6.492552,988883.5,18577165306,787323.728015,30.987837,359242,2.962896,32.787285,380103,1.372027,1017710.0,11798316674,810156.172984,29.604527,239323,3.864856,23.445571,189534,9.192923,711027.850569,5747949144,32448.821638
3,224FPNSD,Female,39,RG276,Self_Employed,X2,26,Missing,583519,Yes,1.0,4,3,2,Missing_Female,Missing_RG276,Missing_Self_Employed,Missing_X2,Missing_Yes,Missing_4,Missing_3,Missing_2,Self_Employed_Female,Self_Employed_RG276,Self_Employed_X2,Self_Employed_Yes,Self_Employed_4,Self_Employed_3,Self_Employed_2,X2_Female,X2_RG276,X2_Yes,X2_4,X2_3,X2_2,Yes_Female,Yes_RG276,Yes_4,Yes_3,Yes_2,RG276_Female,RG276_4,RG276_3,RG276_2,Missing_Self_Employed_Female,Missing_Self_Employed_RG276,Missing_Self_Employed_X2,Missing_Self_Employed_Yes,Missing_Self_Employed_4,Missing_Self_Employed_3,Missing_Self_Employed_2,159125,4765,3949,144072,96897,15701,41847,6,136943,35856,48683,35097,15696,585,21121,17026,20013,5117,3117,3595,58000,1737,62422,70063,23786,12933,14296,34904,1444,50057,16371,8963,9416,56484,1510,15926,13077,12414,1717,447,479,568,8002,287,9610,10489,3209,1504,1954,12,139,12,381,46,24,103,112,112,11,8,15,8,34,40,39,40,32,40,22,11,41,22,91,109,107,6,105,12,27,30,30,9,140,36,70,18,70,315,350,350,6,105,12,27,30,30,11,30,140,37,40,10,20,347,350,350,22,385,11,22,102,110,110,8,140,16,37,40,40,8,140,16,37,40,40,140,36,385,101,4,4,4,138,16,8,35,40,40,139,345,343,4,138,16,8,35,40,40,4,4,8,4,14,8,29,40,38,8,8,34,31,38,19,20,20,8,4,14,8,29,40,38,2,35,4,2,35,4,9,10,10,2,35,4,9,10,10,8,140,8,36,32,40,4,70,18,20,20,70,...,8.642732,53.888313,93604,30.532169,1033328.0,1794890218,710338.991139,47.176172,2944831,7.112087,53.29166,3326572,27.971754,1103596.0,68888651493,813633.30389,47.486962,3327079,7.942158,59.228808,4149748,33.333732,1164185.0,81566274158,867493.868193,39.610906,942185,1.887301,38.004036,903964,23.279947,1047158.0,24907699040,808702.186483,40.977345,529960,9.358459,23.884714,308901,2.202077,996895.3,12892846910,749660.167897,45.468383,650016,8.787713,49.880946,713098,31.301771,603725.029449,8630853021,30721.344274,50.111477,1749091,11.927468,52.017333,1815613,26.889613,1149092.0,40107901180,845068.681851,51.196676,73928,11.985442,53.051247,76606,26.972487,1028065.0,148452531
4,690391.6,50.080408,2506875,11.253944,54.007991,2703478,28.736603,1143127.0,57221485608,857400.199502,39.476758,646274,1.970644,38.844725,635927,22.692999,1058855.0,17334511912,830779.997784,45.195805,405090,10.841802,23.817249,213474,2.217796,1017094.0,9116214558,767111.784202,49.043649,461795,11.017579,50.050871,471279,27.777568,604104.074979,5688243970,30910.273325,45.90176,2592715,14.878577,50.394873,2846504,33.592097,1212149.0,68467024328,911012.295089,50.574834,76368,13.985059,55.645033,84024,31.74108,1073100.0,1620381073,747358.8,39.360291,626852,2.009782,38.857089,618838,24.054278,1085890.0,17293890404,846660.408524,38.348474,501483,12.062398,23.725472,310258,2.234878,1113836.0,14565628471,858054.021367,46.26744,574364,13.881262,50.007975,620799,33.06725,604190.113179,7500416065,30828.036513,44.010483,75566,15.317689,42.758299,73416,28.100165,1023649.0,1757604532,681970.2,39.06264,17461,1.973051,36.391499,16267,21.575111,1009290.0,451152488,707287.764606,36.192067,17336,11.384524,23.960334,11477,2.186037,1032015.0,494334970,736126.075174,45.021127,25572,14.642582,46.102113,26186,30.316673,605926.725352,344166380,30947.839117,48.1006,384901,6.890524,63.517996,508271,32.338453,1204783.0,9640672966,837274.636128,48.289199,13859,7.638007,63.543554,18237,31.628371,1023730.0,293810634,711423.695437,47.846722,459807,6.773849,59.953902,576157,30.170392,1118977.0,10753373286,784354.804189,48.926304,513188,6.729236,69.976642,733985,32.617249,1206961.0,12659813187,843294.021476,39.666563,127290,1.875091,39.629791,127172,25.721776,1051485.0,3374216640,773976.497804,44.469415,66882,7.602508,23.939495,36005,2.2089,1020655.0,1535064390,769475.49375,47.549642,92912,7.009674,58.830604,114955,32.558466,605232.279427,1182623874,30233.198293
4,224VSEND,Male,29,RG261,Salaried,X1,13,No,736866,Yes,0.0,2,0,3,No_Male,No_RG261,No_Salaried,No_X1,No_Yes,No_2,No_0,No_3,Salaried_Male,Salaried_RG261,Salaried_X1,Salaried_Yes,Salaried_2,Salaried_0,Salaried_3,X1_Male,X1_RG261,X1_Yes,X1_2,X1_0,X1_3,Yes_Male,Yes_RG261,Yes_2,Yes_0,Yes_3,RG261_Male,RG261_2,RG261_0,RG261_3,No_Salaried_Male,No_Salaried_RG261,No_Salaried_X1,No_Salaried_Yes,No_Salaried_2,No_Salaried_0,No_Salaried_3,191887,18981,10917,102895,148184,17712,205947,2,136943,34664,20456,35105,105826,6819,74842,114590,85437,27852,12726,21262,46525,4776,90094,23899,23343,8706,10884,65131,7141,33041,32171,12197,15794,80459,2724,6547,7624,12769,5498,1650,815,1192,32105,3248,72010,17461,18791,6318,8084,11,140,11,385,44,22,102,110,110,11,8,16,8,33,40,40,40,31,40,22,11,42,22,95,108,107,6,105,12,30,30,30,10,139,40,70,20,70,349,349,350,6,105,12,30,30,30,12,30,116,35,40,10,20,350,322,350,23,332,12,23,99,111,110,7,106,14,31,32,32,7,106,14,31,32,32,140,36,385,101,3,3,3,105,12,6,29,30,30,140,350,350,3,105,12,6,29,30,30,3,3,6,3,12,6,29,30,30,8,8,40,31,40,20,20,20,6,3,12,6,29,30,30,2,35,4,2,35,4,10,10,10,2,35,4,10,10,10,8,137,8,39,32,40,4,70,20,20,20,70,...,4.761197,24.43928,116722,11.462796,983956.9,4699378333,627635.184296,29.342642,2643596,3.43107,22.875597,2060954,7.503347,1008459.0,90856115937,791269.446468,32.277836,771408,8.800048,29.303276,700319,24.053804,1205750.0,28816217668,931235.376272,29.451998,687498,0.497701,24.816733,579297,6.402435,1028834.0,24016065493,832470.542605,28.368711,246978,4.388178,12.733287,110856,1.137159,1003267.0,8734445228,770352.269727,30.558618,332600,5.982421,25.294377,275304,14.672632,711338.779952,7742211281,32419.017392,32.804594,2136596,10.161793,26.006264,1693814,13.705466,1068990.0,69624366375,836214.142864,31.048873,221720,7.687962,23.645848,168855,8.239171,972614.7,6945441814,625461.0,33.864411,1118914,12.088168,26.788989,885135,17.436822,1206001.0,39847474353,935839.376311,29.452395,947513,0.497736,24.851668,799503,6.351584,1020495.
0,32830352009,816654.643563,29.152824,355577,7.177453,12.824711,156423,0.929599,996670.4,12156388623,763356.574174,31.843865,502942,8.606501,24.639737,389160,11.36069,711531.185767,11237923548,32473.988442,49.358257,3971316,13.632059,58.990144,4746288,34.961381,1210948.0,97431670133,909707.643905,45.251836,123266,14.672921,47.145007,128423,31.671942,1008546.0,2747280338,643883.7,29.439438,192740,0.496357,23.141897,151510,7.113857,1187280.0,7773124665,953317.508684,37.974816,289520,12.983785,12.100603,92255,1.948902,1060431.0,8084727601,805853.937093,47.045657,600726,13.883884,51.691597,660050,33.422025,712771.49769,9101379254,32459.553387,40.291924,221525,14.052246,39.013459,214496,28.532276,996462.8,5478552572,621190.3,29.441818,48579,0.496754,25.72,42438,6.229521,982973.0,1621905430,622232.353681,33.695706,27462,11.59474,12.620859,10286,1.339187,997472.0,812939715,694305.382815,37.567953,44781,13.186211,33.957215,40477,24.609781,712337.034396,849105745,32086.123089,29.786762,956304,4.325881,23.753527,762607,10.294966,1050716.0,33733222534,835136.293491,29.436576,95610,3.658461,23.757389,77164,8.686775,974369.8,3164753220,627267.05921,29.281003,2108525,3.020984,22.828357,1643870,6.893988,1007358.0,72539869256,802572.393593,29.238589,510535,4.961628,22.015921,384420,12.182611,1182039.0,20639588632,924465.449195,29.454526,553480,0.497941,24.787771,465787,6.323594,1023231.0,19227538399,835485.73612,27.75182,175336,3.034467,12.79867,80862,1.001496,993239.8,6275289280,762668.394193,29.604527,239323,3.864856,23.445571,189534,9.192923,711027.850569,5747949144,32448.821638


### Preparing Train and Test

In [14]:
## Split the combined frame back apart: rows with a known target form train, the rest form test
has_target = df_total['Is_Lead'].notnull()
train_x_full = df_total.loc[has_target].reset_index(drop=True)
test_x_full = df_total.loc[~has_target].reset_index(drop=True)
## move the target out of the train features; the test copy of the column is simply discarded
train_y_full = train_x_full.pop('Is_Lead')
test_x_full.pop('Is_Lead')
## keep the test IDs for the submission file
test_id = test_x_full.pop('ID')
## ID is removed from train as well, since every value in it is unique
train_x_full.pop('ID')
print(train_x_full.shape, train_y_full.shape, test_x_full.shape)

(245700, 1590) (245700,) (105312, 1590)


In [15]:
# Train feature columns holding at most one distinct value (as counted by nunique()).
remove_non_unique_cols_train = [
    name for name in train_x_full.columns if train_x_full[name].nunique() <= 1
]

print(len(remove_non_unique_cols_train))

0


In [16]:
# Test feature columns holding at most one distinct value (as counted by nunique()).
remove_non_unique_cols_test = [
    name for name in test_x_full.columns if test_x_full[name].nunique() <= 1
]

print(len(remove_non_unique_cols_test))

97


In [17]:
# Columns that are constant in either train or test. The union is sorted so the
# list is identical from run to run (plain set iteration order depends on string hashing).
remove_non_unique_cols_based_on_train_test = sorted(
    set(remove_non_unique_cols_train) | set(remove_non_unique_cols_test)
)

In [18]:
# Remove the constant columns from the combined frame in place.
# `columns=` already names the axis, so no `axis` argument is needed;
# `errors="ignore"` lets this cell be re-run after the columns are already gone.
df_total.drop(columns=remove_non_unique_cols_based_on_train_test, inplace=True, errors="ignore")

In [19]:
## Rebuild the train/test frames from df_total (it has just had columns dropped)
has_target = df_total['Is_Lead'].notnull()
train_x_full = df_total.loc[has_target].reset_index(drop=True)
test_x_full = df_total.loc[~has_target].reset_index(drop=True)
## move the target out of the train features; the test copy of the column is simply discarded
train_y_full = train_x_full.pop('Is_Lead')
test_x_full.pop('Is_Lead')
## keep the test IDs for the submission file
test_id = test_x_full.pop('ID')
## ID is removed from train as well, since every value in it is unique
train_x_full.pop('ID')
print(train_x_full.shape, train_y_full.shape, test_x_full.shape)

(245700, 1493) (245700,) (105312, 1493)


In [20]:
# Object-dtype columns, in column order; these are passed to CatBoost as categorical features.
cat_features = [name for name, dtype in train_x_full.dtypes.items() if dtype == 'O']
print(cat_features)

['Gender', 'Region_Code', 'Occupation', 'Channel_Code', 'Credit_Product', 'Is_Active', 'Credit_Product_Gender', 'Credit_Product_Region_Code', 'Credit_Product_Occupation', 'Credit_Product_Channel_Code', 'Credit_Product_Is_Active', 'Credit_Product_Age_Bins', 'Credit_Product_Vintage_Bins', 'Credit_Product_Avg_Account_Balance_Bins', 'Occupation_Gender', 'Occupation_Region_Code', 'Occupation_Channel_Code', 'Occupation_Is_Active', 'Occupation_Age_Bins', 'Occupation_Vintage_Bins', 'Occupation_Avg_Account_Balance_Bins', 'Channel_Code_Gender', 'Channel_Code_Region_Code', 'Channel_Code_Is_Active', 'Channel_Code_Age_Bins', 'Channel_Code_Vintage_Bins', 'Channel_Code_Avg_Account_Balance_Bins', 'Is_Active_Gender', 'Is_Active_Region_Code', 'Is_Active_Age_Bins', 'Is_Active_Vintage_Bins', 'Is_Active_Avg_Account_Balance_Bins', 'Region_Code_Gender', 'Region_Code_Age_Bins', 'Region_Code_Vintage_Bins', 'Region_Code_Avg_Account_Balance_Bins', 'Credit_Product_Occupation_Gender', 'Credit_Product_Occupation_Re

### CATBOOST

In [21]:
# Final ROC-AUC = 0.8742 (out-of-fold score noted by the author for this configuration)
#
# 5-fold stratified CV with CatBoost on the full feature set. Produces:
#   pred_x / pred_y       - out-of-fold probabilities and matching labels, in fold order
#                           (NOT in the row order of train_x)
#   pred_test             - one list of test-set probabilities per fold
#   feature_importance_df - per-fold feature importances stacked row-wise
#   final_prediction      - per-fold test probabilities, their mean ('Is_Lead') and 'ID'
#   train_pred_all        - out-of-fold labels and predictions side by side
train_x = train_x_full.copy()
train_y = train_y_full.copy()
test_x = test_x_full.copy()

NFOLDS = 5
folds = StratifiedKFold(n_splits=NFOLDS, shuffle=True, random_state=18121995)
pred_test = []
pred_x = []
pred_y = []
fold_importances = []
for fold, (train_ids, test_ids) in enumerate(folds.split(train_x, train_y)):
    print('● Fold :', fold+1)
    # split() yields positional indices, so rows are selected with .iloc; each slice
    # is built once per fold and reused for fitting, evaluation and prediction.
    fold_train_x, fold_train_y = train_x.iloc[train_ids], train_y.iloc[train_ids]
    fold_valid_x, fold_valid_y = train_x.iloc[test_ids], train_y.iloc[test_ids]

    # NOTE(review): the model seed 1812195 differs by one digit from the fold seed
    # 18121995; left unchanged so the recorded score stays reproducible - confirm intent.
    model = CatBoostClassifier(n_estimators=20000,random_state=1812195,learning_rate=0.03,eval_metric='AUC',
                              cat_features =cat_features)
    # Two eval sets are passed: the fold's training rows first, then its held-out rows.
    model.fit(fold_train_x, fold_train_y,
              eval_set=[(fold_train_x, fold_train_y), (fold_valid_x, fold_valid_y)],
              verbose=500,
              early_stopping_rounds=200)
    pred_fold = model.predict_proba(fold_valid_x)[:,-1]
    pred_x.extend(pred_fold.tolist())
    pred_y.extend(fold_valid_y.tolist())
    pred_fold_test = model.predict_proba(test_x)[:,-1]
    pred_test.append(pred_fold_test.tolist())
    print('\n')

    # Fold labels are 1-based and match the number printed above
    # (the previous `j + 1` counter labelled the folds 2..6).
    fold_importance_df = pd.DataFrame({
        "feature": train_x.columns,
        "importance": model.feature_importances_,
        "fold": fold + 1,
    })
    fold_importances.append(fold_importance_df)

# Stack the per-fold importances once instead of re-concatenating inside the loop.
feature_importance_df = pd.concat(fold_importances, axis=0)

print("Final ROC-AUC Score:", roc_auc_score(pred_y, pred_x))

# One column per fold, one row per test observation.
final_prediction = pd.DataFrame(pred_test).T
final_prediction.columns = [("FOLD_"+str(i)) for i in range(final_prediction.shape[1])]

# The submitted probability is the plain average of the fold models.
vote = final_prediction.mean(axis=1)
final_prediction['Is_Lead'] = vote
final_prediction['ID'] = test_id.values
print(final_prediction.shape)

train_pred_all = pd.DataFrame()
train_pred_all['Is_Lead'] = pred_y
train_pred_all['cb_pred'] = pred_x
final_prediction_cb = final_prediction.copy()

final_prediction.head()

● Fold : 1
0:	test: 0.8628158	test1: 0.8575245	best: 0.8575245 (0)	total: 1.3s	remaining: 7h 12m 46s
500:	test: 0.8788127	test1: 0.8707141	best: 0.8707141 (500)	total: 9m 45s	remaining: 6h 19m 58s
Stopped by overfitting detector  (200 iterations wait)

bestTest = 0.870818275
bestIteration = 763

Shrink model to first 764 iterations.


● Fold : 2
0:	test: 0.8614734	test1: 0.8628354	best: 0.8628354 (0)	total: 1.22s	remaining: 6h 46m 44s
500:	test: 0.8777185	test1: 0.8752544	best: 0.8752610 (499)	total: 9m 35s	remaining: 6h 13m 14s
1000:	test: 0.8805299	test1: 0.8755445	best: 0.8755445 (1000)	total: 19m 46s	remaining: 6h 15m 15s
Stopped by overfitting detector  (200 iterations wait)

bestTest = 0.8755903059
bestIteration = 1112

Shrink model to first 1113 iterations.


● Fold : 3
0:	test: 0.8620261	test1: 0.8606954	best: 0.8606954 (0)	total: 1.25s	remaining: 6h 55m 21s
500:	test: 0.8778105	test1: 0.8736808	best: 0.8736814 (496)	total: 10m 19s	remaining: 6h 42m
1000:	test: 0.8803611	test1:

Unnamed: 0,FOLD_0,FOLD_1,FOLD_2,FOLD_3,FOLD_4,Is_Lead,ID
0,0.047243,0.044791,0.038201,0.040695,0.036415,0.041469,VBENBARO
1,0.877784,0.870863,0.879267,0.88538,0.889843,0.880627,CCMEWNKY
2,0.050817,0.055701,0.055668,0.051307,0.052312,0.053161,VK3KGA9M
3,0.02361,0.023294,0.025823,0.024535,0.021965,0.023845,TT8RPZVC
4,0.02454,0.023651,0.025,0.024406,0.021039,0.023727,SHQZEYTZ


In [23]:
# Write the averaged CatBoost predictions as a two-column submission file.
submission_columns = ['ID', 'Is_Lead']
df_submission = final_prediction.loc[:, submission_columns]
df_submission.to_csv("cb_v3.csv", index = False)
df_submission.head()

Unnamed: 0,ID,Is_Lead
0,VBENBARO,0.041469
1,CCMEWNKY,0.880627
2,VK3KGA9M,0.053161
3,TT8RPZVC,0.023845
4,SHQZEYTZ,0.023727


In [22]:
# Average each feature's importance over the folds, highest first.
all_features = (
    feature_importance_df[["feature", "importance"]]
    .groupby("feature")
    .mean()
    .sort_values(by="importance", ascending=False)
    .reset_index()
)
# Features whose mean importance is not exactly zero.
has_importance = all_features['importance'] != 0
important_features = all_features.loc[has_importance, 'feature'].tolist()
all_features

Unnamed: 0,feature,importance
0,unique_of_Occupation_Age_Bins_per_Credit_Produ...,4.179025
1,std_of_Age_per_Credit_Product_Is_Active,3.718169
2,Occupation_Age_Bins,3.035632
3,sum_of_Age_per_Credit_Product_Is_Active,2.929656
4,mean_of_Vintage_per_Credit_Product_Is_Active,2.747740
...,...,...
1488,unique_of_Occupation_Avg_Account_Balance_Bins_...,0.000000
1489,unique_of_Credit_Product_Gender_per_Region_Cod...,0.000000
1490,unique_of_Credit_Product_Is_Active_per_Channel...,0.000000
1491,unique_of_Occupation_Avg_Account_Balance_Bins_...,0.000000


In [29]:
# Feature subset for the next model: mean importance strictly greater than 0.1.
imp_features_v1 = all_features.loc[all_features['importance'] > 0.1, 'feature'].tolist()

In [38]:
# Restrict both matrices to the selected features, in the same column order.
train_x_full_v1 = train_x_full.loc[:, imp_features_v1]
test_x_full_v1 = test_x_full.loc[:, imp_features_v1]

# Object-dtype columns are the ones later handed to CatBoost as categorical features.
cat_features_v1 = [name for name, kind in train_x_full_v1.dtypes.items() if kind == 'O']
print(cat_features_v1)

['Occupation_Age_Bins', 'Credit_Product_Occupation_Age_Bins', 'Credit_Product_Occupation_Vintage_Bins', 'Channel_Code_Vintage_Bins', 'Credit_Product_Channel_Code', 'Credit_Product_Vintage_Bins', 'Credit_Product_Occupation_Channel_Code', 'Credit_Product_Age_Bins', 'Credit_Product_Is_Active', 'Credit_Product', 'Occupation_Vintage_Bins', 'Is_Active_Vintage_Bins', 'Credit_Product_Occupation_Is_Active', 'Channel_Code_Is_Active', 'Channel_Code', 'Credit_Product_Occupation', 'Region_Code_Vintage_Bins', 'Credit_Product_Gender', 'Occupation', 'Occupation_Channel_Code', 'Occupation_Is_Active', 'Channel_Code_Age_Bins', 'Occupation_Gender', 'Credit_Product_Region_Code', 'Is_Active_Age_Bins', 'Channel_Code_Avg_Account_Balance_Bins']


In [39]:
# Final ROC-AUC = 0.8742
# 5-fold stratified cross-validation of CatBoost on the importance-filtered
# feature set. The cell produces:
#   * pred_x / pred_y      - out-of-fold probabilities and their true labels
#   * pred_test            - one list of test-set probabilities per fold
#   * feature_importance_df - per-fold feature importances (long format)
#   * final_prediction     - per-fold test predictions, their mean and the test IDs
train_x = train_x_full_v1.copy()
train_y = train_y_full.copy()
test_x = test_x_full_v1.copy()

NFOLDS = 5
folds = StratifiedKFold(n_splits=NFOLDS, shuffle=True, random_state=18121995)
pred_test = []
pred_x = []
pred_y = []
fold_importances = []  # per-fold frames, concatenated once after the loop
for fold, (train_ids, test_ids) in enumerate(folds.split(train_x, train_y)):
    print('● Fold :', fold+1)

    # split() yields *positional* indices, so select rows with .iloc; .loc would
    # only be correct when the index happens to be exactly 0..n-1.
    fold_train_x, fold_train_y = train_x.iloc[train_ids], train_y.iloc[train_ids]
    fold_valid_x, fold_valid_y = train_x.iloc[test_ids], train_y.iloc[test_ids]

    # NOTE(review): the model seed (1812195) differs from the fold seed (18121995);
    # possibly a typo - left unchanged so previous results stay reproducible.
    model = CatBoostClassifier(n_estimators=20000,random_state=1812195,learning_rate=0.03,eval_metric='AUC',
                              cat_features =cat_features_v1)
    # Two eval sets are passed: the training fold ("test" in the log) and the
    # held-out fold ("test1" in the log).
    model.fit(fold_train_x, fold_train_y,
              eval_set=[(fold_train_x, fold_train_y), (fold_valid_x, fold_valid_y)],
              verbose=500,
              early_stopping_rounds=200)

    # Out-of-fold probabilities of the last class and the matching labels.
    pred_fold = model.predict_proba(fold_valid_x)[:,-1]
    pred_x.extend([float(i) for i  in pred_fold])
    pred_y.extend(list(fold_valid_y.values))

    # This fold's probabilities for the full test set.
    pred_fold_test = model.predict_proba(test_x)[:,-1]
    pred_test.append([float(i) for i  in pred_fold_test])
    print('\n')

    # Label importances with the same 1-based fold number that is printed above.
    fold_importance_df = pd.DataFrame({
        "feature": train_x.columns,
        "importance": model.feature_importances_,
        "fold": fold + 1,
    })
    fold_importances.append(fold_importance_df)

feature_importance_df = pd.concat(fold_importances, axis=0)

print("Final ROC-AUC Score:", roc_auc_score(pred_y, pred_x))

# One column per fold; rows follow the order of test_x.
final_prediction = pd.DataFrame(pred_test).T
final_prediction.columns = [("FOLD_"+str(i)) for i in range(final_prediction.shape[1])]

# Blend the folds by simple averaging.
vote = final_prediction.mean(axis=1)
final_prediction['Is_Lead'] = vote
final_prediction['ID'] = test_id.values
print(final_prediction.shape)

# Keep the out-of-fold predictions and a copy of the CatBoost test predictions.
train_pred_all = pd.DataFrame()
train_pred_all['Is_Lead'] = pred_y
train_pred_all['cb_pred'] = pred_x
final_prediction_cb = final_prediction.copy()

final_prediction.head()

● Fold : 1
0:	test: 0.8645757	test1: 0.8582878	best: 0.8582878 (0)	total: 555ms	remaining: 3h 5m 4s
500:	test: 0.8778492	test1: 0.8713279	best: 0.8713403 (498)	total: 5m 27s	remaining: 3h 32m 25s
1000:	test: 0.8799788	test1: 0.8716354	best: 0.8716591 (902)	total: 10m 57s	remaining: 3h 28m 4s
Stopped by overfitting detector  (200 iterations wait)

bestTest = 0.8716590531
bestIteration = 902

Shrink model to first 903 iterations.


● Fold : 2
0:	test: 0.8630838	test1: 0.8642857	best: 0.8642857 (0)	total: 685ms	remaining: 3h 48m 27s
500:	test: 0.8767686	test1: 0.8754662	best: 0.8754718 (497)	total: 5m 34s	remaining: 3h 36m 49s
1000:	test: 0.8787959	test1: 0.8756672	best: 0.8756831 (846)	total: 11m 11s	remaining: 3h 32m 21s
Stopped by overfitting detector  (200 iterations wait)

bestTest = 0.8757238657
bestIteration = 1153

Shrink model to first 1154 iterations.


● Fold : 3
0:	test: 0.8630922	test1: 0.8630889	best: 0.8630889 (0)	total: 745ms	remaining: 4h 8m 25s
500:	test: 0.8769165	test1

Unnamed: 0,FOLD_0,FOLD_1,FOLD_2,FOLD_3,FOLD_4,Is_Lead,ID
0,0.041109,0.038062,0.034993,0.040441,0.038348,0.03859,VBENBARO
1,0.87854,0.890066,0.876172,0.886581,0.884776,0.883227,CCMEWNKY
2,0.05081,0.052833,0.053842,0.051593,0.049886,0.051793,VK3KGA9M
3,0.02285,0.022985,0.022714,0.023633,0.021189,0.022674,TT8RPZVC
4,0.022703,0.022362,0.023472,0.024481,0.022216,0.023047,SHQZEYTZ


In [42]:
# Average each feature's importance across the CV folds recorded in
# feature_importance_df and rank the features from most to least important.
all_features = (
    feature_importance_df[["feature", "importance"]]
    .groupby("feature")
    .mean()
    .sort_values(by="importance", ascending=False)
    .reset_index()
)
# Features with a non-zero mean importance, in ranked order.
important_features = all_features.loc[all_features['importance'] != 0, 'feature'].tolist()
all_features

Unnamed: 0,feature,importance
0,unique_of_Region_Code_Avg_Account_Balance_Bins...,6.633275
1,sum_of_Avg_Account_Balance_per_Credit_Product_...,4.875253
2,std_of_Avg_Account_Balance_per_Credit_Product,3.218597
3,Credit_Product_Channel_Code,3.113593
4,Occupation_Age_Bins_counts,2.927466
...,...,...
147,unique_of_Credit_Product_Occupation_Age_Bins_p...,0.060863
148,unique_of_Region_Code_Gender_per_Credit_Produc...,0.057073
149,unique_of_Credit_Product_Occupation_Avg_Accoun...,0.045416
150,unique_of_Is_Active_Region_Code_per_Occupation...,0.040519


In [41]:
# Submission for the importance-filtered model: test ID plus fold-averaged probability.
df_submission = final_prediction.loc[:, ['ID', 'Is_Lead']]
df_submission.to_csv("cb_v3_imp_feat.csv", index=False)
df_submission.head()

Unnamed: 0,ID,Is_Lead
0,VBENBARO,0.03859
1,CCMEWNKY,0.883227
2,VK3KGA9M,0.051793
3,TT8RPZVC,0.022674
4,SHQZEYTZ,0.023047
