In [245]:
import pandas as pd
import numpy as np
import missingno as msno
import matplotlib.pyplot as plt
import seaborn as sns
from xgboost import XGBClassifier
from catboost import CatBoostClassifier
from lightgbm import LGBMClassifier
from sklearn.experimental import enable_hist_gradient_boosting
from sklearn.ensemble import GradientBoostingClassifier,RandomForestClassifier,BaggingClassifier,AdaBoostClassifier,ExtraTreesClassifier,StackingClassifier, HistGradientBoostingClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import f1_score,classification_report, roc_auc_score
from sklearn.preprocessing import LabelEncoder, KBinsDiscretizer, RobustScaler
from sklearn.model_selection import KFold,StratifiedKFold, GroupKFold,train_test_split
import gc
import datetime
from tqdm.notebook import tqdm ,tnrange
import warnings
# Silence every warning category for the rest of the session. Note that this
# also hides pandas/scikit-learn deprecation and performance warnings.
warnings.filterwarnings("ignore")
# Seed NumPy's legacy global random generator with a fixed value.
np.random.seed(0)
# Allow pandas to display up to 500 columns before truncating the output.
pd.set_option('display.max_columns', 500)

In [246]:
# Read the raw train/test files; exact duplicate rows are removed from train.
df_train = pd.read_csv("train_Df64byy.csv").drop_duplicates()
df_test = pd.read_csv("test_YCcRUnU.csv")

# Drop training rows whose Region_Code never occurs in the test file.
train_only_regions = set(df_train.Region_Code) - set(df_test.Region_Code)
in_train_only_region = df_train.Region_Code.isin(list(train_only_regions))
df_train = df_train[~in_train_only_region]

# Stack train on top of test so later feature engineering sees both at once.
df_total = pd.concat([df_train, df_test], ignore_index=True, sort=False)
print("Shape of datasets: ", df_train.shape, df_test.shape, df_total.shape)

Shape of datasets:  (48752, 14) (21805, 13) (70557, 14)


In [247]:
# Flag rows that have a Holding_Policy_Duration value (1) versus a null (0).
# This has to run before the nulls are filled below, because the null itself
# is the signal being captured.
df_total['holds_insurance_policy'] = np.where(df_total['Holding_Policy_Duration'].isnull(), 0, 1)

# Cleaned columns are assigned back explicitly. Calling fillna/replace with
# inplace=True on a column selection (df[col].fillna(..., inplace=True)) is a
# chained in-place mutation: under pandas Copy-on-Write it modifies a temporary
# object and leaves df_total untouched, which would make the casts below fail.
df_total['Holding_Policy_Duration'] = (
    df_total['Holding_Policy_Duration']
    .fillna(0)
    .replace('14+', 15)  # non-numeric label mapped to 15 so the numeric cast works
    .astype(float)
    .astype(int)
)
df_total['Holding_Policy_Type'] = df_total['Holding_Policy_Type'].fillna(0)
df_total['Health Indicator'] = df_total['Health Indicator'].fillna('X0')

# Age-derived features from the two age bounds.
df_total['Age_diff'] = df_total['Upper_Age'] - df_total['Lower_Age']
df_total['Average_age'] = (df_total['Upper_Age'] + df_total['Lower_Age'])/2

# String interaction keys of the form "<city>_<value>" and "<region>_<value>"
# for each listed column.
for col in ['Health Indicator','Holding_Policy_Duration','Holding_Policy_Type',
            'Reco_Insurance_Type','Reco_Policy_Cat','holds_insurance_policy',
            'Accomodation_Type','Is_Spouse']:
    col_as_text = df_total[col].astype(str)
    df_total['City_'+str(col)] = df_total['City_Code'].astype(str)+'_'+col_as_text
    df_total['Region_'+str(col)] = df_total['Region_Code'].astype(str)+'_'+col_as_text

# Pairwise string interaction keys between related policy attributes.
df_total['Holding_Policy_Duration_Type'] = df_total['Holding_Policy_Duration'].astype(str)+'_'+df_total['Holding_Policy_Type'].astype(str)

df_total['Reco_Insurance_Type_Cat'] = df_total['Reco_Insurance_Type'].astype(str)+'_'+df_total['Reco_Policy_Cat'].astype(str)

In [248]:
# Keep only the digits of City_Code and store it as an integer.
# regex=True is passed explicitly: since pandas 2.0 Series.str.replace defaults
# to regex=False, which would treat "[^0-9]" as a literal string, strip
# nothing, and make the numeric cast below raise.
df_total['City_Code'] = df_total['City_Code'].str.replace("[^0-9]", "", regex=True)
df_total['City_Code'] = df_total['City_Code'].astype(float).astype(int)

# One-hot encode the listed columns, dropping the first level of each.
df_total = pd.get_dummies(df_total,columns=['Accomodation_Type','Reco_Insurance_Type','Is_Spouse'],drop_first=True)

# Same digit extraction for Health Indicator (nulls were filled with 'X0'
# in an earlier cell, so they become 0 here).
df_total['Health Indicator'] = df_total['Health Indicator'].str.replace("[^0-9]", "", regex=True)
df_total['Health Indicator'] = df_total['Health Indicator'].astype(float).astype(int)

In [249]:
# Inspect the column set after cleaning, interaction keys and one-hot encoding.
df_total.columns

Index(['ID', 'City_Code', 'Region_Code', 'Upper_Age', 'Lower_Age',
       'Health Indicator', 'Holding_Policy_Duration', 'Holding_Policy_Type',
       'Reco_Policy_Cat', 'Reco_Policy_Premium', 'Response',
       'holds_insurance_policy', 'Age_diff', 'Average_age',
       'City_Health Indicator', 'Region_Health Indicator',
       'City_Holding_Policy_Duration', 'Region_Holding_Policy_Duration',
       'City_Holding_Policy_Type', 'Region_Holding_Policy_Type',
       'City_Reco_Insurance_Type', 'Region_Reco_Insurance_Type',
       'City_Reco_Policy_Cat', 'Region_Reco_Policy_Cat',
       'City_holds_insurance_policy', 'Region_holds_insurance_policy',
       'City_Accomodation_Type', 'Region_Accomodation_Type', 'City_Is_Spouse',
       'Region_Is_Spouse', 'Holding_Policy_Duration_Type',
       'Reco_Insurance_Type_Cat', 'Accomodation_Type_Rented',
       'Reco_Insurance_Type_Joint', 'Is_Spouse_Yes'],
      dtype='object')

In [250]:
# Replace every object-dtype column with integer labels. The values are cast
# to str first, and the single encoder instance is re-fitted for each column.
le = LabelEncoder()
object_columns = [name for name in df_total.columns if df_total[name].dtypes == "object"]
for name in object_columns:
    df_total[name] = le.fit_transform(df_total[name].astype(str))

In [252]:
# Quantile-based ordinal bins: 8 for the premium, 10 for each age feature.
premium_discretizer = KBinsDiscretizer(n_bins=8, encode='ordinal', strategy='quantile')
premium_values = df_total['Reco_Policy_Premium'].values.reshape(-1, 1)
df_total['Reco_Policy_Premium_Bins'] = premium_discretizer.fit_transform(premium_values).astype(int)

# The same discretizer object is re-fitted on each age column in turn, so
# every column gets its own bin edges.
age_discretizer = KBinsDiscretizer(n_bins=10, encode='ordinal', strategy='quantile')
for age_col in ('Upper_Age', 'Lower_Age', 'Age_diff', 'Average_age'):
    age_values = df_total[age_col].values.reshape(-1, 1)
    df_total[age_col + '_Bins'] = age_discretizer.fit_transform(age_values).astype(int)

In [253]:
# Frequency encoding: "<column>_counts" holds how many rows of the combined
# frame share that row's value in <column>.
frequency_encoded_columns = [
    'City_Code', 'Region_Code', 'Upper_Age', 'Lower_Age', 'Health Indicator',
    'Holding_Policy_Duration', 'Holding_Policy_Type',
    'Reco_Policy_Cat', 'holds_insurance_policy',
]
for name in frequency_encoded_columns:
    occurrences = df_total[name].value_counts().to_dict()
    df_total[str(name) + "_counts"] = df_total[name].map(occurrences)

In [254]:
# Group-wise aggregate features.
#
# The column lists are defined once (they were previously repeated three
# times), and all generated columns are collected and attached with a single
# concat. Inserting roughly a thousand columns one at a time fragments the
# DataFrame's internal blocks, which pandas reports as a PerformanceWarning.

# Categorical / label-encoded columns: used as group keys and as the columns
# whose distinct values are counted.
categorical_cols = [
    'City_Code', 'Region_Code', 'Health Indicator', 'Holding_Policy_Type', 'Reco_Policy_Cat',
    'City_Health Indicator', 'Region_Health Indicator',
    'City_Holding_Policy_Duration', 'Region_Holding_Policy_Duration',
    'City_Holding_Policy_Type', 'Region_Holding_Policy_Type',
    'City_Reco_Insurance_Type', 'Region_Reco_Insurance_Type',
    'City_Reco_Policy_Cat', 'Region_Reco_Policy_Cat',
    'City_holds_insurance_policy', 'Region_holds_insurance_policy',
    'City_Accomodation_Type', 'Region_Accomodation_Type', 'City_Is_Spouse',
    'Region_Is_Spouse', 'Holding_Policy_Duration_Type',
    'Reco_Insurance_Type_Cat',
]

# Group keys for the distinct-count features: four numeric columns followed by
# every categorical column.
nunique_group_keys = ['Reco_Policy_Premium', 'Upper_Age', 'Lower_Age',
                      'Holding_Policy_Duration'] + categorical_cols

# Numeric columns summarised (mean / sum / std) within each categorical group.
numeric_cols = ['Upper_Age', 'Lower_Age', 'Holding_Policy_Duration', 'holds_insurance_policy',
                'Age_diff', 'Average_age', 'Reco_Policy_Premium']

# Insertion-ordered mapping of new column name -> Series, so the final column
# order matches the order in which the features are generated.
new_features = {}

# Number of distinct values of `col` among rows sharing the same `main_col`.
for main_col in nunique_group_keys:
    grouped = df_total.groupby([main_col])
    for col in categorical_cols:
        if main_col != col:
            new_features['unique_of_'+str(col)+'_per_'+str(main_col)] = grouped[col].transform('nunique')

# Mean, sum and standard deviation of each numeric column per categorical group.
for main_col in categorical_cols:
    grouped = df_total.groupby([main_col])
    for col in numeric_cols:
        grouped_col = grouped[col]
        new_features['mean_of_'+str(col)+'_per_'+str(main_col)] = grouped_col.transform('mean')
        new_features['sum_of_'+str(col)+'_per_'+str(main_col)] = grouped_col.transform('sum')
        # std is NaN where pandas cannot compute it (e.g. single-row groups);
        # those are replaced by the sentinel -1.
        new_features['std_of_'+str(col)+'_per_'+str(main_col)] = grouped_col.transform('std').fillna(-1)

# Attach everything in one operation. Dropping same-named columns first keeps
# the cell re-runnable without producing duplicate column labels.
df_total = pd.concat(
    [df_total.drop(columns=list(new_features), errors='ignore'), pd.DataFrame(new_features)],
    axis=1,
)

In [255]:
# Preview the first five rows of the engineered feature frame.
df_total.head()

Unnamed: 0,ID,City_Code,Region_Code,Upper_Age,Lower_Age,Health Indicator,Holding_Policy_Duration,Holding_Policy_Type,Reco_Policy_Cat,Reco_Policy_Premium,Response,holds_insurance_policy,Age_diff,Average_age,City_Health Indicator,Region_Health Indicator,City_Holding_Policy_Duration,Region_Holding_Policy_Duration,City_Holding_Policy_Type,Region_Holding_Policy_Type,City_Reco_Insurance_Type,Region_Reco_Insurance_Type,City_Reco_Policy_Cat,Region_Reco_Policy_Cat,City_holds_insurance_policy,Region_holds_insurance_policy,City_Accomodation_Type,Region_Accomodation_Type,City_Is_Spouse,Region_Is_Spouse,Holding_Policy_Duration_Type,Reco_Insurance_Type_Cat,Accomodation_Type_Rented,Reco_Insurance_Type_Joint,Is_Spouse_Yes,Reco_Policy_Premium_Bins,Upper_Age_Bins,Lower_Age_Bins,Age_diff_Bins,Average_age_Bins,City_Code_counts,Region_Code_counts,Upper_Age_counts,Lower_Age_counts,Health Indicator_counts,Holding_Policy_Duration_counts,Holding_Policy_Type_counts,Reco_Policy_Cat_counts,holds_insurance_policy_counts,unique_of_City_Code_per_Reco_Policy_Premium,unique_of_Region_Code_per_Reco_Policy_Premium,unique_of_Health Indicator_per_Reco_Policy_Premium,unique_of_Holding_Policy_Type_per_Reco_Policy_Premium,unique_of_Reco_Policy_Cat_per_Reco_Policy_Premium,unique_of_City_Health Indicator_per_Reco_Policy_Premium,unique_of_Region_Health 
Indicator_per_Reco_Policy_Premium,unique_of_City_Holding_Policy_Duration_per_Reco_Policy_Premium,unique_of_Region_Holding_Policy_Duration_per_Reco_Policy_Premium,unique_of_City_Holding_Policy_Type_per_Reco_Policy_Premium,unique_of_Region_Holding_Policy_Type_per_Reco_Policy_Premium,unique_of_City_Reco_Insurance_Type_per_Reco_Policy_Premium,unique_of_Region_Reco_Insurance_Type_per_Reco_Policy_Premium,unique_of_City_Reco_Policy_Cat_per_Reco_Policy_Premium,unique_of_Region_Reco_Policy_Cat_per_Reco_Policy_Premium,unique_of_City_holds_insurance_policy_per_Reco_Policy_Premium,unique_of_Region_holds_insurance_policy_per_Reco_Policy_Premium,unique_of_City_Accomodation_Type_per_Reco_Policy_Premium,unique_of_Region_Accomodation_Type_per_Reco_Policy_Premium,unique_of_City_Is_Spouse_per_Reco_Policy_Premium,unique_of_Region_Is_Spouse_per_Reco_Policy_Premium,unique_of_Holding_Policy_Duration_Type_per_Reco_Policy_Premium,unique_of_Reco_Insurance_Type_Cat_per_Reco_Policy_Premium,unique_of_City_Code_per_Upper_Age,unique_of_Region_Code_per_Upper_Age,unique_of_Health Indicator_per_Upper_Age,unique_of_Holding_Policy_Type_per_Upper_Age,unique_of_Reco_Policy_Cat_per_Upper_Age,unique_of_City_Health Indicator_per_Upper_Age,unique_of_Region_Health 
Indicator_per_Upper_Age,unique_of_City_Holding_Policy_Duration_per_Upper_Age,unique_of_Region_Holding_Policy_Duration_per_Upper_Age,unique_of_City_Holding_Policy_Type_per_Upper_Age,unique_of_Region_Holding_Policy_Type_per_Upper_Age,unique_of_City_Reco_Insurance_Type_per_Upper_Age,unique_of_Region_Reco_Insurance_Type_per_Upper_Age,unique_of_City_Reco_Policy_Cat_per_Upper_Age,unique_of_Region_Reco_Policy_Cat_per_Upper_Age,unique_of_City_holds_insurance_policy_per_Upper_Age,unique_of_Region_holds_insurance_policy_per_Upper_Age,unique_of_City_Accomodation_Type_per_Upper_Age,unique_of_Region_Accomodation_Type_per_Upper_Age,unique_of_City_Is_Spouse_per_Upper_Age,unique_of_Region_Is_Spouse_per_Upper_Age,unique_of_Holding_Policy_Duration_Type_per_Upper_Age,unique_of_Reco_Insurance_Type_Cat_per_Upper_Age,unique_of_City_Code_per_Lower_Age,unique_of_Region_Code_per_Lower_Age,unique_of_Health Indicator_per_Lower_Age,unique_of_Holding_Policy_Type_per_Lower_Age,unique_of_Reco_Policy_Cat_per_Lower_Age,unique_of_City_Health Indicator_per_Lower_Age,unique_of_Region_Health Indicator_per_Lower_Age,unique_of_City_Holding_Policy_Duration_per_Lower_Age,unique_of_Region_Holding_Policy_Duration_per_Lower_Age,unique_of_City_Holding_Policy_Type_per_Lower_Age,unique_of_Region_Holding_Policy_Type_per_Lower_Age,unique_of_City_Reco_Insurance_Type_per_Lower_Age,unique_of_Region_Reco_Insurance_Type_per_Lower_Age,unique_of_City_Reco_Policy_Cat_per_Lower_Age,unique_of_Region_Reco_Policy_Cat_per_Lower_Age,unique_of_City_holds_insurance_policy_per_Lower_Age,unique_of_Region_holds_insurance_policy_per_Lower_Age,unique_of_City_Accomodation_Type_per_Lower_Age,unique_of_Region_Accomodation_Type_per_Lower_Age,unique_of_City_Is_Spouse_per_Lower_Age,unique_of_Region_Is_Spouse_per_Lower_Age,unique_of_Holding_Policy_Duration_Type_per_Lower_Age,unique_of_Reco_Insurance_Type_Cat_per_Lower_Age,unique_of_City_Code_per_Holding_Policy_Duration,unique_of_Region_Code_per_Holding_Policy_Duration,unique_of_Health 
Indicator_per_Holding_Policy_Duration,unique_of_Holding_Policy_Type_per_Holding_Policy_Duration,unique_of_Reco_Policy_Cat_per_Holding_Policy_Duration,unique_of_City_Health Indicator_per_Holding_Policy_Duration,unique_of_Region_Health Indicator_per_Holding_Policy_Duration,unique_of_City_Holding_Policy_Duration_per_Holding_Policy_Duration,unique_of_Region_Holding_Policy_Duration_per_Holding_Policy_Duration,unique_of_City_Holding_Policy_Type_per_Holding_Policy_Duration,unique_of_Region_Holding_Policy_Type_per_Holding_Policy_Duration,unique_of_City_Reco_Insurance_Type_per_Holding_Policy_Duration,unique_of_Region_Reco_Insurance_Type_per_Holding_Policy_Duration,unique_of_City_Reco_Policy_Cat_per_Holding_Policy_Duration,unique_of_Region_Reco_Policy_Cat_per_Holding_Policy_Duration,unique_of_City_holds_insurance_policy_per_Holding_Policy_Duration,unique_of_Region_holds_insurance_policy_per_Holding_Policy_Duration,unique_of_City_Accomodation_Type_per_Holding_Policy_Duration,unique_of_Region_Accomodation_Type_per_Holding_Policy_Duration,unique_of_City_Is_Spouse_per_Holding_Policy_Duration,unique_of_Region_Is_Spouse_per_Holding_Policy_Duration,unique_of_Holding_Policy_Duration_Type_per_Holding_Policy_Duration,unique_of_Reco_Insurance_Type_Cat_per_Holding_Policy_Duration,unique_of_Region_Code_per_City_Code,unique_of_Health Indicator_per_City_Code,unique_of_Holding_Policy_Type_per_City_Code,unique_of_Reco_Policy_Cat_per_City_Code,unique_of_City_Health Indicator_per_City_Code,unique_of_Region_Health 
Indicator_per_City_Code,unique_of_City_Holding_Policy_Duration_per_City_Code,unique_of_Region_Holding_Policy_Duration_per_City_Code,unique_of_City_Holding_Policy_Type_per_City_Code,unique_of_Region_Holding_Policy_Type_per_City_Code,unique_of_City_Reco_Insurance_Type_per_City_Code,unique_of_Region_Reco_Insurance_Type_per_City_Code,unique_of_City_Reco_Policy_Cat_per_City_Code,unique_of_Region_Reco_Policy_Cat_per_City_Code,unique_of_City_holds_insurance_policy_per_City_Code,unique_of_Region_holds_insurance_policy_per_City_Code,unique_of_City_Accomodation_Type_per_City_Code,unique_of_Region_Accomodation_Type_per_City_Code,unique_of_City_Is_Spouse_per_City_Code,unique_of_Region_Is_Spouse_per_City_Code,unique_of_Holding_Policy_Duration_Type_per_City_Code,unique_of_Reco_Insurance_Type_Cat_per_City_Code,unique_of_City_Code_per_Region_Code,unique_of_Health Indicator_per_Region_Code,unique_of_Holding_Policy_Type_per_Region_Code,unique_of_Reco_Policy_Cat_per_Region_Code,unique_of_City_Health Indicator_per_Region_Code,unique_of_Region_Health Indicator_per_Region_Code,unique_of_City_Holding_Policy_Duration_per_Region_Code,unique_of_Region_Holding_Policy_Duration_per_Region_Code,unique_of_City_Holding_Policy_Type_per_Region_Code,unique_of_Region_Holding_Policy_Type_per_Region_Code,unique_of_City_Reco_Insurance_Type_per_Region_Code,unique_of_Region_Reco_Insurance_Type_per_Region_Code,unique_of_City_Reco_Policy_Cat_per_Region_Code,unique_of_Region_Reco_Policy_Cat_per_Region_Code,unique_of_City_holds_insurance_policy_per_Region_Code,unique_of_Region_holds_insurance_policy_per_Region_Code,unique_of_City_Accomodation_Type_per_Region_Code,unique_of_Region_Accomodation_Type_per_Region_Code,unique_of_City_Is_Spouse_per_Region_Code,unique_of_Region_Is_Spouse_per_Region_Code,unique_of_Holding_Policy_Duration_Type_per_Region_Code,unique_of_Reco_Insurance_Type_Cat_per_Region_Code,unique_of_City_Code_per_Health Indicator,unique_of_Region_Code_per_Health 
Indicator,unique_of_Holding_Policy_Type_per_Health Indicator,unique_of_Reco_Policy_Cat_per_Health Indicator,unique_of_City_Health Indicator_per_Health Indicator,unique_of_Region_Health Indicator_per_Health Indicator,unique_of_City_Holding_Policy_Duration_per_Health Indicator,unique_of_Region_Holding_Policy_Duration_per_Health Indicator,unique_of_City_Holding_Policy_Type_per_Health Indicator,unique_of_Region_Holding_Policy_Type_per_Health Indicator,unique_of_City_Reco_Insurance_Type_per_Health Indicator,unique_of_Region_Reco_Insurance_Type_per_Health Indicator,unique_of_City_Reco_Policy_Cat_per_Health Indicator,unique_of_Region_Reco_Policy_Cat_per_Health Indicator,unique_of_City_holds_insurance_policy_per_Health Indicator,unique_of_Region_holds_insurance_policy_per_Health Indicator,unique_of_City_Accomodation_Type_per_Health Indicator,unique_of_Region_Accomodation_Type_per_Health Indicator,unique_of_City_Is_Spouse_per_Health Indicator,unique_of_Region_Is_Spouse_per_Health Indicator,unique_of_Holding_Policy_Duration_Type_per_Health Indicator,unique_of_Reco_Insurance_Type_Cat_per_Health Indicator,unique_of_City_Code_per_Holding_Policy_Type,unique_of_Region_Code_per_Holding_Policy_Type,unique_of_Health Indicator_per_Holding_Policy_Type,unique_of_Reco_Policy_Cat_per_Holding_Policy_Type,unique_of_City_Health Indicator_per_Holding_Policy_Type,unique_of_Region_Health 
Indicator_per_Holding_Policy_Type,unique_of_City_Holding_Policy_Duration_per_Holding_Policy_Type,unique_of_Region_Holding_Policy_Duration_per_Holding_Policy_Type,unique_of_City_Holding_Policy_Type_per_Holding_Policy_Type,unique_of_Region_Holding_Policy_Type_per_Holding_Policy_Type,unique_of_City_Reco_Insurance_Type_per_Holding_Policy_Type,unique_of_Region_Reco_Insurance_Type_per_Holding_Policy_Type,unique_of_City_Reco_Policy_Cat_per_Holding_Policy_Type,unique_of_Region_Reco_Policy_Cat_per_Holding_Policy_Type,unique_of_City_holds_insurance_policy_per_Holding_Policy_Type,unique_of_Region_holds_insurance_policy_per_Holding_Policy_Type,unique_of_City_Accomodation_Type_per_Holding_Policy_Type,unique_of_Region_Accomodation_Type_per_Holding_Policy_Type,unique_of_City_Is_Spouse_per_Holding_Policy_Type,unique_of_Region_Is_Spouse_per_Holding_Policy_Type,unique_of_Holding_Policy_Duration_Type_per_Holding_Policy_Type,unique_of_Reco_Insurance_Type_Cat_per_Holding_Policy_Type,unique_of_City_Code_per_Reco_Policy_Cat,unique_of_Region_Code_per_Reco_Policy_Cat,unique_of_Health Indicator_per_Reco_Policy_Cat,unique_of_Holding_Policy_Type_per_Reco_Policy_Cat,unique_of_City_Health Indicator_per_Reco_Policy_Cat,unique_of_Region_Health 
Indicator_per_Reco_Policy_Cat,unique_of_City_Holding_Policy_Duration_per_Reco_Policy_Cat,unique_of_Region_Holding_Policy_Duration_per_Reco_Policy_Cat,unique_of_City_Holding_Policy_Type_per_Reco_Policy_Cat,unique_of_Region_Holding_Policy_Type_per_Reco_Policy_Cat,unique_of_City_Reco_Insurance_Type_per_Reco_Policy_Cat,unique_of_Region_Reco_Insurance_Type_per_Reco_Policy_Cat,unique_of_City_Reco_Policy_Cat_per_Reco_Policy_Cat,unique_of_Region_Reco_Policy_Cat_per_Reco_Policy_Cat,unique_of_City_holds_insurance_policy_per_Reco_Policy_Cat,unique_of_Region_holds_insurance_policy_per_Reco_Policy_Cat,unique_of_City_Accomodation_Type_per_Reco_Policy_Cat,unique_of_Region_Accomodation_Type_per_Reco_Policy_Cat,unique_of_City_Is_Spouse_per_Reco_Policy_Cat,unique_of_Region_Is_Spouse_per_Reco_Policy_Cat,unique_of_Holding_Policy_Duration_Type_per_Reco_Policy_Cat,...,std_of_Upper_Age_per_City_Reco_Insurance_Type,mean_of_Lower_Age_per_City_Reco_Insurance_Type,sum_of_Lower_Age_per_City_Reco_Insurance_Type,std_of_Lower_Age_per_City_Reco_Insurance_Type,mean_of_Holding_Policy_Duration_per_City_Reco_Insurance_Type,sum_of_Holding_Policy_Duration_per_City_Reco_Insurance_Type,std_of_Holding_Policy_Duration_per_City_Reco_Insurance_Type,mean_of_holds_insurance_policy_per_City_Reco_Insurance_Type,sum_of_holds_insurance_policy_per_City_Reco_Insurance_Type,std_of_holds_insurance_policy_per_City_Reco_Insurance_Type,mean_of_Age_diff_per_City_Reco_Insurance_Type,sum_of_Age_diff_per_City_Reco_Insurance_Type,std_of_Age_diff_per_City_Reco_Insurance_Type,mean_of_Average_age_per_City_Reco_Insurance_Type,sum_of_Average_age_per_City_Reco_Insurance_Type,std_of_Average_age_per_City_Reco_Insurance_Type,mean_of_Reco_Policy_Premium_per_City_Reco_Insurance_Type,sum_of_Reco_Policy_Premium_per_City_Reco_Insurance_Type,std_of_Reco_Policy_Premium_per_City_Reco_Insurance_Type,mean_of_Upper_Age_per_Region_Reco_Insurance_Type,sum_of_Upper_Age_per_Region_Reco_Insurance_Type,std_of_Upper_Age_per_Region_Reco_Insurance_Type,me
an_of_Lower_Age_per_Region_Reco_Insurance_Type,sum_of_Lower_Age_per_Region_Reco_Insurance_Type,std_of_Lower_Age_per_Region_Reco_Insurance_Type,mean_of_Holding_Policy_Duration_per_Region_Reco_Insurance_Type,sum_of_Holding_Policy_Duration_per_Region_Reco_Insurance_Type,std_of_Holding_Policy_Duration_per_Region_Reco_Insurance_Type,mean_of_holds_insurance_policy_per_Region_Reco_Insurance_Type,sum_of_holds_insurance_policy_per_Region_Reco_Insurance_Type,std_of_holds_insurance_policy_per_Region_Reco_Insurance_Type,mean_of_Age_diff_per_Region_Reco_Insurance_Type,sum_of_Age_diff_per_Region_Reco_Insurance_Type,std_of_Age_diff_per_Region_Reco_Insurance_Type,mean_of_Average_age_per_Region_Reco_Insurance_Type,sum_of_Average_age_per_Region_Reco_Insurance_Type,std_of_Average_age_per_Region_Reco_Insurance_Type,mean_of_Reco_Policy_Premium_per_Region_Reco_Insurance_Type,sum_of_Reco_Policy_Premium_per_Region_Reco_Insurance_Type,std_of_Reco_Policy_Premium_per_Region_Reco_Insurance_Type,mean_of_Upper_Age_per_City_Reco_Policy_Cat,sum_of_Upper_Age_per_City_Reco_Policy_Cat,std_of_Upper_Age_per_City_Reco_Policy_Cat,mean_of_Lower_Age_per_City_Reco_Policy_Cat,sum_of_Lower_Age_per_City_Reco_Policy_Cat,std_of_Lower_Age_per_City_Reco_Policy_Cat,mean_of_Holding_Policy_Duration_per_City_Reco_Policy_Cat,sum_of_Holding_Policy_Duration_per_City_Reco_Policy_Cat,std_of_Holding_Policy_Duration_per_City_Reco_Policy_Cat,mean_of_holds_insurance_policy_per_City_Reco_Policy_Cat,sum_of_holds_insurance_policy_per_City_Reco_Policy_Cat,std_of_holds_insurance_policy_per_City_Reco_Policy_Cat,mean_of_Age_diff_per_City_Reco_Policy_Cat,sum_of_Age_diff_per_City_Reco_Policy_Cat,std_of_Age_diff_per_City_Reco_Policy_Cat,mean_of_Average_age_per_City_Reco_Policy_Cat,sum_of_Average_age_per_City_Reco_Policy_Cat,std_of_Average_age_per_City_Reco_Policy_Cat,mean_of_Reco_Policy_Premium_per_City_Reco_Policy_Cat,sum_of_Reco_Policy_Premium_per_City_Reco_Policy_Cat,std_of_Reco_Policy_Premium_per_City_Reco_Policy_Cat,mean_of_Upper_A
ge_per_Region_Reco_Policy_Cat,sum_of_Upper_Age_per_Region_Reco_Policy_Cat,std_of_Upper_Age_per_Region_Reco_Policy_Cat,mean_of_Lower_Age_per_Region_Reco_Policy_Cat,sum_of_Lower_Age_per_Region_Reco_Policy_Cat,std_of_Lower_Age_per_Region_Reco_Policy_Cat,mean_of_Holding_Policy_Duration_per_Region_Reco_Policy_Cat,sum_of_Holding_Policy_Duration_per_Region_Reco_Policy_Cat,std_of_Holding_Policy_Duration_per_Region_Reco_Policy_Cat,mean_of_holds_insurance_policy_per_Region_Reco_Policy_Cat,sum_of_holds_insurance_policy_per_Region_Reco_Policy_Cat,std_of_holds_insurance_policy_per_Region_Reco_Policy_Cat,mean_of_Age_diff_per_Region_Reco_Policy_Cat,sum_of_Age_diff_per_Region_Reco_Policy_Cat,std_of_Age_diff_per_Region_Reco_Policy_Cat,mean_of_Average_age_per_Region_Reco_Policy_Cat,sum_of_Average_age_per_Region_Reco_Policy_Cat,std_of_Average_age_per_Region_Reco_Policy_Cat,mean_of_Reco_Policy_Premium_per_Region_Reco_Policy_Cat,sum_of_Reco_Policy_Premium_per_Region_Reco_Policy_Cat,std_of_Reco_Policy_Premium_per_Region_Reco_Policy_Cat,mean_of_Upper_Age_per_City_holds_insurance_policy,sum_of_Upper_Age_per_City_holds_insurance_policy,std_of_Upper_Age_per_City_holds_insurance_policy,mean_of_Lower_Age_per_City_holds_insurance_policy,sum_of_Lower_Age_per_City_holds_insurance_policy,std_of_Lower_Age_per_City_holds_insurance_policy,mean_of_Holding_Policy_Duration_per_City_holds_insurance_policy,sum_of_Holding_Policy_Duration_per_City_holds_insurance_policy,std_of_Holding_Policy_Duration_per_City_holds_insurance_policy,mean_of_holds_insurance_policy_per_City_holds_insurance_policy,sum_of_holds_insurance_policy_per_City_holds_insurance_policy,std_of_holds_insurance_policy_per_City_holds_insurance_policy,mean_of_Age_diff_per_City_holds_insurance_policy,sum_of_Age_diff_per_City_holds_insurance_policy,std_of_Age_diff_per_City_holds_insurance_policy,mean_of_Average_age_per_City_holds_insurance_policy,sum_of_Average_age_per_City_holds_insurance_policy,std_of_Average_age_per_City_holds_insurance_polic
y,mean_of_Reco_Policy_Premium_per_City_holds_insurance_policy,sum_of_Reco_Policy_Premium_per_City_holds_insurance_policy,std_of_Reco_Policy_Premium_per_City_holds_insurance_policy,mean_of_Upper_Age_per_Region_holds_insurance_policy,sum_of_Upper_Age_per_Region_holds_insurance_policy,std_of_Upper_Age_per_Region_holds_insurance_policy,mean_of_Lower_Age_per_Region_holds_insurance_policy,sum_of_Lower_Age_per_Region_holds_insurance_policy,std_of_Lower_Age_per_Region_holds_insurance_policy,mean_of_Holding_Policy_Duration_per_Region_holds_insurance_policy,sum_of_Holding_Policy_Duration_per_Region_holds_insurance_policy,std_of_Holding_Policy_Duration_per_Region_holds_insurance_policy,mean_of_holds_insurance_policy_per_Region_holds_insurance_policy,sum_of_holds_insurance_policy_per_Region_holds_insurance_policy,std_of_holds_insurance_policy_per_Region_holds_insurance_policy,mean_of_Age_diff_per_Region_holds_insurance_policy,sum_of_Age_diff_per_Region_holds_insurance_policy,std_of_Age_diff_per_Region_holds_insurance_policy,mean_of_Average_age_per_Region_holds_insurance_policy,sum_of_Average_age_per_Region_holds_insurance_policy,std_of_Average_age_per_Region_holds_insurance_policy,mean_of_Reco_Policy_Premium_per_Region_holds_insurance_policy,sum_of_Reco_Policy_Premium_per_Region_holds_insurance_policy,std_of_Reco_Policy_Premium_per_Region_holds_insurance_policy,mean_of_Upper_Age_per_City_Accomodation_Type,sum_of_Upper_Age_per_City_Accomodation_Type,std_of_Upper_Age_per_City_Accomodation_Type,mean_of_Lower_Age_per_City_Accomodation_Type,sum_of_Lower_Age_per_City_Accomodation_Type,std_of_Lower_Age_per_City_Accomodation_Type,mean_of_Holding_Policy_Duration_per_City_Accomodation_Type,sum_of_Holding_Policy_Duration_per_City_Accomodation_Type,std_of_Holding_Policy_Duration_per_City_Accomodation_Type,mean_of_holds_insurance_policy_per_City_Accomodation_Type,sum_of_holds_insurance_policy_per_City_Accomodation_Type,std_of_holds_insurance_policy_per_City_Accomodation_Type,mean_of_Age_dif
f_per_City_Accomodation_Type,sum_of_Age_diff_per_City_Accomodation_Type,std_of_Age_diff_per_City_Accomodation_Type,mean_of_Average_age_per_City_Accomodation_Type,sum_of_Average_age_per_City_Accomodation_Type,std_of_Average_age_per_City_Accomodation_Type,mean_of_Reco_Policy_Premium_per_City_Accomodation_Type,sum_of_Reco_Policy_Premium_per_City_Accomodation_Type,std_of_Reco_Policy_Premium_per_City_Accomodation_Type,mean_of_Upper_Age_per_Region_Accomodation_Type,sum_of_Upper_Age_per_Region_Accomodation_Type,std_of_Upper_Age_per_Region_Accomodation_Type,mean_of_Lower_Age_per_Region_Accomodation_Type,sum_of_Lower_Age_per_Region_Accomodation_Type,std_of_Lower_Age_per_Region_Accomodation_Type,mean_of_Holding_Policy_Duration_per_Region_Accomodation_Type,sum_of_Holding_Policy_Duration_per_Region_Accomodation_Type,std_of_Holding_Policy_Duration_per_Region_Accomodation_Type,mean_of_holds_insurance_policy_per_Region_Accomodation_Type,sum_of_holds_insurance_policy_per_Region_Accomodation_Type,std_of_holds_insurance_policy_per_Region_Accomodation_Type,mean_of_Age_diff_per_Region_Accomodation_Type,sum_of_Age_diff_per_Region_Accomodation_Type,std_of_Age_diff_per_Region_Accomodation_Type,mean_of_Average_age_per_Region_Accomodation_Type,sum_of_Average_age_per_Region_Accomodation_Type,std_of_Average_age_per_Region_Accomodation_Type,mean_of_Reco_Policy_Premium_per_Region_Accomodation_Type,sum_of_Reco_Policy_Premium_per_Region_Accomodation_Type,std_of_Reco_Policy_Premium_per_Region_Accomodation_Type,mean_of_Upper_Age_per_City_Is_Spouse,sum_of_Upper_Age_per_City_Is_Spouse,std_of_Upper_Age_per_City_Is_Spouse,mean_of_Lower_Age_per_City_Is_Spouse,sum_of_Lower_Age_per_City_Is_Spouse,std_of_Lower_Age_per_City_Is_Spouse,mean_of_Holding_Policy_Duration_per_City_Is_Spouse,sum_of_Holding_Policy_Duration_per_City_Is_Spouse,std_of_Holding_Policy_Duration_per_City_Is_Spouse,mean_of_holds_insurance_policy_per_City_Is_Spouse,sum_of_holds_insurance_policy_per_City_Is_Spouse,std_of_holds_insurance_polic
y_per_City_Is_Spouse,mean_of_Age_diff_per_City_Is_Spouse,sum_of_Age_diff_per_City_Is_Spouse,std_of_Age_diff_per_City_Is_Spouse,mean_of_Average_age_per_City_Is_Spouse,sum_of_Average_age_per_City_Is_Spouse,std_of_Average_age_per_City_Is_Spouse,mean_of_Reco_Policy_Premium_per_City_Is_Spouse,sum_of_Reco_Policy_Premium_per_City_Is_Spouse,std_of_Reco_Policy_Premium_per_City_Is_Spouse,mean_of_Upper_Age_per_Region_Is_Spouse,sum_of_Upper_Age_per_Region_Is_Spouse,std_of_Upper_Age_per_Region_Is_Spouse,mean_of_Lower_Age_per_Region_Is_Spouse,sum_of_Lower_Age_per_Region_Is_Spouse,std_of_Lower_Age_per_Region_Is_Spouse,mean_of_Holding_Policy_Duration_per_Region_Is_Spouse,sum_of_Holding_Policy_Duration_per_Region_Is_Spouse,std_of_Holding_Policy_Duration_per_Region_Is_Spouse,mean_of_holds_insurance_policy_per_Region_Is_Spouse,sum_of_holds_insurance_policy_per_Region_Is_Spouse,std_of_holds_insurance_policy_per_Region_Is_Spouse,mean_of_Age_diff_per_Region_Is_Spouse,sum_of_Age_diff_per_Region_Is_Spouse,std_of_Age_diff_per_Region_Is_Spouse,mean_of_Average_age_per_Region_Is_Spouse,sum_of_Average_age_per_Region_Is_Spouse,std_of_Average_age_per_Region_Is_Spouse,mean_of_Reco_Policy_Premium_per_Region_Is_Spouse,sum_of_Reco_Policy_Premium_per_Region_Is_Spouse,std_of_Reco_Policy_Premium_per_Region_Is_Spouse,mean_of_Upper_Age_per_Holding_Policy_Duration_Type,sum_of_Upper_Age_per_Holding_Policy_Duration_Type,std_of_Upper_Age_per_Holding_Policy_Duration_Type,mean_of_Lower_Age_per_Holding_Policy_Duration_Type,sum_of_Lower_Age_per_Holding_Policy_Duration_Type,std_of_Lower_Age_per_Holding_Policy_Duration_Type,mean_of_Holding_Policy_Duration_per_Holding_Policy_Duration_Type,sum_of_Holding_Policy_Duration_per_Holding_Policy_Duration_Type,std_of_Holding_Policy_Duration_per_Holding_Policy_Duration_Type,mean_of_holds_insurance_policy_per_Holding_Policy_Duration_Type,sum_of_holds_insurance_policy_per_Holding_Policy_Duration_Type,std_of_holds_insurance_policy_per_Holding_Policy_Duration_Type,mean_of_Age_dif
f_per_Holding_Policy_Duration_Type,sum_of_Age_diff_per_Holding_Policy_Duration_Type,std_of_Age_diff_per_Holding_Policy_Duration_Type,mean_of_Average_age_per_Holding_Policy_Duration_Type,sum_of_Average_age_per_Holding_Policy_Duration_Type,std_of_Average_age_per_Holding_Policy_Duration_Type,mean_of_Reco_Policy_Premium_per_Holding_Policy_Duration_Type,sum_of_Reco_Policy_Premium_per_Holding_Policy_Duration_Type,std_of_Reco_Policy_Premium_per_Holding_Policy_Duration_Type,mean_of_Upper_Age_per_Reco_Insurance_Type_Cat,sum_of_Upper_Age_per_Reco_Insurance_Type_Cat,std_of_Upper_Age_per_Reco_Insurance_Type_Cat,mean_of_Lower_Age_per_Reco_Insurance_Type_Cat,sum_of_Lower_Age_per_Reco_Insurance_Type_Cat,std_of_Lower_Age_per_Reco_Insurance_Type_Cat,mean_of_Holding_Policy_Duration_per_Reco_Insurance_Type_Cat,sum_of_Holding_Policy_Duration_per_Reco_Insurance_Type_Cat,std_of_Holding_Policy_Duration_per_Reco_Insurance_Type_Cat,mean_of_holds_insurance_policy_per_Reco_Insurance_Type_Cat,sum_of_holds_insurance_policy_per_Reco_Insurance_Type_Cat,std_of_holds_insurance_policy_per_Reco_Insurance_Type_Cat,mean_of_Age_diff_per_Reco_Insurance_Type_Cat,sum_of_Age_diff_per_Reco_Insurance_Type_Cat,std_of_Age_diff_per_Reco_Insurance_Type_Cat,mean_of_Average_age_per_Reco_Insurance_Type_Cat,sum_of_Average_age_per_Reco_Insurance_Type_Cat,std_of_Average_age_per_Reco_Insurance_Type_Cat,mean_of_Reco_Policy_Premium_per_Reco_Insurance_Type_Cat,sum_of_Reco_Policy_Premium_per_Reco_Insurance_Type_Cat,std_of_Reco_Policy_Premium_per_Reco_Insurance_Type_Cat
0,1,3,3213,36,36,1,15,3.0,22,11628.0,0.0,1,0,36.0,263,11758,446,16074,145,9103,58,4118,534,13078,59,4232,59,4221,58,4028,23,14,1,0,0,3,3,4,0,4,6837,10,1027,1032,18151,6020,18301,9209,42524,17,35,6,5.0,13,29,36,33,36,28,36,20,36,30,36,22,36,21,35,20,36,18,15,34,853,9,5.0,22,173,987,285,997,138,973,62,903,296,964,65,922,64,915,62,899,60,43,35,870,9,5.0,22,182,991,292,1006,139,979,64,913,306,974,69,935,67,932,64,909,58,44,35,2819,10,4.0,22,242,4955,35,2819,135,4269,69,3657,476,4692,35,2819,69,3528,68,3550,4,44,407,10,5.0,22,10,2028,16,2692,5,1581,2,746,22,2323,2,792,2,792,2,726,61,44,1,4,4.0,5,4,4,3,3,4,4,2,2,5,5,2,2,2,2,2,2,4,5,35,4097,5.0,22,35,4097,496,12045,171,9740,69,5939,582,10395,70,6743,70,6680,69,5662,61,44,36,4088,10,22,281,11310,497,13895,36,4088,71,6229,593,10020,36,4088,71,6548,71,6040,15,44,35,2687,10,5.0,246,5728,428,6177,168,5007,69,3534,35,2687,70,3796,70,3742,69,3453,61,...,16.594758,41.540789,230676,16.594758,3.372591,18728,4.571796,0.580587,3224,0.493507,0.0,0,0.0,41.540789,230676.0,16.594758,12069.198271,67020258.0,4755.842028,38.333333,345,15.532225,38.333333,345,15.532225,2.222222,20,5.068969,0.222222,2,0.440959,0.0,0,0.0,38.333333,345.0,15.532225,10411.555556,93704.0,4009.210618,42.726744,22047,17.249425,40.591085,20945,17.26879,3.463178,1787,4.510247,0.593023,306,0.491747,2.135659,1102,7.169041,41.658915,21496.0,16.882775,14087.355814,7269075.6,6565.491887,36.0,36,-1.0,36.0,36,-1.0,15.0,15,-1.0,1.0,1,-1.0,0.0,0,-1.0,36.0,36.0,-1.0,11628.0,11628.0,-1.0,47.041931,192966,15.504558,44.676012,183261,15.983184,6.0,24612,4.721763,1,4102,0.0,2.365919,9705,7.30757,45.858971,188113.5,15.315894,14937.844222,61275037.0,6109.160914,48.333333,145,13.051181,47.666667,143,12.013881,11.666667,35,5.773503,1,3,0.0,0.666667,2,1.154701,48.0,144.0,12.529964,17321.6,51964.8,8915.937263,36.179564,107996,15.250519,35.099162,104771,14.8929,2.610385,7792,4.036275,0.507873,1516,0.500022,1.080402,3225,4.915189,35.639363,106383.5,14.871067,11488.384054,34292826.4,5410.0261
97,37.25,298,16.237083,37.25,298,16.237083,1.875,15,5.303301,0.125,1,0.353553,0.0,0,0.0,37.25,298.0,16.237083,10120.5,80964.0,4183.143008,42.033718,244342,16.617563,40.995011,238304,16.641285,3.423017,19898,4.603793,0.584208,3396,0.4929,1.038706,6038,5.485548,41.514364,241323.0,16.401679,12555.252675,72983683.8,5342.775019,38.333333,345,15.532225,38.333333,345,15.532225,2.222222,20,5.068969,0.222222,2,0.440959,0.0,0,0.0,38.333333,345.0,15.532225,10411.555556,93704.0,4009.210618,59.285443,187342,11.64882,55.789873,176296,14.425261,15,47400,0,1,3160,0,3.49557,11046,9.21001,57.537658,181819.0,12.275403,18349.221582,57983540.2,6053.779195,42.321739,301754,17.442111,42.321739,301754,17.442111,3.321318,23681,4.432041,0.579523,4132,0.49367,0.0,0,0.0,42.321739,301754.0,17.442111,12317.786536,87825818.0,4964.349953
1,2,5,1117,75,22,2,0,0.0,22,30510.0,0.0,0,53,48.5,284,696,471,1027,152,526,63,228,577,828,62,230,62,230,62,226,0,36,0,1,0,7,9,1,2,6,1920,21,4072,2370,14402,28033,28033,9209,28033,2,2,2,2.0,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,35,2227,10,5.0,22,224,3470,399,3641,164,3274,69,2695,426,3205,70,2736,68,2589,69,2630,61,44,35,1604,10,5.0,22,204,2163,182,1881,147,1882,65,1699,405,2022,70,1833,67,1782,62,1661,48,44,36,4385,10,1.0,22,305,14676,36,4385,36,4385,72,6673,617,13896,36,4385,72,7637,72,6411,1,44,151,10,5.0,21,10,639,16,778,5,498,2,254,21,679,2,274,2,276,2,245,60,41,1,7,5.0,4,7,7,8,8,5,5,2,2,4,4,2,2,2,2,2,2,11,6,36,3935,5.0,22,36,3935,502,10407,174,8706,71,5670,554,8929,71,6266,71,6185,71,5496,61,44,36,4385,10,22,305,14676,36,4385,36,4385,72,6673,617,13896,36,4385,72,7637,72,6411,1,44,35,2687,10,5.0,246,5728,428,6177,168,5007,69,3534,35,2687,70,3796,70,3742,69,3453,61,...,14.043049,35.514658,10903,14.469563,4.667752,1433,5.253107,0.677524,208,0.468187,13.117264,4027,13.184683,42.07329,12916.5,12.64234,21382.975244,6564573.4,6662.336272,47.9,479,19.846354,33.9,339,15.343837,3.5,35,5.400617,0.6,6,0.516398,14.0,140,17.713774,40.9,409.0,15.369161,21416.32,214163.2,9022.815623,40.490385,4211,16.913254,37.221154,3871,16.002401,2.769231,288,3.912218,0.625,65,0.486467,3.269231,340,9.13675,38.855769,4041.0,15.817631,13955.673077,1451390.0,6979.693104,45.0,720,18.496847,36.5625,585,15.340442,2.75,44,4.404543,0.5625,9,0.512348,8.4375,135,15.534773,40.78125,652.5,15.112874,17596.55,281544.8,9248.842701,36.118774,28281,14.886268,34.323116,26875,14.215426,0.0,0,0.0,0,0,0.0,1.795658,1406,6.844015,35.220945,27578.0,14.146714,11783.737676,9226666.6,5915.114029,42.7,427,18.785633,33.0,330,14.3527,0.0,0,0.0,0,0,0.0,9.7,97,18.868257,37.85,378.5,13.800262,15256.12,152561.2,8065.619148,47.993742,38347,15.094148,44.434293,35503,15.975635,4.844806,3871,5.513796,0.652065,521,0.476614,3.559449,2844,9.15906,46.214018,36925.0,14.851095,15728.027284,12566693.8,6538.478755,44.411765,755,18.34072
9,36.470588,620,15.182711,2.235294,38,4.308849,0.470588,8,0.514496,7.941176,135,15.180047,40.441176,687.5,15.027976,17427.458824,296266.8,8888.385041,40.195665,68614,15.399423,38.865261,66343,15.320358,3.168131,5408,4.494542,0.578793,988,0.493897,1.330404,2271,6.290247,39.530463,67478.5,15.034494,12469.948096,21286201.4,5360.358107,40.307692,524,17.627557,34.538462,449,13.05511,2.461538,32,4.215357,0.461538,6,0.518875,5.769231,75,15.438879,37.423077,486.5,13.453386,13720.953846,178372.4,7362.60537,38.717797,1085376,17.440596,37.186887,1042460,16.960034,0,0,0,0,0,0,1.53091,42916,5.803397,37.952342,1063918.0,16.955492,12359.797667,346482208.0,6415.197973,52.274651,108679,14.408955,41.883598,87076,16.860434,4.531987,9422,4.905832,0.706109,1468,0.455652,10.391053,21603,12.303451,47.079125,97877.5,14.425754,22374.953824,46517529.0,6822.037217
2,3,5,3732,32,32,0,1,1.0,19,7450.0,1.0,1,0,32.0,282,14099,472,18991,153,11049,62,5044,573,15506,63,5242,62,5223,62,4921,25,10,0,0,0,1,3,3,0,3,1920,8,1235,1313,16226,6208,11396,5207,42524,5,7,4,3.0,4,6,7,5,7,5,7,5,7,7,7,5,7,5,7,5,7,3,4,34,997,10,5.0,22,183,1177,261,1174,144,1153,61,1053,339,1145,67,1086,66,1094,61,1049,48,44,34,1055,10,5.0,22,190,1251,273,1252,147,1226,64,1129,332,1218,67,1148,67,1159,64,1119,55,44,35,2833,10,4.0,22,264,5005,35,2833,136,4399,70,3489,506,4509,35,2833,70,3788,70,3394,4,44,151,10,5.0,21,10,639,16,778,5,498,2,254,21,679,2,274,2,276,2,245,60,41,1,4,3.0,2,4,4,5,5,3,3,2,2,2,2,2,2,2,2,2,2,5,3,36,4016,5.0,22,36,4016,500,11373,176,9426,71,5809,582,9772,72,6568,71,6505,71,5591,61,44,35,3410,10,22,275,7782,440,8664,35,3410,70,4578,473,6597,35,3410,70,5058,70,4408,15,44,33,1520,10,5.0,201,3066,335,3362,137,2616,62,1945,33,1520,64,2067,65,2047,62,1883,61,...,15.300766,39.562926,63815,15.300766,3.117173,5028,4.466088,0.575945,929,0.494352,0.0,0,0.0,39.562926,63815.0,15.300766,11827.429634,19077644.0,4563.434339,35.857143,251,17.789778,35.857143,251,17.789778,1.571429,11,1.511858,0.714286,5,0.48795,0.0,0,0.0,35.857143,251.0,17.789778,10355.428571,72488.0,4717.605991,42.086207,4882,13.547706,40.206897,4664,14.114758,3.181034,369,4.702634,0.603448,70,0.491304,1.87931,218,6.115064,41.146552,4773.0,13.492029,14485.746552,1680346.6,5991.594892,32.5,65,0.707107,32.5,65,0.707107,1.5,3,0.707107,1.0,2,0.0,0.0,0,0.0,32.5,65.0,0.707107,8268.0,16536.0,1156.826694,44.383465,50464,14.952531,42.078276,47843,15.121923,5.682498,6461,4.806501,1,1137,0.0,2.305189,2621,7.317083,43.230871,49153.5,14.585625,14437.599648,16415550.8,5939.461168,39.6,198,20.181675,39.6,198,20.181675,2.2,11,1.30384,1,5,0.0,0.0,0,0.0,39.6,198.0,20.181675,10844.0,54220.0,5617.663037,47.993742,38347,15.094148,44.434293,35503,15.975635,4.844806,3871,5.513796,0.652065,521,0.476614,3.559449,2844,9.15906,46.214018,36925.0,14.851095,15728.027284,12566693.8,6538.478755,47.0,141,24.269322,47.0,141,24.
269322,2.666667,8,1.527525,1.0,3,0.0,0.0,0,0.0,47.0,141.0,24.269322,13263.333333,39790.0,6137.282243,40.195665,68614,15.399423,38.865261,66343,15.320358,3.168131,5408,4.494542,0.578793,988,0.493897,1.330404,2271,6.290247,39.530463,67478.5,15.034494,12469.948096,21286201.4,5360.358107,35.857143,251,17.789778,35.857143,251,17.789778,1.571429,11,1.511858,0.714286,5,0.48795,0.0,0,0.0,35.857143,251.0,17.789778,10355.428571,72488.0,4717.605991,40.789987,96142,14.67681,39.271532,92563,14.542401,1,2357,0,1,2357,0,1.518456,3579,5.388649,40.030759,94352.5,14.359168,13061.202121,30785253.4,5846.345556,43.437363,178571,16.971936,43.437363,178571,16.971936,3.530041,14512,4.578327,0.602043,2475,0.489536,0.0,0,0.0,43.437363,178571.0,16.971936,12655.625395,52027276.0,4779.044293
3,5,8,2190,44,44,2,3,1.0,16,10404.0,0.0,1,0,44.0,314,6652,528,9472,168,5094,68,2264,632,7640,69,2296,69,2291,68,2226,33,7,1,0,0,2,5,5,0,5,2488,14,1106,1017,14402,5026,11396,5667,42524,12,20,6,4.0,12,18,20,18,20,17,20,13,20,19,20,15,20,17,20,13,20,15,13,34,942,10,5.0,22,181,1058,290,1081,138,1062,66,996,302,1047,64,1002,64,1004,63,987,60,44,34,867,10,5.0,22,174,984,273,990,135,963,62,913,282,958,64,915,65,923,61,907,59,44,35,2623,10,4.0,22,243,4296,35,2623,134,3886,70,3183,474,3911,35,2623,70,3368,69,3109,4,44,215,10,5.0,21,10,988,16,1226,5,802,2,384,21,1059,2,412,2,422,2,376,61,41,1,4,4.0,3,4,4,9,9,4,4,2,2,3,3,2,2,2,2,2,2,10,4,36,3935,5.0,22,36,3935,502,10407,174,8706,71,5670,554,8929,71,6266,71,6185,71,5496,61,44,35,3410,10,22,275,7782,440,8664,35,3410,70,4578,473,6597,35,3410,70,5058,70,4408,15,44,34,1777,10,5.0,209,3420,362,3716,148,2998,64,2214,34,1777,66,2395,65,2351,64,2159,61,...,16.401647,40.708458,81824,16.401647,3.613433,7263,4.699991,0.591542,1189,0.491671,0.0,0,0.0,40.708458,81824.0,16.401647,11638.181095,23392744.0,4592.593152,53.75,430,18.843339,53.75,430,18.843339,7.75,62,6.250714,0.875,7,0.353553,0.0,0,0.0,53.75,430.0,18.843339,14592.0,116736.0,5478.589182,41.263158,9408,15.935748,39.350877,8972,16.18096,3.508772,800,4.679594,0.627193,143,0.484615,1.912281,436,6.402865,40.307018,9190.0,15.736474,13319.682456,3036887.6,5578.121329,47.454545,522,15.635624,41.090909,452,19.649196,3.636364,40,4.249064,0.818182,9,0.40452,6.363636,70,11.800616,44.272727,487.0,16.747184,16309.563636,179405.2,4823.103865,45.773325,69667,15.508577,43.634034,66411,15.715902,6.175427,9399,4.726731,1,1522,0.0,2.13929,3256,6.928131,44.703679,68039.0,15.223437,14273.879238,21724844.2,6014.835763,49.083333,589,13.918322,43.083333,517,18.082932,6.5,78,5.502066,1,12,0.0,6.0,72,11.401754,46.083333,553.0,15.094902,16251.016667,195012.2,5363.055639,36.134137,47950,15.014777,34.955539,46386,14.438886,2.715901,3604,3.964767,0.535041,710,0.498959,1.178598,1564,5.158767,35.544838,47168.0,14
.502043,11597.960663,15390493.8,5659.830716,49.8,249,16.991174,49.8,249,16.991174,5.2,26,5.761944,0.8,4,0.447214,0.0,0,0.0,49.8,249.0,16.991174,12992.0,64960.0,5463.314013,40.990879,85384,16.40981,40.243879,83828,16.399317,3.644743,7592,4.726888,0.594815,1239,0.491046,0.747,1556,4.711243,40.617379,84606.0,16.234555,11985.952568,24966739.2,5012.531,52.8,528,16.738512,46.8,468,22.155009,6.4,64,6.203941,0.9,9,0.316228,6.0,60,12.649111,49.8,498.0,18.587929,16024.4,160244.0,5724.709933,42.752535,63231,14.714041,40.948614,60563,14.619627,3,4437,0,1,1479,0,1.803922,2668,6.110348,41.850575,61897.0,14.345179,13622.900203,20148269.4,5832.489615,42.833993,194809,17.148581,42.833993,194809,17.148581,3.480211,15828,4.643035,0.591249,2689,0.491657,0.0,0,0.0,42.833993,194809.0,17.148581,12420.470097,56488298.0,4820.483314
4,6,9,1785,52,52,2,5,1.0,22,15264.0,1.0,1,0,52.0,324,4447,546,6439,173,3388,70,1492,660,5159,71,1511,71,1507,70,1477,41,14,1,0,0,4,6,6,0,6,3008,19,1196,1115,14402,3268,11396,9209,42524,23,45,8,5.0,18,38,45,40,45,38,45,25,45,41,45,29,45,31,45,25,45,25,21,32,973,8,5.0,22,181,1133,290,1154,142,1116,60,1041,313,1111,62,1047,62,1054,59,1032,61,44,32,918,8,5.0,22,179,1062,278,1079,139,1043,59,965,306,1039,63,982,63,992,59,962,60,43,35,2083,10,4.0,22,214,2926,35,2083,128,2753,67,2410,414,2725,35,2083,68,2491,66,2361,4,44,183,10,5.0,22,10,906,16,1159,5,731,2,341,22,1043,2,357,2,352,2,336,59,42,1,6,5.0,5,6,6,8,8,5,5,2,2,5,5,2,2,2,2,2,2,14,6,36,3935,5.0,22,36,3935,502,10407,174,8706,71,5670,554,8929,71,6266,71,6185,71,5496,61,44,35,3410,10,22,275,7782,440,8664,35,3410,70,4578,473,6597,35,3410,70,5058,70,4408,15,44,35,2687,10,5.0,246,5728,428,6177,168,5007,69,3534,35,2687,70,3796,70,3742,69,3453,61,...,15.640036,39.405683,99854,15.640036,2.754933,6981,4.033261,0.546567,1385,0.497925,0.0,0,0.0,39.405683,99854.0,15.640036,11277.641673,28577544.0,4551.798868,42.055556,757,14.873871,42.055556,757,14.873871,3.055556,55,3.842062,0.722222,13,0.460889,0.0,0,0.0,42.055556,757.0,14.873871,12628.444444,227312.0,4387.087339,41.012474,19727,15.731201,39.654886,19074,15.764062,2.968815,1428,3.863971,0.613306,295,0.4875,1.357588,653,5.303005,40.33368,19400.5,15.522812,12795.866112,6154811.6,5613.700773,62.333333,187,11.676187,62.333333,187,11.676187,2.666667,8,2.516611,0.666667,2,0.57735,0.0,0,0.0,62.333333,187.0,11.676187,18066.0,54198.0,2932.748199,44.27781,76025,15.011272,42.482236,72942,15.229876,5.193943,8918,4.302952,1,1717,0.0,1.795574,3083,6.330415,43.380023,74483.5,14.785979,13729.15364,23572956.8,5764.575852,43.928571,615,15.469396,43.714286,612,15.324332,4.5,63,3.917417,1,14,0.0,0.214286,3,0.801784,43.821429,613.5,15.391815,13485.571429,188798.0,4556.495899,35.479818,54497,14.33161,34.572266,53103,14.049377,2.202474,3383,3.540554,0.50651,778,0.50012,0.907552,1394,4.366803,35.02604
2,53800.0,14.022224,10987.372786,16876604.6,5163.944658,40.285714,282,12.079104,39.857143,279,11.50983,4.285714,30,3.450328,0.857143,6,0.377964,0.428571,3,1.133893,40.071429,280.5,11.784271,12450.0,87150.0,3706.703837,39.726017,103526,15.65291,39.050652,101766,15.66111,2.795856,7286,4.060266,0.549501,1432,0.497639,0.675365,1760,4.461366,39.388335,102646.0,15.497291,11561.769455,30129971.2,4876.863536,42.055556,757,14.873871,42.055556,757,14.873871,3.055556,55,3.842062,0.722222,13,0.460889,0.0,0,0.0,42.055556,757.0,14.873871,12628.444444,227312.0,4387.087339,45.646784,39028,14.349083,43.192982,36930,14.918795,5,4275,0,1,855,0,2.453801,2098,7.272057,44.419883,37979.0,14.177891,14402.86386,12314448.6,5872.587458,42.321739,301754,17.442111,42.321739,301754,17.442111,3.321318,23681,4.432041,0.579523,4132,0.49367,0.0,0,0.0,42.321739,301754.0,17.442111,12317.786536,87825818.0,4964.349953


In [256]:
# Peek at the leading 35 column labels of the combined frame.
df_total.columns[0:35]

Index(['ID', 'City_Code', 'Region_Code', 'Upper_Age', 'Lower_Age',
       'Health Indicator', 'Holding_Policy_Duration', 'Holding_Policy_Type',
       'Reco_Policy_Cat', 'Reco_Policy_Premium', 'Response',
       'holds_insurance_policy', 'Age_diff', 'Average_age',
       'City_Health Indicator', 'Region_Health Indicator',
       'City_Holding_Policy_Duration', 'Region_Holding_Policy_Duration',
       'City_Holding_Policy_Type', 'Region_Holding_Policy_Type',
       'City_Reco_Insurance_Type', 'Region_Reco_Insurance_Type',
       'City_Reco_Policy_Cat', 'Region_Reco_Policy_Cat',
       'City_holds_insurance_policy', 'Region_holds_insurance_policy',
       'City_Accomodation_Type', 'Region_Accomodation_Type', 'City_Is_Spouse',
       'Region_Is_Spouse', 'Holding_Policy_Duration_Type',
       'Reco_Insurance_Type_Cat', 'Accomodation_Type_Rented',
       'Reco_Insurance_Type_Joint', 'Is_Spouse_Yes'],
      dtype='object')

In [258]:
# df_total.drop(['Region_Code'],axis=1,inplace=True)

In [259]:
# Split the combined frame back into train/test: rows with a Response are the
# labelled training rows, rows without one are the test rows.
has_label = df_total['Response'].notnull()

# Columns that identify a duplicate training record.
# The raw Accomodation_Type / Reco_Insurance_Type / Is_Spouse columns are not
# present in df_total at this point (only Accomodation_Type_Rented,
# Reco_Insurance_Type_Joint and Is_Spouse_Yes are), and selecting missing labels
# raises KeyError on pandas >= 1.0, so the existing columns are used instead.
# NOTE(review): presumably these are the one-hot encodings of the raw columns —
# confirm against the encoding cell.
cols = ['City_Code', 'Region_Code', 'Accomodation_Type_Rented',
       'Reco_Insurance_Type_Joint', 'Upper_Age', 'Lower_Age', 'Is_Spouse_Yes',
       'Health Indicator', 'Holding_Policy_Duration', 'Holding_Policy_Type',
       'Reco_Policy_Cat', 'Reco_Policy_Premium']

# Keep the first occurrence of every duplicated record and renumber the rows
# 0..n-1 so the index matches row positions.
train_x_full = (
    df_total[has_label]
    .drop_duplicates(subset=cols, keep='first')
    .reset_index(drop=True)
)
test_x_full = df_total[~has_label]

# Pull out the target and the test IDs before those columns are removed.
train_y_full = train_x_full['Response']
test_id = test_x_full['ID']

# drop() returns new frames instead of deleting columns in place on slices
# taken from df_total.
train_x_full = train_x_full.drop(columns=['Response', 'ID'])
test_x_full = test_x_full.drop(columns=['Response', 'ID'])
print(train_x_full.shape, train_y_full.shape, test_x_full.shape)

(48752, 1128) (48752,) (21805, 1128)


In [260]:
# Number of distinct (non-null) values in every test-set column.
test_x_full.nunique(axis=0)

City_Code                                                    36
Region_Code                                                4694
Upper_Age                                                    58
Lower_Age                                                    60
Health Indicator                                             10
                                                           ... 
sum_of_Average_age_per_Reco_Insurance_Type_Cat               44
std_of_Average_age_per_Reco_Insurance_Type_Cat               44
mean_of_Reco_Policy_Premium_per_Reco_Insurance_Type_Cat      44
sum_of_Reco_Policy_Premium_per_Reco_Insurance_Type_Cat       44
std_of_Reco_Policy_Premium_per_Reco_Insurance_Type_Cat       44
Length: 1128, dtype: int64

In [261]:
# For each feature, report how many distinct values occur in both train and test.
for col in test_x_full.columns:
    shared_values = set(train_x_full[col]) & set(test_x_full[col])
    print(col, ":", len(shared_values))

City_Code : 36
Region_Code : 4472
Upper_Age : 58
Lower_Age : 60
Health Indicator : 10
Holding_Policy_Duration : 16
Holding_Policy_Type : 5
Reco_Policy_Cat : 22
Reco_Policy_Premium : 4486
holds_insurance_policy : 2
Age_diff : 59
Average_age : 115
City_Health Indicator : 286
Region_Health Indicator : 10148
City_Holding_Policy_Duration : 506
Region_Holding_Policy_Duration : 8425
City_Holding_Policy_Type : 174
Region_Holding_Policy_Type : 9286
City_Reco_Insurance_Type : 71
Region_Reco_Insurance_Type : 6277
City_Reco_Policy_Cat : 577
Region_Reco_Policy_Cat : 7436
City_holds_insurance_policy : 71
Region_holds_insurance_policy : 7106
City_Accomodation_Type : 71
Region_Accomodation_Type : 7070
City_Is_Spouse : 71
Region_Is_Spouse : 6010
Holding_Policy_Duration_Type : 61
Reco_Insurance_Type_Cat : 44
Accomodation_Type_Rented : 2
Reco_Insurance_Type_Joint : 2
Is_Spouse_Yes : 2
Reco_Policy_Premium_Bins : 8
Upper_Age_Bins : 10
Lower_Age_Bins : 10
Age_diff_Bins : 3
Average_age_Bins : 10
City_Code_co

unique_of_Region_Is_Spouse_per_Health Indicator : 10
unique_of_Holding_Policy_Duration_Type_per_Health Indicator : 4
unique_of_Reco_Insurance_Type_Cat_per_Health Indicator : 3
unique_of_City_Code_per_Holding_Policy_Type : 2
unique_of_Region_Code_per_Holding_Policy_Type : 5
unique_of_Health Indicator_per_Holding_Policy_Type : 1
unique_of_Reco_Policy_Cat_per_Holding_Policy_Type : 1
unique_of_City_Health Indicator_per_Holding_Policy_Type : 5
unique_of_Region_Health Indicator_per_Holding_Policy_Type : 5
unique_of_City_Holding_Policy_Duration_per_Holding_Policy_Type : 5
unique_of_Region_Holding_Policy_Duration_per_Holding_Policy_Type : 5
unique_of_City_Holding_Policy_Type_per_Holding_Policy_Type : 2
unique_of_Region_Holding_Policy_Type_per_Holding_Policy_Type : 5
unique_of_City_Reco_Insurance_Type_per_Holding_Policy_Type : 4
unique_of_Region_Reco_Insurance_Type_per_Holding_Policy_Type : 5
unique_of_City_Reco_Policy_Cat_per_Holding_Policy_Type : 5
unique_of_Region_Reco_Policy_Cat_per_Holding

unique_of_Region_holds_insurance_policy_per_City_Holding_Policy_Type : 118
unique_of_City_Accomodation_Type_per_City_Holding_Policy_Type : 2
unique_of_Region_Accomodation_Type_per_City_Holding_Policy_Type : 128
unique_of_City_Is_Spouse_per_City_Holding_Policy_Type : 2
unique_of_Region_Is_Spouse_per_City_Holding_Policy_Type : 127
unique_of_Holding_Policy_Duration_Type_per_City_Holding_Policy_Type : 15
unique_of_Reco_Insurance_Type_Cat_per_City_Holding_Policy_Type : 42
unique_of_City_Code_per_Region_Holding_Policy_Type : 1
unique_of_Region_Code_per_Region_Holding_Policy_Type : 1
unique_of_Health Indicator_per_Region_Holding_Policy_Type : 8
unique_of_Holding_Policy_Type_per_Region_Holding_Policy_Type : 1
unique_of_Reco_Policy_Cat_per_Region_Holding_Policy_Type : 12
unique_of_City_Health Indicator_per_Region_Holding_Policy_Type : 8
unique_of_Region_Health Indicator_per_Region_Holding_Policy_Type : 8
unique_of_City_Holding_Policy_Duration_per_Region_Holding_Policy_Type : 14
unique_of_Region

unique_of_City_Holding_Policy_Type_per_Region_holds_insurance_policy : 4
unique_of_Region_Holding_Policy_Type_per_Region_holds_insurance_policy : 4
unique_of_City_Reco_Insurance_Type_per_Region_holds_insurance_policy : 2
unique_of_Region_Reco_Insurance_Type_per_Region_holds_insurance_policy : 2
unique_of_City_Reco_Policy_Cat_per_Region_holds_insurance_policy : 15
unique_of_Region_Reco_Policy_Cat_per_Region_holds_insurance_policy : 15
unique_of_City_holds_insurance_policy_per_Region_holds_insurance_policy : 1
unique_of_City_Accomodation_Type_per_Region_holds_insurance_policy : 2
unique_of_Region_Accomodation_Type_per_Region_holds_insurance_policy : 2
unique_of_City_Is_Spouse_per_Region_holds_insurance_policy : 2
unique_of_Region_Is_Spouse_per_Region_holds_insurance_policy : 2
unique_of_Holding_Policy_Duration_Type_per_Region_holds_insurance_policy : 34
unique_of_Reco_Insurance_Type_Cat_per_Region_holds_insurance_policy : 19
unique_of_City_Code_per_City_Accomodation_Type : 1
unique_of_Re

mean_of_Upper_Age_per_City_Code : 36
sum_of_Upper_Age_per_City_Code : 36
std_of_Upper_Age_per_City_Code : 36
mean_of_Lower_Age_per_City_Code : 36
sum_of_Lower_Age_per_City_Code : 36
std_of_Lower_Age_per_City_Code : 36
mean_of_Holding_Policy_Duration_per_City_Code : 36
sum_of_Holding_Policy_Duration_per_City_Code : 36
std_of_Holding_Policy_Duration_per_City_Code : 36
mean_of_holds_insurance_policy_per_City_Code : 36
sum_of_holds_insurance_policy_per_City_Code : 36
std_of_holds_insurance_policy_per_City_Code : 36
mean_of_Age_diff_per_City_Code : 36
sum_of_Age_diff_per_City_Code : 36
std_of_Age_diff_per_City_Code : 36
mean_of_Average_age_per_City_Code : 36
sum_of_Average_age_per_City_Code : 36
std_of_Average_age_per_City_Code : 36
mean_of_Reco_Policy_Premium_per_City_Code : 36
sum_of_Reco_Policy_Premium_per_City_Code : 36
std_of_Reco_Policy_Premium_per_City_Code : 36
mean_of_Upper_Age_per_Region_Code : 2402
sum_of_Upper_Age_per_Region_Code : 1559
std_of_Upper_Age_per_Region_Code : 4292
me

sum_of_Reco_Policy_Premium_per_City_Holding_Policy_Duration : 506
std_of_Reco_Policy_Premium_per_City_Holding_Policy_Duration : 507
mean_of_Upper_Age_per_Region_Holding_Policy_Duration : 1439
sum_of_Upper_Age_per_Region_Holding_Policy_Duration : 777
std_of_Upper_Age_per_Region_Holding_Policy_Duration : 4431
mean_of_Lower_Age_per_Region_Holding_Policy_Duration : 1411
sum_of_Lower_Age_per_Region_Holding_Policy_Duration : 747
std_of_Lower_Age_per_Region_Holding_Policy_Duration : 4514
mean_of_Holding_Policy_Duration_per_Region_Holding_Policy_Duration : 16
sum_of_Holding_Policy_Duration_per_Region_Holding_Policy_Duration : 56
std_of_Holding_Policy_Duration_per_Region_Holding_Policy_Duration : 2
mean_of_holds_insurance_policy_per_Region_Holding_Policy_Duration : 2
sum_of_holds_insurance_policy_per_Region_Holding_Policy_Duration : 21
std_of_holds_insurance_policy_per_Region_Holding_Policy_Duration : 2
mean_of_Age_diff_per_Region_Holding_Policy_Duration : 640
sum_of_Age_diff_per_Region_Holding

sum_of_holds_insurance_policy_per_City_holds_insurance_policy : 36
std_of_holds_insurance_policy_per_City_holds_insurance_policy : 1
mean_of_Age_diff_per_City_holds_insurance_policy : 71
sum_of_Age_diff_per_City_holds_insurance_policy : 71
std_of_Age_diff_per_City_holds_insurance_policy : 71
mean_of_Average_age_per_City_holds_insurance_policy : 71
sum_of_Average_age_per_City_holds_insurance_policy : 71
std_of_Average_age_per_City_holds_insurance_policy : 71
mean_of_Reco_Policy_Premium_per_City_holds_insurance_policy : 71
sum_of_Reco_Policy_Premium_per_City_holds_insurance_policy : 71
std_of_Reco_Policy_Premium_per_City_holds_insurance_policy : 71
mean_of_Upper_Age_per_Region_holds_insurance_policy : 2303
sum_of_Upper_Age_per_Region_holds_insurance_policy : 1232
std_of_Upper_Age_per_Region_holds_insurance_policy : 6149
mean_of_Lower_Age_per_Region_holds_insurance_policy : 2240
sum_of_Lower_Age_per_Region_holds_insurance_policy : 1169
std_of_Lower_Age_per_Region_holds_insurance_policy : 

### CatBoost

In [266]:
# Columns passed to CatBoost as categorical features (each listed once).
# NOTE(review): Reco_Policy_Premium is cast to int and treated as categorical;
# astype(int) drops any fractional part — confirm this is intended.
cat_features = ['City_Code', 'Region_Code',
               'Health Indicator', 'Holding_Policy_Type',
               'Reco_Policy_Cat','holds_insurance_policy',
               'City_Health Indicator', 'Region_Health Indicator',
               'City_Holding_Policy_Type', 'Region_Holding_Policy_Type',
               'City_Reco_Insurance_Type', 'Region_Reco_Insurance_Type',
               'City_Reco_Policy_Cat', 'Region_Reco_Policy_Cat',
               'City_holds_insurance_policy', 'Region_holds_insurance_policy',
               'City_Accomodation_Type', 'Region_Accomodation_Type', 'City_Is_Spouse',
               'Region_Is_Spouse', 'Holding_Policy_Duration_Type',
               'Reco_Insurance_Type_Cat', 'Accomodation_Type_Rented',
               'Reco_Insurance_Type_Joint', 'Is_Spouse_Yes',
               'City_Holding_Policy_Duration', 'Region_Holding_Policy_Duration',
               'Holding_Policy_Duration','Reco_Policy_Premium'
              ]

# Columns cast to integer: every categorical feature plus the age columns,
# which are cast but stay numeric features.
int_cols = cat_features + ['Upper_Age', 'Lower_Age', 'Age_diff']
int_dtypes = {col: int for col in int_cols}

# astype() returns new frames, so nothing is assigned column-by-column into a
# frame that may be a slice of df_total.
train_x_full = train_x_full.astype(int_dtypes)
test_x_full = test_x_full.astype(int_dtypes)

In [268]:
# Final ROC-AUC = 0.8139
# Stratified K-fold CatBoost training: one model per fold, scored on the
# held-out part of train; test-set probabilities are averaged over the folds.
train_x = train_x_full.copy()
train_y = train_y_full.copy()
test_x = test_x_full.copy()

NFOLDS = 20
folds = StratifiedKFold(n_splits=NFOLDS, shuffle=True, random_state=18121995)
pred_test = []          # one list of test-set probabilities per fold
pred_x = []             # held-out probabilities, concatenated in fold order
pred_y = []             # held-out labels, in the same fold order as pred_x
fold_importances = []   # one feature-importance frame per fold
for fold, (train_ids, test_ids) in enumerate(folds.split(train_x, train_y)):
    print('● Fold :', fold+1)
    # split() yields positional indices, so select rows with iloc and build
    # each slice only once per fold.
    fit_x, fit_y = train_x.iloc[train_ids], train_y.iloc[train_ids]
    val_x, val_y = train_x.iloc[test_ids], train_y.iloc[test_ids]

    # NOTE(review): the model seed (1812195) differs from the fold seed
    # (18121995) — possibly a typo; left unchanged so recorded scores still apply.
    model = CatBoostClassifier(n_estimators=20000,random_state=1812195,learning_rate=0.03,eval_metric='AUC',
                              cat_features =cat_features)
    # The held-out fold is the last eval set and is also used for early
    # stopping, so the out-of-fold AUC printed below is somewhat optimistic.
    model.fit(fit_x, fit_y,
              eval_set=[(fit_x, fit_y), (val_x, val_y)],
              verbose=500,
              early_stopping_rounds=200)
    # The last predict_proba column is the probability of the positive class.
    pred_x.extend(model.predict_proba(val_x)[:,-1].tolist())
    pred_y.extend(val_y.tolist())
    pred_test.append(model.predict_proba(test_x)[:,-1].tolist())
    print('\n')

    # Label importances with the same 1-based fold number that is printed above.
    fold_importances.append(pd.DataFrame({
        "feature": train_x.columns,
        "importance": model.feature_importances_,
        "fold": fold + 1,
    }))

# Concatenate once after the loop instead of growing a frame inside it.
feature_importance_df = pd.concat(fold_importances, axis=0)

print("Final ROC-AUC Score:", roc_auc_score(pred_y, pred_x))

# One column per fold, one row per test record.
final_prediction = pd.DataFrame(pred_test).T
final_prediction.columns = [("FOLD_"+str(i)) for i in range(final_prediction.shape[1])]

# The submitted probability is the plain mean over the fold models.
vote = final_prediction.mean(axis=1)
final_prediction['Response'] = vote
final_prediction['ID'] = test_id.values
print(final_prediction.shape)

# NOTE(review): train_pred_all is not defined in this cell. pred_x is ordered by
# fold (shuffled splits), not by train row — confirm that downstream code pairs
# this column with pred_y rather than with train_y_full.
train_pred_all['cb_pred'] = pred_x
final_prediction_cb = final_prediction.copy()

final_prediction.head()

● Fold : 1
0:	test: 0.7499461	test1: 0.7449343	best: 0.7449343 (0)	total: 316ms	remaining: 1h 45m 18s
500:	test: 0.8859171	test1: 0.8069502	best: 0.8071494 (467)	total: 2m 32s	remaining: 1h 39m
1000:	test: 0.9023302	test1: 0.8076679	best: 0.8081780 (841)	total: 5m 6s	remaining: 1h 36m 49s
Stopped by overfitting detector  (200 iterations wait)

bestTest = 0.8081780066
bestIteration = 841

Shrink model to first 842 iterations.


● Fold : 2
0:	test: 0.7491410	test1: 0.7610509	best: 0.7610509 (0)	total: 329ms	remaining: 1h 49m 31s
500:	test: 0.8882595	test1: 0.8235128	best: 0.8237693 (472)	total: 2m 49s	remaining: 1h 49m 55s
Stopped by overfitting detector  (200 iterations wait)

bestTest = 0.823847676
bestIteration = 543

Shrink model to first 544 iterations.


● Fold : 3
0:	test: 0.7404455	test1: 0.7338029	best: 0.7338029 (0)	total: 331ms	remaining: 1h 50m 10s
500:	test: 0.8882716	test1: 0.8096393	best: 0.8097011 (494)	total: 2m 48s	remaining: 1h 49m 14s
Stopped by overfitting detector  

Unnamed: 0,FOLD_0,FOLD_1,FOLD_2,FOLD_3,FOLD_4,FOLD_5,FOLD_6,FOLD_7,FOLD_8,FOLD_9,FOLD_10,FOLD_11,FOLD_12,FOLD_13,FOLD_14,FOLD_15,FOLD_16,FOLD_17,FOLD_18,FOLD_19,Response,ID
0,0.064973,0.068683,0.059546,0.068678,0.063358,0.054752,0.072979,0.052199,0.067585,0.067355,0.056773,0.064627,0.054046,0.053868,0.059736,0.048528,0.066159,0.06788,0.05592,0.052007,0.060983,50883
1,0.067165,0.078078,0.084768,0.070015,0.07954,0.06331,0.086575,0.059024,0.083825,0.071201,0.084387,0.076651,0.073158,0.075502,0.084388,0.082494,0.076802,0.0522,0.073131,0.072769,0.074749,50884
2,0.434917,0.457602,0.405649,0.46366,0.439468,0.427119,0.482638,0.451075,0.451873,0.457026,0.449572,0.462557,0.470639,0.423898,0.453164,0.433602,0.420445,0.553237,0.463387,0.44985,0.452569,50885
3,0.091021,0.103002,0.099288,0.089482,0.098358,0.097389,0.095846,0.093021,0.103143,0.094719,0.094996,0.10165,0.101842,0.084432,0.096496,0.092977,0.093867,0.093417,0.085816,0.089696,0.095023,50886
4,0.038729,0.042363,0.031359,0.034737,0.035625,0.034278,0.031901,0.041032,0.036576,0.034473,0.036095,0.039388,0.034034,0.038677,0.037061,0.036921,0.042954,0.025112,0.039901,0.038533,0.036487,50887


In [269]:
# Save the ID / Response pair as the CatBoost submission file.
submission_columns = ['ID', 'Response']
df_submission = final_prediction[submission_columns]
df_submission.to_csv("cb_v4.csv", index=False)
df_submission.head()

Unnamed: 0,ID,Response
0,50883,0.060983
1,50884,0.074749
2,50885,0.452569
3,50886,0.095023
4,50887,0.036487


In [270]:
# Average every feature's importance over the folds, most important first.
all_features = (
    feature_importance_df[["feature", "importance"]]
    .groupby("feature")
    .mean()
    .sort_values(by="importance", ascending=False)
    .reset_index()
)
# Names of the features whose mean importance is not exactly zero.
nonzero_importance = all_features['importance'] != 0
important_features = all_features.loc[nonzero_importance, 'feature'].tolist()
all_features

Unnamed: 0,feature,importance
0,Region_Reco_Policy_Cat,4.847963
1,sum_of_Lower_Age_per_Region_Reco_Policy_Cat,4.382607
2,City_Reco_Policy_Cat,4.031894
3,sum_of_Average_age_per_Region_Reco_Policy_Cat,3.575629
4,sum_of_Upper_Age_per_Region_Reco_Policy_Cat,3.455060
...,...,...
1123,unique_of_Reco_Policy_Cat_per_Upper_Age,0.000000
1124,unique_of_City_Code_per_City_Reco_Insurance_Type,0.000000
1125,unique_of_City_Code_per_City_Reco_Policy_Cat,0.000000
1126,unique_of_City_Code_per_City_holds_insurance_p...,0.000000
