# Import Libraries

In [1]:
import numpy as np # linear algebra
import pandas as pd # data processing
import os
import xgboost
import requests
import random
random.seed(6)
import catboost 
from catboost import CatBoostClassifier
from xgboost import XGBClassifier
from sklearn.preprocessing import LabelEncoder, RobustScaler
le = LabelEncoder()
from sklearn.model_selection import KFold,GroupKFold, RepeatedKFold, GridSearchCV, cross_validate, train_test_split
from sklearn.metrics import confusion_matrix, precision_score, accuracy_score, recall_score
from numpy import mean, std
from numpy import isnan
from numpy import asarray
from matplotlib import pyplot
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score
pd.set_option('display.max_colwidth', 250)
pd.set_option('display.max_columns', 500)
pd.options.mode.chained_assignment = None  # default='warn'


# Import Data

In [2]:
client = pd.read_csv('../input/btcbtc/btc/btc_clientes.csv')
df = pd.read_csv('../input/btcbtc/btc/btc_universo_interacoes_genesys.csv')
app = pd.read_csv('../input/btcbtc/btc/btc_interacoes_app.csv')

# Data Cleaning
1. rename columns
2. convert data to numeric types

In [3]:
client.columns

Index(['n_cliente', 'idade_cliente', 'c_pais_isoa_resi', 'i_gnro_cli',
       'gran_cli_prt', 'estado_civil', 'c_pais_isoa_mord_prin', 'cp_distrito',
       'q_1titular', 'q_outras_titular', 'q_autorizado', 'q_fiador',
       'q_profissao', 'i_cdo_ativ', 'z_adesao_cdo', 'qt_fami_prod',
       'pfi_ris_brqm_cap', 'c_prof_cli', 'd_prof_cli', 'c_sit_prof',
       'd_sit_prof_cli', 'c_tipo_dpdc_trbx', 'd_tipo_dpdc_trbx', 'd_hrio_tel1',
       'd_hrio_tel2', 'd_hrio_tel3', 'd_hrio_tel4', 'd_hrio_tm1', 'd_hrio_tm2',
       'd_segm_cli_prin', 'i_falencia', 'm_volume_negocios',
       'm_carteira_passivas', 'm_carteira_activas', 'i_vencimento',
       'm_vencimento_ult6m', 'i_venda_2020', 'n_venda_2020'],
      dtype='object')

In [4]:
cols = ['client_id', 'age_client', 'c_country_isoa_resi', 'i_gnro_cli',
        'gran_cli_prt', 'civil_status', 'c_country_isoa_mord_prin', 'cp_district',
        'q_1holder', 'q_other_holder', 'q_authorized', 'q_guarantor',
        'q_profession', 'i_cdo_ativ', 'z_adesao_cdo', 'qt_fami_prod',
        'pfi_ris_brqm_cap', 'c_prof_cli', 'd_prof_cli', 'c_sit_prof',
        'd_sit_prof_cli', 'c_tipo_dpdc_trbx', 'd_tipo_dpdc_trbx', 'd_hrio_tel1',
        'd_hrio_tel2', 'd_hrio_tel3', 'd_hrio_tel4', 'd_hrio_tm1', 'd_hrio_tm2',
        'd_segm_cli_prin', 'i_falencia', 'm_volume_negocios',
        'm_passive_portfolio', 'm_active_portfolio', 'i_maturity',
        'm_vencimento_ult6m', 'i_venda_2020', 'n_venda_2020']
client = pd.DataFrame(client.values,columns=cols)

In [5]:
df.columns

Index(['interaction_id', 'n_cliente', 'datainicio', 'horainicio', 'datafim',
       'horafim', 'duracao', 'media', 'sentido', 'atendida', 'nps', 'nps1',
       'mes', 'dia_semana', 'periodo', 'dia_semana_periodo'],
      dtype='object')

In [6]:
cols = ['interaction_id', 'client_id', 'start_date', 'start_time', 'end_date',
        'end_time', 'duration', 'mode', 'direction', 'answered', 'nps', 'nps1',
        'month', 'week_day', 'period', 'week_day_period']
df = pd.DataFrame(df.values,columns=cols)

In [7]:
app.columns

Index(['ns_interaccao', 'n_cliente', 'datainicio', 'horainicio', 'datafim',
       'horafim', 'du_interaccao', 'c_est_iacz_celt_dw', 'd_est_iacz_cel_fdd',
       'c_tipo_iacz_cel_dw', 'd_tip_iacz_cel_fdd', 'i_ibnd_obnd',
       'ts_carregamento', 'ts_actz_ult', 'd_app_cxd_fdad', 'mes', 'dia_semana',
       'periodo', 'dia_semana_periodo'],
      dtype='object')

In [8]:
cols = ['interaction_id', 'client_id', 'start_date', 'start_time', 'end_date',
        'end_time', 'duration', 'c_est_iacz_celt_dw', 'd_est_iacz_cel_fdd',
        'c_tipo_iacz_cel_dw', 'd_tip_iacz_cel_fdd', 'i_ibnd_obnd',
        'ts_loading', 'ts_actz_ult', 'd_app_cxd_fdad', 'month', 'week_day',
        'period', 'week_day_period']
app = pd.DataFrame(app.values,columns=cols)

In [9]:
# Get Dummy Variables
client['YA'] = (client['c_sit_prof']=='YA ').astype('int')
client['YP'] = (client['c_sit_prof']=='YP ').astype('int')
client['FS'] = (client['c_sit_prof']=='FS ').astype('int')
client.c_sit_prof = client.c_sit_prof.replace({'YA ':np.nan,'YP ':np.nan,'FS ':np.nan})

In [10]:
client['m_vencimento_ult6m'] = client['m_vencimento_ult6m'].str.split('-',n=1,expand=True)[1]
client['m_vencimento_ult6m'] = client['m_vencimento_ult6m'].replace({'None':0})
client[['z_adesao_cdo_year','z_adesao_cdo_month','z_adesao_cdo']] = client['z_adesao_cdo'].str.split('-',n=2,expand=True)

numeric_cols = [
                'age_client','q_1holder','q_other_holder','q_authorized',
               'q_guarantor','q_profession','qt_fami_prod','c_prof_cli','m_active_portfolio',
               'c_sit_prof','c_tipo_dpdc_trbx','i_falencia','m_volume_negocios',
               'm_passive_portfolio','i_venda_2020','n_venda_2020','m_vencimento_ult6m',
                'z_adesao_cdo_year','z_adesao_cdo_month','z_adesao_cdo'
               ]
# Convert to floats
for col in numeric_cols:
    client[col] = client[col].astype('float')

client.z_adesao_cdo_year[client.z_adesao_cdo_year==1]=np.nan
client['z_adesao_cdo_month_period'] = ((client['z_adesao_cdo_year']-client['z_adesao_cdo_year'].min())*12)+client['z_adesao_cdo_month']
client['z_adesao_cdo'] = ((client['z_adesao_cdo_month_period']-client['z_adesao_cdo_month_period'].min())*31)+client['z_adesao_cdo']

cat_cols = [
    'c_country_isoa_resi','i_gnro_cli','gran_cli_prt','civil_status',
    'c_country_isoa_mord_prin','cp_district','i_cdo_ativ','pfi_ris_brqm_cap',
    'd_prof_cli','d_sit_prof_cli','d_tipo_dpdc_trbx','d_hrio_tel1','d_hrio_tel2',
    'd_hrio_tel3','d_hrio_tel4','d_hrio_tm1','d_hrio_tm2','d_segm_cli_prin','i_maturity'
]
# Lbel Encode Categoical Columns
for col in cat_cols:
    le = LabelEncoder()
    client[col] = le.fit_transform(client[col].astype(str))

# Data Pre-Processing
1. Extract time features
2. Extract target clusters

In [11]:
df[['start_year','start_month','start_day']] = df['start_date'].str.split('-',n=2,expand=True)
df[['start_hour','start_minute','start_sec']] = df['start_time'].str.split(':',n=2,expand=True)
df[['duration_hour','duration_minute','duration']] = df['duration'].str.split(':',n=2,expand=True)
df.drop(['end_date','end_time','month'],axis=1,inplace=True)

numeric_cols = [
    'start_year','start_month','start_day',
    'start_hour','start_minute','start_sec',
    'duration_hour','duration_minute','duration',
    'answered','nps','nps1'
]

for col in numeric_cols:
    df[col] = df[col].astype('float')
df['start_month_period'] = ((df.start_year-df.start_year.min())*12)+df.start_month
df['start_date'] = ((df.start_month_period-df.start_month_period.min())*31)+df.start_day
df['start_time'] = (df.start_hour*60)+df.start_minute
df.start_sec = (df.start_time*60)+df.start_sec
df['duration_minute'] = (df.duration_hour*60)+df.duration_minute
df.duration = (df.duration_minute*60) + df.duration
df.start_hour = df.start_hour.astype('int')
df.week_day_period = df.week_day + '_' + df.start_hour.astype(str)

In [12]:
app[['start_year','start_month','start_day']] = app['start_date'].str.split('-',n=2,expand=True)
app[['start_hour','start_minute','start_sec']] = app['start_time'].str.split(':',n=2,expand=True)
app.drop(['end_date','end_time','month','ts_loading','ts_actz_ult','i_ibnd_obnd','d_est_iacz_cel_fdd'],axis=1,inplace=True)

numeric_cols = [
    'start_year','start_month','start_day',
    'start_hour','start_minute','start_sec',
    'duration','c_est_iacz_celt_dw','c_tipo_iacz_cel_dw'
]
for col in numeric_cols:
    app[col] = app[col].astype('float')
app['start_month_period'] = ((app.start_year-app.start_year.min())*12)+app.start_month
app['start_date'] = ((app.start_month_period-app.start_month_period.min())*31)+app.start_day
app['start_time'] = (app.start_hour*60)+app.start_minute
app.start_sec = (app.start_time*60)+app.start_sec

# Feature Engineering

# We have 5 Meta-data on Customers
1. app
2. voice
3. chat
4. email
5. Out

# App Data

In [13]:
app = pd.merge(app,client,on='client_id',how='left')
app['Interactions'] = 1
stats = (app[['client_id']].groupby('client_id').count()).reset_index()
client = pd.merge(client, stats,on='client_id',how='left',suffixes=('','App'))
for cat in cat_cols:
    stats = (app[[cat,'Interactions']].groupby([cat]).count()).reset_index()
    client = pd.merge(client, stats,on=cat,how='left',suffixes=('','App'))

# Inbound

In [14]:
In = df[df.direction=='Inbound']
In = pd.merge(In,client,on='client_id',how='left')

# Outbound

In [15]:
Out = df[df.direction=='Outbound']
Out = Out.rename({'answered':'answers'},axis=1)
Out = pd.merge(Out,client,on='client_id',how='left')
stats = (Out[['client_id','duration','answers','nps','nps1']].groupby(['client_id']).mean()).reset_index()
client = pd.merge(client, stats,on='client_id',how='left',suffixes=('','Outbound'))
for cat in cat_cols:
    stats = (Out[[cat,'duration','answers','nps','nps1']].groupby([cat]).mean()).reset_index()
    client = pd.merge(client, stats,on=cat,how='left',suffixes=('','Outbound'))

In [16]:
# Use OutBound Data for modelling
df = df[df.direction=='Outbound']
df

Unnamed: 0,interaction_id,client_id,start_date,start_time,duration,mode,direction,answered,nps,nps1,week_day,period,week_day_period,start_year,start_month,start_day,start_hour,start_minute,start_sec,duration_hour,duration_minute,start_month_period
1,137444973,911957d6f64a428ea858a8c7d97a4562,309.0,1032.0,409.0,Voice,Outbound,1.0,-1.0,-1.0,SEXTA-FEIRA,FINAL DA TARDE (17H-20H),SEXTA-FEIRA_17,2020.0,10.0,30.0,17,12.0,61951.0,0.0,6.0,10.0
3,138017011,71f5a935b03c43b4b881c68921c77ec9,320.0,866.0,437.0,Voice,Outbound,1.0,-3.0,-3.0,TERCA-FEIRA,HORA ALMOCO (12H-15H),TERCA-FEIRA_14,2020.0,11.0,10.0,14,26.0,51991.0,0.0,7.0,11.0
5,138918457,964cbfc6426e4bc18e5f85f6fa814743,336.0,687.0,740.0,Voice,Outbound,1.0,-1.0,-1.0,QUINTA-FEIRA,FINAL DA MANHA (10H-12H),QUINTA-FEIRA_11,2020.0,11.0,26.0,11,27.0,41262.0,0.0,12.0,11.0
6,137024329,f7c1e15c8bf64c79abab94d208df2123,302.0,782.0,158.0,Voice,Outbound,1.0,-1.0,-1.0,SEXTA-FEIRA,HORA ALMOCO (12H-15H),SEXTA-FEIRA_13,2020.0,10.0,23.0,13,2.0,46951.0,0.0,2.0,10.0
8,139907129,025c60285d6342babcdbe5e12af86aa4,356.0,948.0,335.0,Voice,Outbound,1.0,-3.0,-3.0,TERCA-FEIRA,INICIO DA TARDE (15H-17H),TERCA-FEIRA_15,2020.0,12.0,15.0,15,48.0,56888.0,0.0,5.0,12.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
377017,127819431,3743034d0d0b4238a7c0071413503993,164.0,724.0,9.0,Voice,Outbound,0.0,-1.0,-1.0,TERCA-FEIRA,HORA ALMOCO (12H-15H),TERCA-FEIRA_12,2020.0,6.0,9.0,12,4.0,43448.0,0.0,0.0,6.0
377023,140255831,4485cfc03a4844e3b5dc6f3c10315b97,363.0,821.0,2230.0,Voice,Outbound,1.0,-1.0,-1.0,TERCA-FEIRA,HORA ALMOCO (12H-15H),TERCA-FEIRA_13,2020.0,12.0,22.0,13,41.0,49275.0,0.0,37.0,12.0
377025,118790465,0f078a26c1064a9f8152331e8f60acd4,57.0,987.0,41.0,Voice,Outbound,1.0,-1.0,-1.0,QUARTA-FEIRA,INICIO DA TARDE (15H-17H),QUARTA-FEIRA_16,2020.0,2.0,26.0,16,27.0,59239.0,0.0,0.0,2.0
377028,129808649,e9f1432ccbae4a468bacbef2ee73e41c,195.0,751.0,41.0,Voice,Outbound,0.0,-1.0,-1.0,QUINTA-FEIRA,HORA ALMOCO (12H-15H),QUINTA-FEIRA_12,2020.0,7.0,9.0,12,31.0,45114.0,0.0,0.0,7.0


In [17]:
In['mode'].unique()

array(['Voice', 'Chat', 'Email'], dtype=object)

# Chat, Voice & Email

In [18]:
chat = In[In['mode']=='Chat']
voice = In[In['mode']=='Voice']
email = In[In['mode']=='Email']

In [19]:
chat = chat.rename({'answered':'answers'},axis=1)
stats = (chat[['client_id','duration','answers','nps','nps1']].groupby(['client_id']).mean()).reset_index()
client = pd.merge(client, stats,on='client_id',how='left',suffixes=('','Chat'))
for cat in cat_cols:
    stats = (Out[[cat,'duration','answers','nps','nps1']].groupby([cat]).mean()).reset_index()
    client = pd.merge(client, stats,on=cat,how='left',suffixes=('','Chat'))

In [20]:
voice = voice.rename({'answered':'answers'},axis=1)
stats = (voice[['client_id','duration','answers','nps','nps1']].groupby(['client_id']).mean()).reset_index()
client = pd.merge(client, stats,on='client_id',how='left',suffixes=('','Voice'))
for cat in cat_cols:
    stats = (Out[[cat,'duration','answers','nps','nps1']].groupby([cat]).mean()).reset_index()
    client = pd.merge(client, stats,on=cat,how='left',suffixes=('','Voice'))

In [21]:
email = email.rename({'answered':'answers'},axis=1)
stats = (email[['client_id','duration','answers','nps','nps1']].groupby(['client_id']).mean()).reset_index()
client = pd.merge(client, stats,on='client_id',how='left',suffixes=('','Email'))
for cat in cat_cols:
    stats = (Out[[cat,'duration','answers','nps','nps1']].groupby([cat]).mean()).reset_index()
    client = pd.merge(client, stats,on=cat,how='left',suffixes=('','Email'))

In [22]:
# Use Week Day Period with more tahn 1500 rows of data
df['data_count'] = df.groupby('week_day_period')['week_day_period'].transform('count')
df=df[df.data_count>1500]

In [23]:
# Average Call Answered by Hours
target_cols = ['client_id','week_day_period','answered']
df[target_cols].groupby('week_day_period').mean()

Unnamed: 0_level_0,answered
week_day_period,Unnamed: 1_level_1
QUARTA-FEIRA_10,0.668042
QUARTA-FEIRA_11,0.667541
QUARTA-FEIRA_12,0.65231
QUARTA-FEIRA_13,0.644742
QUARTA-FEIRA_14,0.652554
QUARTA-FEIRA_15,0.638049
QUARTA-FEIRA_16,0.665297
QUARTA-FEIRA_17,0.644842
QUARTA-FEIRA_9,0.648912
QUINTA-FEIRA_10,0.663937


In [24]:
df = pd.merge(df[target_cols],client,on='client_id',how='left')

In [25]:
# define cross validation folds and training columns
cv = KFold(n_splits=5, random_state=1, shuffle=True)
cols = (df.drop(target_cols, axis=1)).columns

In [26]:
Accuracy = pd.DataFrame()
Predictions = pd.DataFrame()
for target in df.week_day_period.unique():
    X = df[df.week_day_period==target]
    for model in [CatBoostClassifier(n_estimators=10,silent=True,eval_metric='AUC')]:
        pred = pd.DataFrame()
        pred['client_id'] = client.client_id
        pred['target'] = target
        scores = cross_val_score(model, X[cols].values, X['answered'].astype('int'), scoring='roc_auc', cv=cv, n_jobs=-1)
        pred['prob'] = model.fit(X[cols].values,X['answered']).predict_proba(client[cols].values)[:,1]
        acc = pd.DataFrame({'Label':target,'Percent_Accuracy':mean(scores),'Accuracy_std':std(scores)}, index=[0])
        Predictions = Predictions.append(pred)
        Accuracy = Accuracy.append(acc)

In [27]:
Accuracy = Accuracy.sort_values('Percent_Accuracy',ascending=False)
Accuracy.to_csv('Accuracies_Cross_Validation.csv',index=False)
Accuracy.T

Unnamed: 0,0,0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9,0.10,0.11,0.12,0.13,0.14,0.15,0.16,0.17,0.18,0.19,0.20,0.21,0.22,0.23,0.24,0.25,0.26,0.27,0.28,0.29,0.30,0.31,0.32,0.33,0.34,0.35,0.36,0.37,0.38
Label,QUINTA-FEIRA_15,SEXTA-FEIRA_15,TERCA-FEIRA_14,QUARTA-FEIRA_11,QUINTA-FEIRA_16,SEGUNDA-FEIRA_16,TERCA-FEIRA_16,QUARTA-FEIRA_15,QUARTA-FEIRA_14,SEXTA-FEIRA_11,QUINTA-FEIRA_14,QUINTA-FEIRA_11,QUARTA-FEIRA_16,QUARTA-FEIRA_10,SEXTA-FEIRA_12,SEGUNDA-FEIRA_15,QUARTA-FEIRA_12,QUINTA-FEIRA_10,SEGUNDA-FEIRA_10,QUARTA-FEIRA_13,SEGUNDA-FEIRA_14,TERCA-FEIRA_15,TERCA-FEIRA_11,QUINTA-FEIRA_13,SEGUNDA-FEIRA_13,SEGUNDA-FEIRA_11,TERCA-FEIRA_12,QUARTA-FEIRA_17,SEXTA-FEIRA_16,SEXTA-FEIRA_13,QUARTA-FEIRA_9,QUINTA-FEIRA_12,SEGUNDA-FEIRA_12,SEXTA-FEIRA_10,TERCA-FEIRA_13,SEXTA-FEIRA_14,TERCA-FEIRA_10,QUINTA-FEIRA_9,SEGUNDA-FEIRA_17
Percent_Accuracy,0.89583,0.889817,0.889721,0.888266,0.887587,0.88647,0.88574,0.88431,0.883354,0.883011,0.882156,0.88197,0.88181,0.880975,0.880276,0.879731,0.878586,0.878393,0.877887,0.8763,0.875232,0.875215,0.873963,0.872424,0.872372,0.871463,0.870339,0.8694,0.869168,0.868528,0.867699,0.866412,0.86598,0.865862,0.864862,0.864754,0.859693,0.845703,0.829179
Accuracy_std,0.012404,0.006073,0.01535,0.007252,0.006643,0.006283,0.013551,0.014285,0.012139,0.007823,0.009004,0.012334,0.004733,0.008611,0.00698,0.014518,0.010179,0.00853,0.015591,0.012884,0.019252,0.010444,0.017667,0.004237,0.008596,0.007451,0.018687,0.015303,0.020017,0.008847,0.02275,0.020531,0.006403,0.013488,0.012946,0.013029,0.004119,0.021542,0.020558


# Sort & Merge Predictions

In [28]:
Predictions = Predictions.sort_values(['client_id','prob'],ascending=False)
Predictions = ((Predictions.reset_index()).drop('index',axis=1)).reset_index()
Predictions['min_index'] = Predictions.groupby('client_id')['index'].transform('min')
Predictions['index'] = Predictions['index'] - Predictions.min_index
Predictions = Predictions[Predictions['index']<3]
Predictions.drop('min_index',axis=1,inplace=True)
Predictions

Unnamed: 0,index,client_id,target,prob
0,0,fffdff7b384e4a6e8b4b43c2ce5fbb29,QUARTA-FEIRA_14,0.996981
1,1,fffdff7b384e4a6e8b4b43c2ce5fbb29,TERCA-FEIRA_14,0.996813
2,2,fffdff7b384e4a6e8b4b43c2ce5fbb29,QUINTA-FEIRA_16,0.996699
39,0,fffa828a47074e45911625ceb8ade477,SEGUNDA-FEIRA_12,0.995282
40,1,fffa828a47074e45911625ceb8ade477,QUINTA-FEIRA_11,0.994738
...,...,...,...,...
1947388,1,00008850c7c4445b813edd3811b568d7,SEXTA-FEIRA_16,0.992277
1947389,2,00008850c7c4445b813edd3811b568d7,SEGUNDA-FEIRA_13,0.992261
1947426,0,00006fe927584846bb933dbdd23d72a8,QUINTA-FEIRA_9,0.192444
1947427,1,00006fe927584846bb933dbdd23d72a8,QUARTA-FEIRA_9,0.157962


In [29]:
Pred1 = Predictions[Predictions['index']==0]
Pred1[['Best_Day_1','Best_Time_1']] = Pred1.target.str.split('_',n=1,expand=True)
Pred1['Best_Time_1'] = Pred1['Best_Time_1'].astype('int')
Pred1 = Pred1.rename({'prob':'Prob1','client_id':'Client'},axis=1)
Pred1 = Pred1[['Client','Best_Day_1','Best_Time_1','Prob1']]
Pred1

Unnamed: 0,Client,Best_Day_1,Best_Time_1,Prob1
0,fffdff7b384e4a6e8b4b43c2ce5fbb29,QUARTA-FEIRA,14,0.996981
39,fffa828a47074e45911625ceb8ade477,SEGUNDA-FEIRA,12,0.995282
78,fffa3f19dc16447086c2b0d1012b56a2,QUINTA-FEIRA,12,0.208619
117,fff543468ef74eaf8db24c7e92c0dba2,SEGUNDA-FEIRA,17,0.197618
156,fff4659e4aaf415187cd43f242fdc9e9,QUINTA-FEIRA,12,0.939705
...,...,...,...,...
1947270,00046d6013dc40a1a5c1b057eab3c238,QUARTA-FEIRA,14,0.997399
1947309,0002edb814234f6ba6a4b2201f890245,QUARTA-FEIRA,12,0.942372
1947348,0001cfbaa5c74e408f4d2042da49ba5f,QUARTA-FEIRA,14,0.995560
1947387,00008850c7c4445b813edd3811b568d7,TERCA-FEIRA,12,0.992594


In [30]:
Pred2 = Predictions[Predictions['index']==1]
Pred2[['Best_Day_2','Best_Time_2']] = Pred2.target.str.split('_',n=1,expand=True)
Pred2['Best_Time_2'] = Pred2['Best_Time_2'].astype('int')
Pred2 = Pred2.rename({'prob':'Prob2','client_id':'Client'},axis=1)
Pred2 = Pred2[['Client','Best_Day_2','Best_Time_2','Prob2']]
Pred2

Unnamed: 0,Client,Best_Day_2,Best_Time_2,Prob2
1,fffdff7b384e4a6e8b4b43c2ce5fbb29,TERCA-FEIRA,14,0.996813
40,fffa828a47074e45911625ceb8ade477,QUINTA-FEIRA,11,0.994738
79,fffa3f19dc16447086c2b0d1012b56a2,QUINTA-FEIRA,16,0.193587
118,fff543468ef74eaf8db24c7e92c0dba2,QUARTA-FEIRA,12,0.150652
157,fff4659e4aaf415187cd43f242fdc9e9,SEGUNDA-FEIRA,12,0.935396
...,...,...,...,...
1947271,00046d6013dc40a1a5c1b057eab3c238,TERCA-FEIRA,14,0.997261
1947310,0002edb814234f6ba6a4b2201f890245,QUARTA-FEIRA,16,0.879919
1947349,0001cfbaa5c74e408f4d2042da49ba5f,TERCA-FEIRA,14,0.994448
1947388,00008850c7c4445b813edd3811b568d7,SEXTA-FEIRA,16,0.992277


In [31]:
Pred3 = Predictions[Predictions['index']==2]
Pred3[['Best_Day_3','Best_Time_3']] = Pred3.target.str.split('_',n=1,expand=True)
Pred3['Best_Time_3'] = Pred3['Best_Time_3'].astype('int')
Pred3 = Pred3.rename({'prob':'Prob3','client_id':'Client'},axis=1)
Pred3 = Pred3[['Client','Best_Day_3','Best_Time_3','Prob3']]
Pred3

Unnamed: 0,Client,Best_Day_3,Best_Time_3,Prob3
2,fffdff7b384e4a6e8b4b43c2ce5fbb29,QUINTA-FEIRA,16,0.996699
41,fffa828a47074e45911625ceb8ade477,QUARTA-FEIRA,15,0.992668
80,fffa3f19dc16447086c2b0d1012b56a2,QUARTA-FEIRA,17,0.191626
119,fff543468ef74eaf8db24c7e92c0dba2,QUINTA-FEIRA,10,0.149923
158,fff4659e4aaf415187cd43f242fdc9e9,SEGUNDA-FEIRA,11,0.916885
...,...,...,...,...
1947272,00046d6013dc40a1a5c1b057eab3c238,QUARTA-FEIRA,13,0.996604
1947311,0002edb814234f6ba6a4b2201f890245,SEGUNDA-FEIRA,15,0.876194
1947350,0001cfbaa5c74e408f4d2042da49ba5f,TERCA-FEIRA,12,0.993901
1947389,00008850c7c4445b813edd3811b568d7,SEGUNDA-FEIRA,13,0.992261


In [32]:
Best_time = pd.merge(Pred1,Pred2,on='Client',how='left')
Best_time = pd.merge(Best_time,Pred3,on='Client',how='left')
Best_time.to_csv('Best_time.csv',index=False)
Best_time

Unnamed: 0,Client,Best_Day_1,Best_Time_1,Prob1,Best_Day_2,Best_Time_2,Prob2,Best_Day_3,Best_Time_3,Prob3
0,fffdff7b384e4a6e8b4b43c2ce5fbb29,QUARTA-FEIRA,14,0.996981,TERCA-FEIRA,14,0.996813,QUINTA-FEIRA,16,0.996699
1,fffa828a47074e45911625ceb8ade477,SEGUNDA-FEIRA,12,0.995282,QUINTA-FEIRA,11,0.994738,QUARTA-FEIRA,15,0.992668
2,fffa3f19dc16447086c2b0d1012b56a2,QUINTA-FEIRA,12,0.208619,QUINTA-FEIRA,16,0.193587,QUARTA-FEIRA,17,0.191626
3,fff543468ef74eaf8db24c7e92c0dba2,SEGUNDA-FEIRA,17,0.197618,QUARTA-FEIRA,12,0.150652,QUINTA-FEIRA,10,0.149923
4,fff4659e4aaf415187cd43f242fdc9e9,QUINTA-FEIRA,12,0.939705,SEGUNDA-FEIRA,12,0.935396,SEGUNDA-FEIRA,11,0.916885
...,...,...,...,...,...,...,...,...,...,...
49930,00046d6013dc40a1a5c1b057eab3c238,QUARTA-FEIRA,14,0.997399,TERCA-FEIRA,14,0.997261,QUARTA-FEIRA,13,0.996604
49931,0002edb814234f6ba6a4b2201f890245,QUARTA-FEIRA,12,0.942372,QUARTA-FEIRA,16,0.879919,SEGUNDA-FEIRA,15,0.876194
49932,0001cfbaa5c74e408f4d2042da49ba5f,QUARTA-FEIRA,14,0.995560,TERCA-FEIRA,14,0.994448,TERCA-FEIRA,12,0.993901
49933,00008850c7c4445b813edd3811b568d7,TERCA-FEIRA,12,0.992594,SEXTA-FEIRA,16,0.992277,SEGUNDA-FEIRA,13,0.992261


In [33]:
Best_time.describe()

Unnamed: 0,Best_Time_1,Prob1,Best_Time_2,Prob2,Best_Time_3,Prob3
count,49935.0,49935.0,49935.0,49935.0,49935.0,49935.0
mean,12.544368,0.644745,12.882547,0.607029,12.931972,0.587508
std,2.637296,0.347613,2.572208,0.370827,2.480096,0.38246
min,9.0,0.080859,9.0,0.071405,9.0,0.061232
25%,10.0,0.246888,11.0,0.183076,11.0,0.147447
50%,12.0,0.766734,13.0,0.701924,13.0,0.668412
75%,15.0,0.995017,15.0,0.993441,15.0,0.992197
max,17.0,0.999719,17.0,0.998859,17.0,0.998255


In [34]:
Best_time.Best_Day_3.unique()

array(['QUINTA-FEIRA', 'QUARTA-FEIRA', 'SEGUNDA-FEIRA', 'SEXTA-FEIRA',
       'TERCA-FEIRA'], dtype=object)

# Feature Importance

In [35]:
sorted(zip(map(lambda x: round(x, 4), model.feature_importances_), cols), reverse=True)

[(72.3546, 'nps1Email'),
 (1.2423, 'durationEmail'),
 (0.2788, 'durationChat'),
 (0.1738, 'durationOutbound'),
 (0.0, 'z_adesao_cdo_year'),
 (0.0, 'z_adesao_cdo_month_period'),
 (0.0, 'z_adesao_cdo_month'),
 (0.0, 'z_adesao_cdo'),
 (0.0, 'qt_fami_prod'),
 (0.0, 'q_profession'),
 (0.0, 'q_other_holder'),
 (0.0, 'q_guarantor'),
 (0.0, 'q_authorized'),
 (0.0, 'q_1holder'),
 (0.0, 'pfi_ris_brqm_cap'),
 (0.0, 'npsVoice'),
 (0.0, 'npsVoice'),
 (0.0, 'npsVoice'),
 (0.0, 'npsVoice'),
 (0.0, 'npsVoice'),
 (0.0, 'npsVoice'),
 (0.0, 'npsVoice'),
 (0.0, 'npsVoice'),
 (0.0, 'npsVoice'),
 (0.0, 'npsVoice'),
 (0.0, 'npsVoice'),
 (0.0, 'npsVoice'),
 (0.0, 'npsVoice'),
 (0.0, 'npsVoice'),
 (0.0, 'npsVoice'),
 (0.0, 'npsVoice'),
 (0.0, 'npsVoice'),
 (0.0, 'npsVoice'),
 (0.0, 'npsVoice'),
 (0.0, 'npsVoice'),
 (0.0, 'npsOutbound'),
 (0.0, 'npsOutbound'),
 (0.0, 'npsOutbound'),
 (0.0, 'npsOutbound'),
 (0.0, 'npsOutbound'),
 (0.0, 'npsOutbound'),
 (0.0, 'npsOutbound'),
 (0.0, 'npsOutbound'),
 (0.0, 'npsOutb