In [15]:
# import libraries
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
import seaborn as sns
%matplotlib inline
pd.plotting.register_matplotlib_converters()

import warnings
warnings.filterwarnings("ignore")

from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import LabelEncoder

from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score


from sklearn.linear_model import Lasso, LogisticRegression
# from sklearn.metrics import accuracy_score,make_scorer, mean_squared_error, mean_absolute_error,

from sklearn import datasets, linear_model, metrics
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import GradientBoostingClassifier
from xgboost import XGBClassifier


import scikitplot as skplt
import matplotlib.pyplot as plt

## I. DATA Processing

https://www.kaggle.com/henriqueyamahata/bank-marketing

### Những chú ý cho quá trình xử lý

1. Lược bỏ dữ liệu
* Cột 'duration' sẽ không được quan tâm trong quá trình phân tích và xử lý
* Có 12 dòng trùng nhau sẽ cắt bỏ
2. Missing:
* các giá trị unknown của các cột khác: loan, housing, default, marital, job
3. Outlier:
* age, campaign, previous, cons.conf.idx
4. Phân loại lại biến pdays, age
5. Clustering theo 3 chỉ số pdays,previous,poutcome
6. Sự tương quan giữa các biến:
* 3 biến nr.employed, emp.var.rate, euribor3m là 3 biến có tương quan rất mạnh --> có thể lọc bớt để giảm chiều dữ liệu

### Cách xử lý
1. Xóa 12 dòng dữ liệu trùng nhau
2. Loại bỏ cột duration, nr.employed
3. Biến 'default': khoảng 20% biến này có giá trị 'unknown' - không xác định đc là client có tín dụng xấu hay không - (yes) hay (no).
> Vì đa số biến này cũng khó xác định nên ta sẽ không biến đổi thuộc tính này

MISSING
4. Biến 'education': 4% là unknown
> Thay thế giá trị 'unknown' --> 'university-degree'(bằng giá trị mode của cột dữ liệu)
5. Biến 'loan' và 'housing': 
    * Tỉ lệ missing khá nhỏ: là 2.4% (unknown)
    * Cách xử lý: mục tiêu là càng tiếp thị càng nhiều khách hàng càng tốt, ta đánh vào các giá trị mà tỉ lệ successful cao:
        - loan : unknown --> no
        - housing: unknown --> yes
6. Biến marital:missing rất thấp 0.1%
> unknown --> single
7. Biến job: missing thấp 0.8
> unknown --> student

OUTLIER
8. (I). age: thay thế những giá trị > 70 bằng 70
Nhìn biểu đồ thấy sau 70 là outlier
9. (II). campaign: thay thế những giá trị > quantile_95 bằng quantile_95
10. (III). previous: thay thế những giá trị > quantile_95 bằng quantile_95
11. (IV). cons.conf.idx: thay thế những giá trị > quantile_95 bằng quantile_95

Phân loại lại biến
12. (I). Column 'pdays'
* Có đến 96.3% bộ dữ liệu thuộc tính 'pdays' có giá trị '999' - ghi nhận lại đây là những khách hàng chưa được liên hệ bằng call trước đó.
> Vì vậy đối với biến này, ta sẽ biến đổi phân loại thành 3 nhóm: 'not_previously_contacted', 'within_a_week', 'over_a_week'

In [8]:
def visualize_numerical(df, column, target = None):
    """Draw a histogram (with KDE) and a box plot of one numerical column side by side.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame that holds the data to plot.
    column : str
        Name of the numerical column to visualise.
    target : str, optional
        When given, the box plot is split along the x axis by this column;
        otherwise a single box is drawn for the whole column.
    """
    fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(12, 3))
    hist_ax, box_ax = axes

    # Left panel: distribution of the column.
    sns.histplot(df[column], ax=hist_ax, kde=True)
    hist_ax.set_xlabel(column)
    hist_ax.set_ylabel('Density')
    hist_ax.set_title(f'{column}  Distribution')

    # Right panel: box plot, optionally grouped by the target column.
    if target is not None:
        sns.boxplot(x=target, y=column, data=df, showmeans=True, ax=box_ax)
        box_ax.set_xlabel('Target')
    else:
        sns.boxplot(y=column, data=df, showmeans=True, ax=box_ax)
    box_ax.set_ylabel(column)

    plt.show()

def visualize_numerical_lst(
    df,
    numerical = ['age', 'duration', 'campaign', 'pdays', 'previous', 'emp.var.rate',
                 'cons.price.idx', 'cons.conf.idx', 'euribor3m', 'nr.employed'],
    target = None,
):
    """Call `visualize_numerical` once per column name in `numerical`.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame that holds the data to plot.
    numerical : list of str
        Column names to plot, in order. The default list is never mutated here.
    target : str, optional
        Forwarded unchanged to `visualize_numerical`.
    """
    for numeric_column in numerical:
        visualize_numerical(df, numeric_column, target)
        # Blank line between consecutive figures in the cell output.
        print()
        
        
def visualize_categorical(df, column, target = 'y'):
    """Show a pie chart of a categorical column next to a stacked bar chart by target.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame that holds the data to plot.
    column : str
        Name of the categorical column to visualise.
    target : str
        Column whose values become the stacked segments of the bar chart.
    """
    fig, (pie_ax, bar_ax) = plt.subplots(nrows=1, ncols=2, figsize=(20, 4))

    # Left panel: share of rows per level of `column`.
    level_sizes = df.groupby(column).size()
    pie_ax.pie(
        x=level_sizes,
        labels=level_sizes.index.tolist(),
        explode=[0.05] * len(level_sizes),
        autopct="%.2f%%",
        pctdistance=0.5,
        radius=1.1,
    )
    pie_ax.set_title(f'{column}  Distribution')

    # Right panel: counts per level, stacked by the values of `target`.
    counts_by_target = get_col_target(column, target, df)
    counts_by_target.plot(kind='bar', stacked=True, ax=bar_ax)
    plt.xticks(rotation=45)

    plt.show()
    
    
def visualize_categorical_lst(df,categorical = ['job', 'marital', 'education', 'default', 'housing', 'loan', 'contact',\
                                          'month', 'day_of_week', 'poutcome'], target = 'y'):
    """Call `visualize_categorical` once per column name in `categorical`.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame that holds the data to plot.
    categorical : list of str
        Column names to plot, in order. The default list is never mutated here.
    target : str
        Column used to split the stacked bar chart; forwarded to
        `visualize_categorical` (it used to be silently ignored, so a
        non-default target had no effect).
    """
    for column in categorical:
        visualize_categorical(df, column, target)
        
def get_col_target(rows, cols,data):
    """Cross-tabulate two columns of `data` into a table of row counts.

    Parameters
    ----------
    rows : str
        Column whose distinct values become the index of the result, ordered
        from the most frequent value to the least frequent one.
    cols : str
        Column whose distinct values become the columns of the result, in
        order of first appearance in `data`.
    data : pandas.DataFrame
        Source frame.

    Returns
    -------
    pandas.DataFrame
        Entry (r, c) is the number of rows of `data` where `rows == r` and
        `cols == c`; combinations that never occur are reported as 0.
    """
    cols_lst = data[cols].unique().tolist()
    rows_lst = data.groupby(rows)[rows].count().sort_values(ascending = False).index.tolist()

    # Plain dict keyed by (row value, col value). A missing combination is an
    # ordinary `.get` default, so no exception has to be caught: the previous
    # bare `except` plus `continue` inside `finally` hid every error.
    pair_counts = data.groupby([rows, cols]).size().to_dict()

    dic = {
        item: [pair_counts.get((row_value, item), 0) for row_value in rows_lst]
        for item in cols_lst
    }

    return pd.DataFrame(dic, index = rows_lst)


In [9]:
def remove_duplicated_row(df):
    """Return a copy of `df` without fully duplicated rows.

    The first occurrence of every duplicated row is kept and the index of the
    result is reset to 0..n-1. Rows are removed by position (`drop_duplicates`)
    rather than by index label, so a non-unique index can no longer cause
    unrelated rows that merely share a label with a duplicate to be dropped.

    Parameters
    ----------
    df : pandas.DataFrame

    Returns
    -------
    pandas.DataFrame
        New frame; the input is not modified.
    """
    return df.drop_duplicates().reset_index(drop=True)

def remove_features(df,col_lst):
    """Delete the listed columns from `df` in place and return the same frame.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to modify; it is mutated, not copied.
    col_lst : iterable of str
        Names of the columns to delete, processed one by one in order.

    Returns
    -------
    pandas.DataFrame
        The very same object that was passed in.
    """
    for unwanted in col_lst:
        del df[unwanted]
    return df

def replace_missing_by_value(df,column,replaced_value,missing_value='unknown'):
    """Replace every occurrence of a missing-value marker in one column.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to modify; the column is reassigned on this object.
    column : str
        Column to clean.
    replaced_value : object
        Value written wherever the cell equals `missing_value`.
    missing_value : object
        Marker that denotes a missing entry (default 'unknown').

    Returns
    -------
    pandas.DataFrame
        The same frame that was passed in.
    """
    def _substitute(cell):
        if cell == missing_value:
            return replaced_value
        return cell

    df[column] = df[column].map(_substitute)
    return df

def replace_outlier_by_quantile(df, column, quantile_thresh = 0.95, replaced_value = None):
    """Replace the values of a column that lie above one of its quantiles.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to modify; the column is reassigned on this object.
    column : str
        Numerical column to treat.
    quantile_thresh : float
        Quantile (between 0 and 1) that defines the upper threshold.
    replaced_value : number, optional
        Value written over every cell strictly greater than the threshold.
        When omitted, the threshold itself is used.

    Returns
    -------
    pandas.DataFrame
        The same frame that was passed in.
    """
    upper_bound = df[column].quantile(quantile_thresh)
    substitute = upper_bound if replaced_value is None else replaced_value

    def _cap(cell):
        if cell > upper_bound:
            return substitute
        return cell

    df[column] = df[column].map(_cap)
    return df

def replace_outlier_by_value(df, column, value_thresh, replaced_value = None):
    """Replace the values of a column that lie above a fixed threshold.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to modify; the column is reassigned on this object.
    column : str
        Numerical column to treat.
    value_thresh : number
        Upper threshold; only cells strictly greater than it are replaced.
    replaced_value : number, optional
        Value written over the offending cells. When omitted, `value_thresh`
        itself is used.

    Returns
    -------
    pandas.DataFrame
        The same frame that was passed in.
    """
    substitute = value_thresh if replaced_value is None else replaced_value

    def _cap(cell):
        if cell > value_thresh:
            return substitute
        return cell

    df[column] = df[column].map(_cap)
    return df

def replace_missing_by_mode(df,column,missing_value='unknown'):
    """Replace a missing-value marker with the most frequent value of the column.

    The mode is computed over the column as it is, i.e. the marker itself takes
    part in the count; when several values tie, the first one returned by
    `Series.mode()` is used.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to modify; the column is reassigned on this object.
    column : str
        Column to clean.
    missing_value : object
        Marker that denotes a missing entry (default 'unknown').

    Returns
    -------
    pandas.DataFrame
        The same frame that was passed in.
    """
    most_frequent = df[column].mode().iloc[0]

    def _substitute(cell):
        if cell == missing_value:
            return most_frequent
        return cell

    df[column] = df[column].map(_substitute)
    return df

def replace_missing_by_median(df,column,missing_value='unknown'):
    """Replace a missing-value marker with the median of the remaining values.

    The previous implementation called `.values` on the scalar returned by
    `Series.median()` and therefore always raised AttributeError; it also tried
    to take the median of a column that still contained the marker.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to modify; the column is reassigned on this object.
    column : str
        Column to clean. Its non-missing entries must be numeric (or numeric
        strings); anything that cannot be parsed is ignored for the median.
    missing_value : object
        Marker that denotes a missing entry (default 'unknown').

    Returns
    -------
    pandas.DataFrame
        The same frame that was passed in. When no usable numeric value is
        left, the marker is replaced with NaN.
    """
    # Median over the observed entries only, so the marker cannot distort
    # (or break) the computation.
    is_missing = df[column] == missing_value
    observed = pd.to_numeric(df.loc[~is_missing, column], errors='coerce')
    replaced_value = observed.median()

    df[column] = df[column].apply(lambda val: replaced_value if val == missing_value else val)
    return df

def transform_pdays(val):
    """Map a raw `pdays` value to one of three contact-recency labels.

    Returns 'not_previously_contacted' for values >= 999, 'over_a_week' for
    values in [7, 999), 'within_a_week' for values in [0, 7), and None for
    anything else (negative numbers, NaN).
    """
    if val >= 999:
        return 'not_previously_contacted'
    if val >= 7:
        return 'over_a_week'
    if val >= 0:
        return 'within_a_week'
    return None

def eval_class(true, predicted):
    """Score predicted labels against the true labels with six sklearn metrics.

    Parameters
    ----------
    true : array-like
        Ground-truth labels.
    predicted : array-like
        Predicted labels; the same values are also passed to `log_loss` and
        `roc_auc_score`.

    Returns
    -------
    tuple
        (accuracy, precision, recall, f1, log_loss, auc), in that order.
    """
    scorers = (
        metrics.accuracy_score,
        metrics.precision_score,
        metrics.recall_score,
        metrics.f1_score,
        metrics.log_loss,
        metrics.roc_auc_score,
    )
    return tuple(score(true, predicted) for score in scorers)

def create_evaluation_df(model_name, y_train,y_train_pred, y_test, y_test_pred):
    """Build a one-row evaluation table for a single fitted model.

    Parameters
    ----------
    model_name : str
        Label stored in the 'Model' column.
    y_train, y_train_pred : array-like
        True and predicted labels on the training split.
    y_test, y_test_pred : array-like
        True and predicted labels on the test split.

    Returns
    -------
    pandas.DataFrame
        One row with the columns 'Model', 'Accuracy_train', 'Accuracy_test',
        'Precision_test', 'Recall_test' and 'F1_test'. The remaining metrics
        and the relative train/test accuracy gap are computed but are not part
        of the returned selection.
    """
    metric_names = ('Accuracy', 'Precision', 'Recall', 'F1', 'Log_loss', 'AUC')
    train_cols = [f'{metric}_train' for metric in metric_names]
    test_cols = [f'{metric}_test' for metric in metric_names]
    shown_cols = ['Model', 'Accuracy_train', 'Accuracy_test', 'Precision_test', 'Recall_test', 'F1_test']

    # One record: model name, then the train scores, then the test scores.
    record = [model_name]
    record.extend(eval_class(y_train, y_train_pred))
    record.extend(eval_class(y_test, y_test_pred))
    scores = pd.DataFrame(data=[record], columns=['Model'] + train_cols + test_cols)

    # Relative change of accuracy from train to test.
    scores['diff_Acc_train_test'] = (
        (scores['Accuracy_test'] - scores['Accuracy_train']) / scores['Accuracy_train']
    )
    return scores[shown_cols]

def init_evaluation_df():
    """Return an empty evaluation table with the columns shown for every model.

    Returns
    -------
    pandas.DataFrame
        Zero rows and the columns 'Model', 'Accuracy_train', 'Accuracy_test',
        'Precision_test', 'Recall_test', 'F1_test'.
    """
    metric_names = ('Accuracy', 'Precision', 'Recall', 'F1', 'Log_loss', 'AUC')
    every_column = (
        ['Model']
        + [f'{metric}_train' for metric in metric_names]
        + [f'{metric}_test' for metric in metric_names]
        + ['diff_Acc_train_test']
    )
    shown_cols = ['Model', 'Accuracy_train', 'Accuracy_test', 'Precision_test', 'Recall_test', 'F1_test']

    # Build the full (empty) layout first, then keep only the displayed columns.
    return pd.DataFrame(columns=every_column)[shown_cols]
        
def data_processing_pipeline(df):
    """Apply the full cleaning recipe to the raw marketing frame.

    Steps, in order:
      1. drop fully duplicated rows;
      2. drop the 'duration' and 'nr.employed' columns;
      3. replace the 'unknown' marker in 'education' (with the column mode),
         'housing' ('yes'), 'loan' ('no'), 'marital' ('single') and
         'job' ('student');
      4. cap 'age' at 65 and 'campaign' at 6;
      5. cap 'previous' and 'cons.conf.idx' at their default (0.95) quantile;
      6. recode 'pdays' into recency labels with `transform_pdays`.

    Parameters
    ----------
    df : pandas.DataFrame
        Raw data; it must contain every column named above.

    Returns
    -------
    pandas.DataFrame
        The processed frame.
    """
    df = remove_duplicated_row(df)
    df = remove_features(df, ['duration', 'nr.employed'])

    # --- missing values -------------------------------------------------
    # The 'education' mode is taken before any replacement is applied, so the
    # 'unknown' marker itself still takes part in the count.
    unknown_fill = [
        ('education', df['education'].mode().values.tolist()[0]),
        ('housing', 'yes'),
        ('loan', 'no'),
        ('marital', 'single'),
        ('job', 'student'),
    ]
    for column, replaced_value in unknown_fill:
        df = replace_missing_by_value(df, column, replaced_value)

    # --- outliers: fixed upper caps -------------------------------------
    fixed_caps = [('age', 65), ('campaign', 6)]
    for column, value_thresh in fixed_caps:
        df = replace_outlier_by_value(df, column, value_thresh)

    # --- outliers: quantile caps (helper default threshold) -------------
    for column in ('previous', 'cons.conf.idx'):
        df = replace_outlier_by_quantile(df, column)

    # --- recode pdays into categories -----------------------------------
    df['pdays'] = df['pdays'].map(transform_pdays)

    return df

def label_encode_pipeline(df, cat_col_lst):
    """Label-encode the listed categorical columns of `df`.

    Each column is fitted and transformed independently with a `LabelEncoder`.
    The values are read from `df` itself; the function used to read them from
    the notebook-level `process_mkt_df`, so it only worked when called on that
    exact frame and failed (or silently encoded the wrong data) otherwise.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to modify; the encoded columns are reassigned on this object.
    cat_col_lst : iterable of str
        Names of the columns to encode.

    Returns
    -------
    pandas.DataFrame
        The same frame that was passed in.
    """
    labelencoder = LabelEncoder()
    for column in cat_col_lst:
        # fit_transform refits the encoder, so no state leaks between columns.
        df[column] = labelencoder.fit_transform(df[column])
    return(df)

def run_model(name,model, X_train, y_train, X_test, y_test):
    """Fit one classifier and evaluate it on the train and test splits.

    Parameters
    ----------
    name : str
        Label stored in the 'Model' column of the evaluation table.
    model : estimator
        Object exposing `fit`, `predict` and `predict_proba`.
    X_train, y_train : array-like
        Training features and labels.
    X_test, y_test : array-like
        Test features and labels.

    Returns
    -------
    tuple
        (evaluation DataFrame from `create_evaluation_df`, array holding the
        second column of `predict_proba(X_test)`, used for the ROC curves).
    """
    model.fit(X_train, y_train)

    test_labels = model.predict(X_test)
    # Keep only the column at index 1 of the probability matrix.
    positive_proba = model.predict_proba(X_test)[:, 1]
    train_labels = model.predict(X_train)

    report = create_evaluation_df(name, y_train, train_labels, y_test, test_labels)
    return report, positive_proba

def run_model_lst(name_lst,model_lst, X_train, y_train, X_test, y_test):
    """Fit and evaluate several classifiers on the same train/test split.

    Parameters
    ----------
    name_lst : list of str
        Display names, parallel to `model_lst`.
    model_lst : list of estimators
        Models to fit; each one is fitted in place by `run_model`.
    X_train, y_train : array-like
        Training features and labels.
    X_test, y_test : array-like
        Test features and labels.

    Returns
    -------
    tuple
        (evaluation DataFrame with one row per model,
         DataFrame with one column of test-set probabilities per model name).
    """
    eval_frames = []                  # one single-row frame per model
    y_test_proba_df = pd.DataFrame()  # y_test_proba for ROC curve

    for model,name in zip(model_lst,name_lst):
        model_eval_df,y_test_pred_proba = run_model(name,model, X_train, y_train, X_test, y_test)
        eval_frames.append(model_eval_df)
        y_test_proba_df[name] = y_test_pred_proba

    # DataFrame.append was removed in pandas 2.0; concatenate once at the end
    # instead of growing the frame row by row.
    if eval_frames:
        evalutation_df = pd.concat(eval_frames, ignore_index = True)
    else:
        # No model given: keep returning the empty table with the right columns.
        evalutation_df = init_evaluation_df()

    return(evalutation_df,y_test_proba_df)

### ROC CURVE

def visualize_ROC_curves(y_true,y_pred_proba_df):
    """Plot one ROC curve per column of predicted probabilities.

    Parameters
    ----------
    y_true : array-like
        True binary labels, in the same row order as `y_pred_proba_df`.
    y_pred_proba_df : pandas.DataFrame
        One column of positive-class scores per model; the column names are
        used as legend entries.
    """
    plt.figure(figsize = (15,6))
    # Diagonal reference line.
    plt.plot([0, 1], [0, 1], 'k--')

    # Generate ROC curve values: fpr, tpr, thresholds
    for col in y_pred_proba_df.columns:
        # Score against the labels passed in; the notebook-level `y_test` was
        # used here before, which ignored the `y_true` argument.
        fpr1, tpr1, thresholds1 = metrics.roc_curve(y_true, y_pred_proba_df[col])
        plt.plot(fpr1, tpr1)

    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title('ROC Curve: Successful Client Classifiers')
    plt.legend(['Base line']+ y_pred_proba_df.columns.tolist(), loc='lower right')
    plt.show()
    
### Calculate ROI
def calculate_roi(call_cnt, sale_cnt,cost_per_call, roi_per_success):
    """Return the campaign ROI: revenue from sales minus the cost of all calls.

    Parameters
    ----------
    call_cnt : number of calls made.
    sale_cnt : number of successful sales.
    cost_per_call : cost charged for every call.
    roi_per_success : revenue earned for every successful sale.
    """
    revenue = roi_per_success * sale_cnt
    spending = cost_per_call * call_cnt
    return revenue - spending

def get_real_roi(y_test, cost_per_call = 10, roi_per_success = 20):
    """ROI obtained when every client in `y_test` is called.

    Parameters
    ----------
    y_test : array-like supporting `== 1` and `.sum()`
        True outcomes; entries equal to 1 count as sales.
    cost_per_call : cost charged for every call (default 10).
    roi_per_success : revenue earned for every sale (default 20).
    """
    total_calls = len(y_test)
    total_sales = (y_test == 1).sum()
    return calculate_roi(total_calls, total_sales, cost_per_call, roi_per_success)
    
def get_pred_roi(y_test, y_test_pred,cost_per_call = 10, roi_per_success = 20):
    """ROI obtained when only the clients predicted as positive are called.

    Parameters
    ----------
    y_test : array-like
        True outcomes (1 = sale).
    y_test_pred : array-like
        Predicted outcomes (1 = call this client), element-wise comparable
        with `y_test`.
    cost_per_call : cost charged for every call (default 10).
    roi_per_success : revenue earned for every sale (default 20).
    """
    flagged = (y_test_pred == 1)
    # Calls are made to every flagged client; sales are the flagged clients
    # whose true outcome is 1.
    calls_made = flagged.sum()
    sales_won = ((y_test == 1) & flagged).sum()
    return calculate_roi(calls_made, sales_won, cost_per_call, roi_per_success)

In [10]:
#### L O A D Data
# Read the raw data set from a path relative to the notebook; the file uses
# ';' as the field separator.
file_path = "data/bank-additional-full.csv"
marketing_df = pd.read_csv(file_path,sep = ";")
# marketing_df.head(2)

In [11]:
#### M A I N 
# Work on a copy so the raw frame loaded above stays untouched.
process_mkt_df = marketing_df.copy() 
# Fraction of rows held out for testing in every train/test split below.
test_size = 0.3

## Processing data
process_mkt_df = data_processing_pipeline(process_mkt_df)
# Columns of dtype 'object' are treated as categorical, all others as numerical.
cat_cols = process_mkt_df.dtypes[process_mkt_df.dtypes == 'object'].index
num_cols = process_mkt_df.dtypes[process_mkt_df.dtypes != 'object'].index

## label encoding
# NOTE(review): presumably this also encodes the target 'y', since later cells
# compare it with 0/1 -- confirm against the raw data.
process_mkt_df = label_encode_pipeline(process_mkt_df, cat_cols)

## list of models
# `models` and `names` are parallel lists (they are zipped in run_model_lst);
# keep their entries, including the commented-out ones, in the same order.
models = [LogisticRegression(max_iter = 300),
#           GaussianNB(),
          DecisionTreeClassifier(criterion='gini', max_depth=3, random_state=0),
#           RandomForestClassifier(n_estimators=1000, max_depth=3),
          GradientBoostingClassifier(n_estimators=1000, learning_rate=0.05),
          XGBClassifier(n_estimators=1000, learning_rate=0.05, use_label_encoder = False)
          ]
names = [ 'Logistic Regressor',
#              'Naive Bayes',
            'Decision Tree Classifier',
#           'Random Forest Classifier',
          'Gradient Boost Classifier',
          'XGBoost Classifier'
        ]

### R U N with the whole data set

In [12]:
#### R U N with the whole data set

## split train set and test_set
X_train, X_test, y_train, y_test = train_test_split(process_mkt_df.drop('y',axis=1), process_mkt_df['y'],
                                                    test_size=test_size, random_state = 101)
## min-max scale the features: the scaler is fitted on the training split only
## and then applied unchanged to the test split
scaler = MinMaxScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

## init data frame of evalutation and y_pred_proba
## (both placeholders are overwritten by the run_model_lst call below)
evalutation_df = pd.DataFrame() # evaluation dataframe
y_test_pred_proba_df = pd.DataFrame() # y_test_proba for ROC curve

## run list of Models to choose the optimal model
evalutation_df,y_test_pred_proba_df = run_model_lst(names, models, X_train, y_train, X_test, y_test)
# evalutation_df



### R U N with the resampled data set

In [18]:
def under_resample_data(data, target = 'y', random_state = None):
    """Balance a binary data set by randomly under-sampling class 0.

    Every row whose `target` equals 1 is kept, and an equally sized random
    sample (without replacement) of the rows whose `target` equals 0 is added.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame containing the 0/1-encoded `target` column.
    target : str
        Name of the label column (default 'y'). It used to be ignored in
        favour of a hard-coded `data.y`.
    random_state : int, optional
        Seed for the random draw. When omitted, NumPy's global random state is
        used, exactly as before, so `np.random.seed(...)` still applies.

    Returns
    -------
    pandas.DataFrame
        The class-1 rows followed by the sampled class-0 rows, with their
        original index labels.

    Raises
    ------
    ValueError
        If there are fewer class-0 rows than class-1 rows (raised by the
        sampling without replacement).
    """
    # Work with row positions rather than index labels: the labels were
    # previously fed to `.iloc`, which is only correct for a default
    # 0..n-1 index.
    yes_positions = np.flatnonzero((data[target] == 1).to_numpy())
    no_positions = np.flatnonzero((data[target] == 0).to_numpy())

    rng = np.random if random_state is None else np.random.RandomState(random_state)
    sampled_no_positions = rng.choice(no_positions, len(yes_positions), replace = False)

    under_sample_positions = np.concatenate([yes_positions, sampled_no_positions])
    return data.iloc[under_sample_positions, :]

#### R U N with the resampled data set

## handle the class imbalance by under-sampling the processed frame
data = process_mkt_df.copy()
target = 'y'
under_data = under_resample_data(data)

## Split train and test undersampled dataset
# NOTE(review): the split happens after under-sampling, so the test split is
# balanced as well; its scores are therefore not directly comparable with the
# whole-data run above.
X_under_train, X_under_test, y_under_train, y_under_test = train_test_split(under_data.drop(target,axis=1),under_data[target],
                                                                            test_size=test_size, random_state = 101)

# Showing ratio
print("Percentage of no clients: ", len(under_data[under_data[target] == 0])/len(under_data))
print("Percentage of yes clients: ", len(under_data[under_data[target] == 1])/len(under_data))
print("Total number of clients in resampled data: ", len(under_data))

print("")
print("Number transactions train dataset: ", len(X_under_train))
print("Number transactions test dataset: ", len(X_under_test))
print("Total number of transactions: ", len(X_under_train)+len(X_under_test))

## Min-max scale the features: fitted on the training split only, then applied
## to the test split. This rebinds `scaler`, replacing the one fitted on the
## whole-data split.
scaler = MinMaxScaler()
X_under_train = scaler.fit_transform(X_under_train)
X_under_test = scaler.transform(X_under_test)

Percentage of no clients:  0.5
Percentage of yes clients:  0.5
Total number of clients in resampled data:  9278

Number transactions train dataset:  6494
Number transactions test dataset:  2784
Total number of transactions:  9278


In [22]:
## run model list
# Display names for this run: the base names plus a suffix.
suffix = ' with under resampled data'
under_names = [name + suffix for name in names ]
## init data frame of evalutation and y_pred_proba
## (both placeholders are overwritten by the run_model_lst call below)
under_evalutation_df = pd.DataFrame() # evaluation dataframe
y_under_test_pred_proba_df = pd.DataFrame() # y_test_proba for ROC curve

# `models` holds the same estimator objects as the whole-data run; run_model
# calls fit on each of them, so after this cell they are fitted on the
# under-sampled training split.
under_evalutation_df,y_under_test_pred_proba_df= run_model_lst(under_names, models, X_under_train, y_under_train
                                                               , X_under_test, y_under_test)

# under_evalutation_df



Unnamed: 0,Model,Accuracy_train,Accuracy_test,Precision_test,Recall_test,F1_test
0,Logistic Regressorwith under resampled data,0.728981,0.720546,0.752475,0.656115,0.700999
1,Decision Tree Classifierwith under resampled data,0.739914,0.72342,0.769565,0.636691,0.69685
2,Gradient Boost Classifierwith under resampled ...,0.785032,0.738147,0.793778,0.642446,0.710139
3,XGBoost Classifierwith under resampled data,0.927626,0.715158,0.744472,0.653957,0.696285


In [24]:
# Stack the whole-data results on top of the under-sampled results so that
# both runs can be compared in one table.
# DataFrame.append was removed in pandas 2.0; pd.concat is the supported way.
evaluations = pd.concat([evalutation_df, under_evalutation_df], ignore_index = True)
evaluations

Unnamed: 0,Model,Accuracy_train,Accuracy_test,Precision_test,Recall_test,F1_test
0,Logistic Regressor,0.900219,0.900348,0.7051,0.224576,0.340653
1,Decision Tree Classifier,0.900045,0.898405,0.704835,0.195621,0.306247
2,Gradient Boost Classifier,0.914374,0.897758,0.621622,0.27613,0.382396
3,XGBoost Classifier,0.942303,0.893224,0.567268,0.288842,0.38278
4,Logistic Regressorwith under resampled data,0.728981,0.720546,0.752475,0.656115,0.700999
5,Decision Tree Classifierwith under resampled data,0.739914,0.72342,0.769565,0.636691,0.69685
6,Gradient Boost Classifierwith under resampled ...,0.785032,0.738147,0.793778,0.642446,0.710139
7,XGBoost Classifierwith under resampled data,0.927626,0.715158,0.744472,0.653957,0.696285


### ROC curve for the group of models with undersampled data
1. Mặc dù accuracy score chạy trên toàn tập data cao hơn, Kết quả chạy với under sample data set lại cho độ Precision, Recall, F1 score tốt hơn và accuracy score trên tập train và test của undersampled data cho kết quả khá khả quan (>70%).
--> Sẽ chọn cách undersample data (Mô hình còn có thể cải thiện kết quả tiếp nếu ta dùng cross-validation, điều này sẽ được cải tiến ở phase sau)

2. Vẽ đường ROC curve cho các trường hợp train model trên tập undersampled data

In [None]:
# ROC curves of the models trained on the under-sampled data, plus one curve
# of a model trained on the whole data set for comparison.
# The cell used to reference `y_test_undersample` and `y_retest_pred_proba_df1`,
# which are not defined anywhere in the notebook, and it scored the whole-data
# probabilities against the under-sampled labels.
plt.figure(figsize = (15,6))
plt.plot([0, 1], [0, 1], 'k--')

# Generate ROC curve values: fpr, tpr, thresholds
for col in y_under_test_pred_proba_df.columns:
    fpr1, tpr1, thresholds1 = metrics.roc_curve(y_under_test, y_under_test_pred_proba_df[col])
    plt.plot(fpr1, tpr1)

# NOTE(review): 'Gradient_Raw' is assumed to mean the gradient-boosting model
# trained on the whole (non-resampled) data set -- confirm.
fpr, tpr, thresholds = metrics.roc_curve(y_test, y_test_pred_proba_df['Gradient Boost Classifier'])
plt.plot(fpr, tpr)

plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('ROC Curve: Successful Client Classifiers')
plt.legend(['Base line']+ y_under_test_pred_proba_df.columns.tolist()+['Gradient_Raw'], loc='lower right')
plt.show()

In [None]:
# Precision-recall curves of the models trained on the under-sampled data,
# plus one curve of a model trained on the whole data set for comparison.
# `metrics` is already imported in the first cell, so no extra import is needed
# here. The cell used to reference `y_test_undersample` and
# `y_retest_pred_proba_df1`, which are not defined anywhere in the notebook.
plt.figure(figsize = (15,6))

# Generate precision-recall curve values: precision, recall, thresholds
for col in y_under_test_pred_proba_df.columns:
    precision1, recall1, thresholds = metrics.precision_recall_curve(y_under_test, y_under_test_pred_proba_df[col])
    plt.plot(recall1,precision1)

# NOTE(review): 'Gradient_Raw' is assumed to mean the gradient-boosting model
# trained on the whole (non-resampled) data set -- confirm.
precision, recall, thresholds = metrics.precision_recall_curve(y_test, y_test_pred_proba_df['Gradient Boost Classifier'])
plt.plot(recall, precision)

plt.xlabel('Recall')
plt.ylabel('Precision')
plt.title('Precision-Recall example')
plt.legend(y_under_test_pred_proba_df.columns.tolist()+['Gradient_Raw'], loc='lower right')
plt.show()

In [38]:
#!pip install --user imbalanced-learn
# Thử over_sample
# import numpy as np
# from imblearn.over_sampling import RandomOverSampler
# from imblearn.under_sampling import RandomUnderSampler

# ros = RandomOverSampler(random_state=123)
# X_train_resampled, y_train_resampled = ros.fit_resample(X_train, y_train)
# re_eval_df,y_retest_pred_proba_df = run_model(names[1], models[1], X_train_resampled, y_train_resampled, X_test, y_test)
# print(re_eval_df)


In [47]:
# Thử under_sample
# from imblearn.under_sampling import RandomUnderSampler
# rus = RandomUnderSampler(random_state=123)
# X_train_undersampled, y_train_undersampled = rus.fit_resample(X_train, y_train)
# un_eval_df,y_untest_pred_proba_df = run_model(names[1], models[1], X_train_undersampled, y_train_undersampled, X_test, y_test)
# print(un_eval_df)

In [23]:
### Real R O I on the test set
#ROI = 20 * # of sales - 10 * # of calls

# `y_test_pred` only ever existed inside run_model, so this cell failed on a
# fresh kernel. The recorded output of this cell (pred_roi = 970) matches the
# whole-data 'XGBoost Classifier' row of the evaluation table, so the hard
# predictions are rebuilt from that model's stored test-set probabilities.
# NOTE(review): the 0.5 cut-off is assumed to reproduce what the model's
# predict() returned -- confirm.
# .to_numpy() keeps the comparison with y_test positional; the probability
# frame has a fresh 0..n-1 index while y_test keeps its original labels.
y_test_pred = (y_test_pred_proba_df['XGBoost Classifier'] > 0.5).to_numpy().astype(int)

number_client = len(y_test)
real_roi = get_real_roi(y_test)               # every client is called
pred_roi = get_pred_roi(y_test, y_test_pred)  # only predicted positives are called

print('Number of client: '+str(number_client))
print('real_roi: '+str(real_roi))
print('pred_roi: '+str(pred_roi))
# Gain of the model-driven campaign over calling everyone.
print('ROI: '+str(pred_roi - real_roi))

Number of client: 12353
real_roi: -95210
pred_roi: 970
ROI: 96180


In [72]:
### Feature importances of the selected model
# Feature names: every column of the processed frame except the target 'y'.
col_lst = [i for i in process_mkt_df.columns.values.tolist() if i!= 'y']

# NOTE(review): `optimal_model` is not assigned in any visible cell of this
# notebook, so this cell depends on hidden kernel state and fails after
# Restart & Run All. Define it explicitly (which fitted model, trained on which
# data) before this line. The series is named 'Gradient Boost CLF', but the
# float32 dtype of the recorded output is unusual for sklearn's gradient
# boosting -- verify which estimator it really was.
optimal_feature_importance = pd.Series(data = optimal_model.feature_importances_, index = col_lst, name = 'Gradient Boost CLF')
# Displayed in ascending order of importance.
optimal_feature_importance.sort_values()

housing           0.031326
education         0.034511
job               0.035344
age               0.035990
campaign          0.036638
marital           0.037144
emp.var.rate      0.038015
loan              0.038247
day_of_week       0.041355
previous          0.042591
cons.price.idx    0.043687
default           0.050095
contact           0.065515
cons.conf.idx     0.070622
poutcome          0.079831
month             0.080324
euribor3m         0.117956
pdays             0.120809
Name: Gradient Boost CLF, dtype: float32