In [1]:
%pip install optuna
%pip install bayesian-optimization

Collecting optuna
[?25l  Downloading https://files.pythonhosted.org/packages/1a/18/b49ca91cf592747e19f2d333c2a86cd7c81895b922a5a09adf6335471576/optuna-2.8.0-py3-none-any.whl (301kB)
[K     |█                               | 10kB 11.8MB/s eta 0:00:01[K     |██▏                             | 20kB 16.0MB/s eta 0:00:01[K     |███▎                            | 30kB 19.6MB/s eta 0:00:01[K     |████▍                           | 40kB 22.6MB/s eta 0:00:01[K     |█████▍                          | 51kB 25.0MB/s eta 0:00:01[K     |██████▌                         | 61kB 25.1MB/s eta 0:00:01[K     |███████▋                        | 71kB 26.2MB/s eta 0:00:01[K     |████████▊                       | 81kB 26.2MB/s eta 0:00:01[K     |█████████▊                      | 92kB 22.6MB/s eta 0:00:01[K     |██████████▉                     | 102kB 23.3MB/s eta 0:00:01[K     |████████████                    | 112kB 23.3MB/s eta 0:00:01[K     |█████████████                   | 122kB 23.3MB

In [2]:
import numpy as np
import pandas as pd
from imblearn.combine import SMOTEENN 
from imblearn.over_sampling import SMOTE
from imblearn.under_sampling import RandomUnderSampler
from sklearn.model_selection import StratifiedKFold,KFold,train_test_split
from sklearn.metrics import classification_report,confusion_matrix,accuracy_score,precision_score,recall_score,roc_auc_score,f1_score
from sklearn.ensemble import RandomForestClassifier,ExtraTreesClassifier,StackingClassifier,VotingClassifier,BaggingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from xgboost import XGBClassifier
import xgboost as xgb
import optuna
from bayes_opt import BayesianOptimization
from functools import partial
import pickle
import warnings
warnings.filterwarnings('ignore')



In [3]:
def fill_gender(cols):
    gender = cols[0]
    married = cols[1]
    if pd.isnull(gender):
        if pd.isnull(married) or married == 'Yes':
            return 'Male'
        else:
            return 'Female'
    else:
        return gender

def fill_married(cols):
    gender = cols[0]
    married = cols[1]
    if pd.isnull(married):
        if pd.isnull(gender) or gender == 'Male':
            return 'Yes'
        else:
            return 'No'
    else:
        return married

def fill_loan_amount(cols):
    pa = cols[0]
    edu = cols[1]
    se = cols[2]
    la = cols[3]
    temp = df.groupby(['Property_Area','Education','Self_Employed'])['LoanAmount'].median()
    temp = temp.to_dict()
    if pd.isna(la) and (pa,edu,se) in temp:
        return temp[(pa,edu,se)]
    else:
        return la

def coaplicant_share(cols):
    x = cols[0]
    y = cols[1]
    if y == 0:
        return 'No Share'
    elif x == y :
        return 'Same Share'
    elif x > y and y >= x * 0.85 :
        return 'Lesser But Similar Share'
    elif y > x and y <= x * 1.15:
        return 'More But Similar Share'
    elif x > y :
        return 'Lesser Share'
    else:
        return 'More Share'

def coaplicant_share_similar(cols):
    x = cols[0]
    y = cols[1]
    if x == y or (x > y and y >= x * 0.85) or (y > x and y <= x * 1.15):
        return 'Similar'
    else:
        return 'Not Similar'

def encode(df,test):
    cols = ['Gender', 'Married', 'Education','Self_Employed','Property_Area', 
            'Coapplicant_Share_Similarity','Has_Dependents']
    if not test:
        df.Loan_Status = df.Loan_Status.apply(lambda x : 1 if x == 'Y' else 0)
    df.Married = df.Married.apply(lambda x : 'Married' if x == 'Yes' else 'Not Married')
    df.Self_Employed = df.Self_Employed.apply(lambda x : 'Self Employed' if x == 'Yes' else 'Not Self Employed')
    df.Has_Dependents = df.Has_Dependents.apply(lambda x : 'Has Dependents' if x == 'Yes' else 'No Dependents')
    df = df.drop(['Loan_ID','Coapplicant_Share','Has_Coapplicant','Dependents'],axis=1)
    for col in cols :
        temp = pd.get_dummies(df[col],drop_first=True)
        df = pd.concat([df,temp],axis=1)
    df = df.drop(cols,axis=1)
    return df

def remove_outliers(df,q=0.99):
    df = df[df.ApplicantIncome < df.ApplicantIncome.quantile(q)]
    df = df[df.CoapplicantIncome < df.CoapplicantIncome.quantile(q)]
    df = df[df.LoanAmount < df.LoanAmount.quantile(q)]
    return df

def balance_data(X , Y , balancer) :
  features_balanced, labels_balanced = balancer.fit_resample(X, Y)
  return features_balanced, labels_balanced

def pre_process(df,test=False,balance=False,balancer=RandomUnderSampler()):
    # df['Gender'] = df[['Gender','Married']].apply(fill_gender,axis=1)
    # df['Married'] = df[['Gender','Married']].apply(fill_married,axis=1)
    df['Gender'] = df['Gender'].fillna(df.Gender.mode()[0])
    df['Married'] = df['Married'].fillna(df.Married.mode()[0])
    df['Dependents'] = df['Dependents'].fillna(df.Dependents.mode()[0])
    df['Self_Employed'] = df['Self_Employed'].fillna(df.Self_Employed.mode()[0])
    df['Loan_Amount_Term'] = df['Loan_Amount_Term'].fillna(df.Loan_Amount_Term.mode()[0])
    df['Credit_History'] = df['Credit_History'].fillna(df.Credit_History.mode()[0])
    # df['LoanAmount'] = df[['Property_Area','Education','Self_Employed','LoanAmount']].apply(fill_loan_amount,axis=1)
    df['LoanAmount'] = df['LoanAmount'].fillna(df.LoanAmount.median())
    df['Has_Coapplicant'] = df.CoapplicantIncome.map(lambda x : 'Yes' if x != 0 else 'No')
    df['Coapplicant_Share'] = df[['ApplicantIncome','CoapplicantIncome']].apply(coaplicant_share,axis=1)
    df['Coapplicant_Share_Similarity'] = df[['ApplicantIncome','CoapplicantIncome']].apply(coaplicant_share_similar,axis=1)
    df['Has_Dependents'] = df['Dependents'].map(lambda x : 'No' if str(x) == '0' else 'Yes')
    df = encode(df,test)
    if not test:
        df = remove_outliers(df,q=0.95)
    X = df.drop('Loan_Status',axis=1)
    cols = X.columns
    X = X.values
    Y = df[['Loan_Status']].values
    if not test and balance :
        X,Y = balance_data(X , Y , balancer)
    return X,Y,cols

In [4]:
df = pd.read_csv('train.csv')
X,Y,cols = pre_process(df,balance=True,balancer=SMOTEENN())

In [5]:
def get_confusion_matrix(model,X,Y) :
    x_train,x_test,y_train,y_test = train_test_split(X,Y,random_state=101,test_size=0.2)
    model.fit(x_train,y_train)
    y_pred = model.predict(x_test)
    print('Metrics')
    print("*" * 50)
    print(f'Accuracy        - {str(np.round(accuracy_score(y_pred,y_test) * 100 , 2))}')
    print(f'Precision       - {str(np.round(precision_score(y_pred,y_test) * 100 , 2))}')
    print(f'Recall          - {str(np.round(recall_score(y_pred,y_test) * 100 , 2))}')
    print(f'F1 Score        - {str(np.round(f1_score(y_pred,y_test) * 100 , 2))}')
    print(f'ROC AUC Score   - {str(np.round(roc_auc_score(y_pred,y_test) * 100 , 2))}')
    print('\nConfusion Matrix')
    print("*" * 50)
    print(confusion_matrix(y_test,y_pred))
    print()

def evaluate_model(model,X,Y):
    accuracy = []
    precision = []
    recall = []
    f1 = []
    rocauc = []
    skf = StratifiedKFold(n_splits=10)
    for train_index, test_index in skf.split(X, Y):
        x_train, x_test = X[train_index], X[test_index]
        y_train, y_test = Y[train_index], Y[test_index]
        model.fit(x_train,y_train)
        y_pred = model.predict(x_test)
        accuracy.append(accuracy_score(y_pred,y_test))
        precision.append(precision_score(y_pred,y_test))
        recall.append(recall_score(y_pred,y_test))
        f1.append(f1_score(y_pred,y_test))
        rocauc.append(roc_auc_score(y_pred,y_test))
    get_confusion_matrix(model,X,Y)
    print('\nCross Validation Metrics')
    print("*" * 50)
    print(f'Accuracy        - {str(np.round(np.mean(accuracy) * 100 , 2))}')
    print(f'Precision       - {str(np.round(np.mean(precision) * 100 , 2))}')
    print(f'Recall          - {str(np.round(np.mean(recall) * 100 , 2))}')
    print(f'F1 Score        - {str(np.round(np.mean(f1) * 100 , 2))}')
    print(f'ROC AUC Score   - {str(np.round(np.mean(rocauc) * 100 , 2))}')

In [6]:
evaluate_model(RandomForestClassifier(),X,Y)

Metrics
**************************************************
Accuracy        - 92.45
Precision       - 96.88
Recall          - 91.18
F1 Score        - 93.94
ROC AUC Score   - 92.96

Confusion Matrix
**************************************************
[[18  3]
 [ 1 31]]


Cross Validation Metrics
**************************************************
Accuracy        - 88.76
Precision       - 95.71
Recall          - 87.41
F1 Score        - 90.45
ROC AUC Score   - 91.6


In [7]:
evaluate_model(ExtraTreesClassifier(),X,Y)

Metrics
**************************************************
Accuracy        - 90.57
Precision       - 93.75
Recall          - 90.91
F1 Score        - 92.31
ROC AUC Score   - 90.45

Confusion Matrix
**************************************************
[[18  3]
 [ 2 30]]


Cross Validation Metrics
**************************************************
Accuracy        - 87.96
Precision       - 92.8
Recall          - 87.61
F1 Score        - 89.35
ROC AUC Score   - 89.82


In [8]:
evaluate_model(XGBClassifier(),X,Y)

Metrics
**************************************************
Accuracy        - 92.45
Precision       - 90.62
Recall          - 96.67
F1 Score        - 93.55
ROC AUC Score   - 91.81

Confusion Matrix
**************************************************
[[20  1]
 [ 3 29]]


Cross Validation Metrics
**************************************************
Accuracy        - 91.37
Precision       - 95.66
Recall          - 89.37
F1 Score        - 92.19
ROC AUC Score   - 92.21


In [9]:
def XGB_Accuracy(preds, dtrain):
    labels = dtrain.get_label()
    return 'acc', accuracy_score(preds, labels)
  
dtrain = xgb.DMatrix(X, Y, feature_names=cols)

def XGB_HP(n_estimators,max_depth, subsample, colsample_bytree,min_child_weight, gamma ):
    params = {
        'eta': 0.1,
        'objective': 'binary:hinge',
        'eval_metric':'auc', 
        'silent': 1,
        'booster':'dart'
     }
    params['n_estimators'] = int(round(n_estimators))
    params['max_depth'] = int(round(max_depth))
    params['subsample'] = max(min(subsample, 1), 0)
    params['colsample_bytree'] = max(min(colsample_bytree, 1), 0)
    params['min_child_weight'] = int(min_child_weight)
    params['gamma'] = max(gamma, 0)
    scores = xgb.cv(
        params , 
        dtrain , 
        num_boost_round = 1000 ,
        verbose_eval = False , 
        early_stopping_rounds = 10 , 
        feval = XGB_Accuracy , 
        maximize = True , 
        nfold = 10,
        stratified=True
    )
    return  scores['test-acc-mean'].iloc[-1]

param_space = {
    'min_child_weight':(2, 20),
    'gamma':(0, 5),
    'subsample':(0.5, 1),
    'colsample_bytree':(0.1, 1),
    'max_depth': (3, 50),
    'n_estimators':(50,500)
}


optimizer = BayesianOptimization(
    XGB_HP , 
    param_space , 
    random_state = 7
)
                                  

optimizer.maximize(
    init_points = 30 , 
    n_iter = 60
)

|   iter    |  target   | colsam... |   gamma   | max_depth | min_ch... | n_esti... | subsample |
-------------------------------------------------------------------------------------------------
| [0m 1       [0m | [0m 0.8598  [0m | [0m 0.1687  [0m | [0m 3.9     [0m | [0m 23.61   [0m | [0m 15.02   [0m | [0m 490.1   [0m | [0m 0.7692  [0m |
| [95m 2       [0m | [95m 0.8902  [0m | [95m 0.551   [0m | [95m 0.3603  [0m | [95m 15.62   [0m | [95m 11.0    [0m | [95m 355.7   [0m | [95m 0.9019  [0m |
| [0m 3       [0m | [0m 0.8712  [0m | [0m 0.4428  [0m | [0m 0.3297  [0m | [0m 16.54   [0m | [0m 18.37   [0m | [0m 146.0   [0m | [0m 0.7261  [0m |
| [0m 4       [0m | [0m 0.841   [0m | [0m 0.9381  [0m | [0m 0.1245  [0m | [0m 31.23   [0m | [0m 19.1    [0m | [0m 153.6   [0m | [0m 0.7742  [0m |
| [0m 5       [0m | [0m 0.841   [0m | [0m 0.9182  [0m | [0m 0.6658  [0m | [0m 27.6    [0m | [0m 15.51   [0m | [0m 351.1   [0m | [0m 0

In [10]:
print(optimizer.max['params'])

{'colsample_bytree': 0.45848111335008956, 'gamma': 0.04563705434712684, 'max_depth': 18.18354524230952, 'min_child_weight': 2.41665691095497, 'n_estimators': 80.7500951434135, 'subsample': 0.9433200825841126}


In [27]:
params = {
    'colsample_bytree': 0.45848111335008956, 'gamma': 0.04563705434712684, 
    'max_depth': 18, 'min_child_weight': 2.41665691095497, 
    'subsample':0.9433200825841126,'n_estimators': 82,'eta': 0.1,
    'objective': 'binary:hinge', 'eval_metric':'auc', 'silent': 1,
    'booster':'dart'
}

model_xgb = XGBClassifier(**params)

In [28]:
evaluate_model(model_xgb,X,Y)

Metrics
**************************************************
Accuracy        - 92.45
Precision       - 93.75
Recall          - 93.75
F1 Score        - 93.75
ROC AUC Score   - 92.11

Confusion Matrix
**************************************************
[[19  2]
 [ 2 30]]


Cross Validation Metrics
**************************************************
Accuracy        - 91.0
Precision       - 94.29
Recall          - 90.71
F1 Score        - 92.08
ROC AUC Score   - 91.76


In [29]:
filename = "/content/drive/MyDrive/AR/model_xgb.pkl"  

with open(filename, 'wb') as file:  
    pickle.dump(model_xgb, file)

In [30]:
model_xgb_bgg = BaggingClassifier(model_xgb,n_estimators=15)

evaluate_model(model_xgb_bgg,X,Y)

Metrics
**************************************************
Accuracy        - 98.11
Precision       - 100.0
Recall          - 96.97
F1 Score        - 98.46
ROC AUC Score   - 98.48

Confusion Matrix
**************************************************
[[20  1]
 [ 0 32]]


Cross Validation Metrics
**************************************************
Accuracy        - 91.0
Precision       - 96.43
Recall          - 89.03
F1 Score        - 92.17
ROC AUC Score   - 92.26


In [31]:
filename = "/content/drive/MyDrive/AR/model_xgb_bgg.pkl"  

with open(filename, 'wb') as file:  
    pickle.dump(model_xgb_bgg, file)

In [32]:
def optimize(trial , x , y) :  
    max_features = trial.suggest_uniform('max_features' , 0.01 , 1)
    max_depth = trial.suggest_int('max_depth' , 3 , 50)
    min_samples_leaf = trial.suggest_int('min_samples_leaf' , 2 , 10)
    min_samples_split = trial.suggest_int('min_samples_split' , 2 , 10)
    n_estimators = trial.suggest_int('n_estimators' , 50 , 1000)
    model = RandomForestClassifier(
        max_features = max_features ,
        max_depth = max_depth ,
        min_samples_leaf = min_samples_leaf ,
        min_samples_split = min_samples_split ,
        n_estimators = n_estimators ,
        n_jobs = -1
    )
    kf = StratifiedKFold(n_splits = 10)
    acc = []
    for idx in kf.split(x , y) :   
        train_idx , test_idx = idx[0] , idx[1]
        x_train = x[train_idx]  
        y_train = y[train_idx] 
        x_test = x[test_idx]
        y_test = y[test_idx]
        model.fit(x_train , y_train)
        pred = model.predict(x_test)
        fold_acc = accuracy_score(y_test , pred)
        acc.append(fold_acc)
    return np.mean(acc)


optimization_function = partial(optimize , x = X , y = Y)

study = optuna.create_study(direction = 'maximize')
study.optimize(optimization_function , n_trials = 25)

[32m[I 2021-06-29 09:06:36,954][0m A new study created in memory with name: no-name-4e09bffc-6674-474b-b492-9b4e9df7e672[0m
[32m[I 2021-06-29 09:06:50,775][0m Trial 0 finished with value: 0.8538461538461538 and parameters: {'max_features': 0.054197497535592255, 'max_depth': 47, 'min_samples_leaf': 4, 'min_samples_split': 6, 'n_estimators': 620}. Best is trial 0 with value: 0.8538461538461538.[0m
[32m[I 2021-06-29 09:07:00,766][0m Trial 1 finished with value: 0.8534188034188034 and parameters: {'max_features': 0.1740616258657362, 'max_depth': 4, 'min_samples_leaf': 5, 'min_samples_split': 8, 'n_estimators': 475}. Best is trial 0 with value: 0.8538461538461538.[0m
[32m[I 2021-06-29 09:07:16,327][0m Trial 2 finished with value: 0.8763532763532764 and parameters: {'max_features': 0.521773436647293, 'max_depth': 27, 'min_samples_leaf': 4, 'min_samples_split': 9, 'n_estimators': 685}. Best is trial 2 with value: 0.8763532763532764.[0m
[32m[I 2021-06-29 09:07:32,925][0m Trial 3 

In [33]:
p = study.best_params
model_rf = RandomForestClassifier(**p)

In [34]:
evaluate_model(model_rf,X,Y)

Metrics
**************************************************
Accuracy        - 90.57
Precision       - 93.75
Recall          - 90.91
F1 Score        - 92.31
ROC AUC Score   - 90.45

Confusion Matrix
**************************************************
[[18  3]
 [ 2 30]]


Cross Validation Metrics
**************************************************
Accuracy        - 89.12
Precision       - 97.86
Recall          - 85.84
F1 Score        - 90.91
ROC AUC Score   - 91.67


In [35]:
filename = "/content/drive/MyDrive/AR/model_rf.pkl"  

with open(filename, 'wb') as file:  
    pickle.dump(model_rf, file)

In [36]:
model_rf_bgg = BaggingClassifier(model_rf,n_estimators=10)

evaluate_model(model_rf_bgg,X,Y)

Metrics
**************************************************
Accuracy        - 90.57
Precision       - 96.88
Recall          - 88.57
F1 Score        - 92.54
ROC AUC Score   - 91.51

Confusion Matrix
**************************************************
[[17  4]
 [ 1 31]]


Cross Validation Metrics
**************************************************
Accuracy        - 87.99
Precision       - 97.86
Recall          - 84.38
F1 Score        - 90.04
ROC AUC Score   - 90.89


In [37]:
filename = "/content/drive/MyDrive/AR/model_rf_bgg.pkl"  

with open(filename, 'wb') as file:  
    pickle.dump(model_rf_bgg, file)

In [38]:
def optimize(trial , x , y) :  
    max_features = trial.suggest_uniform('max_features' , 0.01 , 1)
    max_depth = trial.suggest_int('max_depth' , 3 , 100)
    min_samples_leaf = trial.suggest_int('min_samples_leaf' , 2 , 10)
    min_samples_split = trial.suggest_int('min_samples_split' , 2 , 100)
    n_estimators = trial.suggest_int('n_estimators' , 100 , 2000)
    model = ExtraTreesClassifier(
        max_features = max_features ,
        max_depth = max_depth ,
        min_samples_leaf = min_samples_leaf ,
        min_samples_split = min_samples_split ,
        n_estimators = n_estimators ,
        n_jobs = -1
    )
    kf = StratifiedKFold(n_splits = 10)
    acc = []
    for idx in kf.split(x , y) :   
        train_idx , test_idx = idx[0] , idx[1]
        x_train = x[train_idx]  
        y_train = y[train_idx] 
        x_test = x[test_idx]
        y_test = y[test_idx]
        model.fit(x_train , y_train)
        pred = model.predict(x_test)
        fold_acc = accuracy_score(y_test , pred)
        acc.append(fold_acc)
    return np.mean(acc)


optimization_function = partial(optimize , x = X , y = Y)

study = optuna.create_study(direction = 'maximize')
study.optimize(optimization_function , n_trials = 25)

[32m[I 2021-06-29 09:16:52,694][0m A new study created in memory with name: no-name-24e2b441-6f67-46f5-adee-e2242dd94877[0m
[32m[I 2021-06-29 09:16:57,923][0m Trial 0 finished with value: 0.8108262108262109 and parameters: {'max_features': 0.04334899920963225, 'max_depth': 100, 'min_samples_leaf': 7, 'min_samples_split': 56, 'n_estimators': 303}. Best is trial 0 with value: 0.8108262108262109.[0m
[32m[I 2021-06-29 09:17:03,916][0m Trial 1 finished with value: 0.8074074074074075 and parameters: {'max_features': 0.7074270813514645, 'max_depth': 76, 'min_samples_leaf': 7, 'min_samples_split': 93, 'n_estimators': 324}. Best is trial 0 with value: 0.8108262108262109.[0m
[32m[I 2021-06-29 09:17:16,838][0m Trial 2 finished with value: 0.8643874643874645 and parameters: {'max_features': 0.8706385224330715, 'max_depth': 76, 'min_samples_leaf': 4, 'min_samples_split': 9, 'n_estimators': 758}. Best is trial 2 with value: 0.8643874643874645.[0m
[32m[I 2021-06-29 09:17:22,795][0m Tria

In [39]:
p = study.best_params
model_et = ExtraTreesClassifier(**p)

In [40]:
evaluate_model(model_et,X,Y)

Metrics
**************************************************
Accuracy        - 88.68
Precision       - 90.62
Recall          - 90.62
F1 Score        - 90.62
ROC AUC Score   - 88.17

Confusion Matrix
**************************************************
[[18  3]
 [ 3 29]]


Cross Validation Metrics
**************************************************
Accuracy        - 87.98
Precision       - 95.0
Recall          - 86.13
F1 Score        - 89.67
ROC AUC Score   - 90.3


In [41]:
filename = "/content/drive/MyDrive/AR/model_et.pkl"  

with open(filename, 'wb') as file:  
    pickle.dump(model_et, file)

In [42]:
model_et_bgg = BaggingClassifier(model_et,n_estimators=10)

evaluate_model(model_et_bgg,X,Y)

Metrics
**************************************************
Accuracy        - 90.57
Precision       - 93.75
Recall          - 90.91
F1 Score        - 92.31
ROC AUC Score   - 90.45

Confusion Matrix
**************************************************
[[18  3]
 [ 2 30]]


Cross Validation Metrics
**************************************************
Accuracy        - 86.07
Precision       - 93.52
Recall          - 84.25
F1 Score        - 87.95
ROC AUC Score   - 88.56


In [43]:
filename = "/content/drive/MyDrive/AR/model_et_bgg.pkl"  

with open(filename, 'wb') as file:  
    pickle.dump(model_et_bgg, file)

In [44]:
estimators = [('rf',model_rf),('xgb',model_xgb)]
final_estimator = model_et
model_stacked = StackingClassifier(estimators=estimators,final_estimator=final_estimator)
evaluate_model(model_stacked,X,Y)

Metrics
**************************************************
Accuracy        - 92.45
Precision       - 90.62
Recall          - 96.67
F1 Score        - 93.55
ROC AUC Score   - 91.81

Confusion Matrix
**************************************************
[[20  1]
 [ 3 29]]


Cross Validation Metrics
**************************************************
Accuracy        - 90.63
Precision       - 94.29
Recall          - 90.17
F1 Score        - 91.78
ROC AUC Score   - 91.45


In [45]:
filename = "/content/drive/MyDrive/AR/model_stacked.pkl"  

with open(filename, 'wb') as file:  
    pickle.dump(model_stacked, file)

In [46]:
estimators = [('rf',model_rf),('xgb',model_xgb), ('et',model_et)]
final_estimator = LogisticRegression()
model_stacked = StackingClassifier(estimators=estimators,final_estimator=final_estimator)
evaluate_model(model_stacked,X,Y)

Metrics
**************************************************
Accuracy        - 92.45
Precision       - 90.62
Recall          - 96.67
F1 Score        - 93.55
ROC AUC Score   - 91.81

Confusion Matrix
**************************************************
[[20  1]
 [ 3 29]]


Cross Validation Metrics
**************************************************
Accuracy        - 91.75
Precision       - 95.71
Recall          - 90.79
F1 Score        - 92.82
ROC AUC Score   - 92.6


In [47]:
filename = "/content/drive/MyDrive/AR/model_stacked_lr.pkl"  

with open(filename, 'wb') as file:  
    pickle.dump(model_stacked, file)

In [48]:
estimators = [('rf',model_rf),('et',model_et),('xgb',model_xgb)]
model_voting = VotingClassifier(estimators=estimators,voting='soft',weights=[3,4,2])
evaluate_model(model_voting,X,Y)

Metrics
**************************************************
Accuracy        - 90.57
Precision       - 90.62
Recall          - 93.55
F1 Score        - 92.06
ROC AUC Score   - 89.96

Confusion Matrix
**************************************************
[[19  2]
 [ 3 29]]


Cross Validation Metrics
**************************************************
Accuracy        - 91.0
Precision       - 98.57
Recall          - 87.87
F1 Score        - 92.47
ROC AUC Score   - 93.06


In [49]:
filename = "/content/drive/MyDrive/AR/model_voting.pkl"  

with open(filename, 'wb') as file:  
    pickle.dump(model_voting, file)

In [50]:
estimators = [('rf_bgg',model_rf_bgg),('et_bgg',model_et_bgg),('xgb_bgg',model_xgb_bgg)]
model_voting_bgg = VotingClassifier(estimators=estimators,voting='soft',weights=[2,4,1])
evaluate_model(model_voting_bgg,X,Y)

Metrics
**************************************************
Accuracy        - 92.45
Precision       - 96.88
Recall          - 91.18
F1 Score        - 93.94
ROC AUC Score   - 92.96

Confusion Matrix
**************************************************
[[18  3]
 [ 1 31]]


Cross Validation Metrics
**************************************************
Accuracy        - 89.5
Precision       - 97.86
Recall          - 86.45
F1 Score        - 91.22
ROC AUC Score   - 91.99


In [51]:
filename = "/content/drive/MyDrive/AR/model_voting_bgg.pkl"  

with open(filename, 'wb') as file:  
    pickle.dump(model_voting_bgg, file)