In [3]:
import pandas as pd
import numpy as np
from sklearn.ensemble import GradientBoostingClassifier as GBC
from sklearn.ensemble import RandomForestClassifier as RFC 
from sklearn.model_selection import train_test_split as tts
from sklearn.model_selection import KFold
from imblearn.over_sampling import SMOTE
from sklearn.metrics import *
import random

In [4]:
df = pd.read_csv('Dataset/rgb_img_dataset_test_colorfulness.csv')

In [5]:
df = df.drop('filename', axis=1)

In [6]:
df

Unnamed: 0,r_range_1,g_range_1,b_range_1,proportion_1,r_range_2,g_range_2,b_range_2,proportion_2,r_range_3,g_range_3,...,sling,shorts,trousers,skirt,short_sleeved_dress,long_sleeved_dress,vest_dress,sling_dress,colorfulness,label
0,16-31,16-31,16-31,0.303225,0-15,0-15,0-15,0.235109,0-15,0-15,...,0.0,0.0,0.000000,0.887868,0.0,0.0,0.0,0.0,35.539986,0
1,16-31,16-31,16-31,0.176878,240-255,240-255,240-255,0.091829,224-239,224-239,...,0.0,0.0,0.985530,0.930180,0.0,0.0,0.0,0.0,13.672892,0
2,224-239,224-239,224-239,0.092203,208-223,208-223,208-223,0.082258,224-239,224-239,...,0.0,0.0,0.894071,0.000000,0.0,0.0,0.0,0.0,9.019223,0
3,48-63,32-47,32-47,0.063760,112-127,32-47,0-15,0.059953,128-143,48-63,...,0.0,0.0,0.000000,0.939734,0.0,0.0,0.0,0.0,64.195108,0
4,0-15,0-15,0-15,0.127613,192-207,176-191,160-175,0.112392,192-207,176-191,...,0.0,0.0,0.732579,0.000000,0.0,0.0,0.0,0.0,25.580210,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
602,0-15,0-15,16-31,0.382456,0-15,16-31,16-31,0.152202,112-127,96-111,...,0.0,0.0,0.000000,0.993711,0.0,0.0,0.0,0.0,21.898130,0
603,32-47,32-47,32-47,0.196324,192-207,192-207,192-207,0.066635,176-191,176-191,...,0.0,0.0,0.000000,0.000000,0.0,0.0,0.0,0.0,19.477787,0
604,64-79,112-127,128-143,0.030152,48-63,112-127,144-159,0.028196,64-79,112-127,...,0.0,0.0,0.877317,0.000000,0.0,0.0,0.0,0.0,44.893097,0
605,32-47,32-47,0-15,0.134387,32-47,32-47,16-31,0.129191,16-31,32-47,...,0.0,0.0,0.999213,0.000000,0.0,0.0,0.0,0.0,38.425786,0


In [7]:
def convert_range_to_order(val):
    result = int(val.split('-')[0])/16
    return int(result)

In [8]:
for col_name in df.columns:
    if('_range_' in col_name):
        df[col_name] = df[col_name].apply(lambda x: convert_range_to_order(x))

In [9]:
df

Unnamed: 0,r_range_1,g_range_1,b_range_1,proportion_1,r_range_2,g_range_2,b_range_2,proportion_2,r_range_3,g_range_3,...,sling,shorts,trousers,skirt,short_sleeved_dress,long_sleeved_dress,vest_dress,sling_dress,colorfulness,label
0,1,1,1,0.303225,0,0,0,0.235109,0,0,...,0.0,0.0,0.000000,0.887868,0.0,0.0,0.0,0.0,35.539986,0
1,1,1,1,0.176878,15,15,15,0.091829,14,14,...,0.0,0.0,0.985530,0.930180,0.0,0.0,0.0,0.0,13.672892,0
2,14,14,14,0.092203,13,13,13,0.082258,14,14,...,0.0,0.0,0.894071,0.000000,0.0,0.0,0.0,0.0,9.019223,0
3,3,2,2,0.063760,7,2,0,0.059953,8,3,...,0.0,0.0,0.000000,0.939734,0.0,0.0,0.0,0.0,64.195108,0
4,0,0,0,0.127613,12,11,10,0.112392,12,11,...,0.0,0.0,0.732579,0.000000,0.0,0.0,0.0,0.0,25.580210,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
602,0,0,1,0.382456,0,1,1,0.152202,7,6,...,0.0,0.0,0.000000,0.993711,0.0,0.0,0.0,0.0,21.898130,0
603,2,2,2,0.196324,12,12,12,0.066635,11,11,...,0.0,0.0,0.000000,0.000000,0.0,0.0,0.0,0.0,19.477787,0
604,4,7,8,0.030152,3,7,9,0.028196,4,7,...,0.0,0.0,0.877317,0.000000,0.0,0.0,0.0,0.0,44.893097,0
605,2,2,0,0.134387,2,2,1,0.129191,1,2,...,0.0,0.0,0.999213,0.000000,0.0,0.0,0.0,0.0,38.425786,0


In [10]:
df['label'].value_counts()

0    455
1    152
Name: label, dtype: int64

In [27]:
def model_training(df, training_size, param): 
    # training_size is ratio of training data for train-test split (float)
    # param is set of parameter for model training (dict)
    
    X = df.drop('label', axis=1)
    y = df['label']
    X_train, X_test, y_train, y_test = tts(X, y, train_size=training_size, random_state=42)
    
    sm = SMOTE(sampling_strategy = 1, random_state=42)
    X_res, y_res = sm.fit_resample(X_train, y_train.ravel())
    
    if(param['model']=='GBC'):
        clf = GBC(max_depth=param['max_depth'], 
                  criterion=param['criterion'],
                  min_impurity_decrease=param['min_impurity_decrease'],
                  min_samples_leaf=param['min_samples_leaf'], random_state=42).fit(X_res, y_res)
    elif(param['model']=='RFC'):
        clf = RFC(max_depth=param['max_depth'], 
                  criterion=param['criterion'],
                  min_impurity_decrease=param['min_impurity_decrease'],
                  min_samples_leaf=param['min_samples_leaf'], random_state=42).fit(X_res, y_res)
    # clf = GBC(set_of_parameter)
    y_pred = clf.predict(X_test)
    
    print('Confusion Matrix')
    print(confusion_matrix(y_test, y_pred))
    print('Accuracy: {:.4f}'.format(accuracy_score(y_test, y_pred)))
    print('Precision: {:.4f}'.format(precision_score(y_test, y_pred)))
    print('Recall: {:.4f}'.format(recall_score(y_test, y_pred)))
    
    return clf, accuracy_score(y_test, y_pred), precision_score(y_test, y_pred), recall_score(y_test, y_pred), f1_score(y_test, y_pred) 

In [12]:
def random_parameter(param_name, model_name=None):
    
    if(param_name=='model'):
        tmp = random.randint(1, 2)
        if(tmp==1):
            return 'GBC'
        elif(tmp==2):
            return 'RFC'
        
    if(param_name=='max_depth'):
        return random.randint(1, 10)
    
    if(param_name=='criterion'):
        if(model_name=='GBC'):
            tmp = random.randint(1, 2)
            if(tmp==1):
                return 'friedman_mse'
            elif(tmp==2):
                return 'mae'
        elif(model_name=='RFC'):
            tmp = random.randint(1, 2)
            if(tmp==1):
                return 'gini'
            elif(tmp==2):
                return 'entropy'
            
    if(param_name=='min_impurity_decrease'):
        return random.random()/10
    
    if(param_name=='min_samples_leaf'):
        if(random.random()<0.5):
            return random.random()/10
        else:
            return random.randint(1, 20)

In [22]:
model_name = random_parameter('model')
param = {'model': model_name,
         'max_depth': random_parameter('max_depth'),
         'criterion': random_parameter('criterion', model_name),
         'min_impurity_decrease': random_parameter('min_impurity_decrease'),
         'min_samples_leaf': random_parameter('min_samples_leaf')}

In [23]:
model = model_training(df, 0.75, param)

Confusion Matrix
[[103  14]
 [ 15  20]]
Accuracy: 0.8092
Precision: 0.5882
Recall: 0.5714


In [24]:
param

{'model': 'GBC',
 'max_depth': 7,
 'criterion': 'friedman_mse',
 'min_impurity_decrease': 0.05083646977292787,
 'min_samples_leaf': 17}

In [29]:
def gen_param():
    model_name = random_parameter('model')
    param = {'model': model_name,
             'max_depth': random_parameter('max_depth'),
             'criterion': random_parameter('criterion', model_name),
             'min_impurity_decrease': random_parameter('min_impurity_decrease'),
             'min_samples_leaf': random_parameter('min_samples_leaf')}
    return param

In [74]:
def gen_param_crossover(param):
    rand_threshold = 0.3
    
    if(random.random()<rand_threshold):
        model_name = random_parameter('model')
    else:
        model_name = param['model']
        
    if(random.random()<rand_threshold):
        max_depth = random_parameter('max_depth')
    else:
        max_depth = param['max_depth']
    
    if(random.random()<rand_threshold):
        criterion = random_parameter('criterion')
    else:
        criterion = param['criterion']
        
    if(random.random()<rand_threshold):
        min_impurity_decrease = random_parameter('min_impurity_decrease')
    else:
        min_impurity_decrease = param['min_impurity_decrease']
        
    if(random.random()<rand_threshold):
        min_samples_leaf = random_parameter('min_samples_leaf')
    else:
        min_samples_leaf = param['min_samples_leaf']
        
    param = {'model': model_name,
             'max_depth': max_depth,
             'criterion': criterion,
             'min_impurity_decrease': min_impurity_decrease,
             'min_samples_leaf': min_samples_leaf}
    
    return param

In [75]:
def gen_1st(param_prev=None, model_prev=None, eval_prev=None):

    if((param_prev is None)&(model_prev is None)):
        params = np.array([None])
        models = np.array([None for i in range(6)])
        eval_val = np.array([0, 0, 0, 0], ndmin=2)
        count = 0
#     else:
#         acc_score, prec_score, rec_score, f1_score = evaluate(model_prev, x_test, y_test)
#         eval_val = np.array([acc_score, prec_score, rec_score, f1_score], ndmin=2)
#         params = param_prev.copy()
#         models = np.array([model_prev])
#         count = 1

    while(count<5):
        try:
            param = gen_param()
            print('param: {}'.format(param))
            model, acc_score, prec_score, rec_score, f1_score = model_training(df, 0.75, param)
            eval_val = np.append(eval_val, np.array([acc_score, prec_score, rec_score, f1_score], ndmin=2), axis=0)
            params = np.append(params, [param])
            # models = np.append(models, [model])
            models[count+1] = model
            count = count + 1

        except:
            continue

    return params, models, eval_val


In [76]:
def gen_nth(param_best, model_best, eval_val_best):
    # params = np.array([param_best])
    # models = np.array([model_best])
    models = np.array([None for i in range(5)])
    params = param_best.copy()
    for i in range(len(model_best)):
        models[i] = model_best[i]
    # models = model_best.copy()
    eval_val = np.array(eval_val_best.copy(), ndmin=2)
    count = len(model_best)
    for i in range(len(model_best)):
        try:
            param = gen_param_crossover(params[i])
            print('param: {}'.format(param))
            network = gen_network(n_layers=n_layer, layers_list=layers_list, dense_layer_node_list=dense_layer_nodes)
            model = model_training(network=network, resampled_ds=resampled_ds, epoch=EPOCHS, resampled_steps_per_epoch=resampled_steps_per_epoch, x_test=x_test, y_test=y_test)

            acc_score, prec_score, rec_score, f1_score = evaluate(model, x_test, y_test)
            eval_val = np.append(eval_val, np.array([acc_score, prec_score, rec_score, f1_score], ndmin=2), axis=0)
            params = np.append(params, [n_layer, layers_list, dense_layer_nodes])
            models = np.append(models, [model])
            count = count + 1
        except:
            continue


    while(count<5):
        try:
            param = gen_param()
            print('param: {}'.format(param))
            model, acc_score, prec_score, rec_score, f1_score = model_training(df, 0.75, param)
            eval_val = np.append(eval_val, np.array([acc_score, prec_score, rec_score, f1_score], ndmin=2), axis=0)
            params = np.append(params, [param])
            # models = np.append(models, [model])
            models[count] = model
            count = count + 1
        except:
            continue

    return params, models, eval_val

In [77]:
def start_from_new(gen=15, n_select_max=3):

    print('GEN 1:')
    params, models, eval_val = gen_1st()

    print('\n\n\nGEN 2:')
    tmp_params = params.copy()
    tmp_models = models.copy()
    tmp_eval_val = eval_val.copy()
    
    param_best_set = tmp_params[list(np.argsort(tmp_eval_val[:, 3])[::-1])][0:n_select_max]
    model_best_set = tmp_models[list(np.argsort(tmp_eval_val[:, 3])[::-1])][0:n_select_max]
    eval_val_best_set = tmp_eval_val[np.argsort(tmp_eval_val[:, 3])[::-1]][0:n_select_max]
    # model_best_set = list(set(model_best_set))
    # eval_val_best_set = list(set(eval_val_best_set))
    params_2nd, models_2nd, eval_val_2nd = gen_nth(param_best=param_best_set, model_best=model_best_set, eval_val_best=eval_val_best_set)

    print('\n\n\nGEN 3:')
    tmp_params = params_2nd.copy()
    tmp_models = models_2nd.copy()
    tmp_eval_val = eval_val_2nd.copy()
    param_best_set = tmp_params[list(np.argsort(tmp_eval_val[:, 3])[::-1])][0:n_select_max]
    model_best_set = tmp_models[list(np.argsort(tmp_eval_val[:, 3])[::-1])][0:n_select_max]
    eval_val_best_set = tmp_eval_val[np.argsort(tmp_eval_val[:, 3])[::-1]][0:n_select_max]
    # model_best_set = list(set(model_best_set))
    # eval_val_best_set = list(set(eval_val_best_set))
    params_3rd, models_3rd, eval_val_3rd = gen_nth(param_best=param_best_set, model_best=model_best_set, eval_val_best=eval_val_best_set)

    params_nth, models_nth, eval_val_nth = params_3rd.copy(), models_3rd.copy(), eval_val_3rd.copy()
    for i in range(gen-3):
        print('\n\n\nGEN {}:'.format(i+4))
        tmp_params = params_nth.copy()
        tmp_models = models_nth.copy()
        tmp_eval_val = eval_val_nth.copy()
        param_best_set = tmp_params[list(np.argsort(tmp_eval_val[:, 3])[::-1])][0:n_select_max]
        model_best_set = tmp_models[list(np.argsort(tmp_eval_val[:, 3])[::-1])][0:n_select_max]
        eval_val_best_set = tmp_eval_val[np.argsort(tmp_eval_val[:, 3])[::-1]][0:n_select_max]
        # model_best_set = list(set(model_best_set))
        # eval_val_best_set = list(set(eval_val_best_set))
        params_nth, models_nth, eval_val_nth = gen_nth(param_best=param_best_set, model_best=model_best_set, eval_val_best=eval_val_best_set)

    return params, models, eval_val, params_2nd, models_2nd, eval_val_2nd, params_3rd, models_3rd, eval_val_3rd, params_nth, models_nth, eval_val_nth

In [90]:
params, models, eval_val, params_2nd, models_2nd, eval_val_2nd, params_3rd, models_3rd, eval_val_3rd, params_nth, models_nth, eval_val_nth = start_from_new(gen=10, n_select_max=1)

GEN 1:
param: {'model': 'RFC', 'max_depth': 8, 'criterion': 'gini', 'min_impurity_decrease': 0.020922905822125325, 'min_samples_leaf': 9}
Confusion Matrix
[[85 32]
 [ 8 27]]
Accuracy: 0.7368
Precision: 0.4576
Recall: 0.7714
param: {'model': 'RFC', 'max_depth': 2, 'criterion': 'gini', 'min_impurity_decrease': 0.00018071948458765475, 'min_samples_leaf': 4}
Confusion Matrix
[[81 36]
 [ 8 27]]
Accuracy: 0.7105
Precision: 0.4286
Recall: 0.7714
param: {'model': 'GBC', 'max_depth': 9, 'criterion': 'friedman_mse', 'min_impurity_decrease': 0.09853563307817649, 'min_samples_leaf': 0.03459015756575724}
Confusion Matrix
[[99 18]
 [14 21]]
Accuracy: 0.7895
Precision: 0.5385
Recall: 0.6000
param: {'model': 'GBC', 'max_depth': 4, 'criterion': 'friedman_mse', 'min_impurity_decrease': 0.04695794901856417, 'min_samples_leaf': 7}
Confusion Matrix
[[101  16]
 [ 13  22]]
Accuracy: 0.8092
Precision: 0.5789
Recall: 0.6286
param: {'model': 'RFC', 'max_depth': 10, 'criterion': 'entropy', 'min_impurity_decrease



Confusion Matrix
[[90 27]
 [ 8 27]]
Accuracy: 0.7697
Precision: 0.5000
Recall: 0.7714
param: {'model': 'RFC', 'max_depth': 8, 'criterion': 'gini', 'min_impurity_decrease': 0.08174910762111, 'min_samples_leaf': 0.05560749019874389}
Confusion Matrix
[[65 52]
 [ 9 26]]
Accuracy: 0.5987
Precision: 0.3333
Recall: 0.7429
param: {'model': 'GBC', 'max_depth': 3, 'criterion': 'mae', 'min_impurity_decrease': 0.0484577775082912, 'min_samples_leaf': 0.07630791469258816}




Confusion Matrix
[[71 46]
 [ 8 27]]
Accuracy: 0.6447
Precision: 0.3699
Recall: 0.7714



GEN 3:
param: {'model': 'GBC', 'max_depth': 4, 'criterion': 'mae', 'min_impurity_decrease': 0.0010575659941088333, 'min_samples_leaf': 4}
param: {'model': 'GBC', 'max_depth': 1, 'criterion': 'mae', 'min_impurity_decrease': 0.09583726427413683, 'min_samples_leaf': 20}




Confusion Matrix
[[71 46]
 [ 8 27]]
Accuracy: 0.6447
Precision: 0.3699
Recall: 0.7714
param: {'model': 'GBC', 'max_depth': 4, 'criterion': 'mae', 'min_impurity_decrease': 0.09009556226936269, 'min_samples_leaf': 2}




Confusion Matrix
[[71 46]
 [ 8 27]]
Accuracy: 0.6447
Precision: 0.3699
Recall: 0.7714
param: {'model': 'GBC', 'max_depth': 3, 'criterion': 'mae', 'min_impurity_decrease': 0.030419141737302736, 'min_samples_leaf': 14}




Confusion Matrix
[[71 46]
 [ 8 27]]
Accuracy: 0.6447
Precision: 0.3699
Recall: 0.7714
param: {'model': 'RFC', 'max_depth': 9, 'criterion': 'entropy', 'min_impurity_decrease': 0.055124183452580265, 'min_samples_leaf': 4}
Confusion Matrix
[[73 44]
 [ 9 26]]
Accuracy: 0.6513
Precision: 0.3714
Recall: 0.7429



GEN 4:
param: {'model': 'RFC', 'max_depth': 4, 'criterion': 'mae', 'min_impurity_decrease': 0.0010575659941088333, 'min_samples_leaf': 4}
param: {'model': 'RFC', 'max_depth': 2, 'criterion': 'entropy', 'min_impurity_decrease': 0.08229312831419724, 'min_samples_leaf': 0.001981024778941165}
Confusion Matrix
[[66 51]
 [ 9 26]]
Accuracy: 0.6053
Precision: 0.3377
Recall: 0.7429
param: {'model': 'GBC', 'max_depth': 6, 'criterion': 'friedman_mse', 'min_impurity_decrease': 0.06702505724323583, 'min_samples_leaf': 0.0976394083458296}
Confusion Matrix
[[94 23]
 [15 20]]
Accuracy: 0.7500
Precision: 0.4651
Recall: 0.5714
param: {'model': 'GBC', 'max_depth': 4, 'criterion': 'mae', 'min_impurity_



Confusion Matrix
[[71 46]
 [ 8 27]]
Accuracy: 0.6447
Precision: 0.3699
Recall: 0.7714
param: {'model': 'RFC', 'max_depth': 3, 'criterion': 'entropy', 'min_impurity_decrease': 0.07657484179281324, 'min_samples_leaf': 16}
Confusion Matrix
[[71 46]
 [ 9 26]]
Accuracy: 0.6382
Precision: 0.3611
Recall: 0.7429



GEN 5:
param: {'model': 'GBC', 'max_depth': 8, 'criterion': 'mae', 'min_impurity_decrease': 0.00287763522155271, 'min_samples_leaf': 0.08779080175851911}
param: {'model': 'GBC', 'max_depth': 8, 'criterion': 'mae', 'min_impurity_decrease': 0.01998090182254777, 'min_samples_leaf': 11}




Confusion Matrix
[[71 46]
 [ 8 27]]
Accuracy: 0.6447
Precision: 0.3699
Recall: 0.7714
param: {'model': 'GBC', 'max_depth': 1, 'criterion': 'friedman_mse', 'min_impurity_decrease': 0.06143536572372551, 'min_samples_leaf': 1}
Confusion Matrix
[[85 32]
 [12 23]]
Accuracy: 0.7105
Precision: 0.4182
Recall: 0.6571
param: {'model': 'RFC', 'max_depth': 10, 'criterion': 'gini', 'min_impurity_decrease': 0.030652191673816543, 'min_samples_leaf': 0.05025176013111948}
Confusion Matrix
[[76 41]
 [ 9 26]]
Accuracy: 0.6711
Precision: 0.3881
Recall: 0.7429
param: {'model': 'RFC', 'max_depth': 8, 'criterion': 'entropy', 'min_impurity_decrease': 0.01308863504369584, 'min_samples_leaf': 3}
Confusion Matrix
[[90 27]
 [ 8 27]]
Accuracy: 0.7697
Precision: 0.5000
Recall: 0.7714



GEN 6:
param: {'model': 'RFC', 'max_depth': 1, 'criterion': None, 'min_impurity_decrease': 0.01308863504369584, 'min_samples_leaf': 8}
param: {'model': 'RFC', 'max_depth': 9, 'criterion': 'entropy', 'min_impurity_decrease': 0.061390



Confusion Matrix
[[71 46]
 [ 8 27]]
Accuracy: 0.6447
Precision: 0.3699
Recall: 0.7714
param: {'model': 'GBC', 'max_depth': 1, 'criterion': 'mae', 'min_impurity_decrease': 0.0990764375921667, 'min_samples_leaf': 2}




Confusion Matrix
[[71 46]
 [ 8 27]]
Accuracy: 0.6447
Precision: 0.3699
Recall: 0.7714
param: {'model': 'GBC', 'max_depth': 7, 'criterion': 'mae', 'min_impurity_decrease': 0.07381712067115878, 'min_samples_leaf': 0.05060037423346838}




Confusion Matrix
[[71 46]
 [ 8 27]]
Accuracy: 0.6447
Precision: 0.3699
Recall: 0.7714



GEN 7:
param: {'model': 'GBC', 'max_depth': 3, 'criterion': 'entropy', 'min_impurity_decrease': 0.01308863504369584, 'min_samples_leaf': 3}
param: {'model': 'RFC', 'max_depth': 9, 'criterion': 'gini', 'min_impurity_decrease': 0.006693627838832849, 'min_samples_leaf': 0.030684953039938144}
Confusion Matrix
[[89 28]
 [ 9 26]]
Accuracy: 0.7566
Precision: 0.4815
Recall: 0.7429
param: {'model': 'GBC', 'max_depth': 3, 'criterion': 'mae', 'min_impurity_decrease': 0.020675828098175864, 'min_samples_leaf': 19}




Confusion Matrix
[[71 46]
 [ 8 27]]
Accuracy: 0.6447
Precision: 0.3699
Recall: 0.7714
param: {'model': 'GBC', 'max_depth': 2, 'criterion': 'friedman_mse', 'min_impurity_decrease': 0.0794238527584165, 'min_samples_leaf': 0.01272623462690301}
Confusion Matrix
[[88 29]
 [13 22]]
Accuracy: 0.7237
Precision: 0.4314
Recall: 0.6286
param: {'model': 'RFC', 'max_depth': 3, 'criterion': 'gini', 'min_impurity_decrease': 0.0870890013836097, 'min_samples_leaf': 10}
Confusion Matrix
[[55 62]
 [ 7 28]]
Accuracy: 0.5461
Precision: 0.3111
Recall: 0.8000



GEN 8:
param: {'model': 'GBC', 'max_depth': 8, 'criterion': 'entropy', 'min_impurity_decrease': 0.01308863504369584, 'min_samples_leaf': 0.09441884129406951}
param: {'model': 'RFC', 'max_depth': 2, 'criterion': 'gini', 'min_impurity_decrease': 0.00014843902210016636, 'min_samples_leaf': 0.035792844260010594}
Confusion Matrix
[[81 36]
 [ 9 26]]
Accuracy: 0.7039
Precision: 0.4194
Recall: 0.7429
param: {'model': 'RFC', 'max_depth': 2, 'criterion': 'entr

In [91]:
params

array([None,
       {'model': 'RFC', 'max_depth': 8, 'criterion': 'gini', 'min_impurity_decrease': 0.020922905822125325, 'min_samples_leaf': 9},
       {'model': 'RFC', 'max_depth': 2, 'criterion': 'gini', 'min_impurity_decrease': 0.00018071948458765475, 'min_samples_leaf': 4},
       {'model': 'GBC', 'max_depth': 9, 'criterion': 'friedman_mse', 'min_impurity_decrease': 0.09853563307817649, 'min_samples_leaf': 0.03459015756575724},
       {'model': 'GBC', 'max_depth': 4, 'criterion': 'friedman_mse', 'min_impurity_decrease': 0.04695794901856417, 'min_samples_leaf': 7},
       {'model': 'RFC', 'max_depth': 10, 'criterion': 'entropy', 'min_impurity_decrease': 0.015737724597368608, 'min_samples_leaf': 7}],
      dtype=object)

In [92]:
models

array([None,
       RandomForestClassifier(max_depth=8, min_impurity_decrease=0.020922905822125325,
                              min_samples_leaf=9, random_state=42)                    ,
       RandomForestClassifier(max_depth=2,
                              min_impurity_decrease=0.00018071948458765475,
                              min_samples_leaf=4, random_state=42)         ,
       GradientBoostingClassifier(max_depth=9,
                                  min_impurity_decrease=0.09853563307817649,
                                  min_samples_leaf=0.03459015756575724,
                                  random_state=42)                          ,
       GradientBoostingClassifier(max_depth=4,
                                  min_impurity_decrease=0.04695794901856417,
                                  min_samples_leaf=7, random_state=42)      ,
       RandomForestClassifier(criterion='entropy', max_depth=10,
                              min_impurity_decrease=0.015737724597368608,
 

In [93]:
eval_val

array([[0.        , 0.        , 0.        , 0.        ],
       [0.73684211, 0.45762712, 0.77142857, 0.57446809],
       [0.71052632, 0.42857143, 0.77142857, 0.55102041],
       [0.78947368, 0.53846154, 0.6       , 0.56756757],
       [0.80921053, 0.57894737, 0.62857143, 0.60273973],
       [0.76973684, 0.5       , 0.74285714, 0.59770115]])

In [94]:
params[np.argsort(eval_val[:, 3])[::-1]]

array([{'model': 'GBC', 'max_depth': 4, 'criterion': 'friedman_mse', 'min_impurity_decrease': 0.04695794901856417, 'min_samples_leaf': 7},
       {'model': 'RFC', 'max_depth': 10, 'criterion': 'entropy', 'min_impurity_decrease': 0.015737724597368608, 'min_samples_leaf': 7},
       {'model': 'RFC', 'max_depth': 8, 'criterion': 'gini', 'min_impurity_decrease': 0.020922905822125325, 'min_samples_leaf': 9},
       {'model': 'GBC', 'max_depth': 9, 'criterion': 'friedman_mse', 'min_impurity_decrease': 0.09853563307817649, 'min_samples_leaf': 0.03459015756575724},
       {'model': 'RFC', 'max_depth': 2, 'criterion': 'gini', 'min_impurity_decrease': 0.00018071948458765475, 'min_samples_leaf': 4},
       None], dtype=object)

In [95]:
models[np.argsort(eval_val[:, 3])[::-1]]

array([GradientBoostingClassifier(max_depth=4,
                                  min_impurity_decrease=0.04695794901856417,
                                  min_samples_leaf=7, random_state=42)      ,
       RandomForestClassifier(criterion='entropy', max_depth=10,
                              min_impurity_decrease=0.015737724597368608,
                              min_samples_leaf=7, random_state=42)       ,
       RandomForestClassifier(max_depth=8, min_impurity_decrease=0.020922905822125325,
                              min_samples_leaf=9, random_state=42)                    ,
       GradientBoostingClassifier(max_depth=9,
                                  min_impurity_decrease=0.09853563307817649,
                                  min_samples_leaf=0.03459015756575724,
                                  random_state=42)                          ,
       RandomForestClassifier(max_depth=2,
                              min_impurity_decrease=0.00018071948458765475,
                

In [96]:
eval_val[np.argsort(eval_val[:, 3])[::-1]]

array([[0.80921053, 0.57894737, 0.62857143, 0.60273973],
       [0.76973684, 0.5       , 0.74285714, 0.59770115],
       [0.73684211, 0.45762712, 0.77142857, 0.57446809],
       [0.78947368, 0.53846154, 0.6       , 0.56756757],
       [0.71052632, 0.42857143, 0.77142857, 0.55102041],
       [0.        , 0.        , 0.        , 0.        ]])

In [97]:
params_2nd

array([{'model': 'GBC', 'max_depth': 4, 'criterion': 'friedman_mse', 'min_impurity_decrease': 0.04695794901856417, 'min_samples_leaf': 7},
       {'model': 'RFC', 'max_depth': 1, 'criterion': 'entropy', 'min_impurity_decrease': 0.004975222676409119, 'min_samples_leaf': 0.06636504299549752},
       {'model': 'GBC', 'max_depth': 4, 'criterion': 'mae', 'min_impurity_decrease': 0.0010575659941088333, 'min_samples_leaf': 4},
       {'model': 'RFC', 'max_depth': 8, 'criterion': 'gini', 'min_impurity_decrease': 0.08174910762111, 'min_samples_leaf': 0.05560749019874389},
       {'model': 'GBC', 'max_depth': 3, 'criterion': 'mae', 'min_impurity_decrease': 0.0484577775082912, 'min_samples_leaf': 0.07630791469258816}],
      dtype=object)

In [98]:
models_2nd

array([GradientBoostingClassifier(max_depth=4,
                                  min_impurity_decrease=0.04695794901856417,
                                  min_samples_leaf=7, random_state=42)      ,
       RandomForestClassifier(criterion='entropy', max_depth=1,
                              min_impurity_decrease=0.004975222676409119,
                              min_samples_leaf=0.06636504299549752, random_state=42),
       GradientBoostingClassifier(criterion='mae', max_depth=4,
                                  min_impurity_decrease=0.0010575659941088333,
                                  min_samples_leaf=4, random_state=42)        ,
       RandomForestClassifier(max_depth=8, min_impurity_decrease=0.08174910762111,
                              min_samples_leaf=0.05560749019874389, random_state=42),
       GradientBoostingClassifier(criterion='mae',
                                  min_impurity_decrease=0.0484577775082912,
                                  min_samples_leaf=0.07

In [99]:
eval_val_2nd

array([[0.80921053, 0.57894737, 0.62857143, 0.60273973],
       [0.61184211, 0.34210526, 0.74285714, 0.46846847],
       [0.76973684, 0.5       , 0.77142857, 0.60674157],
       [0.59868421, 0.33333333, 0.74285714, 0.46017699],
       [0.64473684, 0.36986301, 0.77142857, 0.5       ]])

In [100]:
params_2nd[np.argsort(eval_val_2nd[:, 3])[::-1]]

array([{'model': 'GBC', 'max_depth': 4, 'criterion': 'mae', 'min_impurity_decrease': 0.0010575659941088333, 'min_samples_leaf': 4},
       {'model': 'GBC', 'max_depth': 4, 'criterion': 'friedman_mse', 'min_impurity_decrease': 0.04695794901856417, 'min_samples_leaf': 7},
       {'model': 'GBC', 'max_depth': 3, 'criterion': 'mae', 'min_impurity_decrease': 0.0484577775082912, 'min_samples_leaf': 0.07630791469258816},
       {'model': 'RFC', 'max_depth': 1, 'criterion': 'entropy', 'min_impurity_decrease': 0.004975222676409119, 'min_samples_leaf': 0.06636504299549752},
       {'model': 'RFC', 'max_depth': 8, 'criterion': 'gini', 'min_impurity_decrease': 0.08174910762111, 'min_samples_leaf': 0.05560749019874389}],
      dtype=object)

In [101]:
models_2nd[np.argsort(eval_val_2nd[:, 3])[::-1]]

array([GradientBoostingClassifier(criterion='mae', max_depth=4,
                                  min_impurity_decrease=0.0010575659941088333,
                                  min_samples_leaf=4, random_state=42)        ,
       GradientBoostingClassifier(max_depth=4,
                                  min_impurity_decrease=0.04695794901856417,
                                  min_samples_leaf=7, random_state=42)      ,
       GradientBoostingClassifier(criterion='mae',
                                  min_impurity_decrease=0.0484577775082912,
                                  min_samples_leaf=0.07630791469258816,
                                  random_state=42)                         ,
       RandomForestClassifier(criterion='entropy', max_depth=1,
                              min_impurity_decrease=0.004975222676409119,
                              min_samples_leaf=0.06636504299549752, random_state=42),
       RandomForestClassifier(max_depth=8, min_impurity_decrease=0.08174910

In [102]:
eval_val_2nd[np.argsort(eval_val_2nd[:, 3])[::-1]]

array([[0.76973684, 0.5       , 0.77142857, 0.60674157],
       [0.80921053, 0.57894737, 0.62857143, 0.60273973],
       [0.64473684, 0.36986301, 0.77142857, 0.5       ],
       [0.61184211, 0.34210526, 0.74285714, 0.46846847],
       [0.59868421, 0.33333333, 0.74285714, 0.46017699]])

In [103]:
params_nth

array([{'model': 'GBC', 'max_depth': 7, 'criterion': 'friedman_mse', 'min_impurity_decrease': 0.05067949565776889, 'min_samples_leaf': 5},
       {'model': 'RFC', 'max_depth': 6, 'criterion': 'entropy', 'min_impurity_decrease': 0.037795516307422575, 'min_samples_leaf': 7},
       {'model': 'RFC', 'max_depth': 6, 'criterion': 'gini', 'min_impurity_decrease': 0.02781680474705752, 'min_samples_leaf': 0.03777625014600956},
       {'model': 'GBC', 'max_depth': 1, 'criterion': 'friedman_mse', 'min_impurity_decrease': 0.04304177988155161, 'min_samples_leaf': 15},
       {'model': 'RFC', 'max_depth': 2, 'criterion': 'entropy', 'min_impurity_decrease': 0.08487743423660357, 'min_samples_leaf': 0.04651287615635341}],
      dtype=object)

In [104]:
models_nth

array([GradientBoostingClassifier(max_depth=7,
                                  min_impurity_decrease=0.05067949565776889,
                                  min_samples_leaf=5, random_state=42)      ,
       RandomForestClassifier(criterion='entropy', max_depth=6,
                              min_impurity_decrease=0.037795516307422575,
                              min_samples_leaf=7, random_state=42)       ,
       RandomForestClassifier(max_depth=6, min_impurity_decrease=0.02781680474705752,
                              min_samples_leaf=0.03777625014600956, random_state=42) ,
       GradientBoostingClassifier(max_depth=1,
                                  min_impurity_decrease=0.04304177988155161,
                                  min_samples_leaf=15, random_state=42)     ,
       RandomForestClassifier(criterion='entropy', max_depth=2,
                              min_impurity_decrease=0.08487743423660357,
                              min_samples_leaf=0.04651287615635341, rando

In [105]:
eval_val_nth

array([[0.81578947, 0.58536585, 0.68571429, 0.63157895],
       [0.72368421, 0.44262295, 0.77142857, 0.5625    ],
       [0.70394737, 0.41935484, 0.74285714, 0.53608247],
       [0.70394737, 0.4137931 , 0.68571429, 0.51612903],
       [0.61842105, 0.34666667, 0.74285714, 0.47272727]])

In [47]:
paramList = []
modelList = []
for i in range(5):
    
    model_name = random_parameter('model')
    param = {'model': model_name,
             'max_depth': random_parameter('max_depth'),
             'criterion': random_parameter('criterion', model_name),
             'min_impurity_decrease': random_parameter('min_impurity_decrease'),
             'min_samples_leaf': random_parameter('min_samples_leaf')}
    paramList.append(param)
    model, acc, prec, rec, f1 = model_training(df, 0.75, param)
    

Confusion Matrix
[[67 50]
 [ 9 26]]
Accuracy: 0.6118
Precision: 0.3421
Recall: 0.7429
Confusion Matrix
[[84 33]
 [12 23]]
Accuracy: 0.7039
Precision: 0.4107
Recall: 0.6571
Confusion Matrix
[[81 36]
 [ 8 27]]
Accuracy: 0.7105
Precision: 0.4286
Recall: 0.7714
Confusion Matrix
[[70 47]
 [ 9 26]]
Accuracy: 0.6316
Precision: 0.3562
Recall: 0.7429
Confusion Matrix
[[102  15]
 [ 14  21]]
Accuracy: 0.8092
Precision: 0.5833
Recall: 0.6000


In [48]:
model

GradientBoostingClassifier(max_depth=8,
                           min_impurity_decrease=0.03448843414870003,
                           min_samples_leaf=0.017690285698752705,
                           random_state=42)

In [26]:
paramList

[{'model': 'GBC',
  'max_depth': 5,
  'criterion': 'friedman_mse',
  'min_impurity_decrease': 0.011872030290903612,
  'min_samples_leaf': 16},
 {'model': 'RFC',
  'max_depth': 9,
  'criterion': 'entropy',
  'min_impurity_decrease': 0.004675507482568342,
  'min_samples_leaf': 0.023947207620617326},
 {'model': 'RFC',
  'max_depth': 5,
  'criterion': 'entropy',
  'min_impurity_decrease': 0.020184089922514192,
  'min_samples_leaf': 0.03096172102854059},
 {'model': 'RFC',
  'max_depth': 4,
  'criterion': 'entropy',
  'min_impurity_decrease': 0.05409431946033868,
  'min_samples_leaf': 19},
 {'model': 'RFC',
  'max_depth': 3,
  'criterion': 'gini',
  'min_impurity_decrease': 0.018054714573187858,
  'min_samples_leaf': 17}]