In [1]:
# importing required libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
from sklearn.cluster import KMeans
from tqdm import tqdm
import time

train = pd.read_csv("CSV_train.csv",low_memory=False,delimiter=';')
test=pd.read_csv("CSV_test.csv",low_memory=False,delimiter=',')
hidden=pd.read_csv("CSV_hidden_test.csv",low_memory=False,delimiter=',')

In [2]:
test.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 136786 entries, 0 to 136785
Data columns (total 28 columns):
 #   Column                            Non-Null Count   Dtype  
---  ------                            --------------   -----  
 0   WELL                              136786 non-null  object 
 1   DEPTH_MD                          136786 non-null  float64
 2   X_LOC                             136727 non-null  float64
 3   Y_LOC                             136727 non-null  float64
 4   Z_LOC                             136727 non-null  float64
 5   GROUP                             136786 non-null  object 
 6   FORMATION                         129712 non-null  object 
 7   CALI                              131141 non-null  float64
 8   RSHA                              39097 non-null   float64
 9   RMED                              136199 non-null  float64
 10  RDEP                              136727 non-null  float64
 11  RHOB                              119826 non-null  f

In [3]:
# storing length of datasets 
train_len = train.shape[0] 
test_len = test.shape[0]
All_data = pd.concat((train,test,hidden)).reset_index(drop=True) 

lithology_keys = {30000: 'Sandstone',
                 65030: 'Sandstone/Shale',
                 65000: 'Shale',
                 80000: 'Marl',
                 74000: 'Dolomite',
                 70000: 'Limestone',
                 70032: 'Chalk',
                 88000: 'Halite',
                 86000: 'Anhydrite',
                 99000: 'Tuff',
                 90000: 'Coal',
                 93000: 'Basement'}
All_data['Lithology'] = All_data['FORCE_2020_LITHOFACIES_LITHOLOGY'].map(lithology_keys)
All_data

Unnamed: 0,WELL,DEPTH_MD,X_LOC,Y_LOC,Z_LOC,GROUP,FORMATION,CALI,RSHA,RMED,...,DTS,DCAL,DRHO,MUDWEIGHT,RMIC,ROPA,RXO,FORCE_2020_LITHOFACIES_LITHOLOGY,FORCE_2020_LITHOFACIES_CONFIDENCE,Lithology
0,15/9-13,494.5280,437641.96875,6470972.5,-469.501831,NORDLAND GP.,,19.480835,,1.611410,...,,,-0.574928,,,,,65000,1.0,Shale
1,15/9-13,494.6800,437641.96875,6470972.5,-469.653809,NORDLAND GP.,,19.468800,,1.618070,...,,,-0.570188,,,,,65000,1.0,Shale
2,15/9-13,494.8320,437641.96875,6470972.5,-469.805786,NORDLAND GP.,,19.468800,,1.626459,...,,,-0.574245,,,,,65000,1.0,Shale
3,15/9-13,494.9840,437641.96875,6470972.5,-469.957794,NORDLAND GP.,,19.459282,,1.621594,...,,,-0.586315,,,,,65000,1.0,Shale
4,15/9-13,495.1360,437641.96875,6470972.5,-470.109772,NORDLAND GP.,,19.453100,,1.602679,...,,,-0.597914,,,,,65000,1.0,Shale
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1429689,35/9-7,2973.2988,536096.06250,6793022.0,-2943.444580,BAAT GP.,Etive Fm.,8.276272,,2.820439,...,136.911575,,0.502458,,2.311106,24.306124,,65000,2.0,Shale
1429690,35/9-7,2973.4508,536096.06250,6793022.0,-2943.595947,BAAT GP.,Etive Fm.,8.267273,,3.020778,...,137.583923,,0.374753,,1.853418,22.201078,,65000,2.0,Shale
1429691,35/9-7,2973.6028,536096.06250,6793022.0,-2943.747559,BAAT GP.,Etive Fm.,8.250099,,2.795711,...,138.310898,,0.211487,,1.325961,20.096741,,65000,2.0,Shale
1429692,35/9-7,2973.7548,536096.06250,6793022.0,-2943.899170,BAAT GP.,Etive Fm.,,,2.658694,...,137.592819,,0.147950,,1.260347,17.992323,,65000,2.0,Shale


In [4]:
#dropping columns with high missing values
drop_cols = ['SGR', 'ROPA', 'RXO', 'MUDWEIGHT','DCAL','RMIC','FORCE_2020_LITHOFACIES_CONFIDENCE']
All_data_drop = All_data.drop(drop_cols, axis=1)


In [5]:
All_data_drop

Unnamed: 0,WELL,DEPTH_MD,X_LOC,Y_LOC,Z_LOC,GROUP,FORMATION,CALI,RSHA,RMED,...,NPHI,PEF,DTC,SP,BS,ROP,DTS,DRHO,FORCE_2020_LITHOFACIES_LITHOLOGY,Lithology
0,15/9-13,494.5280,437641.96875,6470972.5,-469.501831,NORDLAND GP.,,19.480835,,1.611410,...,,20.915468,161.131180,24.612379,,34.636410,,-0.574928,65000,Shale
1,15/9-13,494.6800,437641.96875,6470972.5,-469.653809,NORDLAND GP.,,19.468800,,1.618070,...,,19.383013,160.603470,23.895531,,34.636410,,-0.570188,65000,Shale
2,15/9-13,494.8320,437641.96875,6470972.5,-469.805786,NORDLAND GP.,,19.468800,,1.626459,...,,22.591518,160.173615,23.916357,,34.779556,,-0.574245,65000,Shale
3,15/9-13,494.9840,437641.96875,6470972.5,-469.957794,NORDLAND GP.,,19.459282,,1.621594,...,,32.191910,160.149429,23.793688,,39.965164,,-0.586315,65000,Shale
4,15/9-13,495.1360,437641.96875,6470972.5,-470.109772,NORDLAND GP.,,19.453100,,1.602679,...,,38.495632,160.128342,24.104078,,57.483765,,-0.597914,65000,Shale
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1429689,35/9-7,2973.2988,536096.06250,6793022.0,-2943.444580,BAAT GP.,Etive Fm.,8.276272,,2.820439,...,,,75.260658,,8.5,15.195305,136.911575,0.502458,65000,Shale
1429690,35/9-7,2973.4508,536096.06250,6793022.0,-2943.595947,BAAT GP.,Etive Fm.,8.267273,,3.020778,...,,,74.868301,,8.5,15.770223,137.583923,0.374753,65000,Shale
1429691,35/9-7,2973.6028,536096.06250,6793022.0,-2943.747559,BAAT GP.,Etive Fm.,8.250099,,2.795711,...,,,74.848122,,8.5,16.418465,138.310898,0.211487,65000,Shale
1429692,35/9-7,2973.7548,536096.06250,6793022.0,-2943.899170,BAAT GP.,Etive Fm.,,,2.658694,...,,,74.964027,,8.5,17.037945,137.592819,0.147950,65000,Shale


In [6]:
All_data_drop.columns

Index(['WELL', 'DEPTH_MD', 'X_LOC', 'Y_LOC', 'Z_LOC', 'GROUP', 'FORMATION',
       'CALI', 'RSHA', 'RMED', 'RDEP', 'RHOB', 'GR', 'NPHI', 'PEF', 'DTC',
       'SP', 'BS', 'ROP', 'DTS', 'DRHO', 'FORCE_2020_LITHOFACIES_LITHOLOGY',
       'Lithology'],
      dtype='object')

In [7]:
#Inputing missing values by introducing median 
from sklearn.impute import SimpleImputer

numeric_header=['DEPTH_MD', 'X_LOC', 'Y_LOC', 'Z_LOC',
       'CALI', 'RSHA', 'RMED', 'RDEP', 'RHOB', 'GR', 'NPHI', 'PEF', 'DTC',
       'SP', 'BS', 'ROP', 'DTS', 'DRHO', 'FORCE_2020_LITHOFACIES_LITHOLOGY'
       ]
categorical_header=['WELL','GROUP', 'FORMATION','Lithology']
numeric=All_data_drop.select_dtypes(include=[np.number])
categorical= All_data_drop.select_dtypes(exclude=[np.number])
miss = SimpleImputer(missing_values=np.nan, strategy='median')
miss.fit(numeric)
numeric_imp = miss.fit_transform(numeric)
numeric_imp=pd.DataFrame(numeric_imp, columns=numeric_header)
miss2 = SimpleImputer(missing_values=np.nan, strategy='most_frequent')
miss2.fit(categorical)
categorical_imp = miss2.fit_transform(categorical)
categorical_imp=pd.DataFrame(categorical_imp, columns=categorical_header)
frames = [numeric_imp,categorical_imp]
  
result = pd.concat(frames,axis=1, join='inner')
result

Unnamed: 0,DEPTH_MD,X_LOC,Y_LOC,Z_LOC,CALI,RSHA,RMED,RDEP,RHOB,GR,...,SP,BS,ROP,DTS,DRHO,FORCE_2020_LITHOFACIES_LITHOLOGY,WELL,GROUP,FORMATION,Lithology
0,494.5280,437641.96875,6470972.5,-469.501831,19.480835,1.398049,1.611410,1.798681,1.884186,80.200851,...,24.612379,12.250001,34.636410,189.362198,-0.574928,65000.0,15/9-13,NORDLAND GP.,Utsira Fm.,Shale
1,494.6800,437641.96875,6470972.5,-469.653809,19.468800,1.398049,1.618070,1.795641,1.889794,79.262886,...,23.895531,12.250001,34.636410,189.362198,-0.570188,65000.0,15/9-13,NORDLAND GP.,Utsira Fm.,Shale
2,494.8320,437641.96875,6470972.5,-469.805786,19.468800,1.398049,1.626459,1.800733,1.896523,74.821999,...,23.916357,12.250001,34.779556,189.362198,-0.574245,65000.0,15/9-13,NORDLAND GP.,Utsira Fm.,Shale
3,494.9840,437641.96875,6470972.5,-469.957794,19.459282,1.398049,1.621594,1.801517,1.891913,72.878922,...,23.793688,12.250001,39.965164,189.362198,-0.586315,65000.0,15/9-13,NORDLAND GP.,Utsira Fm.,Shale
4,495.1360,437641.96875,6470972.5,-470.109772,19.453100,1.398049,1.602679,1.795299,1.880034,71.729141,...,24.104078,12.250001,57.483765,189.362198,-0.597914,65000.0,15/9-13,NORDLAND GP.,Utsira Fm.,Shale
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1429689,2973.2988,536096.06250,6793022.0,-2943.444580,8.276272,1.398049,2.820439,3.158570,2.331407,90.720284,...,54.270451,8.500000,15.195305,136.911575,0.502458,65000.0,35/9-7,BAAT GP.,Etive Fm.,Shale
1429690,2973.4508,536096.06250,6793022.0,-2943.595947,8.267273,1.398049,3.020778,3.332977,2.331407,87.062027,...,54.270451,8.500000,15.770223,137.583923,0.374753,65000.0,35/9-7,BAAT GP.,Etive Fm.,Shale
1429691,2973.6028,536096.06250,6793022.0,-2943.747559,8.250099,1.398049,2.795711,3.044179,2.331407,86.115921,...,54.270451,8.500000,16.418465,138.310898,0.211487,65000.0,35/9-7,BAAT GP.,Etive Fm.,Shale
1429692,2973.7548,536096.06250,6793022.0,-2943.899170,12.515673,1.398049,2.658694,2.847681,2.331407,89.497131,...,54.270451,8.500000,17.037945,137.592819,0.147950,65000.0,35/9-7,BAAT GP.,Etive Fm.,Shale


In [8]:
# encoding categorical variables
result['GROUP_encoded'] = result['GROUP'].astype('category')
result['GROUP_encoded'] = result['GROUP_encoded'].cat.codes

result['FORMATION_encoded'] = result['FORMATION'].astype('category')
result['FORMATION_encoded'] = result['FORMATION_encoded'].cat.codes

result['WELL_encoded'] = result['WELL'].astype('category')
result['WELL_encoded'] = result['WELL_encoded'].cat.codes

result['Lithology_encoded'] = result['FORCE_2020_LITHOFACIES_LITHOLOGY'].astype('category')
result['Lithology_encoded'] = result['Lithology_encoded'].cat.codes

In [9]:
train_imp = result[:train_len].copy()
test_imp = result[train_len:(train_len+test_len)].copy()
hidden_imp = result[(train_len+test_len):].copy()

In [10]:
print(train_imp.shape)
print(test_imp.shape)
print(hidden_imp.shape)

(1170511, 27)
(136786, 27)
(122397, 27)


In [11]:
from sklearn.preprocessing import StandardScaler, Normalizer, MinMaxScaler
x_header=['DEPTH_MD', 'X_LOC', 'Y_LOC', 'Z_LOC', 'CALI', 'RSHA', 'RMED', 'RDEP',
       'RHOB', 'GR', 'NPHI', 'PEF', 'DTC', 'SP', 'BS', 'ROP', 'DTS', 'DRHO',
       'GROUP_encoded', 'FORMATION_encoded', 'WELL_encoded']
y_header=['Lithology_encoded']
x_train = train_imp[x_header]
y_train = train_imp[y_header]
x_test = test_imp[x_header]
y_test = test_imp[y_header]
x_hidden = hidden_imp[x_header]
y_hidden = hidden_imp[y_header]

##Min-Max scaler 
scaler = MinMaxScaler()
x_train_scaled = x_train.copy()
x_test_scaled = x_test.copy()
x_hidden_scaled = x_hidden.copy()

x_train_scaled.iloc[:,:18] = scaler.fit_transform(x_train_scaled.iloc[:,:18])
x_test_scaled.iloc[:,:18] = scaler.transform(x_test_scaled.iloc[:,:18])
x_hidden_scaled.iloc[:,:18] = scaler.transform(x_hidden_scaled.iloc[:,:18])

In [12]:
from sklearn.model_selection import cross_val_score

new_train = pd.concat((x_train_scaled, pd.DataFrame(y_train, columns=["Lithology_encoded"])), axis=1)

#Randomly sampling data
sampled_train = new_train.sample(n=50000, random_state=0)


#Spliting training data
x_train_sam = sampled_train.drop(["Lithology_encoded"], axis=1)
y_train_sam = sampled_train["Lithology_encoded"]


In [13]:
#X_train, X_test, Y_train, Y_test = train_test_split( X, y, test_size=0.10, random_state=42,stratify=y)

In [14]:
# A = np.load('penalty_matrix.npy')
# def score(y_true, y_pred):
#     S = 0.0
#     y_true = y_true.astype(int)
#     y_pred = y_pred.astype(int)
#     for i in range(0, y_true.shape[0]):
#         S -= A[y_true[i], y_pred[i]]
#     return S/y_true.shape[0]

In [15]:
#Supervised Algorithms
from sklearn import model_selection
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import mean_squared_error, accuracy_score, recall_score, precision_score, f1_score
from sklearn.neighbors import KNeighborsRegressor
from pprint import pprint
from sklearn.model_selection import StratifiedKFold
from sklearn.naive_bayes import GaussianNB
import xgboost
from xgboost import XGBClassifier
from sklearn.neighbors import KNeighborsClassifier
#Comparing base models accuracies by using k-fold cross validation - 10 folds

from sklearn.model_selection import cross_val_score

model_rf = RandomForestClassifier()
                                    


model_rf.fit(x_train_sam, y_train_sam.values.ravel())
             
# train_pred_rf = model_rf.predict(x_train)
# open_pred_rf = model_rf.predict(x_test)
# hidden_pred_rf = model_rf.predict(x_hidden)
#Printing Reports 


  from pandas import MultiIndex, Int64Index


RandomForestClassifier()

In [16]:
# from sklearn.metrics import classification_report, accuracy_score
# print('-----------------------TRAIN SET REPORT---------------------')
# print("Open set RMSE:", np.sqrt(mean_squared_error(y_train, train_pred_rf)))
# print('Open set penalty matrix score:', score(y_train.values, train_pred_rf))
# print('Open set report:', classification_report(y_train, train_pred_rf))
# print('-----------------------OPEN SET REPORT---------------------')
# print("Open set RMSE:", np.sqrt(mean_squared_error(y_test, open_pred_rf)))
# print('Open set penalty matrix score:', score(y_test.values, open_pred_rf))
# print('Open set report:', classification_report(y_test, open_pred_rf))
# print('-----------------------HIDDEN SET REPORT---------------------')
# print("Hidden set RMSE:", np.sqrt(mean_squared_error(y_hidden, hidden_pred_rf)))
# print('Hidden set penalty matrix score:', score(y_hidden.values, hidden_pred_rf))
# print('Hidden set report:', classification_report(y_hidden, hidden_pred_rf))

In [17]:
def grid_search(model):
    param_grid = {'n_estimators': [100, 150,200],
           'max_depth': [10,15,20],'max_features': ['auto', 'sqrt'],'min_samples_leaf' : [1, 2, 4],
           'criterion': ['gini', 'entropy']}
    model_cv = model_selection.GridSearchCV(estimator=model, param_grid=param_grid,
                                          scoring='f1_weighted', verbose=10, n_jobs=1, cv=10)
    model_cv.fit(x_train_sam, y_train_sam)

    print("Best score is: {}".format(model_cv.best_score_))
    print("Tuned Model Parameter: {}".format(model_cv.best_params_))

In [18]:
grid_search(model_rf)

Fitting 10 folds for each of 108 candidates, totalling 1080 fits
[CV 1/10; 1/108] START criterion=gini, max_depth=10, max_features=auto, min_samples_leaf=1, n_estimators=100




[CV 1/10; 1/108] END criterion=gini, max_depth=10, max_features=auto, min_samples_leaf=1, n_estimators=100;, score=0.811 total time=  10.0s
[CV 2/10; 1/108] START criterion=gini, max_depth=10, max_features=auto, min_samples_leaf=1, n_estimators=100
[CV 2/10; 1/108] END criterion=gini, max_depth=10, max_features=auto, min_samples_leaf=1, n_estimators=100;, score=0.815 total time=  10.9s
[CV 3/10; 1/108] START criterion=gini, max_depth=10, max_features=auto, min_samples_leaf=1, n_estimators=100
[CV 3/10; 1/108] END criterion=gini, max_depth=10, max_features=auto, min_samples_leaf=1, n_estimators=100;, score=0.817 total time=  10.6s
[CV 4/10; 1/108] START criterion=gini, max_depth=10, max_features=auto, min_samples_leaf=1, n_estimators=100
[CV 4/10; 1/108] END criterion=gini, max_depth=10, max_features=auto, min_samples_leaf=1, n_estimators=100;, score=0.819 total time=  11.4s
[CV 5/10; 1/108] START criterion=gini, max_depth=10, max_features=auto, min_samples_leaf=1, n_estimators=100
[CV 

[CV 4/10; 4/108] END criterion=gini, max_depth=10, max_features=auto, min_samples_leaf=2, n_estimators=100;, score=0.814 total time=   9.7s
[CV 5/10; 4/108] START criterion=gini, max_depth=10, max_features=auto, min_samples_leaf=2, n_estimators=100
[CV 5/10; 4/108] END criterion=gini, max_depth=10, max_features=auto, min_samples_leaf=2, n_estimators=100;, score=0.807 total time=   9.7s
[CV 6/10; 4/108] START criterion=gini, max_depth=10, max_features=auto, min_samples_leaf=2, n_estimators=100
[CV 6/10; 4/108] END criterion=gini, max_depth=10, max_features=auto, min_samples_leaf=2, n_estimators=100;, score=0.824 total time=  10.3s
[CV 7/10; 4/108] START criterion=gini, max_depth=10, max_features=auto, min_samples_leaf=2, n_estimators=100
[CV 7/10; 4/108] END criterion=gini, max_depth=10, max_features=auto, min_samples_leaf=2, n_estimators=100;, score=0.815 total time=   9.8s
[CV 8/10; 4/108] START criterion=gini, max_depth=10, max_features=auto, min_samples_leaf=2, n_estimators=100
[CV 

[CV 7/10; 7/108] END criterion=gini, max_depth=10, max_features=auto, min_samples_leaf=4, n_estimators=100;, score=0.811 total time=   9.6s
[CV 8/10; 7/108] START criterion=gini, max_depth=10, max_features=auto, min_samples_leaf=4, n_estimators=100
[CV 8/10; 7/108] END criterion=gini, max_depth=10, max_features=auto, min_samples_leaf=4, n_estimators=100;, score=0.811 total time=   9.4s
[CV 9/10; 7/108] START criterion=gini, max_depth=10, max_features=auto, min_samples_leaf=4, n_estimators=100
[CV 9/10; 7/108] END criterion=gini, max_depth=10, max_features=auto, min_samples_leaf=4, n_estimators=100;, score=0.806 total time=   9.5s
[CV 10/10; 7/108] START criterion=gini, max_depth=10, max_features=auto, min_samples_leaf=4, n_estimators=100
[CV 10/10; 7/108] END criterion=gini, max_depth=10, max_features=auto, min_samples_leaf=4, n_estimators=100;, score=0.811 total time=   9.4s
[CV 1/10; 8/108] START criterion=gini, max_depth=10, max_features=auto, min_samples_leaf=4, n_estimators=150
[C

[CV 10/10; 10/108] END criterion=gini, max_depth=10, max_features=sqrt, min_samples_leaf=1, n_estimators=100;, score=0.816 total time=  10.8s
[CV 1/10; 11/108] START criterion=gini, max_depth=10, max_features=sqrt, min_samples_leaf=1, n_estimators=150
[CV 1/10; 11/108] END criterion=gini, max_depth=10, max_features=sqrt, min_samples_leaf=1, n_estimators=150;, score=0.811 total time=  16.0s
[CV 2/10; 11/108] START criterion=gini, max_depth=10, max_features=sqrt, min_samples_leaf=1, n_estimators=150
[CV 2/10; 11/108] END criterion=gini, max_depth=10, max_features=sqrt, min_samples_leaf=1, n_estimators=150;, score=0.811 total time=  15.4s
[CV 3/10; 11/108] START criterion=gini, max_depth=10, max_features=sqrt, min_samples_leaf=1, n_estimators=150
[CV 3/10; 11/108] END criterion=gini, max_depth=10, max_features=sqrt, min_samples_leaf=1, n_estimators=150;, score=0.817 total time=  14.2s
[CV 4/10; 11/108] START criterion=gini, max_depth=10, max_features=sqrt, min_samples_leaf=1, n_estimators

[CV 3/10; 14/108] END criterion=gini, max_depth=10, max_features=sqrt, min_samples_leaf=2, n_estimators=150;, score=0.814 total time=  17.0s
[CV 4/10; 14/108] START criterion=gini, max_depth=10, max_features=sqrt, min_samples_leaf=2, n_estimators=150
[CV 4/10; 14/108] END criterion=gini, max_depth=10, max_features=sqrt, min_samples_leaf=2, n_estimators=150;, score=0.817 total time=  21.5s
[CV 5/10; 14/108] START criterion=gini, max_depth=10, max_features=sqrt, min_samples_leaf=2, n_estimators=150
[CV 5/10; 14/108] END criterion=gini, max_depth=10, max_features=sqrt, min_samples_leaf=2, n_estimators=150;, score=0.804 total time=  23.4s
[CV 6/10; 14/108] START criterion=gini, max_depth=10, max_features=sqrt, min_samples_leaf=2, n_estimators=150
[CV 6/10; 14/108] END criterion=gini, max_depth=10, max_features=sqrt, min_samples_leaf=2, n_estimators=150;, score=0.820 total time=  20.7s
[CV 7/10; 14/108] START criterion=gini, max_depth=10, max_features=sqrt, min_samples_leaf=2, n_estimators=

[CV 6/10; 17/108] END criterion=gini, max_depth=10, max_features=sqrt, min_samples_leaf=4, n_estimators=150;, score=0.819 total time=  15.6s
[CV 7/10; 17/108] START criterion=gini, max_depth=10, max_features=sqrt, min_samples_leaf=4, n_estimators=150
[CV 7/10; 17/108] END criterion=gini, max_depth=10, max_features=sqrt, min_samples_leaf=4, n_estimators=150;, score=0.809 total time=  16.0s
[CV 8/10; 17/108] START criterion=gini, max_depth=10, max_features=sqrt, min_samples_leaf=4, n_estimators=150
[CV 8/10; 17/108] END criterion=gini, max_depth=10, max_features=sqrt, min_samples_leaf=4, n_estimators=150;, score=0.813 total time=  16.1s
[CV 9/10; 17/108] START criterion=gini, max_depth=10, max_features=sqrt, min_samples_leaf=4, n_estimators=150
[CV 9/10; 17/108] END criterion=gini, max_depth=10, max_features=sqrt, min_samples_leaf=4, n_estimators=150;, score=0.811 total time=  17.2s
[CV 10/10; 17/108] START criterion=gini, max_depth=10, max_features=sqrt, min_samples_leaf=4, n_estimators

[CV 9/10; 20/108] END criterion=gini, max_depth=15, max_features=auto, min_samples_leaf=1, n_estimators=150;, score=0.881 total time=  20.3s
[CV 10/10; 20/108] START criterion=gini, max_depth=15, max_features=auto, min_samples_leaf=1, n_estimators=150
[CV 10/10; 20/108] END criterion=gini, max_depth=15, max_features=auto, min_samples_leaf=1, n_estimators=150;, score=0.880 total time=  19.7s
[CV 1/10; 21/108] START criterion=gini, max_depth=15, max_features=auto, min_samples_leaf=1, n_estimators=200
[CV 1/10; 21/108] END criterion=gini, max_depth=15, max_features=auto, min_samples_leaf=1, n_estimators=200;, score=0.881 total time=  26.7s
[CV 2/10; 21/108] START criterion=gini, max_depth=15, max_features=auto, min_samples_leaf=1, n_estimators=200
[CV 2/10; 21/108] END criterion=gini, max_depth=15, max_features=auto, min_samples_leaf=1, n_estimators=200;, score=0.884 total time=  32.3s
[CV 3/10; 21/108] START criterion=gini, max_depth=15, max_features=auto, min_samples_leaf=1, n_estimator

[CV 2/10; 24/108] END criterion=gini, max_depth=15, max_features=auto, min_samples_leaf=2, n_estimators=200;, score=0.881 total time=  50.1s
[CV 3/10; 24/108] START criterion=gini, max_depth=15, max_features=auto, min_samples_leaf=2, n_estimators=200
[CV 3/10; 24/108] END criterion=gini, max_depth=15, max_features=auto, min_samples_leaf=2, n_estimators=200;, score=0.877 total time=  45.7s
[CV 4/10; 24/108] START criterion=gini, max_depth=15, max_features=auto, min_samples_leaf=2, n_estimators=200
[CV 4/10; 24/108] END criterion=gini, max_depth=15, max_features=auto, min_samples_leaf=2, n_estimators=200;, score=0.881 total time=  53.7s
[CV 5/10; 24/108] START criterion=gini, max_depth=15, max_features=auto, min_samples_leaf=2, n_estimators=200
[CV 5/10; 24/108] END criterion=gini, max_depth=15, max_features=auto, min_samples_leaf=2, n_estimators=200;, score=0.870 total time=  58.1s
[CV 6/10; 24/108] START criterion=gini, max_depth=15, max_features=auto, min_samples_leaf=2, n_estimators=

[CV 5/10; 27/108] END criterion=gini, max_depth=15, max_features=auto, min_samples_leaf=4, n_estimators=200;, score=0.864 total time=  47.3s
[CV 6/10; 27/108] START criterion=gini, max_depth=15, max_features=auto, min_samples_leaf=4, n_estimators=200
[CV 6/10; 27/108] END criterion=gini, max_depth=15, max_features=auto, min_samples_leaf=4, n_estimators=200;, score=0.874 total time=  51.0s
[CV 7/10; 27/108] START criterion=gini, max_depth=15, max_features=auto, min_samples_leaf=4, n_estimators=200
[CV 7/10; 27/108] END criterion=gini, max_depth=15, max_features=auto, min_samples_leaf=4, n_estimators=200;, score=0.874 total time=  56.0s
[CV 8/10; 27/108] START criterion=gini, max_depth=15, max_features=auto, min_samples_leaf=4, n_estimators=200
[CV 8/10; 27/108] END criterion=gini, max_depth=15, max_features=auto, min_samples_leaf=4, n_estimators=200;, score=0.872 total time=  55.4s
[CV 9/10; 27/108] START criterion=gini, max_depth=15, max_features=auto, min_samples_leaf=4, n_estimators=

[CV 8/10; 30/108] END criterion=gini, max_depth=15, max_features=sqrt, min_samples_leaf=1, n_estimators=200;, score=0.883 total time=  52.4s
[CV 9/10; 30/108] START criterion=gini, max_depth=15, max_features=sqrt, min_samples_leaf=1, n_estimators=200
[CV 9/10; 30/108] END criterion=gini, max_depth=15, max_features=sqrt, min_samples_leaf=1, n_estimators=200;, score=0.876 total time=  48.7s
[CV 10/10; 30/108] START criterion=gini, max_depth=15, max_features=sqrt, min_samples_leaf=1, n_estimators=200
[CV 10/10; 30/108] END criterion=gini, max_depth=15, max_features=sqrt, min_samples_leaf=1, n_estimators=200;, score=0.881 total time=  54.0s
[CV 1/10; 31/108] START criterion=gini, max_depth=15, max_features=sqrt, min_samples_leaf=2, n_estimators=100
[CV 1/10; 31/108] END criterion=gini, max_depth=15, max_features=sqrt, min_samples_leaf=2, n_estimators=100;, score=0.877 total time=  28.6s
[CV 2/10; 31/108] START criterion=gini, max_depth=15, max_features=sqrt, min_samples_leaf=2, n_estimator

[CV 1/10; 34/108] END criterion=gini, max_depth=15, max_features=sqrt, min_samples_leaf=4, n_estimators=100;, score=0.870 total time=  28.5s
[CV 2/10; 34/108] START criterion=gini, max_depth=15, max_features=sqrt, min_samples_leaf=4, n_estimators=100
[CV 2/10; 34/108] END criterion=gini, max_depth=15, max_features=sqrt, min_samples_leaf=4, n_estimators=100;, score=0.875 total time=  27.7s
[CV 3/10; 34/108] START criterion=gini, max_depth=15, max_features=sqrt, min_samples_leaf=4, n_estimators=100
[CV 3/10; 34/108] END criterion=gini, max_depth=15, max_features=sqrt, min_samples_leaf=4, n_estimators=100;, score=0.871 total time=  27.9s
[CV 4/10; 34/108] START criterion=gini, max_depth=15, max_features=sqrt, min_samples_leaf=4, n_estimators=100
[CV 4/10; 34/108] END criterion=gini, max_depth=15, max_features=sqrt, min_samples_leaf=4, n_estimators=100;, score=0.873 total time=  28.1s
[CV 5/10; 34/108] START criterion=gini, max_depth=15, max_features=sqrt, min_samples_leaf=4, n_estimators=

[CV 4/10; 37/108] END criterion=gini, max_depth=20, max_features=auto, min_samples_leaf=1, n_estimators=100;, score=0.897 total time=  31.2s
[CV 5/10; 37/108] START criterion=gini, max_depth=20, max_features=auto, min_samples_leaf=1, n_estimators=100
[CV 5/10; 37/108] END criterion=gini, max_depth=20, max_features=auto, min_samples_leaf=1, n_estimators=100;, score=0.885 total time=  31.9s
[CV 6/10; 37/108] START criterion=gini, max_depth=20, max_features=auto, min_samples_leaf=1, n_estimators=100
[CV 6/10; 37/108] END criterion=gini, max_depth=20, max_features=auto, min_samples_leaf=1, n_estimators=100;, score=0.893 total time=  31.6s
[CV 7/10; 37/108] START criterion=gini, max_depth=20, max_features=auto, min_samples_leaf=1, n_estimators=100
[CV 7/10; 37/108] END criterion=gini, max_depth=20, max_features=auto, min_samples_leaf=1, n_estimators=100;, score=0.901 total time=  31.5s
[CV 8/10; 37/108] START criterion=gini, max_depth=20, max_features=auto, min_samples_leaf=1, n_estimators=

[CV 7/10; 40/108] END criterion=gini, max_depth=20, max_features=auto, min_samples_leaf=2, n_estimators=100;, score=0.897 total time=  34.6s
[CV 8/10; 40/108] START criterion=gini, max_depth=20, max_features=auto, min_samples_leaf=2, n_estimators=100
[CV 8/10; 40/108] END criterion=gini, max_depth=20, max_features=auto, min_samples_leaf=2, n_estimators=100;, score=0.891 total time=  35.0s
[CV 9/10; 40/108] START criterion=gini, max_depth=20, max_features=auto, min_samples_leaf=2, n_estimators=100
[CV 9/10; 40/108] END criterion=gini, max_depth=20, max_features=auto, min_samples_leaf=2, n_estimators=100;, score=0.887 total time=  33.5s
[CV 10/10; 40/108] START criterion=gini, max_depth=20, max_features=auto, min_samples_leaf=2, n_estimators=100
[CV 10/10; 40/108] END criterion=gini, max_depth=20, max_features=auto, min_samples_leaf=2, n_estimators=100;, score=0.890 total time=  35.0s
[CV 1/10; 41/108] START criterion=gini, max_depth=20, max_features=auto, min_samples_leaf=2, n_estimator

[CV 10/10; 43/108] END criterion=gini, max_depth=20, max_features=auto, min_samples_leaf=4, n_estimators=100;, score=0.883 total time=  33.2s
[CV 1/10; 44/108] START criterion=gini, max_depth=20, max_features=auto, min_samples_leaf=4, n_estimators=150
[CV 1/10; 44/108] END criterion=gini, max_depth=20, max_features=auto, min_samples_leaf=4, n_estimators=150;, score=0.885 total time=  49.2s
[CV 2/10; 44/108] START criterion=gini, max_depth=20, max_features=auto, min_samples_leaf=4, n_estimators=150
[CV 2/10; 44/108] END criterion=gini, max_depth=20, max_features=auto, min_samples_leaf=4, n_estimators=150;, score=0.884 total time=  49.2s
[CV 3/10; 44/108] START criterion=gini, max_depth=20, max_features=auto, min_samples_leaf=4, n_estimators=150
[CV 3/10; 44/108] END criterion=gini, max_depth=20, max_features=auto, min_samples_leaf=4, n_estimators=150;, score=0.883 total time=  50.7s
[CV 4/10; 44/108] START criterion=gini, max_depth=20, max_features=auto, min_samples_leaf=4, n_estimators

[CV 3/10; 47/108] END criterion=gini, max_depth=20, max_features=sqrt, min_samples_leaf=1, n_estimators=150;, score=0.894 total time=  52.0s
[CV 4/10; 47/108] START criterion=gini, max_depth=20, max_features=sqrt, min_samples_leaf=1, n_estimators=150
[CV 4/10; 47/108] END criterion=gini, max_depth=20, max_features=sqrt, min_samples_leaf=1, n_estimators=150;, score=0.897 total time=  52.4s
[CV 5/10; 47/108] START criterion=gini, max_depth=20, max_features=sqrt, min_samples_leaf=1, n_estimators=150
[CV 5/10; 47/108] END criterion=gini, max_depth=20, max_features=sqrt, min_samples_leaf=1, n_estimators=150;, score=0.886 total time=  55.3s
[CV 6/10; 47/108] START criterion=gini, max_depth=20, max_features=sqrt, min_samples_leaf=1, n_estimators=150
[CV 6/10; 47/108] END criterion=gini, max_depth=20, max_features=sqrt, min_samples_leaf=1, n_estimators=150;, score=0.895 total time=  52.9s
[CV 7/10; 47/108] START criterion=gini, max_depth=20, max_features=sqrt, min_samples_leaf=1, n_estimators=

[CV 6/10; 50/108] END criterion=gini, max_depth=20, max_features=sqrt, min_samples_leaf=2, n_estimators=150;, score=0.893 total time=  50.8s
[CV 7/10; 50/108] START criterion=gini, max_depth=20, max_features=sqrt, min_samples_leaf=2, n_estimators=150
[CV 7/10; 50/108] END criterion=gini, max_depth=20, max_features=sqrt, min_samples_leaf=2, n_estimators=150;, score=0.894 total time=  51.3s
[CV 8/10; 50/108] START criterion=gini, max_depth=20, max_features=sqrt, min_samples_leaf=2, n_estimators=150
[CV 8/10; 50/108] END criterion=gini, max_depth=20, max_features=sqrt, min_samples_leaf=2, n_estimators=150;, score=0.893 total time=  47.1s
[CV 9/10; 50/108] START criterion=gini, max_depth=20, max_features=sqrt, min_samples_leaf=2, n_estimators=150
[CV 9/10; 50/108] END criterion=gini, max_depth=20, max_features=sqrt, min_samples_leaf=2, n_estimators=150;, score=0.886 total time=  42.9s
[CV 10/10; 50/108] START criterion=gini, max_depth=20, max_features=sqrt, min_samples_leaf=2, n_estimators

[CV 9/10; 53/108] END criterion=gini, max_depth=20, max_features=sqrt, min_samples_leaf=4, n_estimators=150;, score=0.883 total time=  41.8s
[CV 10/10; 53/108] START criterion=gini, max_depth=20, max_features=sqrt, min_samples_leaf=4, n_estimators=150
[CV 10/10; 53/108] END criterion=gini, max_depth=20, max_features=sqrt, min_samples_leaf=4, n_estimators=150;, score=0.885 total time=  41.3s
[CV 1/10; 54/108] START criterion=gini, max_depth=20, max_features=sqrt, min_samples_leaf=4, n_estimators=200
[CV 1/10; 54/108] END criterion=gini, max_depth=20, max_features=sqrt, min_samples_leaf=4, n_estimators=200;, score=0.885 total time=  55.7s
[CV 2/10; 54/108] START criterion=gini, max_depth=20, max_features=sqrt, min_samples_leaf=4, n_estimators=200
[CV 2/10; 54/108] END criterion=gini, max_depth=20, max_features=sqrt, min_samples_leaf=4, n_estimators=200;, score=0.886 total time=  55.1s
[CV 3/10; 54/108] START criterion=gini, max_depth=20, max_features=sqrt, min_samples_leaf=4, n_estimator

[CV 2/10; 57/108] END criterion=entropy, max_depth=10, max_features=auto, min_samples_leaf=1, n_estimators=200;, score=0.831 total time= 1.5min
[CV 3/10; 57/108] START criterion=entropy, max_depth=10, max_features=auto, min_samples_leaf=1, n_estimators=200
[CV 3/10; 57/108] END criterion=entropy, max_depth=10, max_features=auto, min_samples_leaf=1, n_estimators=200;, score=0.837 total time= 1.5min
[CV 4/10; 57/108] START criterion=entropy, max_depth=10, max_features=auto, min_samples_leaf=1, n_estimators=200
[CV 4/10; 57/108] END criterion=entropy, max_depth=10, max_features=auto, min_samples_leaf=1, n_estimators=200;, score=0.838 total time= 1.5min
[CV 5/10; 57/108] START criterion=entropy, max_depth=10, max_features=auto, min_samples_leaf=1, n_estimators=200
[CV 5/10; 57/108] END criterion=entropy, max_depth=10, max_features=auto, min_samples_leaf=1, n_estimators=200;, score=0.826 total time= 1.3min
[CV 6/10; 57/108] START criterion=entropy, max_depth=10, max_features=auto, min_sampl

[CV 4/10; 60/108] END criterion=entropy, max_depth=10, max_features=auto, min_samples_leaf=2, n_estimators=200;, score=0.840 total time= 1.2min
[CV 5/10; 60/108] START criterion=entropy, max_depth=10, max_features=auto, min_samples_leaf=2, n_estimators=200
[CV 5/10; 60/108] END criterion=entropy, max_depth=10, max_features=auto, min_samples_leaf=2, n_estimators=200;, score=0.819 total time= 1.4min
[CV 6/10; 60/108] START criterion=entropy, max_depth=10, max_features=auto, min_samples_leaf=2, n_estimators=200
[CV 6/10; 60/108] END criterion=entropy, max_depth=10, max_features=auto, min_samples_leaf=2, n_estimators=200;, score=0.839 total time= 1.4min
[CV 7/10; 60/108] START criterion=entropy, max_depth=10, max_features=auto, min_samples_leaf=2, n_estimators=200
[CV 7/10; 60/108] END criterion=entropy, max_depth=10, max_features=auto, min_samples_leaf=2, n_estimators=200;, score=0.833 total time= 1.4min
[CV 8/10; 60/108] START criterion=entropy, max_depth=10, max_features=auto, min_sampl

[CV 6/10; 63/108] END criterion=entropy, max_depth=10, max_features=auto, min_samples_leaf=4, n_estimators=200;, score=0.836 total time= 1.5min
[CV 7/10; 63/108] START criterion=entropy, max_depth=10, max_features=auto, min_samples_leaf=4, n_estimators=200
[CV 7/10; 63/108] END criterion=entropy, max_depth=10, max_features=auto, min_samples_leaf=4, n_estimators=200;, score=0.834 total time= 1.6min
[CV 8/10; 63/108] START criterion=entropy, max_depth=10, max_features=auto, min_samples_leaf=4, n_estimators=200
[CV 8/10; 63/108] END criterion=entropy, max_depth=10, max_features=auto, min_samples_leaf=4, n_estimators=200;, score=0.829 total time= 1.5min
[CV 9/10; 63/108] START criterion=entropy, max_depth=10, max_features=auto, min_samples_leaf=4, n_estimators=200
[CV 9/10; 63/108] END criterion=entropy, max_depth=10, max_features=auto, min_samples_leaf=4, n_estimators=200;, score=0.821 total time= 1.5min
[CV 10/10; 63/108] START criterion=entropy, max_depth=10, max_features=auto, min_samp

[CV 8/10; 66/108] END criterion=entropy, max_depth=10, max_features=sqrt, min_samples_leaf=1, n_estimators=200;, score=0.838 total time= 1.5min
[CV 9/10; 66/108] START criterion=entropy, max_depth=10, max_features=sqrt, min_samples_leaf=1, n_estimators=200
[CV 9/10; 66/108] END criterion=entropy, max_depth=10, max_features=sqrt, min_samples_leaf=1, n_estimators=200;, score=0.830 total time= 1.5min
[CV 10/10; 66/108] START criterion=entropy, max_depth=10, max_features=sqrt, min_samples_leaf=1, n_estimators=200
[CV 10/10; 66/108] END criterion=entropy, max_depth=10, max_features=sqrt, min_samples_leaf=1, n_estimators=200;, score=0.832 total time= 1.3min
[CV 1/10; 67/108] START criterion=entropy, max_depth=10, max_features=sqrt, min_samples_leaf=2, n_estimators=100
[CV 1/10; 67/108] END criterion=entropy, max_depth=10, max_features=sqrt, min_samples_leaf=2, n_estimators=100;, score=0.828 total time=  39.3s
[CV 2/10; 67/108] START criterion=entropy, max_depth=10, max_features=sqrt, min_sam

[CV 10/10; 69/108] END criterion=entropy, max_depth=10, max_features=sqrt, min_samples_leaf=2, n_estimators=200;, score=0.834 total time= 1.2min
[CV 1/10; 70/108] START criterion=entropy, max_depth=10, max_features=sqrt, min_samples_leaf=4, n_estimators=100
[CV 1/10; 70/108] END criterion=entropy, max_depth=10, max_features=sqrt, min_samples_leaf=4, n_estimators=100;, score=0.831 total time=  37.0s
[CV 2/10; 70/108] START criterion=entropy, max_depth=10, max_features=sqrt, min_samples_leaf=4, n_estimators=100
[CV 2/10; 70/108] END criterion=entropy, max_depth=10, max_features=sqrt, min_samples_leaf=4, n_estimators=100;, score=0.825 total time=  41.7s
[CV 3/10; 70/108] START criterion=entropy, max_depth=10, max_features=sqrt, min_samples_leaf=4, n_estimators=100
[CV 3/10; 70/108] END criterion=entropy, max_depth=10, max_features=sqrt, min_samples_leaf=4, n_estimators=100;, score=0.831 total time=  44.5s
[CV 4/10; 70/108] START criterion=entropy, max_depth=10, max_features=sqrt, min_samp

[CV 2/10; 73/108] END criterion=entropy, max_depth=15, max_features=auto, min_samples_leaf=1, n_estimators=100;, score=0.890 total time=  56.4s
[CV 3/10; 73/108] START criterion=entropy, max_depth=15, max_features=auto, min_samples_leaf=1, n_estimators=100
[CV 3/10; 73/108] END criterion=entropy, max_depth=15, max_features=auto, min_samples_leaf=1, n_estimators=100;, score=0.890 total time=  55.1s
[CV 4/10; 73/108] START criterion=entropy, max_depth=15, max_features=auto, min_samples_leaf=1, n_estimators=100
[CV 4/10; 73/108] END criterion=entropy, max_depth=15, max_features=auto, min_samples_leaf=1, n_estimators=100;, score=0.892 total time=  56.6s
[CV 5/10; 73/108] START criterion=entropy, max_depth=15, max_features=auto, min_samples_leaf=1, n_estimators=100
[CV 5/10; 73/108] END criterion=entropy, max_depth=15, max_features=auto, min_samples_leaf=1, n_estimators=100;, score=0.883 total time=  57.1s
[CV 6/10; 73/108] START criterion=entropy, max_depth=15, max_features=auto, min_sampl

[CV 4/10; 76/108] END criterion=entropy, max_depth=15, max_features=auto, min_samples_leaf=2, n_estimators=100;, score=0.889 total time=  55.4s
[CV 5/10; 76/108] START criterion=entropy, max_depth=15, max_features=auto, min_samples_leaf=2, n_estimators=100
[CV 5/10; 76/108] END criterion=entropy, max_depth=15, max_features=auto, min_samples_leaf=2, n_estimators=100;, score=0.880 total time=  50.6s
[CV 6/10; 76/108] START criterion=entropy, max_depth=15, max_features=auto, min_samples_leaf=2, n_estimators=100
[CV 6/10; 76/108] END criterion=entropy, max_depth=15, max_features=auto, min_samples_leaf=2, n_estimators=100;, score=0.892 total time=  50.3s
[CV 7/10; 76/108] START criterion=entropy, max_depth=15, max_features=auto, min_samples_leaf=2, n_estimators=100
[CV 7/10; 76/108] END criterion=entropy, max_depth=15, max_features=auto, min_samples_leaf=2, n_estimators=100;, score=0.890 total time=  51.2s
[CV 8/10; 76/108] START criterion=entropy, max_depth=15, max_features=auto, min_sampl

[CV 6/10; 79/108] END criterion=entropy, max_depth=15, max_features=auto, min_samples_leaf=4, n_estimators=100;, score=0.885 total time=  53.3s
[CV 7/10; 79/108] START criterion=entropy, max_depth=15, max_features=auto, min_samples_leaf=4, n_estimators=100
[CV 7/10; 79/108] END criterion=entropy, max_depth=15, max_features=auto, min_samples_leaf=4, n_estimators=100;, score=0.886 total time=  43.8s
[CV 8/10; 79/108] START criterion=entropy, max_depth=15, max_features=auto, min_samples_leaf=4, n_estimators=100
[CV 8/10; 79/108] END criterion=entropy, max_depth=15, max_features=auto, min_samples_leaf=4, n_estimators=100;, score=0.881 total time=  42.3s
[CV 9/10; 79/108] START criterion=entropy, max_depth=15, max_features=auto, min_samples_leaf=4, n_estimators=100
[CV 9/10; 79/108] END criterion=entropy, max_depth=15, max_features=auto, min_samples_leaf=4, n_estimators=100;, score=0.877 total time=  42.9s
[CV 10/10; 79/108] START criterion=entropy, max_depth=15, max_features=auto, min_samp

[CV 8/10; 82/108] END criterion=entropy, max_depth=15, max_features=sqrt, min_samples_leaf=1, n_estimators=100;, score=0.890 total time=  43.4s
[CV 9/10; 82/108] START criterion=entropy, max_depth=15, max_features=sqrt, min_samples_leaf=1, n_estimators=100
[CV 9/10; 82/108] END criterion=entropy, max_depth=15, max_features=sqrt, min_samples_leaf=1, n_estimators=100;, score=0.882 total time=  43.2s
[CV 10/10; 82/108] START criterion=entropy, max_depth=15, max_features=sqrt, min_samples_leaf=1, n_estimators=100
[CV 10/10; 82/108] END criterion=entropy, max_depth=15, max_features=sqrt, min_samples_leaf=1, n_estimators=100;, score=0.892 total time=  44.2s
[CV 1/10; 83/108] START criterion=entropy, max_depth=15, max_features=sqrt, min_samples_leaf=1, n_estimators=150
[CV 1/10; 83/108] END criterion=entropy, max_depth=15, max_features=sqrt, min_samples_leaf=1, n_estimators=150;, score=0.892 total time= 1.1min
[CV 2/10; 83/108] START criterion=entropy, max_depth=15, max_features=sqrt, min_sam

[CV 10/10; 85/108] END criterion=entropy, max_depth=15, max_features=sqrt, min_samples_leaf=2, n_estimators=100;, score=0.891 total time=  41.8s
[CV 1/10; 86/108] START criterion=entropy, max_depth=15, max_features=sqrt, min_samples_leaf=2, n_estimators=150
[CV 1/10; 86/108] END criterion=entropy, max_depth=15, max_features=sqrt, min_samples_leaf=2, n_estimators=150;, score=0.886 total time= 1.1min
[CV 2/10; 86/108] START criterion=entropy, max_depth=15, max_features=sqrt, min_samples_leaf=2, n_estimators=150
[CV 2/10; 86/108] END criterion=entropy, max_depth=15, max_features=sqrt, min_samples_leaf=2, n_estimators=150;, score=0.891 total time= 1.1min
[CV 3/10; 86/108] START criterion=entropy, max_depth=15, max_features=sqrt, min_samples_leaf=2, n_estimators=150
[CV 3/10; 86/108] END criterion=entropy, max_depth=15, max_features=sqrt, min_samples_leaf=2, n_estimators=150;, score=0.888 total time= 1.3min
[CV 4/10; 86/108] START criterion=entropy, max_depth=15, max_features=sqrt, min_samp

[CV 2/10; 89/108] END criterion=entropy, max_depth=15, max_features=sqrt, min_samples_leaf=4, n_estimators=150;, score=0.883 total time= 1.0min
[CV 3/10; 89/108] START criterion=entropy, max_depth=15, max_features=sqrt, min_samples_leaf=4, n_estimators=150
[CV 3/10; 89/108] END criterion=entropy, max_depth=15, max_features=sqrt, min_samples_leaf=4, n_estimators=150;, score=0.882 total time=  58.0s
[CV 4/10; 89/108] START criterion=entropy, max_depth=15, max_features=sqrt, min_samples_leaf=4, n_estimators=150
[CV 4/10; 89/108] END criterion=entropy, max_depth=15, max_features=sqrt, min_samples_leaf=4, n_estimators=150;, score=0.884 total time=  58.1s
[CV 5/10; 89/108] START criterion=entropy, max_depth=15, max_features=sqrt, min_samples_leaf=4, n_estimators=150
[CV 5/10; 89/108] END criterion=entropy, max_depth=15, max_features=sqrt, min_samples_leaf=4, n_estimators=150;, score=0.873 total time=  50.0s
[CV 6/10; 89/108] START criterion=entropy, max_depth=15, max_features=sqrt, min_sampl

[CV 4/10; 92/108] END criterion=entropy, max_depth=20, max_features=auto, min_samples_leaf=1, n_estimators=150;, score=0.906 total time= 1.1min
[CV 5/10; 92/108] START criterion=entropy, max_depth=20, max_features=auto, min_samples_leaf=1, n_estimators=150
[CV 5/10; 92/108] END criterion=entropy, max_depth=20, max_features=auto, min_samples_leaf=1, n_estimators=150;, score=0.894 total time= 1.1min
[CV 6/10; 92/108] START criterion=entropy, max_depth=20, max_features=auto, min_samples_leaf=1, n_estimators=150
[CV 6/10; 92/108] END criterion=entropy, max_depth=20, max_features=auto, min_samples_leaf=1, n_estimators=150;, score=0.904 total time=  59.5s
[CV 7/10; 92/108] START criterion=entropy, max_depth=20, max_features=auto, min_samples_leaf=1, n_estimators=150
[CV 7/10; 92/108] END criterion=entropy, max_depth=20, max_features=auto, min_samples_leaf=1, n_estimators=150;, score=0.908 total time= 1.0min
[CV 8/10; 92/108] START criterion=entropy, max_depth=20, max_features=auto, min_sampl

[CV 6/10; 95/108] END criterion=entropy, max_depth=20, max_features=auto, min_samples_leaf=2, n_estimators=150;, score=0.900 total time=  59.5s
[CV 7/10; 95/108] START criterion=entropy, max_depth=20, max_features=auto, min_samples_leaf=2, n_estimators=150
[CV 7/10; 95/108] END criterion=entropy, max_depth=20, max_features=auto, min_samples_leaf=2, n_estimators=150;, score=0.901 total time= 1.0min
[CV 8/10; 95/108] START criterion=entropy, max_depth=20, max_features=auto, min_samples_leaf=2, n_estimators=150
[CV 8/10; 95/108] END criterion=entropy, max_depth=20, max_features=auto, min_samples_leaf=2, n_estimators=150;, score=0.898 total time= 1.1min
[CV 9/10; 95/108] START criterion=entropy, max_depth=20, max_features=auto, min_samples_leaf=2, n_estimators=150
[CV 9/10; 95/108] END criterion=entropy, max_depth=20, max_features=auto, min_samples_leaf=2, n_estimators=150;, score=0.893 total time= 1.1min
[CV 10/10; 95/108] START criterion=entropy, max_depth=20, max_features=auto, min_samp

[CV 8/10; 98/108] END criterion=entropy, max_depth=20, max_features=auto, min_samples_leaf=4, n_estimators=150;, score=0.891 total time= 1.1min
[CV 9/10; 98/108] START criterion=entropy, max_depth=20, max_features=auto, min_samples_leaf=4, n_estimators=150
[CV 9/10; 98/108] END criterion=entropy, max_depth=20, max_features=auto, min_samples_leaf=4, n_estimators=150;, score=0.884 total time= 1.2min
[CV 10/10; 98/108] START criterion=entropy, max_depth=20, max_features=auto, min_samples_leaf=4, n_estimators=150
[CV 10/10; 98/108] END criterion=entropy, max_depth=20, max_features=auto, min_samples_leaf=4, n_estimators=150;, score=0.888 total time= 1.1min
[CV 1/10; 99/108] START criterion=entropy, max_depth=20, max_features=auto, min_samples_leaf=4, n_estimators=200
[CV 1/10; 99/108] END criterion=entropy, max_depth=20, max_features=auto, min_samples_leaf=4, n_estimators=200;, score=0.890 total time= 1.5min
[CV 2/10; 99/108] START criterion=entropy, max_depth=20, max_features=auto, min_sam

[CV 10/10; 101/108] END criterion=entropy, max_depth=20, max_features=sqrt, min_samples_leaf=1, n_estimators=150;, score=0.900 total time=  51.7s
[CV 1/10; 102/108] START criterion=entropy, max_depth=20, max_features=sqrt, min_samples_leaf=1, n_estimators=200
[CV 1/10; 102/108] END criterion=entropy, max_depth=20, max_features=sqrt, min_samples_leaf=1, n_estimators=200;, score=0.903 total time= 1.1min
[CV 2/10; 102/108] START criterion=entropy, max_depth=20, max_features=sqrt, min_samples_leaf=1, n_estimators=200
[CV 2/10; 102/108] END criterion=entropy, max_depth=20, max_features=sqrt, min_samples_leaf=1, n_estimators=200;, score=0.902 total time= 1.1min
[CV 3/10; 102/108] START criterion=entropy, max_depth=20, max_features=sqrt, min_samples_leaf=1, n_estimators=200
[CV 3/10; 102/108] END criterion=entropy, max_depth=20, max_features=sqrt, min_samples_leaf=1, n_estimators=200;, score=0.899 total time= 1.2min
[CV 4/10; 102/108] START criterion=entropy, max_depth=20, max_features=sqrt, 

[CV 2/10; 105/108] END criterion=entropy, max_depth=20, max_features=sqrt, min_samples_leaf=2, n_estimators=200;, score=0.896 total time= 1.3min
[CV 3/10; 105/108] START criterion=entropy, max_depth=20, max_features=sqrt, min_samples_leaf=2, n_estimators=200
[CV 3/10; 105/108] END criterion=entropy, max_depth=20, max_features=sqrt, min_samples_leaf=2, n_estimators=200;, score=0.897 total time= 1.3min
[CV 4/10; 105/108] START criterion=entropy, max_depth=20, max_features=sqrt, min_samples_leaf=2, n_estimators=200
[CV 4/10; 105/108] END criterion=entropy, max_depth=20, max_features=sqrt, min_samples_leaf=2, n_estimators=200;, score=0.902 total time= 1.3min
[CV 5/10; 105/108] START criterion=entropy, max_depth=20, max_features=sqrt, min_samples_leaf=2, n_estimators=200
[CV 5/10; 105/108] END criterion=entropy, max_depth=20, max_features=sqrt, min_samples_leaf=2, n_estimators=200;, score=0.888 total time= 1.3min
[CV 6/10; 105/108] START criterion=entropy, max_depth=20, max_features=sqrt, m

[CV 4/10; 108/108] END criterion=entropy, max_depth=20, max_features=sqrt, min_samples_leaf=4, n_estimators=200;, score=0.893 total time= 1.2min
[CV 5/10; 108/108] START criterion=entropy, max_depth=20, max_features=sqrt, min_samples_leaf=4, n_estimators=200
[CV 5/10; 108/108] END criterion=entropy, max_depth=20, max_features=sqrt, min_samples_leaf=4, n_estimators=200;, score=0.882 total time= 1.2min
[CV 6/10; 108/108] START criterion=entropy, max_depth=20, max_features=sqrt, min_samples_leaf=4, n_estimators=200
[CV 6/10; 108/108] END criterion=entropy, max_depth=20, max_features=sqrt, min_samples_leaf=4, n_estimators=200;, score=0.892 total time= 1.3min
[CV 7/10; 108/108] START criterion=entropy, max_depth=20, max_features=sqrt, min_samples_leaf=4, n_estimators=200
[CV 7/10; 108/108] END criterion=entropy, max_depth=20, max_features=sqrt, min_samples_leaf=4, n_estimators=200;, score=0.893 total time= 1.4min
[CV 8/10; 108/108] START criterion=entropy, max_depth=20, max_features=sqrt, m

In [1]:

model_rf_tuned = RandomForestClassifier(criterion='entropy',max_depth=20, max_features='auto', min_samples_leaf= 1, n_estimators=150)

# Fit the regressor to the training data
model_rf_tuned.fit(x_train_scaled, y_train.values.ravel())

# Prediction
train_pred_rf = model_rf_tuned.predict(x_train_scaled)
open_pred_rf = model_rf_tuned.predict(x_test_scaled)
hidden_pred_rf = model_rf_tuned.predict(x_hidden_scaled)
#Printing Reports 
#Printing Reports


NameError: name 'RandomForestClassifier' is not defined

In [None]:
A = np.load('penalty_matrix.npy')
def score(y_true, y_pred):
    S = 0.0
    y_true = y_true.astype(int)
    y_pred = y_pred.astype(int)
    for i in range(0, y_true.shape[0]):
        S -= A[y_true[i], y_pred[i]]
    return S/y_true.shape[0]

In [None]:
from sklearn.metrics import classification_report, accuracy_score
print('-----------------------TRAIN SET REPORT---------------------')
print("Open set RMSE:", np.sqrt(mean_squared_error(y_train, train_pred_rf)))
print('Open set penalty matrix score:', score(y_train.values, train_pred_rf))
print('Open set report:', classification_report(y_train, train_pred_rf))
print('-----------------------OPEN SET REPORT---------------------')
print("Open set RMSE:", np.sqrt(mean_squared_error(y_test, open_pred_rf)))
print('Open set penalty matrix score:', score(y_test.values, open_pred_rf))
print('Open set report:', classification_report(y_test, open_pred_rf))
print('-----------------------HIDDEN SET REPORT---------------------')
print("Hidden set RMSE:", np.sqrt(mean_squared_error(y_hidden, hidden_pred_rf)))
print('Hidden set penalty matrix score:', score(y_hidden.values, hidden_pred_rf))
print('Hidden set report:', classification_report(y_hidden, hidden_pred_rf))