In [1]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OrdinalEncoder, LabelEncoder, StandardScaler
from imblearn.over_sampling import SMOTE
from mlxtend.feature_selection import SequentialFeatureSelector as sfs
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import confusion_matrix,classification_report,accuracy_score
from sklearn.model_selection import RandomizedSearchCV, GridSearchCV
import pandas as pd
import logging

In [2]:
data=pd.read_csv('Preprocessed_data.csv')

#Dividing target variable from the main dataset

X = data.iloc[: , 0:-1]
Y = data.iloc[: , -1] 

In [3]:


# Splitting the dataset into the Training set and Test set
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size = 0.2, random_state=1)



In [4]:
ordinal_encoder = OrdinalEncoder()
X_train_cat_encoded = pd.DataFrame(ordinal_encoder.fit_transform(X_train.select_dtypes(exclude='number')))
X_train_cat_encoded.columns = X_train.select_dtypes(exclude='number').columns

X_test_cat_encoded = pd.DataFrame(ordinal_encoder.transform(X_test.select_dtypes(exclude='number')))
X_test_cat_encoded.columns = X_test.select_dtypes(exclude='number').columns

In [5]:


label_encoder = LabelEncoder()
Y_train_cat_encoded= pd.DataFrame(label_encoder.fit_transform(Y_train))
Y_test_cat_encoded = pd.DataFrame(label_encoder.transform(Y_test))



In [6]:


sc = StandardScaler()
X_train_sc=pd.DataFrame(sc.fit_transform(X_train.select_dtypes(exclude='O')))
X_test_sc=pd.DataFrame(sc.transform(X_test.select_dtypes(exclude='O')))

X_train_sc.columns=X_train.select_dtypes(exclude='O').columns
X_test_sc.columns=X_test.select_dtypes(exclude='O').columns



In [7]:


X_train_final=pd.concat([X_train_sc,X_train_cat_encoded],axis=1)
X_test_final=pd.concat([X_test_sc,X_test_cat_encoded],axis=1)



In [8]:


X_train_resample,Y_train_resample=SMOTE(random_state=0,k_neighbors=1).fit_resample(X_train_final,Y_train_cat_encoded)
X_test_resample,Y_test_resample=SMOTE(random_state=0,k_neighbors=1).fit_resample(X_test_final,Y_test_cat_encoded)

X_train_resample.shape,X_test_resample.shape,Y_train_resample.shape,Y_test_resample.shape



((7852, 28), (1476, 28), (7852, 1), (1476, 1))

In [9]:


print('Training dataset shape:', X_train_resample.shape, Y_train_resample.shape)
print('Testing dataset shape:', X_test_resample.shape, Y_test_resample.shape)

Y_train_resample_flat = Y_train_resample.to_numpy().ravel()
Y_test_resample_flat = Y_test_resample.to_numpy().ravel()

print('Training dataset shape:', X_train_resample.shape, Y_train_resample_flat.shape)
print('Testing dataset shape:', X_test_resample.shape, Y_test_resample_flat.shape)



Training dataset shape: (7852, 28) (7852, 1)
Testing dataset shape: (1476, 28) (1476, 1)
Training dataset shape: (7852, 28) (7852,)
Testing dataset shape: (1476, 28) (1476,)


In [10]:
rf = RandomForestClassifier(n_estimators=100, max_depth=5)
forward_fs = sfs(rf , k_features=10,forward=True,floating=False,verbose=2,scoring='accuracy',cv=5)
forward_fs = forward_fs.fit(X_train_resample, Y_train_resample_flat)

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    2.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  28 out of  28 | elapsed:   37.6s finished

[2022-10-15 08:08:41] Features: 1/10 -- score: 0.787441647374588[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    2.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  27 out of  27 | elapsed:   52.1s finished

[2022-10-15 08:09:33] Features: 2/10 -- score: 0.9679059546639529[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    2.2s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  26 out of  26 | elapsed:   49.0s finished

[2022-10-15 08:10:22] Features: 3/10 -- score: 0.9873925894091556[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   

In [11]:
 

# Create the Handler for logging records/messages to a file
file_handler = logging.FileHandler("log_file.log")



In [12]:


#set the format of the log records and the logging level to DEBUG
logging.basicConfig(format='%(asctime)s %(levelname)s:%(message)s', level=logging.DEBUG)



In [13]:
# a function  to create and save logs in the log files
def log(path, file):
    """[Create a log file to record the experiment's logs]
    
    Arguments:
        path {string} -- path to the directory
        file {string} -- file name
    
    Returns:
        [obj] -- [logger that record logs]
    """

    # check if the file exist
    log_file = os.path.join(path, file)

    if not os.path.isfile(log_file):
        open(log_file, "w+").close()

    console_logging_format = "%(levelname)s %(message)s"
    file_logging_format = "%(levelname)s: %(asctime)s: %(message)s"

    # configure logger
    logging.basicConfig(level=logging.INFO, format=console_logging_format)
    logger = logging.getLogger()
    
    # create a file handler for output file
    handler = logging.FileHandler(log_file)

    # set the logging level for log file
    handler.setLevel(logging.INFO)
    
    # create a logging format
    formatter = logging.Formatter(file_logging_format)
    handler.setFormatter(formatter)

    # add the handlers to the logger
    logger.addHandler(handler)

    return logger

In [14]:
import os

logger = log(path=".",file="log_file.log")



In [15]:
logger.info("Feature Selection")

2022-10-15 08:15:45,046 INFO:Feature Selection


In [16]:
feat_names = list(forward_fs.k_feature_names_)
logger.info("Features {}".format(feat_names))
X_train_new=X_train_resample[['age','sex','TSH', 'TT4', 'FTI', 'on_thyroxine', 'on_antithyroid_medication', 'goitre', 'hypopituitary', 'psych', 'T3_measured']]
X_test_new=X_test_resample[['age','sex','TSH', 'TT4', 'FTI', 'on_thyroxine', 'on_antithyroid_medication', 'goitre', 'hypopituitary', 'psych', 'T3_measured']]

2022-10-15 08:15:45,065 INFO:Features ['TSH', 'TT4', 'FTI', 'on_thyroxine', 'on_antithyroid_medication', 'pregnant', 'query_hyperthyroid', 'psych', 'T4U_measured', 'referral_source']


In [17]:
#Fitting the Random Forest model
rf_model=rf.fit(X_train_new,Y_train_resample_flat)

logger.info("Traininig Random Forest Model")

#Checking the metrics of Random Forest
def print_Score(clf,x_train,x_test,y_train,y_test,train=True):
    if train:
        pred=clf.predict(x_train)
        clf_report=pd.DataFrame(classification_report(y_train,pred,output_dict=True))
        print("Train Result:\n===============")
        print(f"Accuracy Score:{accuracy_score(y_train,pred)*100:.2f}%")
        print("---------------------------------")
        print(f"Classification Report:\n{clf_report}")
        print("-----------------------------------")
        print(f"Confusion Matrix:\n{confusion_matrix(y_train,pred)}\n")
        logger.info("Train Result:\n==============="+"\n"+
                    f"Accuracy Score:{accuracy_score(y_train,pred)*100:.2f}%""\n"+
        "---------------------------------"+"\n"+
        f"Classification Report:\n{clf_report}"+"\n"+
        "-----------------------------------"+"\n"+
        f"Confusion Matrix:\n{confusion_matrix(y_train,pred)}\n")
    elif train==False:
        pred=clf.predict(x_test)
        clf_report=pd.DataFrame(classification_report(y_test,pred,output_dict=True))
        print("Test Result:\n===============")
        print(f"Accuracy Score:{accuracy_score(y_test,pred)*100:.2f}%")
        print("---------------------------------")
        print(f"Classification Report:\n{clf_report}")
        print("---------------------------------")
        print(f"Confusion Matrix:\n{confusion_matrix(y_test,pred)}\n")
        
        logger.info("Test Result:\n==============="+"\n"+
        f"Accuracy Score:{accuracy_score(y_test,pred)*100:.2f}%"+"\n"+
        "---------------------------------"+"\n"+
        f"Classification Report:\n{clf_report}"+"\n"+
        "---------------------------------"+"\n"+
        f"Confusion Matrix:\n{confusion_matrix(y_test,pred)}\n")
        
print_Score(rf_model,X_train_new,X_test_new,Y_train_resample_flat,Y_test_resample_flat,train=True)
print_Score(rf_model,X_train_new,X_test_new,Y_train_resample_flat,Y_test_resample_flat,train=False)

2022-10-15 08:15:45,581 INFO:Traininig Random Forest Model
2022-10-15 08:15:45,675 INFO:Train Result:
Accuracy Score:99.67%
---------------------------------
Classification Report:
                     0            1            2            3  accuracy  \
precision     0.988917     0.999484     0.998474     1.000000  0.996689   
recall        1.000000     0.987264     1.000000     0.999491  0.996689   
f1-score      0.994428     0.993337     0.999236     0.999745  0.996689   
support    1963.000000  1963.000000  1963.000000  1963.000000  0.996689   

             macro avg  weighted avg  
precision     0.996719      0.996719  
recall        0.996689      0.996689  
f1-score      0.996686      0.996686  
support    7852.000000   7852.000000  
-----------------------------------
Confusion Matrix:
[[1963    0    0    0]
 [  22 1938    3    0]
 [   0    0 1963    0]
 [   0    1    0 1962]]

2022-10-15 08:15:45,706 INFO:Test Result:
Accuracy Score:97.43%
---------------------------------
Cl

Train Result:
Accuracy Score:99.67%
---------------------------------
Classification Report:
                     0            1            2            3  accuracy  \
precision     0.988917     0.999484     0.998474     1.000000  0.996689   
recall        1.000000     0.987264     1.000000     0.999491  0.996689   
f1-score      0.994428     0.993337     0.999236     0.999745  0.996689   
support    1963.000000  1963.000000  1963.000000  1963.000000  0.996689   

             macro avg  weighted avg  
precision     0.996719      0.996719  
recall        0.996689      0.996689  
f1-score      0.996686      0.996686  
support    7852.000000   7852.000000  
-----------------------------------
Confusion Matrix:
[[1963    0    0    0]
 [  22 1938    3    0]
 [   0    0 1963    0]
 [   0    1    0 1962]]

Test Result:
Accuracy Score:97.43%
---------------------------------
Classification Report:
                    0           1           2  accuracy    macro avg  \
precision    0.928302   

In [18]:
##Hyper parameter tuning
RF=RandomForestClassifier()
model=RF.fit(X_train_new,Y_train_resample_flat)

print_Score(model,X_train_new,X_test_new,Y_train_resample_flat,Y_test_resample_flat,train=True)
print_Score(model,X_train_new,X_test_new,Y_train_resample_flat,Y_test_resample_flat,train=False)

2022-10-15 08:15:46,428 INFO:Train Result:
Accuracy Score:100.00%
---------------------------------
Classification Report:
                0       1       2       3  accuracy  macro avg  weighted avg
precision     1.0     1.0     1.0     1.0       1.0        1.0           1.0
recall        1.0     1.0     1.0     1.0       1.0        1.0           1.0
f1-score      1.0     1.0     1.0     1.0       1.0        1.0           1.0
support    1963.0  1963.0  1963.0  1963.0       1.0     7852.0        7852.0
-----------------------------------
Confusion Matrix:
[[1963    0    0    0]
 [   0 1963    0    0]
 [   0    0 1963    0]
 [   0    0    0 1963]]

2022-10-15 08:15:46,475 INFO:Test Result:
Accuracy Score:97.90%
---------------------------------
Classification Report:
                    0           1           2  accuracy    macro avg  \
precision    0.940727    1.000000    1.000000  0.978997     0.980242   
recall       1.000000    0.995935    0.941057  0.978997     0.978997   
f1-scor

Train Result:
Accuracy Score:100.00%
---------------------------------
Classification Report:
                0       1       2       3  accuracy  macro avg  weighted avg
precision     1.0     1.0     1.0     1.0       1.0        1.0           1.0
recall        1.0     1.0     1.0     1.0       1.0        1.0           1.0
f1-score      1.0     1.0     1.0     1.0       1.0        1.0           1.0
support    1963.0  1963.0  1963.0  1963.0       1.0     7852.0        7852.0
-----------------------------------
Confusion Matrix:
[[1963    0    0    0]
 [   0 1963    0    0]
 [   0    0 1963    0]
 [   0    0    0 1963]]

Test Result:
Accuracy Score:97.90%
---------------------------------
Classification Report:
                    0           1           2  accuracy    macro avg  \
precision    0.940727    1.000000    1.000000  0.978997     0.980242   
recall       1.000000    0.995935    0.941057  0.978997     0.978997   
f1-score     0.969458    0.997963    0.969634  0.978997     0.979

In [19]:
import numpy as np

## Randomized Search CV

#No of trees in Random Forest
n_estimators=[int(x) for x in np.linspace(start=200,stop=2000,num=10)]
#No of features consider at every split
max_features=['auto','sqrt','log2']
#maximum no of levels in trees
max_depth=[int(x) for x in np.linspace(10,1000,10)]
#minimum no of samples required to split a node
min_samples_split=[1,3,4,5,7,9]
#minimum samples leafs required at each leaf node
min_sample_leafs=[1,2,4,6,8]

#create random gird
random_grid={'n_estimators':n_estimators,
'max_features':max_features,
'max_depth':max_depth,
'min_samples_split':min_samples_split,
'min_samples_leaf':min_sample_leafs,
'criterion':['entropy','gini']}
print(random_grid)



{'n_estimators': [200, 400, 600, 800, 1000, 1200, 1400, 1600, 1800, 2000], 'max_features': ['auto', 'sqrt', 'log2'], 'max_depth': [10, 120, 230, 340, 450, 560, 670, 780, 890, 1000], 'min_samples_split': [1, 3, 4, 5, 7, 9], 'min_samples_leaf': [1, 2, 4, 6, 8], 'criterion': ['entropy', 'gini']}


In [20]:
rcv=RandomizedSearchCV(estimator=RF,param_distributions=random_grid,n_iter=100,cv=3,verbose=2,random_state=0,n_jobs=-1)

rcv.fit(X_train_new,Y_train_resample_flat)

logger.info("Use Randomized Search CV")

Fitting 3 folds for each of 100 candidates, totalling 300 fits


45 fits failed out of a total of 300.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
45 fits failed with the following error:
Traceback (most recent call last):
  File "C:\Users\harsha k\AppData\Roaming\Python\Python39\site-packages\sklearn\model_selection\_validation.py", line 686, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\harsha k\AppData\Roaming\Python\Python39\site-packages\sklearn\ensemble\_forest.py", line 476, in fit
    trees = Parallel(
  File "C:\ProgramData\Anaconda3\lib\site-packages\joblib\parallel.py", line 1043, in __call__
    if self.dispatch_one_batch(iterator):
  File "C:\ProgramData\Anaconda3\lib\site-packages\joblib\parallel.py", line 861, in dispatch_one_batch
    self._di

In [21]:
rcv.best_estimator_
logger.info(rcv.best_estimator_)

2022-10-15 08:22:09,554 INFO:RandomForestClassifier(criterion='entropy', max_depth=560, min_samples_split=9,
                       n_estimators=400)


In [22]:
best_random_grid=rcv.best_estimator_
logger.info("Result with best estimetors")
print_Score(best_random_grid,X_train_new,X_test_new,Y_train_resample_flat,Y_test_resample_flat,train=False)

2022-10-15 08:22:09,573 INFO:Result with best estimetors
2022-10-15 08:22:09,696 INFO:Test Result:
Accuracy Score:97.97%
---------------------------------
Classification Report:
                    0           1           2  accuracy    macro avg  \
precision    0.942529    1.000000    1.000000  0.979675     0.980843   
recall       1.000000    0.995935    0.943089  0.979675     0.979675   
f1-score     0.970414    0.997963    0.970711  0.979675     0.979696   
support    492.000000  492.000000  492.000000  0.979675  1476.000000   

           weighted avg  
precision      0.980843  
recall         0.979675  
f1-score       0.979696  
support     1476.000000  
---------------------------------
Confusion Matrix:
[[492   0   0]
 [  2 490   0]
 [ 28   0 464]]



Test Result:
Accuracy Score:97.97%
---------------------------------
Classification Report:
                    0           1           2  accuracy    macro avg  \
precision    0.942529    1.000000    1.000000  0.979675     0.980843   
recall       1.000000    0.995935    0.943089  0.979675     0.979675   
f1-score     0.970414    0.997963    0.970711  0.979675     0.979696   
support    492.000000  492.000000  492.000000  0.979675  1476.000000   

           weighted avg  
precision      0.980843  
recall         0.979675  
f1-score       0.979696  
support     1476.000000  
---------------------------------
Confusion Matrix:
[[492   0   0]
 [  2 490   0]
 [ 28   0 464]]



In [23]:
param_grid = {
    'criterion': [rcv.best_params_['criterion']],
    'max_depth': [rcv.best_params_['max_depth']],
    'max_features': [rcv.best_params_['max_features']],
    'min_samples_leaf': [rcv.best_params_['min_samples_leaf'], 
                         rcv.best_params_['min_samples_leaf']+2, 
                         rcv.best_params_['min_samples_leaf'] + 4],
    'min_samples_split': [rcv.best_params_['min_samples_split'] - 2,
                          rcv.best_params_['min_samples_split'] - 1,
                          rcv.best_params_['min_samples_split'], 
                          rcv.best_params_['min_samples_split'] +1,
                          rcv.best_params_['min_samples_split'] + 2],
    'n_estimators': [rcv.best_params_['n_estimators'] - 200, rcv.best_params_['n_estimators'] - 100, 
                     rcv.best_params_['n_estimators'], 
                     rcv.best_params_['n_estimators'] + 100, rcv.best_params_['n_estimators'] + 200]
}

In [24]:


print(param_grid)
logger.info(f"Parameter Grid: {param_grid}")



2022-10-15 08:22:09,727 INFO:Parameter Grid: {'criterion': ['entropy'], 'max_depth': [560], 'max_features': ['sqrt'], 'min_samples_leaf': [1, 3, 5], 'min_samples_split': [7, 8, 9, 10, 11], 'n_estimators': [200, 300, 400, 500, 600]}


{'criterion': ['entropy'], 'max_depth': [560], 'max_features': ['sqrt'], 'min_samples_leaf': [1, 3, 5], 'min_samples_split': [7, 8, 9, 10, 11], 'n_estimators': [200, 300, 400, 500, 600]}


In [25]:
grid_search=GridSearchCV(estimator=RF,param_grid=param_grid,cv=10,n_jobs=1,verbose=2)
grid_search.fit(X_train_new,Y_train_resample_flat)
logger.info("Grid Search CV: ") 

Fitting 10 folds for each of 75 candidates, totalling 750 fits
[CV] END criterion=entropy, max_depth=560, max_features=sqrt, min_samples_leaf=1, min_samples_split=7, n_estimators=200; total time=   1.5s
[CV] END criterion=entropy, max_depth=560, max_features=sqrt, min_samples_leaf=1, min_samples_split=7, n_estimators=200; total time=   1.5s
[CV] END criterion=entropy, max_depth=560, max_features=sqrt, min_samples_leaf=1, min_samples_split=7, n_estimators=200; total time=   1.5s
[CV] END criterion=entropy, max_depth=560, max_features=sqrt, min_samples_leaf=1, min_samples_split=7, n_estimators=200; total time=   1.5s
[CV] END criterion=entropy, max_depth=560, max_features=sqrt, min_samples_leaf=1, min_samples_split=7, n_estimators=200; total time=   1.5s
[CV] END criterion=entropy, max_depth=560, max_features=sqrt, min_samples_leaf=1, min_samples_split=7, n_estimators=200; total time=   1.5s
[CV] END criterion=entropy, max_depth=560, max_features=sqrt, min_samples_leaf=1, min_samples_spl

[CV] END criterion=entropy, max_depth=560, max_features=sqrt, min_samples_leaf=1, min_samples_split=8, n_estimators=200; total time=   1.5s
[CV] END criterion=entropy, max_depth=560, max_features=sqrt, min_samples_leaf=1, min_samples_split=8, n_estimators=300; total time=   2.2s
[CV] END criterion=entropy, max_depth=560, max_features=sqrt, min_samples_leaf=1, min_samples_split=8, n_estimators=300; total time=   2.2s
[CV] END criterion=entropy, max_depth=560, max_features=sqrt, min_samples_leaf=1, min_samples_split=8, n_estimators=300; total time=   2.4s
[CV] END criterion=entropy, max_depth=560, max_features=sqrt, min_samples_leaf=1, min_samples_split=8, n_estimators=300; total time=   2.6s
[CV] END criterion=entropy, max_depth=560, max_features=sqrt, min_samples_leaf=1, min_samples_split=8, n_estimators=300; total time=   2.3s
[CV] END criterion=entropy, max_depth=560, max_features=sqrt, min_samples_leaf=1, min_samples_split=8, n_estimators=300; total time=   2.3s
[CV] END criterion=e

[CV] END criterion=entropy, max_depth=560, max_features=sqrt, min_samples_leaf=1, min_samples_split=9, n_estimators=300; total time=   2.3s
[CV] END criterion=entropy, max_depth=560, max_features=sqrt, min_samples_leaf=1, min_samples_split=9, n_estimators=300; total time=   2.2s
[CV] END criterion=entropy, max_depth=560, max_features=sqrt, min_samples_leaf=1, min_samples_split=9, n_estimators=400; total time=   3.0s
[CV] END criterion=entropy, max_depth=560, max_features=sqrt, min_samples_leaf=1, min_samples_split=9, n_estimators=400; total time=   3.2s
[CV] END criterion=entropy, max_depth=560, max_features=sqrt, min_samples_leaf=1, min_samples_split=9, n_estimators=400; total time=   3.5s
[CV] END criterion=entropy, max_depth=560, max_features=sqrt, min_samples_leaf=1, min_samples_split=9, n_estimators=400; total time=   3.2s
[CV] END criterion=entropy, max_depth=560, max_features=sqrt, min_samples_leaf=1, min_samples_split=9, n_estimators=400; total time=   3.0s
[CV] END criterion=e

[CV] END criterion=entropy, max_depth=560, max_features=sqrt, min_samples_leaf=1, min_samples_split=10, n_estimators=400; total time=   3.4s
[CV] END criterion=entropy, max_depth=560, max_features=sqrt, min_samples_leaf=1, min_samples_split=10, n_estimators=400; total time=   3.0s
[CV] END criterion=entropy, max_depth=560, max_features=sqrt, min_samples_leaf=1, min_samples_split=10, n_estimators=400; total time=   2.9s
[CV] END criterion=entropy, max_depth=560, max_features=sqrt, min_samples_leaf=1, min_samples_split=10, n_estimators=500; total time=   4.0s
[CV] END criterion=entropy, max_depth=560, max_features=sqrt, min_samples_leaf=1, min_samples_split=10, n_estimators=500; total time=   4.4s
[CV] END criterion=entropy, max_depth=560, max_features=sqrt, min_samples_leaf=1, min_samples_split=10, n_estimators=500; total time=   3.8s
[CV] END criterion=entropy, max_depth=560, max_features=sqrt, min_samples_leaf=1, min_samples_split=10, n_estimators=500; total time=   3.8s
[CV] END crit

[CV] END criterion=entropy, max_depth=560, max_features=sqrt, min_samples_leaf=1, min_samples_split=11, n_estimators=500; total time=   3.8s
[CV] END criterion=entropy, max_depth=560, max_features=sqrt, min_samples_leaf=1, min_samples_split=11, n_estimators=500; total time=   4.1s
[CV] END criterion=entropy, max_depth=560, max_features=sqrt, min_samples_leaf=1, min_samples_split=11, n_estimators=500; total time=   4.1s
[CV] END criterion=entropy, max_depth=560, max_features=sqrt, min_samples_leaf=1, min_samples_split=11, n_estimators=500; total time=   3.8s
[CV] END criterion=entropy, max_depth=560, max_features=sqrt, min_samples_leaf=1, min_samples_split=11, n_estimators=600; total time=   4.6s
[CV] END criterion=entropy, max_depth=560, max_features=sqrt, min_samples_leaf=1, min_samples_split=11, n_estimators=600; total time=   4.9s
[CV] END criterion=entropy, max_depth=560, max_features=sqrt, min_samples_leaf=1, min_samples_split=11, n_estimators=600; total time=   4.7s
[CV] END crit

[CV] END criterion=entropy, max_depth=560, max_features=sqrt, min_samples_leaf=3, min_samples_split=7, n_estimators=600; total time=   4.7s
[CV] END criterion=entropy, max_depth=560, max_features=sqrt, min_samples_leaf=3, min_samples_split=7, n_estimators=600; total time=   4.5s
[CV] END criterion=entropy, max_depth=560, max_features=sqrt, min_samples_leaf=3, min_samples_split=7, n_estimators=600; total time=   4.4s
[CV] END criterion=entropy, max_depth=560, max_features=sqrt, min_samples_leaf=3, min_samples_split=7, n_estimators=600; total time=   5.1s
[CV] END criterion=entropy, max_depth=560, max_features=sqrt, min_samples_leaf=3, min_samples_split=7, n_estimators=600; total time=   4.8s
[CV] END criterion=entropy, max_depth=560, max_features=sqrt, min_samples_leaf=3, min_samples_split=8, n_estimators=200; total time=   1.6s
[CV] END criterion=entropy, max_depth=560, max_features=sqrt, min_samples_leaf=3, min_samples_split=8, n_estimators=200; total time=   1.7s
[CV] END criterion=e

[CV] END criterion=entropy, max_depth=560, max_features=sqrt, min_samples_leaf=3, min_samples_split=9, n_estimators=200; total time=   1.6s
[CV] END criterion=entropy, max_depth=560, max_features=sqrt, min_samples_leaf=3, min_samples_split=9, n_estimators=200; total time=   1.8s
[CV] END criterion=entropy, max_depth=560, max_features=sqrt, min_samples_leaf=3, min_samples_split=9, n_estimators=200; total time=   1.7s
[CV] END criterion=entropy, max_depth=560, max_features=sqrt, min_samples_leaf=3, min_samples_split=9, n_estimators=200; total time=   1.6s
[CV] END criterion=entropy, max_depth=560, max_features=sqrt, min_samples_leaf=3, min_samples_split=9, n_estimators=200; total time=   1.5s
[CV] END criterion=entropy, max_depth=560, max_features=sqrt, min_samples_leaf=3, min_samples_split=9, n_estimators=200; total time=   1.4s
[CV] END criterion=entropy, max_depth=560, max_features=sqrt, min_samples_leaf=3, min_samples_split=9, n_estimators=300; total time=   2.2s
[CV] END criterion=e

[CV] END criterion=entropy, max_depth=560, max_features=sqrt, min_samples_leaf=3, min_samples_split=10, n_estimators=300; total time=   2.0s
[CV] END criterion=entropy, max_depth=560, max_features=sqrt, min_samples_leaf=3, min_samples_split=10, n_estimators=300; total time=   1.9s
[CV] END criterion=entropy, max_depth=560, max_features=sqrt, min_samples_leaf=3, min_samples_split=10, n_estimators=300; total time=   2.1s
[CV] END criterion=entropy, max_depth=560, max_features=sqrt, min_samples_leaf=3, min_samples_split=10, n_estimators=300; total time=   2.3s
[CV] END criterion=entropy, max_depth=560, max_features=sqrt, min_samples_leaf=3, min_samples_split=10, n_estimators=300; total time=   2.1s
[CV] END criterion=entropy, max_depth=560, max_features=sqrt, min_samples_leaf=3, min_samples_split=10, n_estimators=300; total time=   2.0s
[CV] END criterion=entropy, max_depth=560, max_features=sqrt, min_samples_leaf=3, min_samples_split=10, n_estimators=300; total time=   2.2s
[CV] END crit

[CV] END criterion=entropy, max_depth=560, max_features=sqrt, min_samples_leaf=3, min_samples_split=11, n_estimators=400; total time=   3.1s
[CV] END criterion=entropy, max_depth=560, max_features=sqrt, min_samples_leaf=3, min_samples_split=11, n_estimators=400; total time=   2.9s
[CV] END criterion=entropy, max_depth=560, max_features=sqrt, min_samples_leaf=3, min_samples_split=11, n_estimators=400; total time=   2.8s
[CV] END criterion=entropy, max_depth=560, max_features=sqrt, min_samples_leaf=3, min_samples_split=11, n_estimators=400; total time=   2.8s
[CV] END criterion=entropy, max_depth=560, max_features=sqrt, min_samples_leaf=3, min_samples_split=11, n_estimators=400; total time=   2.7s
[CV] END criterion=entropy, max_depth=560, max_features=sqrt, min_samples_leaf=3, min_samples_split=11, n_estimators=400; total time=   2.9s
[CV] END criterion=entropy, max_depth=560, max_features=sqrt, min_samples_leaf=3, min_samples_split=11, n_estimators=400; total time=   3.0s
[CV] END crit

[CV] END criterion=entropy, max_depth=560, max_features=sqrt, min_samples_leaf=5, min_samples_split=7, n_estimators=500; total time=   3.3s
[CV] END criterion=entropy, max_depth=560, max_features=sqrt, min_samples_leaf=5, min_samples_split=7, n_estimators=500; total time=   3.6s
[CV] END criterion=entropy, max_depth=560, max_features=sqrt, min_samples_leaf=5, min_samples_split=7, n_estimators=500; total time=   3.5s
[CV] END criterion=entropy, max_depth=560, max_features=sqrt, min_samples_leaf=5, min_samples_split=7, n_estimators=500; total time=   3.4s
[CV] END criterion=entropy, max_depth=560, max_features=sqrt, min_samples_leaf=5, min_samples_split=7, n_estimators=500; total time=   3.2s
[CV] END criterion=entropy, max_depth=560, max_features=sqrt, min_samples_leaf=5, min_samples_split=7, n_estimators=500; total time=   3.5s
[CV] END criterion=entropy, max_depth=560, max_features=sqrt, min_samples_leaf=5, min_samples_split=7, n_estimators=500; total time=   3.5s
[CV] END criterion=e

[CV] END criterion=entropy, max_depth=560, max_features=sqrt, min_samples_leaf=5, min_samples_split=8, n_estimators=600; total time=   4.5s
[CV] END criterion=entropy, max_depth=560, max_features=sqrt, min_samples_leaf=5, min_samples_split=8, n_estimators=600; total time=   5.3s
[CV] END criterion=entropy, max_depth=560, max_features=sqrt, min_samples_leaf=5, min_samples_split=8, n_estimators=600; total time=   4.5s
[CV] END criterion=entropy, max_depth=560, max_features=sqrt, min_samples_leaf=5, min_samples_split=8, n_estimators=600; total time=   4.7s
[CV] END criterion=entropy, max_depth=560, max_features=sqrt, min_samples_leaf=5, min_samples_split=8, n_estimators=600; total time=   4.9s
[CV] END criterion=entropy, max_depth=560, max_features=sqrt, min_samples_leaf=5, min_samples_split=8, n_estimators=600; total time=   4.9s
[CV] END criterion=entropy, max_depth=560, max_features=sqrt, min_samples_leaf=5, min_samples_split=8, n_estimators=600; total time=   4.6s
[CV] END criterion=e

[CV] END criterion=entropy, max_depth=560, max_features=sqrt, min_samples_leaf=5, min_samples_split=9, n_estimators=600; total time=   4.5s
[CV] END criterion=entropy, max_depth=560, max_features=sqrt, min_samples_leaf=5, min_samples_split=10, n_estimators=200; total time=   1.4s
[CV] END criterion=entropy, max_depth=560, max_features=sqrt, min_samples_leaf=5, min_samples_split=10, n_estimators=200; total time=   1.7s
[CV] END criterion=entropy, max_depth=560, max_features=sqrt, min_samples_leaf=5, min_samples_split=10, n_estimators=200; total time=   1.7s
[CV] END criterion=entropy, max_depth=560, max_features=sqrt, min_samples_leaf=5, min_samples_split=10, n_estimators=200; total time=   1.5s
[CV] END criterion=entropy, max_depth=560, max_features=sqrt, min_samples_leaf=5, min_samples_split=10, n_estimators=200; total time=   1.5s
[CV] END criterion=entropy, max_depth=560, max_features=sqrt, min_samples_leaf=5, min_samples_split=10, n_estimators=200; total time=   1.4s
[CV] END crite

[CV] END criterion=entropy, max_depth=560, max_features=sqrt, min_samples_leaf=5, min_samples_split=11, n_estimators=200; total time=   1.5s
[CV] END criterion=entropy, max_depth=560, max_features=sqrt, min_samples_leaf=5, min_samples_split=11, n_estimators=200; total time=   1.9s
[CV] END criterion=entropy, max_depth=560, max_features=sqrt, min_samples_leaf=5, min_samples_split=11, n_estimators=300; total time=   2.8s
[CV] END criterion=entropy, max_depth=560, max_features=sqrt, min_samples_leaf=5, min_samples_split=11, n_estimators=300; total time=   2.4s
[CV] END criterion=entropy, max_depth=560, max_features=sqrt, min_samples_leaf=5, min_samples_split=11, n_estimators=300; total time=   2.3s
[CV] END criterion=entropy, max_depth=560, max_features=sqrt, min_samples_leaf=5, min_samples_split=11, n_estimators=300; total time=   2.3s
[CV] END criterion=entropy, max_depth=560, max_features=sqrt, min_samples_leaf=5, min_samples_split=11, n_estimators=300; total time=   2.4s
[CV] END crit

2022-10-15 09:03:21,227 INFO:Grid Search CV: 


In [26]:


best_grid=grid_search.best_estimator_
best_grid
logger.info(f"Best Grid: {best_grid}")



2022-10-15 09:03:21,249 INFO:Best Grid: RandomForestClassifier(criterion='entropy', max_depth=560, min_samples_split=7,
                       n_estimators=300)


In [27]:
logger.info("Result with Best Grid: ")
print_Score(best_grid,X_train_new,X_test_new,Y_train_resample_flat,Y_test_resample_flat,train=False)

2022-10-15 09:03:21,259 INFO:Result with Best Grid: 
2022-10-15 09:03:21,353 INFO:Test Result:
Accuracy Score:97.97%
---------------------------------
Classification Report:
                    0           1           2  accuracy    macro avg  \
precision    0.942529    1.000000    1.000000  0.979675     0.980843   
recall       1.000000    0.995935    0.943089  0.979675     0.979675   
f1-score     0.970414    0.997963    0.970711  0.979675     0.979696   
support    492.000000  492.000000  492.000000  0.979675  1476.000000   

           weighted avg  
precision      0.980843  
recall         0.979675  
f1-score       0.979696  
support     1476.000000  
---------------------------------
Confusion Matrix:
[[492   0   0]
 [  2 490   0]
 [ 28   0 464]]



Test Result:
Accuracy Score:97.97%
---------------------------------
Classification Report:
                    0           1           2  accuracy    macro avg  \
precision    0.942529    1.000000    1.000000  0.979675     0.980843   
recall       1.000000    0.995935    0.943089  0.979675     0.979675   
f1-score     0.970414    0.997963    0.970711  0.979675     0.979696   
support    492.000000  492.000000  492.000000  0.979675  1476.000000   

           weighted avg  
precision      0.980843  
recall         0.979675  
f1-score       0.979696  
support     1476.000000  
---------------------------------
Confusion Matrix:
[[492   0   0]
 [  2 490   0]
 [ 28   0 464]]



In [28]:
 #Now, will convert our final model into pickle file

import pickle

pickle_out=open('Thyroid.pkl','wb')
pickle.dump(grid_search,pickle_out)
pickle_out.close()
logger.info("Random Forest Model with Grid Search Saved in Pickle")   

2022-10-15 09:03:21,400 INFO:Random Forest Model with Grid Search Saved in Pickle
