## Prediction with Different Classification Methods on the Dataset

In [None]:
# scikeras provides the KerasClassifier wrapper imported from scikeras.wrappers below.
# NOTE(review): the package version is not pinned; consider pinning it so re-runs are reproducible.
%pip install scikeras

In [2]:
import pandas as pd
import numpy as np
import os
import tensorflow as tf

from sklearn.exceptions import ConvergenceWarning
import warnings

from sklearn.decomposition import PCA
from sklearn import metrics
from sklearn.metrics import classification_report
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.utils import to_categorical

# ML Models
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from xgboost import XGBClassifier
from scikeras.wrappers import KerasClassifier

# Model Selection
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import RandomizedSearchCV

# Ensemble Methods
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier, GradientBoostingClassifier
from sklearn.ensemble import IsolationForest

In [3]:
# Suppress warnings
# Ignore scikit-learn ConvergenceWarning specifically ...
warnings.filterwarnings("ignore", category=ConvergenceWarning)
# ... and then every warning category; this blanket filter also covers the one above.
warnings.filterwarnings("ignore")
# Also export the "ignore" setting through the environment
# (presumably so that worker subprocesses inherit it — confirm).
os.environ["PYTHONWARNINGS"] = "ignore"
# Lower TensorFlow's native log verbosity.
# NOTE(review): tensorflow is already imported in an earlier cell; confirm this
# variable is still honoured when it is set after the import.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
# Restrict TensorFlow's Python logger to ERROR and above.
tf.get_logger().setLevel('ERROR')

In [4]:
# Register a plain-text display formatter for class objects (instances of `type`):
# their repr is cut to at most 10,000 characters before being shown.
# `for_type` returns the formatter that was previously registered, which is why
# this cell echoes a function object as its output.
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.instance().display_formatter.formatters['text/plain'].for_type(
    type, lambda obj, p, cycle: p.text(repr(obj)[:10000])
)

<function IPython.lib.pretty._type_pprint(obj, p, cycle)>

### Metrics Calculations

In [5]:
def calculate_metrics(classifier, y_value, y_pred):
    """Print a titled classification report for one set of predictions.

    Args:
        classifier: Label shown in the heading line (formatted into the title).
        y_value: Ground-truth target values.
        y_pred: Predicted target values, aligned with ``y_value``.

    Returns:
        None. The heading and the report text are written to stdout.
    """
    heading = "\n{} Metrics: ".format(classifier)
    print(heading)
    # The report is built only after the heading has been printed.
    print(classification_report(y_true=y_value, y_pred=y_pred))

In [6]:
def fit_model_and_generate_metrics(model, label, X_train, X_test, y_train, y_test):
    """Fit ``model`` on the training split and print its metrics on the test split.

    Args:
        model: Estimator exposing ``fit(X, y)`` and ``predict(X)``; it is fitted in place.
        label: Name passed to ``calculate_metrics`` for the report heading.
        X_train: Training features.
        X_test: Test features.
        y_train: Training targets.
        y_test: Test targets the predictions are compared against.

    Returns:
        None. The report is printed by ``calculate_metrics``.
    """
    model.fit(X_train, y_train)
    test_predictions = model.predict(X_test)
    calculate_metrics(label, y_test, test_predictions)

In [7]:
class ModelTuningAndEvaluation:
    """Tune, fit and evaluate a fixed set of classifiers on one feature CSV.

    Constructing an instance reads the CSV, drops outlier rows, separates
    features from the target, remaps the target to ``0..n-1``, standardises the
    features and creates a stratified train/test split. Each public
    ``*_model`` method then runs hyper-parameter search, prints a
    classification report on the test split and prints cross-validation scores.

    Attribute and method names keep their original spelling because notebook
    cells call them by these names.
    """

    # ------------------------------------------------------------------
    # Hyper-parameter search spaces, one per estimator. The same dictionary
    # is handed to both gridSerach and randomSearch.
    # ------------------------------------------------------------------
    param_grid_logistic_regression = {
        'C': [0.01, 1, 10],
        'solver': ['lbfgs', 'liblinear', 'saga'],
        'penalty': ['l2'],
        'max_iter': [100, 500]
    }

    param_grid_decission_tree_classifier = {
        'max_depth': [None, 5, 20, 50],
        'min_samples_split': [2, 5, 10, 20],
        'criterion': ['gini', 'entropy'],
    }

    param_grid_random_forest_classifier = {
        'n_estimators': [50, 100],
        'max_depth': [10, 20],
        'bootstrap': [True],
        'criterion': ['gini', 'entropy']
    }

    param_grid_gaussian_naive_bias = {
        'var_smoothing': [1e-9, 1e-8, 1e-7, 1e-6, 1e-5, 1e-4]
    }

    param_grid_svc = {
        'C': [0.1, 1, 10, 100, 1000],
        'gamma': [1, 0.1, 0.01, 0.001],
        'kernel': ['rbf', 'poly']
    }

    param_grid_knn = {
        'n_neighbors': [100, 500, 700, 900, 1100],
        'weights': ['uniform', 'distance'],
        'metric': ['minkowski'],
        'p': [1, 2]
    }

    param_grid_ada_boost = {
        'n_estimators': [50, 100],
        'learning_rate': [0.5, 1.0],
        'estimator': [
            DecisionTreeClassifier(max_depth=1)
        ],
    }

    param_grid_xgb = {
        'n_estimators': [50],
        'learning_rate': [0.1, 0.2],
        'subsample': [0.8, 1.0],
        'gamma': [0, 0.1],
    }

    param_grid_grad_boost = {
        'n_estimators': [50, 100],
        'learning_rate': [0.2],
        'max_depth': [3],
        'random_state': [42]
    }

    # Keys prefixed with ``model__`` are forwarded to build_ann by the
    # KerasClassifier wrapper; ``epochs`` and ``batch_size`` are wrapper parameters.
    param_grid_ann = {
        'model__n_neurons': [16],
        'model__activation': ['relu', 'tanh'],
        'epochs': [100, 150],
        'batch_size': [50, 100]
    }

    def __init__(self, file_path):
        """Load and prepare the dataset stored at ``file_path``.

        Args:
            file_path: Path of a CSV file. Its first column is discarded as an
                index and its last column is used as the classification target.
        """
        self.feature_path = file_path
        self.feature_df = self.get_feture()
        self.X, self.y = self.split_feture_and_target()
        # XGBoost needs class labels that start at zero, so remap the target.
        self.y = self.map_zero_to_n()
        self.number_of_categories = self.get_number_of_categories()
        self.X_train, self.X_test, self.y_train, self.y_test = self.get_scale_and_test_train_split()

    # ------------------------------------------------------------------
    # Data reading and preprocessing
    # ------------------------------------------------------------------
    def remove_outliear(self, feature_df):
        """Return ``feature_df`` without the rows IsolationForest flags as outliers.

        The forest uses ``contamination=0.01`` and a fixed seed; rows predicted
        as ``1`` (inliers) are kept.

        NOTE(review): the forest is fitted on every column it receives. As
        called from ``get_feture`` that includes the target column, so the
        label takes part in outlier detection — confirm this is intended.
        """
        iso = IsolationForest(contamination=0.01, random_state=42)
        outliers = iso.fit_predict(feature_df)
        data_cleaned = feature_df[outliers == 1]
        return data_cleaned

    def get_feture(self):
        """Read the CSV at ``self.feature_path``, drop its first column and remove outliers."""
        feature_df = pd.read_csv(self.feature_path)
        feature_df = feature_df.iloc[:, 1:]  # remove index
        return self.remove_outliear(feature_df)

    def split_feture_and_target(self):
        """Split ``self.feature_df`` into features (all but last column) and target (last column).

        Returns:
            Tuple ``(X, y)``.
        """
        X = self.feature_df.iloc[:, :-1]
        y = self.feature_df.iloc[:, -1]
        return X, y

    def get_scale_and_test_train_split(self):
        """Standardise ``self.X`` and return a stratified train/test split.

        Returns:
            ``X_train, X_test, y_train, y_test`` as produced by ``train_test_split``.

        NOTE(review): the scaler is fitted on all rows before splitting, so
        test-set statistics influence the training features.
        NOTE(review): ``train_size=.20`` assigns 20% of the rows to training
        and the remainder to testing — confirm ``test_size`` was not intended.
        """
        # Scaling
        scaler = StandardScaler()
        scaled_fature = scaler.fit_transform(self.X)
        # Test/train split
        return train_test_split(scaled_fature, self.y, train_size=.20, random_state=42, stratify=self.y)

    def map_zero_to_n(self):
        """Return ``self.y`` with each distinct label replaced by an integer from 0.

        Integers are assigned in the order in which ``self.y.unique()`` yields
        the labels; the mapping itself is not stored.
        """
        unique_values = {val: idx for idx, val in enumerate(self.y.unique())}
        y_mapped = self.y.map(unique_values)
        return y_mapped

    def get_number_of_categories(self):
        """Return the number of distinct values in ``self.y``."""
        return len(self.y.unique())

    def onehot_encode(self):
        """Replace ``self.y_train`` in place with its one-hot encoding.

        Callers that need the integer labels afterwards must keep a reference
        to the original ``self.y_train`` and restore it themselves.
        """
        self.y_train = to_categorical(self.y_train, num_classes=self.number_of_categories)

    # ------------------------------------------------------------------
    # Cross validation
    #
    # NOTE(review): both methods score on self.X / self.y, i.e. the unscaled
    # features of the full dataset, whereas tuning and the test report use
    # the standardised split.
    # ------------------------------------------------------------------
    def kfold_cross_validation(self, model, n_splits):
        """Print per-fold and mean scores of ``model`` under shuffled K-fold CV."""
        kf = KFold(n_splits=n_splits, shuffle=True, random_state=42)
        kfold_score = cross_val_score(model, self.X, self.y, cv=kf)
        mean_score = np.mean(kfold_score)
        print("\nK-fold cross-validation scores:", kfold_score)
        print("Mean K-fold cross-validation score:", mean_score)

    def stratified_cross_validation(self, model, n_splits):
        """Print per-fold and mean scores of ``model`` under shuffled stratified K-fold CV."""
        skf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=42)
        skfold_score = cross_val_score(model, self.X, self.y, cv=skf)

        mean_score = np.mean(skfold_score)
        print("\nStraified cross validation scores:", skfold_score)
        print("Mean Straified cross-validation score:", mean_score)

    def cross_validation(self, model, n_splits):
        """Run K-fold and then stratified K-fold cross-validation for ``model``."""
        self.kfold_cross_validation(model, n_splits)
        self.stratified_cross_validation(model, n_splits)

    # ------------------------------------------------------------------
    # Hyper-parameter tuning
    # ------------------------------------------------------------------
    def gridSerach(self, estimator, param_grid):
        """Run a 3-fold exhaustive grid search on the training split.

        Prints the best parameters and score and returns the fitted
        ``GridSearchCV`` object.
        """
        print("#---------- Grid Search ------------#")

        grid_search = GridSearchCV(estimator=estimator, param_grid=param_grid, cv=3, verbose=0)
        grid_search.fit(self.X_train, self.y_train)

        print("Best parameters: ", grid_search.best_params_)
        print("Best score: ", grid_search.best_score_)
        return grid_search

    def randomSearch(self, estimator, param_grid):
        """Run a 3-fold randomized search on the training split.

        Prints the best parameters and score and returns the fitted
        ``RandomizedSearchCV`` object.

        NOTE(review): ``n_iter=500`` is larger than the number of combinations
        in every grid defined on this class (the largest has 40), so this
        presumably evaluates the same candidates as the grid search — confirm
        and consider lowering ``n_iter``.
        """
        print("\n#---------- Random Search -----------#")

        random_search = RandomizedSearchCV(estimator=estimator, param_distributions=param_grid, n_iter=500, cv=3, random_state=42)
        random_search.fit(self.X_train, self.y_train)

        print("Best parameters: ", random_search.best_params_)
        print("Best score: ", random_search.best_score_)
        return random_search

    def hyper_parameter_tuning(self, model, param_grid):
        """Run both searches and return whichever fitted search scored higher.

        The randomized search is returned when the scores tie.
        """
        grid_search = self.gridSerach(model, param_grid)
        random_search = self.randomSearch(model, param_grid)
        return grid_search if grid_search.best_score_ > random_search.best_score_ else random_search

    # ------------------------------------------------------------------
    # Models
    # ------------------------------------------------------------------
    def _tune_fit_and_validate(self, banner, estimator, param_grid, label):
        """Shared pipeline behind every scikit-learn style ``*_model`` method.

        Prints ``banner``, tunes ``estimator`` over ``param_grid``, refits the
        best estimator on the training split, prints its test-split report
        under ``label`` and finally prints 3-fold cross-validation scores.
        """
        print(banner)

        tuned_model = self.hyper_parameter_tuning(estimator, param_grid)
        best_model = tuned_model.best_estimator_

        fit_model_and_generate_metrics(best_model, label, self.X_train, self.X_test, self.y_train, self.y_test)
        self.cross_validation(best_model, 3)

    def logistic_regression_model(self):
        """Tune and evaluate ``LogisticRegression``."""
        self._tune_fit_and_validate(
            "#------------------- #1. Logistic Regression Model --------------------#",
            LogisticRegression(),
            self.param_grid_logistic_regression,
            "Logistic Regression",
        )

    def decission_tree_classifier_model(self):
        """Tune and evaluate ``DecisionTreeClassifier``."""
        self._tune_fit_and_validate(
            "#-------------------- #2. Decission Tree Classifier Model --------------------#",
            DecisionTreeClassifier(),
            self.param_grid_decission_tree_classifier,
            "Decission Tree Classifier",
        )

    def random_forest_classifier_model(self):
        """Tune and evaluate ``RandomForestClassifier``."""
        self._tune_fit_and_validate(
            "#-------------------- #3. Random Forest Classifier Model --------------------#",
            RandomForestClassifier(),
            self.param_grid_random_forest_classifier,
            "Random Forest Classifier",
        )

    def gaussian_naive_bias_classifier_model(self):
        """Tune and evaluate ``GaussianNB``."""
        self._tune_fit_and_validate(
            "#-------------------- #4. Gaussian Naive Bias Classifier Model --------------------#",
            GaussianNB(),
            self.param_grid_gaussian_naive_bias,
            "Gaussian Naive Bias Classifier",
        )

    def support_vector_classifier_model(self):
        """Tune and evaluate ``SVC``."""
        self._tune_fit_and_validate(
            "#-------------------- #5. Support Vector Classifier Model --------------------#",
            SVC(),
            self.param_grid_svc,
            "Support Vector Classifier",
        )

    def knn_classifier_model(self):
        """Tune and evaluate ``KNeighborsClassifier``."""
        self._tune_fit_and_validate(
            "#-------------------- #6. K-Nearest Neighbors Classifier Model --------------------#",
            KNeighborsClassifier(),
            self.param_grid_knn,
            "K-Nearest Neighbors",
        )

    def ada_boost_classifier_model(self):
        """Tune and evaluate ``AdaBoostClassifier``."""
        self._tune_fit_and_validate(
            "#-------------------- #7. Ada-Boost Classifier Model --------------------#",
            AdaBoostClassifier(),
            self.param_grid_ada_boost,
            "Ada-Boost Classifier",
        )

    def xg_boost_classifier_model(self):
        """Tune and evaluate ``XGBClassifier``."""
        self._tune_fit_and_validate(
            "-------------------- #8. XG Boost Classifier Model --------------------#",
            XGBClassifier(),
            self.param_grid_xgb,
            "XG Boost Classifier",
        )

    def gradient_boost_model(self):
        """Tune and evaluate ``GradientBoostingClassifier``."""
        self._tune_fit_and_validate(
            "#-------------------- #9. Gradient Boost Classifier Model --------------------#",
            GradientBoostingClassifier(),
            self.param_grid_grad_boost,
            "Gradient Boost Classifier",
        )

    # ----------------------- #10. Artificial Neural Net Model ------------------------#
    def _ann_fold_scores(self, model, fold_indices, epochs, batch_size):
        """Fit ``model`` on each fold of ``self.X``/``self.y`` and return validation accuracies.

        Args:
            model: Estimator whose ``fit`` accepts ``epochs``, ``batch_size``
                and ``verbose`` and whose ``predict`` returns one row of
                per-class scores per sample.
            fold_indices: Iterable of ``(train_index, val_index)`` pairs.
            epochs: Epoch count forwarded to ``fit``.
            batch_size: Batch size forwarded to ``fit``.

        Returns:
            List with one accuracy value per fold.
        """
        scores = []
        for train_index, val_index in fold_indices:
            X_train, X_val = self.X.iloc[train_index], self.X.iloc[val_index]
            y_train, y_val = self.y.iloc[train_index], self.y.iloc[val_index]

            # The network is trained on one-hot targets but scored on integer labels.
            y_train = to_categorical(y_train, num_classes=self.number_of_categories)

            model.fit(X_train, y_train, epochs=epochs, batch_size=batch_size, verbose=0)

            # Convert per-class scores to class labels before computing accuracy.
            y_val_pred = np.argmax(model.predict(X_val), axis=1)
            scores.append(accuracy_score(y_val, y_val_pred))
        return scores

    def ann_kfold_cross_validation(self, model, n_splits=2, epochs=50, batch_size=100):
        """Print per-fold and average accuracy of ``model`` under shuffled K-fold CV."""
        kf = KFold(n_splits=n_splits, shuffle=True, random_state=42)
        scores = self._ann_fold_scores(model, kf.split(self.X), epochs, batch_size)

        print("\nK-fold cross-validation scores:", scores)
        print("Average score:", np.mean(scores))

    def ann_stratified_cross_validation(self, model, n_splits=2, epochs=50, batch_size=100):
        """Print per-fold and average accuracy of ``model`` under shuffled stratified K-fold CV."""
        skf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=42)
        scores = self._ann_fold_scores(model, skf.split(self.X, self.y), epochs, batch_size)

        print("\nStratified cross-validation scores:", scores)
        print("Average score:", np.mean(scores))

    def ann_cross_validation(self, model, n_splits=2, epochs=50, batch_size=100):
        """Run K-fold and then stratified K-fold cross-validation for the network."""
        self.ann_kfold_cross_validation(model, n_splits, epochs, batch_size)
        self.ann_stratified_cross_validation(model, n_splits, epochs, batch_size)

    @staticmethod
    def build_ann(n_neurons=16, activation='relu', n_features=25, n_classes=15):
        """Build and compile a fully connected multi-class classifier.

        The network has four hidden ``Dense`` layers of ``n_neurons`` units
        followed by a softmax output layer, and is compiled with the Adam
        optimiser and categorical cross-entropy loss.

        Args:
            n_neurons: Units in each hidden layer.
            activation: Activation used by every hidden layer.
            n_features: Number of input features (default 25, the previously
                hard-coded value).
            n_classes: Number of output classes (default 15, the previously
                hard-coded value).

        Returns:
            The compiled ``Sequential`` model.
        """
        model = Sequential()
        # First hidden layer also declares the input shape.
        model.add(Dense(n_neurons, activation=activation, input_shape=(n_features,)))

        for _ in range(3):
            model.add(Dense(n_neurons, activation=activation))

        # Output layer: one softmax unit per class.
        model.add(Dense(units=n_classes, activation='softmax'))

        model.compile(optimizer='adam',
                      loss='categorical_crossentropy',
                      metrics=['accuracy'])

        return model

    def ann_model(self):
        """Tune and evaluate the neural network built by ``build_ann``.

        ``self.y_train`` is temporarily replaced by its one-hot encoding for
        the duration of the run and is always restored afterwards, even when
        tuning or fitting raises, so the other model methods keep working.
        """
        print("#----------------------- #10. Artificial Neural Net Model ------------------------#")

        original_y_train = self.y_train
        self.onehot_encode()
        try:
            # Size the input and output layers from the data instead of
            # relying on build_ann's defaults.
            model = KerasClassifier(
                model=self.build_ann,
                model__n_features=self.X_train.shape[1],
                model__n_classes=self.number_of_categories,
                verbose=0,
                epochs=50,
                batch_size=100,
            )

            tuned_model = self.hyper_parameter_tuning(model, self.param_grid_ann)
            ann = tuned_model.best_estimator_

            ann.fit(self.X_train, self.y_train)
            y_pred = ann.predict(self.X_test)
            y_pred_classes = np.argmax(y_pred, axis=1)

            calculate_metrics("Artificial Neural Net", self.y_test, y_pred_classes)
            self.ann_cross_validation(ann)
        finally:
            self.y_train = original_y_train

    def driver(self):
        """Entry point: run the neural-network pipeline."""
        # Previously called the non-existent ``run_ann_model`` and raised AttributeError.
        self.ann_model()


In [8]:
# Directory containing the processed feature CSVs. File names are appended with
# plain string concatenation (path + "<name>.csv"), so the trailing slash is required.
path = '../data/Processed_Features/'

In [22]:
# Evaluation for W100_O25_Features
evaluate_model = ModelTuningAndEvaluation(path + "W100_O25_Features.csv")

In [25]:
# 1. Logistic Regression on W100_O25_Features
evaluate_model.logistic_regression_model()

#------------------- #1. Logistic Regression Model --------------------#
#-------- Grid Search --------#
Best parameters:  {'C': 100, 'max_iter': 100, 'penalty': 'l2', 'solver': 'liblinear'}
Best score:  0.3515655965120888

#---------- Random Search -----------#
Best parameters:  {'solver': 'liblinear', 'penalty': 'l2', 'max_iter': 100, 'C': 100}
Best score:  0.3515655965120888
Logistic Regression Metrics: 
              precision    recall  f1-score   support

           0       0.26      0.43      0.32      2021
           1       0.37      0.57      0.45      2114
           2       0.26      0.24      0.25      2278
           3       0.22      0.51      0.31      2224
           4       0.32      0.33      0.33      2270
           5       0.32      0.38      0.35      1974
           6       0.27      0.12      0.16      2084
           7       0.16      0.13      0.14      2222
           8       0.60      0.80      0.69      1650
           9       0.87      0.73      0.80     

In [27]:
# 2. Decision Tree Classifier on W100_O25_Features
evaluate_model.decission_tree_classifier_model()

#-------------------- #2. Decission Tree Classifier Model --------------------#
#-------- Grid Search --------#
Best parameters:  {'criterion': 'entropy', 'max_depth': 50, 'min_samples_split': 5}
Best score:  0.4975558197912538

#---------- Random Search -----------#
Best parameters:  {'min_samples_split': 2, 'max_depth': 20, 'criterion': 'entropy'}
Best score:  0.5001981767736822
Decission Tree Classifier Metrics: 
              precision    recall  f1-score   support

           0       0.68      0.69      0.69      2021
           1       0.83      0.74      0.79      2114
           2       0.56      0.63      0.60      2278
           3       0.62      0.59      0.61      2224
           4       0.51      0.48      0.49      2270
           5       0.34      0.34      0.34      1974
           6       0.28      0.27      0.27      2084
           7       0.40      0.41      0.41      2222
           8       0.64      0.69      0.67      1650
           9       0.83      0.83      

In [28]:
# 3. Random Forest Classifier on W100_O25_Features
evaluate_model.random_forest_classifier_model()

#-------------------- #3. Random Forest Classifier Model --------------------#
#-------- Grid Search --------#
Best parameters:  {'bootstrap': False, 'criterion': 'entropy', 'max_depth': None, 'n_estimators': 500}
Best score:  0.6364116792178623

#---------- Random Search -----------#
Best parameters:  {'n_estimators': 200, 'max_depth': 20, 'criterion': 'entropy', 'bootstrap': False}
Best score:  0.6356189721231339
Random Forest Classifier Metrics: 
              precision    recall  f1-score   support

           0       0.75      0.85      0.80      2021
           1       0.87      0.87      0.87      2114
           2       0.66      0.74      0.70      2278
           3       0.68      0.69      0.69      2224
           4       0.59      0.69      0.64      2270
           5       0.40      0.61      0.49      1974
           6       0.49      0.32      0.39      2084
           7       0.49      0.54      0.51      2222
           8       0.72      0.85      0.78      1650
     

In [29]:
# 4. Gaussian Naive Bayes on W100_O25_Features
evaluate_model.gaussian_naive_bias_classifier_model()

#-------------------- #4. Gaussian Naive Bias Classifier Model --------------------#
#-------- Grid Search --------#
Best parameters:  {'var_smoothing': 1e-09}
Best score:  0.2212973972783723

#---------- Random Search -----------#
Best parameters:  {'var_smoothing': 1e-09}
Best score:  0.2212973972783723
Gaussian Naive Bias Classifier Metrics: 
              precision    recall  f1-score   support

           0       0.11      0.88      0.20      2021
           1       0.10      0.02      0.03      2114
           2       0.11      0.07      0.09      2278
           3       0.09      0.04      0.05      2224
           4       0.23      0.13      0.16      2270
           5       0.30      0.18      0.22      1974
           6       0.16      0.07      0.10      2084
           7       0.11      0.06      0.08      2222
           8       0.52      0.65      0.58      1650
           9       0.70      0.70      0.70      1698
          10       0.49      0.57      0.52      1996
   

In [30]:
# 5. Support Vector Classifier on W100_O25_Features
evaluate_model.support_vector_classifier_model()

#-------------------- #5. Support Vector Classifier Model --------------------#
#-------- Grid Search --------#
Best parameters:  {'C': 1000, 'gamma': 0.1, 'kernel': 'rbf'}
Best score:  0.5768265292641036

#---------- Random Search -----------#
Best parameters:  {'kernel': 'rbf', 'gamma': 0.1, 'C': 1000}
Best score:  0.5768265292641036
Support Vector Classifier Metrics: 
              precision    recall  f1-score   support

           0       0.75      0.78      0.76      2021
           1       0.83      0.85      0.84      2114
           2       0.56      0.72      0.63      2278
           3       0.59      0.68      0.63      2224
           4       0.59      0.61      0.60      2270
           5       0.38      0.41      0.39      1974
           6       0.41      0.33      0.37      2084
           7       0.49      0.46      0.47      2222
           8       0.72      0.78      0.75      1650
           9       0.73      0.88      0.80      1698
          10       0.63      0.

In [31]:
# 6. K-Nearest Neighbors on W100_O25_Features
evaluate_model.knn_classifier_model()

#-------------------- #6. K-Nearest Neighbors Classifier Model --------------------#
#-------- Grid Search --------#
Best parameters:  {'metric': 'minkowski', 'n_neighbors': 100, 'p': 1, 'weights': 'distance'}
Best score:  0.49742370194213237

#---------- Random Search -----------#
Best parameters:  {'weights': 'distance', 'p': 1, 'n_neighbors': 100, 'metric': 'minkowski'}
Best score:  0.49742370194213237
K-Nearest Neighbors Metrics: 
              precision    recall  f1-score   support

           0       0.64      0.73      0.68      2021
           1       0.72      0.79      0.75      2114
           2       0.44      0.65      0.52      2278
           3       0.47      0.73      0.57      2224
           4       0.55      0.50      0.52      2270
           5       0.33      0.40      0.36      1974
           6       0.37      0.17      0.24      2084
           7       0.31      0.38      0.34      2222
           8       0.49      0.86      0.62      1650
           9       0

In [32]:
# 7. AdaBoost Classifier on W100_O25_Features
evaluate_model.ada_boost_classifier_model()

#-------------------- #7. Ada-Boost Classifier Model --------------------#
#-------- Grid Search --------#
Best parameters:  {'estimator': DecisionTreeClassifier(max_depth=3), 'learning_rate': 0.01, 'n_estimators': 100}
Best score:  0.3361078081648831

#---------- Random Search -----------#
Best parameters:  {'n_estimators': 100, 'learning_rate': 0.01, 'estimator': DecisionTreeClassifier(max_depth=3)}
Best score:  0.3361078081648831
Ada-Boost Classifier Metrics: 
              precision    recall  f1-score   support

           0       0.34      0.61      0.44      2021
           1       0.76      0.28      0.41      2114
           2       0.27      0.54      0.36      2278
           3       0.36      0.47      0.41      2224
           4       0.32      0.47      0.38      2270
           5       0.26      0.66      0.37      1974
           6       0.16      0.02      0.04      2084
           7       0.23      0.23      0.23      2222
           8       0.71      0.66      0.69  

In [42]:
# 8. Gradient Boost on W100_O25_Features
evaluate_model.gradient_boost_model()

#-------------------- #9. Gradient Boost Classifier Model --------------------#
#---------- Grid Search ------------#
Best parameters:  {'learning_rate': 0.2, 'max_depth': 5, 'n_estimators': 100, 'random_state': 42}
Best score:  0.6123662306777646

#---------- Random Search -----------#
Best parameters:  {'random_state': 42, 'n_estimators': 100, 'max_depth': 5, 'learning_rate': 0.2}
Best score:  0.6123662306777646
Gradient Boost Classifier Metrics: 
              precision    recall  f1-score   support

           0       0.75      0.82      0.78      2021
           1       0.86      0.84      0.85      2114
           2       0.64      0.70      0.66      2278
           3       0.66      0.68      0.67      2224
           4       0.62      0.64      0.63      2270
           5       0.40      0.52      0.45      1974
           6       0.38      0.33      0.36      2084
           7       0.47      0.52      0.49      2222
           8       0.76      0.82      0.79      1650
     

In [23]:
# 9. XGBoost Classifier on W100_O25_Features
evaluate_model.xg_boost_classifier_model()

-------------------- #8. XG Boost Classifier Model --------------------#
#---------- Grid Search ------------#
Best parameters:  {'gamma': 0.1, 'learning_rate': 0.2, 'n_estimators': 50, 'subsample': 1.0}
Best score:  0.6206896551724138

#---------- Random Search -----------#
Best parameters:  {'subsample': 1.0, 'n_estimators': 50, 'learning_rate': 0.2, 'gamma': 0.1}
Best score:  0.6206896551724138

XG Boost Classifier Metrics: 
              precision    recall  f1-score   support

           0       0.73      0.82      0.77      2021
           1       0.84      0.87      0.85      2114
           2       0.60      0.72      0.66      2278
           3       0.62      0.70      0.66      2224
           4       0.63      0.67      0.65      2270
           5       0.41      0.58      0.48      1974
           6       0.42      0.30      0.35      2084
           7       0.47      0.52      0.49      2222
           8       0.74      0.85      0.79      1650
           9       0.91    

In [24]:
# 10. Artificial Neural Network on W100_O25_Features
evaluate_model.ann_model()

#----------------------- #10. Artificial Neural Net Model ------------------------#
#---------- Grid Search ------------#
Best parameters:  {'batch_size': 50, 'epochs': 150, 'model__activation': 'tanh', 'model__n_neurons': 16}
Best score:  0.4908178094860616

#---------- Random Search -----------#
Best parameters:  {'model__n_neurons': 16, 'model__activation': 'tanh', 'epochs': 150, 'batch_size': 50}
Best score:  0.4898929845422117

Artificial Neural Net Metrics: 
              precision    recall  f1-score   support

           0       0.61      0.69      0.65      2021
           1       0.72      0.78      0.75      2114
           2       0.45      0.55      0.49      2278
           3       0.46      0.63      0.53      2224
           4       0.55      0.59      0.57      2270
           5       0.37      0.44      0.41      1974
           6       0.22      0.17      0.19      2084
           7       0.35      0.37      0.36      2222
           8       0.74      0.74      0.74 

In [25]:
# Evaluation for W100_O50_Features
evaluate_model = ModelTuningAndEvaluation(path + "W100_O50_Features.csv")

In [35]:
# 1. Logistic Regression on W100_O50_Features
evaluate_model.logistic_regression_model()

#------------------- #1. Logistic Regression Model --------------------#
#---------- Grid Search ------------#
Best parameters:  {'C': 100, 'max_iter': 500, 'penalty': 'l2', 'solver': 'lbfgs'}
Best score:  0.3527498002302693

#---------- Random Search -----------#
Best parameters:  {'solver': 'lbfgs', 'penalty': 'l2', 'max_iter': 500, 'C': 100}
Best score:  0.3527498002302693
Logistic Regression Metrics: 
              precision    recall  f1-score   support

           0       0.28      0.39      0.32      3031
           1       0.38      0.63      0.48      3168
           2       0.27      0.27      0.27      3416
           3       0.22      0.58      0.32      3333
           4       0.34      0.33      0.33      3404
           5       0.36      0.38      0.37      2958
           6       0.29      0.14      0.19      3123
           7       0.16      0.15      0.16      3334
           8       0.61      0.78      0.69      2473
           9       0.88      0.75      0.81      2

In [36]:
# 2. Decision Tree Classifier on W100_O50_Features
evaluate_model.decission_tree_classifier_model()

#-------------------- #2. Decission Tree Classifier Model --------------------#
#---------- Grid Search ------------#
Best parameters:  {'criterion': 'entropy', 'max_depth': None, 'min_samples_split': 2}
Best score:  0.5348086262128361

#---------- Random Search -----------#
Best parameters:  {'min_samples_split': 2, 'max_depth': None, 'criterion': 'entropy'}
Best score:  0.533663244033037
Decission Tree Classifier Metrics: 
              precision    recall  f1-score   support

           0       0.72      0.73      0.73      3031
           1       0.82      0.82      0.82      3168
           2       0.62      0.63      0.63      3416
           3       0.64      0.64      0.64      3333
           4       0.54      0.53      0.54      3404
           5       0.35      0.35      0.35      2958
           6       0.32      0.31      0.32      3123
           7       0.45      0.46      0.46      3334
           8       0.68      0.73      0.71      2473
           9       0.84      0

In [26]:
# 3. Random Forest Classifier on W100_O50_Features
evaluate_model.random_forest_classifier_model()

#-------------------- #3. Random Forest Classifier Model --------------------#
#---------- Grid Search ------------#
Best parameters:  {'bootstrap': True, 'criterion': 'entropy', 'max_depth': 20, 'n_estimators': 100}
Best score:  0.6326227313513809

#---------- Random Search -----------#
Best parameters:  {'n_estimators': 100, 'max_depth': 20, 'criterion': 'entropy', 'bootstrap': True}
Best score:  0.6313890259983256

Random Forest Classifier Metrics: 
              precision    recall  f1-score   support

           0       0.79      0.83      0.81      3031
           1       0.85      0.90      0.88      3168
           2       0.67      0.76      0.71      3416
           3       0.71      0.72      0.72      3333
           4       0.60      0.71      0.65      3404
           5       0.43      0.60      0.50      2958
           6       0.50      0.34      0.41      3123
           7       0.51      0.60      0.55      3334
           8       0.71      0.85      0.78      2473
  

In [81]:
# 4. Gaussian Naive Bayes on W100_O50_Features
evaluate_model.gaussian_naive_bias_classifier_model()

#-------------------- #4. Gaussian Naive Bias Classifier Model --------------------#
#---------- Grid Search ------------#
Best parameters:  {'var_smoothing': 0.0001}
Best score:  0.24656051018036984

#---------- Random Search -----------#
Best parameters:  {'var_smoothing': 0.0001}
Best score:  0.24656051018036984

Gaussian Naive Bias Classifier Metrics: 
              precision    recall  f1-score   support

           0       0.16      0.81      0.27      3031
           1       0.12      0.07      0.09      3168
           2       0.11      0.05      0.07      3416
           3       0.19      0.50      0.27      3333
           4       0.23      0.13      0.16      3404
           5       0.30      0.17      0.22      2958
           6       0.14      0.07      0.09      3123
           7       0.13      0.08      0.10      3334
           8       0.53      0.62      0.57      2473
           9       0.80      0.71      0.76      2536
          10       0.47      0.58      0.52   

In [82]:
# 5. Support Vector Classifier on W100_O50_Features
evaluate_model.support_vector_classifier_model()

#-------------------- #5. Support Vector Classifier Model --------------------#
#---------- Grid Search ------------#
Best parameters:  {'C': 1000, 'gamma': 0.1, 'kernel': 'rbf'}
Best score:  0.5836261324575477

#---------- Random Search -----------#
Best parameters:  {'kernel': 'rbf', 'gamma': 0.1, 'C': 1000}
Best score:  0.5836261324575477

Support Vector Classifier Metrics: 
              precision    recall  f1-score   support

           0       0.79      0.81      0.80      3031
           1       0.86      0.87      0.86      3168
           2       0.61      0.74      0.67      3416
           3       0.61      0.71      0.66      3333
           4       0.61      0.65      0.63      3404
           5       0.43      0.50      0.46      2958
           6       0.46      0.35      0.40      3123
           7       0.52      0.48      0.50      3334
           8       0.75      0.81      0.78      2473
           9       0.89      0.88      0.88      2536
          10       0.62 

In [83]:
# 6. K-Nearest Neighbors on W100_O50_Features
# Outputs the best parameters/score reported for the grid search and the
# random search (tuned keys: metric, n_neighbors, p, weights), then a
# per-class classification report.
evaluate_model.knn_classifier_model()

#-------------------- #6. K-Nearest Neighbors Classifier Model --------------------#
#---------- Grid Search ------------#
Best parameters:  {'metric': 'minkowski', 'n_neighbors': 100, 'p': 1, 'weights': 'distance'}
Best score:  0.5176236532579929

#---------- Random Search -----------#
Best parameters:  {'weights': 'distance', 'p': 1, 'n_neighbors': 100, 'metric': 'minkowski'}
Best score:  0.5176236532579929

K-Nearest Neighbors Metrics: 
              precision    recall  f1-score   support

           0       0.67      0.66      0.67      3031
           1       0.71      0.84      0.77      3168
           2       0.46      0.71      0.56      3416
           3       0.54      0.67      0.60      3333
           4       0.55      0.53      0.54      3404
           5       0.36      0.40      0.38      2958
           6       0.43      0.20      0.27      3123
           7       0.34      0.48      0.40      3334
           8       0.50      0.86      0.64      2473
           9   

In [84]:
# 7. AdaBoost Classifier on W100_O50_Features
# Outputs the best parameters/score reported for the grid search and the
# random search (tuned keys: estimator, learning_rate, n_estimators), then a
# per-class classification report.
evaluate_model.ada_boost_classifier_model()

#-------------------- #7. Ada-Boost Classifier Model --------------------#
#---------- Grid Search ------------#
Best parameters:  {'estimator': DecisionTreeClassifier(max_depth=1), 'learning_rate': 0.5, 'n_estimators': 100}
Best score:  0.2658628162888713

#---------- Random Search -----------#
Best parameters:  {'n_estimators': 100, 'learning_rate': 0.5, 'estimator': DecisionTreeClassifier(max_depth=1)}
Best score:  0.2658628162888713

Ada-Boost Classifier Metrics: 
              precision    recall  f1-score   support

           0       0.21      0.30      0.24      3031
           1       0.22      0.49      0.30      3168
           2       0.23      0.21      0.22      3416
           3       0.11      0.01      0.02      3333
           4       0.26      0.29      0.27      3404
           5       0.35      0.34      0.34      2958
           6       0.18      0.05      0.08      3123
           7       0.15      0.34      0.20      3334
           8       0.37      0.59      0

In [27]:
# 8. Gradient Boost on W100_O50_Features
# Outputs the best parameters/score reported for the grid search and the
# random search (tuned keys: learning_rate, max_depth, n_estimators,
# random_state), then a per-class classification report.
# NOTE: the banner printed by the method is numbered "#9."; the "8." above
# follows this notebook's cell order.
evaluate_model.gradient_boost_model()

#-------------------- #9. Gradient Boost Classifier Model --------------------#
#---------- Grid Search ------------#
Best parameters:  {'learning_rate': 0.2, 'max_depth': 3, 'n_estimators': 100, 'random_state': 42}
Best score:  0.5990479968765609

#---------- Random Search -----------#
Best parameters:  {'random_state': 42, 'n_estimators': 100, 'max_depth': 3, 'learning_rate': 0.2}
Best score:  0.5990479968765609

Gradient Boost Classifier Metrics: 
              precision    recall  f1-score   support

           0       0.75      0.78      0.77      3031
           1       0.81      0.88      0.85      3168
           2       0.61      0.72      0.66      3416
           3       0.64      0.68      0.66      3333
           4       0.61      0.68      0.64      3404
           5       0.43      0.54      0.48      2958
           6       0.41      0.31      0.35      3123
           7       0.47      0.54      0.50      3334
           8       0.75      0.83      0.79      2473
    

In [28]:
# 9. XGBoost Classifier on W100_O50_Features
# Outputs the best parameters/score reported for the grid search and the
# random search (tuned keys: gamma, learning_rate, n_estimators, subsample),
# then a per-class classification report.
# NOTE: the banner printed by the method is numbered "#8."; the "9." above
# follows this notebook's cell order.
evaluate_model.xg_boost_classifier_model()

-------------------- #8. XG Boost Classifier Model --------------------#
#---------- Grid Search ------------#
Best parameters:  {'gamma': 0, 'learning_rate': 0.2, 'n_estimators': 50, 'subsample': 0.8}
Best score:  0.636147340852755

#---------- Random Search -----------#
Best parameters:  {'subsample': 0.8, 'n_estimators': 50, 'learning_rate': 0.2, 'gamma': 0}
Best score:  0.636147340852755

XG Boost Classifier Metrics: 
              precision    recall  f1-score   support

           0       0.80      0.82      0.81      3031
           1       0.84      0.90      0.87      3168
           2       0.64      0.74      0.68      3416
           3       0.68      0.72      0.70      3333
           4       0.64      0.71      0.67      3404
           5       0.46      0.60      0.52      2958
           6       0.47      0.34      0.39      3123
           7       0.48      0.57      0.52      3334
           8       0.76      0.86      0.81      2473
           9       0.93      0.90

In [29]:
# 10. Artificial Neural Network on W100_O50_Features
# Outputs the best parameters/score reported for the grid search and the
# random search (tuned keys: batch_size, epochs, model__activation,
# model__n_neurons), then a per-class classification report.
evaluate_model.ann_model()

#----------------------- #10. Artificial Neural Net Model ------------------------#
#---------- Grid Search ------------#
Best parameters:  {'batch_size': 50, 'epochs': 150, 'model__activation': 'tanh', 'model__n_neurons': 16}
Best score:  0.5115437059452934

#---------- Random Search -----------#
Best parameters:  {'model__n_neurons': 16, 'model__activation': 'tanh', 'epochs': 150, 'batch_size': 50}
Best score:  0.5071371694060829

Artificial Neural Net Metrics: 
              precision    recall  f1-score   support

           0       0.58      0.71      0.64      3031
           1       0.76      0.78      0.77      3168
           2       0.46      0.70      0.56      3416
           3       0.50      0.59      0.54      3333
           4       0.61      0.60      0.61      3404
           5       0.38      0.47      0.42      2958
           6       0.28      0.12      0.16      3123
           7       0.36      0.48      0.42      3334
           8       0.73      0.81      0.77 

In [30]:
# Evaluation for W200_O25_Features
# Rebinds `evaluate_model` to a new ModelTuningAndEvaluation built from the
# W200_O25 feature file; the model cells that follow call methods on this instance.
# `path` is defined earlier in the notebook (not shown in this section) --
# presumably a directory prefix ending in a separator, since the file name is
# appended by plain string concatenation; TODO confirm.
evaluate_model = ModelTuningAndEvaluation(path + "W200_O25_Features.csv")

In [31]:
# 1. Logistic Regression on W200_O25_Features
# Outputs the best parameters/score reported for the grid search and the
# random search (tuned keys: C, max_iter, penalty, solver), then a per-class
# classification report.
evaluate_model.logistic_regression_model()

#------------------- #1. Logistic Regression Model --------------------#
#---------- Grid Search ------------#
Best parameters:  {'C': 10, 'max_iter': 500, 'penalty': 'l2', 'solver': 'lbfgs'}
Best score:  0.37927131118202295

#---------- Random Search -----------#
Best parameters:  {'solver': 'lbfgs', 'penalty': 'l2', 'max_iter': 500, 'C': 10}
Best score:  0.37927131118202295

Logistic Regression Metrics: 
              precision    recall  f1-score   support

           0       0.27      0.41      0.33      1008
           1       0.39      0.57      0.46      1053
           2       0.27      0.34      0.30      1135
           3       0.25      0.57      0.35      1108
           4       0.36      0.34      0.35      1135
           5       0.42      0.43      0.43       986
           6       0.35      0.22      0.27      1038
           7       0.23      0.13      0.17      1109
           8       0.64      0.77      0.70       822
           9       0.84      0.80      0.82      

In [32]:
# 2. Decision Tree Classifier on W200_O25_Features
# Outputs the best parameters/score reported for the grid search and the
# random search (tuned keys: criterion, max_depth, min_samples_split), then a
# per-class classification report.
# NOTE: "decission" is evidently the spelling used where the method is defined
# (definition not shown here); the call has to match it.
evaluate_model.decission_tree_classifier_model()

#-------------------- #2. Decission Tree Classifier Model --------------------#
#---------- Grid Search ------------#
Best parameters:  {'criterion': 'gini', 'max_depth': 50, 'min_samples_split': 2}
Best score:  0.46805699423978236

#---------- Random Search -----------#
Best parameters:  {'min_samples_split': 2, 'max_depth': 50, 'criterion': 'gini'}
Best score:  0.4691185640223967

Decission Tree Classifier Metrics: 
              precision    recall  f1-score   support

           0       0.70      0.65      0.67      1008
           1       0.74      0.75      0.75      1053
           2       0.55      0.54      0.55      1135
           3       0.58      0.56      0.57      1108
           4       0.48      0.48      0.48      1135
           5       0.36      0.33      0.34       986
           6       0.24      0.25      0.25      1038
           7       0.40      0.41      0.41      1109
           8       0.65      0.65      0.65       822
           9       0.87      0.83    

In [33]:
# 3. Random Forest Classifier on W200_O25_Features
# Outputs the best parameters/score reported for the grid search and the
# random search (tuned keys: bootstrap, criterion, max_depth, n_estimators),
# then a per-class classification report.
evaluate_model.random_forest_classifier_model()

#-------------------- #3. Random Forest Classifier Model --------------------#
#---------- Grid Search ------------#
Best parameters:  {'bootstrap': True, 'criterion': 'gini', 'max_depth': 20, 'n_estimators': 100}
Best score:  0.5809664079353817

#---------- Random Search -----------#
Best parameters:  {'n_estimators': 100, 'max_depth': 20, 'criterion': 'gini', 'bootstrap': True}
Best score:  0.5812273736603374

Random Forest Classifier Metrics: 
              precision    recall  f1-score   support

           0       0.74      0.80      0.77      1008
           1       0.84      0.87      0.85      1053
           2       0.59      0.71      0.64      1135
           3       0.63      0.67      0.65      1108
           4       0.53      0.69      0.60      1135
           5       0.43      0.61      0.51       986
           6       0.45      0.34      0.39      1038
           7       0.46      0.51      0.48      1109
           8       0.74      0.82      0.78       822
        

In [34]:
# 4. Gaussian Naive Bayes on W200_O25_Features
# Outputs the best parameters/score reported for the grid search and the
# random search (tuned key: var_smoothing), then a per-class classification report.
# NOTE: "bias" (for "Bayes") is evidently the spelling used where the method is
# defined (definition not shown here); the call has to match it.
evaluate_model.gaussian_naive_bias_classifier_model()

#-------------------- #4. Gaussian Naive Bias Classifier Model --------------------#
#---------- Grid Search ------------#
Best parameters:  {'var_smoothing': 1e-09}
Best score:  0.23455780643763235

#---------- Random Search -----------#
Best parameters:  {'var_smoothing': 1e-09}
Best score:  0.23455780643763235

Gaussian Naive Bias Classifier Metrics: 
              precision    recall  f1-score   support

           0       0.12      0.86      0.21      1008
           1       0.16      0.06      0.09      1053
           2       0.15      0.10      0.12      1135
           3       0.13      0.05      0.07      1108
           4       0.22      0.12      0.15      1135
           5       0.33      0.21      0.26       986
           6       0.23      0.15      0.18      1038
           7       0.11      0.07      0.09      1109
           8       0.55      0.61      0.58       822
           9       0.83      0.75      0.79       862
          10       0.46      0.61      0.53     

In [35]:
# 5. Support Vector Classifier on W200_O25_Features
# Outputs the best parameters/score reported for the grid search and the
# random search (tuned keys: C, gamma, kernel), then a per-class classification report.
evaluate_model.support_vector_classifier_model()

#-------------------- #5. Support Vector Classifier Model --------------------#
#---------- Grid Search ------------#
Best parameters:  {'C': 1000, 'gamma': 0.1, 'kernel': 'rbf'}
Best score:  0.548103065862437

#---------- Random Search -----------#
Best parameters:  {'kernel': 'rbf', 'gamma': 0.1, 'C': 1000}
Best score:  0.548103065862437

Support Vector Classifier Metrics: 
              precision    recall  f1-score   support

           0       0.77      0.80      0.78      1008
           1       0.83      0.84      0.84      1053
           2       0.59      0.67      0.63      1135
           3       0.60      0.67      0.63      1108
           4       0.59      0.59      0.59      1135
           5       0.38      0.50      0.43       986
           6       0.41      0.34      0.37      1038
           7       0.45      0.46      0.46      1109
           8       0.71      0.79      0.75       822
           9       0.88      0.84      0.86       862
          10       0.59   

In [36]:
# 6. K-Nearest Neighbors on W200_O25_Features
# Outputs the best parameters/score reported for the grid search and the
# random search (tuned keys: metric, n_neighbors, p, weights), then a
# per-class classification report.
evaluate_model.knn_classifier_model()

#-------------------- #6. K-Nearest Neighbors Classifier Model --------------------#
#---------- Grid Search ------------#
Best parameters:  {'metric': 'minkowski', 'n_neighbors': 100, 'p': 1, 'weights': 'distance'}
Best score:  0.45613035469837376

#---------- Random Search -----------#
Best parameters:  {'weights': 'distance', 'p': 1, 'n_neighbors': 100, 'metric': 'minkowski'}
Best score:  0.45613035469837376

K-Nearest Neighbors Metrics: 
              precision    recall  f1-score   support

           0       0.58      0.76      0.65      1008
           1       0.61      0.73      0.66      1053
           2       0.38      0.61      0.47      1135
           3       0.46      0.63      0.53      1108
           4       0.48      0.40      0.44      1135
           5       0.36      0.41      0.38       986
           6       0.42      0.22      0.29      1038
           7       0.31      0.42      0.36      1109
           8       0.49      0.82      0.61       822
           9 

In [37]:
# 7. AdaBoost Classifier on W200_O25_Features
# Outputs the best parameters/score reported for the grid search and the
# random search (tuned keys: estimator, learning_rate, n_estimators), then a
# per-class classification report.
evaluate_model.ada_boost_classifier_model()

#-------------------- #7. Ada-Boost Classifier Model --------------------#
#---------- Grid Search ------------#
Best parameters:  {'estimator': DecisionTreeClassifier(max_depth=1), 'learning_rate': 0.5, 'n_estimators': 50}
Best score:  0.2274027502161715

#---------- Random Search -----------#
Best parameters:  {'n_estimators': 50, 'learning_rate': 0.5, 'estimator': DecisionTreeClassifier(max_depth=1)}
Best score:  0.2274027502161715

Ada-Boost Classifier Metrics: 
              precision    recall  f1-score   support

           0       0.29      0.11      0.16      1008
           1       0.30      0.40      0.34      1053
           2       0.14      0.35      0.20      1135
           3       0.17      0.18      0.18      1108
           4       0.19      0.35      0.25      1135
           5       0.31      0.59      0.40       986
           6       0.16      0.07      0.10      1038
           7       0.13      0.07      0.09      1109
           8       0.23      0.12      0.1

In [38]:
# 8. Gradient Boost on W200_O25_Features
# Outputs the best parameters/score reported for the grid search and the
# random search (tuned keys: learning_rate, max_depth, n_estimators,
# random_state), then a per-class classification report.
# NOTE: the banner printed by the method is numbered "#9."; the "8." above
# follows this notebook's cell order.
evaluate_model.gradient_boost_model()

#-------------------- #9. Gradient Boost Classifier Model --------------------#
#---------- Grid Search ------------#
Best parameters:  {'learning_rate': 0.2, 'max_depth': 3, 'n_estimators': 100, 'random_state': 42}
Best score:  0.5616163685797267

#---------- Random Search -----------#
Best parameters:  {'random_state': 42, 'n_estimators': 100, 'max_depth': 3, 'learning_rate': 0.2}
Best score:  0.5616163685797267

Gradient Boost Classifier Metrics: 
              precision    recall  f1-score   support

           0       0.71      0.75      0.73      1008
           1       0.84      0.84      0.84      1053
           2       0.60      0.63      0.62      1135
           3       0.59      0.65      0.62      1108
           4       0.60      0.62      0.61      1135
           5       0.44      0.56      0.50       986
           6       0.37      0.36      0.36      1038
           7       0.45      0.49      0.47      1109
           8       0.74      0.83      0.78       822
    

In [39]:
# 9. XGBoost Classifier on W200_O25_Features
# Outputs the best parameters/score reported for the grid search and the
# random search (tuned keys: gamma, learning_rate, n_estimators, subsample),
# then a per-class classification report.
# NOTE: the banner printed by the method is numbered "#8."; the "9." above
# follows this notebook's cell order.
evaluate_model.xg_boost_classifier_model()

-------------------- #8. XG Boost Classifier Model --------------------#
#---------- Grid Search ------------#
Best parameters:  {'gamma': 0.1, 'learning_rate': 0.2, 'n_estimators': 50, 'subsample': 0.8}
Best score:  0.5928928366805666

#---------- Random Search -----------#
Best parameters:  {'subsample': 0.8, 'n_estimators': 50, 'learning_rate': 0.2, 'gamma': 0.1}
Best score:  0.5928928366805666

XG Boost Classifier Metrics: 
              precision    recall  f1-score   support

           0       0.76      0.78      0.77      1008
           1       0.84      0.87      0.85      1053
           2       0.63      0.68      0.66      1135
           3       0.65      0.68      0.66      1108
           4       0.65      0.66      0.65      1135
           5       0.47      0.63      0.54       986
           6       0.44      0.37      0.40      1038
           7       0.45      0.53      0.49      1109
           8       0.78      0.83      0.80       822
           9       0.91    

In [40]:
# 10. Artificial Neural Network on W200_O25_Features
# Outputs the best parameters/score reported for the grid search and the
# random search (tuned keys: batch_size, epochs, model__activation,
# model__n_neurons), then a per-class classification report.
evaluate_model.ann_model()

#----------------------- #10. Artificial Neural Net Model ------------------------#
#---------- Grid Search ------------#
Best parameters:  {'batch_size': 50, 'epochs': 150, 'model__activation': 'tanh', 'model__n_neurons': 16}
Best score:  0.4648817707220066

#---------- Random Search -----------#
Best parameters:  {'model__n_neurons': 16, 'model__activation': 'tanh', 'epochs': 150, 'batch_size': 50}
Best score:  0.4709674577005758

Artificial Neural Net Metrics: 
              precision    recall  f1-score   support

           0       0.49      0.71      0.58      1008
           1       0.65      0.63      0.64      1053
           2       0.45      0.52      0.48      1135
           3       0.49      0.58      0.53      1108
           4       0.57      0.58      0.58      1135
           5       0.42      0.46      0.44       986
           6       0.26      0.23      0.24      1038
           7       0.37      0.35      0.36      1109
           8       0.71      0.77      0.74 

In [41]:
# Evaluation for W200_O50_Features
# Rebinds `evaluate_model` to a new ModelTuningAndEvaluation built from the
# W200_O50 feature file; the model cells that follow call methods on this instance.
# `path` is defined earlier in the notebook (not shown in this section) --
# presumably a directory prefix ending in a separator, since the file name is
# appended by plain string concatenation; TODO confirm.
evaluate_model = ModelTuningAndEvaluation(path + "W200_O50_Features.csv")

In [42]:
# 1. Logistic Regression on W200_O50_Features
# Outputs the best parameters/score reported for the grid search and the
# random search (tuned keys: C, max_iter, penalty, solver), then a per-class
# classification report.
evaluate_model.logistic_regression_model()

#------------------- #1. Logistic Regression Model --------------------#
#---------- Grid Search ------------#
Best parameters:  {'C': 10, 'max_iter': 100, 'penalty': 'l2', 'solver': 'lbfgs'}
Best score:  0.3752855166069051

#---------- Random Search -----------#
Best parameters:  {'solver': 'lbfgs', 'penalty': 'l2', 'max_iter': 100, 'C': 10}
Best score:  0.3752855166069051

Logistic Regression Metrics: 
              precision    recall  f1-score   support

           0       0.27      0.41      0.33      1507
           1       0.40      0.59      0.48      1574
           2       0.27      0.31      0.29      1698
           3       0.27      0.58      0.37      1656
           4       0.35      0.32      0.33      1693
           5       0.39      0.43      0.41      1474
           6       0.38      0.20      0.27      1554
           7       0.21      0.20      0.21      1657
           8       0.65      0.77      0.70      1229
           9       0.87      0.78      0.83      12

In [43]:
# 2. Decision Tree Classifier on W200_O50_Features
# Outputs the best parameters/score reported for the grid search and the
# random search (tuned keys: criterion, max_depth, min_samples_split), then a
# per-class classification report.
# NOTE: "decission" is evidently the spelling used where the method is defined
# (definition not shown here); the call has to match it.
evaluate_model.decission_tree_classifier_model()

#-------------------- #2. Decission Tree Classifier Model --------------------#
#---------- Grid Search ------------#
Best parameters:  {'criterion': 'entropy', 'max_depth': 50, 'min_samples_split': 2}
Best score:  0.509308180724754

#---------- Random Search -----------#
Best parameters:  {'min_samples_split': 2, 'max_depth': 50, 'criterion': 'entropy'}
Best score:  0.5057634010881492

Decission Tree Classifier Metrics: 
              precision    recall  f1-score   support

           0       0.71      0.67      0.69      1507
           1       0.79      0.78      0.79      1574
           2       0.58      0.58      0.58      1698
           3       0.59      0.64      0.61      1656
           4       0.50      0.49      0.50      1693
           5       0.41      0.39      0.40      1474
           6       0.31      0.31      0.31      1554
           7       0.43      0.44      0.43      1657
           8       0.71      0.70      0.71      1229
           9       0.90      0.85

In [44]:
# 3. Random Forest Classifier on W200_O50_Features
# Outputs the best parameters/score reported for the grid search and the
# random search (tuned keys: bootstrap, criterion, max_depth, n_estimators),
# then a per-class classification report.
evaluate_model.random_forest_classifier_model()

#-------------------- #3. Random Forest Classifier Model --------------------#
#---------- Grid Search ------------#
Best parameters:  {'bootstrap': True, 'criterion': 'entropy', 'max_depth': 20, 'n_estimators': 100}
Best score:  0.620812360258049

#---------- Random Search -----------#
Best parameters:  {'n_estimators': 50, 'max_depth': 20, 'criterion': 'entropy', 'bootstrap': True}
Best score:  0.6179769890016251

Random Forest Classifier Metrics: 
              precision    recall  f1-score   support

           0       0.77      0.82      0.79      1507
           1       0.85      0.89      0.87      1574
           2       0.65      0.77      0.70      1698
           3       0.65      0.73      0.69      1656
           4       0.57      0.69      0.62      1693
           5       0.45      0.65      0.53      1474
           6       0.50      0.39      0.43      1554
           7       0.53      0.57      0.55      1657
           8       0.77      0.87      0.82      1229
    

In [45]:
# 4. Gaussian Naive Bayes on W200_O50_Features
# Outputs the best parameters/score reported for the grid search and the
# random search (tuned key: var_smoothing), then a per-class classification report.
# NOTE: "bias" (for "Bayes") is evidently the spelling used where the method is
# defined (definition not shown here); the call has to match it.
evaluate_model.gaussian_naive_bias_classifier_model()

#-------------------- #4. Gaussian Naive Bias Classifier Model --------------------#
#---------- Grid Search ------------#
Best parameters:  {'var_smoothing': 0.0001}
Best score:  0.23949112249784144

#---------- Random Search -----------#
Best parameters:  {'var_smoothing': 0.0001}
Best score:  0.23949112249784144

Gaussian Naive Bias Classifier Metrics: 
              precision    recall  f1-score   support

           0       0.34      0.47      0.40      1507
           1       0.20      0.13      0.16      1574
           2       0.10      0.06      0.08      1698
           3       0.16      0.85      0.27      1656
           4       0.23      0.15      0.18      1693
           5       0.33      0.21      0.26      1474
           6       0.19      0.07      0.10      1554
           7       0.11      0.08      0.09      1657
           8       0.53      0.63      0.58      1229
           9       0.75      0.72      0.74      1294
          10       0.53      0.60      0.56   

In [46]:
# 5. Support Vector Classifier on W200_O50_Features
# Outputs the best parameters/score reported for the grid search and the
# random search (tuned keys: C, gamma, kernel), then a per-class classification report.
evaluate_model.support_vector_classifier_model()

#-------------------- #5. Support Vector Classifier Model --------------------#
#---------- Grid Search ------------#
Best parameters:  {'C': 1000, 'gamma': 0.1, 'kernel': 'rbf'}
Best score:  0.5802164986935424

#---------- Random Search -----------#
Best parameters:  {'kernel': 'rbf', 'gamma': 0.1, 'C': 1000}
Best score:  0.5802164986935424

Support Vector Classifier Metrics: 
              precision    recall  f1-score   support

           0       0.76      0.79      0.78      1507
           1       0.81      0.89      0.85      1574
           2       0.62      0.71      0.66      1698
           3       0.59      0.69      0.64      1656
           4       0.61      0.65      0.63      1693
           5       0.44      0.49      0.46      1474
           6       0.45      0.38      0.41      1554
           7       0.49      0.47      0.48      1657
           8       0.76      0.81      0.78      1229
           9       0.91      0.87      0.89      1294
          10       0.66 

In [47]:
# 6. K-Nearest Neighbors on W200_O50_Features
# Outputs the best parameters/score reported for the grid search and the
# random search (tuned keys: metric, n_neighbors, p, weights), then a
# per-class classification report.
evaluate_model.knn_classifier_model()

#-------------------- #6. K-Nearest Neighbors Classifier Model --------------------#
#---------- Grid Search ------------#
Best parameters:  {'metric': 'minkowski', 'n_neighbors': 100, 'p': 1, 'weights': 'distance'}
Best score:  0.48626013777189586

#---------- Random Search -----------#
Best parameters:  {'weights': 'distance', 'p': 1, 'n_neighbors': 100, 'metric': 'minkowski'}
Best score:  0.48626013777189586

K-Nearest Neighbors Metrics: 
              precision    recall  f1-score   support

           0       0.67      0.71      0.69      1507
           1       0.67      0.80      0.73      1574
           2       0.43      0.71      0.53      1698
           3       0.51      0.65      0.57      1656
           4       0.55      0.43      0.48      1693
           5       0.38      0.45      0.41      1474
           6       0.42      0.24      0.30      1554
           7       0.31      0.47      0.37      1657
           8       0.50      0.86      0.63      1229
           9 

In [48]:
# 7. AdaBoost Classifier on W200_O50_Features
# Outputs the best parameters/score reported for the grid search and the
# random search (tuned keys: estimator, learning_rate, n_estimators), then a
# per-class classification report.
evaluate_model.ada_boost_classifier_model()

#-------------------- #7. Ada-Boost Classifier Model --------------------#
#---------- Grid Search ------------#
Best parameters:  {'estimator': DecisionTreeClassifier(max_depth=1), 'learning_rate': 0.5, 'n_estimators': 100}
Best score:  0.2329390395179869

#---------- Random Search -----------#
Best parameters:  {'n_estimators': 100, 'learning_rate': 0.5, 'estimator': DecisionTreeClassifier(max_depth=1)}
Best score:  0.2329390395179869

Ada-Boost Classifier Metrics: 
              precision    recall  f1-score   support

           0       0.27      0.18      0.21      1507
           1       0.17      0.33      0.23      1574
           2       0.17      0.39      0.24      1698
           3       0.20      0.09      0.13      1656
           4       0.45      0.13      0.20      1693
           5       0.32      0.63      0.42      1474
           6       0.29      0.05      0.08      1554
           7       0.19      0.35      0.25      1657
           8       0.37      0.24      0

In [49]:
# 8. Gradient Boost on W200_O50_Features
# Outputs the best parameters/score reported for the grid search and the
# random search (tuned keys: learning_rate, max_depth, n_estimators,
# random_state), then a per-class classification report.
# NOTE: the banner printed by the method is numbered "#9."; the "8." above
# follows this notebook's cell order.
evaluate_model.gradient_boost_model()

#-------------------- #9. Gradient Boost Classifier Model --------------------#
#---------- Grid Search ------------#
Best parameters:  {'learning_rate': 0.2, 'max_depth': 3, 'n_estimators': 100, 'random_state': 42}
Best score:  0.6075193894902741

#---------- Random Search -----------#
Best parameters:  {'random_state': 42, 'n_estimators': 100, 'max_depth': 3, 'learning_rate': 0.2}
Best score:  0.6075193894902741

Gradient Boost Classifier Metrics: 
              precision    recall  f1-score   support

           0       0.74      0.76      0.75      1507
           1       0.82      0.84      0.83      1574
           2       0.66      0.73      0.69      1698
           3       0.65      0.71      0.68      1656
           4       0.63      0.69      0.66      1693
           5       0.46      0.57      0.51      1474
           6       0.40      0.37      0.38      1554
           7       0.48      0.52      0.50      1657
           8       0.79      0.83      0.81      1229
    

In [50]:
# 9. XGBoost Classifier on W200_O50_Features
# Outputs the best parameters/score reported for the grid search and the
# random search (tuned keys: gamma, learning_rate, n_estimators, subsample),
# then a per-class classification report.
# NOTE: the banner printed by the method is numbered "#8."; the "9." above
# follows this notebook's cell order.
evaluate_model.xg_boost_classifier_model()

-------------------- #8. XG Boost Classifier Model --------------------#
#---------- Grid Search ------------#
Best parameters:  {'gamma': 0.1, 'learning_rate': 0.2, 'n_estimators': 50, 'subsample': 0.8}
Best score:  0.6255994057785771

#---------- Random Search -----------#
Best parameters:  {'subsample': 0.8, 'n_estimators': 50, 'learning_rate': 0.2, 'gamma': 0.1}
Best score:  0.6255994057785771

XG Boost Classifier Metrics: 
              precision    recall  f1-score   support

           0       0.77      0.79      0.78      1507
           1       0.84      0.89      0.87      1574
           2       0.69      0.75      0.72      1698
           3       0.66      0.75      0.70      1656
           4       0.66      0.73      0.69      1693
           5       0.49      0.63      0.55      1474
           6       0.46      0.39      0.42      1554
           7       0.50      0.54      0.52      1657
           8       0.80      0.86      0.83      1229
           9       0.94    

In [51]:
# 10. Artificial Neural Network on W200_O50_Features
# Outputs the best parameters/score reported for the grid search and the
# random search (tuned keys: batch_size, epochs, model__activation,
# model__n_neurons), then a per-class classification report.
evaluate_model.ann_model()

#----------------------- #10. Artificial Neural Net Model ------------------------#
#---------- Grid Search ------------#
Best parameters:  {'batch_size': 50, 'epochs': 150, 'model__activation': 'tanh', 'model__n_neurons': 16}
Best score:  0.4997320159414224

#---------- Random Search -----------#
Best parameters:  {'model__n_neurons': 16, 'model__activation': 'tanh', 'epochs': 150, 'batch_size': 50}
Best score:  0.48271865727072893

Artificial Neural Net Metrics: 
              precision    recall  f1-score   support

           0       0.55      0.75      0.64      1507
           1       0.77      0.66      0.71      1574
           2       0.49      0.61      0.55      1698
           3       0.46      0.63      0.53      1656
           4       0.52      0.62      0.56      1693
           5       0.40      0.49      0.44      1474
           6       0.34      0.16      0.22      1554
           7       0.40      0.42      0.41      1657
           8       0.77      0.75      0.76

In [9]:
# Evaluation for W300_O25_Features
# Rebinds `evaluate_model` to a new ModelTuningAndEvaluation built from the
# W300_O25 feature file; the model cells that follow call methods on this instance.
# `path` is defined earlier in the notebook (not shown in this section) --
# presumably a directory prefix ending in a separator, since the file name is
# appended by plain string concatenation; TODO confirm.
evaluate_model = ModelTuningAndEvaluation(path + "W300_O25_Features.csv")

In [164]:
# 1. Logistic Regression on W300_O25_Features
# Outputs the best parameters/score reported for the grid search and the
# random search (tuned keys: C, max_iter, penalty, solver), then a per-class
# classification report.
evaluate_model.logistic_regression_model()

#------------------- #1. Logistic Regression Model --------------------#
#---------- Grid Search ------------#
Best parameters:  {'C': 10, 'max_iter': 500, 'penalty': 'l2', 'solver': 'lbfgs'}
Best score:  0.38751462123310865

#---------- Random Search -----------#
Best parameters:  {'solver': 'lbfgs', 'penalty': 'l2', 'max_iter': 500, 'C': 10}
Best score:  0.38751462123310865

Logistic Regression Metrics: 
              precision    recall  f1-score   support

           0       0.30      0.43      0.36       669
           1       0.41      0.59      0.48       698
           2       0.31      0.36      0.34       753
           3       0.25      0.52      0.34       734
           4       0.37      0.36      0.36       750
           5       0.46      0.44      0.45       654
           6       0.43      0.27      0.33       689
           7       0.21      0.17      0.19       734
           8       0.62      0.77      0.69       545
           9       0.84      0.86      0.85      

In [165]:
# 2. Decision Tree Classifier on W300_O25_Features
# Outputs the best parameters/score reported for the grid search and the
# random search (tuned keys: criterion, max_depth, min_samples_split), then a
# per-class classification report.
# NOTE: "decission" is evidently the spelling used where the method is defined
# (definition not shown here); the call has to match it.
evaluate_model.decission_tree_classifier_model()

#-------------------- #2. Decission Tree Classifier Model --------------------#
#---------- Grid Search ------------#
Best parameters:  {'criterion': 'gini', 'max_depth': None, 'min_samples_split': 5}
Best score:  0.4555591467356173

#---------- Random Search -----------#
Best parameters:  {'min_samples_split': 5, 'max_depth': 20, 'criterion': 'gini'}
Best score:  0.45676058885092496

Decission Tree Classifier Metrics: 
              precision    recall  f1-score   support

           0       0.62      0.63      0.62       669
           1       0.71      0.68      0.70       698
           2       0.43      0.55      0.49       753
           3       0.49      0.55      0.52       734
           4       0.38      0.37      0.38       750
           5       0.37      0.39      0.38       654
           6       0.25      0.26      0.25       689
           7       0.39      0.36      0.37       734
           8       0.63      0.69      0.66       545
           9       0.79      0.88  

In [166]:
# 3. Random Forest Classifier on W300_O25_Features
# Outputs the best parameters/score reported for the grid search and the
# random search (tuned keys: bootstrap, criterion, max_depth, n_estimators),
# then a per-class classification report.
evaluate_model.random_forest_classifier_model()

#-------------------- #3. Random Forest Classifier Model --------------------#
#---------- Grid Search ------------#
Best parameters:  {'bootstrap': True, 'criterion': 'gini', 'max_depth': 20, 'n_estimators': 100}
Best score:  0.572048530950842

#---------- Random Search -----------#
Best parameters:  {'n_estimators': 100, 'max_depth': 20, 'criterion': 'entropy', 'bootstrap': True}
Best score:  0.5724506148613292

Random Forest Classifier Metrics: 
              precision    recall  f1-score   support

           0       0.74      0.75      0.75       669
           1       0.83      0.84      0.83       698
           2       0.55      0.70      0.62       753
           3       0.60      0.68      0.64       734
           4       0.49      0.58      0.53       750
           5       0.45      0.66      0.53       654
           6       0.45      0.38      0.41       689
           7       0.47      0.50      0.49       734
           8       0.72      0.91      0.80       545
      

In [167]:
# 4. Gaussian Naive Bayes on W300_O25_Features
# Outputs the best parameters/score reported for the grid search and the
# random search (tuned key: var_smoothing), then a per-class classification report.
# NOTE: "bias" (for "Bayes") is evidently the spelling used where the method is
# defined (definition not shown here); the call has to match it.
evaluate_model.gaussian_naive_bias_classifier_model()

#-------------------- #4. Gaussian Naive Bias Classifier Model --------------------#
#---------- Grid Search ------------#
Best parameters:  {'var_smoothing': 0.0001}
Best score:  0.26380744605534523

#---------- Random Search -----------#
Best parameters:  {'var_smoothing': 0.0001}
Best score:  0.26380744605534523

Gaussian Naive Bias Classifier Metrics: 
              precision    recall  f1-score   support

           0       0.16      0.85      0.26       669
           1       0.24      0.06      0.10       698
           2       0.19      0.14      0.16       753
           3       0.15      0.21      0.17       734
           4       0.27      0.17      0.21       750
           5       0.36      0.24      0.29       654
           6       0.27      0.18      0.21       689
           7       0.14      0.10      0.12       734
           8       0.49      0.59      0.53       545
           9       0.70      0.75      0.73       571
          10       0.48      0.61      0.54   

In [168]:
# 5. Support Vector Classifier on W300_O25_Features
# Prints the best parameters/score found by grid search and by random search,
# followed by a per-class precision/recall/F1 report (see output below).
evaluate_model.support_vector_classifier_model()

#-------------------- #5. Support Vector Classifier Model --------------------#
#---------- Grid Search ------------#
Best parameters:  {'C': 1000, 'gamma': 0.1, 'kernel': 'rbf'}
Best score:  0.5520376419798689

#---------- Random Search -----------#
Best parameters:  {'kernel': 'rbf', 'gamma': 0.1, 'C': 1000}
Best score:  0.5520376419798689

Support Vector Classifier Metrics: 
              precision    recall  f1-score   support

           0       0.74      0.73      0.74       669
           1       0.81      0.85      0.83       698
           2       0.59      0.63      0.61       753
           3       0.56      0.67      0.61       734
           4       0.57      0.58      0.58       750
           5       0.37      0.52      0.43       654
           6       0.44      0.32      0.37       689
           7       0.44      0.41      0.42       734
           8       0.73      0.80      0.76       545
           9       0.70      0.92      0.79       571
          10       0.74 

In [169]:
# 6. K-Nearest Neighbors on W300_O25_Features
# Prints the best parameters/score found by grid search and by random search,
# followed by a per-class precision/recall/F1 report (see output below).
evaluate_model.knn_classifier_model()

#-------------------- #6. K-Nearest Neighbors Classifier Model --------------------#
#---------- Grid Search ------------#
Best parameters:  {'metric': 'minkowski', 'n_neighbors': 100, 'p': 1, 'weights': 'distance'}
Best score:  0.43275002308615756

#---------- Random Search -----------#
Best parameters:  {'weights': 'distance', 'p': 1, 'n_neighbors': 100, 'metric': 'minkowski'}
Best score:  0.43275002308615756

K-Nearest Neighbors Metrics: 
              precision    recall  f1-score   support

           0       0.52      0.78      0.62       669
           1       0.55      0.67      0.60       698
           2       0.38      0.66      0.48       753
           3       0.44      0.68      0.53       734
           4       0.56      0.31      0.40       750
           5       0.42      0.37      0.39       654
           6       0.31      0.25      0.28       689
           7       0.30      0.40      0.34       734
           8       0.46      0.83      0.60       545
           9 

In [170]:
# 7. AdaBoost Classifier on W300_O25_Features
# Prints the best parameters/score found by grid search and by random search,
# followed by a per-class precision/recall/F1 report (see output below).
evaluate_model.ada_boost_classifier_model()

#-------------------- #7. Ada-Boost Classifier Model --------------------#
#---------- Grid Search ------------#
Best parameters:  {'estimator': DecisionTreeClassifier(max_depth=1), 'learning_rate': 0.5, 'n_estimators': 100}
Best score:  0.23858341413488474

#---------- Random Search -----------#
Best parameters:  {'n_estimators': 100, 'learning_rate': 0.5, 'estimator': DecisionTreeClassifier(max_depth=1)}
Best score:  0.23858341413488474

Ada-Boost Classifier Metrics: 
              precision    recall  f1-score   support

           0       0.30      0.58      0.40       669
           1       0.25      0.18      0.21       698
           2       0.18      0.37      0.24       753
           3       0.24      0.23      0.23       734
           4       0.35      0.18      0.24       750
           5       0.36      0.45      0.40       654
           6       0.20      0.10      0.14       689
           7       0.17      0.15      0.16       734
           8       0.17      0.24     

In [171]:
# 8. Gradient Boost on W300_O25_Features
# Prints the best parameters/score found by grid search and by random search,
# followed by a per-class precision/recall/F1 report (see output below).
# NOTE(review): this is step 8 here, but the banner printed by the method is
# labelled "#9." — the Gradient Boost / XG Boost banner numbers in the class
# are swapped relative to the cell numbering.
evaluate_model.gradient_boost_model()

#-------------------- #9. Gradient Boost Classifier Model --------------------#
#---------- Grid Search ------------#
Best parameters:  {'learning_rate': 0.2, 'max_depth': 3, 'n_estimators': 100, 'random_state': 42}
Best score:  0.5628458114014837

#---------- Random Search -----------#
Best parameters:  {'random_state': 42, 'n_estimators': 100, 'max_depth': 3, 'learning_rate': 0.2}
Best score:  0.5628458114014837

Gradient Boost Classifier Metrics: 
              precision    recall  f1-score   support

           0       0.71      0.69      0.70       669
           1       0.79      0.82      0.80       698
           2       0.61      0.67      0.64       753
           3       0.59      0.62      0.60       734
           4       0.62      0.60      0.61       750
           5       0.46      0.58      0.51       654
           6       0.41      0.38      0.40       689
           7       0.43      0.51      0.47       734
           8       0.74      0.84      0.79       545
    

In [172]:
# 9. XGBoost Classifier on W300_O25_Features
# Prints the best parameters/score found by grid search and by random search,
# followed by a per-class precision/recall/F1 report (see output below).
# NOTE(review): this is step 9 here, but the banner printed by the method is
# labelled "#8." and is missing the leading "#" the other banners have.
evaluate_model.xg_boost_classifier_model()

-------------------- #8. XG Boost Classifier Model --------------------#
#---------- Grid Search ------------#
Best parameters:  {'gamma': 0, 'learning_rate': 0.2, 'n_estimators': 50, 'subsample': 0.8}
Best score:  0.5968709599224306

#---------- Random Search -----------#
Best parameters:  {'subsample': 0.8, 'n_estimators': 50, 'learning_rate': 0.2, 'gamma': 0}
Best score:  0.5968709599224306

XG Boost Classifier Metrics: 
              precision    recall  f1-score   support

           0       0.72      0.71      0.72       669
           1       0.81      0.85      0.83       698
           2       0.58      0.67      0.62       753
           3       0.63      0.68      0.66       734
           4       0.62      0.61      0.62       750
           5       0.51      0.66      0.57       654
           6       0.47      0.42      0.44       689
           7       0.45      0.54      0.49       734
           8       0.77      0.89      0.82       545
           9       0.89      0.

In [10]:
# 10. Artificial Neural Network on W300_O25_Features
# Prints the best parameters/score found by grid search and by random search,
# followed by a per-class precision/recall/F1 report (see output below).
# NOTE(review): the saved execution count of this cell (10) is out of sequence
# with the preceding W300_O25 cells (166-172), so it was run in a different
# pass; Restart & Run All to confirm the results are reproducible in order.
evaluate_model.ann_model()

#----------------------- #10. Artificial Neural Net Model ------------------------#
#---------- Grid Search ------------#
Best parameters:  {'batch_size': 50, 'epochs': 150, 'model__activation': 'relu', 'model__n_neurons': 16}
Best score:  0.4399485755840798

#---------- Random Search -----------#
Best parameters:  {'model__n_neurons': 16, 'model__activation': 'tanh', 'epochs': 150, 'batch_size': 50}
Best score:  0.43314489257241356

Artificial Neural Net Metrics: 
              precision    recall  f1-score   support

           0       0.54      0.55      0.55       669
           1       0.60      0.70      0.65       698
           2       0.38      0.59      0.46       753
           3       0.39      0.56      0.46       734
           4       0.46      0.41      0.43       750
           5       0.38      0.40      0.39       654
           6       0.35      0.17      0.23       689
           7       0.29      0.31      0.30       734
           8       0.65      0.74      0.69

In [11]:
# Evaluation for W300_O50_Features
# Rebinds `evaluate_model` to an object built from this CSV, so the model cells
# below operate on it until the name is rebound again.
# W300_O50 presumably encodes the windowing used for feature extraction
# (window 300, overlap 50) — TODO confirm.
evaluate_model = ModelTuningAndEvaluation(path + "W300_O50_Features.csv")

In [12]:
# 1. Logistic Regression on W300_O50_Features
# Prints the best parameters/score found by grid search and by random search,
# followed by a per-class precision/recall/F1 report (see output below).
evaluate_model.logistic_regression_model()

#------------------- #1. Logistic Regression Model --------------------#
#---------- Grid Search ------------#
Best parameters:  {'C': 10, 'max_iter': 500, 'penalty': 'l2', 'solver': 'lbfgs'}
Best score:  0.4024584215793117

#---------- Random Search -----------#
Best parameters:  {'solver': 'lbfgs', 'penalty': 'l2', 'max_iter': 500, 'C': 10}
Best score:  0.4024584215793117

Logistic Regression Metrics: 
              precision    recall  f1-score   support

           0       0.32      0.45      0.37       999
           1       0.43      0.61      0.51      1045
           2       0.31      0.33      0.32      1127
           3       0.27      0.58      0.37      1099
           4       0.39      0.34      0.37      1123
           5       0.45      0.49      0.47       980
           6       0.41      0.33      0.37      1032
           7       0.21      0.17      0.19      1099
           8       0.69      0.75      0.72       816
           9       0.88      0.82      0.85       8

In [13]:
# 2. Decision Tree Classifier on W300_O50_Features
# Prints the best parameters/score found by grid search and by random search,
# followed by a per-class precision/recall/F1 report (see output below).
# NOTE(review): the method name and its printed banner spell "Decision" as
# "Decission"; fixing that requires renaming the method in the class definition.
evaluate_model.decission_tree_classifier_model()

#-------------------- #2. Decission Tree Classifier Model --------------------#
#---------- Grid Search ------------#
Best parameters:  {'criterion': 'entropy', 'max_depth': None, 'min_samples_split': 2}
Best score:  0.5042730758949684

#---------- Random Search -----------#
Best parameters:  {'min_samples_split': 2, 'max_depth': None, 'criterion': 'entropy'}
Best score:  0.5013341849498626

Decission Tree Classifier Metrics: 
              precision    recall  f1-score   support

           0       0.70      0.68      0.69       999
           1       0.76      0.78      0.77      1045
           2       0.57      0.58      0.57      1127
           3       0.59      0.59      0.59      1099
           4       0.51      0.48      0.49      1123
           5       0.44      0.41      0.42       980
           6       0.32      0.33      0.33      1032
           7       0.43      0.45      0.44      1099
           8       0.66      0.66      0.66       816
           9       0.87     

In [14]:
# 3. Random Forest Classifier on W300_O50_Features
# Prints the best parameters/score found by grid search and by random search,
# followed by a per-class precision/recall/F1 report (see output below).
evaluate_model.random_forest_classifier_model()

#-------------------- #3. Random Forest Classifier Model --------------------#
#---------- Grid Search ------------#
Best parameters:  {'bootstrap': True, 'criterion': 'entropy', 'max_depth': 20, 'n_estimators': 100}
Best score:  0.611169156402717

#---------- Random Search -----------#
Best parameters:  {'n_estimators': 100, 'max_depth': 20, 'criterion': 'entropy', 'bootstrap': True}
Best score:  0.6202533516336645

Random Forest Classifier Metrics: 
              precision    recall  f1-score   support

           0       0.77      0.81      0.79       999
           1       0.81      0.87      0.84      1045
           2       0.62      0.76      0.68      1127
           3       0.65      0.72      0.68      1099
           4       0.56      0.68      0.61      1123
           5       0.48      0.64      0.55       980
           6       0.48      0.44      0.46      1032
           7       0.55      0.56      0.55      1099
           8       0.77      0.88      0.82       816
   

In [15]:
# 4. Gaussian Naive Bayes on W300_O50_Features
# Prints the best parameters/score found by grid search and by random search,
# followed by a per-class precision/recall/F1 report (see output below).
# NOTE(review): the method name and its printed banner spell "Bayes" as "Bias";
# fixing that requires renaming the method in the class definition.
evaluate_model.gaussian_naive_bias_classifier_model()

#-------------------- #4. Gaussian Naive Bias Classifier Model --------------------#
#---------- Grid Search ------------#
Best parameters:  {'var_smoothing': 1e-09}
Best score:  0.25708281071151956

#---------- Random Search -----------#
Best parameters:  {'var_smoothing': 1e-09}
Best score:  0.25708281071151956

Gaussian Naive Bias Classifier Metrics: 
              precision    recall  f1-score   support

           0       0.13      0.86      0.23       999
           1       0.20      0.06      0.10      1045
           2       0.14      0.10      0.11      1127
           3       0.12      0.07      0.09      1099
           4       0.26      0.17      0.21      1123
           5       0.38      0.27      0.32       980
           6       0.27      0.19      0.22      1032
           7       0.11      0.07      0.09      1099
           8       0.55      0.56      0.56       816
           9       0.75      0.74      0.75       851
          10       0.52      0.65      0.57     

In [16]:
# 5. Support Vector Classifier on W300_O50_Features
# Prints the best parameters/score found by grid search and by random search,
# followed by a per-class precision/recall/F1 report (see output below).
evaluate_model.support_vector_classifier_model()

#-------------------- #5. Support Vector Classifier Model --------------------#
#---------- Grid Search ------------#
Best parameters:  {'C': 1000, 'gamma': 0.1, 'kernel': 'rbf'}
Best score:  0.5702810677249329

#---------- Random Search -----------#
Best parameters:  {'kernel': 'rbf', 'gamma': 0.1, 'C': 1000}
Best score:  0.5702810677249329

Support Vector Classifier Metrics: 
              precision    recall  f1-score   support

           0       0.79      0.79      0.79       999
           1       0.82      0.83      0.83      1045
           2       0.59      0.70      0.64      1127
           3       0.60      0.69      0.64      1099
           4       0.60      0.63      0.61      1123
           5       0.47      0.54      0.50       980
           6       0.43      0.38      0.40      1032
           7       0.54      0.48      0.51      1099
           8       0.76      0.78      0.77       816
           9       0.90      0.90      0.90       851
          10       0.72 

In [17]:
# 6. K-Nearest Neighbors on W300_O50_Features
# Prints the best parameters/score found by grid search and by random search,
# followed by a per-class precision/recall/F1 report (see output below).
evaluate_model.knn_classifier_model()

#-------------------- #6. K-Nearest Neighbors Classifier Model --------------------#
#---------- Grid Search ------------#
Best parameters:  {'metric': 'minkowski', 'n_neighbors': 100, 'p': 1, 'weights': 'distance'}
Best score:  0.4599153352661773

#---------- Random Search -----------#
Best parameters:  {'weights': 'distance', 'p': 1, 'n_neighbors': 100, 'metric': 'minkowski'}
Best score:  0.4599153352661773

K-Nearest Neighbors Metrics: 
              precision    recall  f1-score   support

           0       0.61      0.71      0.65       999
           1       0.59      0.74      0.65      1045
           2       0.43      0.70      0.54      1127
           3       0.45      0.71      0.55      1099
           4       0.56      0.45      0.50      1123
           5       0.40      0.46      0.43       980
           6       0.36      0.28      0.32      1032
           7       0.38      0.43      0.40      1099
           8       0.48      0.83      0.61       816
           9   

In [18]:
# 7. AdaBoost Classifier on W300_O50_Features
# Prints the best parameters/score found by grid search and by random search,
# followed by a per-class precision/recall/F1 report (see output below).
evaluate_model.ada_boost_classifier_model()

#-------------------- #7. Ada-Boost Classifier Model --------------------#
#---------- Grid Search ------------#
Best parameters:  {'estimator': DecisionTreeClassifier(max_depth=1), 'learning_rate': 0.5, 'n_estimators': 50}
Best score:  0.2124584044441703

#---------- Random Search -----------#
Best parameters:  {'n_estimators': 50, 'learning_rate': 0.5, 'estimator': DecisionTreeClassifier(max_depth=1)}
Best score:  0.2124584044441703

Ada-Boost Classifier Metrics: 
              precision    recall  f1-score   support

           0       0.24      0.42      0.31       999
           1       0.02      0.00      0.01      1045
           2       0.11      0.12      0.12      1127
           3       0.09      0.04      0.06      1099
           4       0.16      0.15      0.15      1123
           5       0.34      0.59      0.43       980
           6       0.12      0.22      0.16      1032
           7       0.11      0.00      0.01      1099
           8       0.30      0.22      0.2

In [19]:
# 8. Gradient Boost on W300_O50_Features
# Prints the best parameters/score found by grid search and by random search,
# followed by a per-class precision/recall/F1 report (see output below).
# NOTE(review): this is step 8 here, but the banner printed by the method is
# labelled "#9." — the Gradient Boost / XG Boost banner numbers in the class
# are swapped relative to the cell numbering.
evaluate_model.gradient_boost_model()

#-------------------- #9. Gradient Boost Classifier Model --------------------#
#---------- Grid Search ------------#
Best parameters:  {'learning_rate': 0.2, 'max_depth': 3, 'n_estimators': 100, 'random_state': 42}
Best score:  0.5905928501908856

#---------- Random Search -----------#
Best parameters:  {'random_state': 42, 'n_estimators': 100, 'max_depth': 3, 'learning_rate': 0.2}
Best score:  0.5905928501908856

Gradient Boost Classifier Metrics: 
              precision    recall  f1-score   support

           0       0.78      0.75      0.76       999
           1       0.81      0.88      0.84      1045
           2       0.62      0.71      0.66      1127
           3       0.65      0.66      0.66      1099
           4       0.65      0.67      0.66      1123
           5       0.50      0.58      0.53       980
           6       0.40      0.38      0.39      1032
           7       0.46      0.50      0.48      1099
           8       0.79      0.84      0.82       816
    

In [20]:
# 9. XGBoost Classifier on W300_O50_Features
# Prints the best parameters/score found by grid search and by random search,
# followed by a per-class precision/recall/F1 report (see output below).
# NOTE(review): this is step 9 here, but the banner printed by the method is
# labelled "#8." and is missing the leading "#" the other banners have.
evaluate_model.xg_boost_classifier_model()

-------------------- #8. XG Boost Classifier Model --------------------#
#---------- Grid Search ------------#
Best parameters:  {'gamma': 0, 'learning_rate': 0.2, 'n_estimators': 50, 'subsample': 0.8}
Best score:  0.6229268620758196

#---------- Random Search -----------#
Best parameters:  {'subsample': 0.8, 'n_estimators': 50, 'learning_rate': 0.2, 'gamma': 0}
Best score:  0.6229268620758196

XG Boost Classifier Metrics: 
              precision    recall  f1-score   support

           0       0.78      0.79      0.78       999
           1       0.83      0.89      0.86      1045
           2       0.65      0.73      0.69      1127
           3       0.70      0.72      0.71      1099
           4       0.68      0.68      0.68      1123
           5       0.51      0.65      0.57       980
           6       0.48      0.41      0.44      1032
           7       0.49      0.57      0.53      1099
           8       0.81      0.86      0.83       816
           9       0.94      0.

In [21]:
# 10. Artificial Neural Network on W300_O50_Features
# Prints the best parameters/score found by grid search and by random search,
# followed by a per-class precision/recall/F1 report (see output below).
evaluate_model.ann_model()

#----------------------- #10. Artificial Neural Net Model ------------------------#
#---------- Grid Search ------------#
Best parameters:  {'batch_size': 50, 'epochs': 150, 'model__activation': 'tanh', 'model__n_neurons': 16}
Best score:  0.4935846030473136

#---------- Random Search -----------#
Best parameters:  {'model__n_neurons': 16, 'model__activation': 'relu', 'epochs': 150, 'batch_size': 100}
Best score:  0.4791503882823049

Artificial Neural Net Metrics: 
              precision    recall  f1-score   support

           0       0.58      0.62      0.60       999
           1       0.71      0.72      0.71      1045
           2       0.43      0.46      0.44      1127
           3       0.38      0.73      0.50      1099
           4       0.57      0.58      0.58      1123
           5       0.48      0.51      0.49       980
           6       0.49      0.28      0.36      1032
           7       0.31      0.34      0.32      1099
           8       0.72      0.79      0.75

In [152]:
# Evaluation for W400_O25_Features
# Rebinds `evaluate_model` to an object built from this CSV, so the model cells
# below operate on it until the name is rebound again.
# W400_O25 presumably encodes the windowing used for feature extraction
# (window 400, overlap 25) — TODO confirm.
evaluate_model = ModelTuningAndEvaluation(path + "W400_O25_Features.csv")

In [162]:
# 1. Logistic Regression on W400_O25_Features
# Prints the best parameters/score found by grid search and by random search,
# followed by a per-class precision/recall/F1 report (see output below).
evaluate_model.logistic_regression_model()

#------------------- #1. Logistic Regression Model --------------------#
#---------- Grid Search ------------#
Best parameters:  {'C': 10, 'max_iter': 500, 'penalty': 'l2', 'solver': 'lbfgs'}
Best score:  0.4081640780565512

#---------- Random Search -----------#
Best parameters:  {'solver': 'lbfgs', 'penalty': 'l2', 'max_iter': 500, 'C': 10}
Best score:  0.4081640780565512

Logistic Regression Metrics: 
              precision    recall  f1-score   support

           0       0.33      0.48      0.39       498
           1       0.45      0.64      0.53       520
           2       0.32      0.38      0.35       561
           3       0.28      0.49      0.36       546
           4       0.39      0.33      0.35       558
           5       0.45      0.48      0.47       488
           6       0.44      0.38      0.41       514
           7       0.24      0.24      0.24       548
           8       0.63      0.68      0.66       406
           9       0.92      0.86      0.89       4

In [154]:
# 2. Decision Tree Classifier on W400_O25_Features
# Prints the best parameters/score found by grid search and by random search,
# followed by a per-class precision/recall/F1 report (see output below).
# NOTE(review): the method name and its printed banner spell "Decision" as
# "Decission"; fixing that requires renaming the method in the class definition.
evaluate_model.decission_tree_classifier_model()

#-------------------- #2. Decission Tree Classifier Model --------------------#
#---------- Grid Search ------------#
Best parameters:  {'criterion': 'entropy', 'max_depth': 20, 'min_samples_split': 2}
Best score:  0.46670562568178275

#---------- Random Search -----------#
Best parameters:  {'min_samples_split': 5, 'max_depth': 20, 'criterion': 'entropy'}
Best score:  0.4624149394836632

Decission Tree Classifier Metrics: 
              precision    recall  f1-score   support

           0       0.61      0.66      0.64       498
           1       0.76      0.73      0.74       520
           2       0.53      0.61      0.57       561
           3       0.50      0.56      0.53       546
           4       0.46      0.45      0.45       558
           5       0.42      0.37      0.40       488
           6       0.27      0.27      0.27       514
           7       0.39      0.40      0.40       548
           8       0.60      0.59      0.59       406
           9       0.83      0.

In [155]:
# 3. Random Forest Classifier on W400_O25_Features
# Prints the best parameters/score found by grid search and by random search,
# followed by a per-class precision/recall/F1 report (see output below).
evaluate_model.random_forest_classifier_model()

#-------------------- #3. Random Forest Classifier Model --------------------#
#---------- Grid Search ------------#
Best parameters:  {'bootstrap': True, 'criterion': 'gini', 'max_depth': 20, 'n_estimators': 100}
Best score:  0.5478087718386924

#---------- Random Search -----------#
Best parameters:  {'n_estimators': 100, 'max_depth': 20, 'criterion': 'gini', 'bootstrap': True}
Best score:  0.5478139663740411

Random Forest Classifier Metrics: 
              precision    recall  f1-score   support

           0       0.72      0.76      0.74       498
           1       0.75      0.83      0.79       520
           2       0.54      0.75      0.63       561
           3       0.57      0.64      0.60       546
           4       0.50      0.62      0.55       558
           5       0.48      0.63      0.54       488
           6       0.43      0.42      0.42       514
           7       0.49      0.43      0.45       548
           8       0.68      0.87      0.76       406
        

In [156]:
# 4. Gaussian Naive Bayes on W400_O25_Features
# Prints the best parameters/score found by grid search and by random search,
# followed by a per-class precision/recall/F1 report (see output below).
# NOTE(review): the method name and its printed banner spell "Bayes" as "Bias";
# fixing that requires renaming the method in the class definition.
evaluate_model.gaussian_naive_bias_classifier_model()

#-------------------- #4. Gaussian Naive Bias Classifier Model --------------------#
#---------- Grid Search ------------#
Best parameters:  {'var_smoothing': 1e-09}
Best score:  0.2814148182778384

#---------- Random Search -----------#
Best parameters:  {'var_smoothing': 1e-09}
Best score:  0.2814148182778384

Gaussian Naive Bias Classifier Metrics: 
              precision    recall  f1-score   support

           0       0.14      0.91      0.25       498
           1       0.15      0.04      0.07       520
           2       0.24      0.12      0.17       561
           3       0.13      0.07      0.09       546
           4       0.25      0.19      0.22       558
           5       0.37      0.27      0.31       488
           6       0.35      0.28      0.31       514
           7       0.15      0.10      0.12       548
           8       0.53      0.62      0.57       406
           9       0.70      0.75      0.72       426
          10       0.56      0.65      0.60       

In [157]:
# 5. Support Vector Classifier on W400_O25_Features
# Prints the best parameters/score found by grid search and by random search,
# followed by a per-class precision/recall/F1 report (see output below).
evaluate_model.support_vector_classifier_model()

#-------------------- #5. Support Vector Classifier Model --------------------#
#---------- Grid Search ------------#
Best parameters:  {'C': 100, 'gamma': 0.1, 'kernel': 'rbf'}
Best score:  0.5290002250965319

#---------- Random Search -----------#
Best parameters:  {'kernel': 'rbf', 'gamma': 0.1, 'C': 100}
Best score:  0.5290002250965319

Support Vector Classifier Metrics: 
              precision    recall  f1-score   support

           0       0.71      0.72      0.71       498
           1       0.76      0.81      0.78       520
           2       0.56      0.76      0.65       561
           3       0.50      0.65      0.56       546
           4       0.57      0.52      0.54       558
           5       0.42      0.52      0.47       488
           6       0.47      0.39      0.43       514
           7       0.51      0.43      0.46       548
           8       0.75      0.80      0.77       406
           9       0.72      0.91      0.80       426
          10       0.78   

In [158]:
# 6. K-Nearest Neighbors on W400_O25_Features
# Prints the best parameters/score found by grid search and by random search,
# followed by a per-class precision/recall/F1 report (see output below).
evaluate_model.knn_classifier_model()

#-------------------- #6. K-Nearest Neighbors Classifier Model --------------------#
#---------- Grid Search ------------#
Best parameters:  {'metric': 'minkowski', 'n_neighbors': 100, 'p': 1, 'weights': 'distance'}
Best score:  0.40815628625352796

#---------- Random Search -----------#
Best parameters:  {'weights': 'distance', 'p': 1, 'n_neighbors': 100, 'metric': 'minkowski'}
Best score:  0.40815628625352796

K-Nearest Neighbors Metrics: 
              precision    recall  f1-score   support

           0       0.48      0.70      0.57       498
           1       0.54      0.73      0.62       520
           2       0.33      0.69      0.45       561
           3       0.40      0.59      0.48       546
           4       0.45      0.20      0.28       558
           5       0.39      0.40      0.39       488
           6       0.25      0.32      0.28       514
           7       0.33      0.31      0.32       548
           8       0.45      0.77      0.56       406
           9 

In [159]:
# 7. AdaBoost Classifier on W400_O25_Features
# Prints the best parameters/score found by grid search and by random search,
# followed by a per-class precision/recall/F1 report (see output below).
evaluate_model.ada_boost_classifier_model()

#-------------------- #7. Ada-Boost Classifier Model --------------------#
#---------- Grid Search ------------#
Best parameters:  {'estimator': DecisionTreeClassifier(max_depth=1), 'learning_rate': 0.5, 'n_estimators': 100}
Best score:  0.19009834986927085

#---------- Random Search -----------#
Best parameters:  {'n_estimators': 100, 'learning_rate': 0.5, 'estimator': DecisionTreeClassifier(max_depth=1)}
Best score:  0.19009834986927085

Ada-Boost Classifier Metrics: 
              precision    recall  f1-score   support

           0       0.39      0.50      0.44       498
           1       0.28      0.24      0.26       520
           2       0.18      0.07      0.10       561
           3       0.13      0.11      0.12       546
           4       0.18      0.40      0.25       558
           5       0.41      0.46      0.43       488
           6       0.25      0.08      0.13       514
           7       0.11      0.27      0.15       548
           8       0.24      0.34     

In [160]:
# 8. Gradient Boost on W400_O25_Features
# Prints the best parameters/score found by grid search and by random search,
# followed by a per-class precision/recall/F1 report (see output below).
# NOTE(review): this is step 8 here, but the banner printed by the method is
# labelled "#9." — the Gradient Boost / XG Boost banner numbers in the class
# are swapped relative to the cell numbering.
evaluate_model.gradient_boost_model()

#-------------------- #9. Gradient Boost Classifier Model --------------------#
#---------- Grid Search ------------#
Best parameters:  {'learning_rate': 0.2, 'max_depth': 3, 'n_estimators': 100, 'random_state': 42}
Best score:  0.5483481377590774

#---------- Random Search -----------#
Best parameters:  {'random_state': 42, 'n_estimators': 100, 'max_depth': 3, 'learning_rate': 0.2}
Best score:  0.5483481377590774

Gradient Boost Classifier Metrics: 
              precision    recall  f1-score   support

           0       0.69      0.70      0.69       498
           1       0.78      0.79      0.78       520
           2       0.59      0.71      0.65       561
           3       0.57      0.64      0.60       546
           4       0.59      0.59      0.59       558
           5       0.48      0.62      0.55       488
           6       0.40      0.41      0.41       514
           7       0.45      0.45      0.45       548
           8       0.75      0.82      0.78       406
    

In [161]:
# 9. XGBoost Classifier on W400_O25_Features
# Prints the best parameters/score found by grid search and by random search,
# followed by a per-class precision/recall/F1 report (see output below).
# NOTE(review): this is step 9 here, but the banner printed by the method is
# labelled "#8." and is missing the leading "#" the other banners have.
evaluate_model.xg_boost_classifier_model()

-------------------- #8. XG Boost Classifier Model --------------------#
#---------- Grid Search ------------#
Best parameters:  {'gamma': 0.1, 'learning_rate': 0.2, 'n_estimators': 50, 'subsample': 1.0}
Best score:  0.5698284071823109

#---------- Random Search -----------#
Best parameters:  {'subsample': 1.0, 'n_estimators': 50, 'learning_rate': 0.2, 'gamma': 0.1}
Best score:  0.5698284071823109

XG Boost Classifier Metrics: 
              precision    recall  f1-score   support

           0       0.74      0.76      0.75       498
           1       0.78      0.84      0.81       520
           2       0.61      0.71      0.66       561
           3       0.57      0.69      0.63       546
           4       0.64      0.65      0.64       558
           5       0.54      0.66      0.59       488
           6       0.43      0.41      0.42       514
           7       0.47      0.46      0.46       548
           8       0.74      0.85      0.79       406
           9       0.91    

In [153]:
# 10. Artificial Neural Network on W400_O25_Features
# Prints the best parameters/score found by grid search and by random search,
# followed by a per-class precision/recall/F1 report (see output below).
evaluate_model.ann_model()

#----------------------- #10. Artificial Neural Net Model ------------------------#
#---------- Grid Search ------------#
Best parameters:  {'batch_size': 50, 'epochs': 150, 'model__activation': 'relu', 'model__n_neurons': 16}
Best score:  0.44199954980693645

#---------- Random Search -----------#
Best parameters:  {'model__n_neurons': 16, 'model__activation': 'tanh', 'epochs': 150, 'batch_size': 50}
Best score:  0.42964694474745907

Artificial Neural Net Metrics: 
              precision    recall  f1-score   support

           0       0.60      0.53      0.56       498
           1       0.55      0.75      0.64       520
           2       0.42      0.60      0.50       561
           3       0.30      0.41      0.35       546
           4       0.39      0.48      0.43       558
           5       0.43      0.41      0.42       488
           6       0.44      0.29      0.35       514
           7       0.30      0.32      0.31       548
           8       0.66      0.77      0.7

In [148]:
# Evaluation for W400_O50_Features
# Rebinds `evaluate_model` to an object built from this CSV, so the model cells
# below operate on it until the name is rebound again.
# W400_O50 presumably encodes the windowing used for feature extraction
# (window 400, overlap 50) — TODO confirm.
# NOTE(review): the saved outputs of the model cells below carry earlier
# execution counts (121 onwards) than this cell (148), i.e. they were produced
# before this cell last ran; Restart & Run All to confirm they still match.
evaluate_model = ModelTuningAndEvaluation(path + "W400_O50_Features.csv")

In [121]:
# 1. Logistic Regression on W400_O50_Features
# Prints the best parameters/score found by grid search and by random search,
# followed by a per-class precision/recall/F1 report (see output below).
evaluate_model.logistic_regression_model()

#------------------- #1. Logistic Regression Model --------------------#
#---------- Grid Search ------------#
Best parameters:  {'C': 10, 'max_iter': 500, 'penalty': 'l2', 'solver': 'lbfgs'}
Best score:  0.4248295658414065

#---------- Random Search -----------#
Best parameters:  {'solver': 'lbfgs', 'penalty': 'l2', 'max_iter': 500, 'C': 10}
Best score:  0.4248295658414065

Logistic Regression Metrics: 
              precision    recall  f1-score   support

           0       0.34      0.44      0.38       745
           1       0.45      0.66      0.53       778
           2       0.31      0.34      0.32       839
           3       0.26      0.57      0.36       818
           4       0.39      0.36      0.37       837
           5       0.47      0.50      0.48       730
           6       0.47      0.38      0.42       770
           7       0.23      0.23      0.23       818
           8       0.66      0.71      0.68       607
           9       0.87      0.86      0.87       6

In [122]:
# 2. Decision Tree Classifier on W400_O50_Features
# Prints the best parameters/score found by grid search and by random search,
# followed by a per-class precision/recall/F1 report (see output below).
# NOTE(review): the method name and its printed banner spell "Decision" as
# "Decission"; fixing that requires renaming the method in the class definition.
evaluate_model.decission_tree_classifier_model()

#-------------------- #2. Decission Tree Classifier Model --------------------#
#---------- Grid Search ------------#
Best parameters:  {'criterion': 'entropy', 'max_depth': 50, 'min_samples_split': 2}
Best score:  0.4876210979547901

#---------- Random Search -----------#
Best parameters:  {'min_samples_split': 2, 'max_depth': 50, 'criterion': 'entropy'}
Best score:  0.4876210979547901

Decission Tree Classifier Metrics: 
              precision    recall  f1-score   support

           0       0.61      0.63      0.62       745
           1       0.66      0.67      0.67       778
           2       0.65      0.60      0.62       839
           3       0.55      0.51      0.53       818
           4       0.44      0.45      0.44       837
           5       0.43      0.40      0.41       730
           6       0.35      0.38      0.36       770
           7       0.44      0.45      0.45       818
           8       0.70      0.70      0.70       607
           9       0.83      0.8

In [123]:
# 3. Random Forest Classifier on W400_O50_Features
# Prints the best parameters/score found by grid search and by random search,
# followed by a per-class precision/recall/F1 report (see output below).
evaluate_model.random_forest_classifier_model()

#-------------------- #3. Random Forest Classifier Model --------------------#
#---------- Grid Search ------------#
Best parameters:  {'bootstrap': True, 'criterion': 'gini', 'max_depth': 20, 'n_estimators': 100}
Best score:  0.6067456045927521

#---------- Random Search -----------#
Best parameters:  {'n_estimators': 100, 'max_depth': 20, 'criterion': 'entropy', 'bootstrap': True}
Best score:  0.6092572658772873

Random Forest Classifier Metrics: 
              precision    recall  f1-score   support

           0       0.79      0.83      0.80       745
           1       0.83      0.85      0.84       778
           2       0.60      0.77      0.67       839
           3       0.64      0.68      0.66       818
           4       0.59      0.66      0.62       837
           5       0.52      0.70      0.60       730
           6       0.52      0.46      0.49       770
           7       0.48      0.59      0.53       818
           8       0.77      0.85      0.81       607
     

In [124]:
# 4. Gaussian Naive Bayes on W400_O50_Features
# Prints the grid-search and random-search winners, then a per-class report.
# The only hyperparameter reported is var_smoothing. The method name (and its printed
# banner) spell the model "naive_bias"; Naive Bayes is what is meant.
evaluate_model.gaussian_naive_bias_classifier_model()

#-------------------- #4. Gaussian Naive Bias Classifier Model --------------------#
#---------- Grid Search ------------#
Best parameters:  {'var_smoothing': 1e-09}
Best score:  0.2938643702906351

#---------- Random Search -----------#
Best parameters:  {'var_smoothing': 1e-09}
Best score:  0.2938643702906351

Gaussian Naive Bias Classifier Metrics: 
              precision    recall  f1-score   support

           0       0.28      0.73      0.40       745
           1       0.24      0.05      0.08       778
           2       0.10      0.05      0.07       839
           3       0.18      0.73      0.29       818
           4       0.26      0.21      0.23       837
           5       0.38      0.28      0.32       730
           6       0.31      0.25      0.28       770
           7       0.15      0.10      0.12       818
           8       0.55      0.59      0.57       607
           9       0.76      0.74      0.75       633
          10       0.57      0.64      0.60       

In [125]:
# 5. Support Vector Classifier on W400_O50_Features
# Prints the grid-search and random-search winners, then a per-class report.
# Hyperparameters searched (per the printed best parameters): C, gamma, kernel.
evaluate_model.support_vector_classifier_model()

#-------------------- #5. Support Vector Classifier Model --------------------#
#---------- Grid Search ------------#
Best parameters:  {'C': 1000, 'gamma': 0.1, 'kernel': 'rbf'}
Best score:  0.5744528166487263

#---------- Random Search -----------#
Best parameters:  {'kernel': 'rbf', 'gamma': 0.1, 'C': 1000}
Best score:  0.5744528166487263

Support Vector Classifier Metrics: 
              precision    recall  f1-score   support

           0       0.79      0.77      0.78       745
           1       0.84      0.85      0.85       778
           2       0.59      0.72      0.65       839
           3       0.61      0.65      0.63       818
           4       0.58      0.61      0.60       837
           5       0.48      0.56      0.52       730
           6       0.49      0.44      0.46       770
           7       0.50      0.47      0.48       818
           8       0.76      0.82      0.79       607
           9       0.91      0.90      0.90       633
          10       0.72 

In [126]:
# 6. K-Nearest Neighbors on W400_O50_Features
# Prints the grid-search and random-search winners, then a per-class report.
# Hyperparameters searched (per the printed best parameters): metric, n_neighbors, p,
# weights.
evaluate_model.knn_classifier_model()

#-------------------- #6. K-Nearest Neighbors Classifier Model --------------------#
#---------- Grid Search ------------#
Best parameters:  {'metric': 'minkowski', 'n_neighbors': 100, 'p': 1, 'weights': 'distance'}
Best score:  0.45461069250089703

#---------- Random Search -----------#
Best parameters:  {'weights': 'distance', 'p': 1, 'n_neighbors': 100, 'metric': 'minkowski'}
Best score:  0.45461069250089703

K-Nearest Neighbors Metrics: 
              precision    recall  f1-score   support

           0       0.58      0.73      0.64       745
           1       0.56      0.76      0.65       778
           2       0.38      0.76      0.51       839
           3       0.49      0.61      0.54       818
           4       0.43      0.32      0.37       837
           5       0.44      0.40      0.42       730
           6       0.34      0.30      0.32       770
           7       0.32      0.40      0.36       818
           8       0.39      0.72      0.51       607
           9 

In [127]:
# 7. AdaBoost Classifier on W400_O50_Features
# Prints the grid-search and random-search winners, then a per-class report.
# Hyperparameters searched (per the printed best parameters): estimator (the base
# learner), learning_rate, n_estimators.
evaluate_model.ada_boost_classifier_model()

#-------------------- #7. Ada-Boost Classifier Model --------------------#
#---------- Grid Search ------------#
Best parameters:  {'estimator': DecisionTreeClassifier(max_depth=1), 'learning_rate': 0.5, 'n_estimators': 50}
Best score:  0.24255471833512735

#---------- Random Search -----------#
Best parameters:  {'n_estimators': 50, 'learning_rate': 0.5, 'estimator': DecisionTreeClassifier(max_depth=1)}
Best score:  0.24183710082526014

Ada-Boost Classifier Metrics: 
              precision    recall  f1-score   support

           0       0.21      0.57      0.31       745
           1       0.14      0.01      0.01       778
           2       0.15      0.64      0.25       839
           3       0.00      0.00      0.00       818
           4       0.23      0.16      0.19       837
           5       0.35      0.59      0.44       730
           6       0.36      0.11      0.17       770
           7       0.25      0.02      0.03       818
           8       0.29      0.58      0

In [151]:
# 8. Gradient Boost on W400_O50_Features
# Prints the grid-search and random-search winners, then a per-class report.
# NOTE(review): this is step 8 in notebook order, but the banner the method prints
# says "#9" (the XGBoost banner says "#8") - align the numbering in one place.
# NOTE(review): random_state shows up among the best parameters, so it appears to be
# part of the search space; it is a seed and would normally be fixed, not tuned.
evaluate_model.gradient_boost_model()

#-------------------- #9. Gradient Boost Classifier Model --------------------#
#---------- Grid Search ------------#
Best parameters:  {'learning_rate': 0.2, 'max_depth': 3, 'n_estimators': 100, 'random_state': 42}
Best score:  0.6081808396124865

#---------- Random Search -----------#
Best parameters:  {'random_state': 42, 'n_estimators': 100, 'max_depth': 3, 'learning_rate': 0.2}
Best score:  0.6081808396124865

Gradient Boost Classifier Metrics: 
              precision    recall  f1-score   support

           0       0.74      0.78      0.76       745
           1       0.89      0.83      0.86       778
           2       0.67      0.68      0.67       839
           3       0.62      0.70      0.66       818
           4       0.64      0.65      0.65       837
           5       0.57      0.64      0.60       730
           6       0.47      0.46      0.46       770
           7       0.46      0.58      0.52       818
           8       0.80      0.84      0.82       607
    

In [150]:
# 9. XGBoost Classifier on W400_O50_Features
# Prints the grid-search and random-search winners, then a per-class report.
# Hyperparameters searched (per the printed best parameters): gamma, learning_rate,
# n_estimators, subsample.
# NOTE(review): this is step 9 in notebook order, but the banner the method prints
# says "#8" and, unlike the other banners, has no leading "#".
evaluate_model.xg_boost_classifier_model()

-------------------- #8. XG Boost Classifier Model --------------------#
#---------- Grid Search ------------#
Best parameters:  {'gamma': 0, 'learning_rate': 0.2, 'n_estimators': 50, 'subsample': 1.0}
Best score:  0.6250448510943668

#---------- Random Search -----------#
Best parameters:  {'subsample': 1.0, 'n_estimators': 50, 'learning_rate': 0.2, 'gamma': 0}
Best score:  0.6250448510943668

XG Boost Classifier Metrics: 
              precision    recall  f1-score   support

           0       0.75      0.81      0.78       745
           1       0.89      0.85      0.87       778
           2       0.64      0.70      0.67       839
           3       0.62      0.70      0.66       818
           4       0.67      0.67      0.67       837
           5       0.57      0.65      0.61       730
           6       0.49      0.45      0.47       770
           7       0.47      0.62      0.53       818
           8       0.77      0.83      0.80       607
           9       0.92      0.

In [149]:
# 10. Artificial Neural Network on W400_O50_Features
# Prints the grid-search and random-search winners, then a per-class report.
# Hyperparameters searched (per the printed best parameters): batch_size, epochs,
# model__activation, model__n_neurons.
evaluate_model.ann_model()

#----------------------- #10. Artificial Neural Net Model ------------------------#
#---------- Grid Search ------------#
Best parameters:  {'batch_size': 50, 'epochs': 150, 'model__activation': 'tanh', 'model__n_neurons': 16}
Best score:  0.4786508790814496

#---------- Random Search -----------#
Best parameters:  {'model__n_neurons': 16, 'model__activation': 'tanh', 'epochs': 150, 'batch_size': 50}
Best score:  0.5001794043774668

Artificial Neural Net Metrics: 
              precision    recall  f1-score   support

           0       0.57      0.58      0.57       745
           1       0.73      0.62      0.67       778
           2       0.41      0.65      0.51       839
           3       0.42      0.62      0.50       818
           4       0.54      0.54      0.54       837
           5       0.52      0.54      0.53       730
           6       0.38      0.30      0.34       770
           7       0.32      0.32      0.32       818
           8       0.71      0.77      0.74 

In [140]:
# Evaluation for W500_O25_Features
# Rebinds `evaluate_model`, so every model cell run after this one evaluates this
# feature file. Re-run this cell first if the kernel last loaded a different dataset,
# otherwise the results below will silently belong to another file.
# NOTE(review): `path` is defined outside this cell and is joined by plain string
# concatenation, so it presumably ends with a path separator - confirm.
evaluate_model = ModelTuningAndEvaluation(path + "W500_O25_Features.csv")

In [104]:
# 1. Logistic Regression on W500_O25_Features
# Prints the grid-search and random-search winners, then a per-class report.
# Hyperparameters searched (per the printed best parameters): C, max_iter, penalty,
# solver.
evaluate_model.logistic_regression_model()

#------------------- #1. Logistic Regression Model --------------------#
#---------- Grid Search ------------#
Best parameters:  {'C': 10, 'max_iter': 100, 'penalty': 'l2', 'solver': 'lbfgs'}
Best score:  0.4216682958618443

#---------- Random Search -----------#
Best parameters:  {'solver': 'lbfgs', 'penalty': 'l2', 'max_iter': 100, 'C': 10}
Best score:  0.4216682958618443

Logistic Regression Metrics: 
              precision    recall  f1-score   support

           0       0.28      0.38      0.32       397
           1       0.43      0.65      0.51       414
           2       0.35      0.34      0.34       447
           3       0.28      0.53      0.37       437
           4       0.33      0.29      0.31       448
           5       0.48      0.49      0.49       388
           6       0.42      0.38      0.39       410
           7       0.20      0.21      0.20       439
           8       0.60      0.70      0.64       324
           9       0.89      0.80      0.84       3

In [105]:
# 2. Decision Tree Classifier on W500_O25_Features
# Prints the grid-search and random-search winners, then a per-class report.
# Hyperparameters searched (per the printed best parameters): criterion, max_depth,
# min_samples_split. `decission` is the method's actual spelling, as in its banner.
evaluate_model.decission_tree_classifier_model()

#-------------------- #2. Decission Tree Classifier Model --------------------#
#---------- Grid Search ------------#
Best parameters:  {'criterion': 'entropy', 'max_depth': 50, 'min_samples_split': 5}
Best score:  0.42973281199087654

#---------- Random Search -----------#
Best parameters:  {'min_samples_split': 5, 'max_depth': 20, 'criterion': 'entropy'}
Best score:  0.43041028565222117

Decission Tree Classifier Metrics: 
              precision    recall  f1-score   support

           0       0.57      0.57      0.57       397
           1       0.67      0.71      0.69       414
           2       0.46      0.43      0.44       447
           3       0.46      0.55      0.50       437
           4       0.32      0.31      0.31       448
           5       0.37      0.42      0.39       388
           6       0.25      0.31      0.28       410
           7       0.26      0.30      0.28       439
           8       0.57      0.64      0.60       324
           9       0.85      0

In [106]:
# 3. Random Forest Classifier on W500_O25_Features
# Prints the grid-search and random-search winners, then a per-class report.
# Hyperparameters searched (per the printed best parameters): bootstrap, criterion,
# max_depth, n_estimators.
evaluate_model.random_forest_classifier_model()

#-------------------- #3. Random Forest Classifier Model --------------------#
#---------- Grid Search ------------#
Best parameters:  {'bootstrap': True, 'criterion': 'entropy', 'max_depth': 20, 'n_estimators': 100}
Best score:  0.5635861301183882

#---------- Random Search -----------#
Best parameters:  {'n_estimators': 50, 'max_depth': 20, 'criterion': 'gini', 'bootstrap': True}
Best score:  0.557535027696318

Random Forest Classifier Metrics: 
              precision    recall  f1-score   support

           0       0.63      0.69      0.65       397
           1       0.67      0.82      0.73       414
           2       0.60      0.64      0.62       447
           3       0.57      0.58      0.58       437
           4       0.47      0.60      0.53       448
           5       0.47      0.69      0.56       388
           6       0.40      0.48      0.43       410
           7       0.41      0.46      0.44       439
           8       0.63      0.84      0.72       324
       

In [107]:
# 4. Gaussian Naive Bayes on W500_O25_Features
# Prints the grid-search and random-search winners, then a per-class report.
# The only hyperparameter reported is var_smoothing. The method name (and its printed
# banner) spell the model "naive_bias"; Naive Bayes is what is meant.
evaluate_model.gaussian_naive_bias_classifier_model()

#-------------------- #4. Gaussian Naive Bias Classifier Model --------------------#
#---------- Grid Search ------------#
Best parameters:  {'var_smoothing': 0.0001}
Best score:  0.2837976539589443

#---------- Random Search -----------#
Best parameters:  {'var_smoothing': 0.0001}
Best score:  0.2837976539589443

Gaussian Naive Bias Classifier Metrics: 
              precision    recall  f1-score   support

           0       0.30      0.13      0.18       397
           1       0.29      0.04      0.07       414
           2       0.13      0.06      0.08       447
           3       0.16      0.89      0.27       437
           4       0.32      0.23      0.27       448
           5       0.36      0.28      0.32       388
           6       0.28      0.31      0.30       410
           7       0.17      0.15      0.16       439
           8       0.45      0.61      0.52       324
           9       0.76      0.72      0.74       339
          10       0.60      0.59      0.59     

In [108]:
# 5. Support Vector Classifier on W500_O25_Features
# Prints the grid-search and random-search winners, then a per-class report.
# Hyperparameters searched (per the printed best parameters): C, gamma, kernel.
evaluate_model.support_vector_classifier_model()

#-------------------- #5. Support Vector Classifier Model --------------------#
#---------- Grid Search ------------#
Best parameters:  {'C': 1000, 'gamma': 0.1, 'kernel': 'rbf'}
Best score:  0.534646193113935

#---------- Random Search -----------#
Best parameters:  {'kernel': 'rbf', 'gamma': 0.1, 'C': 1000}
Best score:  0.534646193113935

Support Vector Classifier Metrics: 
              precision    recall  f1-score   support

           0       0.71      0.76      0.73       397
           1       0.79      0.80      0.80       414
           2       0.56      0.60      0.58       447
           3       0.59      0.62      0.60       437
           4       0.58      0.53      0.55       448
           5       0.42      0.53      0.47       388
           6       0.41      0.49      0.45       410
           7       0.44      0.43      0.44       439
           8       0.66      0.77      0.71       324
           9       0.73      0.87      0.79       339
          10       0.76   

In [109]:
# 6. K-Nearest Neighbors on W500_O25_Features
# Prints the grid-search and random-search winners, then a per-class report.
# Hyperparameters searched (per the printed best parameters): metric, n_neighbors, p,
# weights.
evaluate_model.knn_classifier_model()

#-------------------- #6. K-Nearest Neighbors Classifier Model --------------------#
#---------- Grid Search ------------#
Best parameters:  {'metric': 'minkowski', 'n_neighbors': 100, 'p': 1, 'weights': 'distance'}
Best score:  0.3873845986749213

#---------- Random Search -----------#
Best parameters:  {'weights': 'distance', 'p': 1, 'n_neighbors': 100, 'metric': 'minkowski'}
Best score:  0.3873845986749213

K-Nearest Neighbors Metrics: 
              precision    recall  f1-score   support

           0       0.41      0.47      0.44       397
           1       0.49      0.77      0.60       414
           2       0.39      0.64      0.48       447
           3       0.43      0.67      0.52       437
           4       0.40      0.25      0.31       448
           5       0.39      0.37      0.38       388
           6       0.26      0.41      0.32       410
           7       0.28      0.29      0.28       439
           8       0.34      0.70      0.46       324
           9   

In [110]:
# 7. AdaBoost Classifier on W500_O25_Features
# Prints the grid-search and random-search winners, then a per-class report.
# Hyperparameters searched (per the printed best parameters): estimator (the base
# learner), learning_rate, n_estimators.
evaluate_model.ada_boost_classifier_model()

#-------------------- #7. Ada-Boost Classifier Model --------------------#
#---------- Grid Search ------------#
Best parameters:  {'estimator': DecisionTreeClassifier(max_depth=1), 'learning_rate': 0.5, 'n_estimators': 50}
Best score:  0.24207668078635822

#---------- Random Search -----------#
Best parameters:  {'n_estimators': 50, 'learning_rate': 0.5, 'estimator': DecisionTreeClassifier(max_depth=1)}
Best score:  0.24207668078635822

Ada-Boost Classifier Metrics: 
              precision    recall  f1-score   support

           0       0.00      0.00      0.00       397
           1       0.24      0.36      0.29       414
           2       0.09      0.01      0.02       447
           3       0.18      0.91      0.29       437
           4       0.24      0.27      0.25       448
           5       0.36      0.52      0.43       388
           6       0.22      0.10      0.13       410
           7       0.00      0.00      0.00       439
           8       0.30      0.40      0

In [118]:
# 8. Gradient Boost on W500_O25_Features
# Prints the grid-search and random-search winners, then a per-class report.
# NOTE(review): this is step 8 in notebook order, but the banner the method prints
# says "#9" (the XGBoost banner says "#8") - align the numbering in one place.
# NOTE(review): random_state shows up among the best parameters, so it appears to be
# part of the search space; it is a seed and would normally be fixed, not tuned.
evaluate_model.gradient_boost_model()

#-------------------- #9. Gradient Boost Classifier Model --------------------#
#---------- Grid Search ------------#
Best parameters:  {'learning_rate': 0.2, 'max_depth': 3, 'n_estimators': 100, 'random_state': 42}
Best score:  0.5635481155642446

#---------- Random Search -----------#
Best parameters:  {'random_state': 42, 'n_estimators': 100, 'max_depth': 3, 'learning_rate': 0.2}
Best score:  0.5635481155642446

Gradient Boost Classifier Metrics: 
              precision    recall  f1-score   support

           0       0.70      0.72      0.71       397
           1       0.78      0.83      0.80       414
           2       0.62      0.60      0.61       447
           3       0.54      0.55      0.54       437
           4       0.60      0.57      0.59       448
           5       0.50      0.66      0.57       388
           6       0.38      0.45      0.41       410
           7       0.41      0.47      0.44       439
           8       0.67      0.77      0.72       324
    

In [119]:
# 9. XGBoost Classifier on W500_O25_Features
# Prints the grid-search and random-search winners, then a per-class report.
# Hyperparameters searched (per the printed best parameters): gamma, learning_rate,
# n_estimators, subsample.
# NOTE(review): this is step 9 in notebook order, but the banner the method prints
# says "#8" and, unlike the other banners, has no leading "#".
evaluate_model.xg_boost_classifier_model()

-------------------- #8. XG Boost Classifier Model --------------------#
#---------- Grid Search ------------#
Best parameters:  {'gamma': 0, 'learning_rate': 0.2, 'n_estimators': 50, 'subsample': 0.8}
Best score:  0.5884367872271098

#---------- Random Search -----------#
Best parameters:  {'subsample': 0.8, 'n_estimators': 50, 'learning_rate': 0.2, 'gamma': 0}
Best score:  0.5884367872271098

XG Boost Classifier Metrics: 
              precision    recall  f1-score   support

           0       0.68      0.68      0.68       397
           1       0.74      0.85      0.79       414
           2       0.64      0.63      0.63       447
           3       0.60      0.63      0.62       437
           4       0.55      0.56      0.56       448
           5       0.55      0.68      0.61       388
           6       0.44      0.51      0.47       410
           7       0.41      0.48      0.44       439
           8       0.67      0.81      0.73       324
           9       0.94      0.

In [141]:
# 10. Artificial Neural Network on W500_O25_Features
# Prints the grid-search and random-search winners, then a per-class report.
# Hyperparameters searched (per the printed best parameters): batch_size, epochs,
# model__activation, model__n_neurons.
evaluate_model.ann_model()

#----------------------- #10. Artificial Neural Net Model ------------------------#
#---------- Grid Search ------------#
Best parameters:  {'batch_size': 50, 'epochs': 150, 'model__activation': 'tanh', 'model__n_neurons': 64}
Best score:  0.5420060823286631

#---------- Random Search -----------#
Best parameters:  {'model__n_neurons': 64, 'model__activation': 'tanh', 'epochs': 100, 'batch_size': 50}
Best score:  0.5520975887911371

Artificial Neural Net Metrics: 
              precision    recall  f1-score   support

           0       0.58      0.70      0.64       397
           1       0.73      0.78      0.76       414
           2       0.55      0.68      0.61       447
           3       0.55      0.63      0.59       437
           4       0.58      0.61      0.59       448
           5       0.52      0.56      0.54       388
           6       0.43      0.44      0.44       410
           7       0.46      0.52      0.49       439
           8       0.69      0.79      0.74 

In [146]:
# Evaluation for W500_O50_Features
# Rebinds `evaluate_model`, so every model cell run after this one evaluates this
# feature file. Re-run this cell first if the kernel last loaded a different dataset,
# otherwise the results below will silently belong to another file.
# NOTE(review): `path` is defined outside this cell and is joined by plain string
# concatenation, so it presumably ends with a path separator - confirm.
evaluate_model = ModelTuningAndEvaluation(path + "W500_O50_Features.csv")

In [75]:
# 1. Logistic Regression on W500_O50_Features
# Prints the grid-search and random-search winners, then a per-class report.
# Hyperparameters searched (per the printed best parameters): C, max_iter, penalty,
# solver.
evaluate_model.logistic_regression_model()

#------------------- #1. Logistic Regression Model --------------------#
#---------- Grid Search ------------#
Best parameters:  {'C': 10, 'max_iter': 500, 'penalty': 'l2', 'solver': 'lbfgs'}
Best score:  0.437269656864363

#---------- Random Search -----------#
Best parameters:  {'solver': 'lbfgs', 'penalty': 'l2', 'max_iter': 500, 'C': 10}
Best score:  0.437269656864363
Logistic Regression Metrics: 
              precision    recall  f1-score   support

           0       0.38      0.43      0.41       591
           1       0.43      0.66      0.52       619
           2       0.36      0.38      0.37       667
           3       0.29      0.56      0.38       651
           4       0.39      0.34      0.36       667
           5       0.48      0.54      0.51       579
           6       0.44      0.45      0.45       610
           7       0.27      0.19      0.22       652
           8       0.68      0.70      0.69       484
           9       0.90      0.84      0.87       502


In [96]:
# 2. Decision Tree Classifier on W500_O50_Features
# Prints the grid-search and random-search winners, then a per-class report.
# Hyperparameters searched (per the printed best parameters): criterion, max_depth,
# min_samples_split. `decission` is the method's actual spelling, as in its banner.
evaluate_model.decission_tree_classifier_model()

#-------------------- #2. Decission Tree Classifier Model --------------------#
#---------- Grid Search ------------#
Best parameters:  {'criterion': 'gini', 'max_depth': None, 'min_samples_split': 5}
Best score:  0.4787757327280084

#---------- Random Search -----------#
Best parameters:  {'min_samples_split': 2, 'max_depth': 50, 'criterion': 'entropy'}
Best score:  0.4851119643357011

Decission Tree Classifier Metrics: 
              precision    recall  f1-score   support

           0       0.65      0.64      0.65       591
           1       0.73      0.69      0.71       619
           2       0.55      0.55      0.55       667
           3       0.51      0.54      0.52       651
           4       0.42      0.39      0.40       667
           5       0.45      0.46      0.45       579
           6       0.38      0.38      0.38       610
           7       0.38      0.43      0.40       652
           8       0.66      0.68      0.67       484
           9       0.88      0.85

In [97]:
# 3. Random Forest Classifier on W500_O50_Features
# Prints the grid-search and random-search winners, then a per-class report.
# Hyperparameters searched (per the printed best parameters): bootstrap, criterion,
# max_depth, n_estimators.
evaluate_model.random_forest_classifier_model()

#-------------------- #3. Random Forest Classifier Model --------------------#
#---------- Grid Search ------------#
Best parameters:  {'bootstrap': True, 'criterion': 'entropy', 'max_depth': 20, 'n_estimators': 100}
Best score:  0.598825532684736

#---------- Random Search -----------#
Best parameters:  {'n_estimators': 100, 'max_depth': 20, 'criterion': 'gini', 'bootstrap': True}
Best score:  0.6096540284302256

Random Forest Classifier Metrics: 
              precision    recall  f1-score   support

           0       0.76      0.81      0.79       591
           1       0.83      0.84      0.83       619
           2       0.59      0.76      0.67       667
           3       0.66      0.69      0.67       651
           4       0.58      0.60      0.59       667
           5       0.54      0.73      0.62       579
           6       0.49      0.55      0.52       610
           7       0.49      0.54      0.52       652
           8       0.76      0.84      0.80       484
      

In [98]:
# 4. Gaussian Naive Bayes on W500_O50_Features
# Prints the grid-search and random-search winners, then a per-class report.
# The only hyperparameter reported is var_smoothing. The method name (and its printed
# banner) spell the model "naive_bias"; Naive Bayes is what is meant.
evaluate_model.gaussian_naive_bias_classifier_model()

#-------------------- #4. Gaussian Naive Bias Classifier Model --------------------#
#---------- Grid Search ------------#
Best parameters:  {'var_smoothing': 0.0001}
Best score:  0.2847441487495615

#---------- Random Search -----------#
Best parameters:  {'var_smoothing': 0.0001}
Best score:  0.2847441487495615

Gaussian Naive Bias Classifier Metrics: 
              precision    recall  f1-score   support

           0       0.14      0.86      0.25       591
           1       0.31      0.06      0.10       619
           2       0.21      0.16      0.18       667
           3       0.19      0.03      0.05       651
           4       0.29      0.22      0.25       667
           5       0.45      0.28      0.35       579
           6       0.32      0.41      0.36       610
           7       0.14      0.10      0.12       652
           8       0.52      0.60      0.55       484
           9       0.71      0.73      0.72       502
          10       0.57      0.66      0.61     

In [99]:
# 5. Support Vector Classifier on W500_O50_Features
# Prints the grid-search and random-search winners, then a per-class report.
# Hyperparameters searched (per the printed best parameters): C, gamma, kernel.
evaluate_model.support_vector_classifier_model()

#-------------------- #5. Support Vector Classifier Model --------------------#
#---------- Grid Search ------------#
Best parameters:  {'C': 1000, 'gamma': 0.1, 'kernel': 'rbf'}
Best score:  0.5762572533258035

#---------- Random Search -----------#
Best parameters:  {'kernel': 'rbf', 'gamma': 0.1, 'C': 1000}
Best score:  0.5762572533258035

Support Vector Classifier Metrics: 
              precision    recall  f1-score   support

           0       0.77      0.77      0.77       591
           1       0.83      0.84      0.83       619
           2       0.62      0.76      0.69       667
           3       0.63      0.65      0.64       651
           4       0.61      0.65      0.63       667
           5       0.51      0.59      0.54       579
           6       0.52      0.47      0.50       610
           7       0.54      0.49      0.51       652
           8       0.76      0.83      0.79       484
           9       0.87      0.90      0.89       502
          10       0.71 

In [100]:
# 6. K-Nearest Neighbors on W500_O50_Features
# Prints the grid-search and random-search winners, then a per-class report.
# Hyperparameters searched (per the printed best parameters): metric, n_neighbors, p,
# weights.
evaluate_model.knn_classifier_model()

#-------------------- #6. K-Nearest Neighbors Classifier Model --------------------#
#---------- Grid Search ------------#
Best parameters:  {'metric': 'minkowski', 'n_neighbors': 100, 'p': 1, 'weights': 'distance'}
Best score:  0.4300478074695045

#---------- Random Search -----------#
Best parameters:  {'weights': 'distance', 'p': 1, 'n_neighbors': 100, 'metric': 'minkowski'}
Best score:  0.4300478074695045

K-Nearest Neighbors Metrics: 
              precision    recall  f1-score   support

           0       0.57      0.69      0.63       591
           1       0.58      0.72      0.65       619
           2       0.43      0.67      0.52       667
           3       0.43      0.73      0.54       651
           4       0.52      0.32      0.40       667
           5       0.42      0.43      0.42       579
           6       0.28      0.40      0.33       610
           7       0.34      0.39      0.36       652
           8       0.43      0.76      0.55       484
           9   

In [101]:
# 7. AdaBoost Classifier on W500_O50_Features
# Prints the grid-search and random-search winners, then a per-class report.
# Hyperparameters searched (per the printed best parameters): estimator (the base
# learner), learning_rate, n_estimators.
evaluate_model.ada_boost_classifier_model()

#-------------------- #7. Ada-Boost Classifier Model --------------------#
#---------- Grid Search ------------#
Best parameters:  {'estimator': DecisionTreeClassifier(max_depth=1), 'learning_rate': 0.5, 'n_estimators': 50}
Best score:  0.2125354338793726

#---------- Random Search -----------#
Best parameters:  {'n_estimators': 50, 'learning_rate': 0.5, 'estimator': DecisionTreeClassifier(max_depth=1)}
Best score:  0.2125354338793726

Ada-Boost Classifier Metrics: 
              precision    recall  f1-score   support

           0       0.37      0.38      0.38       591
           1       0.00      0.00      0.00       619
           2       0.16      0.31      0.21       667
           3       0.17      0.29      0.22       651
           4       0.20      0.31      0.25       667
           5       0.34      0.58      0.43       579
           6       0.14      0.10      0.12       610
           7       0.13      0.09      0.11       652
           8       0.27      0.08      0.1

In [77]:
# 8. Gradient Boost on W500_O50_Features
# Prints the grid-search and random-search winners, then a per-class report.
# NOTE(review): this is step 8 in notebook order, but the banner the method prints
# says "#9" (the XGBoost banner says "#8") - align the numbering in one place.
# NOTE(review): random_state shows up among the best parameters, so it appears to be
# part of the search space; it is a seed and would normally be fixed, not tuned.
evaluate_model.gradient_boost_model()

#-------------------- #9. Gradient Boost Classifier Model --------------------#
#---------- Grid Search ------------#
Best parameters:  {'learning_rate': 0.1, 'max_depth': 3, 'n_estimators': 100, 'random_state': 42}
Best score:  0.5879945921696476

#---------- Random Search -----------#
Best parameters:  {'random_state': 42, 'n_estimators': 100, 'max_depth': 3, 'learning_rate': 0.1}
Best score:  0.5879945921696476
Gradient Boost Classifier Metrics: 
              precision    recall  f1-score   support

           0       0.74      0.69      0.72       591
           1       0.80      0.84      0.82       619
           2       0.66      0.71      0.69       667
           3       0.62      0.64      0.63       651
           4       0.62      0.64      0.63       667
           5       0.57      0.74      0.65       579
           6       0.45      0.51      0.48       610
           7       0.49      0.53      0.51       652
           8       0.81      0.82      0.81       484
     

In [64]:
# 9. XGBoost Classifier on W500_O50_Features
# Prints the grid-search and random-search winners, then a per-class report.
# Hyperparameters searched (per the printed best parameters): gamma, learning_rate,
# n_estimators, subsample.
# NOTE(review): this is step 9 in notebook order, but the banner the method prints
# says "#8" and, unlike the other banners, has no leading "#".
evaluate_model.xg_boost_classifier_model()

-------------------- #8. XG Boost Classifier Model --------------------#
#---------- Grid Search ------------#
Best parameters:  {'gamma': 0, 'learning_rate': 0.1, 'n_estimators': 100, 'subsample': 0.8}
Best score:  0.628156655946352

#---------- Random Search -----------#
Best parameters:  {'subsample': 0.8, 'n_estimators': 100, 'learning_rate': 0.1, 'gamma': 0}
Best score:  0.628156655946352
XG Boost Classifier Metrics: 
              precision    recall  f1-score   support

           0       0.81      0.76      0.78       591
           1       0.85      0.89      0.87       619
           2       0.67      0.77      0.72       667
           3       0.69      0.69      0.69       651
           4       0.63      0.66      0.65       667
           5       0.64      0.77      0.70       579
           6       0.50      0.57      0.53       610
           7       0.53      0.54      0.54       652
           8       0.80      0.86      0.83       484
           9       0.92      0.9

In [147]:
# 10. Artificial Neural Network on W500_O50_Features
# Prints the grid-search and random-search winners, then a per-class report.
# Hyperparameters searched (per the printed best parameters): batch_size, epochs,
# model__activation, model__n_neurons.
evaluate_model.ann_model()

#----------------------- #10. Artificial Neural Net Model ------------------------#
#---------- Grid Search ------------#
Best parameters:  {'batch_size': 50, 'epochs': 150, 'model__activation': 'tanh', 'model__n_neurons': 16}
Best score:  0.4814888157902779

#---------- Random Search -----------#
Best parameters:  {'model__n_neurons': 16, 'model__activation': 'tanh', 'epochs': 150, 'batch_size': 50}
Best score:  0.4833138362957829

Artificial Neural Net Metrics: 
              precision    recall  f1-score   support

           0       0.59      0.58      0.58       591
           1       0.66      0.77      0.71       619
           2       0.49      0.45      0.47       667
           3       0.42      0.57      0.48       651
           4       0.55      0.58      0.57       667
           5       0.45      0.51      0.48       579
           6       0.41      0.41      0.41       610
           7       0.34      0.50      0.40       652
           8       0.70      0.75      0.72 