# Importing necessary libraries

In [26]:
import pickle
import pandas as pd
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from xgboost import XGBClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import GridSearchCV


np.random.seed(42)

In [2]:
X_train = pickle.load(open('X_train.unknown','rb'))

X_test= pickle.load(open('X_test.unknown','rb'))

y_test= pickle.load(open('y_test.unknown','rb'))

y_train= pickle.load(open('y_train.unknown','rb'))

# Logistic grid CV

In [3]:
# Define the logistic regression model
logistic = LogisticRegression(max_iter=1000, random_state=42)

# Define a parameter grid for grid search
param_grid = {
    'penalty': ['l1', 'l2', 'elasticnet', 'none'],
    'C': np.logspace(-3, 3, 7),
    'solver': ['newton-cg', 'lbfgs', 'liblinear', 'sag', 'saga']
}

# Perform grid search with cross-validation
grid_search = GridSearchCV(logistic, param_grid, cv=5, n_jobs=-1, verbose=1)
grid_search.fit(X_train, y_train)

# Get the best model
best_model = grid_search.best_estimator_

# Make predictions on the test set
y_pred = best_model.predict(X_test)

# Output the entire classification report
report = classification_report(y_test, y_pred)
print(report)

Fitting 5 folds for each of 140 candidates, totalling 700 fits


315 fits failed out of a total of 700.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
35 fits failed with the following error:
Traceback (most recent call last):
  File "C:\Users\Dheeraj Mekala\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 729, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\Dheeraj Mekala\anaconda3\lib\site-packages\sklearn\base.py", line 1152, in wrapper
    return fit_method(estimator, *args, **kwargs)
  File "C:\Users\Dheeraj Mekala\anaconda3\lib\site-packages\sklearn\linear_model\_logistic.py", line 1169, in fit
    solver = _check_solver(self.solver, self.penalty, self.dual)
  File "C:\Users\Dheeraj Mekala\anaconda3\lib\site-packages\sklearn\linear_m

              precision    recall  f1-score   support

           0       0.96      0.95      0.96      4440
           1       0.95      0.96      0.96      4360

    accuracy                           0.96      8800
   macro avg       0.96      0.96      0.96      8800
weighted avg       0.96      0.96      0.96      8800



In [5]:
grid_search.best_params_


{'C': 0.1, 'penalty': 'l2', 'solver': 'newton-cg'}

# KNN Grid CV

In [3]:

x = np.arange(1, 31)
x = np.concatenate([x, np.arange(130, 151)])
print(x)

[  1   2   3   4   5   6   7   8   9  10  11  12  13  14  15  16  17  18
  19  20  21  22  23  24  25  26  27  28  29  30 130 131 132 133 134 135
 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150]


In [4]:

# Define a range of parameters for grid search
param_grid = {
    'n_neighbors': x,  # Range of k values
    'weights': ['uniform', 'distance'],  # Weighting options
    'metric': ['euclidean', 'manhattan']  # Distance metrics
}

# Create the KNN classifier
knn = KNeighborsClassifier()

# Perform grid search with cross-validation
grid_search = GridSearchCV(knn, param_grid, cv=5, scoring='accuracy')
grid_search.fit(X_train, y_train)

# Get the best parameters
best_params = grid_search.best_params_

# Train a KNN classifier with the best parameters
best_knn = KNeighborsClassifier(n_neighbors=best_params['n_neighbors'],
                               weights=best_params['weights'],
                               metric=best_params['metric'])
best_knn.fit(X_train, y_train)

# Make predictions on the test set
y_pred = best_knn.predict(X_test)

# Output the classification report
report = classification_report(y_test, y_pred)
print("Best KNN Parameters:", best_params)
print("Classification Report:\n", report)


Best KNN Parameters: {'metric': 'manhattan', 'n_neighbors': 6, 'weights': 'distance'}
Classification Report:
               precision    recall  f1-score   support

           0       0.96      0.97      0.97      4440
           1       0.97      0.96      0.97      4360

    accuracy                           0.97      8800
   macro avg       0.97      0.97      0.97      8800
weighted avg       0.97      0.97      0.97      8800



# SVC Grid CV

In [3]:


# Define the parameter grid to search
param_grid = {
    'C': [0.1, 1, 10, 100],  # Example values for the regularization parameter
    'kernel': ['linear', 'rbf', 'poly'],  # Example kernel types
    'gamma': [0.001, 0.01, 0.1, 1]  # Example values for the kernel coefficient
}

# Create an SVM model
svm_model = SVC()

# Perform grid search with cross-validation
grid_search = GridSearchCV(svm_model, param_grid, cv=5, n_jobs=-1)
grid_search.fit(X_train, y_train)

# Get the best SVM model from grid search
best_svm_model = grid_search.best_estimator_

# Make predictions on the test set
y_pred = best_svm_model.predict(X_test)

# Generate and print the classification report
classification_rep = classification_report(y_test, y_pred)
print(classification_rep)

              precision    recall  f1-score   support

           0       0.96      0.97      0.97      4440
           1       0.97      0.96      0.97      4360

    accuracy                           0.97      8800
   macro avg       0.97      0.97      0.97      8800
weighted avg       0.97      0.97      0.97      8800



In [4]:
print(best_svm_model)

SVC(C=100, gamma=0.01)


In [5]:
best_params = grid_search.best_params_
print("Best Parameters (Accuracy):", best_params)

Best Parameters (Accuracy): {'C': 100, 'gamma': 0.01, 'kernel': 'rbf'}


# Decision Tree Grid CV

In [5]:
# Define the parameter grid for GridSearchCV
param_grid = {
    'criterion': ['gini', 'entropy'],
    'max_depth': [None, 10, 20, 30, 40, 50],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4]
}

# Create the Decision Tree classifier
dt_classifier = DecisionTreeClassifier()

# Perform GridSearchCV with a large parameter range
grid_search = GridSearchCV(estimator=dt_classifier, param_grid=param_grid, cv=5, n_jobs=-1)

# Fit the classifier on the training data
grid_search.fit(X_train, y_train)

# Get the best estimator from the grid search
best_dt_classifier = grid_search.best_estimator_

# Make predictions on the test data
y_pred = best_dt_classifier.predict(X_test)

# Generate the classification report
classification_rep = classification_report(y_test, y_pred)

print(classification_rep)

              precision    recall  f1-score   support

           0       0.95      0.97      0.96      4440
           1       0.97      0.95      0.96      4360

    accuracy                           0.96      8800
   macro avg       0.96      0.96      0.96      8800
weighted avg       0.96      0.96      0.96      8800



In [6]:
print(best_dt_classifier)


DecisionTreeClassifier(criterion='entropy', max_depth=40, min_samples_leaf=4,
                       min_samples_split=10)


In [9]:
# After grid search for accuracy
best_params_accuracy = grid_search.best_params_
print("Best Parameters (Accuracy):", best_params_accuracy)


Best Parameters (Accuracy): {'criterion': 'entropy', 'max_depth': 40, 'min_samples_leaf': 4, 'min_samples_split': 10}


# Random Forest Grid CV

In [3]:
# Define the Random Forest classifier
rf_classifier = RandomForestClassifier()

# Define a parameter grid to search
param_grid = {
    'n_estimators': [100, 200, 300],  # Number of trees in the forest
    'max_depth': [None, 10, 20, 30],  # Maximum depth of the trees
    'min_samples_split': [2, 5, 10],  # Minimum number of samples required to split an internal node
    'min_samples_leaf': [1, 2, 4]  # Minimum number of samples required to be at a leaf node
}

# Define the GridSearchCV with accuracy scoring
grid_search_accuracy = GridSearchCV(rf_classifier, param_grid, cv=5, scoring='accuracy', n_jobs=-1)
grid_search_accuracy.fit(X_train, y_train)

# Get the best estimator for accuracy
best_rf_accuracy = grid_search_accuracy.best_estimator_

# Define the GridSearchCV with recall scoring
grid_search_recall = GridSearchCV(rf_classifier, param_grid, cv=5, scoring='recall', n_jobs=-1)
grid_search_recall.fit(X_train, y_train)

# Get the best estimator for recall
best_rf_recall = grid_search_recall.best_estimator_

# Output the classification report for accuracy on test data
y_pred_accuracy_test = best_rf_accuracy.predict(X_test)
print("Classification Report (Accuracy) on Test Data:")
print(classification_report(y_test, y_pred_accuracy_test))

# Output the classification report for recall on test data
y_pred_recall_test = best_rf_recall.predict(X_test)
print("Classification Report (Recall) on Test Data:")
print(classification_report(y_test, y_pred_recall_test))

Classification Report (Accuracy) on Test Data:
              precision    recall  f1-score   support

           0       0.97      0.97      0.97      4440
           1       0.97      0.97      0.97      4360

    accuracy                           0.97      8800
   macro avg       0.97      0.97      0.97      8800
weighted avg       0.97      0.97      0.97      8800

Classification Report (Recall) on Test Data:
              precision    recall  f1-score   support

           0       0.96      0.96      0.96      4440
           1       0.96      0.96      0.96      4360

    accuracy                           0.96      8800
   macro avg       0.96      0.96      0.96      8800
weighted avg       0.96      0.96      0.96      8800



In [6]:
print(best_rf_accuracy)
print(best_rf_recall)

# After grid search for accuracy
best_params_accuracy = grid_search_accuracy.best_params_
print("Best Parameters (Accuracy):", best_params_accuracy)

# After grid search for recall
best_params_recall = grid_search_recall.best_params_
print("Best Parameters (Recall):", best_params_recall)

RandomForestClassifier(max_depth=20, n_estimators=300)
RandomForestClassifier(max_depth=20, min_samples_leaf=2, min_samples_split=5)
Best Parameters (Accuracy): {'max_depth': 20, 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 300}
Best Parameters (Recall): {'max_depth': 20, 'min_samples_leaf': 2, 'min_samples_split': 5, 'n_estimators': 100}


In [17]:
from sklearn.ensemble import AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import GridSearchCV

param_grid_n_estimators = [50, 100, 150]
param_grid_learning_rate = [0.01, 0.1, 0.5]

param_grid_estimator = {  # Replace 'base_estimator' with 'estimator'
    'estimator__max_depth': [1, 2, 3],
    'estimator__min_samples_split': [2, 4, 6]
}

# Split the parameter grid into smaller chunks
param_grids = []
for n_estimators in param_grid_n_estimators:
    for learning_rate in param_grid_learning_rate:
        param_grid = {
            'n_estimators': [n_estimators],
            'learning_rate': [learning_rate],
            **param_grid_estimator  # Use 'estimator' instead of 'base_estimator'
        }
        param_grids.append(param_grid)

# Perform grid search on smaller chunks
results = []
for param_grid in param_grids:
    adaboost = AdaBoostClassifier(estimator=DecisionTreeClassifier())  # Replace 'base_estimator' with 'estimator'
    grid_search = GridSearchCV(adaboost, param_grid, cv=5)
    grid_search.fit(X_train, y_train)
    results.append(grid_search)

# Find the best result
best_result = max(results, key=lambda x: x.best_score_)

# Get the best hyperparameters and model
best_params = best_result.best_params_
best_adaboost = best_result.best_estimator_

# Train and evaluate the best model
best_adaboost.fit(X_train, y_train)
y_pred = best_adaboost.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Best Parameters: {best_params}")
print(f"Test Accuracy: {accuracy}")


Best Parameters: {'estimator__max_depth': 3, 'estimator__min_samples_split': 4, 'learning_rate': 0.5, 'n_estimators': 150}
Test Accuracy: 0.9655681818181818


# XGBoost Grid CV

In [3]:


# Define the parameter grid for grid search
param_grid = {
    'n_estimators': [100, 200, 300],
    'max_depth': [3, 4, 5],
    'learning_rate': [0.01, 0.1, 0.2],
    'subsample': [0.8, 0.9, 1.0],
    'colsample_bytree': [0.8, 0.9, 1.0],
}

# Create the XGBoost classifier
clf = xgb.XGBClassifier()

# Perform grid search with cross-validation
grid_search = GridSearchCV(estimator=clf, param_grid=param_grid, cv=5, n_jobs=1, verbose=2)
grid_search.fit(X_train, y_train)

# Get the best estimator from the grid search
best_clf = grid_search.best_estimator_

# Fit the best classifier on the training data
best_clf.fit(X_train, y_train)

# Make predictions on the test data
y_pred = best_clf.predict(X_test)

# Output the classification report
report = classification_report(y_test, y_pred, target_names=y_test)
print(report)


Fitting 5 folds for each of 243 candidates, totalling 1215 fits
[CV] END colsample_bytree=0.8, learning_rate=0.01, max_depth=3, n_estimators=100, subsample=0.8; total time=   0.9s
[CV] END colsample_bytree=0.8, learning_rate=0.01, max_depth=3, n_estimators=100, subsample=0.8; total time=   0.8s
[CV] END colsample_bytree=0.8, learning_rate=0.01, max_depth=3, n_estimators=100, subsample=0.8; total time=   0.8s
[CV] END colsample_bytree=0.8, learning_rate=0.01, max_depth=3, n_estimators=100, subsample=0.8; total time=   0.8s
[CV] END colsample_bytree=0.8, learning_rate=0.01, max_depth=3, n_estimators=100, subsample=0.8; total time=   0.8s
[CV] END colsample_bytree=0.8, learning_rate=0.01, max_depth=3, n_estimators=100, subsample=0.9; total time=   0.9s
[CV] END colsample_bytree=0.8, learning_rate=0.01, max_depth=3, n_estimators=100, subsample=0.9; total time=   0.8s
[CV] END colsample_bytree=0.8, learning_rate=0.01, max_depth=3, n_estimators=100, subsample=0.9; total time=   0.8s
[CV] END

[CV] END colsample_bytree=0.8, learning_rate=0.01, max_depth=4, n_estimators=200, subsample=1.0; total time=   1.9s
[CV] END colsample_bytree=0.8, learning_rate=0.01, max_depth=4, n_estimators=200, subsample=1.0; total time=   1.9s
[CV] END colsample_bytree=0.8, learning_rate=0.01, max_depth=4, n_estimators=200, subsample=1.0; total time=   1.9s
[CV] END colsample_bytree=0.8, learning_rate=0.01, max_depth=4, n_estimators=200, subsample=1.0; total time=   1.9s
[CV] END colsample_bytree=0.8, learning_rate=0.01, max_depth=4, n_estimators=300, subsample=0.8; total time=   3.8s
[CV] END colsample_bytree=0.8, learning_rate=0.01, max_depth=4, n_estimators=300, subsample=0.8; total time=   3.8s
[CV] END colsample_bytree=0.8, learning_rate=0.01, max_depth=4, n_estimators=300, subsample=0.8; total time=   3.7s
[CV] END colsample_bytree=0.8, learning_rate=0.01, max_depth=4, n_estimators=300, subsample=0.8; total time=   3.8s
[CV] END colsample_bytree=0.8, learning_rate=0.01, max_depth=4, n_estima

[CV] END colsample_bytree=0.8, learning_rate=0.1, max_depth=3, n_estimators=100, subsample=0.9; total time=   1.2s
[CV] END colsample_bytree=0.8, learning_rate=0.1, max_depth=3, n_estimators=100, subsample=0.9; total time=   1.1s
[CV] END colsample_bytree=0.8, learning_rate=0.1, max_depth=3, n_estimators=100, subsample=0.9; total time=   1.0s
[CV] END colsample_bytree=0.8, learning_rate=0.1, max_depth=3, n_estimators=100, subsample=1.0; total time=   0.7s
[CV] END colsample_bytree=0.8, learning_rate=0.1, max_depth=3, n_estimators=100, subsample=1.0; total time=   0.8s
[CV] END colsample_bytree=0.8, learning_rate=0.1, max_depth=3, n_estimators=100, subsample=1.0; total time=   0.8s
[CV] END colsample_bytree=0.8, learning_rate=0.1, max_depth=3, n_estimators=100, subsample=1.0; total time=   0.8s
[CV] END colsample_bytree=0.8, learning_rate=0.1, max_depth=3, n_estimators=100, subsample=1.0; total time=   0.8s
[CV] END colsample_bytree=0.8, learning_rate=0.1, max_depth=3, n_estimators=200,

[CV] END colsample_bytree=0.8, learning_rate=0.1, max_depth=4, n_estimators=300, subsample=0.8; total time=   3.7s
[CV] END colsample_bytree=0.8, learning_rate=0.1, max_depth=4, n_estimators=300, subsample=0.9; total time=   3.6s
[CV] END colsample_bytree=0.8, learning_rate=0.1, max_depth=4, n_estimators=300, subsample=0.9; total time=   3.5s
[CV] END colsample_bytree=0.8, learning_rate=0.1, max_depth=4, n_estimators=300, subsample=0.9; total time=   3.5s
[CV] END colsample_bytree=0.8, learning_rate=0.1, max_depth=4, n_estimators=300, subsample=0.9; total time=   3.6s
[CV] END colsample_bytree=0.8, learning_rate=0.1, max_depth=4, n_estimators=300, subsample=0.9; total time=   3.5s
[CV] END colsample_bytree=0.8, learning_rate=0.1, max_depth=4, n_estimators=300, subsample=1.0; total time=   2.8s
[CV] END colsample_bytree=0.8, learning_rate=0.1, max_depth=4, n_estimators=300, subsample=1.0; total time=   2.9s
[CV] END colsample_bytree=0.8, learning_rate=0.1, max_depth=4, n_estimators=300,

[CV] END colsample_bytree=0.8, learning_rate=0.2, max_depth=3, n_estimators=200, subsample=0.8; total time=   2.0s
[CV] END colsample_bytree=0.8, learning_rate=0.2, max_depth=3, n_estimators=200, subsample=0.8; total time=   2.0s
[CV] END colsample_bytree=0.8, learning_rate=0.2, max_depth=3, n_estimators=200, subsample=0.8; total time=   2.0s
[CV] END colsample_bytree=0.8, learning_rate=0.2, max_depth=3, n_estimators=200, subsample=0.8; total time=   2.1s
[CV] END colsample_bytree=0.8, learning_rate=0.2, max_depth=3, n_estimators=200, subsample=0.9; total time=   2.0s
[CV] END colsample_bytree=0.8, learning_rate=0.2, max_depth=3, n_estimators=200, subsample=0.9; total time=   2.0s
[CV] END colsample_bytree=0.8, learning_rate=0.2, max_depth=3, n_estimators=200, subsample=0.9; total time=   2.0s
[CV] END colsample_bytree=0.8, learning_rate=0.2, max_depth=3, n_estimators=200, subsample=0.9; total time=   2.0s
[CV] END colsample_bytree=0.8, learning_rate=0.2, max_depth=3, n_estimators=200,

[CV] END colsample_bytree=0.8, learning_rate=0.2, max_depth=4, n_estimators=300, subsample=1.0; total time=   2.9s
[CV] END colsample_bytree=0.8, learning_rate=0.2, max_depth=4, n_estimators=300, subsample=1.0; total time=   4.2s
[CV] END colsample_bytree=0.8, learning_rate=0.2, max_depth=5, n_estimators=100, subsample=0.8; total time=   1.7s
[CV] END colsample_bytree=0.8, learning_rate=0.2, max_depth=5, n_estimators=100, subsample=0.8; total time=   1.4s
[CV] END colsample_bytree=0.8, learning_rate=0.2, max_depth=5, n_estimators=100, subsample=0.8; total time=   1.4s
[CV] END colsample_bytree=0.8, learning_rate=0.2, max_depth=5, n_estimators=100, subsample=0.8; total time=   1.5s
[CV] END colsample_bytree=0.8, learning_rate=0.2, max_depth=5, n_estimators=100, subsample=0.8; total time=   1.4s
[CV] END colsample_bytree=0.8, learning_rate=0.2, max_depth=5, n_estimators=100, subsample=0.9; total time=   1.5s
[CV] END colsample_bytree=0.8, learning_rate=0.2, max_depth=5, n_estimators=100,

[CV] END colsample_bytree=0.9, learning_rate=0.01, max_depth=3, n_estimators=200, subsample=1.0; total time=   1.7s
[CV] END colsample_bytree=0.9, learning_rate=0.01, max_depth=3, n_estimators=200, subsample=1.0; total time=   1.8s
[CV] END colsample_bytree=0.9, learning_rate=0.01, max_depth=3, n_estimators=200, subsample=1.0; total time=   1.7s
[CV] END colsample_bytree=0.9, learning_rate=0.01, max_depth=3, n_estimators=200, subsample=1.0; total time=   1.7s
[CV] END colsample_bytree=0.9, learning_rate=0.01, max_depth=3, n_estimators=200, subsample=1.0; total time=   1.7s
[CV] END colsample_bytree=0.9, learning_rate=0.01, max_depth=3, n_estimators=300, subsample=0.8; total time=   3.3s
[CV] END colsample_bytree=0.9, learning_rate=0.01, max_depth=3, n_estimators=300, subsample=0.8; total time=   3.4s
[CV] END colsample_bytree=0.9, learning_rate=0.01, max_depth=3, n_estimators=300, subsample=0.8; total time=   3.3s
[CV] END colsample_bytree=0.9, learning_rate=0.01, max_depth=3, n_estima

[CV] END colsample_bytree=0.9, learning_rate=0.01, max_depth=5, n_estimators=100, subsample=0.9; total time=   1.5s
[CV] END colsample_bytree=0.9, learning_rate=0.01, max_depth=5, n_estimators=100, subsample=0.9; total time=   1.5s
[CV] END colsample_bytree=0.9, learning_rate=0.01, max_depth=5, n_estimators=100, subsample=0.9; total time=   1.5s
[CV] END colsample_bytree=0.9, learning_rate=0.01, max_depth=5, n_estimators=100, subsample=0.9; total time=   1.6s
[CV] END colsample_bytree=0.9, learning_rate=0.01, max_depth=5, n_estimators=100, subsample=1.0; total time=   1.2s
[CV] END colsample_bytree=0.9, learning_rate=0.01, max_depth=5, n_estimators=100, subsample=1.0; total time=   1.2s
[CV] END colsample_bytree=0.9, learning_rate=0.01, max_depth=5, n_estimators=100, subsample=1.0; total time=   1.2s
[CV] END colsample_bytree=0.9, learning_rate=0.01, max_depth=5, n_estimators=100, subsample=1.0; total time=   1.3s
[CV] END colsample_bytree=0.9, learning_rate=0.01, max_depth=5, n_estima

[CV] END colsample_bytree=0.9, learning_rate=0.1, max_depth=3, n_estimators=300, subsample=0.8; total time=   3.3s
[CV] END colsample_bytree=0.9, learning_rate=0.1, max_depth=3, n_estimators=300, subsample=0.8; total time=   3.6s
[CV] END colsample_bytree=0.9, learning_rate=0.1, max_depth=3, n_estimators=300, subsample=0.8; total time=   3.5s
[CV] END colsample_bytree=0.9, learning_rate=0.1, max_depth=3, n_estimators=300, subsample=0.9; total time=   3.2s
[CV] END colsample_bytree=0.9, learning_rate=0.1, max_depth=3, n_estimators=300, subsample=0.9; total time=   3.3s
[CV] END colsample_bytree=0.9, learning_rate=0.1, max_depth=3, n_estimators=300, subsample=0.9; total time=   3.4s
[CV] END colsample_bytree=0.9, learning_rate=0.1, max_depth=3, n_estimators=300, subsample=0.9; total time=   3.4s
[CV] END colsample_bytree=0.9, learning_rate=0.1, max_depth=3, n_estimators=300, subsample=0.9; total time=   3.3s
[CV] END colsample_bytree=0.9, learning_rate=0.1, max_depth=3, n_estimators=300,

[CV] END colsample_bytree=0.9, learning_rate=0.1, max_depth=5, n_estimators=100, subsample=1.0; total time=   1.3s
[CV] END colsample_bytree=0.9, learning_rate=0.1, max_depth=5, n_estimators=200, subsample=0.8; total time=   3.1s
[CV] END colsample_bytree=0.9, learning_rate=0.1, max_depth=5, n_estimators=200, subsample=0.8; total time=   3.1s
[CV] END colsample_bytree=0.9, learning_rate=0.1, max_depth=5, n_estimators=200, subsample=0.8; total time=   3.2s
[CV] END colsample_bytree=0.9, learning_rate=0.1, max_depth=5, n_estimators=200, subsample=0.8; total time=   3.1s
[CV] END colsample_bytree=0.9, learning_rate=0.1, max_depth=5, n_estimators=200, subsample=0.8; total time=   3.0s
[CV] END colsample_bytree=0.9, learning_rate=0.1, max_depth=5, n_estimators=200, subsample=0.9; total time=   2.9s
[CV] END colsample_bytree=0.9, learning_rate=0.1, max_depth=5, n_estimators=200, subsample=0.9; total time=   3.0s
[CV] END colsample_bytree=0.9, learning_rate=0.1, max_depth=5, n_estimators=200,

[CV] END colsample_bytree=0.9, learning_rate=0.2, max_depth=3, n_estimators=300, subsample=1.0; total time=   3.0s
[CV] END colsample_bytree=0.9, learning_rate=0.2, max_depth=3, n_estimators=300, subsample=1.0; total time=   3.0s
[CV] END colsample_bytree=0.9, learning_rate=0.2, max_depth=3, n_estimators=300, subsample=1.0; total time=   2.9s
[CV] END colsample_bytree=0.9, learning_rate=0.2, max_depth=3, n_estimators=300, subsample=1.0; total time=   2.9s
[CV] END colsample_bytree=0.9, learning_rate=0.2, max_depth=4, n_estimators=100, subsample=0.8; total time=   1.5s
[CV] END colsample_bytree=0.9, learning_rate=0.2, max_depth=4, n_estimators=100, subsample=0.8; total time=   1.5s
[CV] END colsample_bytree=0.9, learning_rate=0.2, max_depth=4, n_estimators=100, subsample=0.8; total time=   1.6s
[CV] END colsample_bytree=0.9, learning_rate=0.2, max_depth=4, n_estimators=100, subsample=0.8; total time=   1.5s
[CV] END colsample_bytree=0.9, learning_rate=0.2, max_depth=4, n_estimators=100,

[CV] END colsample_bytree=0.9, learning_rate=0.2, max_depth=5, n_estimators=200, subsample=0.9; total time=   3.0s
[CV] END colsample_bytree=0.9, learning_rate=0.2, max_depth=5, n_estimators=200, subsample=0.9; total time=   2.9s
[CV] END colsample_bytree=0.9, learning_rate=0.2, max_depth=5, n_estimators=200, subsample=1.0; total time=   2.4s
[CV] END colsample_bytree=0.9, learning_rate=0.2, max_depth=5, n_estimators=200, subsample=1.0; total time=   4.2s
[CV] END colsample_bytree=0.9, learning_rate=0.2, max_depth=5, n_estimators=200, subsample=1.0; total time=   2.4s
[CV] END colsample_bytree=0.9, learning_rate=0.2, max_depth=5, n_estimators=200, subsample=1.0; total time=   2.4s
[CV] END colsample_bytree=0.9, learning_rate=0.2, max_depth=5, n_estimators=200, subsample=1.0; total time=   2.4s
[CV] END colsample_bytree=0.9, learning_rate=0.2, max_depth=5, n_estimators=300, subsample=0.8; total time=   4.7s
[CV] END colsample_bytree=0.9, learning_rate=0.2, max_depth=5, n_estimators=300,

[CV] END colsample_bytree=1.0, learning_rate=0.01, max_depth=4, n_estimators=100, subsample=0.8; total time=   1.4s
[CV] END colsample_bytree=1.0, learning_rate=0.01, max_depth=4, n_estimators=100, subsample=0.9; total time=   1.4s
[CV] END colsample_bytree=1.0, learning_rate=0.01, max_depth=4, n_estimators=100, subsample=0.9; total time=   1.6s
[CV] END colsample_bytree=1.0, learning_rate=0.01, max_depth=4, n_estimators=100, subsample=0.9; total time=   1.5s
[CV] END colsample_bytree=1.0, learning_rate=0.01, max_depth=4, n_estimators=100, subsample=0.9; total time=   1.5s
[CV] END colsample_bytree=1.0, learning_rate=0.01, max_depth=4, n_estimators=100, subsample=0.9; total time=   1.6s
[CV] END colsample_bytree=1.0, learning_rate=0.01, max_depth=4, n_estimators=100, subsample=1.0; total time=   1.4s
[CV] END colsample_bytree=1.0, learning_rate=0.01, max_depth=4, n_estimators=100, subsample=1.0; total time=   1.4s
[CV] END colsample_bytree=1.0, learning_rate=0.01, max_depth=4, n_estima

[CV] END colsample_bytree=1.0, learning_rate=0.01, max_depth=5, n_estimators=300, subsample=0.8; total time=   5.4s
[CV] END colsample_bytree=1.0, learning_rate=0.01, max_depth=5, n_estimators=300, subsample=0.8; total time=   5.6s
[CV] END colsample_bytree=1.0, learning_rate=0.01, max_depth=5, n_estimators=300, subsample=0.8; total time=   5.7s
[CV] END colsample_bytree=1.0, learning_rate=0.01, max_depth=5, n_estimators=300, subsample=0.8; total time=   5.9s
[CV] END colsample_bytree=1.0, learning_rate=0.01, max_depth=5, n_estimators=300, subsample=0.8; total time=   5.7s
[CV] END colsample_bytree=1.0, learning_rate=0.01, max_depth=5, n_estimators=300, subsample=0.9; total time=   5.2s
[CV] END colsample_bytree=1.0, learning_rate=0.01, max_depth=5, n_estimators=300, subsample=0.9; total time=   5.2s
[CV] END colsample_bytree=1.0, learning_rate=0.01, max_depth=5, n_estimators=300, subsample=0.9; total time=   5.4s
[CV] END colsample_bytree=1.0, learning_rate=0.01, max_depth=5, n_estima

[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=4, n_estimators=100, subsample=1.0; total time=   1.1s
[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=4, n_estimators=100, subsample=1.0; total time=   1.1s
[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=4, n_estimators=100, subsample=1.0; total time=   1.1s
[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=4, n_estimators=200, subsample=0.8; total time=   2.9s
[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=4, n_estimators=200, subsample=0.8; total time=   2.9s
[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=4, n_estimators=200, subsample=0.8; total time=   2.8s
[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=4, n_estimators=200, subsample=0.8; total time=   2.8s
[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=4, n_estimators=200, subsample=0.8; total time=   2.8s
[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=4, n_estimators=200,

[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=5, n_estimators=300, subsample=0.9; total time=   4.7s
[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=5, n_estimators=300, subsample=1.0; total time=   5.8s
[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=5, n_estimators=300, subsample=1.0; total time=   3.8s
[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=5, n_estimators=300, subsample=1.0; total time=   4.0s
[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=5, n_estimators=300, subsample=1.0; total time=   4.2s
[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=5, n_estimators=300, subsample=1.0; total time=   6.3s
[CV] END colsample_bytree=1.0, learning_rate=0.2, max_depth=3, n_estimators=100, subsample=0.8; total time=   1.2s
[CV] END colsample_bytree=1.0, learning_rate=0.2, max_depth=3, n_estimators=100, subsample=0.8; total time=   1.2s
[CV] END colsample_bytree=1.0, learning_rate=0.2, max_depth=3, n_estimators=100,

[CV] END colsample_bytree=1.0, learning_rate=0.2, max_depth=4, n_estimators=200, subsample=0.9; total time=   2.7s
[CV] END colsample_bytree=1.0, learning_rate=0.2, max_depth=4, n_estimators=200, subsample=0.9; total time=   2.6s
[CV] END colsample_bytree=1.0, learning_rate=0.2, max_depth=4, n_estimators=200, subsample=0.9; total time=   2.6s
[CV] END colsample_bytree=1.0, learning_rate=0.2, max_depth=4, n_estimators=200, subsample=0.9; total time=   2.6s
[CV] END colsample_bytree=1.0, learning_rate=0.2, max_depth=4, n_estimators=200, subsample=1.0; total time=   2.1s
[CV] END colsample_bytree=1.0, learning_rate=0.2, max_depth=4, n_estimators=200, subsample=1.0; total time=   4.0s
[CV] END colsample_bytree=1.0, learning_rate=0.2, max_depth=4, n_estimators=200, subsample=1.0; total time=   2.1s
[CV] END colsample_bytree=1.0, learning_rate=0.2, max_depth=4, n_estimators=200, subsample=1.0; total time=   2.1s
[CV] END colsample_bytree=1.0, learning_rate=0.2, max_depth=4, n_estimators=200,

NameError: name 'data' is not defined

In [9]:
report = classification_report(y_test, y_pred)
print(report)


              precision    recall  f1-score   support

           0       0.96      0.97      0.97      4440
           1       0.97      0.96      0.97      4360

    accuracy                           0.97      8800
   macro avg       0.97      0.97      0.97      8800
weighted avg       0.97      0.97      0.97      8800



In [11]:
best_params = grid_search.best_params_
print("Best Parameters:", best_params)


Best Parameters: {'colsample_bytree': 0.8, 'learning_rate': 0.1, 'max_depth': 5, 'n_estimators': 300, 'subsample': 0.8}


# MLP Classifier Grid Search CV

In [8]:

# Define the parameter grid for the MLP Classifier
param_grid = {
    'hidden_layer_sizes': [(50, 50), (100, 50, 25), (100, 100, 100),(100, 100, 100,100)],
    'activation': ['logistic', 'tanh', 'relu'],
    'alpha': [0.0001, 0.001, 0.01],
    'learning_rate_init': [0.001, 0.01, 0.1]
}

# Create an MLP Classifier
mlp_classifier = MLPClassifier(random_state=42)

# Perform GridSearchCV
grid_search = GridSearchCV(estimator=mlp_classifier, param_grid=param_grid, cv=5, n_jobs=-1, verbose=1)

# Fit the model to find the best parameters
grid_search.fit(X_train, y_train)

# Get the best parameters and estimator
best_params = grid_search.best_params_
best_estimator = grid_search.best_estimator_

# Make predictions using the best model
y_pred = best_estimator.predict(X_test)

# Output the entire classification report
report = classification_report(y_test, y_pred)
print(report)


Fitting 5 folds for each of 108 candidates, totalling 540 fits


KeyboardInterrupt: 

In [5]:
best_params


{'activation': 'logistic',
 'alpha': 0.0001,
 'hidden_layer_sizes': (100, 100, 100),
 'learning_rate_init': 0.01}