# **Linking to Google Drive**

In [None]:
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

Mounted at /content/drive


#**Importing Libraries**

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
from sklearn.cluster import KMeans, AgglomerativeClustering
from scipy.cluster.hierarchy import linkage
from sklearn.metrics import confusion_matrix, roc_auc_score, classification_report, accuracy_score, make_scorer, silhouette_score
import pickle
import warnings
from sklearn.preprocessing import MinMaxScaler
from sklearn.feature_selection import RFE
warnings.filterwarnings("ignore")

# **The Dataset for The Supervised Learning Models**

In [2]:
hdSup=pd.read_csv('/content/featureSelection_heart.csv')

In [3]:
hdSup.head()

Unnamed: 0,age,cp,trestbps,chol,thalach,oldpeak,ca,thal,target
0,0.479167,0.0,0.292453,0.303887,0.740458,0.227273,0.5,1.0,0
1,0.5,0.0,0.433962,0.272085,0.641221,0.704545,0.0,1.0,0
2,0.854167,0.0,0.481132,0.169611,0.412214,0.590909,0.0,1.0,0
3,0.666667,0.0,0.509434,0.272085,0.687023,0.0,0.25,1.0,0
4,0.6875,0.0,0.415094,0.59364,0.267176,0.431818,0.75,0.5,0


**Splitting Data**

In [4]:
X = hdSup.drop("target", axis=1)
y = hdSup["target"]

print("Shape of X:", X.shape)
print("\nShape of y:", y.shape)

Shape of X: (298, 8)

Shape of y: (298,)


In [5]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0, stratify=y)

# **Logistic Regression Hyperparameter Tuning**

**Logistic Regression Base Model**

In [6]:
logreg=LogisticRegression(random_state=1)

**Metrics of the model before tuning the hyperparameters**\
**On the training data:**

In [7]:
logreg.fit(X_train, y_train)

y_pred = logreg.predict(X_train)

print("Logistic regression results on TRAINING data BEFORE tuning the hyperparameters:\n")
print("Accuracy:", accuracy_score(y_train, y_pred))
print("Confusion Matrix:\n", confusion_matrix(y_train, y_pred))
print("Classification Report:\n", classification_report(y_train, y_pred))

Logistic regression results on TRAINING data BEFORE tuning the hyperparameters:

Accuracy: 0.8109243697478992
Confusion Matrix:
 [[ 76  33]
 [ 12 117]]
Classification Report:
               precision    recall  f1-score   support

           0       0.86      0.70      0.77       109
           1       0.78      0.91      0.84       129

    accuracy                           0.81       238
   macro avg       0.82      0.80      0.81       238
weighted avg       0.82      0.81      0.81       238



**On the testing data:**

In [8]:
y_pred = logreg.predict(X_test)

print("Logistic regression results on TESTING data BEFORE tuning the hyperparameters:\n")
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))
print("Classification Report:\n", classification_report(y_test, y_pred))

Logistic regression results on TESTING data BEFORE tuning the hyperparameters:

Accuracy: 0.9166666666666666
Confusion Matrix:
 [[26  1]
 [ 4 29]]
Classification Report:
               precision    recall  f1-score   support

           0       0.87      0.96      0.91        27
           1       0.97      0.88      0.92        33

    accuracy                           0.92        60
   macro avg       0.92      0.92      0.92        60
weighted avg       0.92      0.92      0.92        60



**GridSearchCV for Logistic Regression**

In [11]:
param_grid = [
    {
        'solver': ['liblinear'],
        'penalty': ['l1', 'l2'],
        'C': [0.01, 0.1, 1, 10],
        'class_weight': ['balanced']
    },
    {
        'solver': ['lbfgs'],
        'penalty': ['l2'],
        'C': [0.01, 0.1, 1, 10],
        'class_weight': ['balanced']
    }
]

grid_logreg = GridSearchCV(logreg, param_grid, cv=5, scoring="f1_macro")
grid_logreg.fit(X_train, y_train)

print("Best Logistic Regression:", grid_logreg.best_params_)
print("Best score:", grid_logreg.best_score_)

Best Logistic Regression: {'C': 1, 'class_weight': 'balanced', 'penalty': 'l2', 'solver': 'lbfgs'}
Best score: 0.7988134063619748


**Applying the model with the best parameters**

In [12]:
best_logreg = grid_logreg.best_estimator_
best_logreg.fit(X_train, y_train)

**Metrics of the model after tuning the hyperparameters**\
**On the training data:**

In [13]:
y_pred = best_logreg.predict(X_train)

print("Logistic regression results on TRAINING data AFTER tuning the hyperparameters:\n")
print("Accuracy:", accuracy_score(y_train, y_pred))
print("Confusion Matrix:\n", confusion_matrix(y_train, y_pred))
print("Classification Report:\n", classification_report(y_train, y_pred))

Logistic regression results on TRAINING data AFTER tuning the hyperparameters:

Accuracy: 0.8151260504201681
Confusion Matrix:
 [[ 81  28]
 [ 16 113]]
Classification Report:
               precision    recall  f1-score   support

           0       0.84      0.74      0.79       109
           1       0.80      0.88      0.84       129

    accuracy                           0.82       238
   macro avg       0.82      0.81      0.81       238
weighted avg       0.82      0.82      0.81       238



**On the testing data:**

In [14]:
y_pred = best_logreg.predict(X_test)

print("Logistic regression results on TESTING data AFTER tuning the hyperparameters:\n")
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))
print("Classification Report:\n", classification_report(y_test, y_pred))

Logistic regression results on TESTING data AFTER tuning the hyperparameters:

Accuracy: 0.9166666666666666
Confusion Matrix:
 [[26  1]
 [ 4 29]]
Classification Report:
               precision    recall  f1-score   support

           0       0.87      0.96      0.91        27
           1       0.97      0.88      0.92        33

    accuracy                           0.92        60
   macro avg       0.92      0.92      0.92        60
weighted avg       0.92      0.92      0.92        60



In [15]:
y_proba = best_logreg.predict_proba(X_test)[:, 1]
test_auc = roc_auc_score(y_test, y_proba)
print("Testing AUC Score", test_auc)

Testing AUC Score 0.9382716049382716


# **Decisoin Tree Classifier Hyperparameter Tuning**

**Decisoin Tree Classifier Base Model**

In [None]:
dtc = DecisionTreeClassifier(random_state=1)

**Metrics of the model before tuning the hyperparameters**\
**On the training data:**

In [None]:
dtc.fit(X_train, y_train)

y_pred = dtc.predict(X_train)

print("Decision tree results on TRAINING data BEFORE tuning the hyperparameters:\n")
print("Accuracy:", accuracy_score(y_train, y_pred))
print("Confusion Matrix:\n", confusion_matrix(y_train, y_pred))
print("Classification Report:\n", classification_report(y_train, y_pred))

Decision tree results on TRAINING data BEFORE tuning the hyperparameters:

Accuracy: 1.0
Confusion Matrix:
 [[109   0]
 [  0 129]]
Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00       109
           1       1.00      1.00      1.00       129

    accuracy                           1.00       238
   macro avg       1.00      1.00      1.00       238
weighted avg       1.00      1.00      1.00       238



**On the testing data:**

In [None]:
y_pred = dtc.predict(X_test)

print("Decision tree results on TESTING data BEFORE tuning the hyperparameters:\n")
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))
print("Classification Report:\n", classification_report(y_test, y_pred))

Decision tree results on TESTING data BEFORE tuning the hyperparameters:

Accuracy: 0.8
Confusion Matrix:
 [[23  4]
 [ 8 25]]
Classification Report:
               precision    recall  f1-score   support

           0       0.74      0.85      0.79        27
           1       0.86      0.76      0.81        33

    accuracy                           0.80        60
   macro avg       0.80      0.80      0.80        60
weighted avg       0.81      0.80      0.80        60



**GridSearchCV for Decision Tree**

In [None]:
param_grid = {
    "max_depth": [2, 3, 4, 5, 6, 8, 10, 20, 30, None],
    "min_samples_split": [2, 5, 10],
    "min_samples_leaf": [1, 2, 4],
    "criterion": ["gini", "entropy"]
}
grid_dtc = GridSearchCV(dtc, param_grid, cv=5, scoring="f1_macro")
grid_dtc.fit(X_train, y_train)

print("Best Decision Tree:", grid_dtc.best_params_)
print("Best score:", grid_dtc.best_score_)

Best Decision Tree: {'criterion': 'entropy', 'max_depth': 4, 'min_samples_leaf': 4, 'min_samples_split': 2}
Best score: 0.7687387309749927


**Applying the model with the best parameters**

In [None]:
best_dtc = grid_dtc.best_estimator_
best_dtc.fit(X_train, y_train)

**Metrics of the model after tuning the hyperparameters**\
**On the training data:**

In [None]:
y_pred = best_dtc.predict(X_train)

print("Decision tree results on TRAINING data AFTER tuning the hyperparameters:\n")
print("Accuracy:", accuracy_score(y_train, y_pred))
print("Confusion Matrix:\n", confusion_matrix(y_train, y_pred))
print("Classification Report:\n", classification_report(y_train, y_pred))

Decision tree results on TRAINING data AFTER tuning the hyperparameters

Accuracy: 0.8571428571428571
Confusion Matrix:
 [[ 85  24]
 [ 10 119]]
Classification Report:
               precision    recall  f1-score   support

           0       0.89      0.78      0.83       109
           1       0.83      0.92      0.88       129

    accuracy                           0.86       238
   macro avg       0.86      0.85      0.85       238
weighted avg       0.86      0.86      0.86       238



**On the testing data:**

In [None]:
y_pred = best_dtc.predict(X_test)

print("Decision tree results on TESTING data AFTER tuning the hyperparameters:\n")
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))
print("Classification Report:\n", classification_report(y_test, y_pred))

Decision tree results on TESTING data AFTER tuning the hyperparameters

Accuracy: 0.85
Confusion Matrix:
 [[24  3]
 [ 6 27]]
Classification Report:
               precision    recall  f1-score   support

           0       0.80      0.89      0.84        27
           1       0.90      0.82      0.86        33

    accuracy                           0.85        60
   macro avg       0.85      0.85      0.85        60
weighted avg       0.85      0.85      0.85        60



In [None]:
y_proba = best_dtc.predict_proba(X_test)[:, 1]
test_auc = roc_auc_score(y_test, y_proba)
print("Testing AUC Score", test_auc)

Testing AUC Score 0.9169472502805835


# **Random Forest Classifier Hyperparameter Tuning**

**Random Forest Classifier Base Model**

In [None]:
rfc = RandomForestClassifier(random_state=42)

**Metrics of the model before tuning the hyperparameters**\
**On the training data:**

In [None]:
rfc.fit(X_train, y_train)

y_pred = rfc.predict(X_train)

print("Random forest results on TRAINING data BEFORE tuning the hyperparameters:\n")
print("Accuracy:", accuracy_score(y_train, y_pred))
print("Confusion Matrix:\n", confusion_matrix(y_train, y_pred))
print("Classification Report:\n", classification_report(y_train, y_pred))

Random forest results on TRAINING data BEFORE tuning the hyperparameters:

Accuracy: 1.0
Confusion Matrix:
 [[109   0]
 [  0 129]]
Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00       109
           1       1.00      1.00      1.00       129

    accuracy                           1.00       238
   macro avg       1.00      1.00      1.00       238
weighted avg       1.00      1.00      1.00       238



**On the training data:**

In [None]:
y_pred = rfc.predict(X_test)

print("Random forest results on TESTING data BEFORE tuning the hyperparameters:\n")
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))
print("Classification Report:\n", classification_report(y_test, y_pred))

Random forest results on TESTING data BEFORE tuning the hyperparameters:

Accuracy: 0.85
Confusion Matrix:
 [[23  4]
 [ 5 28]]
Classification Report:
               precision    recall  f1-score   support

           0       0.82      0.85      0.84        27
           1       0.88      0.85      0.86        33

    accuracy                           0.85        60
   macro avg       0.85      0.85      0.85        60
weighted avg       0.85      0.85      0.85        60



**RandomizedSearchCV for Random Forest**

In [None]:
param_grid = {
    'n_estimators': [100, 200, 300],
    "max_depth": [2, 3, 4, 5, 6, 8, 10, 20, 30, None],
    "min_samples_split": [2, 5, 10],
    "min_samples_leaf": [1, 2, 4],
    "criterion": ["gini", "entropy"]
}
ransrch_rfc = RandomizedSearchCV(rfc, param_grid, cv=5, scoring="f1_macro")
ransrch_rfc.fit(X_train, y_train)

print("Best Random Forest:", ransrch_rfc.best_params_)
print("Best score:", ransrch_rfc.best_score_)

Best Random Forest: {'n_estimators': 300, 'min_samples_split': 5, 'min_samples_leaf': 2, 'max_depth': 10, 'criterion': 'entropy'}
Best score: 0.8019220562719014


**Applying the model with the best parameters**

In [None]:
best_rfc = ransrch_rfc.best_estimator_
best_rfc.fit(X_train, y_train)

**Metrics of the model after tuning the hyperparameters**\
**On the training data:**

In [None]:
y_pred = best_rfc.predict(X_train)

print("Random forest results on TRAINING data AFTER tuning the hyperparameters:\n")
print("Accuracy:", accuracy_score(y_train, y_pred))
print("Confusion Matrix:\n", confusion_matrix(y_train, y_pred))
print("Classification Report:\n", classification_report(y_train, y_pred))

Random forest results on TRAINING data AFTER tuning the hyperparameters:

Accuracy: 0.9663865546218487
Confusion Matrix:
 [[104   5]
 [  3 126]]
Classification Report:
               precision    recall  f1-score   support

           0       0.97      0.95      0.96       109
           1       0.96      0.98      0.97       129

    accuracy                           0.97       238
   macro avg       0.97      0.97      0.97       238
weighted avg       0.97      0.97      0.97       238



**On the training data:**

In [None]:
y_pred = best_rfc.predict(X_test)

print("Random forest results on TESTING data AFTER tuning the hyperparameters:\n")
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))
print("Classification Report:\n", classification_report(y_test, y_pred))

Random forest results on TESTING data AFTER tuning the hyperparameters:

Accuracy: 0.85
Confusion Matrix:
 [[23  4]
 [ 5 28]]
Classification Report:
               precision    recall  f1-score   support

           0       0.82      0.85      0.84        27
           1       0.88      0.85      0.86        33

    accuracy                           0.85        60
   macro avg       0.85      0.85      0.85        60
weighted avg       0.85      0.85      0.85        60



In [None]:
y_proba = best_rfc.predict_proba(X_test)[:, 1]
test_auc = roc_auc_score(y_test, y_proba)
print("Testing AUC Score", test_auc)

Testing AUC Score 0.9225589225589226


# **Support Vector Classifier Hyperparameter Tuning**

**Support Vector Classifier Base Model**

In [21]:
svm = SVC(probability=True,random_state=42)

**Metrics of the model before tuning the hyperparameters**\
**On the training data:**

In [22]:
svm.fit(X_train, y_train)

y_pred = svm.predict(X_train)

print("Support vector results on TRAINING data BEFORE tuning the hyperparameters:\n")
print("Accuracy:", accuracy_score(y_train, y_pred))
print("Confusion Matrix:\n", confusion_matrix(y_train, y_pred))
print("Classification Report:\n", classification_report(y_train, y_pred))

Support vector results on TRAINING data BEFORE tuning the hyperparameters:

Accuracy: 0.865546218487395
Confusion Matrix:
 [[ 84  25]
 [  7 122]]
Classification Report:
               precision    recall  f1-score   support

           0       0.92      0.77      0.84       109
           1       0.83      0.95      0.88       129

    accuracy                           0.87       238
   macro avg       0.88      0.86      0.86       238
weighted avg       0.87      0.87      0.86       238



**On the testing data:**

In [23]:
y_pred = svm.predict(X_test)

print("Support vector results on TESTING data BEFORE tuning the hyperparameters:\n")
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))
print("Classification Report:\n", classification_report(y_test, y_pred))

Support vector results on TESTING data BEFORE tuning the hyperparameters:

Accuracy: 0.8666666666666667
Confusion Matrix:
 [[24  3]
 [ 5 28]]
Classification Report:
               precision    recall  f1-score   support

           0       0.83      0.89      0.86        27
           1       0.90      0.85      0.88        33

    accuracy                           0.87        60
   macro avg       0.87      0.87      0.87        60
weighted avg       0.87      0.87      0.87        60



**GridSearchCV for Support Vector Classifier**

In [24]:
param_grid = {
    'kernel': ['linear'],
    'C': [0.1, 1, 10, 100],
    'gamma': ['scale', 0.01, 0.001, 0.0001]
}

grid_svm = GridSearchCV(svm, param_grid=param_grid, cv=5, scoring="f1_macro")

grid_svm.fit(X_train, y_train)

print("Best Random Forest:", grid_svm.best_params_)
print("Best score:", grid_svm.best_score_)

Best Random Forest: {'C': 100, 'gamma': 'scale', 'kernel': 'linear'}
Best score: 0.8360070232802885


**Applying the model with the best parameters**

In [25]:
best_svm = grid_svm.best_estimator_
best_svm.fit(X_train, y_train)

**Metrics of the model after tuning the hyperparameters**\
**On the training data:**

In [26]:
y_pred = best_svm.predict(X_train)

print("Random forest results on TRAINING data AFTER tuning the hyperparameters:\n")
print("Accuracy:", accuracy_score(y_train, y_pred))
print("Confusion Matrix:\n", confusion_matrix(y_train, y_pred))
print("Classification Report:\n", classification_report(y_train, y_pred))

Random forest results on TRAINING data AFTER tuning the hyperparameters:

Accuracy: 0.8319327731092437
Confusion Matrix:
 [[ 79  30]
 [ 10 119]]
Classification Report:
               precision    recall  f1-score   support

           0       0.89      0.72      0.80       109
           1       0.80      0.92      0.86       129

    accuracy                           0.83       238
   macro avg       0.84      0.82      0.83       238
weighted avg       0.84      0.83      0.83       238



**On the training data:**

In [27]:
y_pred = best_svm.predict(X_test)

print("Random forest results on TESTING data AFTER tuning the hyperparameters:\n")
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))
print("Classification Report:\n", classification_report(y_test, y_pred))

Random forest results on TESTING data AFTER tuning the hyperparameters:

Accuracy: 0.8833333333333333
Confusion Matrix:
 [[25  2]
 [ 5 28]]
Classification Report:
               precision    recall  f1-score   support

           0       0.83      0.93      0.88        27
           1       0.93      0.85      0.89        33

    accuracy                           0.88        60
   macro avg       0.88      0.89      0.88        60
weighted avg       0.89      0.88      0.88        60



In [28]:
y_proba = best_svm.predict_proba(X_test)[:, 1]
test_auc = roc_auc_score(y_test, y_proba)
print("Testing AUC Score", test_auc)

Testing AUC Score 0.9371492704826038


# **The Dataset for The Unsupervised Learning Models**

In [None]:
hdUnsup=pd.read_csv('/content/pca_heart.csv')

In [None]:
hdUnsup.head()

Unnamed: 0,PC1,PC2,PC3,PC4,PC5,PC6,PC7,PC8
0,-0.012756,0.398917,-0.334051,0.101826,-0.525314,0.28644,0.169919,0.13477
1,1.084516,-0.011767,0.823636,-0.094482,0.02287,-0.038243,0.127943,-0.630965
2,1.026259,-0.139006,-0.044097,-0.509337,0.121475,0.113303,0.350597,-0.195307
3,-0.072698,0.385418,-0.348495,0.182108,-0.450502,0.269782,0.14131,0.014171
4,-0.18827,-0.59277,0.927464,0.032338,-0.725426,-0.042154,0.385333,0.293181


# **K-Means Clustering Hyperparameter Tuning**

**K-Means Clustering Base Model**

In [None]:
kmeans = KMeans(random_state=42)

**Metrics of the model before tuning the hyperparameters**

In [None]:
kmeans.fit(hdUnsup)

y_pred = kmeans.predict(hdUnsup)

print("K-Means results on the data BEFORE tuning the hyperparameters:\n")
print("Silhouette Score:", silhouette_score(hdUnsup, y_pred))

K-Means results on the data BEFORE tuning the hyperparameters:

Silhouette Score: 0.26077934313003853


**GridSearchCV for K-Means Clustering**

In [None]:
from sklearn.metrics import silhouette_score
from sklearn.metrics import make_scorer

def kmeans_silhouette(estimator, X):
    labels = estimator.fit_predict(X)
    return silhouette_score(X, labels)

scorer = make_scorer(kmeans_silhouette)

In [None]:
param_grid = {
    'n_clusters': [2, 3, 4, 5, 6, 7, 8],
    'init': ['k-means++', 'random'],
    'n_init': [10, 20],
    'max_iter': [300]
}

grid_kmeans = GridSearchCV(KMeans(random_state=42), param_grid=param_grid, cv=5, scoring=scorer)
grid_kmeans.fit(hdUnsup)

print("Best K-Means:", grid_kmeans.best_params_)
print("Best score:", grid_kmeans.best_score_)

Best K-Means: {'init': 'k-means++', 'max_iter': 300, 'n_clusters': 2, 'n_init': 10}
Best score: nan


**Applying the model with the best parameters**

In [None]:
best_kmeans = grid_kmeans.best_estimator_
best_kmeans.fit(hdUnsup)

**Metrics of the model after tuning the hyperparameters**

In [None]:
y_pred = best_kmeans.predict(hdUnsup)

print("K-Means results on the data AFTER tuning the hyperparameters:\n")
print("Silhouette Score:", silhouette_score(hdUnsup, y_pred))

K-Means results on the data AFTER tuning the hyperparameters:

Silhouette Score: 0.2709929683936727


# **Hierarchical Clustering Hyperparameter Tuning**

**Hierarchical Clustering Base Model**

In [None]:
hc = AgglomerativeClustering()

**Metrics of the model before tuning the hyperparameters**

In [None]:
hc.fit_predict(hdUnsup)

print("Hierarchical clustering results on the data BEFORE tuning the hyperparameters:\n")
print("Silhouette Score:", silhouette_score(hdUnsup, y_pred))

Hierarchical clustering results on the data BEFORE tuning the hyperparameters:

Silhouette Score: 0.2709929683936727


**GridSearchCV for Hierarchical Clustering**

In [None]:
def hierarchical_silhouette(estimator, X):
    labels = estimator.fit_predict(X)
    return silhouette_score(X, labels)

scorer = make_scorer(hierarchical_silhouette)

In [None]:
param_grid = {
    'n_clusters': [2, 3, 4, 5, 6],
    'linkage': ['ward', 'complete', 'average'],
    'metric': ['euclidean']
}

grid_hier = GridSearchCV(AgglomerativeClustering(), param_grid=param_grid, cv=5, scoring=scorer)
grid_hier.fit(hdUnsup)

print("Best Hierarchical Clustering:", grid_kmeans.best_params_)
print("Best score:", grid_kmeans.best_score_)

Best K-Means: {'init': 'k-means++', 'max_iter': 300, 'n_clusters': 2, 'n_init': 10}
Best score: nan


**Applying the model with the best parameters**

In [None]:
best_hier = grid_hier.best_estimator_

**Metrics of the model after tuning the hyperparameters**

In [None]:
y_pred = best_hier.fit_predict(hdUnsup)

print("K-Means results on the data AFTER tuning the hyperparameters:\n")
print("Silhouette Score:", silhouette_score(hdUnsup, y_pred))

K-Means results on the data AFTER tuning the hyperparameters:

Silhouette Score: 0.23041629049652235


# **Saving The Best Model (Logistic Regression)**

In [32]:
with open("final_model.pkl", "wb") as f:
    pickle.dump(best_logreg, f)

# **Creating and Saving The Pipeline**

In [33]:
pipeline = Pipeline([
    ('scaler', MinMaxScaler()),
    ('classifier', best_logreg)
])

In [34]:
pipeline.fit(X_train, y_train)

**Saving The Pipeline**

In [35]:
with open("final_pipeline.pkl", "wb") as f:
    pickle.dump(pipeline, f)