In [2]:
from google.colab import drive

# Mount Google Drive
drive.mount('/content/drive')

Mounted at /content/drive


# Phishing Websites Dataset – UCI Machine Learning Repository

The Phishing Websites dataset, contributed by Rami Mohammad and Lee McCluskey in 2015, is designed for classifying websites as phishing or legitimate. It comprises 11,055 instances and 30 features, all represented as integers. The target variable, "Result," indicates the website's status: `1` for legitimate, `0` for suspicious, and `-1` for phishing. The dataset was sourced from PhishTank, MillerSmiles, and Google's search operators. Notably, it does not contain missing values. :contentReference[oaicite:10]{index=10}:contentReference[oaicite:11]{index=11}

Each feature in the dataset corresponds to specific attributes of a website's URL or domain, such as:​:contentReference[oaicite:14]{index=14}

- Presence of an IP address in the URL
- URL length
- Use of shortening services
- Presence of the "@" symbol
- Double slash redirection
- Prefix or suffix in the domain
- Subdomain presence
- SSL certificate status
- Domain registration length
- Favicon presence:contentReference[oaicite:35]{index=35}

These features are instrumental in distinguishing phishing websites from legitimate ones. For a detailed description of each feature, refer to the [Phishing Websites Features document](https://archive.ics.uci.edu/ml/machine-learning-databases/00327/Phishing%20Websites%20Features.docx). :contentReference[oaicite:40]{index=40}

This dataset is widely utilized in cybersecurity research and machine learning applications aimed at enhancing web security.:contentReference[oaicite:43]{index=43}

For more information or to download the dataset, visit the [UCI Machine Learning Repository's Phishing Websites page](https://archive.ics.uci.edu/dataset/327/phishing+websites).


In [4]:
!pip install liac-arff --quiet

import arff
import pandas as pd
from sklearn.model_selection import train_test_split

# === Path to the ARFF file inside the extracted ZIP
arff_path = "/content/phishing_dataset/Training Dataset.arff"

# === Load ARFF
with open(arff_path, 'r') as f:
    dataset = arff.load(f)

# === Convert to DataFrame
df = pd.DataFrame(dataset['data'], columns=[attr[0] for attr in dataset['attributes']])

# === Convert target column (last column is usually 'Result')
df['Result'] = df['Result'].astype(int).map({1: 0, -1: 1})  # 0 = Legitimate, 1 = Phishing

# === Split to Train / Validation / Test
train_df, temp_df = train_test_split(df, test_size=0.4, stratify=df['Result'], random_state=42)
val_df, test_df = train_test_split(temp_df, test_size=0.5, stratify=temp_df['Result'], random_state=42)

print(f"Train size: {train_df.shape}")
print(f"Validation size: {val_df.shape}")
print(f"Test size: {test_df.shape}")


  Preparing metadata (setup.py) ... [?25l[?25hdone
  Building wheel for liac-arff (setup.py) ... [?25l[?25hdone
Train size: (6633, 31)
Validation size: (2211, 31)
Test size: (2211, 31)


In [3]:
# Display first 5 rows
print("Columns:", df.columns.tolist())
print("\nValue counts for 'Result':\n", df['Result'].value_counts())
df.head()

Columns: ['having_IP_Address', 'URL_Length', 'Shortining_Service', 'having_At_Symbol', 'double_slash_redirecting', 'Prefix_Suffix', 'having_Sub_Domain', 'SSLfinal_State', 'Domain_registeration_length', 'Favicon', 'port', 'HTTPS_token', 'Request_URL', 'URL_of_Anchor', 'Links_in_tags', 'SFH', 'Submitting_to_email', 'Abnormal_URL', 'Redirect', 'on_mouseover', 'RightClick', 'popUpWidnow', 'Iframe', 'age_of_domain', 'DNSRecord', 'web_traffic', 'Page_Rank', 'Google_Index', 'Links_pointing_to_page', 'Statistical_report', 'Result']

Value counts for 'Result':
 Result
 1    6157
-1    4898
Name: count, dtype: int64


Unnamed: 0,having_IP_Address,URL_Length,Shortining_Service,having_At_Symbol,double_slash_redirecting,Prefix_Suffix,having_Sub_Domain,SSLfinal_State,Domain_registeration_length,Favicon,...,popUpWidnow,Iframe,age_of_domain,DNSRecord,web_traffic,Page_Rank,Google_Index,Links_pointing_to_page,Statistical_report,Result
0,-1,1,1,1,-1,-1,-1,-1,-1,1,...,1,1,-1,-1,-1,-1,1,1,-1,-1
1,1,1,1,1,1,-1,0,1,-1,1,...,1,1,-1,-1,0,-1,1,1,1,-1
2,1,0,1,1,1,-1,-1,-1,-1,1,...,1,1,1,-1,1,-1,1,0,-1,-1
3,1,0,1,1,1,-1,-1,-1,1,1,...,1,1,-1,-1,1,-1,1,-1,1,-1
4,1,0,-1,1,1,-1,1,1,-1,1,...,-1,1,-1,-1,0,-1,1,1,1,1


In [None]:
!pip install -U scikit-learn imbalanced-learn

# Semi-Supervised Clustering on Balanced Phishing Dataset Using Multiple Methods

This code loads a phishing dataset, balances classes using oversampling, and extracts Catch22 features alongside XGBoost leaf index embeddings.  
It combines these features and applies UMAP for dimensionality reduction.  
**Three clustering algorithms—Gaussian Mixture Model, Agglomerative Clustering, and HDBSCAN***—are applied to the reduced features.  
Clusters are mapped to true labels by majority vote, and evaluation metrics including classification report, Adjusted Rand Index (ARI), and Normalized Mutual Information (NMI) are printed for each method.  
This provides a comparative analysis of clustering performance on semi-supervised phishing data.  


In [1]:
import os
import pandas as pd
import numpy as np
from scipy.io import arff
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.metrics import classification_report, adjusted_rand_score, normalized_mutual_info_score
from sklearn.mixture import GaussianMixture
from sklearn.cluster import AgglomerativeClustering
import hdbscan
from xgboost import XGBClassifier
from pycatch22 import catch22_all
from umap import UMAP
from scipy.stats import mode
from imblearn.over_sampling import RandomOverSampler

# === Load dataset ===
data, meta = arff.loadarff("/content/phishing_dataset/Training Dataset.arff")
df = pd.DataFrame(data)
#df = df.applymap(lambda x: x.decode() if isinstance(x, bytes) else x)
df = df.apply(lambda col: col.map(lambda x: x.decode() if isinstance(x, bytes) else x))

df["Result"] = df["Result"].astype(int)

# === Prepare data ===
X = df.drop(columns=["Result"]).astype(float)
y = df["Result"].astype(int)

ros = RandomOverSampler(random_state=42)
X_balanced, y_balanced = ros.fit_resample(X, y)

df_balanced = pd.DataFrame(X_balanced, columns=X.columns)
df_balanced["Result"] = y_balanced

X_all = df_balanced.drop(columns=["Result"]).astype(float)
y_all = df_balanced["Result"].astype(int)
y_all_mapped = y_all.replace({-1:0, 1:1}).values

scaler = StandardScaler()
X_all_scaled = scaler.fit_transform(X_all)

# === Catch22 features ===
catch22_path = "/content/catch22_feats.npy"
if os.path.exists(catch22_path):
    catch22_feats = np.load(catch22_path)
else:
    catch22_feats = np.array([catch22_all(x.values)["values"] for _, x in X_all.iterrows()])
    np.save(catch22_path, catch22_feats)

# === XGBoost embedding ===
xgb = XGBClassifier(n_estimators=200, max_depth=4, learning_rate=0.1, eval_metric='logloss', random_state=42)
xgb.fit(X_all_scaled, y_all_mapped)

leaf_indices = xgb.apply(X_all_scaled)
encoder = OneHotEncoder(sparse_output=False, handle_unknown='ignore')
leaf_onehot = encoder.fit_transform(leaf_indices)

# === Combine features ===
X_combined = np.hstack([leaf_onehot, catch22_feats])

# === UMAP reduction ===
umap_model = UMAP(n_components=20, n_neighbors=30, min_dist=0.0)
X_reduced = umap_model.fit_transform(X_combined)

# === Clustering methods ===

# 1. Gaussian Mixture Model (for comparison)
gmm = GaussianMixture(n_components=4, random_state=42)
gmm_clusters = gmm.fit_predict(X_reduced)

# 2. Agglomerative Clustering
agglo = AgglomerativeClustering(n_clusters=4)
agglo_clusters = agglo.fit_predict(X_reduced)

# 3. HDBSCAN (no need to specify clusters count)
hdbscan_clusterer = hdbscan.HDBSCAN(min_cluster_size=50)
hdbscan_clusters = hdbscan_clusterer.fit_predict(X_reduced)

# === Map clusters to labels ===
def map_clusters(true_labels, pred_clusters):
    mapping = {}
    for cluster_id in np.unique(pred_clusters):
        if cluster_id == -1:  # HDBSCAN noise label
            continue
        majority = mode(true_labels[pred_clusters == cluster_id], keepdims=True).mode[0]
        mapping[cluster_id] = majority
    # For noise points in HDBSCAN assign -1 label
    mapped = np.array([mapping.get(c, -1) for c in pred_clusters])
    return mapped

print("\n--- GMM Clustering Evaluation ---")
mapped_gmm = map_clusters(y_all_mapped, gmm_clusters)
print(classification_report(y_all_mapped, mapped_gmm))
print("ARI:", adjusted_rand_score(y_all_mapped, gmm_clusters))
print("NMI:", normalized_mutual_info_score(y_all_mapped, gmm_clusters))

print("\n--- Agglomerative Clustering Evaluation ---")
mapped_agglo = map_clusters(y_all_mapped, agglo_clusters)
print(classification_report(y_all_mapped, mapped_agglo))
print("ARI:", adjusted_rand_score(y_all_mapped, agglo_clusters))
print("NMI:", normalized_mutual_info_score(y_all_mapped, agglo_clusters))

print("\n--- HDBSCAN Clustering Evaluation ---")
mapped_hdbscan = map_clusters(y_all_mapped, hdbscan_clusters)
print(classification_report(y_all_mapped, mapped_hdbscan))
print("ARI:", adjusted_rand_score(y_all_mapped, hdbscan_clusters))
print("NMI:", normalized_mutual_info_score(y_all_mapped, hdbscan_clusters))





--- GMM Clustering Evaluation ---
              precision    recall  f1-score   support

           0       0.86      0.84      0.85      6157
           1       0.85      0.86      0.85      6157

    accuracy                           0.85     12314
   macro avg       0.85      0.85      0.85     12314
weighted avg       0.85      0.85      0.85     12314

ARI: 0.2936026942800383
NMI: 0.30682773880430075

--- Agglomerative Clustering Evaluation ---
              precision    recall  f1-score   support

           0       0.81      0.90      0.86      6157
           1       0.89      0.79      0.84      6157

    accuracy                           0.85     12314
   macro avg       0.85      0.85      0.85     12314
weighted avg       0.85      0.85      0.85     12314

ARI: 0.28490398634972514
NMI: 0.2937189590729474

--- HDBSCAN Clustering Evaluation ---
              precision    recall  f1-score   support

          -1       0.00      0.00      0.00         0
           0       0

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


# Semi-Supervised Clustering Pipeline with Grid Search on Phishing Dataset

This code loads and balances a phishing dataset, extracting Catch22 features and generating XGBoost leaf index embeddings.  
It combines these features, reduces their dimensionality using UMAP, and performs agglomerative clustering with different hyperparameters.  
A grid search tests various numbers of clusters and linkage methods, mapping clusters to labels by majority vote.  
Evaluation metrics including accuracy, macro F1, Adjusted Rand Index (ARI), and Normalized Mutual Information (NMI) are printed for each parameter combination.  
The code is designed to identify the best clustering configuration for the balanced phishing data.  


In [48]:
import os
import pandas as pd
import numpy as np
from scipy.io import arff
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.metrics import classification_report, adjusted_rand_score, normalized_mutual_info_score
from sklearn.cluster import AgglomerativeClustering
from umap import UMAP
from scipy.stats import mode
from imblearn.over_sampling import RandomOverSampler
from xgboost import XGBClassifier
from pycatch22 import catch22_all

# === Load dataset ===
data_path = "/content/phishing_dataset/Training Dataset.arff"
data, meta = arff.loadarff(data_path)
df = pd.DataFrame(data)
df = df.applymap(lambda x: x.decode() if isinstance(x, bytes) else x)
df["Result"] = df["Result"].astype(int)

# === Prepare balanced data ===
X = df.drop(columns=["Result"]).astype(float)
y = df["Result"].astype(int)
ros = RandomOverSampler(random_state=42)
X_bal, y_bal = ros.fit_resample(X, y)
df_bal = pd.DataFrame(X_bal, columns=X.columns)
df_bal["Result"] = y_bal
X_all = df_bal.drop(columns=["Result"]).astype(float)
y_all = df_bal["Result"].astype(int)
y_all_mapped = y_all.replace({-1:0, 1:1}).values

scaler = StandardScaler()
X_all_scaled = scaler.fit_transform(X_all)

# === Catch22 features ===
catch22_path = "/content/catch22_feats.npy"
if os.path.exists(catch22_path):
    catch22_feats = np.load(catch22_path)
else:
    catch22_feats = np.array([catch22_all(x.values)["values"] for _, x in X_all.iterrows()])
    np.save(catch22_path, catch22_feats)

# === XGBoost embedding ===
xgb = XGBClassifier(n_estimators=200, max_depth=4, learning_rate=0.1,
                    use_label_encoder=False, eval_metric='logloss', random_state=42)
xgb.fit(X_all_scaled, y_all_mapped)
leaf_indices = xgb.apply(X_all_scaled)
encoder = OneHotEncoder(sparse_output=False, handle_unknown='ignore')
leaf_onehot = encoder.fit_transform(leaf_indices)

# === Combine features ===
X_combined = np.hstack([leaf_onehot, catch22_feats])

# === UMAP reduction ===
umap_model = UMAP(n_components=20, n_neighbors=30, min_dist=0.0, random_state=42)
X_reduced = umap_model.fit_transform(X_combined)

# === Grid Search parameters ===
agglo_params = {
    "n_clusters": [2, 3, 4, 5],
    "linkage": ["ward", "complete", "average"]
}

# === Cluster-label mapping function ===
def map_clusters_to_labels(true_labels, predicted_clusters):
    mapping = {}
    for cluster_id in np.unique(predicted_clusters):
        majority = mode(true_labels[predicted_clusters == cluster_id], keepdims=True).mode[0]
        mapping[cluster_id] = majority
    return np.vectorize(mapping.get)(predicted_clusters)

for n_clusters in agglo_params["n_clusters"]:
    for linkage in agglo_params["linkage"]:
        # ward linkage requires euclidean metric by default, no need to specify metric explicitly
        try:
            agglo = AgglomerativeClustering(
                n_clusters=n_clusters,
                linkage=linkage
            )
            clusters = agglo.fit_predict(X_reduced)
            mapped_preds = map_clusters_to_labels(y_all_mapped, clusters)
            report = classification_report(y_all_mapped, mapped_preds, output_dict=True, zero_division=0)
            ari = adjusted_rand_score(y_all_mapped, clusters)
            nmi = normalized_mutual_info_score(y_all_mapped, clusters)

            print(f"n_clusters={n_clusters}, linkage={linkage}")
            print(f"Accuracy: {report['accuracy']:.3f}")
            print(f"Macro F1: {report['macro avg']['f1-score']:.3f}")
            print(f"ARI: {ari:.3f}")
            print(f"NMI: {nmi:.3f}")
            print("-" * 40)

        except Exception as e:
            print(f"Skipping n_clusters={n_clusters}, linkage={linkage} due to error: {e}")


  df = df.applymap(lambda x: x.decode() if isinstance(x, bytes) else x)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
  warn(


n_clusters=2, linkage=ward
Accuracy: 0.757
Macro F1: 0.752
ARI: 0.263
NMI: 0.228
----------------------------------------
n_clusters=2, linkage=complete
Accuracy: 0.757
Macro F1: 0.752
ARI: 0.264
NMI: 0.227
----------------------------------------
n_clusters=2, linkage=average
Accuracy: 0.504
Macro F1: 0.342
ARI: 0.000
NMI: 0.008
----------------------------------------
n_clusters=3, linkage=ward
Accuracy: 0.757
Macro F1: 0.752
ARI: 0.205
NMI: 0.185
----------------------------------------
n_clusters=3, linkage=complete
Accuracy: 0.757
Macro F1: 0.752
ARI: 0.258
NMI: 0.224
----------------------------------------
n_clusters=3, linkage=average
Accuracy: 0.506
Macro F1: 0.346
ARI: 0.000
NMI: 0.018
----------------------------------------
n_clusters=4, linkage=ward
Accuracy: 0.875
Macro F1: 0.875
ARI: 0.341
NMI: 0.344
----------------------------------------
n_clusters=4, linkage=complete
Accuracy: 0.757
Macro F1: 0.752
ARI: 0.252
NMI: 0.221
----------------------------------------
n_clus

# Semi-Supervised Clustering with K-Fold Cross-Validation on Phishing Dataset

This code performs a 5-fold cross-validation semi-supervised clustering pipeline on a phishing dataset.  
It balances training data via oversampling, extracts Catch22 features and XGBoost leaf embeddings, then reduces dimensionality with UMAP.  
Agglomerative clustering with ward linkage is applied on the test set embeddings, and clusters are mapped to labels by majority vote.  
Evaluation metrics including classification report, Adjusted Rand Index (ARI), and Normalized Mutual Information (NMI) are computed and printed for each fold and summarized at the end.


In [49]:
import os
import numpy as np
import pandas as pd
from scipy.io import arff
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.metrics import classification_report, adjusted_rand_score, normalized_mutual_info_score
from sklearn.cluster import AgglomerativeClustering
from umap import UMAP
from scipy.stats import mode
from imblearn.over_sampling import RandomOverSampler
from xgboost import XGBClassifier
from pycatch22 import catch22_all
from sklearn.model_selection import KFold

# === Load dataset ===
data_path = "/content/phishing_dataset/Training Dataset.arff"
data, meta = arff.loadarff(data_path)
df = pd.DataFrame(data)
df = df.applymap(lambda x: x.decode() if isinstance(x, bytes) else x)
df["Result"] = df["Result"].astype(int)

# === Prepare X and y ===
X = df.drop(columns=["Result"]).astype(float)
y = df["Result"].astype(int)
y_mapped = y.replace({-1:0, 1:1}).values

# === Initialize KFold ===
kf = KFold(n_splits=5, shuffle=True, random_state=42)

# === Function to map clusters to labels ===
def map_clusters_to_labels(true_labels, pred_clusters):
    mapping = {}
    for cluster_id in np.unique(pred_clusters):
        majority = mode(true_labels[pred_clusters == cluster_id], keepdims=True).mode[0]
        mapping[cluster_id] = majority
    return np.vectorize(mapping.get)(pred_clusters)

# === Store metrics per fold ===
fold_metrics = []

for fold, (train_idx, test_idx) in enumerate(kf.split(X), 1):
    print(f"\n=== Fold {fold} ===")

    # Split train/test
    X_train, X_test = X.iloc[train_idx], X.iloc[test_idx]
    y_train, y_test = y_mapped[train_idx], y_mapped[test_idx]

    # Oversample training only
    ros = RandomOverSampler(random_state=42)
    X_train_bal, y_train_bal = ros.fit_resample(X_train, y_train)
    X_train_bal = pd.DataFrame(X_train_bal, columns=X_train.columns)

    # Scale features
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train_bal)
    X_test_scaled = scaler.transform(X_test)

    # Catch22 features
    catch22_train = np.array([catch22_all(row.values)["values"] for _, row in X_train_bal.iterrows()])
    catch22_test = np.array([catch22_all(row.values)["values"] for _, row in X_test.iterrows()])

    # XGBoost embedding - train classifier on balanced train data
    xgb = XGBClassifier(n_estimators=200, max_depth=4, learning_rate=0.1, eval_metric='logloss', random_state=42)
    xgb.fit(X_train_scaled, y_train_bal)
    train_leaves = xgb.apply(X_train_scaled)
    test_leaves = xgb.apply(X_test_scaled)

    encoder = OneHotEncoder(sparse_output=False, handle_unknown='ignore')
    train_leaf_onehot = encoder.fit_transform(train_leaves)
    test_leaf_onehot = encoder.transform(test_leaves)

    # Combine features
    X_train_combined = np.hstack([train_leaf_onehot, catch22_train])
    X_test_combined = np.hstack([test_leaf_onehot, catch22_test])

    # UMAP reduction (fit on train, transform test)
    umap_model = UMAP(n_components=20, n_neighbors=30, min_dist=0.0, random_state=42)
    X_train_reduced = umap_model.fit_transform(X_train_combined)
    X_test_reduced = umap_model.transform(X_test_combined)

    # Agglomerative clustering on test set
    agglo = AgglomerativeClustering(n_clusters=5, linkage='ward')
    clusters_test = agglo.fit_predict(X_test_reduced)

    # Map clusters to labels using test true labels
    mapped_test_preds = map_clusters_to_labels(y_test, clusters_test)

    # Evaluation
    report = classification_report(y_test, mapped_test_preds, output_dict=True, zero_division=0)
    ari = adjusted_rand_score(y_test, clusters_test)
    nmi = normalized_mutual_info_score(y_test, clusters_test)

    print(classification_report(y_test, mapped_test_preds, zero_division=0))
    print(f"ARI: {ari:.3f}")
    print(f"NMI: {nmi:.3f}")

    fold_metrics.append({
        "fold": fold,
        "accuracy": report["accuracy"],
        "macro_f1": report["macro avg"]["f1-score"],
        "ARI": ari,
        "NMI": nmi
    })

# === Summary ===
df_metrics = pd.DataFrame(fold_metrics)
print("\n=== K-Fold Cross Validation Summary ===")
print(df_metrics)
print("Mean metrics:")
print(df_metrics.mean())


  df = df.applymap(lambda x: x.decode() if isinstance(x, bytes) else x)



=== Fold 1 ===


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
  warn(


              precision    recall  f1-score   support

           0       0.92      0.90      0.91       956
           1       0.93      0.94      0.93      1255

    accuracy                           0.92      2211
   macro avg       0.92      0.92      0.92      2211
weighted avg       0.92      0.92      0.92      2211

ARI: 0.389
NMI: 0.436

=== Fold 2 ===


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
  warn(


              precision    recall  f1-score   support

           0       0.89      0.86      0.88       950
           1       0.90      0.92      0.91      1261

    accuracy                           0.89      2211
   macro avg       0.89      0.89      0.89      2211
weighted avg       0.89      0.89      0.89      2211

ARI: 0.325
NMI: 0.394

=== Fold 3 ===


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
  warn(


              precision    recall  f1-score   support

           0       0.92      0.85      0.88       997
           1       0.89      0.94      0.91      1214

    accuracy                           0.90      2211
   macro avg       0.90      0.90      0.90      2211
weighted avg       0.90      0.90      0.90      2211

ARI: 0.379
NMI: 0.413

=== Fold 4 ===


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
  warn(


              precision    recall  f1-score   support

           0       0.93      0.90      0.92       985
           1       0.92      0.95      0.93      1226

    accuracy                           0.93      2211
   macro avg       0.93      0.92      0.93      2211
weighted avg       0.93      0.93      0.93      2211

ARI: 0.431
NMI: 0.463

=== Fold 5 ===


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
  warn(


              precision    recall  f1-score   support

           0       0.91      0.86      0.89      1010
           1       0.89      0.93      0.91      1201

    accuracy                           0.90      2211
   macro avg       0.90      0.90      0.90      2211
weighted avg       0.90      0.90      0.90      2211

ARI: 0.357
NMI: 0.406

=== K-Fold Cross Validation Summary ===
   fold  accuracy  macro_f1       ARI       NMI
0     1  0.922659  0.921047  0.389057  0.436229
1     2  0.894618  0.891972  0.324911  0.394130
2     3  0.899593  0.897823  0.379238  0.413330
3     4  0.926278  0.925128  0.430683  0.463483
4     5  0.898688  0.897450  0.356836  0.405711
Mean metrics:
fold        3.000000
accuracy    0.908367
macro_f1    0.906684
ARI         0.376145
NMI         0.422577
dtype: float64
