In [2]:
import pandas as pd 
import numpy as np

from sklearn.preprocessing import StandardScaler
from sklearn.cluster import AgglomerativeClustering
from sklearn.metrics import silhouette_score, calinski_harabasz_score


In [3]:
# Location of the cleaned training array on disk.
# NOTE(review): this is an absolute, machine-specific path; the notebook will
# only run on a machine with this exact drive layout. Consider a path relative
# to a configurable data directory.
train_data_path = r"E:\TY\SESD\SESD-PROJECT\data\clean_data\train_data.npy"

# Read the .npy file into memory, then show its dimensions as the cell output.
train_data = np.load(train_data_path)
train_data.shape

(114000, 15)

In [4]:
# Work on a subset: the leading 30% of the rows, all columns.
# NOTE(review): this is a head slice, not a random draw. If the rows of
# train_data are ordered in any way, the subset may not be representative.
# Despite the "df_" prefix, the result is a NumPy array slice, not a DataFrame.
sample_fraction = 0.3
size = int(sample_fraction * train_data.shape[0])
df_sample = train_data[:size]
df_sample.shape

(34200, 15)

In [5]:
# Cluster the subset into three groups and keep the per-row cluster labels.
model = AgglomerativeClustering(n_clusters=3)
labels = model.fit_predict(df_sample)

# Two internal validity metrics computed from the data and the assigned labels.
score1 = silhouette_score(df_sample, labels)
score2 = calinski_harabasz_score(df_sample, labels)

# Print both metrics rounded to three decimals, one per line.
for metric_name, metric_value in (
    ("Silhouette Score", score1),
    ("Calinski Harabasz Score", score2),
):
    print(f"{metric_name}: {metric_value:.3f}")



Silhouette Score: 0.162
Calinski Harabasz Score: 4594.646


In [6]:
# Summary of this clustering run, later serialized to JSON.
#
# - The cluster count is read back from the fitted model instead of repeating
#   the literal used at construction time, so the report cannot drift from the
#   model that actually produced the scores.
# - The metric values are cast to builtin int/float: the scoring functions
#   return NumPy scalars, and a non-float64 NumPy scalar (e.g. float32) is not
#   JSON-serializable. The cast also keeps the displayed dict free of
#   NumPy scalar wrappers.
report = {
    "algorithm": "AgglomerativeClustering",
    "parameter": {
        "n-cluster": int(model.n_clusters_),
    },
    "Silhouette Score": float(score1),
    "calinski_harabasz_score": float(score2),
}

# Display the report as the cell output.
report

{'algorithm': 'AgglomerativeClustering',
 'parameter': {'n-cluster': 3},
 'Silhouette Score': 0.16238037764653832,
 'calinski_harabasz_score': 4594.646457244154}

In [7]:
import os
import json

# Directory (relative to the notebook's working directory) that holds reports;
# created on first use and left untouched if it already exists.
folder_path = "reports"
os.makedirs(folder_path, exist_ok=True)

# Write the report dict as pretty-printed JSON, overwriting any previous file.
report_file_path = os.path.join(folder_path, 'AgglomerativeClustering_reports.json')
with open(report_file_path, 'w') as f:
    json.dump(report, f, indent=4)

# The original message was an f-string without a placeholder plus a stray
# "%S", so it printed the literal "%S"; interpolate the path directly instead.
print(f"Report stored successfully in: {report_file_path}")

reprots stored successfully in: %S reports\AgglomerativeClustering_reports.json
