In [1]:
import json
import os

import numpy as np
import pandas as pd

from sklearn.cluster import DBSCAN
from sklearn.metrics import silhouette_score, calinski_harabasz_score
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import StandardScaler


In [2]:
# Load the cleaned training array from disk and show its dimensions.
# NOTE(review): this is a machine-specific absolute path — consider reading
# the data directory from a single configurable location.
train_path = r"E:\TY\SESD\SESD-PROJECT\data\clean_data\train_data.npy"
train_data = np.load(train_path)
train_data.shape

(114000, 15)

In [3]:
# Work on the leading 30% of the rows (a contiguous slice, not a random draw).
n_rows = train_data.shape[0]
size = int(0.3 * n_rows)
df_sample = train_data[:size, :]
df_sample.shape

(34200, 15)

In [4]:
# Cluster the sample with DBSCAN using its default hyper-parameters.
# fit() returns the fitted estimator, so construction and fitting are chained.
model = DBSCAN().fit(df_sample)
labels = model.labels_

# Score the labelling with both metrics, in the same order as before
# (silhouette first, then Calinski-Harabasz); labels are passed unmodified.
score1, score2 = (
    metric(df_sample, labels)
    for metric in (silhouette_score, calinski_harabasz_score)
)

print(f"Silhouette Score: {score1:.3f}")
print(f"Calinski Harabasz Score: {score2:.3f}")

Silhouette Score: -0.417
Calinski Harabasz Score: 6.906


In [5]:
# Assemble the evaluation record: run metadata first, then the two scores.
# Insertion order matches the key order of the serialized report.
report = {"algorithm": " DBSCAN", "parameter": "default"}
report["Silhouette Score"] = score1
report["calinski_harabasz_score"] = score2

report

{'algorithm': ' DBSCAN',
 'parameter': 'default',
 'Silhouette Score': -0.4165179705724261,
 'calinski_harabasz_score': 6.905748302538726}

In [6]:
# Persist the evaluation report as pretty-printed JSON inside a "reports"
# folder, resolved relative to the notebook's current working directory.
folder_path = "reports"
os.makedirs(folder_path, exist_ok=True)  # no error if the folder already exists

report_file_path = os.path.join(folder_path, 'DBSCAN_reports.json')
# Explicit encoding keeps the written file identical across platforms.
with open(report_file_path, 'w', encoding='utf-8') as f:
    json.dump(report, f, indent=4)

# The path is interpolated directly in the f-string; the previous message
# mixed an f-string with a C-style "%S" token and so printed "%S" verbatim.
print(f"Report stored successfully in: {report_file_path}")

reprots stored successfully in: %S reports\DBSCAN_reports.json
