In [None]:

import pandas as pd
import numpy as np
from sklearn.cluster import KMeans, AgglomerativeClustering, DBSCAN
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
import seaborn as sns


# Data Loading

In [None]:

# Read the raw dataset from disk into a DataFrame. The clustering cells
# further down attach their label columns to this same frame.
csv_path = 'data/data.csv'
data = pd.read_csv(csv_path)


# Data Preparation

In [None]:

# Label columns that the clustering cells below append to `data`. They are
# numeric, so without this exclusion re-running this cell after any clustering
# cell would silently feed earlier cluster labels back in as features (and
# into the `numerical_cols[:4]` slice used for plotting).
cluster_label_cols = ['Cluster_KMeans', 'Cluster_Hierarchical', 'Cluster_DBSCAN']

# Select the numeric feature columns for clustering. `errors='ignore'` keeps
# the first run (before any label column exists) working unchanged.
numerical_cols = (
    data.drop(columns=cluster_label_cols, errors='ignore')
    .select_dtypes(include=[np.number])
    .columns.tolist()
)
data_num = data[numerical_cols]

# Standardize the features so that distance-based clustering is not dominated
# by whichever column happens to have the largest scale.
# NOTE(review): no missing-value handling here; confirm the CSV has no NaNs in
# its numeric columns, since the clustering estimators below reject them.
scaler = StandardScaler()
data_scaled = scaler.fit_transform(data_num)


# K-Means Clustering

In [None]:

# Partition the standardized features into 3 clusters with K-Means.
# n_init is set explicitly: scikit-learn changed its default from 10 to 'auto'
# in 1.4 (and emits a FutureWarning about it in 1.2/1.3), so leaving it
# implicit makes the labels depend on the installed version even though
# random_state is fixed. 10 restarts matches the long-standing default.
kmeans = KMeans(n_clusters=3, n_init=10, random_state=42)
clusters_kmeans = kmeans.fit_predict(data_scaled)

# Store each row's K-Means label alongside the original data
# (read later by the visualization cell).
data['Cluster_KMeans'] = clusters_kmeans


# Hierarchical Clustering

In [None]:

# Agglomerative (hierarchical) clustering of the standardized features into
# 3 groups; every other estimator setting is left at its default.
hierarchical = AgglomerativeClustering(n_clusters=3)
hierarchical.fit(data_scaled)
clusters_hierarchical = hierarchical.labels_

# Record each row's hierarchical cluster label next to the original data.
data['Cluster_Hierarchical'] = clusters_hierarchical


# DBSCAN Clustering

In [None]:

# Density-based clustering of the standardized features.
# eps is the neighbourhood radius and min_samples the neighbour count needed
# for a core point. Per the scikit-learn docs, points assigned to no cluster
# receive the label -1 (noise).
dbscan = DBSCAN(eps=0.5, min_samples=5)
dbscan.fit(data_scaled)
clusters_dbscan = dbscan.labels_

# Record each row's DBSCAN label next to the original data.
data['Cluster_DBSCAN'] = clusters_dbscan


# Cluster Visualization

In [None]:

# Pair plot of the first four numeric features, with points coloured by
# their K-Means cluster label.
pairplot_vars = numerical_cols[:4]
sns.pairplot(data=data, vars=pairplot_vars, hue='Cluster_KMeans')
plt.show()
