In [None]:
pip install numpy pandas scikit-learn matplotlib seaborn


In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.cluster import KMeans, DBSCAN
from sklearn.datasets import make_blobs
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
from sklearn.metrics import silhouette_score


In [None]:
# Build a reproducible toy dataset: 500 samples around 4 blob centres.
# `labels` holds the generating blob of each sample.
data, labels = make_blobs(
    n_samples=500,
    centers=4,
    cluster_std=1.0,
    random_state=42,
)

# Show the raw points (first two feature columns) without any cluster colouring.
fig, ax = plt.subplots()
ax.scatter(data[:, 0], data[:, 1], c='gray', alpha=0.6, s=50)
ax.set(
    title="Synthetic Dataset for Clustering",
    xlabel="Feature 1",
    ylabel="Feature 2",
)
plt.show()


In [None]:
# Apply k-means clustering with 4 clusters.
# n_init is pinned explicitly: scikit-learn changed its default from 10 to "auto"
# in version 1.4 (and emits a FutureWarning about it in 1.2-1.3), so leaving it
# implicit makes the fitted clusters depend on the installed version.
kmeans = KMeans(n_clusters=4, n_init=10, random_state=42)
kmeans_labels = kmeans.fit_predict(data)

# Visualize k-means clustering: samples coloured by assigned cluster,
# fitted centroids overlaid as red crosses.
fig, ax = plt.subplots()
ax.scatter(data[:, 0], data[:, 1], c=kmeans_labels, cmap='viridis', alpha=0.6, s=50)
ax.scatter(
    kmeans.cluster_centers_[:, 0],
    kmeans.cluster_centers_[:, 1],
    c='red',
    marker='X',
    s=200,
    label="Centroids",
)
ax.set_title("k-means Clustering")
ax.set_xlabel("Feature 1")
ax.set_ylabel("Feature 2")
ax.legend()
plt.show()


In [None]:
# Fit DBSCAN (eps=1.0, min_samples=5) on the dataset and keep one label per sample.
dbscan = DBSCAN(eps=1.0, min_samples=5)
dbscan_labels = dbscan.fit_predict(data)

# Scatter the samples, coloured by their DBSCAN label.
fig, ax = plt.subplots()
ax.scatter(
    data[:, 0],
    data[:, 1],
    c=dbscan_labels,
    cmap='coolwarm',
    alpha=0.6,
    s=50,
)
ax.set(
    title="DBSCAN Clustering",
    xlabel="Feature 1",
    ylabel="Feature 2",
)
plt.show()


In [None]:
# Project the dataset onto two principal components.
# NOTE(review): `data` appears to have only two feature columns already, in which
# case this step re-expresses the points rather than reducing them - confirm.
pca = PCA(n_components=2)
data_pca = pca.fit_transform(data)

# Plot the projected points, coloured by the k-means labels computed earlier.
fig, ax = plt.subplots()
ax.scatter(
    data_pca[:, 0],
    data_pca[:, 1],
    c=kmeans_labels,
    cmap='viridis',
    alpha=0.6,
    s=50,
)
ax.set(
    title="Dimensionality Reduction with PCA",
    xlabel="Principal Component 1",
    ylabel="Principal Component 2",
)
plt.show()


In [None]:
# Compute a 2-component t-SNE embedding of the dataset (seeded for repeatability).
tsne = TSNE(n_components=2, random_state=42)
data_tsne = tsne.fit_transform(data)

# Plot the embedding, coloured by the k-means labels computed earlier.
fig, ax = plt.subplots()
ax.scatter(
    data_tsne[:, 0],
    data_tsne[:, 1],
    c=kmeans_labels,
    cmap='viridis',
    alpha=0.6,
    s=50,
)
ax.set(
    title="Dimensionality Reduction with t-SNE",
    xlabel="t-SNE Component 1",
    ylabel="t-SNE Component 2",
)
plt.show()


In [None]:
# Compute silhouette scores for both clusterings.
# (silhouette_score is imported in the notebook's top import cell.)
kmeans_silhouette = silhouette_score(data, kmeans_labels)

# DBSCAN assigns the label -1 to noise samples. Noise is not a cluster, so those
# samples are dropped before scoring; otherwise they would be evaluated as if
# they formed one extra cluster.
dbscan_clustered = dbscan_labels != -1
dbscan_n_clustered = int(dbscan_clustered.sum())
dbscan_n_clusters = len(set(dbscan_labels[dbscan_clustered]))

# silhouette_score is only defined for 2 <= n_clusters <= n_samples - 1.
if 2 <= dbscan_n_clusters < dbscan_n_clustered:
    dbscan_silhouette = silhouette_score(
        data[dbscan_clustered], dbscan_labels[dbscan_clustered]
    )
else:
    dbscan_silhouette = None

print(f"k-means Silhouette Score: {kmeans_silhouette:.4f}")
# Compare against None explicitly: a score of exactly 0.0 is valid but falsy.
if dbscan_silhouette is not None:
    print(f"DBSCAN Silhouette Score: {dbscan_silhouette:.4f}")
else:
    print("DBSCAN Silhouette Score: Not Applicable")
print(f"DBSCAN noise points excluded from score: {len(dbscan_labels) - dbscan_n_clustered}")
