In [None]:
# Import necessary libraries
import joblib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.cluster import DBSCAN
from sklearn.cluster import KMeans
from sklearn.datasets import make_blobs
from sklearn.metrics import silhouette_score
from sklearn.preprocessing import StandardScaler

# Load your dataset
data = pd.read_csv('your_dataset.csv')

# Explore the dataset: first rows, then column dtypes / non-null counts
print(data.head())
# DataFrame.info() writes its summary to stdout and returns None, so it is
# called directly; wrapping it in print() would emit a stray "None" line.
data.info()


In [None]:
# Check for missing values. The counts are printed explicitly because a
# notebook cell only auto-displays its last expression.
missing_counts = data.isnull().sum()
print(missing_counts)

# Build the clustering feature matrix without mutating `data`:
#  - keep numeric/boolean columns only (StandardScaler cannot convert strings)
#  - drop rows that still contain missing values so no NaN reaches K-Means
features = data.select_dtypes(include=['number', 'bool']).dropna()
if features.empty:
    raise ValueError('No complete numeric rows are available for clustering.')

# Standardize the features to zero mean and unit variance
scaler = StandardScaler()
scaled_data = scaler.fit_transform(features)


In [None]:
# Finding the optimum number of clusters using the Elbow Method

# K-Means settings shared by the elbow sweep and the final model, so the two
# cannot drift apart when one of them is tuned.
kmeans_params = dict(init='k-means++', max_iter=300, n_init=10, random_state=0)

# K-Means requires n_samples >= n_clusters, so cap the sweep for tiny datasets.
max_clusters = min(10, len(scaled_data))
cluster_range = range(1, max_clusters + 1)

# WCSS (within-cluster sum of squares) for every candidate cluster count
wcss = []
for k in cluster_range:
    kmeans = KMeans(n_clusters=k, **kmeans_params)
    kmeans.fit(scaled_data)
    wcss.append(kmeans.inertia_)

# Plot the Elbow graph
plt.plot(cluster_range, wcss, marker='o')
plt.xticks(list(cluster_range))
plt.title('Elbow Method')
plt.xlabel('Number of clusters')
plt.ylabel('WCSS')  # Within cluster sum of squares
plt.show()

# From the elbow graph, choose the optimal number of clusters and create the K-Means model.
# NOTE: this value is read off the plot by hand; update it after inspecting the curve.
optimal_clusters = 3
kmeans_optimal = KMeans(n_clusters=optimal_clusters, **kmeans_params)
kmeans_optimal.fit(scaled_data)


In [None]:
# Validate the chosen cluster count: compute the silhouette score of the
# fitted K-Means labels on the scaled data and report it.
cluster_labels = kmeans_optimal.labels_
silhouette_avg = silhouette_score(scaled_data, cluster_labels)
report = f'Silhouette Score for {optimal_clusters} clusters: {silhouette_avg}'
print(report)
# Synthetic 2-D sample for the DBSCAN demo (independent of the loaded dataset)
X, _ = make_blobs(
    n_samples=300,
    centers=3,
    cluster_std=0.60,
    random_state=0,
)

# Fit DBSCAN and read back the per-sample cluster labels
dbscan = DBSCAN(eps=0.3, min_samples=5)
dbscan.fit(X)
labels = dbscan.labels_

# Scatter plot of the two feature columns, coloured by DBSCAN label
first_feature, second_feature = X[:, 0], X[:, 1]
plt.scatter(first_feature, second_feature, c=labels, cmap='viridis')
plt.title('DBSCAN Clustering')
plt.show()


In [None]:
# Persist the fitted scaler alongside the K-Means model: the model was trained
# on standardized data, so new samples must go through the same fitted scaler
# before being passed to the model.
joblib.dump(scaler, 'scaler.pkl')

# Save the K-Means model
joblib.dump(kmeans_optimal, 'kmeans_model.pkl')

# Save the DBSCAN model if applicable
# NOTE: this estimator was fitted on the synthetic make_blobs sample, not on
# the loaded dataset; refit it on real features before relying on the file.
joblib.dump(dbscan, 'dbscan_model.pkl')

