In [2]:
import pandas as pd
from sklearn.cluster import MiniBatchKMeans
from sklearn.metrics import silhouette_score, calinski_harabasz_score, davies_bouldin_score
from sklearn.preprocessing import LabelEncoder

# --- Configuration ------------------------------------------------------------
# Everything a reader might want to tune lives here rather than inline.
DATA_PATH = "gun_deaths.csv"
CATEGORICAL_COLUMNS = ["sex", "race", "place", "education"]
N_CLUSTERS = 4
# Pinned explicitly: scikit-learn changed the MiniBatchKMeans default for
# n_init between releases, so leaving it implicit makes results (and warnings)
# depend on the installed version.
N_INIT = 3
# Fixed seed so the centroid initialisation and mini-batch sampling are
# repeatable under Restart Kernel -> Run All.
RANDOM_STATE = 42

# Load the dataset
df = pd.read_csv(DATA_PATH)

# Preprocess the dataset: drop every row that has at least one missing value.
df = df.dropna()

# Encode categorical features as integer codes.
# Each column gets its own encoder so that every code -> category mapping stays
# available (via label_encoders[column].inverse_transform) when interpreting
# the clusters; re-fitting a single encoder would keep only the last mapping.
label_encoders = {}
for column in CATEGORICAL_COLUMNS:
    column_encoder = LabelEncoder()
    df[column] = column_encoder.fit_transform(df[column])
    label_encoders[column] = column_encoder

# Backward compatibility: `label_encoder` used to end up fitted on the last
# encoded column ("education"); keep that name bound to the same mapping.
label_encoder = label_encoders["education"]

# Select relevant features for clustering
features_for_clustering = ["year", "month", "sex", "age", "race", "place", "education"]

X = df[features_for_clustering]

# Fail early with a clear message instead of an opaque estimator error when
# dropna() removed (almost) everything.
if len(X) < N_CLUSTERS:
    raise ValueError(
        f"Need at least {N_CLUSTERS} complete rows to form {N_CLUSTERS} clusters, "
        f"but only {len(X)} remain after dropping missing values."
    )

# NOTE(review): the features are clustered as-is, i.e. unscaled and with the
# integer category codes treated as numeric values. Columns with a wide numeric
# range will therefore weigh more in the distance computation. Consider
# one-hot encoding + scaling; left unchanged here so `X` keeps its current
# meaning for any other code that uses it.

# Perform MiniBatchKMeans clustering
kmeans = MiniBatchKMeans(
    n_clusters=N_CLUSTERS,
    n_init=N_INIT,
    random_state=RANDOM_STATE,
)
kmeans_labels = kmeans.fit_predict(X)

# Evaluate clustering with three internal validity indices computed on the
# same feature matrix and the predicted labels.
kmeans_sil = silhouette_score(X, kmeans_labels)
kmeans_ch = calinski_harabasz_score(X, kmeans_labels)
kmeans_db = davies_bouldin_score(X, kmeans_labels)

print("MiniBatchKMeans")
print("Silhouette score:", kmeans_sil)
print("Calinski-Harabasz score:", kmeans_ch)
print("Davies-Bouldin score:", kmeans_db)




MiniBatchKMeans
Silhouette score: 0.4276634442302008
Calinski-Harabasz score: 229523.95841303852
Davies-Bouldin score: 0.7415881631381896
