In [None]:
# Import necessary libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans, AgglomerativeClustering
from sklearn.metrics import silhouette_score
from sklearn.preprocessing import StandardScaler

# Read the wine data from disk. The file name really does contain a space
# before the extension; keep it in sync with the file next to the notebook.
wine_cluster = pd.read_csv('Wine_clust .csv')

# Feature matrix: every column except the first.
# NOTE(review): this assumes column 0 is an identifier rather than a
# measurement - confirm against the CSV header, otherwise a real feature
# is being discarded here.
X = wine_cluster.iloc[:, 1:]

# Learn the scaling parameters first, then apply them, so that every
# feature is standardized before distance-based clustering.
scaler = StandardScaler().fit(X)
X_scaled = scaler.transform(X)

# Function to find the optimal number of clusters using K-means
def find_optimal_kmeans_clusters(data, max_clusters=10, random_state=42):
    """Return the K-means cluster count with the highest mean silhouette score.

    K-means is fitted once for every candidate count from 2 up to
    ``max_clusters`` (inclusive) and each resulting labelling is scored with
    ``silhouette_score``.

    Args:
        data: Array-like whose first axis indexes the samples to cluster.
        max_clusters: Largest cluster count to try. It is capped at
            ``n_samples - 1`` because the silhouette score is only defined
            for 2 to ``n_samples - 1`` distinct labels.
        random_state: Seed forwarded to every ``KMeans`` fit.

    Returns:
        int: The candidate count with the best score; on ties the smallest
        count wins.

    Raises:
        ValueError: If ``max_clusters`` is below 2, or ``data`` has fewer
            than 3 samples (no valid candidate count exists).
    """
    if max_clusters < 2:
        raise ValueError(
            f"max_clusters must be at least 2, got {max_clusters}"
        )

    n_samples = np.shape(data)[0]
    if n_samples < 3:
        raise ValueError(
            f"need at least 3 samples to compare cluster counts, got {n_samples}"
        )

    # Never ask for more clusters than silhouette_score can evaluate.
    upper_bound = min(max_clusters, n_samples - 1)
    candidates = range(2, upper_bound + 1)

    silhouette_scores = []
    for n_clusters in candidates:
        kmeans = KMeans(n_clusters=n_clusters, random_state=random_state)
        labels = kmeans.fit_predict(data)
        silhouette_scores.append(silhouette_score(data, labels))

    # np.argmax returns the first maximum, so ties resolve to the smallest k.
    best_index = int(np.argmax(silhouette_scores))
    return int(candidates[best_index])

# Choose the cluster count by silhouette score and report it
optimal_kmeans_clusters = find_optimal_kmeans_clusters(X_scaled)
print("Optimal number of clusters (K-means):", optimal_kmeans_clusters)

# K-means: fit with the chosen count and read the labels off the fitted model
kmeans_model = KMeans(n_clusters=optimal_kmeans_clusters, random_state=42)
kmeans_model.fit(X_scaled)
kmeans_labels = kmeans_model.labels_

# Hierarchical (agglomerative) clustering with the same cluster count.
# Other linkage options include 'complete' and 'average'.
agg_model = AgglomerativeClustering(linkage='ward', n_clusters=optimal_kmeans_clusters)
agg_labels = agg_model.fit(X_scaled).labels_

# DBSCAN: density-based, so no cluster count is passed in; label -1 means noise
from sklearn.cluster import DBSCAN

# NOTE(review): the visible part of the output captured with this cell shows
# only -1 labels for DBSCAN, so eps=0.5 is probably too small for the
# standardized features - confirm and tune eps / min_samples.
dbscan_model = DBSCAN(min_samples=5, eps=0.5)
dbscan_labels = dbscan_model.fit(X_scaled).labels_

# Show the per-sample assignments of each method, in a fixed order
for _heading, _assignments in (
    ("K-means cluster labels:", kmeans_labels),
    ("Hierarchical cluster labels:", agg_labels),
    ("DBSCAN cluster labels:", dbscan_labels),
):
    print(_heading, _assignments)




Optimal number of clusters (K-means): 2
K-means cluster labels: [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0 0 0 0 1 0 1 0 0 0
 0 0 0 1 0 0 0 0 0 1 0 0 0 0 1 0 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 0 0 0
 0 1 0 0 0 0 0 1 0 0 0 1 0 0 0 0 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1]
Hierarchical cluster labels: [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 1 0 0 0 1 0 1 0 0 0
 0 0 0 1 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 1 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1]
DBSCAN cluster labels: [-1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
 -1 -1 -1 -1 -

