Part 1: K-Means Clustering Algorithm (Custom Python Implementation)

In [None]:
# to calculate Euclidean distance in 3D
def euclidean_distance(p1, p2):
    """Return the straight-line distance between two 3D points.

    Only the coordinates at indices 0, 1 and 2 of each point are read.
    """
    dx = p1[0] - p2[0]
    dy = p1[1] - p2[1]
    dz = p1[2] - p2[2]
    squared_length = dx ** 2 + dy ** 2 + dz ** 2
    return squared_length ** 0.5

# checking if centroids have converged
def converged(centroids, prev_centroids, tolerance=1e-4):
    """Report whether every centroid stayed within `tolerance` of its previous position.

    Centroids are compared pairwise, in order; pairing stops at the shorter
    of the two sequences.
    """
    any_moved = any(
        euclidean_distance(current, previous) > tolerance
        for current, previous in zip(centroids, prev_centroids)
    )
    return not any_moved

# to assign points to clusters based on distance from centroids
def assign_clusters(points, centroids):
    """Group each point under the index of its nearest centroid.

    Args:
        points: iterable of 3D points.
        centroids: sequence of 3D centroids; one cluster is created per
            centroid, so any number of clusters is supported.

    Returns:
        dict mapping centroid index (0..k-1) to the list of points assigned
        to it. Every index is present, even when its list is empty.
    """
    clusters = {index: [] for index in range(len(centroids))}
    for point in points:
        distances = [euclidean_distance(point, centroid) for centroid in centroids]
        # list.index returns the first minimum, so ties go to the lower index.
        nearest = distances.index(min(distances))
        clusters[nearest].append(point)
    return clusters

# compute new centroids as the mean of points in the cluster
def update_centroids(clusters):
    """Recompute each centroid as the coordinate-wise mean of its cluster.

    Args:
        clusters: mapping (or sequence) from cluster index 0..k-1 to the list
            of 3D points in that cluster. Any number of clusters is accepted.

    Returns:
        list of k centroids ``[x, y, z]`` in cluster-index order. A cluster
        with no points gets the placeholder ``[10*i, 10*i, 10*i]`` (as floats)
        for its index ``i``, since a mean is undefined for it.
    """
    centroids = []
    for i in range(len(clusters)):
        members = clusters[i]
        count = len(members)
        if count > 0:
            centroid = [sum(p[axis] for p in members) / count for axis in range(3)]
        else:
            # Empty cluster: fall back to a fixed per-index placeholder.
            centroid = [float(i * 10)] * 3
        centroids.append(centroid)
    return centroids

def kmeans_3d(points, initial_centroids, max_iters=100):
    """Run k-means on 3D points, printing the clusters and centroids each round.

    Each round assigns every point to its nearest centroid and then moves the
    centroids to the mean of their clusters. Rounds repeat until the centroids
    stop moving (as judged by `converged`) or `max_iters` rounds have run.

    Args:
        points: list of 3D points.
        initial_centroids: starting centroids, one per cluster.
        max_iters: upper bound on the number of assign/update rounds.

    Returns:
        (clusters, centroids): the clusters from the last assignment step and
        the centroids after the last update. If no round runs
        (``max_iters <= 0``), the points are assigned once to the initial
        centroids and those centroids are returned unchanged.
    """
    centroids = initial_centroids
    # None means "no previous round yet". Using a real point such as the origin
    # as the sentinel would make centroids that start there look converged and
    # skip the loop entirely.
    prev_centroids = None
    clusters = None
    iteration = 0

    while iteration < max_iters and (
        prev_centroids is None or not converged(centroids, prev_centroids)
    ):
        print(f"\n--- Iteration {iteration + 1} ---")

        # Snapshot the centroids so the next convergence check can compare against them.
        prev_centroids = [c[:] for c in centroids]

        # Step 1: assign points to clusters
        clusters = assign_clusters(points, centroids)

        # Step 2: print clusters
        for index, members in clusters.items():
            print(f"Cluster {index + 1}: {members}")

        # Step 3: update centroids
        centroids = update_centroids(clusters)
        print(f"Updated Centroids: {centroids}")

        iteration += 1

    if clusters is None:
        # No round ran; still return a valid assignment instead of failing.
        clusters = assign_clusters(points, centroids)

    print("\nClustering completed!")
    return clusters, centroids

def main():
    """Cluster a fixed set of ten 3D points into three groups and print the outcome."""
    points = [
        [2, 4, 10], [12, 3, 20], [30, 11, 25], [23, 10, 6],
        [7, 15, 18], [5, 6, 9], [16, 19, 3], [11, 5, 8],
        [9, 12, 7], [14, 3, 16],
    ]

    print("Using initial centroids for 3 clusters:")
    # The starting centroids have the same coordinates as the first three data points.
    initial_centroids = [
        [2, 4, 10],
        [12, 3, 20],
        [30, 11, 25],
    ]

    clusters, final_centroids = kmeans_3d(points, initial_centroids)

    print("\nFinal Clusters and Centroids:")
    for cluster_id in (0, 1, 2):
        print(f"Cluster {cluster_id + 1}: {clusters[cluster_id]}")
    print(f"Final Centroids: {final_centroids}")

# Entry point: run the Part 1 demo when this code is executed directly
# (the guard keeps it from running if these definitions are imported instead).
if __name__ == "__main__":
    main()


Using initial centroids for 3 clusters:

--- Iteration 1 ---
Cluster 1: [[2, 4, 10], [5, 6, 9], [16, 19, 3], [11, 5, 8], [9, 12, 7]]
Cluster 2: [[12, 3, 20], [23, 10, 6], [7, 15, 18], [14, 3, 16]]
Cluster 3: [[30, 11, 25]]
Updated Centroids: [[8.6, 9.2, 7.4], [14.0, 7.75, 15.0], [30.0, 11.0, 25.0]]

--- Iteration 2 ---
Cluster 1: [[2, 4, 10], [5, 6, 9], [16, 19, 3], [11, 5, 8], [9, 12, 7]]
Cluster 2: [[12, 3, 20], [23, 10, 6], [7, 15, 18], [14, 3, 16]]
Cluster 3: [[30, 11, 25]]
Updated Centroids: [[8.6, 9.2, 7.4], [14.0, 7.75, 15.0], [30.0, 11.0, 25.0]]

Clustering completed!

Final Clusters and Centroids:
Cluster 1: [[2, 4, 10], [5, 6, 9], [16, 19, 3], [11, 5, 8], [9, 12, 7]]
Cluster 2: [[12, 3, 20], [23, 10, 6], [7, 15, 18], [14, 3, 16]]
Cluster 3: [[30, 11, 25]]
Final Centroids: [[8.6, 9.2, 7.4], [14.0, 7.75, 15.0], [30.0, 11.0, 25.0]]


Part 2: K-Means Clustering Using scikit-learn

In [None]:
pip install scikit-learn



In [None]:
from sklearn.cluster import KMeans
import numpy as np

# Input points (same data as the custom implementation in Part 1)
points = np.array([
    [2, 4, 10], [12, 3, 20], [30, 11, 25], [23, 10, 6],
    [7, 15, 18], [5, 6, 9], [16, 19, 3], [11, 5, 8],
    [9, 12, 7], [14, 3, 16]
])

# Initial centroids, one row per cluster
centroids = np.array([
    [2, 4, 10],
    [12, 3, 20],
    [30, 11, 25]
])

# Derive the cluster count from the seeds so it is stated in exactly one place.
n_clusters = len(centroids)

# Safety cap so the stepping loop always terminates, even if the centroids
# never settle within np.allclose's tolerance.
MAX_ITERATIONS = 100

prev_centroids = None
labels = None

# Step scikit-learn's KMeans one round at a time until the centroids stop changing
for iteration in range(1, MAX_ITERATIONS + 1):
    print(f"\n--- Iteration {iteration} ---")

    # max_iter=1 with an explicit init array: run a single round starting
    # from the current centroids (n_init=1 because the init is fixed).
    kmeans = KMeans(n_clusters=n_clusters, init=centroids, n_init=1, max_iter=1, random_state=0)
    kmeans.fit(points)

    # Get updated clusters and centroids
    labels = kmeans.labels_
    centroids = kmeans.cluster_centers_

    # Print clusters and new centroids
    for i in range(n_clusters):
        cluster_points = points[labels == i].tolist()
        print(f"Cluster {i + 1}: {cluster_points}")
    print("Updated Centroids:", centroids.tolist())

    # Stop if centroids haven't changed (allclose absorbs float rounding noise)
    if prev_centroids is not None and np.allclose(prev_centroids, centroids):
        break
    prev_centroids = centroids.copy()
else:
    # Reached only when the loop ran out of iterations without hitting `break`.
    print(f"\nStopped after {MAX_ITERATIONS} iterations without the centroids settling.")

print("\n✅ Clustering Completed!")
for i in range(n_clusters):
    print(f"Final Cluster {i + 1}: {points[labels == i].tolist()}")
print("Final Centroids:", centroids.tolist())



--- Iteration 1 ---
Cluster 1: [[2, 4, 10], [5, 6, 9], [16, 19, 3], [11, 5, 8], [9, 12, 7]]
Cluster 2: [[12, 3, 20], [23, 10, 6], [7, 15, 18], [14, 3, 16]]
Cluster 3: [[30, 11, 25]]
Updated Centroids: [[8.600000000000001, 9.2, 7.3999999999999995], [14.0, 7.75, 15.0], [30.0, 11.0, 25.0]]

--- Iteration 2 ---
Cluster 1: [[2, 4, 10], [5, 6, 9], [16, 19, 3], [11, 5, 8], [9, 12, 7]]
Cluster 2: [[12, 3, 20], [23, 10, 6], [7, 15, 18], [14, 3, 16]]
Cluster 3: [[30, 11, 25]]
Updated Centroids: [[8.600000000000001, 9.2, 7.3999999999999995], [14.0, 7.75, 15.0], [30.0, 11.0, 25.0]]

✅ Clustering Completed!
Final Cluster 1: [[2, 4, 10], [5, 6, 9], [16, 19, 3], [11, 5, 8], [9, 12, 7]]
Final Cluster 2: [[12, 3, 20], [23, 10, 6], [7, 15, 18], [14, 3, 16]]
Final Cluster 3: [[30, 11, 25]]
Final Centroids: [[8.600000000000001, 9.2, 7.3999999999999995], [14.0, 7.75, 15.0], [30.0, 11.0, 25.0]]


In [None]:
from google.colab import files

# Open Colab's upload widget; the result maps each uploaded file name to its contents.
uploaded = files.upload()

# Report the name and size of every file that was uploaded.
for fn, payload in uploaded.items():
  size_in_bytes = len(payload)
  print('User uploaded file "{name}" with length {length} bytes'.format(
      name=fn, length=size_in_bytes))

Saving EEG Eye State - EEG Eye State.csv to EEG Eye State - EEG Eye State (1).csv
User uploaded file "EEG Eye State - EEG Eye State (1).csv" with length 1696098 bytes


In [None]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
from sklearn.metrics import classification_report, confusion_matrix
import numpy as np

# NOTE(review): the " (1)" suffix comes from Colab renaming a repeated upload;
# a first-time upload is likely saved without it, so make sure this path
# matches the name reported by the upload cell.
file_path = '/content/EEG Eye State - EEG Eye State (1).csv'
df = pd.read_csv(file_path)

# Features are every column except the label; the label is used only for evaluation.
X = df.drop("eyeDetection", axis=1)
y = df["eyeDetection"]

# Standardize the features so no single column dominates the Euclidean distances.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

kmeans = KMeans(n_clusters=2, random_state=42, n_init=10)
clusters = kmeans.fit_predict(X_scaled)

# Map each cluster id to the majority true label among its members.
# Each cluster is voted on independently; the previous expression inverted the
# vote for cluster 1, which could send both clusters to the same label.
y_true = y.to_numpy()
cluster_to_label = np.zeros(kmeans.n_clusters, dtype=int)
for cluster_id in np.unique(clusters):
    cluster_to_label[cluster_id] = int(y_true[clusters == cluster_id].mean() > 0.5)
mapped_clusters = cluster_to_label[clusters]

print("Confusion Matrix:")
print(confusion_matrix(y, mapped_clusters))

print("\nClassification Report:")
# zero_division=0 keeps the report quiet if a class ends up with no predictions.
print(classification_report(y, mapped_clusters, zero_division=0))

Confusion Matrix:
[[8257    0]
 [6723    0]]

Classification Report:
              precision    recall  f1-score   support

           0       0.55      1.00      0.71      8257
           1       0.00      0.00      0.00      6723

    accuracy                           0.55     14980
   macro avg       0.28      0.50      0.36     14980
weighted avg       0.30      0.55      0.39     14980



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
