In [1]:
import math
import random
def calculate_euclidean_distance(point1, point2):
    """Return the Euclidean (L2) distance between two points.

    Coordinates are paired positionally with ``zip``, so when the two
    points differ in length the unmatched trailing coordinates of the
    longer one are ignored.
    """
    deltas = (a - b for a, b in zip(point1, point2))
    return math.sqrt(sum(delta ** 2 for delta in deltas))

def assign_data_to_clusters(data, centroids):
    """Group every point in ``data`` under its nearest centroid.

    Returns a list with one sub-list per centroid (same order as
    ``centroids``); each sub-list holds the points whose closest centroid
    is that one. When several centroids are equally close, the one with
    the lowest index wins.
    """
    groups = [[] for _ in centroids]

    for sample in data:
        gaps = [calculate_euclidean_distance(sample, center) for center in centroids]
        # min() returns the first minimal element, so ties go to the
        # lowest centroid index.
        nearest = min(range(len(gaps)), key=gaps.__getitem__)
        groups[nearest].append(sample)

    return groups

def update_centroids(clusters):
    """Compute the mean point of every non-empty cluster.

    Empty clusters are skipped, so the returned list can be shorter than
    ``clusters``; its entries follow the order of the non-empty clusters.
    """
    populated = (members for members in clusters if len(members) > 0)
    return [
        # zip(*members) regroups the points coordinate-by-coordinate.
        [sum(axis) / len(members) for axis in zip(*members)]
        for members in populated
    ]

def k_means_clustering(data, k, max_iterations=100):
    """Partition ``data`` into ``k`` clusters by iterative k-means.

    Args:
        data: Sequence of points, each a sequence of numeric coordinates.
        k: Number of clusters to form.
        max_iterations: Upper bound on the number of centroid updates.

    Returns:
        A ``(clusters, centroids)`` tuple. ``clusters[i]`` holds the points
        assigned to ``centroids[i]``; the two are always consistent with
        each other, including when the iteration limit is reached or
        ``max_iterations`` is 0.

    Raises:
        ValueError: Propagated from ``random.sample`` when ``k`` is
            negative or larger than ``len(data)``.
    """
    # Initialize centroids as k distinct positions sampled at random from data
    centroids = random.sample(data, k)
    # Assign up front so `clusters` is defined even when max_iterations is 0.
    clusters = assign_data_to_clusters(data, centroids)

    for _ in range(max_iterations):
        new_centroids = [
            # An empty cluster keeps its previous centroid. Averaging it
            # would yield [] (zip over nothing), which sits at distance 0
            # from every point and would capture the whole data set.
            [sum(dim) / len(cluster) for dim in zip(*cluster)] if cluster else list(previous)
            for cluster, previous in zip(clusters, centroids)
        ]

        if new_centroids == centroids:
            break

        centroids = new_centroids
        # Re-assign right after moving the centroids so the returned
        # clusters always match the returned centroids.
        clusters = assign_data_to_clusters(data, centroids)

    return clusters, centroids

# Sample observations, one [VAR1, VAR2] pair per row
data = [
    [1.713, 1.586],
    [0.180, 1.786],
    [0.353, 1.240],
    [0.940, 1.566],
    [1.486, 0.759],
    [1.266, 1.106],
    [1.540, 0.419],
    [0.459, 1.799],
    [0.773, 0.186],
]
k = 3


def _show_clusters(heading, groups):
    """Print ``heading`` followed by each cluster's index and its points."""
    print(heading)
    for position, members in enumerate(groups):
        print("Cluster", position)
        print("Points:", members)
        print("=" * 20)


def _show_centroids(heading, centers):
    """Print ``heading`` followed by each centroid with its index."""
    print(heading)
    for position, center in enumerate(centers):
        print("Centroid", position, ":", center)


# Run k-means on the sample data and report the result
clusters, centroids = k_means_clustering(data, k)
_show_clusters("Clustered Data:", clusters)
_show_centroids("Centroids:", centroids)

# Determine cluster for a new point (VAR1=0.906, VAR2=0.606)
new_point = [0.906, 0.606]
distances = [calculate_euclidean_distance(new_point, centroid) for centroid in centroids]
# Index of the first smallest distance, i.e. the nearest centroid
closest_cluster_index = min(range(len(distances)), key=distances.__getitem__)
print("New point belongs to Cluster:", closest_cluster_index)

# Add the new point to its cluster, then recompute the centroids from the
# updated cluster membership
clusters[closest_cluster_index].append(new_point)
centroids = update_centroids(clusters)

_show_clusters("Updated Clustered Data:", clusters)
_show_centroids("Updated Centroids:", centroids)

Clustered Data:
Cluster 0
Points: [[1.486, 0.759], [1.54, 0.419], [0.773, 0.186]]
Cluster 1
Points: [[1.713, 1.586], [0.94, 1.566], [1.266, 1.106]]
Cluster 2
Points: [[0.18, 1.786], [0.353, 1.24], [0.459, 1.799]]
Centroids:
Centroid 0 : [1.2663333333333333, 0.4546666666666666]
Centroid 1 : [1.3063333333333333, 1.4193333333333333]
Centroid 2 : [0.33066666666666666, 1.6083333333333332]
New point belongs to Cluster: 0
Updated Clustered Data:
Cluster 0
Points: [[1.486, 0.759], [1.54, 0.419], [0.773, 0.186], [0.906, 0.606]]
Cluster 1
Points: [[1.713, 1.586], [0.94, 1.566], [1.266, 1.106]]
Cluster 2
Points: [[0.18, 1.786], [0.353, 1.24], [0.459, 1.799]]
Updated Centroids:
Centroid 0 : [1.17625, 0.49249999999999994]
Centroid 1 : [1.3063333333333333, 1.4193333333333333]
Centroid 2 : [0.33066666666666666, 1.6083333333333332]
