In [8]:
# Labelled 2-D sample points, stored as label -> (x, y).
# Insertion order (A..H) is preserved and determines the order in which
# labels appear inside each cluster list.
data = dict(
    A=(-4, -3),
    B=(6, 5),
    C=(1, -7),
    D=(-4, -6),
    E=(4, 6),
    F=(-1, 5),
    G=(-3, 0),
    H=(3, 0),
)

In [17]:
# k-means clustering with k=2:
# 1. select k points as the initial centroids (here: G and H)
# 2. assign each point to its closest centroid
# 3. recompute each centroid as the average of all points assigned to it
# 3.5. print the new centroids and the clusters
# 4. repeat steps 2 and 3 until the centroids no longer change

def distance(a, b):
    """Return the Euclidean distance between two points.

    Works for points of any dimension rather than only 2-D ``(x, y)`` pairs.
    For 2-D input the result is computed with the same operations, in the
    same order, as the plain ``(dx**2 + dy**2) ** .5`` formula.

    Args:
        a: Sequence of numeric coordinates.
        b: Sequence of numeric coordinates with the same length as ``a``.

    Returns:
        The straight-line distance between ``a`` and ``b`` as a float.

    Raises:
        ValueError: If ``a`` and ``b`` do not have the same number of
            coordinates.
    """
    if len(a) != len(b):
        # zip() would silently drop the extra coordinates; fail loudly instead.
        raise ValueError(
            f"points must have the same dimension, got {len(a)} and {len(b)}"
        )
    return sum((p - q) ** 2 for p, q in zip(a, b)) ** .5

def average(points):
    """Return the coordinate-wise mean (centroid) of a collection of points.

    Works for points of any dimension rather than only 2-D ``(x, y)`` pairs.

    Args:
        points: Non-empty sized collection (e.g. a list) of points, each a
            sequence of numeric coordinates of the same length.

    Returns:
        A tuple of floats holding the mean of each coordinate.

    Raises:
        ValueError: If ``points`` is empty. The mean of no points is
            undefined, and this is clearer than a bare ZeroDivisionError.
    """
    count = len(points)
    if count == 0:
        raise ValueError("average() requires at least one point")
    # zip(*points) regroups the points by axis: all x values, all y values, ...
    return tuple(sum(axis) / count for axis in zip(*points))

def kmeans(data, k=2, initial=("G", "H")):
    """Cluster labelled points with k-means (Lloyd's algorithm).

    The centroids are named ``"C1"``, ``"C2"``, ... and are seeded from the
    data points whose labels are listed in ``initial``. After every iteration
    the new centroids and the current clusters are printed.

    Args:
        data: Mapping of point label -> coordinates, in the form accepted by
            ``distance`` and ``average``.
        k: Number of clusters. Must equal ``len(initial)``.
        initial: Labels of the ``k`` points in ``data`` used as the starting
            centroids. Defaults to ``("G", "H")``, i.e. the original k=2 setup.

    Returns:
        A dict mapping each centroid name to the list of point labels assigned
        to it in the final iteration. Labels keep the iteration order of
        ``data``.

    Raises:
        ValueError: If the number of initial labels does not match ``k``.
        KeyError: If a label in ``initial`` is not a key of ``data``.
    """
    seeds = tuple(initial)
    if len(seeds) != k:
        raise ValueError(
            f"k={k} requires exactly {k} initial centroid labels, got {len(seeds)}"
        )

    # 1. select k points as centroids (G and H by default)
    centroids = {
        f"C{index}": data[label] for index, label in enumerate(seeds, start=1)
    }

    # 4. repeat steps 2 and 3 until the centroids don't change
    while True:

        # 2. assign each point to the closest centroid
        #    (ties go to the centroid that comes first, i.e. the lowest C<n>)
        clusters = {name: [] for name in centroids}
        for label, coords in data.items():
            closest = min(
                centroids,
                key=lambda name: distance(coords, centroids[name]),
            )
            clusters[closest].append(label)

        # 3. compute the new centroids by taking the average of all points
        #    assigned to that centroid; a centroid that attracted no points
        #    keeps its previous position instead of dividing by zero
        new_centroids = {
            name: (
                average([data[label] for label in members])
                if members
                else centroids[name]
            )
            for name, members in clusters.items()
        }

        # 3.5. print the new centroids and the clusters
        print("New centroids:", new_centroids)
        print("Clusters:", clusters)

        # 4. stop once an iteration leaves every centroid unchanged
        if centroids == new_centroids:
            break
        centroids = new_centroids

    return clusters


In [18]:
# Run k-means on the sample points. Each iteration prints its centroids and
# clusters; the returned final clusters are shown as the cell result.
kmeans(data)

New centroids: {'C1': (-3.0, -1.0), 'C2': (3.5, 1.0)}
Clusters: {'C1': ['A', 'D', 'F', 'G'], 'C2': ['B', 'C', 'E', 'H']}
New centroids: {'C1': (-2.5, -4.0), 'C2': (3.0, 4.0)}
Clusters: {'C1': ['A', 'C', 'D', 'G'], 'C2': ['B', 'E', 'F', 'H']}
New centroids: {'C1': (-2.5, -4.0), 'C2': (3.0, 4.0)}
Clusters: {'C1': ['A', 'C', 'D', 'G'], 'C2': ['B', 'E', 'F', 'H']}


{'C1': ['A', 'C', 'D', 'G'], 'C2': ['B', 'E', 'F', 'H']}

In [13]:
import numpy as np

# Two 2-component vectors.
# NOTE(review): the notebook does not say what d1 and d2 represent — add a
# short description or a markdown cell explaining where they come from.
d1 = (130, 84)
d2 = (-0.81, 0.59)

# Dot product of d1 and d2: 130 * -0.81 + 84 * 0.59
print(np.dot(d1, d2))

-55.740000000000016


In [16]:
# NOTE(review): -55.74 looks like the dot product printed by the previous cell,
# rounded to two decimals, and 0.59 matches d2[1] — confirm, and consider
# deriving both from those variables instead of retyping the numbers.
print( -55.74 * 0.59)


-32.8866
