In [1]:
import torch
import numpy as np
import random

In [2]:
random.seed(5)

In [3]:
# Function to calculate distance between two points using euclidean distance
def euclidean_distance(point1,point2):
  p = len(point1)
  distance = 0
  for i in range(p):
    distance += (point1[i] - point2[i])**2
  return np.sqrt(distance)

In [4]:
# randomly assign each data point a cluster from 0..K
def initial_cluster(N,K):
  cluster = torch.zeros(N,dtype=int)
  for i in range(N):
    cluster[i] = random.randrange(0,K)
  return cluster

In [5]:
# Function to calculate centroid of each cluster returns a tensor of size (Kxp) : K number of classes and p is features
def calculate_centroid(K,p,data):
  c = initial_cluster(data.size(0),K)
  centroids = torch.zeros(K,p)
  for k in range(K):
    cluster_k = [i for i, val in enumerate(c) if val == k]
    for j in range(p):
      centroids[k,j] = sum(data[i,j] for i in cluster_k) / len(cluster_k) if len(cluster_k) != 0 else 1
  return centroids

In [23]:
# Function to assign cluster to each data point having min distance from the specific centroid
def assign_cluster(clusters,centroids,data,K):
  N = data.size(0)
  print(clusters)
  for i in range(N):
    distances = [euclidean_distance(data[i,:],centroids[j,:]) for j in range(K)]
    clusters[i] = np.argmin(distances)
  print(clusters)


In [26]:
def k_means_clustering(K,data):
  clusters = initial_cluster(data.size(0),K)

  prev_centroid = torch.zeros(K,data.size(1))
  centroid = calculate_centroid(K,data.size(1),data)

  while(torch.all(torch.abs(centroid - prev_centroid) > 0.0001)):
      assign_cluster(clusters,centroid,data,K)
      print(clusters)
      prev_centroid = centroid
      centroid = calculate_centroid(K,data.size(1),data)



In [30]:
data = torch.rand(6, 2)
k_means_clustering(3,data)


tensor([1, 1, 2, 2, 2, 1])
tensor([0, 0, 0, 1, 2, 1])
tensor([0, 0, 0, 1, 2, 1])
tensor([0, 0, 0, 1, 2, 1])
tensor([0, 2, 2, 0, 2, 0])
tensor([0, 2, 2, 0, 2, 0])
tensor([0, 2, 2, 0, 2, 0])
tensor([0, 2, 2, 1, 0, 1])
tensor([0, 2, 2, 1, 0, 1])
tensor([0, 2, 2, 1, 0, 1])
tensor([0, 0, 0, 1, 0, 1])
tensor([0, 0, 0, 1, 0, 1])
tensor([0, 0, 0, 1, 0, 1])
tensor([0, 2, 0, 1, 1, 1])
tensor([0, 2, 0, 1, 1, 1])
tensor([0, 2, 0, 1, 1, 1])
tensor([1, 2, 2, 0, 2, 0])
tensor([1, 2, 2, 0, 2, 0])
tensor([1, 2, 2, 0, 2, 0])
tensor([1, 2, 2, 0, 1, 0])
tensor([1, 2, 2, 0, 1, 0])
tensor([1, 2, 2, 0, 1, 0])
tensor([1, 0, 0, 2, 2, 1])
tensor([1, 0, 0, 2, 2, 1])
tensor([1, 0, 0, 2, 2, 1])
tensor([0, 2, 2, 1, 0, 1])
tensor([0, 2, 2, 1, 0, 1])
tensor([0, 2, 2, 1, 0, 1])
tensor([1, 2, 2, 1, 2, 1])
tensor([1, 2, 2, 1, 2, 1])
tensor([1, 2, 2, 1, 2, 1])
tensor([0, 1, 1, 2, 2, 2])
tensor([0, 1, 1, 2, 2, 2])
tensor([0, 1, 1, 2, 2, 2])
tensor([0, 1, 1, 0, 2, 0])
tensor([0, 1, 1, 0, 2, 0])
tensor([0, 1, 1, 0, 2, 0])
t

  return np.sqrt(distance)


In [None]:

euclidean_distance([0.2,0.9],[0.3,0.4])

np.float64(0.5099019513592785)