In [175]:
import matplotlib
import numpy as np
import pandas as pd
import seaborn as sb
import torch
import matplotlib.pyplot as plt
sb.set()
from sklearn.cluster import KMeans
from math import atan

In [176]:
# training the torch model on the picture
model = torch.hub.load('ultralytics/yolov5', 'yolov5s')
model.conf = 0.4
img = "img.jpg"
result = model(img)

Using cache found in /Users/chinmay/.cache/torch/hub/ultralytics_yolov5_master
YOLOv5 🚀 2022-10-1 Python-3.10.5 torch-1.12.1 CPU

Fusing layers... 
YOLOv5s summary: 213 layers, 7225885 parameters, 0 gradients
Adding AutoShape... 


In [177]:
#creating a dataframe from the coordinates of the detected people

df = pd.DataFrame(columns=['X', 'Y'])
for x in result.xyxyn[0].numpy():
    if x[-1] == 0:
        xmid = ((x[0] + x[2]) / 2).round(2)
        ymin = -x[1]
        df.loc[len(df.index)] = [xmid, ymin]
df

Unnamed: 0,X,Y
0,0.61,-0.090383
1,0.93,-0.141544
2,0.63,-0.408361
3,0.13,-0.079438
4,0.55,-0.288391
5,0.9,-0.402458
6,0.07,-0.254354
7,0.17,-0.200887
8,0.15,-0.293824
9,0.32,-0.29422


In [None]:
if len(df.index) == 0:
    print("There are no clusters in the room")
    actual_clusters = 0

In [178]:
# use matplotlib's GUI intrface
matplotlib.use("TkAgg")

In [179]:
# Visualize the Data
f, axes = plt.subplots(1, 1, figsize=(16,8))
plt.scatter(x = "X", y = "Y", data = df)
plt.show()

In [180]:
# function to find the slope of a curve formed by points in the LSS-cluster graph
def findSlope(original_list):
    new_list = []
    for p in range(len(original_list) - 1):
        ss1 = original_list[p]
        ss2 = original_list[p + 1]
        slope = ss1-ss2
        new_list.append(round(atan(slope)*180/3.14, 4))
    return new_list

# find the array of differences of consecutive elements in a given array
def difference(original_array):
    new_array = []
    for p in range(len(original_array) - 1):
        element1 = original_array[p]
        element2 = original_array[p + 1]
        slope = element1 - element2
        new_array.append(round(slope, 4))
    new_array.append(original_array[-1])
    return new_array

In [181]:
# Possible number of clusters
min_clust = 1
max_clust = len(df.index)

# Compute Within Cluster Sum of Squares
within_ss = []

for num_clust in range(min_clust, max_clust + 1):
    kmeans = KMeans(n_clusters=num_clust)
    kmeans.fit(df)
    within_ss.append(kmeans.inertia_)

slopes = findSlope(within_ss)
slopes = difference(slopes)

#setting default value of the number of clusters
actual_clusters = 1

for i in range(len(slopes)):
    if slopes[i] < slopes[0]/10:
        actual_clusters = i+1
        break


# Plot Within SS vs Number of Clusters
f, axes = plt.subplots(1, 1, figsize=(16, 4))
plt.plot(range(min_clust, max_clust + 1), within_ss)
plt.xlabel('Number of Clusters')
plt.ylabel('Within Cluster Sum of Squares')
plt.xticks(np.arange(min_clust, max_clust + 1, 1.0))
plt.grid(which='major', axis='y')
plt.show()

In [182]:
num_clust = actual_clusters

# Create Clustering Model using KMeans
kmeans = KMeans(n_clusters = num_clust)

# Fit the Clustering Model on the Data
kmeans.fit(df)

# Print the Cluster Centers
for i, center in enumerate(kmeans.cluster_centers_):
    print("Cluster", i, end=":\t")
    for coord in center:
        print(round(coord, 2), end="\t")
    print()
print()

# Print the Within Cluster Sum of Squares
print("Within Cluster Sum of Squares :", kmeans.inertia_, end="\n")

# Predict the Cluster Labels
labels = kmeans.predict(df)

# Append Labels to the Data
df_labeled = df.copy()
df_labeled["Cluster"] = pd.Categorical(labels)

# Summary of the Cluster Labels
sb.countplot(x=df_labeled["Cluster"])
plt.show()

Cluster 0:	0.92	-0.27	
Cluster 1:	0.17	-0.22	
Cluster 2:	0.6	-0.26	

Within Cluster Sum of Squares : 0.15616298057435996


In [183]:
num_clust = actual_clusters

# Create Clustering Model using KMeans
kmeans = KMeans(n_clusters=num_clust)

# Fit the Clustering Model on the Data
kmeans.fit(df)

# Print the Cluster Centers
for i, center in enumerate(kmeans.cluster_centers_):
    print("Cluster", i, end=":\t")
    for coord in center:
        print(round(coord, 2), end="\t")
    print()
print()

# Predict the Cluster Labels
labels = kmeans.predict(df)

# Append Labels to the Data
df_labeled = df.copy()
df_labeled["Cluster"] = pd.Categorical(labels)

# Summary of the Cluster Labels
sb.countplot(x=df_labeled["Cluster"])
plt.show()

Cluster 0:	0.6	-0.26	
Cluster 1:	0.17	-0.22	
Cluster 2:	0.92	-0.27	



In [184]:
# Visualize the Clusters in the Data
f, axes = plt.subplots(1, 1, figsize=(16,8))
plt.scatter(x = "X", y = "Y", c ="Cluster", cmap = 'viridis', data = df_labeled)
plt.show()