## Proof of concept - geometric features for clustering
By Oliveira, J. B (2022...)

Consider a control image containing a number of white circles of different radii.

In [76]:
import cv2 as cv
import numpy as np

In [141]:
# Load the control image and show it in a native OpenCV window.
# Original author's note on the input: "if male were more white than female, ok".
IMAGE_PATH = 'fig_control_1.jpg'
bgr = cv.imread(IMAGE_PATH)
# cv.imread reports a missing/unreadable file by returning None instead of raising,
# so fail here with a clear message rather than with an AttributeError on bgr.shape.
if bgr is None:
    raise FileNotFoundError('Could not read image: ' + IMAGE_PATH)
print('Dim:' + str(bgr.shape))
cv.imshow('bgr original img', bgr)
cv.waitKey(0)  # block until a key is pressed in the image window
cv.destroyAllWindows()
#bgr = cv.resize(bgr, (780, 1040)) #optional: restrict the area; dsize is (cols, rows)

Dim:(800, 800, 3)


In [142]:
# Desaturation: collapse the BGR image into a single grayscale channel.
gray = cv.cvtColor(src=bgr, code=cv.COLOR_BGR2GRAY)

In [79]:
# Preview the grayscale image in a native OpenCV window.
cv.imshow('gray img', gray)
cv.waitKey(0)  # a delay of 0 waits for a key press with no timeout
cv.destroyAllWindows() 

In [143]:
# Binary segmentation (binarization) with Otsu's automatic threshold.
# Author's notes:
#  - can be applied to unimodal and bimodal histograms
#  - segmentation may be based on shape, pixels (color) or the histogram
# With THRESH_OTSU set, the `thresh` argument (0) is not used as the cut-off;
# the threshold that was actually chosen is returned as `ret`.
ret, th = cv.threshold(
    src=gray,
    thresh=0,
    maxval=255,
    type=cv.THRESH_BINARY | cv.THRESH_OTSU,
)
print(ret)
cv.imshow('segmented img', th)
cv.waitKey(0)
cv.destroyAllWindows()

152.0


In [39]:
# Share of the image covered by white (value 255) pixels in the binary mask.
# The count is done with one vectorized NumPy comparison instead of a per-pixel
# Python double loop; the resulting count is the same.
n_pixels_white = int(np.count_nonzero(th == 255))

total_area = th.shape[0]*th.shape[1]
interest_area = n_pixels_white/total_area
print('Interest area: ' + str(interest_area*100) + ' %')

Interest area: 6.426875 %


In [144]:
# Extract the outlines of the segmented objects from the binary mask.
# Author's note: here the object areas are convex (closed).
contours, hierarchy = cv.findContours(th, cv.RETR_TREE, cv.CHAIN_APPROX_NONE)
print (str(len(contours)))
print(type(contours))
# cv.drawContours paints directly into the array it receives. Draw on a copy so the
# original image in `bgr` stays clean for the cells that reuse it, and so re-running
# this cell does not stack drawings on top of each other.
contours_preview = cv.drawContours(bgr.copy(), contours, -1, (0,242,255), 2)
cv.imshow('contours', contours_preview)
cv.waitKey(0)
cv.destroyAllWindows()


487
<class 'list'>


In [145]:
# Geometric descriptors for every contour, following
# https://docs.opencv.org/3.4/dd/d49/tutorial_py_contour_features.html
#   area      -> zeroth-order moment m00
#   perimeter -> arc length of the contour
#   centroid  -> (m10/m00, m01/m00), or (0, 0) when m00 is zero
areas, perimeters = [], []
centroids_x, centroids_y = [], []
for contour in contours:
    moments = cv.moments(contour)
    area = moments['m00']
    areas.append(area)
    # The second argument tells arcLength whether the curve is closed.
    perimeters.append(cv.arcLength(contour, True))
    has_area = area != 0  # guard the centroid division against zero-area contours
    centroids_x.append(moments['m10'] / area if has_area else 0)
    centroids_y.append(moments['m01'] / area if has_area else 0)

print(len(areas))
print(len(perimeters))
# print(areas) would reveal the outliers


487
487


In [146]:
import pandas as pd

features = [areas, perimeters]

# One row per contour. The column name keeps its original spelling ('perimiter').
# Alternative feature set that also includes the centroids:
#   {'centroid_x': centroids_x, 'centroid_y': centroids_y, 'area': areas, 'perimiter': perimeters}
d = dict(area=areas, perimiter=perimeters)
df = pd.DataFrame(data=d)
print(df.describe())

              area   perimiter
count   487.000000  487.000000
mean     47.635524   12.594081
std     230.155539   23.205022
min       1.000000    4.000000
25%       1.000000    4.000000
50%       2.000000    6.000000
75%       7.000000   10.242641
max    2349.500000  188.509666


In [147]:
from sklearn.cluster import KMeans
import numpy as np

# K-means starts from randomly chosen centroids, so without a fixed seed both the
# partition and the numbering of the clusters can differ from run to run, which also
# changes the per-class counts and colours produced further down. Pin the seed and
# the number of restarts so Restart & Run All reproduces the same result.
# NOTE(review): the features are clustered unscaled; area has a much larger spread
# than perimeter in the summary above, so it probably dominates the distances -
# consider standardizing the features before fitting.
N_CLUSTERS = 4
RANDOM_SEED = 0
kmeans = KMeans(n_clusters=N_CLUSTERS, n_init=10, random_state=RANDOM_SEED)
kmeans.fit(df) # fit method



KMeans(n_clusters=4)

In [148]:
# Group the contours by predicted cluster and draw every cluster in its own colour.

# Row k of df holds the features of contours[k], so a single vectorized predict call
# labels every contour at once (instead of one predict call per contour).
labels = kmeans.predict(df)

# contour list: one bucket per cluster, sized from the fitted model
conts = [[] for _ in range(kmeans.n_clusters)]
for cnt, class_ in zip(contours, labels):
    conts[class_].append(cnt)

# BGR colours, one per class. The last entry used to be (0, 262, 255); 262 is not a
# valid 8-bit channel value, and 242 matches the colour used for the contour preview.
class_colors = [(0, 0, 0), (20, 150, 232), (82, 184, 10), (0, 242, 255)]

# Draw on a copy so the image held in `bgr` is not modified and this cell can be
# re-run without stacking drawings.
im = bgr.copy()
for class_, class_conts in enumerate(conts):
    # Cycle through the palette if there are more clusters than colours.
    color = class_colors[class_ % len(class_colors)]
    cv.drawContours(im, class_conts, -1, color, thickness=2)

for class_, class_conts in enumerate(conts):
    print('class ' + str(class_) + ': ' + str(len(class_conts)) + ' individuals')

cv.imshow('clustered image', im)
cv.waitKey(0)
cv.destroyAllWindows()

class 0: 457 individuals
class 1: 5 individuals
class 2: 7 individuals
class 3: 18 individuals
