## Proof of concept - geometric features for clustering
By Oliveira, J. B (2022...)

Consider a control image containing a number of white circles of different radii.

<img src='fig_control_5.jpg'>

In [1]:
import cv2 as cv
import numpy as np

In [2]:
# Load the control image (fig_control_1.jpg is an alternative input).
bgr = cv.imread('fig_control_5.jpg')
# cv.imread reports a missing or unreadable file by returning None instead of
# raising, so fail here with a clear message rather than on bgr.shape below.
if bgr is None:
    raise FileNotFoundError("Could not read image 'fig_control_5.jpg'")
print('Dim:' + str(bgr.shape))
cv.imshow('bgr original img', bgr)
cv.waitKey(0)  # wait for a key press in the image window
cv.destroyAllWindows()
# Optional: restrict the working area; cv.resize takes the size as (cols, rows).
#bgr = cv.resize(bgr, (780, 1040))

Dim:(800, 800, 3)


In [3]:
# Desaturation: convert the BGR image to grayscale so it can be thresholded
gray = cv.cvtColor(bgr, cv.COLOR_BGR2GRAY)

In [4]:
# Binary segmentation (binarization / thresholding) with an automatic threshold.
# THRESH_OTSU asks OpenCV to choose the cut-off from the grayscale histogram;
# the chosen value comes back as `ret` and the binary mask as `th`.
# NOTE(review): the original note says this works for uni- and bimodal
# histograms; Otsu's method is usually described for bimodal ones - confirm.
ret, th = cv.threshold(
    gray,
    0,
    255,
    cv.THRESH_BINARY | cv.THRESH_OTSU,
)
print(ret)

# Show the segmented mask until a key is pressed.
cv.imshow('segmented img', th)
cv.waitKey(0)
cv.destroyAllWindows()

153.0


In [13]:
# Fraction of the image covered by white (foreground) pixels of the mask `th`.
# A vectorised comparison counts the pixels equal to 255 in one pass instead of
# visiting every pixel in a nested Python loop; the count is identical.
n_pixels_white = int(np.count_nonzero(th == 255))

total_area = th.shape[0]*th.shape[1]  # rows * cols; reused by a later cell
interest_area = n_pixels_white/total_area
print('Interest area: ' + str(interest_area*100) + ' %')

Interest area: 6.426875 %


In [5]:
# Extract the outlines of the segmented regions from the binary mask `th`.
# The objects in this control image are taken to be closed shapes.
contours, hierarchy = cv.findContours(th, cv.RETR_TREE, cv.CHAIN_APPROX_NONE)
print(len(contours))
print(type(contours))

# cv.drawContours paints directly onto the image it receives. Draw on a copy so
# `bgr` stays untouched for the later cells that draw cluster colours on it.
contours_preview = cv.drawContours(bgr.copy(), contours, -1, (0, 242, 255), 2)
cv.imshow('contours', contours_preview)
cv.waitKey(0)
cv.destroyAllWindows()


491
<class 'list'>


In [7]:
# Per-contour geometric features, following
# https://docs.opencv.org/3.4/dd/d49/tutorial_py_contour_features.html
all_moments = [cv.moments(contour) for contour in contours]

# The zeroth moment m00 is used as the contour area.
areas = [m['m00'] for m in all_moments]

# The second argument of arcLength says whether the contour is closed
# (True here); it is not a convexity flag.
perimeters = [cv.arcLength(contour, True) for contour in contours]

# Centroid = (m10/m00, m01/m00); contours with zero area fall back to 0.
centroids_x = [m['m10']/m['m00'] if m['m00'] != 0 else 0 for m in all_moments]
centroids_y = [m['m01']/m['m00'] if m['m00'] != 0 else 0 for m in all_moments]

print(len(areas))
print(len(perimeters))
# print(areas)  # uncomment to inspect outliers


491
491


In [14]:
import pandas as pd

features = [areas, perimeters]

# Share of the image covered by the summed contour areas.
interest_area_contours = sum(areas) / total_area
print(interest_area_contours)

# One row per contour, one column per geometric feature.
# Alternative including centroids:
#d = {'centroid_x': centroids_x, 'centroid_y': centroids_y, 'area': areas, 'perimiter': perimeters}
d = {
    'area': areas,
    'perimiter': perimeters,
}
df = pd.DataFrame(d)
print(df.describe())

0.05844921875
              area   perimiter
count   491.000000  491.000000
mean     76.186354   14.975566
std     329.084536   30.107472
min       0.000000    2.000000
25%       1.000000    4.000000
50%       2.000000    5.656854
75%       8.000000   10.828427
max    2348.500000  188.509666


In [9]:
from sklearn.cluster import KMeans
import numpy as np

# Cluster the contours by (area, perimeter) into 4 groups.
# random_state fixes the centroid initialisation so the cluster numbering is
# the same on every run; n_init is set explicitly so the number of restarts
# does not depend on the installed scikit-learn version's default.
kmeans = KMeans(n_clusters=4, n_init=10, random_state=0)
kmeans.fit(df)



KMeans(n_clusters=4)

In [11]:
# Group the contours by the k-means cluster of their feature row.
# All rows are predicted in a single call instead of one call per contour.
labels = kmeans.predict(df)
conts = [[] for _ in range(kmeans.n_clusters)] #contour list
for cnt, class_ in zip(contours, labels):
    conts[class_].append(cnt)

# One BGR colour per cluster. The last colour was written as (0, 262, 255);
# 262 is outside the 0-255 channel range, and the contour cell uses
# (0, 242, 255), so that value is used here.
cluster_colors = [(0, 0, 0), (20, 150, 232), (82, 184, 10), (0, 242, 255)]

# Draw on a copy: cv.drawContours paints in place, and `bgr` is shared with
# the other cells.
im = bgr.copy()
for idx, cluster_conts in enumerate(conts):
    if cluster_conts:  # nothing to draw for an empty cluster
        color = cluster_colors[idx % len(cluster_colors)]
        cv.drawContours(im, cluster_conts, -1, color, thickness=2)

cv.imwrite('clustered_fig_control_5.jpg',im)

for idx, cluster_conts in enumerate(conts):
    print('class ' + str(idx) + ': ' + str(len(cluster_conts)) + ' individuals')

cv.imshow('clustered image', im)
cv.waitKey(0)
cv.destroyAllWindows()

class 0: 453 individuals
class 1: 10 individuals
class 2: 10 individuals
class 3: 18 individuals


In [17]:
!pip install fuzzy-c-means
from fcmeans import FCM
fcm = FCM(n_clusters=4)
X = df.to_numpy()
fcm.fit(X)





## In this case, fuzzy c-means gives the same result (up to a relabelling of the clusters), since this is a toy problem

In [28]:
# Group the contours by their fuzzy c-means cluster (highest-membership label).
# All rows are predicted in a single call instead of one call per contour.
labels = fcm.predict(df.to_numpy())
conts = [[] for _ in range(fcm.n_clusters)] #contour list
for cnt, class_ in zip(contours, labels):
    conts[class_].append(cnt)

# One BGR colour per cluster. The last colour was written as (0, 262, 255);
# 262 is outside the 0-255 channel range, and the contour cell uses
# (0, 242, 255), so that value is used here.
cluster_colors = [(0, 0, 0), (20, 150, 232), (82, 184, 10), (0, 242, 255)]

# Draw on a copy: cv.drawContours paints in place, so drawing on `bgr` itself
# would stack this overlay on top of whatever earlier cells drew.
im = bgr.copy()
for idx, cluster_conts in enumerate(conts):
    if cluster_conts:  # nothing to draw for an empty cluster
        color = cluster_colors[idx % len(cluster_colors)]
        cv.drawContours(im, cluster_conts, -1, color, thickness=2)

cv.imwrite('fuzzy_clustered_fig_control_5.jpg',im)

for idx, cluster_conts in enumerate(conts):
    print('class ' + str(idx) + ': ' + str(len(cluster_conts)) + ' individuals')

cv.imshow('fuzzy clustered image', im)
cv.waitKey(0)
cv.destroyAllWindows()

class 0: 10 individuals
class 1: 453 individuals
class 2: 18 individuals
class 3: 10 individuals
