In [1]:
import pandas as pd
from tqdm import tqdm 
from glob import glob
from skimage.io import imread
from skimage.measure import label, regionprops_table

In [2]:
def extract_props_from_image(mask):
    
    label_image = label(mask)

    feature_names = ['bbox_area',
                     'solidity',
                     'equivalent_diameter', 
                     'orientation',
                     'convex_area', 
                     'area',
                     'extent',
                     'eccentricity',
                     'major_axis_length',
                     'feret_diameter_max',
                     'perimeter',
                     'minor_axis_length']
    
    return pd.DataFrame(regionprops_table(label_image, properties=feature_names))

In [8]:
out = pd.DataFrame()

for first_mask in tqdm(glob("../auto_selected_masks/*")):
    
    try:
        mask = imread(first_mask)
        
        props = extract_props_from_image(mask)
        props['label'] = first_mask.split('/')[-1].split('.')[0]
   
        out = pd.concat([out, props], ignore_index=True)
    
    except Exception as e:
        print("Erro na imagem:", e, end="\n")

100%|██████████████████████████████████████████| 25/25 [00:00<00:00, 201.40it/s]


In [10]:
out.head()

Unnamed: 0,bbox_area,solidity,equivalent_diameter,orientation,convex_area,area,extent,eccentricity,major_axis_length,feret_diameter_max,perimeter,minor_axis_length,label
0,9855,0.775386,83.492434,0.339413,7061,5475,0.555556,0.902083,135.14763,146.931957,392.149278,58.324599,27
1,18081,0.918288,107.124731,-0.663402,9815,9013,0.498479,0.879804,162.47383,171.679935,441.730014,77.229835,21
2,6300,0.812136,69.200101,0.074987,4631,3761,0.596984,0.826929,94.766761,101.271911,294.308658,53.288002,18
3,14592,0.919951,92.836192,-0.754742,7358,6769,0.463884,0.916614,152.469823,162.742127,394.759451,60.953391,24
4,9483,0.766846,78.566312,-1.002721,6322,4848,0.511231,0.733222,102.017098,122.44999,357.462987,69.370589,19


In [11]:
out.describe()

Unnamed: 0,bbox_area,solidity,equivalent_diameter,orientation,convex_area,area,extent,eccentricity,major_axis_length,feret_diameter_max,perimeter,minor_axis_length
count,25.0,25.0,25.0,25.0,25.0,25.0,25.0,25.0,25.0,25.0,25.0,25.0
mean,9117.88,0.860694,79.695448,-0.218083,5958.92,5117.92,0.579931,0.798561,112.925522,120.96229,333.115939,62.786426
std,3410.654352,0.076187,13.109065,0.616587,1853.434041,1683.267827,0.094759,0.115948,25.34139,26.679512,62.175927,9.634617
min,3528.0,0.641276,57.602624,-1.300481,2737.0,2606.0,0.38307,0.479739,63.129719,65.924199,199.681241,42.62635
25%,6600.0,0.814311,71.729778,-0.719947,4651.0,4041.0,0.506125,0.733222,94.766761,101.271911,282.936075,58.324599
50%,9306.0,0.888548,78.566312,-0.078695,5737.0,4848.0,0.581904,0.842113,111.689962,120.673112,345.806133,61.162023
75%,10080.0,0.918288,89.370194,0.111687,7061.0,6273.0,0.64594,0.878433,135.14763,144.100659,387.019336,71.406195
max,18081.0,0.952137,107.124731,1.306549,9815.0,9013.0,0.738662,0.916614,162.47383,171.679935,441.730014,79.137288


In [6]:
out.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 25 entries, 0 to 24
Data columns (total 13 columns):
 #   Column               Non-Null Count  Dtype  
---  ------               --------------  -----  
 0   bbox_area            25 non-null     int64  
 1   solidity             25 non-null     float64
 2   equivalent_diameter  25 non-null     float64
 3   orientation          25 non-null     float64
 4   convex_area          25 non-null     int64  
 5   area                 25 non-null     int64  
 6   extent               25 non-null     float64
 7   eccentricity         25 non-null     float64
 8   major_axis_length    25 non-null     float64
 9   feret_diameter_max   25 non-null     float64
 10  perimeter            25 non-null     float64
 11  minor_axis_length    25 non-null     float64
 12  label                25 non-null     object 
dtypes: float64(9), int64(3), object(1)
memory usage: 2.7+ KB


In [12]:
out.to_csv('tmp/1_feature_extraction.csv', index=None)