In [1]:
import cv2
import numpy as np
import matplotlib.pyplot as plt
import math as m
import mahotas as mt
import os
import pandas as pd
cv2.__version__

'3.4.1'

In [2]:
# Root folder of the leaf dataset; the extraction loop scans its sub-folders for images.
# Set the LEAVES_DIR environment variable to run the notebook against a dataset
# stored elsewhere; the original author's location remains the default.
path = os.environ.get("LEAVES_DIR", "C:/Users/suhas/Documents/Mini Project/Leaves/")

## Create Columns and DataFrame

In [3]:
# Column layout of the feature table: the species label first, followed by the
# colour, shape, morphological and texture features.
columns = [
    'common name',
    # Colour statistics per channel
    'red_mean', 'red_var',
    'blue_mean', 'blue_var',
    'green_mean', 'green_var',
    # Shape measurements of the leaf outline
    'area', 'perimeter', 'length', 'width',
    # Ratios derived from the shape measurements
    'aspect ratio', 'form factor', 'rectangularity',
    'perimeter ratio of length and width',
    # Texture descriptors
    'contrast', 'correlation', 'entropy', 'inverse difference moments',
]

In [4]:
# Echo the list to check the column names and their order.
columns

['common name',
 'red_mean',
 'red_var',
 'blue_mean',
 'blue_var',
 'green_mean',
 'green_var',
 'area',
 'perimeter',
 'length',
 'width',
 'aspect ratio',
 'form factor',
 'rectangularity',
 'perimeter ratio of length and width',
 'contrast',
 'correlation',
 'entropy',
 'inverse difference moments']

In [5]:
# Start from an empty table that already carries the feature columns;
# the feature-extraction cell further down fills in the rows.
df = pd.DataFrame(columns=columns)

In [6]:
# Display the frame (no rows yet) to confirm the header.
df

Unnamed: 0,common name,red_mean,red_var,blue_mean,blue_var,green_mean,green_var,area,perimeter,length,width,aspect ratio,form factor,rectangularity,perimeter ratio of length and width,contrast,correlation,entropy,inverse difference moments


## Create Label Function

In [7]:
def label(img_name):
    """Map a leaf image file name to the common name of its species.

    The four characters that precede the last four characters of ``img_name``
    (the image number in a name such as ``1001.jpg``) are parsed as an integer
    and looked up in the inclusive number ranges below.

    Returns the species name, or ``None`` when the number lies outside every
    range. Raises ``ValueError`` if those four characters are not an integer.
    """
    # (first image number, last image number, species) -- ranges are inclusive
    # and do not overlap, so the lookup order does not affect the result.
    species_ranges = (
        (1001, 1059, 'pubescent bamboo'),
        (1060, 1122, 'chinese horse chestnut'),
        (1123, 1194, 'chinese redbud'),
        (1195, 1267, 'true indigo'),
        (1268, 1323, 'japanese maple'),
        (1324, 1385, 'nanmu'),
        (1386, 1437, 'castor aralia'),
        (1438, 1496, 'goldenrain tree'),
        (1497, 1551, 'chinese cinnamon'),
        (1552, 1616, 'anhui barberry'),
        (2001, 2050, 'big-fruited holly'),
        (2051, 2113, 'japanese cheesewood'),
        (2114, 2165, 'wintersweet'),
        (2166, 2230, 'camphor tree'),
        (2231, 2290, 'japan arrowwood'),
        (2291, 2346, 'sweet osmanthus'),
        (2347, 2423, 'deodar'),
        (2424, 2485, 'gingko'),
        (2486, 2546, 'crepe myrtle'),
        (2547, 2612, 'oleander'),
        (2616, 2675, 'yew plum pine'),
        (3001, 3055, 'japanese flowering cherry'),
        (3056, 3110, 'glossy privet'),
        (3111, 3175, 'chinese toon'),
        (3176, 3229, 'peach'),
        (3230, 3281, 'ford woodlotus'),
        (3282, 3334, 'trident maple'),
        (3335, 3389, 'beales barberry'),
        (3390, 3446, 'southern magnolia'),
        (3447, 3510, 'canadian poplar'),
        (3511, 3563, 'chinese tulip tree'),
        (3566, 3621, 'tangerine'),
    )

    image_number = int(img_name[-8:-4])
    for first, last, species in species_ranges:
        if first <= image_number <= last:
            return species
    return None

## Iterating Over Images and Extracting Features

In [8]:
# Structuring element for the morphological closing. It does not depend on the
# image, so it is built once instead of once per file.
CLOSING_KERNEL = np.ones((50, 50))


def _largest_contour(mask):
    """Return the external contour of `mask` with the most points, or None if there is none."""
    # findContours returns (image, contours, hierarchy) on OpenCV 3.x but
    # (contours, hierarchy) on 4.x; the contour list is always second-to-last.
    contours = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)[-2]
    if len(contours) == 0:
        return None
    # max() keeps the first of equally long contours, like a strict '>' scan.
    return max(contours, key=len)


def _extract_features(img):
    """Compute the colour, shape and texture features of one BGR leaf image.

    Returns a dict keyed by the feature column names (everything except
    'common name'), or None when no usable leaf outline is found.
    """
    # Colour features. cv2.imread delivers channels in B, G, R order.
    # NOTE: the *_var entries hold the standard deviation (np.std), not the
    # variance; the names are kept so the CSV schema stays unchanged.
    blue_mean, blue_var = np.mean(img[:, :, 0]), np.std(img[:, :, 0])
    green_mean, green_var = np.mean(img[:, :, 1]), np.std(img[:, :, 1])
    red_mean, red_var = np.mean(img[:, :, 2]), np.std(img[:, :, 2])

    # Pre-processing: grey -> blur -> Otsu binarisation -> invert -> close holes.
    imgGray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    imgBlur = cv2.GaussianBlur(imgGray, (3, 3), 0)
    # With THRESH_OTSU the threshold is chosen automatically; the 128 is ignored.
    _, imgBW = cv2.threshold(imgBlur, 128, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)
    imgInv = cv2.bitwise_not(imgBW)
    imgClosed = cv2.morphologyEx(imgInv, cv2.MORPH_CLOSE, CLOSING_KERNEL)

    cnt = _largest_contour(imgClosed)
    if cnt is None:
        return None

    # Shape based features
    area = cv2.contourArea(cnt)
    per = cv2.arcLength(cnt, True)
    (_, _), (w, h), _ = cv2.minAreaRect(cnt)
    # A degenerate outline (point or line) would divide by zero below.
    if area == 0 or per == 0 or w == 0:
        return None

    # Morphological features.
    # NOTE: minAreaRect does not order the two sides, so 'length' (h) can be
    # shorter than 'width' (w) and the aspect ratio can fall below 1.
    ar = float(h) / w
    ff = (4 * m.pi * area) / (m.pow(per, 2))
    rectangularity = (h * w) / area
    prlw = per / (w + h)

    # Texture based features: Haralick descriptors averaged over the directions.
    textures = mt.features.haralick(imgGray).mean(axis=0)

    return {
        'red_mean': red_mean, 'red_var': red_var,
        'blue_mean': blue_mean, 'blue_var': blue_var,
        'green_mean': green_mean, 'green_var': green_var,
        'area': area, 'perimeter': per, 'length': h, 'width': w,
        'aspect ratio': ar, 'form factor': ff,
        'rectangularity': rectangularity,
        'perimeter ratio of length and width': prlw,
        'contrast': textures[1],
        'correlation': textures[2],
        'entropy': textures[8],
        'inverse difference moments': textures[4],
    }


# Walk every "leaf" folder under `path` and collect one record per image.
records = []
for folder in os.scandir(path):
    # Only directories can be scanned; stray files with "leaf" in their name are skipped.
    if not (folder.is_dir() and "leaf" in folder.name):
        continue

    # Getting Name of Plant (second '_'-separated part of the folder name)
    name = folder.name.split('_')[1]

    for image in os.scandir(folder.path):
        # cv2.imread returns None instead of raising when a file cannot be decoded.
        img = cv2.imread(image.path)
        if img is None:
            print("Skipping unreadable file:", image.path)
            continue

        features = _extract_features(img)
        if features is None:
            print("Skipping image without a usable leaf outline:", image.path)
            continue

        record = {'common name': name}
        record.update(features)
        records.append(record)

# Build the table in one step: row-by-row DataFrame.append is quadratic and was
# removed in pandas 2.0. Rebuilding from `records` also keeps this cell
# idempotent -- running it again no longer duplicates rows.
df = pd.DataFrame(records, columns=df.columns)

In [23]:
# Inspect the extracted feature table.
df

Unnamed: 0,common name,red_mean,red_var,blue_mean,blue_var,green_mean,green_var,area,perimeter,length,width,aspect ratio,form factor,rectangularity,perimeter ratio of length and width,contrast,correlation,entropy,inverse difference moments
0,anhui barberry,234.469870,53.357817,230.050427,64.794238,242.292892,33.301246,247343.5,2581.716941,315.243317,1118.116455,0.281941,0.466330,1.425058,1.801165,10.269227,0.997198,1.913799,0.897709
1,anhui barberry,239.078922,54.041678,237.209074,59.977314,244.533445,35.623070,150157.5,2290.212327,1046.269165,215.477997,4.855573,0.359754,1.501410,1.815112,5.256504,0.998636,1.334322,0.934745
2,anhui barberry,218.290143,69.075408,214.253183,76.172502,221.822949,63.520733,416083.0,3027.253080,1318.957520,438.283661,3.009370,0.570548,1.389332,1.722731,5.071890,0.999424,3.169692,0.838952
3,anhui barberry,217.351257,67.043355,212.549161,74.406684,223.302094,57.402874,447874.0,3194.875445,483.000000,1385.000000,0.348736,0.551389,1.493623,1.710319,4.559479,0.999406,4.024736,0.798812
4,anhui barberry,218.144054,74.602190,211.533462,87.120435,232.043578,46.657105,370707.0,2981.713976,1299.985229,417.071838,3.116934,0.523973,1.462576,1.736526,5.480717,0.999228,2.863736,0.849442
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3867,yew plum pie,234.242239,59.867682,233.146252,62.838043,239.387608,45.404740,200744.5,3622.616771,1659.696533,189.951294,8.737485,0.192224,1.570461,1.958544,13.831646,0.997414,1.959992,0.903182
3868,yew plum pie,238.842217,53.379723,238.281136,55.069947,243.006186,39.912744,157450.0,2872.634016,1333.122192,167.578979,7.955187,0.239768,1.418884,1.914195,9.128245,0.997812,1.500554,0.927156
3869,yew plum pie,239.346296,52.635050,238.858972,54.053523,243.508021,38.926842,152291.0,2967.340517,1336.829590,163.496201,8.176518,0.217345,1.435190,1.977797,9.435902,0.997643,1.471075,0.929787
3870,yew plum pie,238.184639,55.654106,237.471164,57.896858,242.454985,41.833093,156123.0,3173.535598,1484.108521,143.116180,10.369956,0.194800,1.360465,1.950275,9.475949,0.997927,1.532577,0.927440


In [26]:
# Save the feature table as 'features.csv' (relative path, so it is resolved
# against the current working directory). The row index is written too, as the
# first, unnamed column.
df.to_csv('features.csv')