In [9]:
import os
import cv2
import numpy as np
import pandas as pd
import mahotas as mt
from matplotlib import pyplot as plt
%matplotlib inline

In [14]:
# Directory holding the images of the single class processed by this notebook.
# NOTE(review): absolute, machine-specific path -- adjust before running elsewhere.
ds_path = "C:/Users/KIIT/Desktop/Leaf_disease_detection/PlantVillage/plant_folder/Pepper__bell___Bacterial_spot"
# os.listdir() returns entries in arbitrary order; sort them so the row order
# of the extracted feature table is reproducible from run to run.
img_files = sorted(os.listdir(ds_path))

In [15]:
def _extract_leaf_features(bgr_image):
    """Compute the 10-element colour/texture feature vector for one image.

    Parameters
    ----------
    bgr_image : numpy.ndarray
        Image array as returned by ``cv2.imread`` (converted here with
        ``cv2.COLOR_BGR2RGB``, so a 3-channel BGR image is expected).

    Returns
    -------
    list
        ``[mean_r, mean_g, mean_b, stddev_r, stddev_g, stddev_b,
        contrast, correlation, inverse_difference_moments, entropy]``.
    """
    img = cv2.cvtColor(bgr_image, cv2.COLOR_BGR2RGB)
    # The grayscale image is derived *before* the masking below, so the
    # texture features are computed on the unmodified intensities.
    gs = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)

    # Colour features: channel values equal to 255 are zeroed first
    # (presumably to discount saturated/white pixels -- TODO confirm intent).
    img[img == 255] = 0
    channel_means = [np.mean(img[:, :, c]) for c in range(3)]  # R, G, B
    channel_stds = [np.std(img[:, :, c]) for c in range(3)]    # R, G, B

    # Texture features: Haralick descriptors averaged over axis 0 of the
    # array returned by mahotas, then picked by column position.
    textures = mt.features.haralick(gs)
    ht_mean = textures.mean(axis=0)
    contrast = ht_mean[1]
    correlation = ht_mean[2]
    inverse_diff_moments = ht_mean[4]
    entropy = ht_mean[8]

    return channel_means + channel_stds + [
        contrast, correlation, inverse_diff_moments, entropy
    ]


def create_dataset(image_dir=None, image_files=None):
    """Build a feature table with one row per readable image.

    Parameters
    ----------
    image_dir : str, optional
        Directory containing the images. Defaults to the notebook-level
        ``ds_path``.
    image_files : iterable of str, optional
        File names (relative to ``image_dir``) to process. Defaults to the
        notebook-level ``img_files`` when ``image_dir`` is also omitted,
        otherwise to the sorted listing of ``image_dir``.

    Returns
    -------
    pandas.DataFrame
        Columns ``mean_r, mean_g, mean_b, stddev_r, stddev_g, stddev_b,
        contrast, correlation, inverse_difference_moments, entropy`` with a
        0..n-1 index. Empty (but with the same columns) when no image could
        be processed.

    Notes
    -----
    Files that ``cv2.imread`` cannot decode are reported and skipped rather
    than aborting the whole run. The name of every processed file is printed.
    """
    names = ['mean_r', 'mean_g', 'mean_b', 'stddev_r', 'stddev_g', 'stddev_b',
             'contrast', 'correlation', 'inverse_difference_moments', 'entropy']

    # Resolve defaults; image_files must be resolved before image_dir because
    # its default depends on whether the caller supplied a directory.
    if image_files is None:
        image_files = img_files if image_dir is None else sorted(os.listdir(image_dir))
    if image_dir is None:
        image_dir = ds_path

    # Collect plain rows and build the frame once: DataFrame.append was
    # removed in pandas 2.0 and growing a frame row-by-row is quadratic.
    rows = []
    for file in image_files:
        imgpath = os.path.join(image_dir, file)
        main_img = cv2.imread(imgpath)
        if main_img is None:
            # imread signals an unreadable / non-image file by returning None.
            print("Skipping unreadable image: " + imgpath)
            continue
        rows.append(_extract_leaf_features(main_img))
        print(file)
    return pd.DataFrame(rows, columns=names)

In [16]:
# Run the feature extraction; create_dataset() prints each file name as it is processed.
dataset = create_dataset()

0022d6b7-d47c-4ee2-ae9a-392a53f48647___JR_B.Spot 8964.JPG
006adb74-934f-448f-a14f-62181742127b___JR_B.Spot 3395.JPG
00f2e69a-1e56-412d-8a79-fdce794a17e4___JR_B.Spot 3132.JPG
01613cd0-d3cd-4e96-945c-a312002037bf___JR_B.Spot 3262.JPG
0169b9ac-07b9-4be1-8b85-da94481f05a4___NREC_B.Spot 9169.JPG
018e494e-d2eb-468b-9d02-40219d9f4921___JR_B.Spot 9045.JPG
01940b6d-7dea-4889-a7b8-a35f4e9bba34___NREC_B.Spot 9120.JPG
01dfb88b-cd5a-420c-b163-51f5fe07b74d___JR_B.Spot 9091.JPG
01ebc916-4793-40a3-b5e4-a32687e4fa3d___NREC_B.Spot 9125.JPG
024623ab-be81-4d99-a653-c3be1495ce8e___JR_B.Spot 8889.JPG
02baf62e-11e2-4dde-97fb-e369b57d55d3___JR_B.Spot 8971.JPG
032aa7c4-0a95-4f09-b0bb-1242f4f32d82___NREC_B.Spot 1889.JPG
03bb7042-3fd5-42e1-aa2c-d51d6843704c___JR_B.Spot 8925.JPG
0448a226-3d3a-494d-a026-a92b048689c7___JR_B.Spot 8897.JPG
0495b4d4-66a4-438c-862d-c483f08e4a95___JR_B.Spot 3288.JPG
04d46cfb-9cc8-4083-82af-ca2bb57c8182___NREC_B.Spot 1814.JPG
05287bcb-610b-440f-9337-c4ce98bc3bbe___JR_B.Spot 3327.JPG
0558

In [22]:
# Sanity check: dimensions of the feature table as (rows, columns).
dataset.shape

(997, 10)

In [23]:
# Sanity check: show the type of the object returned by create_dataset().
type(dataset)

pandas.core.frame.DataFrame

In [24]:
# Persist the feature table to plant_village.csv (relative to the current
# working directory). With the default arguments used here, to_csv also writes
# the DataFrame index as the first, unnamed column.
dataset.to_csv("plant_village.csv")