In [1]:
from PIL import Image
from PIL import ImageDraw
import timeit
import glob
import math
import matplotlib.pyplot as plt
import numpy as np
import os
import sys
from skimage.feature import greycomatrix, greycoprops
import matplotlib.image as mpimg 
from math import sqrt
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import RobustScaler
from sklearn.preprocessing import MinMaxScaler
import seaborn as sns
import pandas as pd
from sklearn import preprocessing
import random
from sklearn.model_selection import train_test_split  
from sklearn.naive_bayes import GaussianNB  
from sklearn.metrics import accuracy_score

In [2]:
map_8bit_to_3bit = [i // 32 for i in range(256)]
model = {}
model_test = {}
neighbor = 3
time_modelling_start = timeit.default_timer()
model["Bacterial_spot"] = []
model["Early_blight"] = []
model["Late_blight"] = []
model["Leaf_Mold"] = []
model["Septoria_leaf_spot"] = []
model["Spider_mites Two-spotted_spider_mite"] = []
model["Target_Spot"] = []
model["Tomato_Yellow_Leaf_Curl_Virus"] = []
model["Tomato_mosaic_virus"] = []
model["healthy"] = []
model_test["Bacterial_spot"] = []
model_test["Early_blight"] = []
model_test["Late_blight"] = []
model_test["Leaf_Mold"] = []
model_test["Septoria_leaf_spot"] = []
model_test["Spider_mites Two-spotted_spider_mite"] = []
model_test["Target_Spot"] = []
model_test["Tomato_Yellow_Leaf_Curl_Virus"] = []
model_test["Tomato_mosaic_virus"] = []
model_test["healthy"] = []

In [3]:
#Pre Processing
def load_img(img_path):
    return Image.open(img_path).convert('L')

def get_img_size(img):
    return img.size

def print_img(img):
    plt.imshow(img)
    
def get_img_colors(img, sampling_count):
    img_width, img_height = get_img_size(img)
    
    sample_width = int(img_width / sampling_count)
    sample_height = int(img_height / sampling_count)
    
    half_sample_width = int(sample_width/2)
    half_sample_height = int(sample_height/2)

    img_colors = []

    width_constraint = img_width - (2 * sample_width)
    height_constraint = img_height - (2 * sample_height)
   
    row = 0
    for point_x in range(half_sample_width, img_width, sample_width):
        if(row == sampling_count):
            break
        column = 0
        for point_y in range(half_sample_height, img_height, sample_height):
            if(column == sampling_count):
                break
            img_colors.append(img.getpixel((point_x, point_y)))
                
            column += 1
                
        row += 1
            
    return img_colors

def construct_img(img_colors, img_dimension, sampling_count):
    img_new = Image.new('RGB', (img_dimension), (255, 255, 255))
    draw = ImageDraw.Draw(img_new)
    
    img_new_width = img_dimension[0]
    img_new_height = img_dimension[1]

    sample_width = int(img_new_width/sampling_count)
    sample_height = int(img_new_height/sampling_count)
    loop_count = 0

    img_colors_len = len(img_colors)

    for w in range(0, img_new_width, sample_width):
        for h in range(0, img_new_height, sample_height):
            if(loop_count == img_colors_len):
                break
            
            current_color = (img_colors[loop_count], img_colors[loop_count], img_colors[loop_count])
            draw.rectangle((w, h, w+sample_width, h+sample_height), fill=current_color)
            loop_count += 1
    
    return img_new

def get_3bit_colors(img_colors):
    loop_count = 0
    
    for img_color in img_colors:
        img_colors[loop_count] = map_8bit_to_3bit[img_color]
        
        loop_count += 1
        
    return img_colors

def get_img_matrix(img_colors):
    img_matrix = [] 
    loop_count = 0
    
    loop_count = 0
    img_square_dimension = int(math.sqrt(len(img_colors)))
    
    for row in range(img_square_dimension):
        temp_row = []
        for col in range(img_square_dimension):
            temp_row.append(img_colors[loop_count])
            
            loop_count += 1
        img_matrix.append(temp_row)
        
    return img_matrix

In [4]:
#Features Extraction
def feature_extract(img, sampling_count, class_name):
    glcm_component=[]
    img_colors = get_img_colors(img, sampling_count)
    img_3bit_colors = get_3bit_colors(img_colors)
    img_matrix = get_img_matrix(img_3bit_colors)
    glcm_matrix=greycomatrix(img_matrix, distances=[1], angles=[0], levels=12, symmetric=False, normed=False)
    component = ['contrast', 'homogeneity', 'energy', 'correlation', 'ASM', 'dissimilarity']
    for x in component:
        glcm_component.append(greycoprops(glcm_matrix, x)[0][0])
    if(class_name == "tulis"):
        glcm_component.append(1)
    else:
        glcm_component.append(0)
    return(glcm_component)

model_test["Bacterial_spot"] = []
model_test["Early_blight"] = []
model_test["Late_blight"] = []
model_test["Leaf_Mold"] = []
model_test["Septoria_leaf_spot"] = []
model_test["Spider_mites Two-spotted_spider_mite"] = []
model_test["Target_Spot"] = []
model_test["Tomato_Yellow_Leaf_Curl_Virus"] = []
model_test["Tomato_mosaic_virus"] = []
model_test["healthy"] = []

In [5]:

#Modelling
def get_class_names(training_folder_path):
    return os.listdir(training_folder_path)

def make_a_model(class_name, features, learning_rate):
    random_splitter = random.uniform(0, 1)
    if(random_splitter <= learning_rate):
        model[class_name].append(features)
    else:
        model_test[class_name].append(features)
        
def to_dataframe(model):
    models = pd.DataFrame(model['Bacterial_spot'] + model['Early_blight']+model['Late_blight'] + model['Leaf_Mold'] +model['Septoria_leaf_spot'] +model['Spider_mites Two-spotted_spider_mite'] + model['Septoria_leaf_spot'] + model['Target_Spot'] + model['Tomato_Yellow_Leaf_Curl_Virus']+ model['Tomato_mosaic_virus'] + model['healthy'], columns=['contrast', 'homogeneity', 'energy', 'correlation', 'ASM', 'dissimilarity', 'class'])
    return models

In [6]:

#Classification
def naive_bayes(image):
    prob = 1
    probability = 0.00
    for class_name in class_names:
        for i in range(5):
            a = (image[i]-mean[class_name][i])**2/(2*variance[class_name][i])
            b = 1/(np.sqrt(2*3.14*variance[class_name][i]))
            
            prob=prob * (b* np.exp(0-a))
        
        prob=1
        if(prob>probability):
            probability = prob
            probability_class = class_name
    return probability_class

In [7]:
#Validation
def validate(sampling_count):
    right = 0
    total = 0
    for class_name in class_names:
        validate_img_paths = glob.glob(validation_folder_path + class_name + '/' +img_type)
        for images in validate_img_paths:
            img = load_img(images)
            training_img_class_name = class_name
            image = feature_extract(img, sampling_count, training_img_class_name)
            predict = naive_bayes(image)
            class_image = class_name
            if(predict == class_name):
                   right+=1
            total +=1
        accuracy = (right/total) * 100
    print("Accuracy: " + str(accuracy) +"%")

In [8]:

#Training Testing

learning_rate = 1
sampling_count = 128
img_type = '*.JPG'
root_path = './'
training_folder_path = '../input/tomato/dataset/train/'
validation_folder_path = '../input/tomato/dataset/val/'
test_folder_path = '../input/testdata/test'

class_names = get_class_names(training_folder_path)
print(class_names)
for i in range(1):
    for class_name in class_names:
        training_img_paths = glob.glob(training_folder_path + class_name + '/' + img_type)
        #print(training_img_paths)
        for training_img_path in training_img_paths:
            training_img = load_img(training_img_path)
            training_img_class_name = class_name
            feature = feature_extract(training_img, sampling_count, training_img_class_name)
            make_a_model(class_name, feature, learning_rate)
        
    models = to_dataframe(model)
    models_test = to_dataframe(model_test)

#     testings()
# validate(128)



time_modelling_stop = timeit.default_timer()
print('Time elapsed to make this model is ' + str(time_modelling_stop - time_modelling_start) + ' seconds.')

['Late_blight', 'Tomato_Yellow_Leaf_Curl_Virus', 'Septoria_leaf_spot', 'Early_blight', 'Spider_mites Two-spotted_spider_mite', 'healthy', 'Bacterial_spot', 'Target_Spot', 'Tomato_mosaic_virus', 'Leaf_Mold']
Time elapsed to make this model is 378.8819686700001 seconds.


In [9]:
models

Unnamed: 0,contrast,homogeneity,energy,correlation,ASM,dissimilarity,class
0,0.400775,0.825710,0.366480,0.863886,0.134308,0.357222,0
1,0.417261,0.838210,0.412657,0.739594,0.170286,0.339136,0
2,0.504491,0.797317,0.354792,0.801992,0.125878,0.421690,0
3,0.399299,0.829407,0.384942,0.798917,0.148180,0.350824,0
4,0.366019,0.844278,0.402350,0.804889,0.161885,0.320497,0
...,...,...,...,...,...,...,...
10905,2.654097,0.533766,0.201736,0.500198,0.040697,1.205278,0
10906,2.277128,0.557487,0.216168,0.531242,0.046728,1.105746,0
10907,2.635950,0.555904,0.197364,0.531501,0.038952,1.160064,0
10908,2.089136,0.590748,0.210255,0.630201,0.044207,1.018393,0


In [10]:
x = models.iloc[:, [0, 5]].values  
y = models.iloc[:, 6].values
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size = 0.25, random_state = 5)
sc = StandardScaler()  
x_train = sc.fit_transform(x_train)  
x_test = sc.transform(x_test)
classifier = GaussianNB()  
classifier.fit(x_train, y_train)
y_pred = classifier.predict(x_test)
accuracy_score(y_test, y_pred)


1.0

In [11]:
x


array([[0.4007751 , 0.35722195],
       [0.41726132, 0.33913632],
       [0.50449065, 0.42169045],
       ...,
       [2.6359498 , 1.16006398],
       [2.08913632, 1.01839321],
       [1.98646654, 0.97982283]])