In [None]:
########################################## IMPORTING LIBRARIES ################################################

import os
import cv2
import numpy as np
import pandas as pd
import mahotas as mt
from matplotlib import pyplot as plt
import string
from sklearn import svm
from sklearn.neighbors import KNeighborsClassifier
%matplotlib inline

In [None]:
# Folder holding the numbered leaf images ("<n>.jpg") that create_dataset()
# and vector_calc() read. Set the LEAF_DATASET_DIR environment variable to
# point the notebook at another copy of the data; the original location is
# kept as the default so existing setups behave exactly as before.
ds_path = os.environ.get(
    "LEAF_DATASET_DIR",
    "/home/khanna/Desktop/54_ML_in_agricultural_domain/Dataset/",
)
img_files = os.listdir(ds_path)
# Feature vector of the image being classified; filled in by vector_calc().
vector_1= None

In [None]:
##################################### FOR PREPROCESSING ########################################################

# Column order of one feature vector: shape, colour, then texture features.
_FEATURE_NAMES = ['area','perimeter','physiological_length','physiological_width',
                  'aspect_ratio','rectangularity','circularity',
                  'mean_r','mean_g','mean_b','stddev_r','stddev_g','stddev_b',
                  'contrast','correlation','inverse_difference_moments','entropy']


def _extract_features(main_img):
    """Compute the shape, colour and texture features of one leaf image.

    Args:
        main_img: image array as returned by cv2.imread (BGR channel order).

    Returns:
        A list of 17 values ordered like _FEATURE_NAMES, or None when the
        thresholded image contains no contour to measure.
    """
    img = cv2.cvtColor(main_img, cv2.COLOR_BGR2RGB)
    gs = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)

    # Segmentation: blur, inverted Otsu threshold, then morphological closing.
    blur = cv2.GaussianBlur(gs, (25,25), 0)
    _, im_bw_otsu = cv2.threshold(blur, 0, 255, cv2.THRESH_BINARY_INV+cv2.THRESH_OTSU)
    kernel = np.ones((50,50), np.uint8)
    closing = cv2.morphologyEx(im_bw_otsu, cv2.MORPH_CLOSE, kernel)

    # Shape features.
    # findContours returns (contours, hierarchy) on OpenCV 2.x/4.x but
    # (image, contours, hierarchy) on 3.x; [-2] is the contour list in both.
    contours = cv2.findContours(closing, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)[-2]
    if len(contours) == 0:
        return None
    # NOTE(review): the first contour returned is measured, which is not
    # guaranteed to be the leaf outline. Kept unchanged so new vectors stay
    # comparable with feature files computed earlier - confirm before changing.
    cnt = contours[0]
    area = cv2.contourArea(cnt)
    perimeter = cv2.arcLength(cnt, True)
    x, y, w, h = cv2.boundingRect(cnt)
    aspect_ratio = float(w)/h
    if area:
        rectangularity = w*h/area
        circularity = (perimeter**2)/area
    else:
        # A degenerate (zero-area) contour has no meaningful ratios; use 0.0
        # instead of leaving the names unbound or reusing a previous image's
        # values.
        rectangularity = 0.0
        circularity = 0.0

    # Colour features: per-channel mean and standard deviation, with pixels
    # whose channel value is 255 counted as 0 (presumably to discount a white
    # background - TODO confirm).
    channel_means = []
    channel_stds = []
    for channel_index in range(3):  # R, G, B
        channel = img[:, :, channel_index].copy()
        channel[channel == 255] = 0
        channel_means.append(np.mean(channel))
        channel_stds.append(np.std(channel))

    # Texture features: Haralick features averaged over the rows mahotas
    # returns, then the four used by the model are picked out by position.
    textures = mt.features.haralick(gs)
    ht_mean = textures.mean(axis=0)
    contrast = ht_mean[1]
    correlation = ht_mean[2]
    inverse_diff_moments = ht_mean[4]
    entropy = ht_mean[8]

    return [area, perimeter, w, h, aspect_ratio, rectangularity, circularity,
            channel_means[0], channel_means[1], channel_means[2],
            channel_stds[0], channel_stds[1], channel_stds[2],
            contrast, correlation, inverse_diff_moments, entropy]


def create_dataset(image_ids=range(1,600)):
    """Build the feature table for the numbered images found in ds_path.

    Args:
        image_ids: image numbers to process; "<number>.jpg" is read from
            ds_path. Defaults to 1..599, the range that was hard-coded before.

    Returns:
        A DataFrame with one row per readable image and the columns listed in
        _FEATURE_NAMES. The index holds the image number, so rows can still be
        matched to images when some files are missing. Images that cannot be
        read are skipped silently (as before); images without any contour are
        skipped with a printed notice.
    """
    rows = []
    row_ids = []
    for i in image_ids:
        imgpath = os.path.join(ds_path, str(i) + ".jpg")
        main_img = cv2.imread(imgpath)
        if main_img is None:
            continue
        features = _extract_features(main_img)
        if features is None:
            print("Skipping " + imgpath + ": no contour found")
            continue
        rows.append(features)
        row_ids.append(i)
    # Build the frame once: DataFrame.append was removed in pandas 2.0 and
    # appending inside the loop copied the whole table on every iteration.
    return pd.DataFrame(rows, columns=_FEATURE_NAMES, index=pd.Index(row_ids, name="image"))

#____________________________________________________________________________________________________________

def vector_calc(f=None):
    """Extract the feature vector of one image into the global vector_1.

    Args:
        f: image number or name (without ".jpg") inside ds_path. When omitted,
            it is read from standard input, as before.

    Returns:
        The feature vector (list of 17 values), also stored in vector_1.

    Raises:
        FileNotFoundError: the image could not be read.
        ValueError: no contour could be found in the image.
    """
    global vector_1
    if f is None:
        f = input()
    imgpath = os.path.join(ds_path, str(f).strip() + ".jpg")
    print(imgpath)
    main_img = cv2.imread(imgpath)
    if main_img is None:
        # Clear the previous result so a mistyped name can never lead to an
        # earlier image being classified by the cells that follow.
        vector_1 = None
        raise FileNotFoundError("Could not read image: " + imgpath)
    features = _extract_features(main_img)
    if features is None:
        vector_1 = None
        raise ValueError("No contour found in image: " + imgpath)
    vector_1 = features
    return vector_1
        



In [None]:
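# One-off step (disabled): rebuild the feature table from the images in ds_path.
# NOTE(review): this writes "train_features.csv", while the dataset-preparation
# cell reads "trained_features.csv" - confirm which file name is intended.
# dataset = create_dataset()
# dataset.to_csv("train_features.csv")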


In [None]:
# Read an image number from standard input, extract that image's features
# into the global vector_1, then display the vector.
vector_calc()
print(vector_1)

In [None]:
############################################## DATASET PREPARATION #############################################

# Cached feature tables (presumably one row per image - TODO confirm).
# `dataset` feeds the leaf-identification models; dataset1/2/3 feed the
# per-leaf disease models trained by for_bp(), for_p() and for_t().
# The first CSV column is dropped wherever these frames are used (iloc[:,1:]).
dataset = pd.read_csv("trained_features.csv")
dataset1 = pd.read_csv("bp_train_features.csv")
dataset2 = pd.read_csv("p_train_features.csv")
dataset3= pd.read_csv("t_train_features.csv")
# Class boundaries as flat lists of inclusive (first, last) image-number
# pairs; the k-th pair is class label k. `breakpoints` separates the leaf
# types, breakpoints1/2/3 separate the disease classes within one leaf type.
breakpoints = [1,80,81,200,201,600]
breakpoints1 = [1,40,41,80]
breakpoints2 = [1,40,41,80,81,120]
breakpoints3 = [1,40,41,80,81,120,121,160,161,200,201,240,241,280,281,320,321,360,361,399]
# NOTE(review): ds_path is rebound here, so vector_calc() reads from this
# folder instead of the earlier one if it is re-run after this cell.
maindir = "/home/khanna/Desktop/Major2"
ds_path = maindir + "/M"
img_files = os.listdir(ds_path)
# Placeholders for the fitted models and their test accuracies; they are
# filled in by svm_gs(), for_bp(), for_p() and for_t().
# NOTE: the name `knn` is rebound by the later `def knn()`.
svm_clf = None
knn = None
knn_bp = None
knn_p = None
knn_t = None
accuracy_bp = None
accuracy_p = None
accuracy_t = None


In [None]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Pair up the flat breakpoint list: class k covers image numbers first..last.
class_ranges = list(zip(breakpoints[0::2], breakpoints[1::2]))

# One leaf-class label per image number 1..599; numbers that fall outside
# every range get no label.
target_list = []
for image_no in range(1,600):
    for class_id, (first, last) in enumerate(class_ranges):
        if first <= image_no <= last:
            target_list.append(class_id)
            break

y = np.asarray(target_list)
# Drop the first CSV column; the remaining columns are the features.
X = dataset.iloc[:,1:]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state = 142)

# Standardise using statistics of the training split only.
sc_X = StandardScaler()
X_train = sc_X.fit_transform(X_train)
X_test = sc_X.transform(X_test)

In [None]:
######################################### MODELS FOR LEAF IDENTIFICATION #######################################
def svm_gs():
    """Grid-search an SVM leaf classifier and keep it in the global svm_clf.

    Runs a 5-fold GridSearchCV over RBF and linear kernels on the module-level
    X_train / y_train. The fitted search object is stored in `svm_clf`, which
    svm_calc() uses for prediction.

    Returns:
        The fitted GridSearchCV object (the same object as `svm_clf`).
    """
    from sklearn.model_selection import GridSearchCV
    global svm_clf

    parameters = [{'kernel': ['rbf'],
                   'gamma': [1e-4, 1e-3, 0.01, 0.1, 0.2, 0.5],
                   'C': [1, 10, 100, 1000]},
                  {'kernel': ['linear'], 'C': [1, 10, 100, 1000]}
                 ]
    print("Starting....")
    # The earlier default-parameter SVC fit/predict was discarded without
    # being used, so only the grid search itself is run.
    svm_clf = GridSearchCV(svm.SVC(decision_function_shape='ovr'), parameters, cv=5)
    svm_clf.fit(X_train, y_train)
    print("Grid search finished. Best parameters:", svm_clf.best_params_)
    return svm_clf

#____________________________________________________________________________________________________________

# Fitted leaf-identification KNN model. It lives under its own name so that
# training no longer overwrites the knn() function itself, which made a second
# call to knn() fail with "object is not callable".
knn_clf = None

def knn():
    """Train the 7-nearest-neighbour leaf classifier.

    Fits on the module-level X_train / y_train and stores the model in the
    global `knn_clf`, which knn_calc() uses for prediction.

    Returns:
        The accuracy of the fitted model on X_test / y_test.
    """
    global knn_clf
    print("Starting....")
    knn_clf = KNeighborsClassifier(n_neighbors = 7).fit(X_train, y_train)
    return knn_clf.score(X_test, y_test)

#____________________________________________________________________________________________________________

def svm_calc(array):
    """Predict leaf classes for `array` with the grid-searched SVM.

    Raises:
        RuntimeError: svm_gs() has not been run yet.
    """
    if svm_clf is None:
        raise RuntimeError("SVM model is not trained; run svm_gs() first")
    return svm_clf.predict(array)

def knn_calc(array):
    """Predict leaf classes for `array` with the KNN model trained by knn().

    Raises:
        RuntimeError: knn() has not been run yet.
    """
    if knn_clf is None:
        raise RuntimeError("KNN model is not trained; run knn() first")
    return knn_clf.predict(array)

#____________________________________________________________________________________________________________

def identify(ans):
    """Translate a leaf-class label into the plant name.

    0 maps to "Bell Pepper", 1 to "Potato"; every other value maps to
    "Tomato".
    """
    if ans == 0:
        return "Bell Pepper"
    if ans == 1:
        return "Potato"
    return "Tomato"

#____________________________________________________________________________________________________________

def predict_ans(dis):
    """Return the human-readable diagnosis for a global disease code.

    Args:
        dis: disease code 0..14, either a plain number or a one-element
            array such as the result of a classifier's predict() call.
            Codes 0-1 are bell pepper, 2-4 potato and 5-14 tomato.

    Returns:
        The description string for the code.

    Raises:
        ValueError: `dis` is not exactly one integer code in 0..14. (The
            if/elif chain used before failed with UnboundLocalError here.)
    """
    labels = (
        "Diseased Bell Pepper with Bacterial Spot",
        "Healthy Bell Pepper",
        "Diseased Potato with Early Blight",
        "Healthy Potato",
        "Diseased Potato with Late Blight",
        "Diseased Tomato with Target Spot",
        "Diseased Tomato with Mosaic Virus",
        "Diseased Tomato with Yellow Leaf Curl Virus",
        "Diseased Tomato with Bacterial Spot",
        "Diseased Tomato with Early Blight",
        "Healthy Tomato",
        "Diseased Tomato with Late Blight",
        "Diseased Tomato with Leaf Mold",
        "Diseased Tomato with Septoria Leaf Spot",  # was misspelled "Seporia"
        "Diseased Tomato with Spider Mites",
    )
    # Accept scalars and one-element arrays alike.
    codes = np.asarray(dis).ravel()
    if codes.size != 1:
        raise ValueError("predict_ans expects exactly one disease code, got %d" % codes.size)
    code = codes[0]
    try:
        index = int(code)
    except (TypeError, ValueError):
        raise ValueError("Unknown disease code: %r" % (dis,))
    # Reject fractional codes and anything outside the label table; negative
    # values must not wrap around to the end of the tuple.
    if index != code or not 0 <= index < len(labels):
        raise ValueError("Unknown disease code: %r" % (dis,))
    return labels[index]

In [None]:
################################## PREPARING MODEL FOR LEAF IDENTIFICATION ####################################

# Train the leaf-identification model. Only the KNN model is trained here;
# the SVM grid search is left disabled, so `choice = 1` in the single-item
# test cell needs svm_gs() to be run first.
# svm_gs()
knn()

In [None]:
######################################### TESTING FOR SINGLE ITEM ################################################

if vector_1 is None:
    raise RuntimeError("No feature vector available; run vector_calc() first")

# Raw feature vector of the query image, as a one-row batch.
X_query = [vector_1]

# The leaf classifiers were fitted on features standardised with sc_X, so the
# query has to go through the same scaler; passing the raw values compared
# pixel-scale numbers against z-scores. The frame reuses the training column
# names so the scaler sees the layout it was fitted on.
X_query_scaled = sc_X.transform(pd.DataFrame(X_query, columns=X.columns))

# 1 = grid-searched SVM, 2 = KNN
choice =2
if(choice == 1):
    ans = svm_calc(X_query_scaled)
elif(choice == 2):
    ans = knn_calc(X_query_scaled)
else:
    raise ValueError("choice must be 1 (SVM) or 2 (KNN)")
mainAns= identify(ans)

In [None]:
######################################## MODELS FOR DISEASE DETECTION ########################################

def _labels_from_breakpoints(class_breakpoints, n_images):
    """Return the class label of every image number 1..n_images.

    `class_breakpoints` is a flat list of inclusive (first, last) image-number
    pairs; the k-th pair is class k. Image numbers outside every range get no
    label, so the result can be shorter than n_images.
    """
    ranges = list(zip(class_breakpoints[0::2], class_breakpoints[1::2]))
    labels = []
    for image_no in range(1, n_images + 1):
        for class_id, (first, last) in enumerate(ranges):
            if first <= image_no <= last:
                labels.append(class_id)
                break
    return np.array(labels)


def _train_disease_knn(features, class_breakpoints, n_images):
    """Fit a scaler + 7-NN pipeline on one leaf type's feature table.

    Args:
        features: DataFrame whose first column is dropped and whose remaining
            columns are the features.
        class_breakpoints: flat list of inclusive class ranges (see
            _labels_from_breakpoints).
        n_images: highest image number to label.

    Returns:
        (model, accuracy): the fitted pipeline and its accuracy on the 25 %
        held-out split (random_state=142).
    """
    from sklearn.model_selection import train_test_split
    from sklearn.pipeline import make_pipeline
    from sklearn.preprocessing import StandardScaler

    y = _labels_from_breakpoints(class_breakpoints, n_images)
    X = features.iloc[:,1:].to_numpy()
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state = 142)
    # The scaler is part of the model, so predict() takes raw feature vectors.
    # Previously the fitted scaler was a local that was thrown away, and the
    # model was later queried with unscaled features.
    model = make_pipeline(StandardScaler(), KNeighborsClassifier(n_neighbors = 7))
    model.fit(X_train, y_train)
    return model, model.score(X_test, y_test)

#____________________________________________________________________________________________________________

def for_bp():
    """Train the bell-pepper disease model into knn_bp / accuracy_bp."""
    global knn_bp, accuracy_bp
    knn_bp, accuracy_bp = _train_disease_knn(dataset1, breakpoints1, 80)

#____________________________________________________________________________________________________________

def for_p():
    """Train the potato disease model into knn_p / accuracy_p."""
    global knn_p, accuracy_p
    knn_p, accuracy_p = _train_disease_knn(dataset2, breakpoints2, 120)

#____________________________________________________________________________________________________________

def for_t():
    """Train the tomato disease model into knn_t / accuracy_t."""
    global knn_t, accuracy_t
    # Image numbers up to 400 are scanned, as before; breakpoints3 ends at
    # 399, so number 400 receives no label.
    knn_t, accuracy_t = _train_disease_knn(dataset3, breakpoints3, 400)

In [None]:
################################### PREPARING MODEL FOR DISEASE CLASSIFICATION ###################################

# Leaf class predicted by the identification step (a one-element array).
leaf_class = int(np.asarray(ans).ravel()[0])

# Each branch trains its model on first use, so the cell also works on a fresh
# kernel, and shifts the per-leaf class index to the global disease code used
# by predict_ans(): bell pepper 0-1, potato 2-4, tomato 5-14.
if(leaf_class == 0):
    if knn_bp is None:
        for_bp()
    dis = knn_bp.predict(X_query)
    acc= accuracy_bp
elif(leaf_class == 1):
    if knn_p is None:
        for_p()
    dis = knn_p.predict(X_query) + 2
    acc= accuracy_p
elif(leaf_class == 2):
    if knn_t is None:
        for_t()
    # Tomato codes start at 5. The previous offset of 6 skipped "Target Spot"
    # and pushed the last tomato class to 15, which has no label.
    dis = knn_t.predict(X_query) + 5
    acc= accuracy_t
else:
    raise ValueError("Unexpected leaf class: %r" % (ans,))

# NOTE: this rebinds the name of the imported `string` module; the name is
# kept because the answer cell prints it.
string = predict_ans(dis)

In [None]:
################################################# ANSWER #####################################################

# Final report: the identified leaf type, the disease diagnosis, and the
# accuracy (in percent) that the selected disease model reached on its own
# held-out test split.
print("Leaf Prediction Result :")
print("This is a "+ mainAns +" leaf")
print(" ")
print("Disease Classification Result :")
print("This is a "+string )
print(" ")
print("Accuracy :")
print(acc*100 )