In [95]:
import pandas as pd
import os
from skimage.transform import resize
from skimage.io import imread
import numpy as np
import matplotlib.pyplot as plt

In [96]:
# Folder names (one folder per sample category) that are joined onto `datadir`.
# The order inside each list matters: the position of a folder name is used as
# the class label by the species-specific loading cell.
Categories_Durum=['CFP-CK1','CFP-CK2'] # [Infected, Healthy]
Categories_Bread=['CFP-CK3','CFP-CK4'] # [Infected, Healthy]
Categories_Complete=['CFP-CK1','CFP-CK3','CFP-CK2','CFP-CK4']  # [Infected, Infected, Healthy, Healthy]
Test_Categories_Bread=['CFP-B179', 'CFP-B223'] # 39/103 or 37.8% infected; 21/105 or 20% infected

# Path which contains all the categories of images.
# Set the WHEAT_DATA_DIR environment variable to run the notebook on another
# machine; when it is unset the original location is used unchanged.
datadir=os.environ.get(
    'WHEAT_DATA_DIR',
    '/student/sch923/Thesis/data/test_wheat_2021/Wheat/TestSamples',
)

In [97]:
# flat_arr[k] / target_arr[k] hold the flattened images and the class labels
# of the k-th entry of Cat_Durum_Bread (0 = Durum, 1 = Bread).
flat_arr, target_arr=[],[]

# creating species specific data sets
Cat_Durum_Bread = [Categories_Durum,Categories_Bread]
for Categories in Cat_Durum_Bread:
    species_flat, species_target = [], []
    flat_arr.append(species_flat)
    target_arr.append(species_target)
    # The label of an image is the position of its folder inside `Categories`.
    for label, i in enumerate(Categories):
        print(f'loading... category : {i}')
        path=os.path.join(datadir,i)
        image_count = 0
        # os.listdir returns entries in arbitrary order; sorting keeps the
        # sample order (and therefore the seeded train/test split) reproducible.
        for img in sorted(os.listdir(path)):
            img_array=imread(os.path.join(path,img))
            # Every image is resized to 150x150x3 and flattened to one row.
            img_resized=resize(img_array,(150,150,3))
            species_flat.append(img_resized.flatten())
            species_target.append(label)
            image_count += 1
        print(f'loaded category: {i} successfully, found {image_count} images')

loading... category : CFP-CK1
loaded category: CFP-CK1 successfully, found 43 images
loading... category : CFP-CK2
loaded category: CFP-CK2 successfully, found 48 images
loading... category : CFP-CK3
loaded category: CFP-CK3 successfully, found 24 images
loading... category : CFP-CK4
loaded category: CFP-CK4 successfully, found 31 images


In [98]:
flat_arr_Complete, target_arr_Complete=[], []

def index_corrector(index):
    """Map a position in Categories_Complete to the infected/healthy label.

    Positions 0 and 1 map to 0 and every later position maps to 1. This relies
    on the folder order in Categories_Complete being
    [infected, infected, healthy, healthy].
    """
    return 0 if index<2 else 1

# creating complete (both species) data set
for position, i in enumerate(Categories_Complete):
    print(f'loading... category : {i}')
    path=os.path.join(datadir,i)
    image_count = 0
    label = index_corrector(position)
    # os.listdir returns entries in arbitrary order; sorting keeps the sample
    # order (and therefore the seeded train/test split) reproducible.
    for img in sorted(os.listdir(path)):
        img_array=imread(os.path.join(path,img))
        # Every image is resized to 150x150x3 and flattened to one row.
        img_resized=resize(img_array,(150,150,3))
        flat_arr_Complete.append(img_resized.flatten())
        target_arr_Complete.append(label)
        image_count += 1
    print(f'loaded category: {i} successfully, found {image_count} images')

loading... category : CFP-CK1
loaded category: CFP-CK1 successfully, found 43 images
loading... category : CFP-CK3
loaded category: CFP-CK3 successfully, found 24 images
loading... category : CFP-CK2
loaded category: CFP-CK2 successfully, found 48 images
loading... category : CFP-CK4
loaded category: CFP-CK4 successfully, found 31 images


In [99]:
flat_arr_test, target_arr_test=[], []

# creating test set with mixed data
for i in Test_Categories_Bread:
    print(f'loading... category : {i}')
    path=os.path.join(datadir,i)
    image_count = 0
    # os.listdir returns entries in arbitrary order; sorting keeps the sample
    # order reproducible between runs and machines.
    for img in sorted(os.listdir(path)):
        img_array=imread(os.path.join(path,img))
        # Every image is resized to 150x150x3 and flattened to one row.
        img_resized=resize(img_array,(150,150,3))
        flat_arr_test.append(img_resized.flatten())
        # Placeholder label: the true class of these images is unknown, so
        # every image gets 0. Scores computed against it are not accuracies.
        target_arr_test.append(0)
        image_count += 1
    print(f'loaded category: {i} successfully, found {image_count} images')


loading... category : CFP-B179
loaded category: CFP-B179 successfully, found 103 images
loading... category : CFP-B223
loaded category: CFP-B223 successfully, found 96 images


In [100]:
# Data wrapper transformation for species specific data:
# turn each per-species list into a NumPy array, wrap it in a DataFrame with
# one row per image, and append the labels as a trailing 'Target' column.
def list_arr_fun(l):
    """Return `l` converted to a NumPy array."""
    return np.array(l)

flat_data = [list_arr_fun(features) for features in flat_arr]
target = [list_arr_fun(labels) for labels in target_arr]

# Position 0 is the Durum data set, position 1 the Bread data set.
df_Durum = pd.DataFrame(flat_data[0])
df_Durum['Target'] = target[0]

df_Bread = pd.DataFrame(flat_data[1])
df_Bread['Target'] = target[1]

# 'Target' is the last column: everything before it is input, it is the output.
x_Durum, y_Durum = df_Durum.iloc[:, :-1], df_Durum.iloc[:, -1]
x_Bread, y_Bread = df_Bread.iloc[:, :-1], df_Bread.iloc[:, -1]

In [101]:
# Data wrapper transformation for complete data:
# one row per image, with the labels appended as the last column 'Target'.
flat_data_Complete = np.array(flat_arr_Complete)
target_Complete = np.array(target_arr_Complete)

df_Complete = pd.DataFrame(flat_data_Complete).assign(Target=target_Complete)

# Split the frame into input data (all but the last column) and output data.
x_Complete, y_Complete = df_Complete.iloc[:, :-1], df_Complete.iloc[:, -1]

In [111]:
# Data wrapper transformation for test data:
# one row per image, with the placeholder labels appended as 'Target'.
flat_data_test = np.array(flat_arr_test)
# The label list is rebound to its array form under the same name.
target_arr_test = np.array(target_arr_test)

df_test = pd.DataFrame(flat_data_test).assign(Target=target_arr_test)

# Split the frame into input data (all but the last column) and output data.
x_test, y_test = df_test.iloc[:, :-1], df_test.iloc[:, -1]

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 91 entries, 0 to 90
Columns: 67500 entries, 0 to 67499
dtypes: float64(67500)
memory usage: 46.9 MB
x_Durum None
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 55 entries, 0 to 54
Columns: 67500 entries, 0 to 67499
dtypes: float64(67500)
memory usage: 28.3 MB
x_Bread None
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 146 entries, 0 to 145
Columns: 67500 entries, 0 to 67499
dtypes: float64(67500)
memory usage: 75.2 MB
x_Complete None
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 199 entries, 0 to 198
Columns: 67500 entries, 0 to 67499
dtypes: float64(67500)
memory usage: 102.5 MB
x_test None


In [104]:
from sklearn import svm
from sklearn.model_selection import GridSearchCV

# Hyper-parameter grid shared by all three searches.
param_grid = {
    'C': [0.1, 1, 10, 100],
    'gamma': [0.0001, 0.001, 0.1, 1],
    'kernel': ['rbf', 'poly'],
}

# Stand-alone estimator; it is not passed to any of the searches below.
svc = svm.SVC(probability=True)

# One independent grid search (each with its own fresh SVC) per data set.
model_Durum, model_Bread, model_Complete = (
    GridSearchCV(svm.SVC(probability=True), param_grid) for _ in range(3)
)
# After fitting, <model>.best_params_ contains the best parameters obtained from GridSearchCV

In [112]:
from sklearn.model_selection import train_test_split
import time

# Every data set is split the same way: 20% held out, fixed seed, stratified
# on its own label column.
split_options = {'test_size': 0.20, 'random_state': 77}

x_train_Durum, x_test_Durum, y_train_Durum, y_test_Durum = train_test_split(
    x_Durum, y_Durum, stratify=y_Durum, **split_options
)
print('Durum Splitted Successfully')

x_train_Bread, x_test_Bread, y_train_Bread, y_test_Bread = train_test_split(
    x_Bread, y_Bread, stratify=y_Bread, **split_options
)
print('Bread Splitted Successfully')

x_train_Complete, x_test_Complete, y_train_Complete, y_test_Complete = train_test_split(
    x_Complete, y_Complete, stratify=y_Complete, **split_options
)
print('Complete Splitted Successfully')

# Only the feature halves of the split are kept for the test data; the label
# halves are discarded.
x_train_test, x_test_test, _, _ = train_test_split(
    x_test, y_test, stratify=y_test, **split_options
)
print('Test Splitted Successfully')

Durum Splitted Successfully
Bread Splitted Successfully
Complete Splitted Successfully
Test Splitted Successfully


In [106]:
# Fit the three grid searches one after another and report the wall-clock
# duration of each fit as HH:MM:SS.ss.
training_runs = (
    ("Durum", model_Durum, x_train_Durum, y_train_Durum),
    ("Bread", model_Bread, x_train_Bread, y_train_Bread),
    ("Complete", model_Complete, x_train_Complete, y_train_Complete),
)

for run_name, grid_search, train_features, train_labels in training_runs:
    print(f"Starting to train {run_name} model...")
    start = time.time()
    grid_search.fit(train_features, train_labels)
    end = time.time()
    # Break the elapsed seconds down into hours, minutes and seconds.
    hours, rem = divmod(end-start, 3600)
    minutes, seconds = divmod(rem, 60)
    print(f"The {run_name} model trained in: {int(hours):0>2}:{int(minutes):0>2}:{seconds:05.2f}")


Starting to train Durum model...
The Durum model trained in: 00:04:41.42
Starting to train Bread model...
The Bread model trained in: 00:02:53.68
Starting to train Complete model...
The Complete model trained in: 00:08:36.39


In [107]:
from sklearn.metrics import accuracy_score

# Each fitted model predicts four feature sets. Suffix _1 is the model's own
# hold-out set, _2 and _3 are the hold-out sets of the other two data sets,
# and _4 is the full test data (x_test).
# NOTE(review): the Durum, Bread and Complete hold-out sets come from separate
# train_test_split calls, and Categories_Complete lists the same folders as the
# Durum and Bread lists, so a cross-set score may include images the model was
# trained on - confirm before relying on those numbers.
y_pred_Durum_1, y_pred_Durum_2, y_pred_Durum_3, y_pred_Durum_4 = (
    model_Durum.predict(features)
    for features in (x_test_Durum, x_test_Bread, x_test_Complete, x_test)
)

y_pred_Bread_1, y_pred_Bread_2, y_pred_Bread_3, y_pred_Bread_4 = (
    model_Bread.predict(features)
    for features in (x_test_Bread, x_test_Durum, x_test_Complete, x_test)
)

y_pred_Complete_1, y_pred_Complete_2, y_pred_Complete_3, y_pred_Complete_4 = (
    model_Complete.predict(features)
    for features in (x_test_Complete, x_test_Bread, x_test_Durum, x_test)
)

In [113]:
def _print_report(model_name, data_name, y_pred, y_true, blank_line_after=False):
    """Print predictions, reference labels and the accuracy of one evaluation.

    model_name: name of the model that produced `y_pred` (used in the text).
    data_name: name of the data set that was predicted (used in the text).
    y_pred: predicted labels; must provide `.tolist()`.
    y_true: reference labels; must provide `.tolist()`.
    blank_line_after: append an empty line after the accuracy line.
    """
    print(f"Model ({model_name} x {data_name})")
    print("The predicted Data is :")
    print(y_pred.tolist())
    print("The actual data is:")
    print(y_true.tolist())
    trailer = "\n" if blank_line_after else ""
    # accuracy_score takes (y_true, y_pred); the value is the same either way.
    print(f"The {model_name} model on {data_name} Data is {accuracy_score(y_true, y_pred)*100}% accurate{trailer}")


# NOTE: y_test holds placeholder labels that are all 0, so the figure printed
# for the "Test" data set is the share of images predicted as class 0, not a
# real accuracy.

# Durum predictions
print("=========== DURUM TESTS ===========\n")
_print_report("Durum", "Durum", y_pred_Durum_1, y_test_Durum, blank_line_after=True)
_print_report("Durum", "Bread", y_pred_Durum_2, y_test_Bread, blank_line_after=True)
_print_report("Durum", "Complete", y_pred_Durum_3, y_test_Complete)
_print_report("Durum", "Test", y_pred_Durum_4, y_test)
print("\n===================================\n")

# Bread predictions
print("=========== BREAD TESTS ===========\n")
_print_report("Bread", "Bread", y_pred_Bread_1, y_test_Bread, blank_line_after=True)
_print_report("Bread", "Durum", y_pred_Bread_2, y_test_Durum, blank_line_after=True)
_print_report("Bread", "Complete", y_pred_Bread_3, y_test_Complete)
_print_report("Bread", "Test", y_pred_Bread_4, y_test)
print("\n===================================\n")

# Complete predictions
print("=========== COMPLETE TESTS ===========\n")
_print_report("Complete", "Complete", y_pred_Complete_1, y_test_Complete, blank_line_after=True)
_print_report("Complete", "Bread", y_pred_Complete_2, y_test_Bread, blank_line_after=True)
_print_report("Complete", "Durum", y_pred_Complete_3, y_test_Durum)
_print_report("Complete", "Test", y_pred_Complete_4, y_test)
print("\n===================================\n")



Model (Durum x Durum)
The predicted Data is :
[1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1]
The actual data is:
[1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1]
The Durum model on Durum Data is 100.0% accurate

Model (Durum x Bread)
The predicted Data is :
[0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1]
The actual data is:
[0, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1]
The Durum model on Bread Data is 90.9090909090909% accurate

Model (Durum x Complete)
The predicted Data is :
[1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1]
The actual data is:
[1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1]
The Durum model on Complete Data is 93.33333333333333% accurate
Model (Durum x Test)
The predicted Data is :
[1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1