In [3]:
import pandas as pd
import os
import time
from skimage.transform import resize
from skimage.io import imread
import numpy as np
import matplotlib.pyplot as plt

import logging
import sys
from logging.handlers import TimedRotatingFileHandler

from sklearn import svm
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score


In [4]:


FORMATTER = logging.Formatter("%(asctime)s — %(name)s — %(levelname)s — %(message)s")
LOG_FILE = "SVM.log"

def get_console_handler():
    console_handler = logging.StreamHandler(sys.stdout)
    console_handler.setFormatter(FORMATTER)
    return console_handler

def get_file_handler():
    file_handler = TimedRotatingFileHandler(LOG_FILE, when='midnight')
    file_handler.setFormatter(FORMATTER)
    return file_handler

def get_logger(logger_name):
    logger = logging.getLogger(logger_name)
    
    if (logger.hasHandlers()): # important as removes duplicate loggers (and thus duplicate log entries)
        logger.handlers.clear()
    logger.setLevel(logging.DEBUG) # better to have too much log than not enough
    logger.addHandler(get_console_handler())
    logger.addHandler(get_file_handler())
    # with this pattern, it's rarely necessary to propagate the error up to parent
    logger.propagate = False
    return logger

logger = get_logger("SVM")

In [16]:
Categories_Durum=['CFP-CK1','CFP-CK2'] # [Infected, Healthy]
Categories_Bread=['CFP-CK3','CFP-CK4'] # [Infected, Healthy]
Test_Categories_Bread=['CFP-B179','CFP-B223'] # 39/103 or 37.8% infected; 21/105 or 20% infected

All_Categories = [Categories_Durum, Categories_Bread,Test_Categories_Bread]

datadir='/student/sch923/Thesis/data/test_wheat_2021/Wheat/TestSamples' 
#path which contains all the categories of images

In [6]:
flat_arr, target_arr=[],[]

# creating species specific data sets
for Categories in All_Categories:
    flat_arr.append(list())
    target_arr.append(list())
    for i in Categories:
        logger.info(f'loading... category : {i}')    
        path=os.path.join(datadir,i) 
        image_count = 0
        for img in os.listdir(path):  
            img_array=imread(os.path.join(path,img))
            img_resized=resize(img_array,(150,150,3))  
            flat_arr[len(flat_arr)-1].append(img_resized.flatten())      
            target_arr[len(flat_arr)-1].append(Categories.index(i))
            image_count += 1
        logger.info(f'loaded category: {i} successfully, found {image_count} images')

2021-06-10 15:31:06,021 — SVM — INFO — loading... category : CFP-CK1
2021-06-10 15:31:12,231 — SVM — INFO — loaded category: CFP-CK1 successfully, found 43 images
2021-06-10 15:31:12,234 — SVM — INFO — loading... category : CFP-CK2
2021-06-10 15:31:19,100 — SVM — INFO — loaded category: CFP-CK2 successfully, found 48 images
2021-06-10 15:31:19,102 — SVM — INFO — loading... category : CFP-CK3
2021-06-10 15:31:22,571 — SVM — INFO — loaded category: CFP-CK3 successfully, found 24 images
2021-06-10 15:31:22,574 — SVM — INFO — loading... category : CFP-CK4
2021-06-10 15:31:27,017 — SVM — INFO — loaded category: CFP-CK4 successfully, found 31 images


In [8]:
flat_arr_test, target_arr_test=[], []

# creating test set with mixed data
for i in Test_Categories_Bread:
    logger.info(f'loading... category : {i}')    
    path=os.path.join(datadir,i) 
    image_count = 0
    for img in os.listdir(path):        
        img_array=imread(os.path.join(path,img))
        img_resized=resize(img_array,(150,150,3))  
        flat_arr_test.append(img_resized.flatten())
        target_arr_test.append(0) #Set all images to the same category as we don't know classification
        image_count += 1
    logger.info(f'loaded category: {i} successfully, found {image_count} images')


2021-06-10 15:31:47,666 — SVM — INFO — loading... category : CFP-B179
2021-06-10 15:32:02,396 — SVM — INFO — loaded category: CFP-B179 successfully, found 103 images


In [9]:
# Data wrapper transformation for species specific data
list_arr_fun = lambda l : np.array(l)

flat_data=list(map(list_arr_fun, flat_arr))
target=list(map(list_arr_fun, target_arr))

df_Durum=pd.DataFrame(flat_data[0])
df_Bread=pd.DataFrame(flat_data[1])
df_Complete=pd.DataFrame(flat_data[0].extend(flat_data[1]))
df_Durum['Target']=target[0]
df_Bread['Target']=target[1]
df_Completedf_Bread['Target']=target[0].extend(target[1])

x_Durum=df_Durum.iloc[:,:-1] #input data 
y_Durum=df_Durum.iloc[:,-1] #output data

x_Bread=df_Bread.iloc[:,:-1] #input data 
y_Bread=df_Bread.iloc[:,-1] #output data

x_Complete=df_Complete.iloc[:,:-1] #input data 
y_Complete=df_Complete.iloc[:,-1] #output data

In [11]:
# Data wrapper transformation for test data

flat_data_test=np.array(flat_arr_test)
target_arr_test=np.array(target_arr_test)
df_test=pd.DataFrame(flat_data_test) #dataframe
df_test['Target']=target_arr_test
x_test=df_test.iloc[:,:-1] #input data 
y_test=df_test.iloc[:,-1] #output data

In [12]:

param_grid={'C':[0.1,1,10,100],'gamma':[0.0001,0.001,0.1,1],'kernel':['rbf','poly']}
svc=svm.SVC(probability=True)
model_Durum=GridSearchCV(svm.SVC(probability=True),param_grid)
model_Bread=GridSearchCV(svm.SVC(probability=True),param_grid)
model_Complete=GridSearchCV(svm.SVC(probability=True),param_grid)
# model.best_params_ contains the best parameters obtained from GridSearchCV

In [13]:


x_train_Durum,x_test_Durum,y_train_Durum,y_test_Durum=train_test_split(x_Durum,y_Durum,test_size=0.20,random_state=77,stratify=y_Durum)
logger.info('Durum Splitted Successfully')
x_train_Bread,x_test_Bread,y_train_Bread,y_test_Bread=train_test_split(x_Bread,y_Bread,test_size=0.20,random_state=77,stratify=y_Bread)
logger.info('Bread Splitted Successfully')
x_train_Complete,x_test_Complete,y_train_Complete,y_test_Complete=train_test_split(x_Complete,y_Complete,test_size=0.20,random_state=77,stratify=y_Complete)
logger.info('Complete Splitted Successfully')

x_train_test,x_test_test,_,_=train_test_split(x_test,y_test,test_size=0.20,random_state=77,stratify=y_test)
logger.info('Test Splitted Successfully')

2021-06-10 15:32:02,866 — SVM — INFO — Durum Splitted Successfully
2021-06-10 15:32:02,882 — SVM — INFO — Bread Splitted Successfully
2021-06-10 15:32:02,911 — SVM — INFO — Complete Splitted Successfully
2021-06-10 15:32:02,933 — SVM — INFO — Test Splitted Successfully


In [14]:
logger.info("Starting to train Durum model...")
start = time.time()
model_Durum.fit(x_train_Durum,y_train_Durum)
end = time.time()
hours, rem = divmod(end-start, 3600)
minutes, seconds = divmod(rem, 60)
logger.info("The Durum model trained in: {:0>2}:{:0>2}:{:05.2f}".format(int(hours),int(minutes),seconds))

logger.info("Starting to train Bread model...")
start = time.time()
model_Bread.fit(x_train_Bread,y_train_Bread)
end = time.time()
hours, rem = divmod(end-start, 3600)
minutes, seconds = divmod(rem, 60)
logger.info("The Bread model trained in: {:0>2}:{:0>2}:{:05.2f}".format(int(hours),int(minutes),seconds))

logger.info("Starting to train Complete model...")
start = time.time()
model_Complete.fit(x_train_Complete,y_train_Complete)
end = time.time()
hours, rem = divmod(end-start, 3600)
minutes, seconds = divmod(rem, 60)
logger.info("The Complete model trained in: {:0>2}:{:0>2}:{:05.2f}".format(int(hours),int(minutes),seconds))


2021-06-10 15:32:02,947 — SVM — INFO — Starting to train Durum model...
2021-06-10 15:34:21,797 — SVM — INFO — The Durum model trained in: 00:02:18.85
2021-06-10 15:34:21,799 — SVM — INFO — Starting to train Bread model...
2021-06-10 15:36:13,723 — SVM — INFO — The Bread model trained in: 00:01:51.92
2021-06-10 15:36:13,725 — SVM — INFO — Starting to train Complete model...
2021-06-10 15:39:37,088 — SVM — INFO — The Complete model trained in: 00:03:23.36


In [15]:


y_pred_Durum_1=model_Durum.predict(x_test_Durum)
y_pred_Durum_2=model_Durum.predict(x_test_Bread)
y_pred_Durum_3=model_Durum.predict(x_test_Complete)
y_pred_Durum_4=model_Durum.predict(x_test)

y_pred_Bread_1=model_Bread.predict(x_test_Bread)
y_pred_Bread_2=model_Bread.predict(x_test_Durum)
y_pred_Bread_3=model_Bread.predict(x_test_Complete)
y_pred_Bread_4=model_Bread.predict(x_test)

y_pred_Complete_1=model_Complete.predict(x_test_Complete)
y_pred_Complete_2=model_Complete.predict(x_test_Bread)
y_pred_Complete_3=model_Complete.predict(x_test_Durum)
y_pred_Complete_4=model_Complete.predict(x_test)

In [44]:
# Durum predictions
logger.info("=========== DURUM TESTS ===========\n")
logger.info("Model (Durum x Durum)")
logger.info("The predicted Data is :")
logger.info(y_pred_Durum_1.tolist())
logger.info("The actual data is:")
logger.info(y_test_Durum.tolist())
logger.info(f"The Durum model on Durum Data is {accuracy_score(y_pred_Durum_1,y_test_Durum)*100}% accurate\n")

logger.info("Model (Durum x Bread)")
logger.info("The predicted Data is :")
logger.info(y_pred_Durum_2.tolist())
logger.info("The actual data is:")
logger.info(y_test_Bread.tolist())
logger.info(f"The Durum model on Bread Data is {accuracy_score(y_pred_Durum_2,y_test_Bread)*100}% accurate\n")

logger.info("Model (Durum x Complete)")
logger.info("The predicted Data is :")
logger.info(y_pred_Durum_3.tolist())
logger.info("The actual data is:")
logger.info(y_test_Complete.tolist())
logger.info(f"The Durum model on Complete Data is {accuracy_score(y_pred_Durum_3,y_test_Complete)*100}% accurate")

logger.info("Model (Durum x Test)")
logger.info("The predicted Data is :")
logger.info(y_pred_Durum_4.tolist())
logger.info("The actual data is:")
logger.info(y_test.tolist())
logger.info(f"Runnnig the Durum model on Test Data, the model predicts {accuracy_score(y_pred_Durum_4,y_test)*100}% are infected")
logger.info("\n===================================\n")

# Bread predictions
logger.info("=========== BREAD TESTS ===========\n")
logger.info("Model (Bread x Bread)")
logger.info("The predicted Data is :")
logger.info(y_pred_Bread_1.tolist())
logger.info("The actual data is:")
logger.info(y_test_Bread.tolist())
logger.info(f"The Bread model on Bread Data is {accuracy_score(y_pred_Bread_1,y_test_Bread)*100}% accurate\n")

logger.info("Model (Bread x Durum)")
logger.info("The predicted Data is :")
logger.info(y_pred_Bread_2.tolist())
logger.info("The actual data is:")
logger.info(y_test_Durum.tolist())
logger.info(f"The Bread model on Durum Data is {accuracy_score(y_pred_Bread_2,y_test_Durum)*100}% accurate\n")

logger.info("Model (Bread x Complete)")
logger.info("The predicted Data is :")
logger.info(y_pred_Bread_3.tolist())
logger.info("The actual data is:")
logger.info(y_test_Complete.tolist())
logger.info(f"The Bread model on Complete Data is {accuracy_score(y_pred_Bread_3,y_test_Complete)*100}% accurate")

logger.info("Model (Bread x Test)")
logger.info("The predicted Data is :")
logger.info(y_pred_Bread_4.tolist())
logger.info("The actual data is:")
logger.info(y_test.tolist())
logger.info(f"Runnnig the Bread model on Test Data, the model predicts {accuracy_score(y_pred_Bread_4,y_test)*100}% are infected")
logger.info("\n===================================\n")

# Complete predictions
logger.info("=========== COMPLETE TESTS ===========\n")
logger.info("Model (Complete x Complete)")
logger.info("The predicted Data is :")
logger.info(y_pred_Complete_1.tolist())
logger.info("The actual data is:")
logger.info(y_test_Complete.tolist())
logger.info(f"The Complete model on Complete Data is {accuracy_score(y_pred_Complete_1,y_test_Complete)*100}% accurate\n")

logger.info("Model (Complete x Bread)")
logger.info("The predicted Data is :")
logger.info(y_pred_Complete_2.tolist())
logger.info("The actual data is:")
logger.info(y_test_Bread.tolist())
logger.info(f"The Complete model on Bread Data is {accuracy_score(y_pred_Complete_2,y_test_Bread)*100}% accurate\n")

logger.info("Model (Complete x Durum)")
logger.info("The predicted Data is :")
logger.info(y_pred_Complete_3.tolist())
logger.info("The actual data is:")
logger.info(y_test_Durum.tolist())
logger.info(f"The Complete model on Durum Data is {accuracy_score(y_pred_Complete_3,y_test_Durum)*100}% accurate")

logger.info("Model (Complete x Test)")
logger.info("The predicted Data is :")
logger.info(y_pred_Complete_4.tolist())
logger.info("The actual data is:")
logger.info(y_test.tolist())
logger.info(f"Runnnig the complete model on Test Data, the model predicts {accuracy_score(y_pred_Complete_4,y_test)*100}% are infected")
logger.info("\n===================================\n")



2021-06-10 10:49:21,278 — SVM — INFO — Model (Durum x Durum)
2021-06-10 10:49:21,279 — SVM — INFO — The predicted Data is :
2021-06-10 10:49:21,280 — SVM — INFO — [1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1]
2021-06-10 10:49:21,282 — SVM — INFO — The actual data is:
2021-06-10 10:49:21,283 — SVM — INFO — [1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1]
2021-06-10 10:49:21,286 — SVM — INFO — The Durum model on Durum Data is 100.0% accurate

2021-06-10 10:49:21,287 — SVM — INFO — Model (Durum x Bread)
2021-06-10 10:49:21,288 — SVM — INFO — The predicted Data is :
2021-06-10 10:49:21,290 — SVM — INFO — [0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1]
2021-06-10 10:49:21,292 — SVM — INFO — The actual data is:
2021-06-10 10:49:21,293 — SVM — INFO — [0, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1]
2021-06-10 10:49:21,296 — SVM — INFO — The Durum model on Bread Data is 90.9090909090909% accurate

2021-06-10 10:49:21,297 — SVM — INFO — Model (Durum x Complete)
2021-06-10 10:49:21,298 — SVM — INFO — T