In [81]:
# Data Processing Imports
import pandas as pd
import os
from skimage.transform import resize
from skimage.io import imread
import numpy as np
import matplotlib.pyplot as plt

# Logger Imports
import time
import logging
import sys
from logging.handlers import TimedRotatingFileHandler

# Model Specific Imports
from sklearn.metrics import accuracy_score
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression

from sklearn import svm
from sklearn.model_selection import GridSearchCV

# Render matplotlib figures inline in the notebook output.
%matplotlib inline
from IPython.core.interactiveshell import InteractiveShell
# Display the value of every top-level expression in a cell, not only the last one.
InteractiveShell.ast_node_interactivity = "all"

In [63]:

# File that the rotating file handler writes to.
LOG_FILE = "LR.log"

# Record layout shared by every handler: timestamp, logger name, level, message.
FORMATTER = logging.Formatter(
    fmt="%(asctime)s — %(name)s — %(levelname)s — %(message)s"
)

def get_console_handler():
    """Build a handler that writes records to stdout using FORMATTER."""
    stdout_handler = logging.StreamHandler(stream=sys.stdout)
    stdout_handler.setFormatter(FORMATTER)
    return stdout_handler

def get_file_handler():
    """Build a handler that writes records to LOG_FILE, rotating at midnight.

    The file is opened as UTF-8 explicitly: FORMATTER contains non-ASCII
    separator characters, which could fail to encode under a non-UTF-8
    locale default.
    """
    file_handler = TimedRotatingFileHandler(LOG_FILE, when='midnight', encoding='utf-8')
    file_handler.setFormatter(FORMATTER)
    return file_handler

def get_logger(logger_name):
    """Return a DEBUG-level logger that writes to stdout and to the log file.

    Safe to call repeatedly for the same name (e.g. when the notebook cell is
    re-run): handlers already attached to the logger are detached and closed
    first, so records are not emitted twice and the previously opened log-file
    stream is not left open.
    """
    logger = logging.getLogger(logger_name)

    # Look at this logger's own handlers only; hasHandlers() would also report
    # handlers attached to ancestor loggers, which are not ours to manage.
    for stale_handler in list(logger.handlers):
        logger.removeHandler(stale_handler)
        stale_handler.close()  # releases the open log-file stream

    logger.setLevel(logging.DEBUG)  # better to have too much log than not enough
    logger.addHandler(get_console_handler())
    logger.addHandler(get_file_handler())
    # Both sinks are attached directly, so records need not travel up to parent loggers.
    logger.propagate = False
    return logger

# Notebook-wide logger; its name "LR" is what %(name)s prints in every record.
logger = get_logger("LR")

In [None]:
# Image sub-folders of `datadir`, grouped by wheat species.
# For the two training groups the position in the list is used as the class label.
Categories_Durum=['CFP-CK1','CFP-CK2'] # [Infected, Healthy]
Categories_Bread=['CFP-CK3','CFP-CK4'] # [Infected, Healthy]
# Samples for which only the overall infection rate is known, not per-image labels.
Categories_Test=['CFP-B179','CFP-B223'] # 39/103 or 37.8% infected; 21/105 or 20% infected

All_Categories = [Categories_Durum, Categories_Bread,Categories_Test]

# Path which contains all the categories of images.
# Can be overridden with the WHEAT_DATA_DIR environment variable so the notebook
# also runs on machines where the data lives elsewhere; the default is unchanged.
datadir=os.environ.get('WHEAT_DATA_DIR', '/student/sch923/Thesis/data/test_wheat_2021/Wheat/TestSamples')

In [60]:
# Every image is resized to this shape before being flattened into one feature row.
IMG_SHAPE = (150, 150, 3)

flat_arr, target_arr=[],[]

# creating species specific data sets: one list of feature rows and one list of
# labels per entry of All_Categories, in the same order
for Categories in All_Categories:
    group_pixels, group_labels = [], []
    for category in Categories:
        logger.info(f'loading... category : {category}')
        path=os.path.join(datadir,category)
        image_count = 0
        # os.listdir() returns entries in arbitrary order; sorting keeps the row
        # order (and therefore the seeded train/test split) reproducible.
        for file_name in sorted(os.listdir(path)):
            img_path = os.path.join(path, file_name)
            if not os.path.isfile(img_path):
                # Skip sub-directories (e.g. .ipynb_checkpoints), which imread cannot open.
                continue
            img_array=imread(img_path)
            img_resized=resize(img_array,IMG_SHAPE)
            group_pixels.append(img_resized.flatten())

            if category in Categories_Test:
                group_labels.append(0) # As only % infection is known, all are assumed to be infected
            else:
                # Label is the category's position within its group.
                group_labels.append(Categories.index(category))
            image_count += 1
        logger.info(f'loaded category: {category} successfully, found {image_count} images')
    flat_arr.append(group_pixels)
    target_arr.append(group_labels)

2021-06-17 10:43:08,182 — LR — INFO — loading... category : CFP-CK1
2021-06-17 10:43:14,313 — LR — INFO — loaded category: CFP-CK1 successfully, found 43 images
2021-06-17 10:43:14,314 — LR — INFO — loading... category : CFP-CK2
2021-06-17 10:43:21,111 — LR — INFO — loaded category: CFP-CK2 successfully, found 48 images
2021-06-17 10:43:21,113 — LR — INFO — loading... category : CFP-CK3
2021-06-17 10:43:24,506 — LR — INFO — loaded category: CFP-CK3 successfully, found 24 images
2021-06-17 10:43:24,507 — LR — INFO — loading... category : CFP-CK4
2021-06-17 10:43:28,890 — LR — INFO — loaded category: CFP-CK4 successfully, found 31 images
2021-06-17 10:43:28,891 — LR — INFO — loading... category : CFP-B179
2021-06-17 10:43:43,453 — LR — INFO — loaded category: CFP-B179 successfully, found 103 images
2021-06-17 10:43:43,454 — LR — INFO — loading... category : CFP-B223
2021-06-17 10:43:57,056 — LR — INFO — loaded category: CFP-B223 successfully, found 96 images


In [62]:
def list_arr_fun(l):
    """Convert a list of samples (or labels) into a NumPy array."""
    return np.array(l)

# One feature matrix and one label vector per group, in All_Categories order.
flat_data=[list_arr_fun(group) for group in flat_arr]
target=[list_arr_fun(group) for group in target_arr]

df_Durum=pd.DataFrame(flat_data[0])
df_Bread=pd.DataFrame(flat_data[1])
# "Complete" is the Durum rows followed by the Bread rows.
df_Complete=pd.DataFrame(np.concatenate((flat_data[0], flat_data[1]), axis=0))
df_Test=pd.DataFrame(flat_data[2])

# The label is appended as the last column so it can be split off by position below.
df_Durum['Target']=target[0]
df_Bread['Target']=target[1]
df_Complete['Target']=np.concatenate((target[0], target[1]), axis=0)
df_Test['Target']=target[2]

x_Durum=df_Durum.iloc[:,:-1] #input data
y_Durum=df_Durum.iloc[:,-1] #output data

x_Bread=df_Bread.iloc[:,:-1] #input data
y_Bread=df_Bread.iloc[:,-1] #output data

x_Complete=df_Complete.iloc[:,:-1] #input data
y_Complete=df_Complete.iloc[:,-1] #output data

# Taken from df_Test (previously sliced from df_Complete by mistake, which left
# the test samples unused).
x_Test=df_Test.iloc[:,:-1] #input data
y_Test=df_Test.iloc[:,-1] #output data


In [69]:
# Data Splitting
# Every dataset gets the same treatment: 80/20 split, fixed seed, stratified on
# its own label column.
split_options = dict(test_size=0.20, random_state=77)

logger.info('======= Start Of Data Split =======')

x_train_Durum, x_test_Durum, y_train_Durum, y_test_Durum = train_test_split(
    x_Durum, y_Durum, stratify=y_Durum, **split_options
)
logger.info('Durum Splitted Successfully')

x_train_Bread, x_test_Bread, y_train_Bread, y_test_Bread = train_test_split(
    x_Bread, y_Bread, stratify=y_Bread, **split_options
)
logger.info('Bread Splitted Successfully')

x_train_Complete, x_test_Complete, y_train_Complete, y_test_Complete = train_test_split(
    x_Complete, y_Complete, stratify=y_Complete, **split_options
)
logger.info('Complete Splitted Successfully')

# Only the feature halves of this split are kept; the label halves are discarded.
x_train_Test, x_test_Test, _, _ = train_test_split(
    x_Test, y_Test, stratify=y_Test, **split_options
)
logger.info('Test Splitted Successfully')

logger.info('======= End Of Data Split =======\n')

2021-06-17 10:45:49,717 — LR — INFO — Durum Splitted Successfully
2021-06-17 10:45:49,736 — LR — INFO — Bread Splitted Successfully
2021-06-17 10:45:49,770 — LR — INFO — Complete Splitted Successfully
2021-06-17 10:45:49,799 — LR — INFO — Test Splitted Successfully



In [66]:
# Logistic Regression Models



In [82]:
# SVM Models

def _fit_and_time(model, x_train, y_train, label):
    """Fit `model` on the given training data and log how long the fit took.

    `label` is the dataset name inserted into the log messages (e.g. 'Durum').
    Returns the fitted model.
    """
    logger.info(f'Starting to train SVM {label} model...')
    # perf_counter() is monotonic, so the measured duration is not affected by
    # system clock adjustments during a long fit.
    start = time.perf_counter()
    model.fit(x_train, y_train)
    elapsed = time.perf_counter() - start
    hours, rem = divmod(elapsed, 3600)
    minutes, seconds = divmod(rem, 60)
    logger.info("The {} model trained in: {:0>2}:{:0>2}:{:05.2f}".format(label, int(hours), int(minutes), seconds))
    return model


logger.info('======= Start Of SVM Model Generation =======')
param_grid={'C':[0.1,1,10,100],'gamma':[0.0001,0.001,0.1,1],'kernel':['rbf','poly']}
svc=svm.SVC(probability=True)
# GridSearchCV clones its estimator when fitting, so the three searches can
# share one SVC template without sharing fitted state.
SVM_model_Durum=GridSearchCV(svc,param_grid)
SVM_model_Bread=GridSearchCV(svc,param_grid)
SVM_model_Complete=GridSearchCV(svc,param_grid)

_fit_and_time(SVM_model_Durum, x_train_Durum, y_train_Durum, 'Durum')
_fit_and_time(SVM_model_Bread, x_train_Bread, y_train_Bread, 'Bread')
_fit_and_time(SVM_model_Complete, x_train_Complete, y_train_Complete, 'Complete')

2021-06-17 10:50:16,783 — LR — INFO — Starting to train SVM Durum model...


KeyboardInterrupt: 