## Authorship Information
__author__ = "Arif Haidari"<br>
__credits__ = ["Bernd Brinkmann", "Luigi Menale", "Alex Tavkhelidze", "Romain Lesieur"]<br>
__status__ = "Development"<br>
__project__ = "Plant Recognition"<br>
__scope__ = "DataScientest's Bootcamp in Data Science"

In [2]:
import os
import cv2
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.svm import SVC
from sklearn.metrics import classification_report, accuracy_score
from skimage.feature import hog, local_binary_pattern
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import make_pipeline
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import StandardScaler
from xgboost import XGBClassifier

In [None]:
# Conclusion so far:
# Combination of HOG and Color Histograme --> feature extraction

In [None]:

# extracting features
# Function to extract combined HOG and color histogram features
def extract_hog_color_hist_features(image, resize=(256, 256)):
    image = cv2.resize(image, resize)
    # Extract HOG features
    gray_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    hog_features = hog(gray_image, orientations=9, pixels_per_cell=(8, 8),
                       cells_per_block=(2, 2), block_norm='L2-Hys', visualize=False)

    # Extract color histogram features
    hist = cv2.calcHist([image], [0, 1, 2], None, [8, 8, 8], [0, 256, 0, 256, 0, 256])
    hist = cv2.normalize(hist, hist).flatten()

    # Combine HOG and color histogram features
    combined_features = np.hstack((hog_features, hist))
    return combined_features

In [None]:
# XGBoost (XGBClassifier):
# Looking at literature, XGBClassifier is the ideal classifier algorithm compare to SVM
# So I use the XGBClassifier to train a model using the optimal method for feature extraction

In [None]:


data_dir = '/content/drive/MyDrive/raw_dataset/apple_recognition/'

labels = []
features = []

for label in os.listdir(data_dir):
    label_dir = os.path.join(data_dir, label)
    if not os.path.isdir(label_dir):
        continue
    for image_file in os.listdir(label_dir):
        image_path = os.path.join(label_dir, image_file)
        image = cv2.imread(image_path)
        if image is not None:
            # get image features
            hog_features = extract_hog_color_hist_features(image)
            features.append(hog_features)
            labels.append(label)


# Convert to numpy arrays
X = np.array(features)
target = np.array(labels)


le = LabelEncoder()
labels_encoded = le.fit_transform(target)

X_train, X_test, y_train, y_test = train_test_split(X, labels_encoded, test_size=0.2, random_state=42)

# XGBoost model
xgb_model = XGBClassifier(n_estimators=100,
                               max_depth=3,
                               learning_rate=0.1,
                               n_jobs=-1,
                               subsample=0.8,
                               colsample_bytree=0.8,
                               objective='multi:softmax',
                               num_class=len(np.unique(labels_encoded)))

# Train the model
xgb_model.fit(X_train, y_train, eval_metric='mlogloss', eval_set=[(X_test, y_test)], early_stopping_rounds=10, verbose=False)

# Predictions
y_pred = xgb_model.predict(X_test)

# Evaluation
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Classification Report:")
print(classification_report(y_test, y_pred, target_names=le.classes_))




Accuracy: 0.970108695652174
Classification Report:
                          precision    recall  f1-score   support

      Apple___Apple_scab       0.98      0.93      0.95        99
       Apple___Black_rot       0.99      1.00      0.99        75
Apple___Cedar_apple_rust       0.96      0.99      0.97        95
         Apple___healthy       0.96      0.97      0.96        99

                accuracy                           0.97       368
               macro avg       0.97      0.97      0.97       368
            weighted avg       0.97      0.97      0.97       368



In [None]:
data_dir = '/content/drive/MyDrive/raw_dataset/apple_recognition/'

labels = []
features = []

for label in os.listdir(data_dir):
    label_dir = os.path.join(data_dir, label)
    if not os.path.isdir(label_dir):
        continue
    for image_file in os.listdir(label_dir):
        image_path = os.path.join(label_dir, image_file)
        image = cv2.imread(image_path)
        if image is not None:
            # get image features
            hog_features = extract_hog_color_hist_features(image)
            features.append(hog_features)
            labels.append(label)


# Convert to numpy arrays
X = np.array(features)
target = np.array(labels)


le = LabelEncoder()
labels_encoded = le.fit_transform(target)

X_train, X_test, y_train, y_test = train_test_split(X, labels_encoded, test_size=0.2, random_state=42)

In [None]:
# Save the model

import pickle

model_filename = '/content/drive/MyDrive/colab_notebook/xgboost_model.pkl'
with open(model_filename, 'wb') as model_file:
    pickle.dump(xgb_model, model_file)


In [None]:
# import the model
import pickle

model_filename = '/content/drive/MyDrive/colab_notebook/xgboost_model.pkl'

loaded_model = None
with open(model_filename, 'rb') as model_file:
    loaded_model = pickle.load(model_file)


y_pred_deploy = loaded_model.predict(X_test)

# Evaluate the deployed model
print("Accuracy (deployed model):", accuracy_score(y_test, y_pred_deploy))
print("Classification Report (deployed model):")
print(classification_report(y_test, y_pred_deploy, target_names=le.classes_))

In [None]:
# Test model with real data:

# Load the test image
test_image_path = '/content/drive/MyDrive/raw_dataset/test/AppleScab3.JPG'
test_image = cv2.imread(test_image_path)
test_image = cv2.resize(test_image, (256, 256))  # Resize the image to 256x256

# Extract features from the test image
test_features = extract_hog_color_hist_features(test_image)

# Reshape the features array to match the input format of the XGBoost model
test_features_reshaped = test_features.reshape(1, -1)


# predict with loaded_model
predicted_label = xgb_model.predict(test_features_reshaped)[0]

# Decode the predicted label
predicted_class = le.inverse_transform([predicted_label])[0]

# Print the predicted label
print("Predicted class:", predicted_class)

# Correct prediction:
# AppleCedarRust1.JPG
# Predicted class: Apple___Cedar_apple_rust
# AppleScab3.JPG
# Predicted class: Apple___Apple_scab
# image (954).JPG
# Predicted class: Apple___healthy

# photo from google:
# 2-black_rot:
# Predicted class: Apple___Black_rot
# 1-cedar_rust.jpeg
# Predicted class: Apple___Cedar_apple_rust
# 2-cedar_rust.jpeg
# Predicted class: Apple___Cedar_apple_rust
# ==============================

# Wrong Prediction - Limitation:
# photo from Internet
# 1-black_rot
# Predicted class: Apple___Cedar_apple_rust
# 3-cedar_rust.jpeg
# Predicted class: Apple___Black_rot
# apple-1 ----> black leaf
# Predicted class: Apple___Cedar_apple_rust
# apple-2 ---> more than one leaves
# Predicted class: Apple___Cedar_apple_rust
# apple-3 ----> more than one leaves
# Predicted class: Apple___Cedar_apple_rust
# apple-4 ---> with noice -- with rain drops on it
# Predicted class: Apple___Apple_scab
# apple-5
# Predicted class: Apple___Cedar_apple_rust

Predicted class: Apple___Apple_scab


In [None]:
# Use a pre-trained CNN to extract features from the images. VGG16 is pre-trained on large datasets like ImageNet.

In [3]:

from keras.applications.vgg16 import VGG16, preprocess_input
from keras.models import Model
from keras.preprocessing.image import img_to_array

# Function to extract features using VGG16
def extract_features_vgg16(image, model):
    image = cv2.resize(image, (256, 256))
    image = img_to_array(image)
    image = np.expand_dims(image, axis=0)
    image = preprocess_input(image)
     # Extract features
    features = model.predict(image)
    return features.flatten()

# Load pre-trained VGG16 model + higher level layers
base_model = VGG16(weights='imagenet', include_top=False)
model = Model(inputs=base_model.input, outputs=base_model.output)

data_dir = '/content/drive/MyDrive/raw_dataset/apple_recognition/'

labels = []
features = []

for label in os.listdir(data_dir):
    label_dir = os.path.join(data_dir, label)
    if not os.path.isdir(label_dir):
        continue
    for image_file in os.listdir(label_dir):
        image_path = os.path.join(label_dir, image_file)
        image = cv2.imread(image_path)
        if image is not None:
            # get image features
            vgg16_features = extract_features_vgg16(image, model)
            features.append(vgg16_features)
            labels.append(label)

# Convert to numpy arrays
X = np.array(features)
target = np.array(labels)

le = LabelEncoder()
labels_encoded = le.fit_transform(target)

X_train, X_test, y_train, y_test = train_test_split(X, labels_encoded, test_size=0.2, random_state=42)

# XGBoost model
xgb_model = XGBClassifier(n_estimators=100,
                          max_depth=3,
                          learning_rate=0.1,
                          subsample=0.8, n_jobs=-1,
                          colsample_bytree=0.8,
                          objective='multi:softmax',
                          num_class=len(np.unique(labels_encoded)))

# Train the model
xgb_model.fit(X_train, y_train, eval_metric='mlogloss', eval_set=[(X_test, y_test)], early_stopping_rounds=10, verbose=False)

# Predictions
y_pred = xgb_model.predict(X_test)

# Evaluation
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Classification Report:")
print(classification_report(y_test, y_pred, target_names=le.classes_))






Accuracy: 0.967391304347826
Classification Report:
                          precision    recall  f1-score   support

      Apple___Apple_scab       0.96      0.94      0.95        99
       Apple___Black_rot       0.97      0.95      0.96        75
Apple___Cedar_apple_rust       0.99      0.98      0.98        95
         Apple___healthy       0.95      1.00      0.98        99

                accuracy                           0.97       368
               macro avg       0.97      0.97      0.97       368
            weighted avg       0.97      0.97      0.97       368



In [4]:
# Save the model

import pickle

model_filename = '/content/drive/MyDrive/colab_notebook/xgb_model_vgg16.pkl'
with open(model_filename, 'wb') as model_file:
    pickle.dump(xgb_model, model_file)

In [14]:
# Test model with real data:

# Load the test image
test_image_path = '/content/drive/MyDrive/raw_dataset/test/apple-1.jpeg'
test_image = cv2.imread(test_image_path)
test_image = cv2.resize(test_image, (256, 256))  # Resize the image to 256x256

# Load pre-trained VGG16 model + higher level layers
base_model = VGG16(weights='imagenet', include_top=False)
model = Model(inputs=base_model.input, outputs=base_model.output)

# Extract features from the test image
test_features = extract_features_vgg16(test_image, model)

# Reshape the features array to match the input format of the XGBoost model
test_features_reshaped = test_features.reshape(1, -1)


# predict with loaded_model
predicted_label = xgb_model.predict(test_features_reshaped)[0]

# Decode the predicted label
predicted_class = le.inverse_transform([predicted_label])[0]

# Print the predicted label
print("Predicted class:", predicted_class)

# Correct prediction:
# AppleCedarRust1.JPG
# Predicted class: Apple___Cedar_apple_rust
# AppleScab3.JPG
# Predicted class: Apple___Apple_scab
# image (954).JPG
# Predicted class: Apple___healthy

# images from internet
# 1-cedar_rust.jpeg
# Predicted class: Apple___Cedar_apple_rust

# =============

# Wrong Prediction

# images from internet
# 3-cedar_rust.jpeg
# Predicted class: Apple___Apple_scab
# 2-cedar_rust.jpeg
# Predicted class: Apple___Apple_scab
# 2-black_rot.jpeg
# Predicted class: Apple___Apple_scab
# 1-black_rot.jpeg
# Predicted class: Apple___Apple_scab
# apple-1 ----> black leaf
# Predicted class: Apple___Cedar_apple_rust

Predicted class: Apple___Cedar_apple_rust


In [None]:
# prevoiusly output with image size (224, 224):
# Accuracy: 0.9538043478260869
# Classification Report:
#                           precision    recall  f1-score   support

#       Apple___Apple_scab       0.92      0.94      0.93        99
#        Apple___Black_rot       0.97      0.89      0.93        75
# Apple___Cedar_apple_rust       1.00      0.98      0.99        95
#          Apple___healthy       0.93      0.99      0.96        99

#                 accuracy                           0.95       368
#                macro avg       0.96      0.95      0.95       368
#             weighted avg       0.95      0.95      0.95       368