In [None]:
#@title Initial imports

from google.colab import drive
drive.mount('/content/mydrive')
import numpy as np
import pandas as pd
import os
from pathlib import Path
import os.path
import time
import matplotlib.pyplot as plt
import cv2
import seaborn as sns
sns.set_style('darkgrid')
import shutil
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.model_selection import train_test_split
import tensorflow as tf
# from tensorflow import keras
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.layers import Dense, Activation,Dropout,Conv2D, MaxPooling2D,BatchNormalization
from tensorflow.keras.optimizers import Adam, Adamax
from tensorflow.keras.metrics import categorical_crossentropy
from tensorflow.keras import regularizers
from tensorflow.keras.models import Model
from IPython.display import Image, display
import matplotlib.cm as cm
import PIL
import logging
logging.getLogger("tensorflow").setLevel(logging.ERROR)
import warnings
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', 90)
warnings.simplefilter("ignore")

Mounted at /content/mydrive


In [None]:
#@title Mapping database with classes

# Template placeholders: both paths must be filled in before this cell can run
# (as written, the two bare assignments below are not valid Python).
sdir = # Add path directory to image dataset
csvpath = # Add path directory to image annotations
# Load the annotation CSV and give its two columns the names used by the rest of the notebook.
df=pd.read_csv(csvpath)
df.columns=['filepaths', 'labels' ]
# Join every entry of the 'filepaths' column onto sdir.
df['filepaths']=df['filepaths'].apply(lambda x: os.path.join(sdir,x))
print(df.head())

# Split fractions of the full dataset: training and validation. Whatever is
# left after both (here 5%) becomes the test split.
trsplit=.9
vsplit=.05
# Share of the held-out remainder that the second split assigns to its first output.
dsplit =vsplit/(1-trsplit)

# First split: training rows vs. held-out remainder, stratified by label.
# train_size uses trsplit (previously a hard-coded .9) so that dsplit above
# always matches the split that is actually performed.
strat=df['labels']
train_df, dummy_df=train_test_split(df, train_size=trsplit, shuffle=True, random_state=123, stratify=strat)

# Second split: divide the remainder into test and validation rows.
# NOTE(review): dsplit is derived from vsplit, yet the dsplit-sized part is the
# one named test_df. With the current fractions both parts are (almost) equal
# in size, so the naming has no practical effect — confirm before changing
# trsplit/vsplit.
strat=dummy_df['labels']
test_df, valid_df=train_test_split(dummy_df, train_size=dsplit, shuffle=True, random_state=123, stratify=strat)
print('train_df length:', len(train_df), '  test_df length:', len(test_df), '  valid_df length:', len(valid_df))

# Class names in order of first appearance in the training split.
classes=list(train_df['labels'].unique())
class_count = len(classes)

# Per-class image counts. These are counted over the FULL dataframe (df),
# not just the training split.
groups=df.groupby('labels')
images_per_class=groups.size()
print('{0:^30s} {1:^13s}'.format('CLASS', 'IMAGE COUNT'))
for label in classes:
    print('{0:^30s} {1:^13s}'.format(label, str(images_per_class[label])))

                                           filepaths              labels
0  /content/mydrive/MyDrive/MI Project/Train/salm...          Salmonella
1  /content/mydrive/MyDrive/MI Project/Train/cocc...         Coccidiosis
2  /content/mydrive/MyDrive/MI Project/Train/cocc...         Coccidiosis
3  /content/mydrive/MyDrive/MI Project/Train/salm...          Salmonella
4  /content/mydrive/MyDrive/MI Project/Train/ncd....  New Castle Disease
train_df length: 7260   test_df length: 403   valid_df length: 404
            CLASS               IMAGE COUNT 
         Coccidiosis               2476     
          Salmonella               2625     
           Healthy                 2404     
      New Castle Disease            562     


In [None]:
#@title Trimming dataset to balance

def trim(df, max_size, min_size, column, random_state=123):
    """Balance a dataframe by capping and filtering the classes found in ``column``.

    Classes with more than ``max_size`` rows are randomly down-sampled to
    exactly ``max_size`` rows; classes with fewer than ``min_size`` rows are
    dropped; every other class is kept unchanged. Rows whose label is missing
    (NaN) are dropped as well, because ``groupby`` skips them.

    Parameters
    ----------
    df : pandas.DataFrame
        Input rows. The frame is not modified.
    max_size : int
        Maximum number of rows kept per class.
    min_size : int
        Minimum number of rows a class needs in order to be kept.
    column : str
        Name of the column holding the class labels.
    random_state : int, optional
        Seed used for the down-sampling (default 123).

    Returns
    -------
    pandas.DataFrame
        The retained rows, grouped by class in order of first appearance, with
        a fresh 0..n-1 index. If no class qualifies, an empty frame with the
        same columns is returned.
    """
    original_class_count = len(df[column].unique())
    print ('Original Number of classes in dataframe:', original_class_count)
    sample_list = []
    # sort=False yields the groups in order of first appearance of each label.
    for _, group in df.groupby(column, sort=False):
        sample_count = len(group)
        if sample_count > max_size:
            samples, _ = train_test_split(group, train_size=max_size, shuffle=True,
                                          random_state=random_state, stratify=group[column])
            sample_list.append(samples)
        elif sample_count >= min_size:
            sample_list.append(group)
    if sample_list:
        trimmed = pd.concat(sample_list, axis=0).reset_index(drop=True)
    else:
        # pd.concat raises on an empty list; return an empty frame instead.
        trimmed = df.iloc[0:0].reset_index(drop=True)
    final_class_count = len(trimmed[column].unique())
    if final_class_count != original_class_count:
        print ('*** WARNING ***  dataframe has a reduced number of classes' )
    balance = list(trimmed[column].value_counts())
    print(balance)
    return trimmed

# Balancing settings: at most max_samples rows per class; a min_samples of 0
# means no class is dropped for being too small.
column = 'labels'
min_samples = 0
max_samples = 500
train_df = trim(df=train_df, max_size=max_samples, min_size=min_samples, column=column)

# Settings that are not referenced in the cells visible here.
working_dir = r'./'
img_size = (224, 224)

Original Number of classes in dataframe: 4
[500, 500, 500, 500]


In [None]:
!pip install mahotas

Collecting mahotas
  Downloading mahotas-1.4.15-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (5.4 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m5.4/5.4 MB[0m [31m16.9 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: mahotas
Successfully installed mahotas-1.4.15


In [None]:
#@title Feature Extraction
# Define feature extraction functions
import mahotas
def fd_hu_moments(image):
    """Shape descriptor: the Hu moments of the grayscale version of a BGR image, flattened to 1-D."""
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    raw_moments = cv2.moments(gray)
    return cv2.HuMoments(raw_moments).flatten()

def fd_haralick(image):
    """Texture descriptor: mahotas Haralick features of the grayscale image, averaged over axis 0."""
    gray_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    haralick_table = mahotas.features.haralick(gray_image)
    # Collapse the first axis so a single 1-D feature vector is returned.
    return haralick_table.mean(axis=0)

def fd_histogram(image, bins=8):
    """Colour descriptor: a normalised 3-D HSV histogram, flattened to 1-D.

    Parameters
    ----------
    image : numpy.ndarray
        BGR image. Assumed to be 8-bit, as returned by ``cv2.imread``.
    bins : int, optional
        Number of bins per HSV channel (default 8).

    Returns
    -------
    numpy.ndarray
        Vector of length ``bins ** 3``.
    """
    hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
    # For 8-bit images OpenCV stores hue as H/2, i.e. in [0, 180). Using 256 as
    # the upper bound would leave the top quarter of the hue bins permanently
    # empty, so the hue range is [0, 180); saturation and value span [0, 256).
    hist = cv2.calcHist([hsv], [0, 1, 2], None, [bins, bins, bins], [0, 180, 0, 256, 0, 256])
    # Normalise in place with OpenCV's default settings.
    cv2.normalize(hist, hist)
    return hist.flatten()

In [None]:
#@title Random Forest
from sklearn.preprocessing import MinMaxScaler
from sklearn.ensemble import RandomForestClassifier

# Extract features from images in the dataset
def extract_features(image):
    """Build one global feature vector for ``image``: histogram, Haralick and Hu-moment descriptors, in that order."""
    descriptors = (fd_histogram(image), fd_haralick(image), fd_hu_moments(image))
    return np.hstack(descriptors)

def build_feature_matrix(image_paths):
    """Read every image in ``image_paths`` and return the list of its feature vectors.

    Raises
    ------
    ValueError
        If OpenCV cannot read a file. ``cv2.imread`` signals this by returning
        ``None``; the error names the offending path.
    """
    feature_rows = []
    for image_path in image_paths:
        image = cv2.imread(image_path)
        if image is None:
            raise ValueError('Could not read image: {0}'.format(image_path))
        feature_rows.append(extract_features(image))
    return feature_rows


# Extract features for the training split
train_features = build_feature_matrix(train_df['filepaths'])
train_labels = train_df['labels'].tolist()

# Normalize the extracted features (the scaler is fitted on training data only)
scaler = MinMaxScaler(feature_range=(0, 1))
train_features_normalized = scaler.fit_transform(train_features)

# Train the Random Forest classifier
rf_clf = RandomForestClassifier(n_estimators=100, random_state=9)
rf_clf.fit(train_features_normalized, train_labels)

# Calculate accuracy on the training set
train_accuracy_rf = rf_clf.score(train_features_normalized, train_labels)
print("Training Accuracy (Random Forest):", train_accuracy_rf)

# Test split: extract, scale with the training-fitted scaler, predict, score
test_features = build_feature_matrix(test_df['filepaths'])
test_features_normalized = scaler.transform(test_features)
test_predictions_rf = rf_clf.predict(test_features_normalized)
test_accuracy_rf = rf_clf.score(test_features_normalized, test_df['labels'])
print("Testing Accuracy (Random Forest):", test_accuracy_rf)

# Validation split: same steps as for the test split
valid_features = build_feature_matrix(valid_df['filepaths'])
valid_features_normalized = scaler.transform(valid_features)
valid_predictions_rf = rf_clf.predict(valid_features_normalized)
valid_accuracy_rf = rf_clf.score(valid_features_normalized, valid_df['labels'])
print("Validation Accuracy (Random Forest):", valid_accuracy_rf)

from sklearn.metrics import precision_score, recall_score, f1_score

# Calculate precision, recall, and f1-score on the test set
precision_rf = precision_score(test_df['labels'], test_predictions_rf, average='weighted')
recall_rf = recall_score(test_df['labels'], test_predictions_rf, average='weighted')
f1_rf = f1_score(test_df['labels'], test_predictions_rf, average='weighted')

print("Precision (Random Forest):", precision_rf)
print("Recall (Random Forest):", recall_rf)
print("F1 Score (Random Forest):", f1_rf)

Training Accuracy (Random Forest): 0.9995
Testing Accuracy (Random Forest): 0.8833746898263027
Validation Accuracy (Random Forest): 0.900990099009901
Precision (Random Forest): 0.8946658547398896
Recall (Random Forest): 0.8833746898263027
F1 Score (Random Forest): 0.885954577799786


In [None]:
#@title KNN
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler

# Extract features from images in the dataset
def extract_features(image):
    """Stack the colour, texture and shape descriptors of ``image`` into a single 1-D vector."""
    colour_part = fd_histogram(image)
    texture_part = fd_haralick(image)
    shape_part = fd_hu_moments(image)
    return np.hstack([colour_part, texture_part, shape_part])

# Extract features from images in the dataset
def build_feature_matrix(image_paths):
    """Read every image in ``image_paths`` and return the list of its feature vectors.

    Raises
    ------
    ValueError
        If OpenCV cannot read a file. ``cv2.imread`` signals this by returning
        ``None``; the error names the offending path.
    """
    feature_rows = []
    for image_path in image_paths:
        image = cv2.imread(image_path)
        if image is None:
            raise ValueError('Could not read image: {0}'.format(image_path))
        feature_rows.append(extract_features(image))
    return feature_rows


train_features = build_feature_matrix(train_df['filepaths'])
train_labels = train_df['labels'].tolist()

# Normalize features (the scaler is fitted on training data only)
scaler = StandardScaler()
train_features_normalized = scaler.fit_transform(train_features)

# Initialize KNN classifier
knn_clf = KNeighborsClassifier(n_neighbors=5)  # You can adjust the number of neighbors (k)
knn_clf.fit(train_features_normalized, train_labels)

# Calculate training accuracy
train_accuracy_knn = knn_clf.score(train_features_normalized, train_labels)
print("Training Accuracy (KNN):", train_accuracy_knn)

# Validation split: extract, scale with the training-fitted scaler, predict, score
valid_features = build_feature_matrix(valid_df['filepaths'])
valid_labels = valid_df['labels'].tolist()
valid_features_normalized = scaler.transform(valid_features)
valid_predictions_knn = knn_clf.predict(valid_features_normalized)
valid_accuracy_knn = knn_clf.score(valid_features_normalized, valid_labels)
print("Validation Accuracy (KNN):", valid_accuracy_knn)

# Test split: same steps as for the validation split
test_features = build_feature_matrix(test_df['filepaths'])
test_features_normalized = scaler.transform(test_features)
test_predictions_knn = knn_clf.predict(test_features_normalized)
test_accuracy_knn = knn_clf.score(test_features_normalized, test_df['labels'])
print("Testing Accuracy (KNN):", test_accuracy_knn)

from sklearn.metrics import precision_score, recall_score, f1_score

# Calculate precision, recall, and f1-score on the validation set
# NOTE(review): the Random Forest cell reports these three metrics on the TEST
# split, so the two models' numbers are not directly comparable — confirm
# which split is intended.
precision_knn = precision_score(valid_labels, valid_predictions_knn, average='weighted')
recall_knn = recall_score(valid_labels, valid_predictions_knn, average='weighted')
f1_knn = f1_score(valid_labels, valid_predictions_knn, average='weighted')

print("Precision (KNN):", precision_knn)
print("Recall (KNN):", recall_knn)
print("F1 Score (KNN):", f1_knn)


Training Accuracy (KNN): 0.8825
Validation Accuracy (KNN): 0.8514851485148515
Testing Accuracy (KNN): 0.8287841191066998
Precision (KNN): 0.8679210020683934
Recall (KNN): 0.8514851485148515
F1 Score (KNN): 0.8560162390173869
