In [2]:
import os
import cv2
import numpy as np
import pandas as pd
from tqdm import tqdm
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, BatchNormalization, Flatten, Dense, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import BinaryCrossentropy
from tensorflow.keras.metrics import AUC, Precision, Recall

In [3]:
# Target size handed to cv2.resize for every loaded image.
IMAGE_SIZE = (220, 220)

# CSV columns gathered (in this order) into each row's label vector.
LABEL_COLUMNS = [
    'Early Blight',
    'Healthy',
    'Late Blight',
    'Leaf Miner',
    'Leaf Mold',
    'Mosaic Virus',
    'Septoria',
    'Spider Mites',
    'Yellow Leaf Curl Virus',
]

In [4]:
def z_score_normalize(image):
    """Standardize an image to zero mean and unit standard deviation.

    The mean and standard deviation are taken over every element of
    ``image`` at once (no per-channel statistics).

    Args:
        image: Numeric NumPy array of pixel values.

    Returns:
        ``(image - mean) / std``. When the standard deviation is exactly
        zero (a constant image), ``1e-6`` is used as the divisor instead.
    """
    mu, sigma = np.mean(image), np.std(image)
    # A perfectly flat image has no spread; swap in a tiny divisor so we never divide by zero.
    divisor = 1e-6 if sigma == 0 else sigma
    return (image - mu) / divisor

def load_and_preprocess_image(path):
    """Read an image from disk, resize it to ``IMAGE_SIZE`` and z-score normalize it.

    Args:
        path: Filesystem path of the image to read.

    Returns:
        The result of ``z_score_normalize`` applied to the resized image
        after it has been cast to ``float32``.

    Raises:
        FileNotFoundError: If ``cv2.imread`` returns ``None`` for ``path``.
    """
    raw = cv2.imread(path)
    # cv2.imread reports failure by returning None instead of raising.
    if raw is None:
        raise FileNotFoundError(f"Image not found: {path}")
    resized = cv2.resize(raw, IMAGE_SIZE)
    return z_score_normalize(resized.astype(np.float32))

def load_images_from_dataframe(df):
    """Load and preprocess every image referenced by ``df``.

    Iterates the rows with a tqdm progress bar, reading each row's
    ``'filepath'`` through ``load_and_preprocess_image`` and pairing the
    result with the row's ``'labels'`` value.

    Args:
        df: DataFrame with a ``'filepath'`` column and a ``'labels'`` column.

    Returns:
        A tuple ``(X, Y)`` of ``float32`` NumPy arrays: the preprocessed
        images and their labels. Rows whose image raised
        ``FileNotFoundError`` are reported via ``print`` and left out of
        both arrays.
    """
    images, targets = [], []
    for _, record in tqdm(df.iterrows(), total=len(df)):
        try:
            pixels = load_and_preprocess_image(record['filepath'])
        except FileNotFoundError as err:
            # Best effort: report the unreadable file and carry on with the next row.
            print(err)
        else:
            images.append(pixels)
            targets.append(record['labels'])
    return np.array(images, dtype=np.float32), np.array(targets, dtype=np.float32)

In [5]:
def prepare_dataframe(folder_path):
    """Build the label table for one dataset folder.

    Reads ``_classes.csv`` from ``folder_path``, strips whitespace from the
    column names and from the ``'filename'`` values, and derives two extra
    columns.

    Args:
        folder_path: Directory containing ``_classes.csv`` and the image files.

    Returns:
        A DataFrame holding the CSV columns plus:
          * ``'filepath'`` - ``folder_path`` joined with the filename, with
            backslashes replaced by forward slashes;
          * ``'labels'`` - a list of the row's ``LABEL_COLUMNS`` values.
        Only rows whose ``'filepath'`` exists on disk are kept, and the
        index is reset afterwards.
    """
    def _to_filepath(name):
        # Normalise Windows separators so paths look the same on every platform.
        return os.path.join(folder_path, name).replace('\\', '/')

    csv_path = os.path.join(folder_path, '_classes.csv')
    table = pd.read_csv(csv_path)
    table.columns = table.columns.str.strip()
    table['filename'] = table['filename'].str.strip()
    table['filepath'] = table['filename'].apply(_to_filepath)

    # Drop rows that point at files missing from disk before building the labels.
    exists_mask = table['filepath'].apply(os.path.exists)
    table = table[exists_mask].reset_index(drop=True)

    table['labels'] = table[LABEL_COLUMNS].values.tolist()
    return table

In [6]:
print("Loading CSVs...")
# One label table per split folder; each folder's `_classes.csv` is read by prepare_dataframe.
df_train = prepare_dataframe(folder_path="train")
df_valid = prepare_dataframe(folder_path="valid")
df_test = prepare_dataframe(folder_path="test")

Loading CSVs...


In [7]:
print("Loading images...")
# Each call returns an (images, labels) pair of float32 arrays for one split.
X_train, Y_train = load_images_from_dataframe(df=df_train)
X_valid, Y_valid = load_images_from_dataframe(df=df_valid)
X_test, Y_test = load_images_from_dataframe(df=df_test)

Loading images...


100%|██████████| 9039/9039 [00:21<00:00, 411.35it/s]
100%|██████████| 843/843 [00:08<00:00, 99.22it/s] 
100%|██████████| 165/165 [00:01<00:00, 105.84it/s]
