# Importing Libraries

In [1]:
import json
import os

import cv2
import numpy as np
import timm
import torch
import torch.nn as nn
import torchvision.models as models
import torchvision.transforms as transforms
from PIL import Image
from skimage.feature import hog, local_binary_pattern

  from .autonotebook import tqdm as notebook_tqdm


In [21]:
# Dataset name -> folder (relative to the notebook) holding one sub-folder per class
DATASETS = {
    name: f"./{name}"
    for name in (
        "1_UCI_Dataset",
        "2_Rice_Leaf_Disease_Images",
        "3_Rice_Disease_Image_Dataset",
    )
}

# Create the feature output root and every dataset folder if they are missing.
# NOTE(review): the dataset folders are inputs; creating them when absent means a
# mistyped path yields an empty folder instead of an error - confirm this is wanted.
for directory in ("dataset_features", *DATASETS.values()):
    os.makedirs(directory, exist_ok=True)

In [9]:
# Size passed to transforms.Resize before an image enters a backbone
image_size = (224, 224)

# Pretrained backbones keyed by display name: the torchvision ResNet first,
# then the three timm variants, in the order they are processed and saved.
cnn_models = dict(
    [('ResNet-50', models.resnet50(pretrained=True))]
    + [
        (label, timm.create_model(architecture, pretrained=True))
        for label, architecture in (
            ('SE-ResNet-50', 'seresnet50'),
            ('ResNeXt-50', 'resnext50_32x4d'),
            ('ResNeSt-50', 'resnest50d'),
        )
    ]
)

In [10]:
# Preprocessing pipeline for the CNN backbones: resize to `image_size`,
# convert to a tensor, then normalise each channel with fixed mean/std values.
preprocessing_steps = [
    transforms.Resize(image_size),
    transforms.ToTensor(),
    transforms.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225],
    ),
]
transform = transforms.Compose(preprocessing_steps)

In [11]:
def load_cnn_model(model_name):
    """Return a headless, eval-mode feature extractor for a registered backbone.

    Args:
        model_name: Key into the module-level ``cnn_models`` registry.

    Returns:
        An ``nn.Sequential`` made of every direct child module of the registered
        model except the last one, switched to evaluation mode.

    Raises:
        ValueError: If ``model_name`` is not a key of ``cnn_models``.
    """
    if model_name in cnn_models:
        child_modules = list(cnn_models[model_name].children())
        # Dropping the final child removes the classification head
        feature_extractor = nn.Sequential(*child_modules[:-1])
        feature_extractor.eval()
        return feature_extractor

    raise ValueError(f"Model '{model_name}' not found in supported models!")

def extract_cnn_features(image, model):
    """Run one image through a model and return its output as a flat NumPy vector.

    Args:
        image: Input accepted by the module-level ``transform`` pipeline
            (the caller in this notebook passes an RGB PIL image).
        model: Callable applied to the single-image batch.

    Returns:
        1-D ``numpy.ndarray`` holding the flattened model output.
    """
    batch = transform(image)[None]  # prepend a batch axis of size 1
    with torch.no_grad():  # inference only, no gradient tracking
        activations = model(batch)
    return torch.flatten(activations).numpy()

def extract_handcrafted_features(image, target_size=None):
    """Compute HOG, LBP and colour-histogram features for one image.

    The image is first brought to a fixed size so that the HOG descriptor (whose
    length depends on the number of 16x16 cells) has the same length for every
    image; otherwise images of different sizes produce vectors that cannot be
    stacked into one feature matrix. Images already at the target size are used
    unchanged.

    Args:
        image: 3-channel image in OpenCV's BGR channel order, e.g. the array
            returned by ``cv2.imread``.
        target_size: Optional ``(height, width)`` to resize to. Defaults to the
            module-level ``image_size`` used for the CNN input.

    Returns:
        1-D ``numpy.ndarray``: HOG features, followed by the 26-bin LBP
        histogram, followed by the 768-value colour histogram (channels in the
        image's own order, i.e. B, G, R), L2-normalised as a whole.
    """
    height, width = image_size if target_size is None else target_size
    if tuple(image.shape[:2]) != (height, width):
        # cv2.resize expects dsize as (width, height)
        image = cv2.resize(image, (width, height))

    image_gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    # HOG features: one histogram per 16x16 cell, no block overlap
    hog_features = hog(image_gray, pixels_per_cell=(16, 16), cells_per_block=(1, 1), feature_vector=True)

    # LBP features: uniform patterns with P=24 take values 0..25, hence 26 bins
    lbp = local_binary_pattern(image_gray, P=24, R=3, method="uniform")
    lbp_hist, _ = np.histogram(lbp, bins=np.arange(0, 27), density=True)

    # Colour histogram, 256 bins per channel. Channel indices 0, 1, 2 are
    # blue, green, red for OpenCV images; the original concatenation order
    # (channel 0, 1, 2) is kept so the feature layout does not change.
    hist_b, hist_g, hist_r = (
        cv2.calcHist([image], [channel], None, [256], [0, 256]).flatten()
        for channel in range(3)
    )
    hist_bgr = np.concatenate([hist_b, hist_g, hist_r])

    # Normalise by the norm of the whole colour vector (previously only the
    # first channel's norm was used, which scaled the other channels arbitrarily).
    norm = np.linalg.norm(hist_bgr)
    if norm > 0:
        hist_bgr = hist_bgr / norm

    return np.concatenate([hog_features, lbp_hist, hist_bgr])

In [24]:
def process_dataset(dataset_name, dataset_path):
    """Extract combined CNN + handcrafted features for every image of a dataset.

    ``dataset_path`` is expected to contain one sub-folder per class. For each
    backbone in ``cnn_models`` the function writes, under
    ``dataset_features/<dataset_name>/<model_name>/``:

    * ``features.npy`` - one row per readable image (CNN features followed by
      handcrafted features),
    * ``labels.npy`` - the numeric class id of each row,
    * ``label_mapping.json`` - class name -> numeric id, so labels can be decoded.

    Class folders and image files are visited in sorted order so that label ids
    and row order are reproducible across runs and platforms.

    Args:
        dataset_name: Name used for log messages and the output sub-folder.
        dataset_path: Folder containing the per-class image folders.
    """
    print(f"\n📂 Extracting features from: {dataset_name}")

    # Create subdirectories for each model inside dataset folder
    model_dirs = {}
    for model_name in cnn_models:
        model_dir = os.path.join("dataset_features", dataset_name, model_name)
        os.makedirs(model_dir, exist_ok=True)
        model_dirs[model_name] = model_dir

    # Build each headless backbone once; doing this per image is pure overhead
    extractors = {model_name: load_cnn_model(model_name) for model_name in cnn_models}

    features_per_model = {model_name: [] for model_name in cnn_models}
    labels_per_model = {model_name: [] for model_name in cnn_models}
    label_mapping = {}  # class name -> numeric label

    # Iterate over class folders (diseases) in a deterministic order
    for class_name in sorted(os.listdir(dataset_path)):
        class_path = os.path.join(dataset_path, class_name)

        if not os.path.isdir(class_path):
            continue  # Skip non-directory files

        print(f"🔹 Processing class: {class_name}")

        # Assign the next free numeric label to this class
        class_id = label_mapping.setdefault(class_name, len(label_mapping))

        for img_file in sorted(os.listdir(class_path)):
            img_path = os.path.join(class_path, img_file)

            # cv2.imread returns None for anything it cannot decode
            image = cv2.imread(img_path)
            if image is None:
                print(f"❌ Skipping corrupt or unreadable image: {img_path}")
                continue

            # Both of these are independent of the backbone, so compute them once
            handcrafted_feat = extract_handcrafted_features(image)
            pil_image = Image.fromarray(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))

            for model_name, model in extractors.items():
                cnn_feat = extract_cnn_features(pil_image, model)

                # Combine CNN and handcrafted features
                combined_features = np.concatenate([cnn_feat, handcrafted_feat])

                features_per_model[model_name].append(combined_features)
                labels_per_model[model_name].append(class_id)

    # Save features for each model after processing all classes
    for model_name in cnn_models:
        rows = features_per_model[model_name]
        if not rows:
            print(f"⚠️ No readable images found for {dataset_name}/{model_name}; nothing saved")
            continue

        model_dir = model_dirs[model_name]
        # np.stack raises if rows differ in length instead of building an object array
        np.save(os.path.join(model_dir, "features.npy"), np.stack(rows))
        np.save(os.path.join(model_dir, "labels.npy"), np.array(labels_per_model[model_name]))
        with open(os.path.join(model_dir, "label_mapping.json"), "w", encoding="utf-8") as mapping_file:
            json.dump(label_mapping, mapping_file, indent=2, ensure_ascii=False)
        print(f"✅ Features saved in {dataset_name}/{model_name}/ (Features per image: {len(rows[0])})")

In [25]:
# Run feature extraction for every configured dataset, in declaration order
for dataset_name in DATASETS:
    dataset_path = DATASETS[dataset_name]
    process_dataset(dataset_name, dataset_path)

print("\n🎉 Feature extraction completed for all datasets!")


📂 Extracting features from: 1_UCI_Dataset
🔹 Processing class: Bacterial leaf blight
🔹 Processing class: Brown spot
🔹 Processing class: Leaf smut
✅ Features saved in 1_UCI_Dataset/ResNet-50/ (Features per image: 4606)
✅ Features saved in 1_UCI_Dataset/SE-ResNet-50/ (Features per image: 4606)
✅ Features saved in 1_UCI_Dataset/ResNeXt-50/ (Features per image: 4606)
✅ Features saved in 1_UCI_Dataset/ResNeSt-50/ (Features per image: 4606)

📂 Extracting features from: 2_Rice_Leaf_Disease_Images
🔹 Processing class: Bacterialblight
🔹 Processing class: Blast
🔹 Processing class: Brownspot
🔹 Processing class: Tungro
✅ Features saved in 2_Rice_Leaf_Disease_Images/ResNet-50/ (Features per image: 4606)
✅ Features saved in 2_Rice_Leaf_Disease_Images/SE-ResNet-50/ (Features per image: 4606)
✅ Features saved in 2_Rice_Leaf_Disease_Images/ResNeXt-50/ (Features per image: 4606)
✅ Features saved in 2_Rice_Leaf_Disease_Images/ResNeSt-50/ (Features per image: 4606)

📂 Extracting features from: 3_Rice_Disea