# Importing Libraries

In [42]:
import os
import cv2
from collections import defaultdict

import torch
import torchvision.models as models
import torch.nn as nn
import torchvision.transforms as transforms
import cv2
import numpy as np
import os
from skimage.feature import local_binary_pattern, hog
from collections import defaultdict
from PIL import Image

import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader
from sklearn.metrics import accuracy_score


# Resizing

In [5]:
# Define dataset paths
train_dir = "Dataset_Final/Train"
test_dir = "Dataset_Final/test"

In [6]:
# Function to check images in a directory
def check_images(directory):
    image_sizes = defaultdict(int)
    invalid_images = []
    class_distribution = defaultdict(int)

    for class_folder in os.listdir(directory):
        class_path = os.path.join(directory, class_folder)
        
        if not os.path.isdir(class_path):
            continue  # Skip non-directory files
        
        for img_file in os.listdir(class_path):
            img_path = os.path.join(class_path, img_file)
            
            # Read image using OpenCV
            img = cv2.imread(img_path)
            
            if img is None:
                invalid_images.append(img_path)
                continue
            
            height, width, _ = img.shape
            image_sizes[(width, height)] += 1
            class_distribution[class_folder] += 1

    return image_sizes, invalid_images, class_distribution


In [7]:
# Run checks on train and test sets
train_sizes, train_invalids, train_classes = check_images(train_dir)
test_sizes, test_invalids, test_classes = check_images(test_dir)

# Print Results
print("\n--- Train Dataset Checks ---")
print("Unique Image Sizes:", train_sizes)
print("Corrupt Images:", train_invalids)
print("Class Distribution:", train_classes)

print("\n--- Test Dataset Checks ---")
print("Unique Image Sizes:", test_sizes)
print("Corrupt Images:", test_invalids)
print("Class Distribution:", test_classes)

# If images are not the same size, suggest resizing them
if len(train_sizes) > 1 or len(test_sizes) > 1:
    print("\n⚠️ Warning: Images have inconsistent sizes. Consider resizing before training.")


--- Train Dataset Checks ---
Unique Image Sizes: defaultdict(<class 'int'>, {(300, 300): 1268, (480, 640): 4269, (640, 480): 2, (1200, 1600): 271, (224, 224): 3393, (1600, 1200): 13, (1200, 929): 1, (1200, 949): 1, (455, 607): 1, (720, 1600): 2, (4000, 3000): 2, (1000, 469): 1, (1269, 1200): 1, (390, 1270): 1, (3000, 4000): 3, (1283, 962): 1, (438, 1074): 1, (1185, 693): 1, (2448, 3264): 7, (589, 786): 1, (778, 1037): 1, (761, 810): 1, (1046, 1200): 1, (1203, 1200): 1, (1106, 830): 1, (1018, 763): 1})
Corrupt Images: []
Class Distribution: defaultdict(<class 'int'>, {'BacterialBlight': 1268, 'BacterialLeafBlight': 1200, 'BacterialLeafStreak': 1200, 'Blast': 1391, 'BrownSpot': 1200, 'Normal': 1786, 'SheathBlight': 1201})

--- Test Dataset Checks ---
Unique Image Sizes: defaultdict(<class 'int'>, {(300, 300): 316, (480, 640): 1053, (640, 480): 2, (224, 224): 869, (1200, 1600): 62, (1600, 1200): 2, (1200, 1072): 1, (829, 966): 1, (1200, 823): 1, (570, 760): 1})
Corrupt Images: []
Class D

In [13]:
output_base_dir = "Data224"  
if not os.path.exists(output_base_dir):
    os.makedirs(output_base_dir)

In [14]:
def resize_and_save_images(source_dir, target_dir, target_size=(224, 224)):
    os.makedirs(target_dir, exist_ok=True)  # Create target directory if it doesn't exist

    for class_folder in os.listdir(source_dir):
        class_path = os.path.join(source_dir, class_folder)
        target_class_path = os.path.join(target_dir, class_folder)

        if not os.path.isdir(class_path):
            continue  # Skip non-directory files

        os.makedirs(target_class_path, exist_ok=True)  # Create class folder in target directory

        for img_file in os.listdir(class_path):
            img_path = os.path.join(class_path, img_file)
            target_img_path = os.path.join(target_class_path, img_file)

            # Read image
            img = cv2.imread(img_path)
            if img is None:
                print(f"Skipping corrupt image: {img_path}")
                continue

            # Resize image
            resized_img = cv2.resize(img, target_size)

            # Save image in new folder
            cv2.imwrite(target_img_path, resized_img)

In [15]:
# Resize train and test images
resize_and_save_images(train_dir, os.path.join(output_base_dir, "train"))
resize_and_save_images(test_dir, os.path.join(output_base_dir, "test"))

print("✅ All images resized and saved in 'Data/' successfully!")

✅ All images resized and saved in 'Data/' successfully!


In [16]:
new_train_dir = "Data224/train"
new_test_dir = "Data224/test"

In [17]:
# Run checks on train and test sets
train_sizes, train_invalids, train_classes = check_images(new_train_dir)
test_sizes, test_invalids, test_classes = check_images(new_test_dir)

# Print Results
print("\n--- Train Dataset Checks ---")
print("Unique Image Sizes:", train_sizes)
print("Corrupt Images:", train_invalids)
print("Class Distribution:", train_classes)

print("\n--- Test Dataset Checks ---")
print("Unique Image Sizes:", test_sizes)
print("Corrupt Images:", test_invalids)
print("Class Distribution:", test_classes)

# If images are not the same size, suggest resizing them
if len(train_sizes) > 1 or len(test_sizes) > 1:
    print("\n⚠️ Warning: Images have inconsistent sizes. Consider resizing before training.")


--- Train Dataset Checks ---
Unique Image Sizes: defaultdict(<class 'int'>, {(224, 224): 9246})
Corrupt Images: []
Class Distribution: defaultdict(<class 'int'>, {'BacterialBlight': 1268, 'BacterialLeafBlight': 1200, 'BacterialLeafStreak': 1200, 'Blast': 1391, 'BrownSpot': 1200, 'Normal': 1786, 'SheathBlight': 1201})

--- Test Dataset Checks ---
Unique Image Sizes: defaultdict(<class 'int'>, {(224, 224): 2308})
Corrupt Images: []
Class Distribution: defaultdict(<class 'int'>, {'BacterialBlight': 316, 'BacterialLeafBlight': 300, 'BacterialLeafStreak': 300, 'Blast': 347, 'BrownSpot': 300, 'Normal': 446, 'SheathBlight': 299})


# HFF Based Classification

In [29]:
train_dir = "Data224/train"
test_dir = "Data224/test"
image_size = (224, 224)

In [30]:
# Load Pretrained ResNet50
resnet_model = models.resnet50(pretrained=True)
resnet_model = nn.Sequential(*list(resnet_model.children())[:-1])  # Remove last FC layer
resnet_model.eval()  # Set to evaluation mode



Sequential(
  (0): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (2): ReLU(inplace=True)
  (3): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (4): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)


In [None]:
# Image Transformations for ResNet50
transform = transforms.Compose([
    transforms.Resize(image_size),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

In [23]:
# Function to extract ResNet features
def extract_resnet_features(image):
    image = transform(image).unsqueeze(0)  # Add batch dimension
    with torch.no_grad():
        features = resnet_model(image)
    return features.flatten().numpy()

In [24]:
# Function to extract handcrafted features (HOG, LBP, Color Histogram)
def extract_handcrafted_features(image):
    image_gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    
    # HOG Feature Extraction
    hog_features = hog(image_gray, pixels_per_cell=(16, 16), cells_per_block=(1, 1), feature_vector=True)

    # LBP Feature Extraction
    lbp = local_binary_pattern(image_gray, P=24, R=3, method="uniform")
    lbp_hist, _ = np.histogram(lbp, bins=np.arange(0, 27), density=True)

    # Color Histogram (RGB)
    hist_r = cv2.calcHist([image], [0], None, [256], [0, 256]).flatten()
    hist_g = cv2.calcHist([image], [1], None, [256], [0, 256]).flatten()
    hist_b = cv2.calcHist([image], [2], None, [256], [0, 256]).flatten()

    # Normalize and concatenate features
    hist_rgb = np.concatenate([hist_r, hist_g, hist_b]) / np.linalg.norm(hist_r)
    
    return np.concatenate([hog_features, lbp_hist, hist_rgb])

In [31]:
# Function to process dataset (train or test)
def process_dataset(dataset_dir, dataset_type):
    features = []
    labels = []
    class_mapping = {}
    class_id = 0

    for class_folder in os.listdir(dataset_dir):
        class_path = os.path.join(dataset_dir, class_folder)
        if not os.path.isdir(class_path):
            continue  # Skip non-directory files

        if class_folder not in class_mapping:
            class_mapping[class_folder] = class_id
            class_id += 1

        for img_file in os.listdir(class_path):
            img_path = os.path.join(class_path, img_file)
            image = cv2.imread(img_path)

            if image is None:
                print(f"Skipping invalid image: {img_path}")
                continue
            
            # Extract ResNet and Handcrafted Features
            resnet_feat = extract_resnet_features(Image.fromarray(cv2.cvtColor(image, cv2.COLOR_BGR2RGB)))
            handcrafted_feat = extract_handcrafted_features(image)
            
            # Combine both feature sets
            combined_features = np.concatenate([resnet_feat, handcrafted_feat])

            features.append(combined_features)
            labels.append(class_mapping[class_folder])

    # Convert to NumPy arrays and save
    features = np.array(features)
    labels = np.array(labels)
    
    np.save(f"hff_features_{dataset_type}.npy", features)
    np.save(f"hff_labels_{dataset_type}.npy", labels)
    
    print(f"✅ {dataset_type.capitalize()} Feature Extraction Completed! Extracted {features.shape[1]} features per image.")


In [32]:
# Process both train and test datasets
process_dataset(train_dir, "train")
process_dataset(test_dir, "test")

✅ Train Feature Extraction Completed! Extracted 4606 features per image.
✅ Test Feature Extraction Completed! Extracted 4606 features per image.


In [34]:
X_train = np.load("hff_features_train.npy")
y_train = np.load("hff_labels_train.npy")
X_test = np.load("hff_features_test.npy")
y_test = np.load("hff_labels_test.npy")

In [35]:
# Convert to PyTorch tensors
X_train = torch.tensor(X_train, dtype=torch.float32)
y_train = torch.tensor(y_train, dtype=torch.long)
X_test = torch.tensor(X_test, dtype=torch.float32)
y_test = torch.tensor(y_test, dtype=torch.long)

In [43]:
# Create DataLoaders
batch_size = 64
train_dataset = TensorDataset(X_train, y_train)
test_dataset = TensorDataset(X_test, y_test)

In [44]:
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

In [45]:
# Define the Neural Network Classifier
class HFFClassifier(nn.Module):
    def __init__(self, input_size, num_classes):
        super(HFFClassifier, self).__init__()
        self.fc1 = nn.Linear(input_size, 512)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(512, 256)
        self.fc3 = nn.Linear(256, num_classes)
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
        x = self.relu(self.fc1(x))
        x = self.relu(self.fc2(x))
        x = self.softmax(self.fc3(x))
        return x

In [46]:
# Initialize Model
input_size = X_train.shape[1]
num_classes = len(set(y_train.numpy()))
model = HFFClassifier(input_size, num_classes)

# Define Loss and Optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [47]:
# Training Loop
num_epochs = 30
for epoch in range(num_epochs):
    model.train()
    total_loss = 0
    correct_train = 0
    total_train = 0

    # Training phase
    for inputs, targets in train_loader:
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()

        # Compute training accuracy
        _, predicted = torch.max(outputs, 1)
        correct_train += (predicted == targets).sum().item()
        total_train += targets.size(0)

    train_accuracy = correct_train / total_train * 100  # Train Accuracy %

    # Evaluation phase (Test Accuracy)
    model.eval()
    correct_test = 0
    total_test = 0
    with torch.no_grad():
        for inputs, targets in test_loader:
            outputs = model(inputs)
            _, predicted = torch.max(outputs, 1)
            correct_test += (predicted == targets).sum().item()
            total_test += targets.size(0)

    test_accuracy = correct_test / total_test * 100  # Test Accuracy %

    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {total_loss/len(train_loader):.4f}, "
          f"Train Accuracy: {train_accuracy:.2f}%, Test Accuracy: {test_accuracy:.2f}%")

print("✅ Training Completed!")

# Save Model
torch.save(model.state_dict(), "hff_model.pth")
print("✅ Model Saved Successfully!")

Epoch [1/30], Loss: 1.5902, Train Accuracy: 57.60%, Test Accuracy: 56.85%
Epoch [2/30], Loss: 1.4832, Train Accuracy: 68.17%, Test Accuracy: 73.31%
Epoch [3/30], Loss: 1.4347, Train Accuracy: 73.12%, Test Accuracy: 72.05%
Epoch [4/30], Loss: 1.4298, Train Accuracy: 73.72%, Test Accuracy: 74.52%
Epoch [5/30], Loss: 1.4148, Train Accuracy: 74.98%, Test Accuracy: 69.19%
Epoch [6/30], Loss: 1.4005, Train Accuracy: 76.49%, Test Accuracy: 77.69%
Epoch [7/30], Loss: 1.4035, Train Accuracy: 76.12%, Test Accuracy: 76.65%
Epoch [8/30], Loss: 1.4015, Train Accuracy: 76.35%, Test Accuracy: 76.95%
Epoch [9/30], Loss: 1.3921, Train Accuracy: 77.20%, Test Accuracy: 76.86%
Epoch [10/30], Loss: 1.3771, Train Accuracy: 78.81%, Test Accuracy: 78.90%
Epoch [11/30], Loss: 1.3809, Train Accuracy: 78.47%, Test Accuracy: 77.47%
Epoch [12/30], Loss: 1.3706, Train Accuracy: 79.45%, Test Accuracy: 80.16%
Epoch [13/30], Loss: 1.3643, Train Accuracy: 80.19%, Test Accuracy: 73.66%
Epoch [14/30], Loss: 1.3720, Train

In [48]:
from sklearn.metrics import accuracy_score

# Evaluate Model
model.eval()
y_pred = []

with torch.no_grad():
    for inputs, _ in test_loader:
        outputs = model(inputs)
        predictions = torch.argmax(outputs, dim=1).numpy()
        y_pred.extend(predictions)

# Compute Accuracy
accuracy = accuracy_score(y_test.numpy(), np.array(y_pred))
print(f"✅ Test Accuracy: {accuracy * 100:.2f}%")

✅ Test Accuracy: 86.53%
