# Assignment 6 - Group Assignment 1

## A bit of info before you start.


> ***First and foremost, don't use AI to write the code. Use it to understand the concepts, then write the code yourself. Use AI to verify whether the code you wrote is correct; that will help you learn far, far more.***

> ***Use cv2 functions only, or at least wherever possible.***

> **This assignment makes up one-third of your project and is therefore the first part of your team's final submission.**







## Create a DataLoader Class

Use the PyTorch docs shared in the group and deepen your understanding through sample practice, or even with AI.

Finally, create a DataLoader for the Fruits-360 dataset ( https://www.kaggle.com/datasets/moltean/fruits ).
In <code>__getitem__</code>, include the processing that creates the LBP image and the Canny image, and that computes the 6 color features and the 6 shape features.
The best implementation should divide the work properly between the method itself and utility functions.
Also think carefully about the data structures you use to process and pass the information. Use cv2 functions for best performance.

In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset,DataLoader
import torchvision.transforms as transforms
from torchvision.datasets import ImageFolder
import timm
import matplotlib.pyplot as plt
import pandas as pd
import cv2 as cv2
import numpy as np
!pip install kaggle
import os
import zipfile




In [3]:
class ImageProcessingUtils:
    """Stateless helpers that derive edge, texture and mask images.

    Every method is a ``@staticmethod`` taking a NumPy image. A 3-dimensional
    array is treated as BGR and converted to grayscale first; any other array
    is used as-is.
    """

    @staticmethod
    def _to_gray(image):
        """Return a single-channel version of ``image``.

        A 3-dimensional array is converted with ``cv2.COLOR_BGR2GRAY``;
        anything else is returned unchanged.
        """
        if image.ndim == 3:
            return cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        return image

    @staticmethod
    def compute_canny_edges(image, threshold1=100, threshold2=200):
        """Run ``cv2.Canny`` on the grayscale version of ``image``.

        Args:
            image: BGR (3-D) or grayscale (2-D) array.
            threshold1: Forwarded to ``cv2.Canny`` as its first threshold.
            threshold2: Forwarded to ``cv2.Canny`` as its second threshold.

        Returns:
            The edge map returned by ``cv2.Canny``.
        """
        gray = ImageProcessingUtils._to_gray(image)
        return cv2.Canny(gray, threshold1, threshold2)

    @staticmethod
    def get_lbp_image(image):
        """Compute an 8-neighbour local binary pattern (LBP) image.

        Each output pixel is an 8-bit code; bit ``7 - idx`` is set when the
        neighbour selected by ``neighbours[idx]`` is ``>=`` the centre pixel.
        Pixels outside the image are taken from the nearest edge pixel, so
        border codes are not contaminated by the opposite side of the image.

        Args:
            image: BGR (3-D) or grayscale (2-D) array.

        Returns:
            ``np.uint8`` array with the same height and width as the input.
        """
        gray = ImageProcessingUtils._to_gray(image)

        lbp = np.zeros_like(gray, dtype=np.uint8)
        if gray.size == 0:
            # Nothing to compare; np.pad(mode="edge") cannot extend an empty axis.
            return lbp

        neighbours = [(-1, -1), (-1, 0), (-1, 1), (0, 1),
                      (1, 1), (1, 0), (1, -1), (0, -1)]

        rows, cols = gray.shape
        # Replicate the border instead of wrapping around (np.roll would compare
        # left-edge pixels with right-edge pixels and top with bottom).
        padded = np.pad(gray, 1, mode="edge")

        for idx, (dy, dx) in enumerate(neighbours):
            # Same orientation as rolling by (dy, dx):
            # shifted[y, x] == gray[y - dy, x - dx] for interior pixels.
            shifted = padded[1 - dy:1 - dy + rows, 1 - dx:1 - dx + cols]
            bit = np.uint8(1 << (7 - idx))
            lbp[shifted >= gray] |= bit

        return lbp

    @staticmethod
    def get_binary_mask(image, threshold=10, invert=False):
        """Threshold ``image`` into a 0/255 mask and remove small specks.

        Args:
            image: BGR (3-D) or grayscale (2-D) array.
            threshold: Gray level passed to ``cv2.threshold``.
            invert: When true use ``cv2.THRESH_BINARY_INV`` (pixels at or below
                ``threshold`` become 255) instead of ``cv2.THRESH_BINARY``.

        Returns:
            The thresholded mask after a 3x3 morphological opening.

        NOTE(review): with the defaults only pixels with gray level <= 10 are
        set to 0, so an object photographed on a bright/white background gives
        an almost entirely 255 mask. For such images ``invert=True`` with a high
        threshold is probably what is wanted - confirm with the callers.
        """
        gray = ImageProcessingUtils._to_gray(image)
        mode = cv2.THRESH_BINARY_INV if invert else cv2.THRESH_BINARY
        _, binary = cv2.threshold(gray, threshold, 255, mode)

        # Opening (erode then dilate) drops isolated foreground pixels.
        kernel = np.ones((3, 3), np.uint8)
        return cv2.morphologyEx(binary, cv2.MORPH_OPEN, kernel)

In [4]:
from IPython.core.formatters import Configurable
class Fruits360Dataset(Dataset):
    """Dataset that wraps an ``ImageFolder`` and adds derived images.

    Each item is a dict with the normalised image plus its LBP, Canny and
    binary-mask images (as produced by ``ImageProcessingUtils``) and the
    integer class label supplied by ``ImageFolder``.
    """

    def __init__(self, root_dir, image_size=100):
        """Index ``root_dir`` with ``ImageFolder`` and store the target side length."""
        self.image_size = image_size
        self.dataset = ImageFolder(root_dir)

    def __len__(self):
        """Number of samples in the wrapped ``ImageFolder``."""
        return len(self.dataset)

    @staticmethod
    def _as_plane(array):
        """Wrap a 2-D NumPy array as a tensor with a leading channel axis."""
        return torch.from_numpy(array).unsqueeze(0)

    def __getitem__(self, idx):
        """Load sample ``idx`` and return its tensors in a dict.

        Keys: ``"image"`` (float32, channels first, scaled to [0, 1]),
        ``"lbp"``, ``"canny"``, ``"mask"`` (each with a leading channel
        axis) and ``"label"``.
        """
        # ImageFolder yields a PIL image; OpenCV wants a BGR NumPy array.
        pil_image, label = self.dataset[idx]
        side = self.image_size
        bgr = cv2.cvtColor(np.array(pil_image), cv2.COLOR_RGB2BGR)
        bgr = cv2.resize(bgr, (side, side))

        # Derived images are computed from the resized, un-normalised frame.
        texture = ImageProcessingUtils.get_lbp_image(bgr)
        edges = ImageProcessingUtils.compute_canny_edges(bgr)
        foreground = ImageProcessingUtils.get_binary_mask(bgr)

        # Scale to [0, 1] and move channels first (H, W, C) -> (C, H, W).
        scaled = bgr.astype(np.float32) / 255.0
        image_tensor = torch.from_numpy(scaled).permute(2, 0, 1)

        return {
            "image": image_tensor,
            "lbp": self._as_plane(texture),
            "canny": self._as_plane(edges),
            "mask": self._as_plane(foreground),
            "label": label,
        }


In [5]:
# Fetch the "moltean/fruits" dataset through kagglehub and show where it was
# placed; `path` is reused by the following cells as the dataset root.
import kagglehub

path = kagglehub.dataset_download("moltean/fruits")
print(path)


Using Colab cache for faster access to the 'fruits' dataset.
/kaggle/input/fruits


In [6]:
# List the top-level entries of the downloaded dataset directory.
import os
print(os.listdir(path))


['fruits-360_100x100', 'fruits-360_3-body-problem', 'fruits-360_dataset_meta', 'fruits-360_original-size', 'fruits-360_multi']


In [7]:
# Inspect the 100x100 variant of the dataset.
base_path = os.path.join(path, "fruits-360_100x100")
print(os.listdir(base_path))


['fruits-360']


In [8]:
# Point the dataset at the Training split of the 100x100 variant.
root_dir = os.path.join(path, "fruits-360_100x100", "fruits-360", "Training")

dataset = Fruits360Dataset(root_dir=root_dir)

# Shuffled loader over the whole Training split: batches of 32, 2 worker processes.
dataloader = DataLoader(
    dataset=dataset,
    batch_size=32,
    shuffle=True,
    num_workers=2
)


In [9]:
# Number of samples in the Training split (displayed as the cell result).
len(dataset)

124716

In [10]:
# Pull one sample directly from the dataset and display its image tensor.
sample = dataset[4865]
image = sample['image']
label = sample['label']
# Last expression of the cell: the notebook shows the tensor contents.
image

tensor([[[1., 1., 1.,  ..., 1., 1., 1.],
         [1., 1., 1.,  ..., 1., 1., 1.],
         [1., 1., 1.,  ..., 1., 1., 1.],
         ...,
         [1., 1., 1.,  ..., 1., 1., 1.],
         [1., 1., 1.,  ..., 1., 1., 1.],
         [1., 1., 1.,  ..., 1., 1., 1.]],

        [[1., 1., 1.,  ..., 1., 1., 1.],
         [1., 1., 1.,  ..., 1., 1., 1.],
         [1., 1., 1.,  ..., 1., 1., 1.],
         ...,
         [1., 1., 1.,  ..., 1., 1., 1.],
         [1., 1., 1.,  ..., 1., 1., 1.],
         [1., 1., 1.,  ..., 1., 1., 1.]],

        [[1., 1., 1.,  ..., 1., 1., 1.],
         [1., 1., 1.,  ..., 1., 1., 1.],
         [1., 1., 1.,  ..., 1., 1., 1.],
         ...,
         [1., 1., 1.,  ..., 1., 1., 1.],
         [1., 1., 1.,  ..., 1., 1., 1.],
         [1., 1., 1.,  ..., 1., 1., 1.]]])

In [11]:
# Check the shape of a single sample's image tensor.
sample = dataset[100]
image = sample['image']
label = sample['label']
image.shape

torch.Size([3, 100, 100])

In [12]:
# Smoke-test the DataLoader: fetch only the first batch and print the shapes
# of the collated image and label tensors.
for batch in dataloader:
  images = batch['image']
  labels = batch['label']
  print(images.shape)
  print(labels.shape)
  # One batch is enough for the check.
  break

torch.Size([32, 3, 100, 100])
torch.Size([32])


In [13]:
import cv2
import numpy as np
import math


def get_fruit_mask(image_bgr: np.ndarray) -> np.ndarray:
    """Segment the coloured foreground of a BGR image.

    The image is converted to HSV and every pixel whose saturation is at
    least 30 is kept, which discards a white (low-saturation) background.
    The result is cleaned with a 5x5 closing followed by a 5x5 opening.

    Returns:
        The mask produced by ``cv2.inRange`` after both morphology passes.
    """
    # Only the saturation floor is restrictive; hue and value are unbounded.
    lower_bound = np.array([0, 30, 0])
    upper_bound = np.array([180, 255, 255])
    structuring_element = np.ones((5, 5), np.uint8)

    foreground = cv2.inRange(
        cv2.cvtColor(image_bgr, cv2.COLOR_BGR2HSV), lower_bound, upper_bound
    )
    # Close first to fill holes, then open to drop small specks.
    for operation in (cv2.MORPH_CLOSE, cv2.MORPH_OPEN):
        foreground = cv2.morphologyEx(foreground, operation, structuring_element)

    return foreground


def extract_color_features(image_bgr: np.ndarray, mask: np.ndarray = None) -> np.ndarray:
    """Return six HSV statistics of the masked region.

    Args:
        image_bgr: BGR image.
        mask: Optional mask; pixels where ``mask > 0`` are used. When omitted
            the mask is computed with ``get_fruit_mask``.

    Returns:
        ``float32`` vector ``[mean_h, std_h, mean_s, std_s, mean_v, std_v]``,
        or six zeros when the mask selects no pixel.
    """
    planes = cv2.split(cv2.cvtColor(image_bgr, cv2.COLOR_BGR2HSV))

    if mask is None:
        mask = get_fruit_mask(image_bgr)
    selector = mask > 0

    # One flat array of selected pixels per channel, in H, S, V order.
    selected = [plane[selector] for plane in planes]
    if selected[0].size == 0:
        return np.zeros(6, dtype=np.float32)

    # Mean then standard deviation for each channel, in channel order.
    statistics = [
        reducer(pixels) for pixels in selected for reducer in (np.mean, np.std)
    ]
    return np.array(statistics, dtype=np.float32)

In [14]:
def _signed_log10(value: float) -> float:
    """Return ``-sign(value) * log10(|value|)``, or 0.0 when ``value`` is 0."""
    if value == 0:
        # log10(0) is -inf and 0 * -inf is NaN; map the degenerate case to 0.
        return 0.0
    return -math.copysign(1.0, value) * math.log10(abs(value))


def extract_shape_features(image_bgr: np.ndarray, mask: np.ndarray = None) -> np.ndarray:
    """Return six shape descriptors of the largest contour in the mask.

    Args:
        image_bgr: BGR image.
        mask: Optional precomputed foreground mask; it is passed directly to
            ``cv2.findContours``. When omitted it is computed with
            ``get_fruit_mask``.

    Returns:
        ``float32`` vector ``[area_ratio, aspect_ratio, solidity,
        circularity, hu1, hu2]`` where

        * ``area_ratio``   - contour area / mask area,
        * ``aspect_ratio`` - bounding-box width / height,
        * ``solidity``     - contour area / convex-hull area,
        * ``circularity``  - ``4 * pi * area / perimeter**2``,
        * ``hu1``, ``hu2`` - log-scaled first two Hu moments.

        Six zeros are returned when no contour is found.
    """
    if mask is None:
        mask = get_fruit_mask(image_bgr)

    contours, _ = cv2.findContours(
        mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
    )
    if len(contours) == 0:
        return np.zeros(6, dtype=np.float32)

    # Describe only the largest external contour.
    cnt = max(contours, key=cv2.contourArea)
    area = cv2.contourArea(cnt)

    h, w = mask.shape
    area_ratio = area / (h * w)

    _, _, bw, bh = cv2.boundingRect(cnt)
    aspect_ratio = bw / bh if bh != 0 else 0

    hull_area = cv2.contourArea(cv2.convexHull(cnt))
    solidity = area / hull_area if hull_area > 0 else 0

    perimeter = cv2.arcLength(cnt, True)
    # The small epsilon keeps the division defined for a zero perimeter.
    circularity = (4 * math.pi * area) / (perimeter * perimeter + 1e-6)

    # A zero-area contour yields all-zero moments; _signed_log10 keeps the
    # resulting features finite instead of NaN.
    hu = cv2.HuMoments(cv2.moments(cnt)).flatten()
    hu1 = _signed_log10(float(hu[0]))
    hu2 = _signed_log10(float(hu[1]))

    return np.array(
        [area_ratio, aspect_ratio, solidity, circularity, hu1, hu2],
        dtype=np.float32,
    )


In [15]:
from sklearn.model_selection import train_test_split
from torch.utils.data import Subset

# Both splits live under the same "fruits-360" directory of the 100x100 variant.
data_root = os.path.join(path, "fruits-360_100x100", "fruits-360")
train_path = os.path.join(data_root, "Training")
test_path = os.path.join(data_root, "Test")

print("Train Path:", train_path)
print("Test Path:", test_path)

# One dataset per on-disk split.
train_dataset = Fruits360Dataset(train_path)
test_dataset = Fruits360Dataset(test_path)

# Hold out 10% of the Training split for validation (fixed seed).
all_indices = np.arange(len(train_dataset))
train_idx, val_idx = train_test_split(
    all_indices, test_size=0.1, shuffle=True, random_state=42
)
train_subset = Subset(train_dataset, train_idx)
val_subset = Subset(train_dataset, val_idx)

for title, split in (
    ("Training Samples:", train_subset),
    ("Validation Samples:", val_subset),
    ("Test Samples:", test_dataset),
):
    print(title, len(split))

# Settings shared by all three loaders; only the training loader shuffles.
loader_options = {"batch_size": 64, "num_workers": 2, "pin_memory": True}
train_loader = DataLoader(train_subset, shuffle=True, **loader_options)
val_loader = DataLoader(val_subset, shuffle=False, **loader_options)
test_loader = DataLoader(test_dataset, shuffle=False, **loader_options)

print("DataLoaders Ready!")

# Sanity check: print the shape of every tensor in one training batch.
batch = next(iter(train_loader))
for name, value in batch.items():
    print(name, value.shape if torch.is_tensor(value) else value)


Train Path: /kaggle/input/fruits/fruits-360_100x100/fruits-360/Training
Test Path: /kaggle/input/fruits/fruits-360_100x100/fruits-360/Test
Training Samples: 112244
Validation Samples: 12472
Test Samples: 41577
DataLoaders Ready!




image torch.Size([64, 3, 100, 100])
lbp torch.Size([64, 1, 100, 100])
canny torch.Size([64, 1, 100, 100])
mask torch.Size([64, 1, 100, 100])
label torch.Size([64])
