In [3]:
import os
import cv2
import numpy as np
import pandas as pd
from tqdm import tqdm
from concurrent.futures import ThreadPoolExecutor, as_completed

# Root folder of the image dataset; each category is looked up as a sub-folder of it.
dataset_path = r"./Rice_Image_Dataset"

# Sub-folder names to scan. Each name is also stored as the label of its images.
categories = [
    "arborio",
    "basmati",
    "ipsala",
    "jasmine",
    "karacadag",
    "Goan Asago",
]

# Parallel accumulators: data[i] is the feature vector whose label is labels[i].
data, labels = [], []

def extract_features(image_path):
    """Compute ten shape descriptors for the largest contour found in an image.

    The image is read in colour, converted to grayscale and binarised with
    Otsu's threshold. Of the external contours of that binary mask, the one
    with the greatest area is measured.

    Args:
        image_path: Path of the image file to read.

    Returns:
        ``None`` (after printing a message) when the image cannot be read.
        Otherwise a list in this order: area, perimeter, major axis,
        minor axis, eccentricity, convex area, extent, aspect ratio,
        compactness, roundness. Every entry is 0 when no contour is found.
    """
    bgr = cv2.imread(image_path, cv2.IMREAD_COLOR)
    if bgr is None:
        print(f"Error: Unable to read image {image_path}")
        return None

    gray = cv2.cvtColor(bgr, cv2.COLOR_BGR2GRAY)
    otsu_flags = cv2.THRESH_BINARY + cv2.THRESH_OTSU
    _, mask = cv2.threshold(gray, 0, 255, otsu_flags)
    outlines, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    # Nothing to measure: every descriptor falls back to zero.
    if not outlines:
        return [0] * 10

    largest = max(outlines, key=cv2.contourArea)
    area = cv2.contourArea(largest)
    perimeter = cv2.arcLength(largest, True)
    _, _, box_w, box_h = cv2.boundingRect(largest)
    box_area = box_w * box_h
    convex_area = cv2.contourArea(cv2.convexHull(largest))

    # Each ratio is reported as 0 whenever its denominator is not positive.
    aspect_ratio = box_w / box_h if box_h > 0 else 0
    extent = area / box_area if box_area > 0 else 0
    compactness = (perimeter ** 2) / area if area > 0 else 0
    roundness = (4 * np.pi * area) / (perimeter ** 2) if perimeter > 0 else 0

    # An ellipse is only fitted to contours with at least five points;
    # otherwise the three ellipse-based descriptors stay at 0.
    major_axis = minor_axis = eccentricity = 0
    if len(largest) >= 5:
        axes = cv2.fitEllipse(largest)[1]
        major_axis, minor_axis = max(axes), min(axes)
        if major_axis > 0:
            eccentricity = np.sqrt(1 - (minor_axis / major_axis) ** 2)

    return [
        area,
        perimeter,
        major_axis,
        minor_axis,
        eccentricity,
        convex_area,
        extent,
        aspect_ratio,
        compactness,
        roundness,
    ]

def process_image(file_path, category):
    """Extract features for one image and pair them with its category.

    Args:
        file_path: Path of the image, passed to ``extract_features``.
        category: Label returned alongside the features.

    Returns:
        ``(features, category)`` on success. ``None`` when
        ``extract_features`` returns ``None`` or raises; in the latter case
        the error is printed rather than propagated.
    """
    try:
        extracted = extract_features(file_path)
    except Exception as err:
        print(f"Error processing {file_path}: {err}")
        return None

    if extracted is None:
        return None
    return extracted, category

# Extract features for every image using a thread pool.
# Files are submitted in sorted order and results are gathered in submission
# order, so `data` / `labels` come out in the same order on every run.
with ThreadPoolExecutor(max_workers=10) as executor:
    futures = []
    for category in tqdm(categories, desc="Processing Categories"):
        folder_path = os.path.join(dataset_path, category)
        # isdir rather than exists: a plain file carrying the category's name
        # cannot be listed and is reported the same way as a missing folder.
        if not os.path.isdir(folder_path):
            print(f"Folder not found: {folder_path}")
            continue

        # os.listdir returns entries in arbitrary order; sort for reproducibility.
        for file_name in sorted(os.listdir(folder_path)):
            file_path = os.path.join(folder_path, file_name)
            if os.path.isfile(file_path):
                futures.append(executor.submit(process_image, file_path, category))

    # Walk the futures in submission order (not completion order) so the rows
    # are deterministic; result() blocks until that particular task is done.
    for future in tqdm(futures, total=len(futures), desc="Extracting Features"):
        result = future.result()
        # process_image returns None for images that could not be processed.
        if result is not None:
            features, category = result
            data.append(features)
            labels.append(category)

Processing Categories: 100%|█████████████████████████████████████████████████████████████| 6/6 [00:41<00:00,  6.86s/it]
Extracting Features: 100%|█████████████████████████████████████████████████████| 78000/78000 [00:26<00:00, 2961.29it/s]


In [7]:
import pandas as pd
# Column headers, listed in the same order as the values returned by extract_features.
features_names = [
    "Area",
    "Perimeter",
    "Major Axis Length",
    "Minor Axis Length",
    "Eccentricity",
    "Convex Area",
    "Extent",
    "Aspect Ratio",
    "Compactness",
    "Roundness",
]

# One row per feature vector, with the matching label in a trailing "class" column.
df = pd.DataFrame(data, columns=features_names).assign(**{"class": labels})

# Write the table without the index column.
df.to_csv("rice.csv", index=False)