In [2]:
import os
import cv2
import numpy as np
import pandas as pd
from tqdm import tqdm
from skimage.feature import graycomatrix, graycoprops

def extract_features(image_path):
    """Extract color and texture features from an image"""
    img = cv2.imread(image_path)
    img = cv2.resize(img, (128, 128))  # Resize for consistency
    
    features = []
    
    # Color features (RGB and HSV histograms)
    for channel in range(3):
        hist = cv2.calcHist([img], [channel], None, [16], [0, 256])
        features.extend(hist.flatten())
        
    hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
    for channel in range(3):
        hist = cv2.calcHist([hsv], [channel], None, [16], [0, 256])
        features.extend(hist.flatten())
    
    # Texture features (GLCM)
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    glcm = graycomatrix(gray, distances=[1], angles=[0], levels=256, symmetric=True, normed=True)
    texture_features = [
        graycoprops(glcm, 'contrast').item(),
        graycoprops(glcm, 'energy').item(),
        graycoprops(glcm, 'homogeneity').item(),
        graycoprops(glcm, 'correlation').item()
    ]
    features.extend(texture_features)
    
    return features

def create_dataset_csv(root_dir, output_csv):
    """Convert image directory structure to CSV"""
    rows = []
    
    splits = ['train', 'test', 'val']
    classes = ['Green', 'Green_Yellow', 'Yellow']
    
    for split in splits:
        split_path = os.path.join(root_dir, split, 'img')
        for cls in classes:
            class_path = os.path.join(split_path, cls)
            for img_name in tqdm(os.listdir(class_path), desc=f"Processing {split}/{cls}"):
                img_path = os.path.join(class_path, img_name)
                try:
                    features = extract_features(img_path)
                    rows.append({
                        'split': split,
                        'label': cls,
                        'features': features,
                        'image_path': img_path
                    })
                except Exception as e:
                    print(f"Error processing {img_path}: {str(e)}")
    
    # Create DataFrame
    df = pd.DataFrame(rows)
    
    # Expand features into separate columns
    feature_cols = [f'feature_{i}' for i in range(len(features))]
    df[feature_cols] = pd.DataFrame(df['features'].tolist(), index=df.index)
    df.drop(columns=['features'], inplace=True)
    
    # Save to CSV
    df.to_csv(output_csv, index=False)
    print(f"Dataset saved to {output_csv} with {len(df)} entries")

# Usage
create_dataset_csv('ripeness', 'ripeness_dataset2.csv')

Processing train/Green: 100%|██████████| 668/668 [00:03<00:00, 210.64it/s]
Processing train/Green_Yellow: 100%|██████████| 348/348 [00:01<00:00, 212.28it/s]
Processing train/Yellow: 100%|██████████| 363/363 [00:01<00:00, 204.65it/s]
Processing test/Green: 100%|██████████| 135/135 [00:00<00:00, 189.81it/s]
Processing test/Green_Yellow: 100%|██████████| 81/81 [00:00<00:00, 224.43it/s]
Processing test/Yellow: 100%|██████████| 80/80 [00:00<00:00, 234.01it/s]
Processing val/Green: 100%|██████████| 137/137 [00:00<00:00, 224.59it/s]
Processing val/Green_Yellow: 100%|██████████| 82/82 [00:00<00:00, 234.97it/s]
Processing val/Yellow: 100%|██████████| 79/79 [00:00<00:00, 236.72it/s]
  df[feature_cols] = pd.DataFrame(df['features'].tolist(), index=df.index)


Dataset saved to ripeness_dataset2.csv with 1973 entries
