In [7]:
import cv2
import numpy as np
import pandas as pd
import os
from tqdm import tqdm
from PIL import Image
from skimage.feature import local_binary_pattern
from skimage.feature import graycomatrix, graycoprops

## Shape Features

In [8]:
def get_shape_features(img, threshold=60):
    """Measure shape descriptors of the largest thresholded region of an image.

    The image is converted to grayscale, binarised with ``cv2.threshold``
    (``THRESH_BINARY``), and the external contour with the largest area is
    measured.

    Parameters
    ----------
    img : numpy.ndarray
        Image handed to ``cv2.cvtColor(..., cv2.COLOR_RGB2GRAY)``, so RGB
        channel order is expected.
    threshold : int or float, optional
        Gray level passed to ``cv2.threshold``. Defaults to 60.

    Returns
    -------
    dict
        Always contains, in this order, the keys ``circularity``,
        ``aspect_ratio``, ``convexity``, ``solidity``, ``extent`` and
        ``norm_central_moment_{20,11,02,30}``. A value stays 0 when no contour
        is found or when the denominator needed to compute it is zero.
    """
    # Every descriptor starts at 0 so the returned dict carries the same keys
    # in the same order whether or not a usable contour exists.
    features = {
        'circularity': 0,
        'aspect_ratio': 0,
        'convexity': 0,
        'solidity': 0,
        'extent': 0,
        'norm_central_moment_20': 0,
        'norm_central_moment_11': 0,
        'norm_central_moment_02': 0,
        'norm_central_moment_30': 0,
    }

    gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
    _, thresh = cv2.threshold(gray, threshold, 255, cv2.THRESH_BINARY)
    contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    if not contours:
        return features

    # Only the contour enclosing the largest area is described.
    largest = max(contours, key=cv2.contourArea)
    area = cv2.contourArea(largest)
    perimeter = cv2.arcLength(largest, True)

    # Circularity: 4*pi*area / perimeter^2 (1 for a perfect circle).
    if perimeter > 0:
        features['circularity'] = 4 * np.pi * area / (perimeter ** 2)

    # Aspect ratio (width / height) of the upright bounding rectangle.
    _, _, w, h = cv2.boundingRect(largest)
    if h > 0:
        features['aspect_ratio'] = float(w) / h

    # Convexity: contour perimeter divided by convex hull perimeter.
    hull = cv2.convexHull(largest)
    hull_perimeter = cv2.arcLength(hull, True)
    if hull_perimeter > 0:
        features['convexity'] = perimeter / hull_perimeter

    # Solidity: contour area divided by convex hull area.
    hull_area = cv2.contourArea(hull)
    if hull_area > 0:
        features['solidity'] = area / hull_area

    # Extent: contour area divided by bounding rectangle area.
    if (w * h) > 0:
        features['extent'] = area / (w * h)

    # Normalised central moments are only copied over when the zeroth moment
    # is non-zero; otherwise the zero defaults are kept.
    moments = cv2.moments(largest)
    if moments['m00'] != 0:
        for order in ('20', '11', '02', '30'):
            features[f'norm_central_moment_{order}'] = moments[f'nu{order}']

    return features

## Texture Features

In [9]:
def get_texture_features(img):
    """Compute LBP, GLCM and gradient based texture statistics for an image.

    Parameters
    ----------
    img : numpy.ndarray
        Image handed to ``cv2.cvtColor(..., cv2.COLOR_RGB2GRAY)``, so RGB
        channel order is expected.

    Returns
    -------
    dict
        ``lbp_entropy``, ``lbp_energy``, ``lbp_max``, ``lbp_uniformity``,
        then ``glcm_<prop>_<i>`` for each of contrast / homogeneity /
        correlation / energy and each flattened (distance, angle) index,
        then ``gradient_mean_magnitude``, ``gradient_std_magnitude``,
        ``gradient_direction_entropy`` and ``gradient_direction_energy``.
    """
    eps = 1e-10  # keeps normalisation and log2 finite for empty bins

    def _to_distribution(counts):
        # Turn raw bin counts into fractions that sum to (almost exactly) one.
        counts = counts.astype("float")
        return counts / (counts.sum() + eps)

    def _entropy(prob):
        # Shannon entropy in bits of a discrete distribution.
        return -np.sum(prob * np.log2(prob + eps))

    gray_img = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
    texture = {}

    # ---- Local binary patterns -------------------------------------------
    lbp_radius = 2
    lbp_points = 8 * lbp_radius
    lbp_codes = local_binary_pattern(gray_img, lbp_points, lbp_radius, method="uniform")

    # One unit-wide bin per integer code 0 .. lbp_points + 1.
    lbp_counts, _ = np.histogram(
        lbp_codes.ravel(),
        bins=np.arange(0, lbp_points + 3),
        range=(0, lbp_points + 2),
    )
    lbp_dist = _to_distribution(lbp_counts)

    # Summary statistics of the LBP histogram rather than the raw bins.
    # NOTE: 'lbp_uniformity' is the same quantity as 'lbp_energy' (sum of
    # squared bin fractions); both keys are kept for existing consumers.
    texture['lbp_entropy'] = _entropy(lbp_dist)
    texture['lbp_energy'] = np.sum(lbp_dist ** 2)
    texture['lbp_max'] = np.max(lbp_dist)
    texture['lbp_uniformity'] = np.sum(lbp_dist * lbp_dist)

    # ---- Gray-level co-occurrence matrix ----------------------------------
    # Quantise to 8 gray levels (divide by 32) to keep the matrix small.
    quantised = (gray_img / 32).astype(np.uint8)
    glcm = graycomatrix(
        quantised,
        distances=[1],
        angles=[0, np.pi/2],  # horizontal and vertical neighbours only
        levels=8,
        symmetric=True,
        normed=True,
    )
    for prop_name in ('contrast', 'homogeneity', 'correlation', 'energy'):
        per_offset = graycoprops(glcm, prop_name).flatten()
        texture.update(
            {f'glcm_{prop_name}_{idx}': val for idx, val in enumerate(per_offset)}
        )

    # ---- Gradient statistics (Sobel) --------------------------------------
    grad_x = cv2.Sobel(gray_img, cv2.CV_64F, 1, 0, ksize=3)
    grad_y = cv2.Sobel(gray_img, cv2.CV_64F, 0, 1, ksize=3)
    magnitude, direction = cv2.cartToPolar(grad_x, grad_y)

    texture['gradient_mean_magnitude'] = np.mean(magnitude)
    texture['gradient_std_magnitude'] = np.std(magnitude)

    # Simplified orientation histogram (not full HOG): 8 bins over [0, 2*pi],
    # with every pixel counted once regardless of its gradient magnitude.
    direction_counts, _ = np.histogram(direction, bins=8, range=(0, 2*np.pi))
    direction_dist = _to_distribution(direction_counts)

    texture['gradient_direction_entropy'] = _entropy(direction_dist)
    texture['gradient_direction_energy'] = np.sum(direction_dist ** 2)

    return texture

## Main Function

In [10]:
def load_image(path):
    """Read the image file at ``path`` and return it as an RGB NumPy array.

    Parameters
    ----------
    path : str or path-like
        Location of the image file, passed to ``PIL.Image.open``.

    Returns
    -------
    numpy.ndarray
        Array built from the image after conversion to PIL mode ``"RGB"``.
    """
    # The context manager closes the underlying file as soon as the pixel
    # data has been converted, instead of leaving that to garbage collection.
    with Image.open(path) as img:
        return np.array(img.convert("RGB"))

In [11]:
# Registry: feature-type name -> function computing that feature dict for one image.
feature_extractors = dict(
    shape=get_shape_features,
    texture=get_texture_features,
)

In [12]:
def extract_features(mode, feature_type):
    """Extract one family of features for every image of a split and save them as CSV.

    Reads ``<mode>/<mode>_metadata.csv``, which must have an ``image_path``
    column holding paths relative to ``<mode>``. It runs the extractor
    registered under ``feature_type`` in ``feature_extractors`` on each image
    and writes the result to ``<mode>/Features/<feature_type>_features.csv``,
    with ``image_path`` as the first column.

    An image that fails to load or process is reported with ``print`` and kept
    as a row holding only its ``image_path``, so its feature cells are empty.

    Parameters
    ----------
    mode : str
        Split directory name, e.g. ``'train'`` or ``'test'``.
    feature_type : str
        Key of ``feature_extractors`` selecting the extractor.

    Raises
    ------
    ValueError
        If ``feature_type`` is not a key of ``feature_extractors``.
    """
    # Fail fast on an unknown feature type. Otherwise the None extractor would
    # raise once per image inside the try/except below and a feature-less CSV
    # would be written without any obvious failure.
    feature_extractor = feature_extractors.get(feature_type)
    if feature_extractor is None:
        raise ValueError(
            f"Unknown feature_type {feature_type!r}; "
            f"expected one of {sorted(feature_extractors)}"
        )

    meta_path = os.path.join(mode, f"{mode}_metadata.csv")
    df = pd.read_csv(meta_path)

    all_features = []
    for path in tqdm(df['image_path'], desc=f"Extracting {feature_type} features for {mode}"):
        full_path = os.path.join(mode, path)
        try:
            img = load_image(full_path)
            features = feature_extractor(img)
            features['image_path'] = path
            all_features.append(features)
        except Exception as e:
            # Best effort: keep the row so the output stays aligned with the
            # metadata file, but leave its feature columns empty.
            print(f"Error processing {path}: {e}")
            all_features.append({'image_path': path})

    features_df = pd.DataFrame(all_features)
    cols = ['image_path'] + [col for col in features_df.columns if col != 'image_path']
    # reindex (rather than plain column selection) also copes with an empty
    # metadata file, where the frame has no 'image_path' column yet.
    features_df = features_df.reindex(columns=cols)

    output_dir = os.path.join(mode, "Features")
    os.makedirs(output_dir, exist_ok=True)
    output_path = os.path.join(output_dir, f"{feature_type}_features.csv")
    features_df.to_csv(output_path, index=False)

In [13]:
# Shape features for both splits, train first.
for split_name in ('train', 'test'):
    extract_features(split_name, 'shape')

Extracting shape features for train: 100%|██████████| 5488/5488 [00:04<00:00, 1358.12it/s]
Extracting shape features for test: 100%|██████████| 2353/2353 [00:01<00:00, 1374.64it/s]


In [14]:
# Texture features for both splits, train first.
for split_name in ('train', 'test'):
    extract_features(split_name, 'texture')

Extracting shape features for train: 100%|██████████| 5488/5488 [00:11<00:00, 471.85it/s]
Extracting shape features for test: 100%|██████████| 2353/2353 [00:04<00:00, 493.13it/s]
