In [2]:
import cv2
import numpy as np
import os
import math
import pandas as pd
import time

# --- Configuration ---
# IMPORTANT: point both paths at your own data before running.
IMAGE_FOLDER = "C:/Users/BCI-Lab/Downloads/teamA_dataset/_out_dataset/good_data"  # directory scanned for input images
OUTPUT_CSV_PATH = "lane_detection_features.csv"  # CSV file the extracted features are written to

# IMPORTANT: replace the values below with your tuned parameters
# (the ones printed by the tuning script when 's' is pressed).
HARDCODED_PARAMETERS = dict(
    # HSV colour range handed to cv2.inRange (lower / upper bound per channel).
    hsv_lower=np.array([5, 100, 100]),
    hsv_upper=np.array([22, 255, 255]),

    # CLAHE settings; the tile grid is square (size x size).
    clahe_clip_limit=93.0,  # e.g. trackbar value 93 -> 93.0
    clahe_tile_grid_size=8,  # e.g. trackbar value 8 -> 8

    # Resize factor applied to the cropped image before processing.
    # Despite the name this is a fraction (0.01 to 1.0), not a percentage:
    # the cropped width/height are multiplied by it directly.
    processing_scale_percent=1.0,  # e.g. trackbar value 100 -> 1.0

    # Side length of the square kernel used to clean up the colour mask.
    color_morph_kernel_size=5,

    # Smoothing applied before Canny: bilateral filter when truthy,
    # otherwise a 5x5 Gaussian blur is used instead.
    use_bilateral_filter=1,  # 1 = on, 0 = off
    bilateral_d=9,
    bilateral_sigma_color=75,
    bilateral_sigma_space=75,

    # Canny hysteresis thresholds.
    canny_thresh1=50,
    canny_thresh2=150,

    # Side length of the square kernel used to close gaps in the edge mask.
    edge_morph_kernel_size=7,

    # Probabilistic Hough transform settings.
    hough_threshold=50,
    hough_min_length=30,
    hough_max_gap=50,

    # Lines are kept only when they lie within this many degrees of horizontal.
    max_line_angle_deg=20,

    # Cropping, expressed as a percentage of the original image size.
    # "top" is the share of rows discarded from the top of the frame.
    crop_percent_top=75,
    crop_percent_left=0,
    crop_percent_right=0,
)

# --- Feature Extraction Function ---
def _default_features(filename):
    """Return the all-zero feature row for *filename*.

    Serves both as the starting row for a processed image and as the
    fallback row when processing fails.
    """
    return {
        "filename": filename,
        "deviation_score": 0.0,
        "lane_centroid_x_cropped_px": 0.0,
        "num_detected_lines": 0,
        "avg_line_slope_deg": 0.0,
        "avg_line_length_px": 0.0,
        "avg_line_x_at_bottom_px": 0.0,
        "avg_line_x_at_top_px": 0.0,
        "final_mask_white_pixels": 0,
        "final_mask_centroid_x_px": 0.0,
        "final_mask_centroid_y_px": 0.0,
    }


def _fold_to_orientation(angle_deg):
    """Map a direction angle in [-180, 180] onto the orientation range (-90, 90]."""
    if angle_deg > 90.0:
        return angle_deg - 180.0
    if angle_deg <= -90.0:
        return angle_deg + 180.0
    return angle_deg


def _crop_and_resize(image, params):
    """Crop the configured region of *image* and rescale it for processing.

    Returns ``(resized, cropped_w, cropped_h, scale_x, scale_y)`` where the
    scale factors convert processing-image coordinates back to cropped-image
    coordinates.
    """
    original_h, original_w, _ = image.shape

    crop_y_start = int(original_h * params["crop_percent_top"] / 100)
    crop_x_start = int(original_w * params["crop_percent_left"] / 100)
    crop_x_end = original_w - int(original_w * params["crop_percent_right"] / 100)
    if crop_x_start >= crop_x_end:  # invalid horizontal crop: use full width
        crop_x_start, crop_x_end = 0, original_w
    if crop_y_start >= original_h:  # invalid vertical crop: keep the last row
        crop_y_start = original_h - 1

    cropped = image[crop_y_start:original_h, crop_x_start:crop_x_end].copy()
    cropped_h, cropped_w, _ = cropped.shape

    # Never let the processing image collapse to a zero-sized dimension.
    processing_width = max(1, int(cropped_w * params["processing_scale_percent"]))
    processing_height = max(1, int(cropped_h * params["processing_scale_percent"]))
    resized = cv2.resize(cropped, (processing_width, processing_height),
                         interpolation=cv2.INTER_LINEAR)

    return (resized, cropped_w, cropped_h,
            cropped_w / processing_width, cropped_h / processing_height)


def _build_lane_mask(bgr_image, params):
    """Return the binary mask of pixels that match the colour range AND lie on an edge."""
    # Illumination normalisation: CLAHE on the V channel only.
    h, s, v = cv2.split(cv2.cvtColor(bgr_image, cv2.COLOR_BGR2HSV))
    tile = params["clahe_tile_grid_size"]
    clahe = cv2.createCLAHE(clipLimit=params["clahe_clip_limit"], tileGridSize=(tile, tile))
    normalized_image = cv2.cvtColor(cv2.merge([h, s, clahe.apply(v)]), cv2.COLOR_HSV2BGR)

    # Colour mask on the normalised image, cleaned with open -> dilate -> close.
    hsv_normalized = cv2.cvtColor(normalized_image, cv2.COLOR_BGR2HSV)
    color_mask = cv2.inRange(hsv_normalized, params["hsv_lower"], params["hsv_upper"])
    color_size = params["color_morph_kernel_size"]
    color_kernel = np.ones((color_size, color_size), np.uint8)
    color_mask = cv2.morphologyEx(color_mask, cv2.MORPH_OPEN, color_kernel, iterations=1)
    color_mask = cv2.dilate(color_mask, color_kernel, iterations=1)
    color_mask = cv2.morphologyEx(color_mask, cv2.MORPH_CLOSE, color_kernel, iterations=1)

    # Edge mask: smooth, Canny, then close small gaps.
    gray = cv2.cvtColor(normalized_image, cv2.COLOR_BGR2GRAY)
    if params["use_bilateral_filter"]:
        smoothed = cv2.bilateralFilter(gray, params["bilateral_d"],
                                       params["bilateral_sigma_color"],
                                       params["bilateral_sigma_space"])
    else:
        smoothed = cv2.GaussianBlur(gray, (5, 5), 0)
    edge_mask = cv2.Canny(smoothed, params["canny_thresh1"], params["canny_thresh2"])
    edge_size = params["edge_morph_kernel_size"]
    edge_kernel = np.ones((edge_size, edge_size), np.uint8)
    edge_mask = cv2.morphologyEx(edge_mask, cv2.MORPH_CLOSE, edge_kernel, iterations=1)

    return cv2.bitwise_and(color_mask, edge_mask)


def _select_near_horizontal_lines(lines, scale_x, scale_y, max_angle_deg):
    """Rescale Hough segments and keep those within *max_angle_deg* of horizontal.

    Returns a list of ``(x1, y1, x2, y2, angle_deg, length)`` tuples in
    cropped-image pixels, with ``angle_deg`` folded into (-90, 90].
    """
    kept = []
    if lines is None:
        return kept

    for line in lines:
        px1, py1, px2, py2 = line[0]
        x1 = int(px1 * scale_x)
        y1 = int(py1 * scale_y)
        x2 = int(px2 * scale_x)
        y2 = int(py2 * scale_y)

        # math.atan2(0, 0) is 0.0, so a zero-length segment needs no special case.
        direction_deg = math.degrees(math.atan2(y2 - y1, x2 - x1))
        # A segment's endpoints may come in either order, so the same physical
        # line can read as ~0 deg or ~+/-180 deg. Fold to an orientation so the
        # filter and the later average treat both the same way.
        angle_deg = _fold_to_orientation(direction_deg)
        length = math.sqrt((x2 - x1) ** 2 + (y2 - y1) ** 2)

        # Same selection as testing the raw direction against 0, +180 and -180.
        if abs(angle_deg) < max_angle_deg:
            kept.append((x1, y1, x2, y2, angle_deg, length))
    return kept


def _line_statistics(selected, cropped_w, cropped_h):
    """Aggregate the kept segments into the line-based feature values."""
    midpoints_x, angles, lengths, bottom_xs, top_xs = [], [], [], [], []

    for x1, y1, x2, y2, angle_deg, length in selected:
        midpoints_x.append((x1 + x2) // 2)
        angles.append(angle_deg)
        lengths.append(length)

        # Extrapolate the segment to the bottom row (y = cropped_h - 1) and the
        # top row (y = 0) of the cropped image.
        if abs(x2 - x1) > 0.1:
            slope = (y2 - y1) / (x2 - x1)
            if abs(slope) > 1e-6:
                x_bottom = ((cropped_h - 1) - y1) / slope + x1
                x_top = (0 - y1) / slope + x1
            else:
                # Horizontal segment never reaches another row: use its midpoint.
                x_bottom = x_top = (x1 + x2) / 2
        else:
            # Vertical segment: x is constant.
            x_bottom = x_top = x1
        bottom_xs.append(x_bottom)
        top_xs.append(x_top)

    centroid_x = float(np.mean(midpoints_x))
    center_x = cropped_w // 2
    # Normalised offset from the image centre; 0.0 for images too narrow to have one.
    deviation = (centroid_x - center_x) / center_x if center_x != 0 else 0.0

    return {
        "deviation_score": deviation,
        "lane_centroid_x_cropped_px": centroid_x,
        "num_detected_lines": len(selected),
        "avg_line_slope_deg": float(np.mean(angles)),
        "avg_line_length_px": float(np.mean(lengths)),
        "avg_line_x_at_bottom_px": float(np.mean(bottom_xs)),
        "avg_line_x_at_top_px": float(np.mean(top_xs)),
    }


def process_image_and_extract_features(image_path, params):
    """
    Run the lane-detection pipeline on one image and return its feature row.

    Pipeline: crop -> resize -> CLAHE on V -> HSV colour mask -> Canny edge
    mask -> AND of both masks -> probabilistic Hough transform -> keep
    near-horizontal segments -> aggregate.

    Args:
        image_path: Path of the image file to read.
        params: Dict of tuning values using the same keys as
            HARDCODED_PARAMETERS.

    Returns:
        A dict with the keys filename, deviation_score,
        lane_centroid_x_cropped_px, num_detected_lines, avg_line_slope_deg,
        avg_line_length_px, avg_line_x_at_bottom_px, avg_line_x_at_top_px,
        final_mask_white_pixels, final_mask_centroid_x_px and
        final_mask_centroid_y_px. Line-based values stay at 0 when no segment
        passes the angle filter, so a deviation_score of 0.0 can mean either
        "centred" or "nothing detected"; check num_detected_lines to tell
        them apart.
        Returns None when the image cannot be read.
        If any later step raises, the error is printed and an all-zero row
        is returned so that a batch run is not aborted by one bad image.

    Note:
        avg_line_slope_deg is the mean segment orientation in (-90, 90].
        Raw atan2 directions are not averaged because a segment reported
        right-to-left reads as ~180 degrees and would distort the mean.
    """
    filename = os.path.basename(image_path)
    try:
        image = cv2.imread(image_path)
        if image is None:
            print(f"Skipping: Could not read image {filename}")
            return None

        resized, cropped_w, cropped_h, scale_x, scale_y = _crop_and_resize(image, params)
        final_mask = _build_lane_mask(resized, params)

        lines = cv2.HoughLinesP(final_mask, 1, np.pi / 180,
                                params["hough_threshold"],
                                minLineLength=params["hough_min_length"],
                                maxLineGap=params["hough_max_gap"])

        features = _default_features(filename)
        features["final_mask_white_pixels"] = np.count_nonzero(final_mask)

        # Mask centroid, reported in cropped-image pixels.
        if features["final_mask_white_pixels"] > 0:
            coords = cv2.findNonZero(final_mask)
            mask_cx = int(np.mean(coords[:, :, 0]))
            mask_cy = int(np.mean(coords[:, :, 1]))
            features["final_mask_centroid_x_px"] = int(mask_cx * scale_x)
            features["final_mask_centroid_y_px"] = int(mask_cy * scale_y)

        selected = _select_near_horizontal_lines(
            lines, scale_x, scale_y, params["max_line_angle_deg"])
        if selected:
            features.update(_line_statistics(selected, cropped_w, cropped_h))

    except Exception as e:
        # Deliberate best-effort: one failing image must not stop the batch.
        print(f"Error processing {filename}: {e}")
        return _default_features(filename)

    return features

# --- Main Script Execution ---
def main():
    """Extract features from every image in IMAGE_FOLDER and write them to OUTPUT_CSV_PATH.

    Images are processed in sorted filename order so the CSV row order is
    reproducible (os.listdir itself returns entries in arbitrary order).
    Images that cannot be read are reported and left out of the CSV.
    """
    try:
        entries = os.listdir(IMAGE_FOLDER)
    except OSError as exc:
        # Missing or unreadable folder: report it instead of crashing with a traceback.
        print(f"Cannot read image folder {IMAGE_FOLDER}: {exc}. Exiting.")
        return

    image_extensions = (".png", ".jpg", ".jpeg", ".bmp", ".tiff")
    image_files = sorted(f for f in entries if f.lower().endswith(image_extensions))
    if not image_files:
        print(f"No images found in {IMAGE_FOLDER}. Exiting.")
        return

    total_images = len(image_files)
    print(f"Found {total_images} images in {IMAGE_FOLDER}.")
    print("Processing and extracting features...")

    all_features = []
    for index, filename in enumerate(image_files, start=1):
        print(f"Processing image {index}/{total_images}: {filename}")
        image_path = os.path.join(IMAGE_FOLDER, filename)

        # perf_counter is a monotonic clock intended for measuring short intervals.
        start_time = time.perf_counter()
        features = process_image_and_extract_features(image_path, HARDCODED_PARAMETERS)
        elapsed = time.perf_counter() - start_time

        if features is None:
            # The extractor returns None only when the image could not be read.
            print(f"  Failed to extract features for {filename} (returned None).")
            continue

        all_features.append(features)
        # deviation_score is always numeric (0.0 when no lines were kept).
        print(f"  Extracted features. Time: {elapsed:.4f}s. Deviation: {features['deviation_score']:.2f}")

    if all_features:
        df = pd.DataFrame(all_features)
        df.to_csv(OUTPUT_CSV_PATH, index=False)
        print(f"\nSuccessfully extracted features for {len(all_features)} images to {OUTPUT_CSV_PATH}")
        print("CSV columns: ", df.columns.tolist())
    else:
        print("\nNo features were extracted. Check image folder and parameters.")


if __name__ == "__main__":
    main()

Found 1291 images in C:/Users/BCI-Lab/Downloads/teamA_dataset/_out_dataset/good_data.
Processing and extracting features...
Processing image 1/1291: 00420478.png
  Extracted features. Time: 0.1261s. Deviation: 0.00
Processing image 2/1291: 00420523.png
  Extracted features. Time: 0.1347s. Deviation: 0.00
Processing image 3/1291: 00420564.png
  Extracted features. Time: 0.1266s. Deviation: 0.00
Processing image 4/1291: 00420604.png
  Extracted features. Time: 0.1394s. Deviation: 0.00
Processing image 5/1291: 00420644.png
  Extracted features. Time: 0.1417s. Deviation: 0.00
Processing image 6/1291: 00420684.png
  Extracted features. Time: 0.1432s. Deviation: 0.00
Processing image 7/1291: 00420725.png
  Extracted features. Time: 0.1264s. Deviation: 0.00
Processing image 8/1291: 00420765.png
  Extracted features. Time: 0.1343s. Deviation: 0.00
Processing image 9/1291: 00420803.png
  Extracted features. Time: 0.1254s. Deviation: -0.91
Processing image 10/1291: 00420841.png
  Extracted featu