In [22]:
import cv2
import numpy as np
import os
import pandas as pd

# --- USER-DEFINED OPTIMIZED PARAMETERS ---
# REPLACE THESE VALUES with the "perfect variables" you found during tuning!
OPTIMIZED_PARAMS = dict(
    # L*a*b* colour window (lower/upper bound per channel) used for the colour mask
    lower_L=137,
    upper_L=255,
    lower_A=134,
    upper_A=161,
    lower_B=138,
    upper_B=165,
    # Side length (pixels) of the square structuring elements
    color_morph_kernel_size=3,
    edge_morph_kernel_size=7,
    # Canny hysteresis thresholds
    canny_thresh1=18,
    canny_thresh2=66,
    # Probabilistic Hough transform settings
    hough_threshold=57,    # minimum number of votes for a line
    hough_min_length=18,   # minimum accepted segment length
    hough_max_gap=17,      # maximum gap bridged when joining segments
    # Percentage of the image height removed from the top before processing
    crop_percent=55,
    # Half-width of the "center" band, as a percentage of the image width
    line_center_tolerance_percent=10,
)

# --- Configuration ---
# NOTE: machine-specific absolute path; adjust it for the environment the notebook runs in.
image_folder = "C:/Users/BCI-Lab/Downloads/teamA_dataset/_out_dataset/good_data"
output_csv_path = "line_detection_features.csv" # Output CSV file name

# File extensions (compared case-insensitively) that are treated as images
_IMAGE_EXTENSIONS = (".png", ".jpg", ".jpeg", ".bmp", ".tiff")

# List to store features and labels for each image
all_image_data = []

# --- Image discovery ---
# os.listdir raises if the folder does not exist, so check first and fall back
# to an empty list; the rest of the cell then runs without processing anything.
if not os.path.isdir(image_folder):
    image_files = []
    print(f"Image folder '{image_folder}' does not exist. Please check the path.")
else:
    # os.listdir returns entries in arbitrary order; sorting keeps the order of
    # the collected records reproducible between runs.
    image_files = sorted(
        name for name in os.listdir(image_folder)
        if name.lower().endswith(_IMAGE_EXTENSIONS)
    )
    if len(image_files) == 0:
        print(f"No images found in {image_folder}. Please check the path or file extensions.")
    else:
        print(f"Processing {len(image_files)} images from {image_folder}...")

# --- Helpers used by the processing loop ---

def _default_line_features(filename):
    """Return the feature record used when nothing could be measured.

    The record contains every column written to the CSV, so fallback rows and
    fully processed rows always share the same keys in the same order.
    Measured values are written over these defaults later.
    """
    record = {
        "filename": filename,
        "mask_centroid_x_norm": 0.5,  # Assume center if no mask
        "mask_centroid_y_norm": 0.5,
    }
    record.update({f"mask_hu_moment_{i}": 0 for i in range(1, 8)})
    record.update({
        "line_label": 2,  # 2 = no line detected
        "cx": -1,
        "num_detected_lines": 0,
        "avg_line_length": 0,
        "total_line_length": 0,
        "std_line_length": 0,
        "avg_line_angle_deg": 0,
        "std_line_angle_deg": 0,
        "line_cx_mean": -1,
        "line_cx_std": 0,
        "line_cy_mean": -1,
        "longest_line_length": 0,
        "longest_line_angle_deg": 0,
        "mask_pixel_count": 0,
        "mask_area_ratio": 0,
        "color_mask_pixel_count": 0,
        "color_mask_area_ratio": 0,
        "is_line_detected_binary": 0,
    })
    return record


def _classify_line_position(cx, center_x, tolerance):
    """Map a line centroid x-coordinate to a position label.

    Returns 0 when ``cx`` is strictly closer than ``tolerance`` to ``center_x``,
    otherwise -1 for a centroid left of the center and 1 for one on the right.
    The side is decided by the sign of the offset, so a centroid lying exactly
    ``tolerance`` pixels left of the center is labelled left (-1) rather than
    falling through to the "right" branch.
    """
    offset = cx - center_x
    if abs(offset) < tolerance:
        return 0
    return -1 if offset < 0 else 1


# --- Loop-invariant objects (built once instead of once per image) ---
clahe = cv2.createCLAHE(clipLimit=3.0, tileGridSize=(8, 8))
lower_orange_lab = np.array([OPTIMIZED_PARAMS["lower_L"], OPTIMIZED_PARAMS["lower_A"], OPTIMIZED_PARAMS["lower_B"]])
upper_orange_lab = np.array([OPTIMIZED_PARAMS["upper_L"], OPTIMIZED_PARAMS["upper_A"], OPTIMIZED_PARAMS["upper_B"]])
color_morph_kernel = np.ones((OPTIMIZED_PARAMS["color_morph_kernel_size"], OPTIMIZED_PARAMS["color_morph_kernel_size"]), np.uint8)
edge_morph_kernel = np.ones((OPTIMIZED_PARAMS["edge_morph_kernel_size"], OPTIMIZED_PARAMS["edge_morph_kernel_size"]), np.uint8)

for filename in image_files:
    filepath = os.path.join(image_folder, filename)
    image = cv2.imread(filepath)

    if image is None:
        print(f"❌ Cannot read {filename}. Skipping.")
        continue

    # Start from the "nothing detected" record and overwrite what gets measured.
    current_features = _default_line_features(filename)

    # --- 1. Crop from top ---
    img_height, img_width = image.shape[:2]
    crop_y = int(img_height * OPTIMIZED_PARAMS["crop_percent"] / 100)
    # Clamp so at least one row survives and a negative percentage cannot
    # turn the slice into "last N rows".
    crop_y = max(0, min(crop_y, img_height - 1))
    cropped_image = image[crop_y:, :].copy()

    # Check if cropped image is valid before proceeding
    if cropped_image.shape[0] == 0 or cropped_image.shape[1] == 0:
        print(f"Warning: Cropped image for {filename} is empty. Skipping feature extraction.")
        all_image_data.append(current_features)  # defaults only
        continue

    # --- 2. Color Masking (L*a*b*) ---
    lab_cropped = cv2.cvtColor(cropped_image, cv2.COLOR_BGR2LAB)
    l_channel, a_channel, b_channel = cv2.split(lab_cropped)
    l_eq = clahe.apply(l_channel)  # contrast-equalize the lightness channel only
    lab_eq = cv2.merge([l_eq, a_channel, b_channel])
    blurred_lab_eq = cv2.medianBlur(lab_eq, 5)
    color_mask = cv2.inRange(blurred_lab_eq, lower_orange_lab, upper_orange_lab)

    # --- 3. Morphological Operations on Color Mask ---
    color_mask_morphed = cv2.morphologyEx(color_mask, cv2.MORPH_OPEN, color_morph_kernel, iterations=1)
    color_mask_morphed = cv2.dilate(color_mask_morphed, color_morph_kernel, iterations=1)
    color_mask_morphed = cv2.morphologyEx(color_mask_morphed, cv2.MORPH_CLOSE, color_morph_kernel, iterations=1)

    # --- 4. Edge Detection (Canny) ---
    gray_cropped = cv2.cvtColor(cropped_image, cv2.COLOR_BGR2GRAY)
    blurred_gray = cv2.GaussianBlur(gray_cropped, (5, 5), 0)
    edge_mask = cv2.Canny(blurred_gray, OPTIMIZED_PARAMS["canny_thresh1"], OPTIMIZED_PARAMS["canny_thresh2"])

    # Morphological closing on the edge mask to connect broken edges
    edge_mask_morphed = cv2.morphologyEx(edge_mask, cv2.MORPH_CLOSE, edge_morph_kernel, iterations=1)

    # --- 5. Combine Color Mask and Morphed Edge Mask ---
    final_mask = cv2.bitwise_and(color_mask_morphed, edge_mask_morphed)

    # --- 6. Line Detection with Hough Transform ---
    lines = cv2.HoughLinesP(final_mask, 1, np.pi / 180,
                            OPTIMIZED_PARAMS["hough_threshold"],
                            minLineLength=OPTIMIZED_PARAMS["hough_min_length"],
                            maxLineGap=OPTIMIZED_PARAMS["hough_max_gap"])

    # --- 7. Features from the Final Mask ---
    mask_pixel_count = int(np.count_nonzero(final_mask))
    # The crop was verified non-empty above, so the area is never zero here.
    mask_area_ratio = mask_pixel_count / (final_mask.shape[0] * final_mask.shape[1])

    # Moments give the centroid and the Hu shape descriptors; with an empty
    # mask (m00 == 0) the defaults already stored in the record are kept.
    M = cv2.moments(final_mask)
    if M["m00"] != 0:
        hu_moments = cv2.HuMoments(M).flatten()
        current_features.update({
            "mask_centroid_x_norm": (M["m10"] / M["m00"]) / final_mask.shape[1],  # Normalized to [0,1]
            "mask_centroid_y_norm": (M["m01"] / M["m00"]) / final_mask.shape[0],  # Normalized to [0,1]
        })
        current_features.update({f"mask_hu_moment_{i}": hu_moments[i - 1] for i in range(1, 8)})

    # --- 8. Line Features and Labeling ---
    line_label = 2  # Default: No Line Detected
    cx = -1         # Default centroid x-coordinate
    tolerance = img_width * OPTIMIZED_PARAMS["line_center_tolerance_percent"] / 100

    # HoughLinesP yields None when nothing is found; an empty array is treated
    # the same way so "detected" is never reported together with zero lines.
    if lines is not None and len(lines) > 0:
        segments = lines.reshape(-1, 4)  # one row per segment: x1, y1, x2, y2
        dx = (segments[:, 2] - segments[:, 0]).astype(np.float64)
        dy = (segments[:, 3] - segments[:, 1]).astype(np.float64)
        line_lengths = np.sqrt(dx ** 2 + dy ** 2)
        line_angles_rad = np.arctan2(dy, dx)

        # Integer midpoints; coordinates are relative to cropped_image, not the original.
        midpoints_x = (segments[:, 0] + segments[:, 2]) // 2
        midpoints_y = (segments[:, 1] + segments[:, 3]) // 2

        # atan2 yields (-180, 180]; fold into [0, 180) so direction is ignored.
        # NOTE(review): a plain mean of folded angles wraps for near-horizontal
        # lines (e.g. 1 and 179 degrees average to 90). Left unchanged to keep
        # existing feature values comparable - consider a circular mean.
        normalized_angles_deg = np.degrees(line_angles_rad) % 180

        several_lines = len(segments) > 1  # std is reported as 0 for a single segment
        longest_idx = int(np.argmax(line_lengths))  # first segment with maximal length

        cx = int(np.mean(midpoints_x))
        # NOTE(review): line_label is a deterministic function of cx and the
        # image width, so a model that receives cx / line_cx_mean as inputs can
        # reproduce the label trivially.
        line_label = _classify_line_position(cx, cropped_image.shape[1] // 2, tolerance)

        current_features.update({
            "line_label": line_label,
            "cx": cx,  # x-coordinate used for labeling, relative to cropped image
            "num_detected_lines": len(segments),
            "avg_line_length": np.mean(line_lengths),
            "total_line_length": np.sum(line_lengths),
            "std_line_length": np.std(line_lengths) if several_lines else 0,
            "avg_line_angle_deg": np.mean(normalized_angles_deg),
            "std_line_angle_deg": np.std(normalized_angles_deg) if several_lines else 0,
            "line_cx_mean": cx,  # Mean of x-coords of line midpoints
            "line_cx_std": np.std(midpoints_x) if several_lines else 0,
            "line_cy_mean": int(np.mean(midpoints_y)),  # Mean of y-coords of line midpoints
            "longest_line_length": line_lengths[longest_idx],
            # Not folded into [0, 180): this is the raw atan2 angle in degrees.
            "longest_line_angle_deg": line_angles_rad[longest_idx] * 180 / np.pi,
            "is_line_detected_binary": 1,
        })

    # --- 9. Features from Morphed Color Mask ---
    color_mask_pixel_count = int(np.count_nonzero(color_mask_morphed))
    color_mask_area_ratio = color_mask_pixel_count / (color_mask_morphed.shape[0] * color_mask_morphed.shape[1])

    # --- Store the remaining features for this image ---
    current_features.update({
        "mask_pixel_count": mask_pixel_count,
        "mask_area_ratio": mask_area_ratio,
        "color_mask_pixel_count": color_mask_pixel_count,
        "color_mask_area_ratio": color_mask_area_ratio,
    })
    all_image_data.append(current_features)

# --- Persist the collected records ---
# Nothing is written when no record was collected; otherwise the records become
# a DataFrame, are saved without the index column, and a short summary is printed.
if not all_image_data:
    print("\nNo images were processed, so no CSV file was generated.")
else:
    df = pd.DataFrame(all_image_data)
    df.to_csv(output_csv_path, index=False)
    label_counts = df['line_label'].value_counts()  # rows per label value
    print(f"\n✅ Feature extraction complete! Data saved to '{output_csv_path}'")
    print(f"Total images processed: {len(df)}")
    print(f"Label Distribution:\n{label_counts}")

Processing 1291 images from C:/Users/BCI-Lab/Downloads/teamA_dataset/_out_dataset/good_data...
❌ Cannot read 00424873.png. Skipping.
❌ Cannot read 00424913.png. Skipping.
❌ Cannot read 00424954.png. Skipping.
❌ Cannot read 00424995.png. Skipping.
❌ Cannot read 00425279.png. Skipping.
❌ Cannot read 00425317.png. Skipping.
❌ Cannot read 00425357.png. Skipping.
❌ Cannot read 00425396.png. Skipping.
❌ Cannot read 00425435.png. Skipping.
❌ Cannot read 00426295.png. Skipping.
❌ Cannot read 00426826.png. Skipping.
❌ Cannot read 00426867.png. Skipping.
❌ Cannot read 00426908.png. Skipping.
❌ Cannot read 00426949.png. Skipping.
❌ Cannot read 00427113.png. Skipping.
❌ Cannot read 00427317.png. Skipping.
❌ Cannot read 00428804.png. Skipping.
❌ Cannot read 00428845.png. Skipping.
❌ Cannot read 00428923.png. Skipping.
❌ Cannot read 00428996.png. Skipping.
❌ Cannot read 00429032.png. Skipping.
❌ Cannot read 00429068.png. Skipping.
❌ Cannot read 00429106.png. Skipping.
❌ Cannot read 00429144.png. Ski

Extracted features from 1151 images.
Feature matrix shape: (1151, 2)
Labels vector shape: (1151,)
Features and labels saved to C:/Users/BCI-Lab/Downloads/teamA_dataset/_out_dataset/good_data\features_labels.csv


(1151, 2)

Classification report:
               precision    recall  f1-score   support

          -1       1.00      1.00      1.00        10
           0       1.00      1.00      1.00       142
           1       1.00      1.00      1.00        18
           2       1.00      1.00      1.00        61

    accuracy                           1.00       231
   macro avg       1.00      1.00      1.00       231
weighted avg       1.00      1.00      1.00       231

Confusion matrix:
 [[ 10   0   0   0]
 [  0 142   0   0]
 [  0   0  18   0]
 [  0   0   0  61]]


label
 0    567
 2    241
 1     73
-1     39
Name: count, dtype: int64
label
 0    142
 2     61
 1     18
-1     10
Name: count, dtype: int64
Training Accuracy: 1.0000
Validation Accuracy: 1.0000
