In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
import cv2
import json
import os
import numpy as np
from google.colab import drive
from tqdm import tqdm

In [3]:

def detect_error_bar_endpoints(image_path, data_points_json):
    """
    Detects upper and lower error bar endpoints for given data points.
    Approach: Vertical Intensity Profiling along the x-coordinate. [cite: 27]
    """
    # Load image in grayscale
    img = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    if img is None:
        return None

    # Thresholding: Convert dark error bars to white (255) on black (0)
    # Synthetic plots usually have very clean white backgrounds (255)
    _, thresh = cv2.threshold(img, 220, 255, cv2.THRESH_BINARY_INV)

    output_error_bars = []

    # Process each line/group [cite: 20]
    for line in data_points_json.get("data_points", []):
        line_entry = {"lineName": line["lineName"], "points": []}

        for pt in line["points"]:
            x_orig, y_orig = pt["x"], pt["y"]
            ix, iy = int(round(x_orig)), int(round(y_orig))

            # Boundary constraints
            h, w = thresh.shape
            ix = max(0, min(ix, w - 1))
            iy = max(0, min(iy, h - 1))

            # --- Detect Upper Error Bar (Scan UP = Decreasing Y) --- [cite: 20]
            upper_y = iy
            # Check a 3px width to handle slight anti-aliasing or 1px shifts
            while upper_y > 0:
                roi = thresh[upper_y - 1, max(0, ix-1):min(w, ix+2)]
                if np.any(roi == 255):
                    upper_y -= 1
                else:
                    break

            # --- Detect Lower Error Bar (Scan DOWN = Increasing Y) --- [cite: 20]
            lower_y = iy
            while lower_y < h - 1:
                roi = thresh[lower_y + 1, max(0, ix-1):min(w, ix+2)]
                if np.any(roi == 255):
                    lower_y += 1
                else:
                    break

            # Append in the mandatory output format
            line_entry["points"].append({
                "data_point": {"x": x_orig, "y": y_orig},
                "upper_error_bar": {"x": x_orig, "y": float(upper_y)},
                "lower_error_bar": {"x": x_orig, "y": float(lower_y)}
            })

        output_error_bars.append(line_entry)

    return output_error_bars

# --- CONFIGURATION --- [cite: 6]
# Update these paths to match your Google Drive folder structure
BASE_PATH = '/content/drive/MyDrive/Datasets/Delinate Assessment/Synthetic-Dataset-V2'
IMAGES_DIR = os.path.join(BASE_PATH, 'images')
LABELS_DIR = os.path.join(BASE_PATH, 'labels')
RESULTS_DIR = os.path.join(BASE_PATH, 'detection_results')

os.makedirs(RESULTS_DIR, exist_ok=True)

# --- EXECUTION LOOP --- [cite: 17]
label_files = [f for f in os.listdir(LABELS_DIR) if f.endswith('.json')]
print(f"Processing {len(label_files)} files...")

for filename in tqdm(label_files):
    try:
        with open(os.path.join(LABELS_DIR, filename), 'r') as f:
            input_data = json.load(f)

        image_name = input_data["image_file"]
        image_path = os.path.join(IMAGES_DIR, image_name)

        # Run detection pipeline [cite: 19]
        detected_data = detect_error_bar_endpoints(image_path, input_data)

        if detected_data:
            # Construct final JSON object
            final_output = {
                "image_file": image_name,
                "error_bars": detected_data
            }

            # Save the result to Google Drive [cite: 31, 34]
            with open(os.path.join(RESULTS_DIR, filename), 'w') as f:
                json.dump(final_output, f, indent=2)

    except Exception as e:
        print(f"Error in {filename}: {e}")

print(f"Pipeline finished. Results saved to {RESULTS_DIR}")

Processing 3001 files...


100%|██████████| 3001/3001 [23:45<00:00,  2.10it/s]

Pipeline finished. Results saved to /content/drive/MyDrive/Datasets/Delinate Assessment/Synthetic-Dataset-V2/detection_results





In [3]:
import json
import os
import numpy as np
from tqdm import tqdm

# --- CONFIGURATION ---
# Path to detection results from Assignment 2 [cite: 22, 45]
RESULTS_DIR = '/content/drive/MyDrive/Datasets/Delinate Assessment/Synthetic-Dataset-V2/detection_results'
# Path to original labels containing true distances (from Assignment 1) [cite: 5, 11]
GROUND_TRUTH_DIR = '/content/drive/MyDrive/Datasets/Delinate Assessment/Synthetic-Dataset-V2/labels'

def evaluate_performance(pixel_tolerance=2.0):
    """
    Calculates MAE and Accuracy based on pixel distance.
    """
    result_files = [f for f in os.listdir(RESULTS_DIR) if f.endswith('.json')]

    total_y_error = 0
    total_points = 0
    successful_detections = 0

    print(f"Evaluating {len(result_files)} files...")

    for filename in tqdm(result_files):
        try:
            with open(os.path.join(RESULTS_DIR, filename), 'r') as f:
                det = json.load(f)
            with open(os.path.join(GROUND_TRUTH_DIR, filename), 'r') as f:
                tru = json.load(f)

            # Compare detected endpoints vs ground truth distances
            # This logic assumes 'tru' has 'topBarPixelDistance' or 'upper_error_bar'
            for d_line, t_line in zip(det['error_bars'], tru['data_points']):
                for d_p, t_p in zip(d_line['points'], t_line['points']):
                    # Calculate Detected Distances
                    det_top = abs(d_p['upper_error_bar']['y'] - d_p['data_point']['y'])
                    det_bot = abs(d_p['lower_error_bar']['y'] - d_p['data_point']['y'])

                    # Truth Distances (Fallback logic if keys differ)
                    true_top = t_p.get('topBarPixelDistance', 0)
                    true_bot = t_p.get('bottomBarPixelDistance', 0)

                    # Accumulate Error
                    top_err = abs(det_top - true_top)
                    bot_err = abs(det_bot - true_bot)

                    total_y_error += (top_err + bot_err)
                    total_points += 2

                    # Accuracy check within tolerance
                    if top_err <= pixel_tolerance: successful_detections += 1
                    if bot_err <= pixel_tolerance: successful_detections += 1

        except Exception:
            continue

    if total_points == 0:
        print("\n[!] Error: No points matched. Check if your ground truth labels contain distances.")
        return

    mae = total_y_error / total_points
    accuracy = (successful_detections / total_points) * 100

    print("\n" + "="*30)
    print(f"QUANTITATIVE EVALUATION RESULTS")
    print("="*30)
    print(f"Mean Absolute Error (MAE): {mae:.4f} pixels")
    print(f"Accuracy (±{pixel_tolerance}px): {accuracy:.2f}%")
    print(f"Total Points Evaluated: {total_points}")
    print("="*30)

evaluate_performance()

Evaluating 3001 files...


100%|██████████| 3001/3001 [23:19<00:00,  2.14it/s]


QUANTITATIVE EVALUATION RESULTS
Mean Absolute Error (MAE): 21.9764 pixels
Accuracy (±2.0px): 35.63%
Total Points Evaluated: 112000





In [8]:
import cv2
import json
import os
import numpy as np
from google.colab import drive
from tqdm import tqdm

# 1. Mount Google Drive
drive.mount('/content/drive')

# --- CONFIGURATION ---
# Path to the source images from Assignment 1
IMAGE_DIR = '/content/drive/MyDrive/Datasets/Delinate Assessment/Synthetic-Dataset-V2/detection_results'
# Path to the results JSONs from Assignment 2 pipeline [cite: 22, 45]
RESULTS_DIR = '/content/drive/MyDrive/Datasets/Delinate Assessment/Synthetic-Dataset-V2/detection_results'
# Output directory for the final verified images [cite: 38]
VIS_OUT_DIR = '/content/drive/MyDrive/Datasets/Delinate Assessment/Synthetic-Dataset-V2/visualization'

os.makedirs(VIS_OUT_DIR, exist_ok=True)

def visualize_full_dataset():
    """
    Overlays detected error bars onto images for the entire 3000-image set.
    """
    # Get all result files
    result_files = [f for f in os.listdir(RESULTS_DIR) if f.endswith('.json')]

    if not result_files:
        print(f"No results found in {RESULTS_DIR}. Please check your paths.")
        return

    print(f"Starting visualization for {len(result_files)} images...")

    for filename in tqdm(result_files):
        try:
            # Load the detection result [cite: 22]
            with open(os.path.join(RESULTS_DIR, filename), 'r') as f:
                data = json.load(f)

            # Load corresponding plot image [cite: 21]
            img_path = os.path.join(IMAGE_DIR, data['image_file'])
            img = cv2.imread(img_path)

            if img is None:
                continue

            # Iterate through each detected line and its points [cite: 20]
            for line in data['error_bars']:
                for pt in line['points']:
                    # Extract coordinates [cite: 20]
                    # Data Point (Input from Assignment 2) [cite: 21]
                    x_c = int(round(pt['data_point']['x']))
                    y_c = int(round(pt['data_point']['y']))

                    # Detected Endpoints (Output from your Pipeline) [cite: 22]
                    y_u = int(round(pt['upper_error_bar']['y']))
                    y_l = int(round(pt['lower_error_bar']['y']))

                    # DRAWING LOGIC:
                    # 1. Draw the error bar vertical line (Green)
                    cv2.line(img, (x_c, y_u), (x_c, y_l), (0, 255, 0), 1)

                    # 2. Draw the data point center (Blue)
                    cv2.circle(img, (x_c, y_c), 3, (255, 0, 0), -1)

                    # 3. Draw the detected endpoints (Red)
                    cv2.circle(img, (x_c, y_u), 3, (0, 0, 255), -1)
                    cv2.circle(img, (x_c, y_l), 3, (0, 0, 255), -1)

            # Save to the verification folder [cite: 38]
            save_name = f"vis_{data['image_file']}"
            cv2.imwrite(os.path.join(VIS_OUT_DIR, save_name), img)

        except Exception as e:
            print(f"Skipping {filename} due to error: {e}")

    print(f"\nSuccessfully generated {len(os.listdir(VIS_OUT_DIR))} visualizations.")
    print(f"Results located at: {VIS_OUT_DIR}")

# Run the full visualization
visualize_full_dataset()

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Starting visualization for 3001 images...


100%|██████████| 3001/3001 [00:50<00:00, 59.61it/s] 


Successfully generated 50 visualizations.
Results located at: /content/drive/MyDrive/Datasets/Delinate Assessment/Synthetic-Dataset-V2/visualization



