In [None]:
#CELL 1
!pip install -q openai opencv-python-headless scikit-image matplotlib pandas imutils scikit-learn xlsxwriter Pillow scipy shapely

In [None]:
#CELL 2
import os
import cv2
import base64
import io
import numpy as np
import pandas as pd
from PIL import Image
from datetime import datetime
from IPython.display import display, Markdown, clear_output
from ipywidgets import widgets
import time
from IPython.display import clear_output
import matplotlib.pyplot as plt
from google.colab import files
from skimage.morphology import skeletonize
import openai
import xlsxwriter
import imutils
from scipy.interpolate import UnivariateSpline
from scipy.interpolate import UnivariateSpline, interp1d
from sklearn.cluster import DBSCAN
from scipy.spatial.distance import cdist
import warnings
warnings.filterwarnings('ignore')

In [None]:
#CELL 3
# Read the key with getpass instead of input(): input() echoes the typed value
# into the cell output, which is stored in the notebook and leaks the secret
# whenever the notebook is saved or shared.
from getpass import getpass

try:
    api_key = getpass("Enter your API key (or press Enter to skip): ").strip()

    # Only values that look like an "sk-" key enable the GPT features; an empty
    # or differently-formatted entry is treated as "skip".
    if api_key and api_key.startswith('sk-'):
        openai.api_key = api_key
        print("API key configured successfully!")
        print("Universal graph analysis will be available.")
        gpt_available = True
    else:
        print("Skipping API key. GPT features will be disabled.")
        print("Basic extraction features remain fully functional.")
        gpt_available = False

except Exception as e:
    # Best effort: the notebook stays usable without GPT, but the reason for
    # the failure is reported instead of being silently discarded.
    print("API key setup failed. Please continue without GPT features.")
    print(f"Reason: {e}")
    gpt_available = False

In [None]:
#CELL 4
# Upload cell: asks for an image, shows it, reports basic statistics and (when
# GPT features are enabled) prepares a base64 PNG copy of the image.
# Names assigned here (image, image_np, height, width, gray, contrast,
# img_base64, ...) are plain notebook globals.
print("UPLOAD YOUR SCIENTIFIC GRAPH")
print("Supported formats: PNG, JPG, JPEG")

# `uploaded` is used below as a mapping from file name to uploaded content.
uploaded = files.upload()

if uploaded:
    # Only the first uploaded file is processed; any others are ignored.
    image_path = list(uploaded.keys())[0]

    try:
        # NOTE(review): the image is opened by name, which presumes the upload
        # was also written to the working directory - confirm.
        image = Image.open(image_path).convert("RGB")
        image_np = np.array(image)

        # Preview of the uploaded graph.
        plt.figure(figsize=(12, 8))
        plt.imshow(image_np)
        plt.title("Original Graph for Processing", fontsize=16, fontweight='bold')
        plt.axis('off')
        plt.tight_layout()
        plt.show()

        height, width = image_np.shape[:2]

        print(f"Image uploaded successfully: {image_path}")
        print(f"Image dimensions: {width} x {height} pixels")
        print(f"file size: {len(uploaded[image_path])} bytes")

        # Contrast is estimated as the standard deviation of the grayscale image
        # and only drives the advisory message printed below.
        gray = cv2.cvtColor(image_np, cv2.COLOR_RGB2GRAY)
        contrast = np.std(gray)

        if contrast > 50:
            print("Image is ready for extraction.")
        elif contrast > 25:
            print("Moderate image contrast. Extraction should work.")
        else:
            print("Low image contrast. May need parameter adjustment.")

        # The base64-encoded PNG is only built when GPT features are enabled;
        # otherwise img_base64 is explicitly None.
        if gpt_available:
            img_buffer = io.BytesIO()
            Image.fromarray(image_np).save(img_buffer, format='PNG')
            img_base64 = base64.b64encode(img_buffer.getvalue()).decode()
        else:
            img_base64 = None

    except Exception as e:
        # Any failure above (including a missing gpt_available) ends up here;
        # the globals assigned after the failing statement are left unset.
        print(f"Error loading image: {str(e)}")
        print("Please try uploading a different image file")

else:
    print("No image uploaded. Please run this cell again and select an image.")

In [None]:
#CELL 5
# Banner printed when this cell runs, before the router class is defined.
print("INTELLIGENT GRAPH ANALYSIS ROUTER")

class GraphAnalysisRouter:
    def __init__(self, image_np, gpt_available=False):
        self.image = image_np
        self.gpt_available = gpt_available
        self.image_bounds = None
        self.detected_colors = None

    def route_and_execute(self, user_request):
        print(f"Processing: '{user_request}'")

        intent = self._parse_intent(user_request)
        print(f"Intent: {intent['type']}")

        if intent['type'] == 'curve_extraction':
            return self._trigger_extraction_pipeline(intent)
        elif intent['type'] == 'axis_analysis':
            return self._analyze_axes(intent)
        elif intent['type'] == 'color_detection':
            return self._trigger_color_detection()
        elif intent['type'] == 'coordinates':
            return self._handle_coordinates(intent)
        elif intent['type'] == 'excel_export':
            return self._trigger_excel_export(intent)
        elif intent['type'] == 'general_visual':
            return self._general_analysis(intent)
        else:
            return self._handle_other(intent)

    def _parse_intent(self, request):
        request_lower = request.lower()

        if any(word in request_lower for word in ['extract', 'curve', 'line', 'data']):
            colors = self._extract_color_mentions(request)
            return {
                'type': 'curve_extraction',
                'colors': colors,
                'mode': 'selective' if colors else 'auto'
            }
        elif any(word in request_lower for word in ['axis', 'axes', 'scale', 'range']):
            return {'type': 'axis_analysis'}
        elif any(word in request_lower for word in ['color', 'available']):
            return {'type': 'color_detection'}
        elif 'coordinate' in request_lower:
            return {'type': 'coordinates', 'coord_type': None}
        elif any(word in request_lower for word in ['excel', 'export', 'create excel', 'download', 'save']):
            return {'type': 'excel_export', 'action': 'create' if 'create' in request_lower else 'approve'}
        else:
            return {'type': 'general_visual'}

    def _extract_color_mentions(self, request):
        colors = []
        color_words = ['black', 'red', 'blue', 'green', 'yellow', 'teal', 'purple', 'orange', 'brown', 'gray', 'cyan', 'magenta']

        for color in color_words:
            if color in request.lower():
                colors.append(color)
        return colors

    def _trigger_extraction_pipeline(self, intent):
        print("Triggering extraction pipeline...")

        target_colors = intent.get('colors', [])
        if target_colors:
            print(f"Will extract: {target_colors}")
            global user_requested_colors, user_extraction_mode
            user_requested_colors = target_colors
            user_extraction_mode = "selective"
        else:
            print(f"Will auto-detect all available colors")
            user_extraction_mode = "auto"
            user_requested_colors = []

        try:
            global pipeline_triggered_by_router
            pipeline_triggered_by_router = True

            print("Executing Cell 6 (Color Detection)...")
            self._execute_cell_6()

            print("Executing Cell 7 (Infrastructure Removal)...")
            self._execute_cell_7()

            print("Executing Cell 8 (Curve Extraction)...")
            self._execute_cell_8()

            print("Executing Cell 10 (Quality Assessment)...")
            self._execute_cell_10()

            print("Pipeline execution complete!")

            if 'extracted_curves' in globals() and extracted_curves:
                total_points = sum(curve['point_count'] for curve in extracted_curves.values())

                print("PIPELINE COMPLETE - READY FOR VISUALIZATION")
                print(f"Curves extracted: {len(extracted_curves)}")
                print(f"Total points: {total_points}")
                print(f"Curves ready: {list(extracted_curves.keys())}")
                print(f"\nNOW RUN CELL 11 TO SEE THE VISUALIZATION!")
                print("Click the play button on Cell 11 to display the extracted curves")

                return {
                    'type': 'curve_extraction',
                    'results': extracted_curves,
                    'summary': f"Pipeline complete: extracted {len(extracted_curves)} curves with {total_points} total points",
                    'pipeline_executed': True,
                    'cells_executed': ['6', '7', '8', '10'],
                    'next_step': 'RUN CELL 11 to see visualization, then ask "create excel file" to export'
                }
            else:
                return {
                    'type': 'curve_extraction',
                    'results': {},
                    'summary': 'Pipeline executed but no curves were extracted',
                    'pipeline_executed': True,
                    'suggestion': 'Try different colors or check image quality'
                }

        except Exception as e:
            print(f"Pipeline execution failed: {str(e)}")
            import traceback
            traceback.print_exc()
            return {
                'type': 'curve_extraction',
                'results': {},
                'summary': f'Pipeline execution failed: {str(e)}',
                'pipeline_executed': False,
                'error': str(e)
            }

    def _execute_cell_6(self):
        """Run the embedded "Cell 6" color-detection script in the module globals.

        The script is executed with ``exec(..., globals())``, so every name it
        assigns becomes a module-level global. It reads the global ``image_np``
        (not ``self.image``), converts it to HSV and thresholds it with twelve
        fixed HSV ranges (``selected_colors``) using ``cv2.inRange``.

        A color counts as present when its mask has more than
        ``min_pixel_threshold`` (50) non-zero pixels. Present colors are stored
        in the global ``detected_colors`` dict with the keys ``'range'``,
        ``'pixel_count'``, ``'quality'`` and ``'mask'``; ``'quality'`` is the
        area of the largest connected component divided by the masked pixel
        count. A per-color report is printed.

        Raises:
            Exception: Whatever the script raises, re-raised after a message
            has been printed.
        """
        print("Running color detection from Cell 6...")

        try:
            # The script text below is program behaviour, not documentation.
            exec("""
print("SCANNING IMAGE FOR AVAILABLE COLORS...")
print("Analyzing pixel distributions and color presence...")

hsv_image = cv2.cvtColor(image_np, cv2.COLOR_RGB2HSV)

detected_colors = {}
min_pixel_threshold = 50

def analyze_color_presence(hsv_img, color_name, color_range):
    lower_hsv, upper_hsv = color_range
    lower = np.array(lower_hsv)
    upper = np.array(upper_hsv)

    mask = cv2.inRange(hsv_img, lower, upper)
    pixel_count = np.count_nonzero(mask)

    if pixel_count > 0:
        num_labels, labels, stats, centroids = cv2.connectedComponentsWithStats(mask)
        if num_labels > 1:
            largest_component = np.max(stats[1:, cv2.CC_STAT_AREA])
            distribution_quality = largest_component / pixel_count
        else:
            distribution_quality = 0
    else:
        distribution_quality = 0

    return {
        'pixel_count': pixel_count,
        'distribution_quality': distribution_quality,
        'mask': mask,
        'present': pixel_count > min_pixel_threshold
    }

print("\\nColor Detection Results:")

selected_colors = {
    'black': ((0, 0, 0), (180, 30, 30)),
    'red': ((0, 70, 50), (10, 255, 255)),
    'blue': ((90, 50, 50), (130, 255, 255)),
    'green': ((36, 50, 70), (89, 255, 255)),
    'yellow': ((15, 100, 100), (35, 255, 255)),
    'teal': ((85, 50, 50), (100, 255, 255)),
    'purple': ((120, 50, 50), (150, 255, 255)),
    'orange': ((10, 50, 50), (25, 255, 255)),
    'brown': ((10, 50, 20), (20, 255, 200)),
    'gray': ((0, 0, 50), (180, 50, 200)),
    'cyan': ((80, 50, 50), (90, 255, 255)),
    'magenta': ((150, 50, 50), (170, 255, 255))
}

for color_name, color_range in selected_colors.items():
    analysis = analyze_color_presence(hsv_image, color_name, color_range)

    if analysis['present']:
        detected_colors[color_name] = {
            'range': color_range,
            'pixel_count': analysis['pixel_count'],
            'quality': analysis['distribution_quality'],
            'mask': analysis['mask']
        }

        quality_desc = "High" if analysis['distribution_quality'] > 0.3 else "Medium" if analysis['distribution_quality'] > 0.1 else "Low"
        print(f"{color_name:8}: {analysis['pixel_count']:5} pixels (Quality: {quality_desc})")
    else:
        print(f"{color_name:8}: {analysis['pixel_count']:5} pixels (Below threshold)")

if detected_colors:
    print(f"\\nFound {len(detected_colors)} processable colors")
else:
    print("\\nNo colors detected above threshold!")

print(f"\\nSummary: {len(detected_colors)} colors ready for processing")
            """, globals())

        except Exception as e:
            # Report the failure here, then let the caller decide how to handle it.
            print(f"Cell 6 execution failed: {str(e)}")
            raise e

    def _execute_cell_7(self):
        """Run the embedded "Cell 7" infrastructure-removal script in the module globals.

        The script is executed with ``exec(..., globals())`` and works on the
        global ``image_np``. It builds, as module globals:

        * ``text_mask`` - filled contours whose area is between 10 and 500 and
          whose bounding-box height/width ratio is between 0.2 and 5;
        * ``line_mask`` - Hough line segments that are near-horizontal,
          near-vertical (within 5 degrees) or longer than twice
          ``min_line_length``, drawn 3 pixels wide;
        * ``infrastructure_mask`` - the OR of both masks after a 3x3
          morphological close;
        * ``cleaned_image`` - a copy of ``image_np`` with every masked pixel
          set to white.

        NOTE(review): the Cell 8 script in this class passes ``image_np``, not
        ``cleaned_image``, to its extraction functions - confirm whether
        ``cleaned_image`` is consumed elsewhere.

        Raises:
            Exception: Whatever the script raises, re-raised after a message
            has been printed.
        """
        print("Running infrastructure removal from Cell 7...")

        try:
            # The script text below is program behaviour, not documentation.
            exec("""
print("DETECTING AND REMOVING GRAPH INFRASTRUCTURE...")

def detect_text_regions(image):
    gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (2, 2))
    morph = cv2.morphologyEx(gray, cv2.MORPH_CLOSE, kernel)
    morph = cv2.morphologyEx(morph, cv2.MORPH_OPEN, kernel)
    contours, _ = cv2.findContours(morph, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    text_mask = np.zeros_like(gray)
    for contour in contours:
        x, y, w, h = cv2.boundingRect(contour)
        area = cv2.contourArea(contour)
        if 10 < area < 500 and 0.2 < h/w < 5:
            cv2.fillPoly(text_mask, [contour], 255)
    return text_mask

def detect_straight_lines(image, min_line_length=50):
    gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
    edges = cv2.Canny(gray, 50, 150, apertureSize=3)
    lines = cv2.HoughLinesP(edges, 1, np.pi/180, threshold=100,
                           minLineLength=min_line_length, maxLineGap=10)
    line_mask = np.zeros_like(gray)
    if lines is not None:
        for line in lines:
            x1, y1, x2, y2 = line[0]
            length = np.sqrt((x2-x1)**2 + (y2-y1)**2)
            angle = np.arctan2(y2-y1, x2-x1) * 180 / np.pi
            is_horizontal = abs(angle) < 5 or abs(angle-180) < 5
            is_vertical = abs(angle-90) < 5 or abs(angle+90) < 5
            if length > min_line_length * 2 or is_horizontal or is_vertical:
                cv2.line(line_mask, (x1, y1), (x2, y2), 255, 3)
    return line_mask

print("Detecting text regions...")
text_mask = detect_text_regions(image_np)

print("Detecting straight lines (axes, grids)...")
line_mask = detect_straight_lines(image_np)

infrastructure_mask = cv2.bitwise_or(text_mask, line_mask)
kernel = np.ones((3,3), np.uint8)
infrastructure_mask = cv2.morphologyEx(infrastructure_mask, cv2.MORPH_CLOSE, kernel)

cleaned_image = image_np.copy()
infrastructure_coords = np.where(infrastructure_mask == 255)
cleaned_image[infrastructure_coords] = [255, 255, 255]

print("Infrastructure removal complete")
            """, globals())

        except Exception as e:
            # Report the failure here, then let the caller decide how to handle it.
            print(f"Cell 7 execution failed: {str(e)}")
            raise e

    def _execute_cell_8(self):
        """Run the embedded "Cell 8" curve-extraction script in the module globals.

        The script is executed with ``exec(..., globals())``. It reads the
        globals ``detected_colors`` and ``image_np`` plus, when present,
        ``user_extraction_mode`` / ``user_requested_colors`` (default: auto
        mode with no requested colors).

        Steps performed by the script:

        1. Color filtering - auto mode keeps every detected color; selective
           mode keeps only requested colors that were detected (requested
           colors that were not detected are skipped without a message).
        2. Axis calibration - the global ``axis_calibration`` is overwritten
           with fixed values: the plot area is taken to span 15%-90% of the
           image width and 10%-85% of its height, with a linear x axis from 0
           to 1000 and a log10 y axis from 0.001 to 10. None of these are
           measured from the image.
        3. Extraction - ``'black'`` uses ``trace_black_curve_enhanced`` (dark
           pixel mask, removal of long horizontal/vertical runs,
           skeletonisation, best connected component by area times aspect
           ratio); every other color uses ``extract_simple_curve`` (HSV range
           mask inside the plot area, skeletonisation). Both reduce the result
           to one median y value per x column, in full-image pixel coordinates.

        Results are stored in the global ``extracted_curves`` dict, one entry
        per color, with the keys ``'pixel_coordinates'``, ``'point_count'``,
        ``'scientific_coordinates'`` (left as a pair of empty lists),
        ``'confidence'``, ``'x_range'`` and ``'y_range'``.

        Raises:
            Exception: Whatever the script raises, re-raised after a message
            has been printed.
        """
        print("Running curve extraction from Cell 8...")

        try:
            # The script text below is program behaviour, not documentation.
            exec("""
print("INTELLIGENT CURVE EXTRACTION - RESPECTING USER PREFERENCES")

# Filter colors based on user request
if 'user_extraction_mode' in globals():
    extraction_mode = user_extraction_mode
    requested_colors = user_requested_colors if 'user_requested_colors' in globals() else []
else:
    extraction_mode = "auto"
    requested_colors = []

print(f"\\nFILTERING COLORS BASED ON USER REQUEST...")
print(f"Original detected colors: {list(detected_colors.keys())}")

def filter_colors_by_user_request(detected_colors, mode, requested_colors):
    if mode == "auto":
        filtered_colors = detected_colors.copy()
        print(f"AUTO MODE: Using all {len(filtered_colors)} detected colors")
        return filtered_colors
    elif mode == "selective":
        filtered_colors = {}
        for color in requested_colors:
            if color in detected_colors:
                filtered_colors[color] = detected_colors[color]
                print(f"{color.upper()}: Found and will be extracted ({detected_colors[color]['pixel_count']} pixels)")
        return filtered_colors
    else:
        return detected_colors.copy()

processing_colors = filter_colors_by_user_request(detected_colors, extraction_mode, requested_colors)

# Set up axis calibration
height, width = image_np.shape[:2]
axis_calibration = {
    'x_pixel': [int(width * 0.15), int(width * 0.90)],
    'y_pixel': [int(height * 0.85), int(height * 0.10)],
    'x_real': [0, 1000],
    'y_real': [0.001, 10],
    'x_scale': 'linear',
    'y_scale': 'log10',
    'bounds': (int(width * 0.15), int(height * 0.10), int(width * 0.90), int(height * 0.85))
}

# Extract curves using the original algorithms from Cell 8
from skimage.morphology import skeletonize

def trace_black_curve_enhanced(image_np, graph_bounds):
    gray = cv2.cvtColor(image_np, cv2.COLOR_RGB2GRAY)
    left, top, right, bottom = graph_bounds
    margin = 15
    roi = (top + margin, bottom - margin, left + margin, right - margin)
    y1, y2, x1, x2 = roi
    roi_img = gray[y1:y2, x1:x2]
    roi_rgb = image_np[y1:y2, x1:x2]

    hsv = cv2.cvtColor(roi_rgb, cv2.COLOR_RGB2HSV)
    lower_black = np.array([0, 0, 0])
    upper_black = np.array([180, 50, 80])
    black_mask = cv2.inRange(hsv, lower_black, upper_black)

    _, gray_thresh = cv2.threshold(roi_img, 60, 255, cv2.THRESH_BINARY_INV)
    edges = cv2.bitwise_or(black_mask, gray_thresh)

    kernel_h = cv2.getStructuringElement(cv2.MORPH_RECT, (20, 1))
    kernel_v = cv2.getStructuringElement(cv2.MORPH_RECT, (1, 20))
    horizontal_lines = cv2.morphologyEx(edges, cv2.MORPH_OPEN, kernel_h)
    vertical_lines = cv2.morphologyEx(edges, cv2.MORPH_OPEN, kernel_v)
    edges = cv2.subtract(edges, horizontal_lines)
    edges = cv2.subtract(edges, vertical_lines)

    skeleton = skeletonize(edges > 0)
    num_lbl, lbl, stats, _ = cv2.connectedComponentsWithStats(skeleton.astype(np.uint8), connectivity=8)

    if num_lbl <= 1:
        return None

    best_component = None
    best_score = 0

    for i in range(1, num_lbl):
        if stats[i, cv2.CC_STAT_AREA] < 50:
            continue
        mask = (lbl == i)
        ys, xs = np.where(mask)
        pts = np.column_stack([xs, ys])
        if len(pts) < 20:
            continue
        x_span = np.max(pts[:, 0]) - np.min(pts[:, 0])
        y_span = np.max(pts[:, 1]) - np.min(pts[:, 1])
        if x_span < 100 or y_span < 30:
            continue
        aspect_ratio = x_span / max(y_span, 1)
        area_score = stats[i, cv2.CC_STAT_AREA]
        score = area_score * aspect_ratio
        if score > best_score:
            best_score = score
            best_component = i

    if best_component is None:
        return None

    mask = (lbl == best_component)
    ys, xs = np.where(mask)
    pts = np.column_stack([xs, ys])
    pts = pts[np.argsort(pts[:, 0])]
    unique_x = np.unique(pts[:, 0])

    if len(unique_x) < len(pts):
        new_pts = []
        for ux in unique_x:
            y_vals = pts[pts[:, 0] == ux, 1]
            median_y = np.median(y_vals)
            new_pts.append([ux, median_y])
        pts = np.array(new_pts)

    pts[:, 0] += x1
    pts[:, 1] += y1
    return pts

def extract_simple_curve(image_np, graph_bounds, color_name, color_range):
    left, top, right, bottom = graph_bounds
    legend_margin = 80
    border_margin = 20
    data_left = left + border_margin
    data_top = top + border_margin
    data_right = right - legend_margin
    data_bottom = bottom - border_margin
    data_roi = image_np[data_top:data_bottom, data_left:data_right]

    hsv_roi = cv2.cvtColor(data_roi, cv2.COLOR_RGB2HSV)
    lower_hsv, upper_hsv = color_range
    lower = np.array(lower_hsv)
    upper = np.array(upper_hsv)
    mask = cv2.inRange(hsv_roi, lower, upper)

    kernel = np.ones((2, 2), np.uint8)
    mask = cv2.morphologyEx(mask, cv2.MORPH_CLOSE, kernel)
    skeleton = skeletonize(mask > 0)
    coords = np.column_stack(np.where(skeleton.T))

    if len(coords) == 0:
        return None

    coords = coords[coords[:, 0].argsort()]
    unique_coords = []
    current_x = None
    y_values = []

    for x, y in coords:
        if current_x is None or x != current_x:
            if current_x is not None:
                avg_y = np.median(y_values)
                unique_coords.append([current_x, avg_y])
            current_x = x
            y_values = [y]
        else:
            y_values.append(y)

    if current_x is not None:
        avg_y = np.median(y_values)
        unique_coords.append([current_x, avg_y])

    if len(unique_coords) == 0:
        return None

    final_coords = np.array(unique_coords)
    final_coords[:, 0] += data_left
    final_coords[:, 1] += data_top
    return final_coords

# Extract curves
extracted_curves = {}

for color_name, color_data in processing_colors.items():
    print(f"\\nPROCESSING '{color_name.upper()}'...")

    if color_name == 'black':
        print("\\tUSING ADVANCED BLACK CURVE ALGORITHM...")
        curve_points = trace_black_curve_enhanced(image_np, axis_calibration['bounds'])
    else:
        print(f"\\tUSING SIMPLE DIRECT ALGORITHM...")
        curve_points = extract_simple_curve(image_np, axis_calibration['bounds'], color_name, color_data['range'])

    if curve_points is not None:
        extracted_curves[color_name] = {
            'pixel_coordinates': curve_points,
            'point_count': len(curve_points),
            'scientific_coordinates': ([], []),
            'confidence': color_data['quality'],
            'x_range': (np.min(curve_points[:, 0]), np.max(curve_points[:, 0])),
            'y_range': (np.min(curve_points[:, 1]), np.max(curve_points[:, 1]))
        }
        print(f"\\t{color_name.upper()}: {len(curve_points)} points extracted successfully")
    else:
        print(f"\\t{color_name.upper()}: No curve found")

print(f"\\nCURVE EXTRACTION COMPLETE!")
print(f"Curves Ready: {len(extracted_curves)}")
            """, globals())

        except Exception as e:
            # Report the failure here, then let the caller decide how to handle it.
            print(f"Cell 8 execution failed: {str(e)}")
            raise e

    def _execute_cell_10(self):
        """Run the embedded "Cell 10" quality-assessment script in the module globals.

        The script is executed with ``exec(..., globals())`` and reads the
        global ``extracted_curves``. The score of a curve depends only on its
        point count: ``min(point_count / 200, 1.0)``. Scores of at least 0.75
        are rated HIGH, at least 0.5 MEDIUM, anything lower LOW.

        The script creates the globals ``quality_assessment`` (per color:
        ``'overall_quality'`` and ``'quality_level'``) and
        ``overall_success_metrics`` (counts per quality level), and prints a
        report.

        Raises:
            Exception: Whatever the script raises, re-raised after a message
            has been printed.
        """
        print("Running quality assessment from Cell 10...")

        try:
            # The script text below is program behaviour, not documentation.
            exec("""
print("CURVE EXTRACTION QUALITY ASSESSMENT")

def calculate_overall_quality_score(curve_data):
    point_count = curve_data['point_count']
    density_score = min(point_count / 200, 1.0)
    overall_quality = density_score
    return overall_quality, {'density_score': density_score}

quality_assessment = {}
overall_success_metrics = {'high_quality': 0, 'medium_quality': 0, 'low_quality': 0}

print("\\nIndividual Curve Quality Analysis:")

for color_name, curve_data in extracted_curves.items():
    overall_quality, quality_details = calculate_overall_quality_score(curve_data)

    if overall_quality >= 0.75:
        quality_level = "HIGH"
        overall_success_metrics['high_quality'] += 1
    elif overall_quality >= 0.5:
        quality_level = "MEDIUM"
        overall_success_metrics['medium_quality'] += 1
    else:
        quality_level = "LOW"
        overall_success_metrics['low_quality'] += 1

    quality_assessment[color_name] = {
        'overall_quality': overall_quality,
        'quality_level': quality_level
    }

    print(f"\\n{color_name.upper()} CURVE - {quality_level} QUALITY")
    print(f"Overall Score: {overall_quality:.2f}")
    print(f"Points Extracted: {curve_data['point_count']}")

print(f"\\nOVERALL EXTRACTION SUMMARY")
print(f"High quality extractions: {overall_success_metrics['high_quality']}")
print(f"Medium quality extractions: {overall_success_metrics['medium_quality']}")
print(f"Low quality extractions: {overall_success_metrics['low_quality']}")
            """, globals())

        except Exception as e:
            # Report the failure here, then let the caller decide how to handle it.
            print(f"Cell 10 execution failed: {str(e)}")
            raise e

    def _trigger_color_detection(self):
        print("Executing Cell 6 (Color Detection)...")

        try:
            self._execute_cell_6()

            if 'detected_colors' in globals():
                return {
                    'type': 'color_detection',
                    'colors': detected_colors,
                    'summary': f"Found {len(detected_colors)} colors: {list(detected_colors.keys())}",
                    'cell_executed': '6'
                }
            else:
                return {
                    'type': 'color_detection',
                    'colors': {},
                    'summary': 'Color detection executed but no results found',
                    'cell_executed': '6'
                }

        except Exception as e:
            return {
                'type': 'color_detection',
                'colors': {},
                'summary': f'Color detection failed: {str(e)}',
                'error': str(e)
            }

    def _trigger_excel_export(self, intent):
        """Export the extracted curves to an .xlsx file via the embedded "Cell 12" script.

        Returns early with ``'success': False`` when the global
        ``extracted_curves`` is missing or empty. Otherwise the flag
        ``excel_export_approved`` is set to True on the ``__main__`` module and
        the script is executed with ``exec(..., globals())``.

        The script converts each curve's pixel coordinates to axis units
        (linear or log10 interpolation per axis) using the global
        ``axis_calibration`` - or the same fixed fallback values when that
        global is absent - and writes one sheet per color (sheet name cut to
        31 characters, columns ``X`` and ``Y``) to
        ``curve_data_<timestamp>.xlsx``. It then attempts a Colab download; a
        failed download is only reported with a message and is not treated as
        an error.

        Args:
            intent: Parsed intent dict; not consulted by this handler.

        Returns:
            dict: An ``'excel_export'`` result. ``'success'`` is True whenever
            the script finished without raising; on an exception it is False
            and ``'error'`` carries the message.
        """
        print("Executing Cell 12 (Excel Export)...")

        # Nothing to export until the extraction pipeline has produced curves.
        if 'extracted_curves' not in globals() or not extracted_curves:
            return {
                'type': 'excel_export',
                'success': False,
                'message': 'No curves available for export. Please extract curves first.',
                'suggestion': 'Ask me to "extract curves" first.'
            }

        try:
            import __main__
            __main__.excel_export_approved = True

            # The script text below is program behaviour, not documentation.
            exec("""
print("CELL 12 - EXCEL EXPORT")
print("=" * 50)

def convert_pixel_to_scientific(pixel_coords, axis_calibration):
    x_pixel = axis_calibration['x_pixel']
    y_pixel = axis_calibration['y_pixel']
    x_real = axis_calibration['x_real']
    y_real = axis_calibration['y_real']
    x_scale = axis_calibration.get('x_scale', 'linear')
    y_scale = axis_calibration.get('y_scale', 'linear')

    scientific_x = []
    scientific_y = []

    for px, py in pixel_coords:
        if x_scale == 'linear':
            x_scientific = x_real[0] + (px - x_pixel[0]) * (x_real[1] - x_real[0]) / (x_pixel[1] - x_pixel[0])
        elif x_scale == 'log10':
            log_x_real = [np.log10(max(x_real[0], 1e-10)), np.log10(max(x_real[1], 1e-10))]
            log_x_interp = log_x_real[0] + (px - x_pixel[0]) * (log_x_real[1] - log_x_real[0]) / (x_pixel[1] - x_pixel[0])
            x_scientific = 10 ** log_x_interp
        else:
            x_scientific = px

        if y_scale == 'linear':
            y_scientific = y_real[0] + (py - y_pixel[0]) * (y_real[1] - y_real[0]) / (y_pixel[1] - y_pixel[0])
        elif y_scale == 'log10':
            log_y_real = [np.log10(max(y_real[0], 1e-10)), np.log10(max(y_real[1], 1e-10))]
            log_y_interp = log_y_real[0] + (py - y_pixel[0]) * (log_y_real[1] - log_y_real[0]) / (y_pixel[1] - y_pixel[0])
            y_scientific = 10 ** log_y_interp
        else:
            y_scientific = py

        scientific_x.append(x_scientific)
        scientific_y.append(y_scientific)

    return np.array(scientific_x), np.array(scientific_y)

def create_excel_export():
    from datetime import datetime
    import pandas as pd
    from google.colab import files

    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    filename = f"curve_data_{timestamp}.xlsx"

    print(f"Creating Excel file: {filename}")

    # Use axis calibration from Cell 8
    if 'axis_calibration' in globals():
        axis_cal = axis_calibration
    else:
        # Fallback calibration
        h, w = image_np.shape[:2]
        axis_cal = {
            'x_pixel': [int(w * 0.15), int(w * 0.90)],
            'y_pixel': [int(h * 0.85), int(h * 0.10)],
            'x_real': [0, 1000],
            'y_real': [0.001, 10],
            'x_scale': 'linear',
            'y_scale': 'log10'
        }

    with pd.ExcelWriter(filename, engine='xlsxwriter') as writer:
        for color_name, curve_data in extracted_curves.items():
            print(f"Processing {color_name} curve...")

            x_scientific, y_scientific = convert_pixel_to_scientific(
                curve_data['pixel_coordinates'],
                axis_cal
            )

            simple_data = pd.DataFrame({
                'X': x_scientific,
                'Y': y_scientific
            })

            sheet_name = color_name[:31]
            simple_data.to_excel(writer, sheet_name=sheet_name, index=False)
            print(f"      {color_name}: {len(x_scientific)} data points exported")

    print(f"\\nExcel file created successfully: {filename}")

    try:
        files.download(filename)
        print(f"File download initiated successfully!")
        return True
    except Exception as download_error:
        print(f"Auto-download failed, but file is saved: {filename}")
        return True

excel_export_success = create_excel_export()
            """, globals())

            return {
                'type': 'excel_export',
                'success': True,
                'message': 'Excel file created and download initiated!',
                'curves_exported': list(extracted_curves.keys()),
                'total_points': sum(curve['point_count'] for curve in extracted_curves.values()),
                'cell_executed': '12'
            }

        except Exception as e:
            # Unlike the _execute_cell_* helpers, failures are reported in the
            # result dict instead of being re-raised.
            return {
                'type': 'excel_export',
                'success': False,
                'message': f'Excel export error: {str(e)}',
                'error': str(e)
            }

    def _analyze_axes(self, intent):
        return {
            'type': 'axis_analysis',
            'result': 'Axis analysis available through curve extraction pipeline.',
            'suggestion': 'Ask to "extract curves" for complete graph analysis including axes.'
        }

    def _handle_coordinates(self, intent):
        print("ðŸŽ¯ Analyzing graph coordinates...")

        if 'extracted_curves' not in globals() or not extracted_curves:
            print("ðŸ“Š No curves extracted yet. Running extraction pipeline first...")

            extraction_result = self._trigger_extraction_pipeline({
                'type': 'curve_extraction',
                'colors': [],
                'mode': 'auto'
            })

            if not extraction_result.get('pipeline_executed') or not extracted_curves:
                return {
                    'type': 'coordinates',
                    'result': 'Unable to extract coordinates. No curves were found in the image.',
                    'suggestion': 'Check image quality or try specifying specific colors to extract.'
                }

        def get_smart_calibration():
            if 'axis_calibration' in globals():
                cal = axis_calibration.copy()
                height, width = self.image.shape[:2]
                if 'x_pixel' not in cal or 'y_pixel' not in cal:
                    cal['x_pixel'] = [int(width * 0.15), int(width * 0.85)]
                    cal['y_pixel'] = [int(height * 0.85), int(height * 0.15)]
                return cal
            else:
                height, width = self.image.shape[:2]
                return {
                    'x_pixel': [int(width * 0.15), int(width * 0.85)],
                    'y_pixel': [int(height * 0.85), int(height * 0.15)],
                    'x_real': [0, 100],
                    'y_real': [0, 100],
                    'x_scale': 'linear',
                    'y_scale': 'linear'
                }

        cal = get_smart_calibration()

        coordinate_analysis = []

        for color_name, curve_data in extracted_curves.items():
            pixel_coords = curve_data['pixel_coordinates']

            if len(pixel_coords) == 0:
                continue

            try:
                x_sci, y_sci = self._convert_to_scientific_coords(pixel_coords, cal)
            except Exception as e:
                print(f"Conversion failed for {color_name}: {e}")
                continue

            x_min, x_max = np.min(x_sci), np.max(x_sci)
            y_min, y_max = np.min(y_sci), np.max(y_sci)

            if len(y_sci) > 10:
                quarter = len(y_sci) // 4
                start_y = np.median(y_sci[:quarter])
                end_y = np.median(y_sci[-quarter:])
                y_range = y_max - y_min
                y_change = abs(end_y - start_y)

                relative_change = y_change / max(y_range, 1e-10)
                growth_ratio = end_y / max(start_y, 1e-10) if start_y != 0 else float('inf')

                if relative_change < 0.15:
                    trend = "stays nearly flat"
                    trend_detail = f"around ~{np.median(y_sci):.2f}"
                elif growth_ratio > 3 or relative_change > 0.5:
                    trend = "rises steeply"
                    trend_detail = ""
                elif growth_ratio > 1.3 or relative_change > 0.25:
                    trend = "rises gradually"
                    trend_detail = ""
                elif growth_ratio < 0.7:
                    trend = "decreases"
                    trend_detail = ""
                else:
                    trend = "shows moderate variation"
                    trend_detail = ""
            else:
                trend = "has limited data points"
                trend_detail = ""

            curve_desc = f"The {color_name} curve"

            if cal.get('y_scale') == 'log10':
                if y_min < 0.01:
                    y_min_str = f"{y_min:.2e}"
                else:
                    y_min_str = f"{y_min:.1f}"

                if y_max < 0.01:
                    y_max_str = f"{y_max:.2e}"
                else:
                    y_max_str = f"{y_max:.1f}"
            else:
                y_min_str = f"{y_min:.2f}"
                y_max_str = f"{y_max:.2f}"

            x_min_str = f"{x_min:.1f}"
            x_max_str = f"{x_max:.1f}"

            if trend_detail:
                coord_desc = f"{curve_desc} {trend} {trend_detail} from {x_min:.1f} to {x_max:.1f}."
            else:
                coord_desc = f"{curve_desc} starts at around ({x_min_str}, {y_min_str}), {trend}, and reaches about ({x_max_str}, {y_max_str})."

            coordinate_analysis.append(coord_desc)

        if not coordinate_analysis:
            return {
                'type': 'coordinates',
                'result': 'No curve coordinates could be analyzed.',
                'suggestion': 'Try extracting specific colors or check image quality.'
            }

        full_analysis = "\n".join(coordinate_analysis)
        full_analysis += "\n\nIf you'd like, I can extract exact coordinate pairs from each curve and output them as CSV or plot them separately."

        return {
            'type': 'coordinates',
            'result': full_analysis,
            'curves_analyzed': len(coordinate_analysis),
            'total_points': sum(curve['point_count'] for curve in extracted_curves.values())
        }

    def _convert_to_scientific_coords(self, pixel_coords, axis_calibration):
        x_pixel = axis_calibration['x_pixel']
        y_pixel = axis_calibration['y_pixel']
        x_real = axis_calibration['x_real']
        y_real = axis_calibration['y_real']
        x_scale = axis_calibration.get('x_scale', 'linear')
        y_scale = axis_calibration.get('y_scale', 'linear')

        scientific_x = []
        scientific_y = []

        for px, py in pixel_coords:
            if x_scale == 'linear':
                x_sci = x_real[0] + (px - x_pixel[0]) * (x_real[1] - x_real[0]) / (x_pixel[1] - x_pixel[0])
            elif x_scale == 'log10':
                log_x_real = [np.log10(max(x_real[0], 1e-10)), np.log10(max(x_real[1], 1e-10))]
                log_x_interp = log_x_real[0] + (px - x_pixel[0]) * (log_x_real[1] - log_x_real[0]) / (x_pixel[1] - x_pixel[0])
                x_sci = 10 ** log_x_interp
            else:
                x_sci = px

            if y_scale == 'linear':
                y_sci = y_real[0] + (py - y_pixel[0]) * (y_real[1] - y_real[0]) / (y_pixel[1] - y_pixel[0])
            elif y_scale == 'log10':
                log_y_real = [np.log10(max(y_real[0], 1e-10)), np.log10(max(y_real[1], 1e-10))]
                log_y_interp = log_y_real[0] + (py - y_pixel[0]) * (log_y_real[1] - log_y_real[0]) / (y_pixel[1] - y_pixel[0])
                y_sci = 10 ** log_y_interp
            else:
                y_sci = py

            scientific_x.append(x_sci)
            scientific_y.append(y_sci)

        return np.array(scientific_x), np.array(scientific_y)

    def _general_analysis(self, intent):
        if self.gpt_available:
            return self._ai_general_analysis(intent)
        else:
            return {
                'type': 'general_visual',
                'result': 'Basic image loaded. Ask to extract curves, detect colors, or create excel files.',
                'note': 'For detailed AI analysis, enable GPT capability.'
            }

    def _ai_general_analysis(self, intent):
        print("Analyzing graph with AI...")

        img_buffer = io.BytesIO()
        Image.fromarray(self.image).save(img_buffer, format='PNG')
        img_base64 = base64.b64encode(img_buffer.getvalue()).decode()

        analysis_prompt = """
Analyze this scientific graph and provide a clear, structured description.

Format your response as natural prose with these sections:

**Overview**
Brief description of the graph type and what it shows.

**Axes**
* X-axis: units, range, scale type
* Y-axis: units, range, scale type

**Curves/Data**
For each visible curve/line/data series, describe:
* Color and line style
* What it represents (from legend if visible)
* General trend or behavior

**Key Insights**
Notable patterns, relationships, or scientific conclusions visible in the data.

Be specific about numerical ranges, units, and trends you can observe.
"""

        try:
            import openai
            response = openai.chat.completions.create(
                model="gpt-4o-mini",
                messages=[
                    {"role": "system", "content": analysis_prompt},
                    {
                        "role": "user",
                        "content": [
                            {"type": "text", "text": "What type of graph is this? Provide detailed analysis."},
                            {
                                "type": "image_url",
                                "image_url": {"url": f"data:image/png;base64,{img_base64}"}
                            }
                        ]
                    }
                ],
                max_tokens=1000,
                temperature=0.1
            )

            analysis_text = response.choices[0].message.content

            return {
                'type': 'general_visual',
                'result': analysis_text,
                'method': 'ai_analysis'
            }

        except Exception as e:
            print(f"AI analysis failed: {str(e)}")
            return {
                'type': 'general_visual',
                'result': f'AI analysis unavailable. Basic observation: Scientific graph with axes and multiple curves.',
                'error': str(e)
            }

    def _handle_other(self, intent):
        return {
            'type': 'unknown',
            'result': 'Request not recognized. Try: "extract curves", "what colors are available", "analyze this graph", or "create excel file".',
            'suggestions': ['extract black curve', 'what colors are available', 'analyze this graph', 'create excel file']
        }

print("Initializing intelligent router...")

# The router needs the uploaded image (`image_np`) and, optionally, the GPT
# flag set by the API-key cell; both are looked up as notebook globals.
if 'image_np' not in globals():
    print("No image found. Please run image upload cell first.")
    router = None
elif 'gpt_available' not in globals():
    print("No GPT configuration found. Please run setup cells first.")
    print("Will use computer vision only (limited analysis).")
    gpt_available = False
    router = GraphAnalysisRouter(image_np, gpt_available)
else:
    # `openai.api_key` exists as a module attribute even when no key is set
    # (it is then None), so check its value rather than its presence.
    if gpt_available and not getattr(openai, 'api_key', None):
        print("GPT marked available but no API key found. Using CV-only mode.")
        gpt_available = False

    router = GraphAnalysisRouter(image_np, gpt_available)

if router is not None:
    # Interactive loop: one request per iteration until the user types a stop word.
    while True:
        user_request = input("\nWhat would you like to do with this graph? (or 'done' to finish): ").strip()

        if user_request.lower() in ['done', 'stop', 'quit', 'exit']:
            print("Stopping now.")
            break

        if not user_request:
            user_request = "detect available colors and extract all curves"
            print(f"Using default: {user_request}")

        result = router.route_and_execute(user_request)

        print("\nRESULT:")
        if result.get('status') == 'clarification_needed':
            # The router asked a follow-up question; feed the answer back in
            # as the coordinate type and re-run the coordinate handler.
            print(f"{result.get('question', '')}")
            clarification = input(" ").strip()
            if clarification:
                intent = router._parse_intent(user_request)
                intent['coord_type'] = clarification
                result = router._handle_coordinates(intent)
                print(f"{result.get('result', 'Analysis complete')}")
            else:
                print("No clarification provided; request skipped.")
            continue

        # Handlers are not guaranteed to set every key, so read them defensively.
        result_type = result.get('type')

        if result_type == 'excel_export':
            print(f"{result.get('message', '')}")
            if result.get('success'):
                if 'curves_exported' in result:
                    print(f"Exported curves: {', '.join(result['curves_exported'])}")
                    print(f"Total data points: {result.get('total_points', 'n/a')}")
            elif 'suggestion' in result:
                print(f"{result['suggestion']}")
        elif 'result' in result:
            print(result['result'])
        elif 'summary' in result:
            print(f"{result['summary']}")
        else:
            print("Analysis complete")

        if 'cells_executed' in result:
            print(f"Cells executed: {', '.join(result['cells_executed'])}")
        elif 'cell_executed' in result:
            print(f"Cell executed: {result['cell_executed']}")

        if result_type == 'curve_extraction' and result.get('pipeline_executed'):
            print(f"{result.get('next_step', 'Analysis complete')}")
else:
    print("Cannot initialize router. Please check setup and try again.")

print("\nAnalysis session complete!")

In [None]:
#CELL 6
# Announce the colour scan (two lines of output).
print(
    "SCANNING IMAGE FOR AVAILABLE COLORS...",
    "Analyzing pixel distributions and color presence...",
    sep="\n",
)

# HSV copy of the uploaded RGB image; the colour masks below are built on it.
hsv_image = cv2.cvtColor(image_np, cv2.COLOR_RGB2HSV)

# detected_colors collects every colour that passes the pixel threshold;
# min_pixel_threshold is the count a colour must exceed to be "present".
detected_colors, min_pixel_threshold = {}, 50

def analyze_color_presence(hsv_img, color_name, color_range):
    """Measure how many pixels of ``hsv_img`` fall inside an HSV range.

    Args:
        hsv_img: Image already converted to HSV.
        color_name: Label of the colour (not used in the computation).
        color_range: ``(lower_hsv, upper_hsv)`` bounds passed to ``cv2.inRange``.

    Returns:
        dict with the matching ``pixel_count``, the binary ``mask``, a
        ``distribution_quality`` (area of the largest connected component
        divided by the total matching pixels, 0 when nothing matches) and
        ``present`` (whether the count exceeds the module-level
        ``min_pixel_threshold``).
    """
    lower, upper = (np.array(bound) for bound in color_range)
    mask = cv2.inRange(hsv_img, lower, upper)
    pixel_count = np.count_nonzero(mask)

    distribution_quality = 0
    if pixel_count > 0:
        num_labels, _, stats, _ = cv2.connectedComponentsWithStats(mask)
        # Label 0 is the background, so only rows 1.. describe real blobs.
        if num_labels > 1:
            distribution_quality = np.max(stats[1:, cv2.CC_STAT_AREA]) / pixel_count

    return {
        'pixel_count': pixel_count,
        'distribution_quality': distribution_quality,
        'mask': mask,
        'present': pixel_count > min_pixel_threshold,
    }

print("\nColor Detection Results:")

# Probe every configured colour and keep the ones that clear the threshold.
for color_name, color_range in selected_colors.items():
    analysis = analyze_color_presence(hsv_image, color_name, color_range)

    if not analysis['present']:
        print(f"{color_name:8}: {analysis['pixel_count']:5} pixels (Below threshold)")
        continue

    detected_colors[color_name] = {
        'range': color_range,
        'pixel_count': analysis['pixel_count'],
        'quality': analysis['distribution_quality'],
        'mask': analysis['mask']
    }

    # Bucket the largest-component share into a coarse label.
    if analysis['distribution_quality'] > 0.3:
        quality_desc = "High"
    elif analysis['distribution_quality'] > 0.1:
        quality_desc = "Medium"
    else:
        quality_desc = "Low"
    print(f"{color_name:8}: {analysis['pixel_count']:5} pixels (Quality: {quality_desc})")

if not detected_colors:
    print("\nNo colors detected above threshold!")
    print("Try adjusting color selection or check image quality")
else:
    print(f"\nFound {len(detected_colors)} processable colors")

print(f"\nSummary: {len(detected_colors)} colors ready for processing")

In [None]:
#CELL 7
# Stage banner for the infrastructure-removal cell (two lines of output).
print(
    "DETECTING AND REMOVING GRAPH INFRASTRUCTURE...",
    "Identifying text, axes, grid lines, and other non-curve elements...",
    sep="\n",
)

def detect_text_regions(image):
    """Build a mask of small, roughly square contours (candidate text glyphs).

    The RGB image is converted to grayscale, smoothed with a 2x2 close then
    open, and its external contours are filled into the mask when their area
    lies strictly between 10 and 500 and their bounding-box height/width
    ratio lies strictly between 0.2 and 5.

    Returns:
        uint8 mask with the same shape as the grayscale image; selected
        contours are filled with 255.
    """
    gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
    small_square = cv2.getStructuringElement(cv2.MORPH_RECT, (2, 2))

    smoothed = gray
    for operation in (cv2.MORPH_CLOSE, cv2.MORPH_OPEN):
        smoothed = cv2.morphologyEx(smoothed, operation, small_square)

    # NOTE(review): findContours treats every non-zero pixel as foreground and
    # the input here is a grayscale image, not a thresholded one - confirm this
    # isolates glyphs on light backgrounds.
    contours, _ = cv2.findContours(smoothed, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    text_mask = np.zeros_like(gray)
    for contour in contours:
        _, _, box_w, box_h = cv2.boundingRect(contour)
        if not 10 < cv2.contourArea(contour) < 500:
            continue
        if 0.2 < box_h / box_w < 5:
            cv2.fillPoly(text_mask, [contour], 255)
    return text_mask

def detect_straight_lines(image, min_line_length=50, angle_tolerance=5):
    """Build a mask of straight segments that look like axes or grid lines.

    Segments are found with Canny edges plus a probabilistic Hough transform.
    A segment is drawn into the mask (3 px wide) when it is longer than twice
    ``min_line_length`` or lies within ``angle_tolerance`` degrees of
    horizontal or vertical.

    Args:
        image: RGB image array.
        min_line_length: Minimum segment length passed to ``cv2.HoughLinesP``.
        angle_tolerance: Allowed deviation, in degrees, from exactly
            horizontal or vertical.

    Returns:
        uint8 mask with the same shape as the grayscale image; selected
        segments are drawn with 255.
    """
    gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
    edges = cv2.Canny(gray, 50, 150, apertureSize=3)
    lines = cv2.HoughLinesP(edges, 1, np.pi/180, threshold=100,
                            minLineLength=min_line_length, maxLineGap=10)

    line_mask = np.zeros_like(gray)
    if lines is None:
        return line_mask

    for x1, y1, x2, y2 in lines[:, 0]:
        x1, y1, x2, y2 = int(x1), int(y1), int(x2), int(y2)
        length = np.hypot(x2 - x1, y2 - y1)

        # Fold the direction into [0, 180) so a segment reads the same in
        # either drawing direction (e.g. -178 degrees is as horizontal as +2).
        angle = np.degrees(np.arctan2(y2 - y1, x2 - x1)) % 180.0
        is_horizontal = angle < angle_tolerance or angle > 180.0 - angle_tolerance
        is_vertical = abs(angle - 90.0) < angle_tolerance

        if length > min_line_length * 2 or is_horizontal or is_vertical:
            cv2.line(line_mask, (x1, y1), (x2, y2), 255, 3)
    return line_mask

print("Detecting text regions...")
text_mask = detect_text_regions(image_np)

print("Detecting straight lines (axes, grids)...")
line_mask = detect_straight_lines(image_np)

# Union of both masks, then a 3x3 close to bridge small gaps between them.
kernel = np.ones((3, 3), dtype=np.uint8)
infrastructure_mask = cv2.morphologyEx(
    cv2.bitwise_or(text_mask, line_mask),
    cv2.MORPH_CLOSE,
    kernel,
)

# Paint every masked pixel white on a copy; the uploaded image is left intact.
infrastructure_coords = np.nonzero(infrastructure_mask == 255)
cleaned_image = image_np.copy()
cleaned_image[infrastructure_coords] = (255, 255, 255)

print("Infrastructure removal complete")

In [None]:
#CELL 8
print("INTELLIGENT CURVE EXTRACTION - RESPECTING USER PREFERENCES")
print("READING USER PREFERENCES FROM INTERACTIVE SESSION...")

# Preferences are published as notebook globals by the interactive session;
# when they are absent, fall back to automatic extraction of every colour.
nlp_connected = 'user_extraction_mode' in globals()
if nlp_connected:
    extraction_mode = user_extraction_mode
    requested_colors = globals().get('user_requested_colors', [])
    extraction_instructions = globals().get('user_extraction_instructions', "")
    print("Natural Language Processing connection: ACTIVE")
else:
    extraction_mode = "auto"
    requested_colors = []
    extraction_instructions = ""
    print("Natural Language Processing connection: MISSING")

print("\nUSER PREFERENCE SUMMARY:")
print(f"\tMode: {extraction_mode.upper()}")
print(f"\tRequested Colors: {requested_colors or 'All detected'}")
if extraction_instructions:
    print(f"\tUser Instructions: '{extraction_instructions}'")
else:
    print("No specific instructions")
print(f"\tNLP Connected: {'YES' if nlp_connected else 'NO'}")

print("\nFILTERING COLORS BASED ON USER REQUEST...")
print(f"Original detected colors: {list(detected_colors)}")

def filter_colors_by_user_request(detected_colors, mode, requested_colors):
    """Select which detected colours to extract, according to the user's mode.

    * ``"auto"`` / ``"all"`` / any unknown mode: a shallow copy of every
      detected colour.
    * ``"selective"``: only the requested colours that were detected. If none
      of them were detected, each requested colour that has an entry in the
      module-level ``selected_colors`` is re-probed on ``image_np`` with a
      widened HSV range (a fixed dark range for ``'black'``) and accepted
      when at least 20 pixels match.

    Returns:
        dict mapping colour name to its detection record.
    """
    if mode == "auto":
        chosen = detected_colors.copy()
        print(f"AUTO MODE: Using all {len(chosen)} detected colors")
        return chosen

    if mode == "all":
        chosen = detected_colors.copy()
        print(f"ALL MODE: User requested all colors ({len(chosen)} detected)")
        return chosen

    if mode != "selective":
        print(f"UNKNOWN MODE '{mode}': Defaulting to all detected colors")
        return detected_colors.copy()

    print("SELECTIVE MODE: Processing specific user requests...")
    chosen = {}

    for name in requested_colors:
        if name not in detected_colors:
            print(f"{name.upper()}: Requested but not detected in image")
            print("This color may not be present or may need different detection parameters")
            continue
        chosen[name] = detected_colors[name]
        print(f"{name.upper()}: Found and will be extracted ({detected_colors[name]['pixel_count']} pixels)")

    if not chosen:
        print("WARNING: No requested colors were found!")
        print("FALLBACK: Will attempt manual detection for requested colors...")

        for name in requested_colors:
            # Only colours with a predefined HSV range can be re-probed.
            if name not in selected_colors:
                continue

            print(f"Attempting manual detection for {name.upper()}...")

            hsv_image = cv2.cvtColor(image_np, cv2.COLOR_RGB2HSV)
            color_range = selected_colors[name]
            lower_hsv, upper_hsv = color_range

            if name == 'black':
                # Fixed low-saturation, low-value window for black.
                expanded_lower = [0, 0, 0]
                expanded_upper = [180, 50, 80]
            else:
                # Widen hue by 20 and saturation/value by 40, clamped to range.
                expanded_lower = [
                    max(0, value - pad)
                    for value, pad in zip(lower_hsv, (20, 40, 40))
                ]
                expanded_upper = [
                    min(cap, value + pad)
                    for value, pad, cap in zip(upper_hsv, (20, 40, 40), (180, 255, 255))
                ]

            mask = cv2.inRange(hsv_image, np.array(expanded_lower), np.array(expanded_upper))
            pixel_count = np.count_nonzero(mask)
            min_threshold = 20

            if pixel_count < min_threshold:
                print(f"{name.upper()}: Manual detection failed ({pixel_count} pixels, needed {min_threshold})")
                continue

            chosen[name] = {
                'range': color_range,
                'pixel_count': pixel_count,
                'quality': 0.8,
                'mask': mask
            }
            print(f"{name.upper()}: Manual detection successful ({pixel_count} pixels)")

    print(f"SELECTIVE RESULT: Will extract {len(chosen)} colors: {list(chosen.keys())}")
    return chosen

processing_colors = filter_colors_by_user_request(detected_colors, extraction_mode, requested_colors)

if not processing_colors:
    # Nothing matched the request: explain why, then fall back to every
    # detected colour so the following cells still have something to work on.
    diagnostics = (
        "\nCRITICAL ERROR: No colors available for extraction!",
        f"You requested: {requested_colors or 'Auto-detect'}",
        f"Available colors: {list(detected_colors)}",
        "Suggestions:",
        "- Check if the requested color is visible in the image",
        "- Try using 'auto' mode to see what colors are detected",
        "- Verify the color name spelling",
        "EMERGENCY FALLBACK: Using all detected colors to avoid total failure...",
    )
    for message in diagnostics:
        print(message)

    processing_colors = detected_colors.copy()

print(f"\nFINAL PROCESSING LIST: {list(processing_colors)}")
print("\nSTARTING EXTRACTION OF USER-REQUESTED COLORS...")

def trace_black_curve_enhanced(image_np, graph_bounds, roi=None, threshold_method='black_specific',
                              canny_params=(30, 80), min_pixels=100):
    """Trace the single most curve-like dark component in an RGB image.

    Pipeline: crop to a region of interest, binarise dark pixels, subtract
    long horizontal and vertical runs, skeletonise, then score each connected
    component and keep the best one.

    Args:
        image_np: RGB image array.
        graph_bounds: ``(left, top, right, bottom)`` of the plot area, or
            None. Used to derive the ROI (inset by 15 px) when ``roi`` is not
            given.
        roi: Optional explicit ``(y1, y2, x1, x2)`` crop.
        threshold_method: ``'black_specific'`` (HSV range OR'd with a gray
            threshold), ``'canny'``, ``'binary'``; any other value selects
            adaptive thresholding.
        canny_params: ``(low, high)`` thresholds, used only for ``'canny'``.
        min_pixels: Minimum skeleton area for a component to be considered.

    Returns:
        None when no component survives the filters; otherwise an ``(N, 2)``
        array of ``(x, y)`` points in full-image pixel coordinates, sorted by
        x, with the median y taken wherever several points share an x.
    """

    gray = cv2.cvtColor(image_np, cv2.COLOR_RGB2GRAY)

    # Derive a ROI from the plot bounds, inset by a margin on every side.
    if roi is None and graph_bounds is not None:
        left, top, right, bottom = graph_bounds
        margin = 15
        roi = (top + margin, bottom - margin, left + margin, right - margin)
        print(f"\tUsing data-focused ROI to avoid borders: {roi}")

    # Crop both the gray and RGB views; remember the offset so points can be
    # mapped back to full-image coordinates at the end.
    if roi is not None:
        y1, y2, x1, x2 = roi
        roi_img = gray[y1:y2, x1:x2]
        roi_rgb = image_np[y1:y2, x1:x2]
        roi_offset = (x1, y1)
    else:
        roi_img = gray.copy()
        roi_rgb = image_np.copy()
        roi_offset = (0, 0)

    if threshold_method == 'black_specific':
        # Dark pixels by HSV: any hue, saturation <= 50, value <= 80.
        hsv = cv2.cvtColor(roi_rgb, cv2.COLOR_RGB2HSV)

        lower_black = np.array([0, 0, 0])
        upper_black = np.array([180, 50, 80])  # upper bounds on saturation/value
        black_mask = cv2.inRange(hsv, lower_black, upper_black)

        # Second criterion: gray level at or below 60 (inverted binary threshold).
        _, gray_thresh = cv2.threshold(roi_img, 60, 255, cv2.THRESH_BINARY_INV)

        # A pixel counts as dark if either criterion marks it.
        edges = cv2.bitwise_or(black_mask, gray_thresh)

        # 2x2 close then open to remove speckle.
        kernel = np.ones((2, 2), np.uint8)
        edges = cv2.morphologyEx(edges, cv2.MORPH_CLOSE, kernel)
        edges = cv2.morphologyEx(edges, cv2.MORPH_OPEN, kernel)

    elif threshold_method == 'canny':
        edges = cv2.Canny(cv2.GaussianBlur(roi_img, (5, 5), 0),
                          canny_params[0], canny_params[1])
    elif threshold_method == 'binary':
        _, edges = cv2.threshold(roi_img, 50, 255, cv2.THRESH_BINARY_INV)
    else:
        edges = cv2.adaptiveThreshold(roi_img, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                                    cv2.THRESH_BINARY_INV, 11, 2)

    # Morphological opening with 20-px line kernels isolates long horizontal
    # and vertical runs (axes, grid lines, bars) so they can be subtracted.
    kernel_h = cv2.getStructuringElement(cv2.MORPH_RECT, (20, 1))
    kernel_v = cv2.getStructuringElement(cv2.MORPH_RECT, (1, 20))

    horizontal_lines = cv2.morphologyEx(edges, cv2.MORPH_OPEN, kernel_h)
    vertical_lines = cv2.morphologyEx(edges, cv2.MORPH_OPEN, kernel_v)

    # Remove both horizontal and vertical line structures
    edges = cv2.subtract(edges, horizontal_lines)
    edges = cv2.subtract(edges, vertical_lines)

    # Thin what is left to one-pixel-wide strokes.
    thin = skeletonize(edges > 0)

    # Label the skeleton; label 0 is background, so <= 1 means nothing found.
    num_lbl, lbl, stats, _ = cv2.connectedComponentsWithStats(
        thin.astype(np.uint8), connectivity=8
    )
    if num_lbl <= 1:
        return None

    # Score every component; the highest score wins.
    best_component = None
    best_score = 0

    for i in range(1, num_lbl):
        if stats[i, cv2.CC_STAT_AREA] < min_pixels:
            continue

        mask = (lbl == i)
        ys, xs = np.where(mask)
        pts = np.column_stack([xs, ys])

        if len(pts) < 20:
            continue

        pts = pts[np.argsort(pts[:, 0])]

        x_span = np.max(pts[:, 0]) - np.min(pts[:, 0])
        y_span = np.max(pts[:, 1]) - np.min(pts[:, 1])

        # Skip components narrower than 100 px or shorter than 30 px.
        if x_span < 100 or y_span < 30:
            continue

        # pts is already sorted by x above; this re-sort repeats that ordering.
        x_sorted_indices = np.argsort(pts[:, 0])
        x_vals = pts[x_sorted_indices, 0]
        y_vals = pts[x_sorted_indices, 1]

        # Bonus points for trend, curvature and vertical position.
        trend_score = 0
        if len(y_vals) > 10:
            mid_point = len(y_vals) // 2
            y_start = np.mean(y_vals[:mid_point//2]) if mid_point//2 > 0 else y_vals[0]
            # NOTE(review): unary minus binds tighter than //, so this slice
            # starts at (-mid_point)//2, which can take one more element than
            # the leading slice - confirm the asymmetry is acceptable.
            y_end = np.mean(y_vals[-mid_point//2:]) if mid_point//2 > 0 else y_vals[-1]

            # NOTE(review): y values are pixel rows, so y_end > y_start means the
            # right end sits at a larger row index; if rows grow downward this
            # rewards curves that fall on screen - confirm the intended direction.
            if y_end > y_start:
                trend_score += 2

            # Compare path length (between x-sorted neighbours) with the
            # straight end-to-end distance.
            if len(pts) > 10:
                direct_distance = np.sqrt((pts[-1, 0] - pts[0, 0])**2 + (pts[-1, 1] - pts[0, 1])**2)
                actual_distance = np.sum(np.sqrt(np.diff(pts[:, 0])**2 + np.diff(pts[:, 1])**2))
                curvature_ratio = actual_distance / max(direct_distance, 1)

                # A ratio above 1.2 is treated as curved.
                if curvature_ratio > 1.2:
                    trend_score += 1

        # Bonus when the mean row is below 40% of the analysed height.
        avg_y = np.mean(pts[:, 1])
        if roi is None:
            img_height = gray.shape[0]
        else:
            img_height = roi_img.shape[0]

        if avg_y > img_height * 0.4:
            trend_score += 1

        # Final score: skeleton area x width/height ratio x (1 + bonuses).
        aspect_ratio = x_span / max(y_span, 1)
        area_score = stats[i, cv2.CC_STAT_AREA]

        score = area_score * aspect_ratio * (1 + trend_score)

        print(f"\tComponent {i}: area={area_score}, ratio={aspect_ratio:.1f}, trend={trend_score}, score={score:.1f}")

        if score > best_score:
            best_score = score
            best_component = i

    if best_component is None:
        print("      No suitable curved line found")
        return None

    # Collect the pixels of the winning component as (x, y) pairs.
    mask = (lbl == best_component)
    ys, xs = np.where(mask)
    pts = np.column_stack([xs, ys])

    # Sort by x-coordinate
    pts = pts[np.argsort(pts[:, 0])]

    # Collapse columns holding several skeleton pixels to their median y.
    unique_x = np.unique(pts[:, 0])
    if len(unique_x) < len(pts):
        new_pts = []
        for ux in unique_x:
            y_vals = pts[pts[:, 0] == ux, 1]
            median_y = np.median(y_vals)
            new_pts.append([ux, median_y])
        pts = np.array(new_pts)

    # Shift from ROI-relative back to full-image coordinates.
    pts[:, 0] += roi_offset[0]
    pts[:, 1] += roi_offset[1]

    print(f"      Selected component {best_component} with score {best_score:.1f}")
    print(f"      Curve spans: X={np.min(pts[:, 0]):.0f}-{np.max(pts[:, 0]):.0f}, Y={np.min(pts[:, 1]):.0f}-{np.max(pts[:, 1]):.0f}")
    return pts

def extract_simple_curve(image_np, graph_bounds, color_name, color_range,
                         legend_margin=80, border_margin=20):
    """Extract one coloured curve as ``(x, y)`` pixel points.

    The plot area is cropped (dropping a border on the left, top and bottom
    and a wider strip on the right), pixels inside ``color_range`` are
    masked, the mask is skeletonised, and the skeleton is reduced to one
    point per x column using the median y.

    Args:
        image_np: RGB image array.
        graph_bounds: ``(left, top, right, bottom)`` of the plot area.
        color_name: Label used in the progress messages.
        color_range: ``(lower_hsv, upper_hsv)`` bounds for ``cv2.inRange``.
        legend_margin: Pixels removed from the right edge of the plot area.
        border_margin: Pixels removed from the other three edges.

    Returns:
        Float array of shape ``(N, 2)`` in full-image coordinates, sorted by
        x, or None when the cropped region is empty or no pixel matches.
    """
    left, top, right, bottom = graph_bounds

    data_left = left + border_margin
    data_top = top + border_margin
    data_right = right - legend_margin
    data_bottom = bottom - border_margin

    data_roi = image_np[data_top:data_bottom, data_left:data_right]
    print(f"\tData ROI (excludes legend): {data_roi.shape}")

    # On small plot areas the margins can consume the whole region; cvtColor
    # rejects empty images, so stop here instead of crashing the cell.
    if data_roi.size == 0:
        print(f"\tData ROI is empty for {color_name}; plot area is smaller than the margins")
        return None

    # Mask the pixels whose HSV value lies inside the requested range.
    hsv_roi = cv2.cvtColor(data_roi, cv2.COLOR_RGB2HSV)
    lower_hsv, upper_hsv = color_range
    mask = cv2.inRange(hsv_roi, np.array(lower_hsv), np.array(upper_hsv))

    # 2x2 close to bridge single-pixel gaps before thinning.
    mask = cv2.morphologyEx(mask, cv2.MORPH_CLOSE, np.ones((2, 2), np.uint8))

    # Transposing first makes np.where yield (x, y) pairs.
    skeleton = skeletonize(mask > 0)
    coords = np.column_stack(np.where(skeleton.T))

    if len(coords) == 0:
        print(f"\tNo skeleton points found for {color_name}")
        return None

    # Group the points by x column and keep the median y of each group.
    order = np.argsort(coords[:, 0], kind='stable')
    xs_sorted = coords[order, 0]
    ys_sorted = coords[order, 1]
    unique_x, group_starts = np.unique(xs_sorted, return_index=True)
    median_y = [np.median(group) for group in np.split(ys_sorted, group_starts[1:])]

    final_coords = np.column_stack([unique_x, median_y]).astype(float)

    # Shift from ROI-relative back to full-image coordinates.
    final_coords[:, 0] += data_left
    final_coords[:, 1] += data_top

    print(f"      {color_name}: {len(final_coords)} points extracted")
    print(f"      Range: X={np.min(final_coords[:, 0]):.0f}-{np.max(final_coords[:, 0]):.0f}, Y={np.min(final_coords[:, 1]):.0f}-{np.max(final_coords[:, 1]):.0f}")

    return final_coords

# Main extraction loop: one curve per selected colour.
validated_colors = {}
extraction_results = {
    'requested': list(processing_colors.keys()),
    'successful': [],
    'failed': [],
    'total_points': 0
}

print(f"\nEXTRACTING CURVES FOR: {', '.join(processing_colors.keys())}")

for color_name, color_data in processing_colors.items():
    print(f"\nPROCESSING '{color_name.upper()}' (User Requested: {'YES' if color_name in requested_colors or extraction_mode != 'selective' else 'NO'})...")

    # Black uses the component-scoring tracer; every other colour uses the
    # HSV-mask extractor.
    is_black = color_name == 'black'
    if is_black:
        print("\tUSING ADVANCED BLACK CURVE ALGORITHM...")
        curve_points = trace_black_curve_enhanced(
            image_np,
            axis_calibration['bounds']
        )
    else:
        print("\tUSING SIMPLE DIRECT ALGORITHM...")
        curve_points = extract_simple_curve(
            image_np,
            axis_calibration['bounds'],
            color_name,
            color_data['range']
        )

    if curve_points is None:
        extraction_results['failed'].append(color_name)
        if is_black:
            print("\tBLACK: No suitable curve found")
        else:
            print(f"\t{color_name.upper()}: No curve found")
        continue

    # Visualization mask: the curve drawn as connected 3-px segments.
    combined_mask = np.zeros_like(color_data['mask'])
    for start, end in zip(curve_points[:-1], curve_points[1:]):
        pt1 = (int(start[0]), int(start[1]))
        pt2 = (int(end[0]), int(end[1]))
        cv2.line(combined_mask, pt1, pt2, 255, 3)

    # Non-black curves additionally get a filled dot on every point.
    if not is_black:
        for point in curve_points:
            cv2.circle(combined_mask, (int(point[0]), int(point[1])), 2, 255, -1)

    validated_colors[color_name] = {
        'original_data': color_data,
        'validated_mask': combined_mask,
        'valid_components': 1,
        'rejected_components': 0,
        'total_valid_area': len(curve_points),
        'extracted_points': curve_points
    }

    extraction_results['successful'].append(color_name)
    extraction_results['total_points'] += len(curve_points)

    print(f"\t{color_name.upper()}: {len(curve_points)} points extracted successfully")

# Prepare final data structure for subsequent cells
extracted_curves = {}
for color_name, data in validated_colors.items():
    if 'extracted_points' in data:
        points = data['extracted_points']
        extracted_curves[color_name] = {
            'pixel_coordinates': points,
            'point_count': len(points),
            'scientific_coordinates': ([], []),
            'confidence': data['original_data']['quality'],
            'x_range': (np.min(points[:, 0]), np.max(points[:, 0])),
            'y_range': (np.min(points[:, 1]), np.max(points[:, 1])),
            'validation_data': data
        }

# Final summary
print("\nUSER-CONTROLLED EXTRACTION COMPLETE!")
if extraction_instructions:
    print(f"\tUser Request: '{extraction_instructions}'")
else:
    print("\tNo specific user request")
print(f"\tExtraction Mode: {extraction_mode.upper()}")
print(f"\tColors Requested: {extraction_results['requested']}")
print(f"\tSuccessfully Extracted: {extraction_results['successful']}")
print(f"\tFailed to Extract: {extraction_results['failed']}")
print(f"\tTotal Data Points: {extraction_results['total_points']}")
print(f"\tCurves Ready: {len(extracted_curves)}")

if extraction_mode == "selective":
    # Judge success by which requested colours were delivered, not by counts:
    # the emergency fallback can extract colours the user never asked for.
    requested_set = set(requested_colors)
    delivered = requested_set.intersection(extraction_results['successful'])

    if requested_set and delivered == requested_set:
        print("\tPERFECT SUCCESS: Got exactly what you requested!")
    elif delivered:
        print(f"\tPARTIAL SUCCESS: Got {len(delivered)}/{len(requested_set)} requested colors")
    else:
        print("\tFAILED: Could not extract any of the requested colors")
        print("\tSuggestions: Check color visibility, try different color names, or use 'auto' mode")

print("\nProceeding to visualization...")

In [None]:
#CELL 9
print("ANALYZING CURVE INTERSECTIONS AND OVERLAPS...")

def find_curve_intersections(curve1_coords, curve2_coords, tolerance=3):
    """Find places where two curves come within `tolerance` of each other.

    Args:
        curve1_coords: Sequence or array of (x, y) points of the first curve.
        curve2_coords: Sequence or array of (x, y) points of the second curve.
        tolerance: Maximum Euclidean distance (inclusive) at which a pair of
            points is reported.

    Returns:
        A list of dicts, one per (curve1 point, curve2 point) pair within
        tolerance, with keys 'position' (midpoint of the pair),
        'curve1_index', 'curve2_index' and 'distance'. Every qualifying pair
        is reported, so a single visual crossing can yield several entries.
        Returns an empty list when either curve has no points.
    """
    if len(curve1_coords) == 0 or len(curve2_coords) == 0:
        return []

    # Coerce to arrays so plain Python lists work as well as ndarrays
    curve1 = np.asarray(curve1_coords, dtype=float)
    curve2 = np.asarray(curve2_coords, dtype=float)

    intersections = []

    for i, point1 in enumerate(curve1):
        # Distance from this curve1 point to every curve2 point
        distances = np.linalg.norm(curve2 - point1, axis=1)

        for j in np.flatnonzero(distances <= tolerance):
            intersections.append({
                'position': (point1 + curve2[j]) / 2,
                'curve1_index': i,
                'curve2_index': j,
                'distance': distances[j]
            })

    return intersections

def analyze_pixel_color_at_intersection(image, intersection_point, color_ranges):
    """Decide which candidate colour best matches the pixel at a point.

    Args:
        image: Image array; it is converted with COLOR_RGB2HSV, so it is
            treated as RGB.
        intersection_point: (x, y) position; both values are truncated to int.
        color_ranges: Mapping of colour name -> (lower, upper) bounds, each
            with at least three components. The midpoint of each pair is
            compared with the pixel's HSV value.

    Returns:
        (closest_color, confidence), where confidence is
        1 / (1 + distance / 100) for the closest colour. Returns (None, 0.0)
        when the point lies outside the image or no colour ranges are given.
    """
    x, y = int(intersection_point[0]), int(intersection_point[1])

    h, w = image.shape[:2]
    if not (0 <= x < w and 0 <= y < h):
        return None, 0.0
    if not color_ranges:
        # Nothing to compare against; avoids min() on an empty mapping
        return None, 0.0

    # Convert only the sampled pixel instead of the whole image on every call
    pixel_patch = np.ascontiguousarray(image[y:y + 1, x:x + 1])
    pixel_hsv = cv2.cvtColor(pixel_patch, cv2.COLOR_RGB2HSV)[0, 0].astype(float)

    color_distances = {}
    for color_name, (lower, upper) in color_ranges.items():
        # Work in float so uint8 bounds cannot wrap around when summed
        lower_hsv = np.asarray(lower, dtype=float)[:3]
        upper_hsv = np.asarray(upper, dtype=float)[:3]
        center_hsv = (lower_hsv + upper_hsv) / 2
        # NOTE(review): plain Euclidean distance ignores that hue is circular,
        # so a range near the hue wrap-around may score poorly - confirm
        # whether that matters for the ranges used here.
        color_distances[color_name] = np.linalg.norm(pixel_hsv - center_hsv)

    closest_color = min(color_distances, key=color_distances.get)
    confidence = 1.0 / (1.0 + color_distances[closest_color] / 100)

    return closest_color, confidence

# Results keyed by "<color1>_<color2>"; stays empty when no pair of curves meets
intersection_analysis = {}

if len(extracted_curves) > 1:
    print("\nDetecting intersections between curves:")
    print("-" * 40)

    curve_names = list(extracted_curves.keys())

    # Visit every unordered pair of curves exactly once
    for i in range(len(curve_names)):
        for j in range(i + 1, len(curve_names)):
            color1, color2 = curve_names[i], curve_names[j]

            coords1 = extracted_curves[color1]['pixel_coordinates']
            coords2 = extracted_curves[color2]['pixel_coordinates']

            intersections = find_curve_intersections(coords1, coords2)

            if intersections:
                print(f"{color1} ↔ {color2}: {len(intersections)} intersections found")

                # Only the two colours of this pair compete for each point
                pair_ranges = {
                    color1: detected_colors[color1]['range'],
                    color2: detected_colors[color2]['range']
                }

                intersection_details = []
                for intersection in intersections:
                    dominant_color, confidence = analyze_pixel_color_at_intersection(
                        image_np, intersection['position'], pair_ranges
                    )

                    intersection_details.append({
                        'position': intersection['position'],
                        'dominant_color': dominant_color,
                        'confidence': confidence,
                        'distance': intersection['distance']
                    })

                # Computed once and used for both the stored quality and the report
                avg_confidence = np.mean([detail['confidence'] for detail in intersection_details])

                intersection_analysis[f"{color1}_{color2}"] = {
                    'intersections': intersection_details,
                    'count': len(intersections),
                    'quality': avg_confidence
                }

                print(f"Average resolution confidence: {avg_confidence:.2f}")

            else:
                print(f"{color1} ↔ {color2}: No intersections detected")

    total_intersections = sum(data['count'] for data in intersection_analysis.values())
    if total_intersections > 0:
        avg_quality = np.mean([data['quality'] for data in intersection_analysis.values()])
        print(f"\nIntersection Summary:")
        print(f"Total intersections: {total_intersections}")
        print(f"Average resolution quality: {avg_quality:.2f}")

        if avg_quality < 0.5:
            print("Some intersections have low resolution confidence")
    else:
        print("\nNo curve intersections detected - clean extraction")

else:
    print("Only one curve detected - no intersection analysis needed")

In [None]:
#CELL 10
print("CURVE EXTRACTION QUALITY ASSESSMENT")

def calculate_overall_quality_score(curve_data, target_points=200):
    """Score a curve by how many points were extracted for it.

    Args:
        curve_data: Mapping with a 'point_count' entry.
        target_points: Point count at which the score saturates at 1.0.
            Defaults to 200.

    Returns:
        (overall_quality, details): the score is point_count / target_points
        capped at 1.0, and details is {'density_score': <same score>}.

    Raises:
        ValueError: If target_points is not positive.
    """
    if target_points <= 0:
        raise ValueError("target_points must be a positive number")

    density_score = min(curve_data['point_count'] / target_points, 1.0)
    # Density is currently the only component of the overall score
    overall_quality = density_score
    return overall_quality, {'density_score': density_score}

quality_assessment = {}
overall_success_metrics = {'high_quality': 0, 'medium_quality': 0, 'low_quality': 0}

# (minimum score, label, tally key), ordered from best tier to worst;
# anything below the last threshold falls through to LOW.
quality_tiers = (
    (0.75, "HIGH", 'high_quality'),
    (0.5, "MEDIUM", 'medium_quality'),
)

print("\nIndividual Curve Quality Analysis:")

for color_name, curve_data in extracted_curves.items():
    overall_quality, quality_details = calculate_overall_quality_score(curve_data)

    # First tier whose threshold the score reaches wins
    quality_level, tally_key = next(
        ((tier_label, tier_key)
         for minimum_score, tier_label, tier_key in quality_tiers
         if overall_quality >= minimum_score),
        ("LOW", 'low_quality'),
    )
    overall_success_metrics[tally_key] += 1

    quality_assessment[color_name] = dict(
        overall_quality=overall_quality,
        quality_level=quality_level,
    )

    print(f"\n{color_name.upper()} CURVE - {quality_level} QUALITY")
    print(f"Overall Score: {overall_quality:.2f}")
    print(f"Points Extracted: {curve_data['point_count']}")

# Tally of curves per quality tier
print(f"\nOVERALL EXTRACTION SUMMARY")
print(f"High quality extractions: {overall_success_metrics['high_quality']}")
print(f"Medium quality extractions: {overall_success_metrics['medium_quality']}")
print(f"Low quality extractions: {overall_success_metrics['low_quality']}")

In [None]:
#CELL 11
print("VISUAL VALIDATION - CONFIRM EXTRACTION QUALITY")

# Approval always starts out cleared whenever this cell runs
excel_export_approved = False
print("Export approval status RESET - you must click a button below to proceed")

def create_curve_overlay_visualization(original_image, extracted_curves):
    """Draw the extracted curves on top of the source image.

    Args:
        original_image: Image passed to imshow as the backdrop.
        extracted_curves: Mapping of curve name -> dict holding a
            'pixel_coordinates' array indexed as [:, 0] (x) and [:, 1] (y).

    Returns:
        The matplotlib figure containing the overlay.
    """
    palette = ['red', 'blue', 'green', 'gold', 'teal', 'purple', 'orange', 'brown', 'gray', 'cyan', 'magenta', 'darkblue', 'darkgreen']

    fig = plt.figure(figsize=(20, 12))
    overlay_ax = fig.add_subplot(2, 3, 1)
    overlay_ax.imshow(original_image)
    overlay_ax.set_title('Original Image with Extracted Curves', fontweight='bold')
    overlay_ax.axis('off')

    for curve_index, (color_name, curve_data) in enumerate(extracted_curves.items()):
        coords = curve_data['pixel_coordinates']
        point_total = len(coords)
        if point_total == 0:
            # Nothing to draw; the palette slot for this curve is still consumed
            continue

        # Thin long curves to roughly 200 plotted points; the label keeps the full count
        thinned = coords[::max(1, point_total // 200)] if point_total > 200 else coords

        overlay_ax.plot(
            thinned[:, 0], thinned[:, 1],
            color=palette[curve_index % len(palette)],
            linewidth=2,
            alpha=0.8,
            label=f'{color_name} ({point_total} pts)',
        )

    # Legend sits just outside the right edge of the axes
    overlay_ax.legend(bbox_to_anchor=(1.05, 1), loc='upper left')
    return fig

print("\nCreating comprehensive visualization...")
if 'extracted_curves' in globals() and extracted_curves:
    visualization_figure = create_curve_overlay_visualization(image_np, extracted_curves)
    # pyplot.show() renders the open figures; its only parameter is the
    # keyword `block`, so the figure itself must not be passed in.
    plt.show()

    print(f"\nVALIDATION SUMMARY:")
    print(f"Curves analyzed: {len(extracted_curves)}")
    for color_name, curve_data in extracted_curves.items():
        print(f"  {color_name}: {curve_data['point_count']} points extracted")

    print("DATA EXPORT OPTION")
    print(f"Available curves for export: {list(extracted_curves.keys())}")
    print(f"Total data points ready: {sum(len(curve['pixel_coordinates']) for curve in extracted_curves.values())}")

    print("\nEXCEL EXPORT DECISION:")
    print("Click the button below to approve Excel export:")

    export_button = widgets.Button(
        description='APPROVE EXCEL EXPORT',
        button_style='success',
        layout=widgets.Layout(width='300px', height='50px')
    )

    decline_button = widgets.Button(
        description='DECLINE EXCEL EXPORT',
        button_style='danger',
        layout=widgets.Layout(width='300px', height='50px')
    )

    # All handler messages are written into this widget so they appear under the buttons
    output = widgets.Output()

    def on_approve_clicked(b):
        """Record approval, then build the Excel workbook and offer it for download."""
        import __main__
        __main__.excel_export_approved = True
        with output:
            output.clear_output()
            print("Excel export APPROVED!")
            print("Creating Excel file...")

            try:
                if 'extracted_curves' not in globals() or not extracted_curves:
                    print("ERROR: No extracted curves found!")
                    return

                print(f"Found {len(extracted_curves)} curves for export")

                def map_axis_to_scientific(pixel_values, pixel_span, real_span, scale):
                    """Map pixel positions along one axis onto that axis's data values.

                    pixel_span and real_span are the two calibration reference
                    points in pixel and data units. 'linear' interpolates
                    directly, 'log10' interpolates the base-10 logarithm, and
                    any other scale returns the pixel values unchanged.
                    """
                    if scale not in ('linear', 'log10'):
                        return pixel_values

                    pixel_extent = pixel_span[1] - pixel_span[0]
                    if pixel_extent == 0:
                        # Otherwise the division below would fill the export with inf/nan
                        raise ValueError("Axis calibration pixel reference points must differ")

                    if scale == 'linear':
                        return real_span[0] + (pixel_values - pixel_span[0]) * (real_span[1] - real_span[0]) / pixel_extent

                    # log10: clamp the references so the logarithm is defined
                    log_low = np.log10(max(real_span[0], 1e-10))
                    log_high = np.log10(max(real_span[1], 1e-10))
                    log_interp = log_low + (pixel_values - pixel_span[0]) * (log_high - log_low) / pixel_extent
                    return 10 ** log_interp

                def convert_pixel_to_scientific(pixel_coords, axis_calibration):
                    """Convert (x, y) pixel coordinates to data coordinates.

                    axis_calibration supplies 'x_pixel', 'y_pixel', 'x_real'
                    and 'y_real' reference pairs plus optional 'x_scale' /
                    'y_scale' entries (default 'linear'). Returns two arrays,
                    (x_values, y_values).
                    """
                    coords = np.asarray(pixel_coords)
                    if coords.size == 0:
                        return np.array([]), np.array([])

                    scientific_x = map_axis_to_scientific(
                        coords[:, 0],
                        axis_calibration['x_pixel'],
                        axis_calibration['x_real'],
                        axis_calibration.get('x_scale', 'linear'),
                    )
                    scientific_y = map_axis_to_scientific(
                        coords[:, 1],
                        axis_calibration['y_pixel'],
                        axis_calibration['y_real'],
                        axis_calibration.get('y_scale', 'linear'),
                    )
                    return np.asarray(scientific_x), np.asarray(scientific_y)

                def create_excel_export():
                    """Write one sheet per curve to a timestamped workbook.

                    Returns True when the file was written (even if the
                    browser download could not be started), False otherwise.
                    """
                    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
                    filename = f"curve_data_{timestamp}.xlsx"

                    print(f"Creating Excel file: {filename}")

                    if 'axis_calibration' in globals():
                        axis_cal = axis_calibration
                        print("Using axis calibration from pipeline")
                    else:
                        # NOTE(review): these reference values are placeholders, not
                        # read from the image; exported numbers are only meaningful
                        # when a real calibration is available - confirm this is intended.
                        h, w = image_np.shape[:2]
                        axis_cal = {
                            'x_pixel': [int(w * 0.15), int(w * 0.90)],
                            'y_pixel': [int(h * 0.85), int(h * 0.10)],
                            'x_real': [0, 1000],
                            'y_real': [0.001, 10],
                            'x_scale': 'linear',
                            'y_scale': 'log10'
                        }
                        print("Using fallback axis calibration")

                    # Curves without points cannot be range-checked or exported
                    exportable_curves = {}
                    for color_name, curve_data in extracted_curves.items():
                        if len(curve_data['pixel_coordinates']) > 0:
                            exportable_curves[color_name] = curve_data
                        else:
                            print(f"Skipping {color_name}: no data points to export")

                    if not exportable_curves:
                        print("ERROR: No data points available to export!")
                        return False

                    with pd.ExcelWriter(filename, engine='xlsxwriter') as writer:
                        for color_name, curve_data in exportable_curves.items():
                            print(f"Processing {color_name} curve...")

                            pixel_coords = curve_data['pixel_coordinates']

                            x_scientific, y_scientific = convert_pixel_to_scientific(pixel_coords, axis_cal)

                            print(f"      X range: {np.min(x_scientific):.3f} to {np.max(x_scientific):.3f}")
                            print(f"      Y range: {np.min(y_scientific):.6f} to {np.max(y_scientific):.6f}")

                            simple_data = pd.DataFrame({
                                'X': x_scientific,
                                'Y': y_scientific
                            })

                            # Excel limits sheet names to 31 characters
                            sheet_name = color_name[:31]
                            simple_data.to_excel(writer, sheet_name=sheet_name, index=False)

                            print(f"{color_name}: {len(x_scientific)} data points exported")

                    print(f"Excel file created successfully: {filename}")

                    if os.path.exists(filename):
                        file_size = os.path.getsize(filename)
                        print(f"File confirmed: {file_size} bytes")
                    else:
                        print(f"ERROR: File was not created!")
                        return False

                    try:
                        files.download(filename)
                        print(f"File download initiated!")
                        return True
                    except Exception as download_error:
                        # The workbook exists on disk, so this still counts as success
                        print(f"Auto-download failed: {str(download_error)}")
                        print(f"File saved as: {filename}")
                        return True

                export_success = create_excel_export()

                if export_success:
                    print(f"\nEXCEL EXPORT COMPLETE!")

                    # Report only the curves that actually received a sheet
                    exported_names = [
                        name for name, curve in extracted_curves.items()
                        if len(curve['pixel_coordinates']) > 0
                    ]
                    total_points = sum(extracted_curves[name]['point_count'] for name in exported_names)
                    print(f"\nEXPORT SUMMARY:")
                    print(f"Curves exported: {', '.join(exported_names)}")
                    print(f"Total data points: {total_points}")
                else:
                    print(f"\nEXCEL EXPORT FAILED!")

            except Exception as e:
                print(f"Excel export failed with error: {str(e)}")
                import traceback
                traceback.print_exc()

    def on_decline_clicked(b):
        """Record that the export was declined and tell the user how to change that."""
        import __main__
        __main__.excel_export_approved = False
        with output:
            output.clear_output()
            print("Excel export DECLINED.")
            print("You can click APPROVE above if you change your mind.")

    export_button.on_click(on_approve_clicked)
    decline_button.on_click(on_decline_clicked)

    button_box = widgets.HBox([export_button, decline_button])
    display(button_box)
    display(output)

    print(f"\nVISUALIZATION COMPLETE!")
    print(f"Use the buttons above to approve/decline Excel export")

else:
    print("\nNo curves found for visualization!")
    print("Please run the extraction pipeline first:")
    print("Go to Cell 5")
    print("Ask: 'extract the black line' (or your desired color)")
    print("Wait for pipeline to complete")
    print("Then run this Cell 11 again")

print(f"\nLine Extraction Process Complete!")