## Notebook Converter 1

In [1]:
# Muted brown used for the execution-count labels and the input-cell border.
CHILL_BROWN='#948979'
# Fill colour of the background rectangle drawn behind input code.
SOLARIZED_BACKGROUND_COLOR='#fdf4e0'
# Width (px) of the input-code background; also bounds output text wrapping and image width.
CELL_WIDTH=800
# Minimum width (px) of the generated SVG canvas.
CANVAS_WIDTH=950
# Font size (px) for cell output text and the output execution-count label.
OUTPUT_FONT_SIZE=11

import json
import re
import base64
import io
from pygments import highlight
from pygments.lexers import PythonLexer
from pygments.formatters import SvgFormatter
from pygments.styles import get_style_by_name
import xml.etree.ElementTree as ET

def wrap_text(text, max_chars_per_line=80, font_size=12):
    """
    Wrap long text so that no line exceeds a character limit.

    Lines are broken at spaces when possible. A single word longer than the
    limit is hard-split into limit-sized pieces, wherever it occurs on the
    line. Lines that already fit are returned untouched.

    Args:
        text (str): Text to wrap (may contain newlines).
        max_chars_per_line (int): Maximum characters per line; values below 1
            are treated as 1 so the hard-split loop always makes progress.
        font_size (int): Unused; kept for interface compatibility.

    Returns:
        str: Text with newlines inserted for wrapping.
    """
    if not text:
        return text

    # A zero/negative limit would make the hard-split loop below spin forever.
    max_chars_per_line = max(1, max_chars_per_line)

    wrapped_lines = []

    for line in text.split('\n'):
        if len(line) <= max_chars_per_line:
            wrapped_lines.append(line)
            continue

        current_line = ''
        for word in line.split(' '):
            candidate = f'{current_line} {word}' if current_line else word
            if len(candidate) <= max_chars_per_line:
                current_line = candidate
                continue

            # The word does not fit: flush what we have so far ...
            if current_line:
                wrapped_lines.append(current_line)

            # ... and hard-split the word itself if it alone is too long.
            # This runs regardless of whether the line was empty, so an
            # over-long word in the middle of a line is broken as well.
            while len(word) > max_chars_per_line:
                wrapped_lines.append(word[:max_chars_per_line])
                word = word[max_chars_per_line:]
            current_line = word

        if current_line:
            wrapped_lines.append(current_line)

    return '\n'.join(wrapped_lines)

def _join_multiline(value):
    """Return notebook multi-line content (a str or a list of str) as one string."""
    if isinstance(value, list):
        return ''.join(value)
    return value


def extract_cell_data(cell):
    """
    Extract code, outputs, and execution count from a Jupyter notebook cell.

    Args:
        cell (dict): A cell dictionary from the notebook's ``cells`` list.

    Returns:
        tuple: ``(text, output_items, execution_count)``.
            * code cell: stripped source, a list of output dicts and the
              execution count. Output dicts are either
              ``{'type': 'text', 'content': str}`` or
              ``{'type': 'image', 'format': 'png'|'jpeg'|'svg', 'data': ...}``,
              in notebook order.
            * markdown cell: ``(stripped_source, None, None)``.
            * anything else: ``(None, None, None)``.
    """
    cell_type = cell.get('cell_type')

    if cell_type == 'markdown':
        return _join_multiline(cell.get('source', [])).strip(), None, None

    if cell_type != 'code':
        return None, None, None

    code = _join_multiline(cell.get('source', []))
    execution_count = cell.get('execution_count')
    output_items = []

    for output in cell.get('outputs', []):
        if 'data' in output:
            data = output['data']

            if 'text/plain' in data:
                text_content = _join_multiline(data['text/plain'])
                output_items.append({'type': 'text', 'content': text_content.rstrip()})

            # Raster images are base64 text. The payload may be stored as a
            # list of lines; join it before stripping whitespace, otherwise
            # the list would be embedded verbatim in the data URL later on.
            for mime_type, image_format in (('image/png', 'png'), ('image/jpeg', 'jpeg')):
                if mime_type in data:
                    payload = _join_multiline(data[mime_type])
                    if isinstance(payload, str):
                        payload = ''.join(payload.split())
                    output_items.append({'type': 'image', 'format': image_format, 'data': payload})

            if 'image/svg+xml' in data:
                svg_data = _join_multiline(data['image/svg+xml'])
                output_items.append({'type': 'image', 'format': 'svg', 'data': svg_data})

        elif 'text' in output:
            # Outputs that carry their text directly under 'text'
            text_content = _join_multiline(output['text'])
            output_items.append({'type': 'text', 'content': text_content.rstrip()})

    return code.strip(), output_items, execution_count

def get_image_dimensions(image_data, image_format, max_width=CELL_WIDTH):
    """
    Determine the display size of an output image.

    SVG sizes are read from the ``width``/``height`` attributes of the root
    ``<svg>`` tag only (so ``stroke-width`` or attributes of child elements
    are never mistaken for the image size; percentage values are ignored).
    PNG sizes are read from the IHDR header of the base64 payload. Anything
    else, or anything that cannot be parsed, falls back to a default size.
    Images wider than ``max_width`` are scaled down proportionally.

    Args:
        image_data: Base64 encoded image data or SVG string
        image_format: 'png', 'jpeg', or 'svg'
        max_width: Maximum allowed width

    Returns:
        tuple: (width, height) as ints
    """
    import struct  # only needed to unpack the PNG header

    def _fit(width, height):
        # Shrink proportionally so the image never exceeds max_width.
        if width > max_width:
            scale = max_width / width
            width = max_width
            height = height * scale
        return int(width), int(height)

    if image_format == 'svg' and isinstance(image_data, str):
        # Restrict the search to the opening <svg ...> tag.
        root_tag = re.search(r'<svg\b[^>]*>', image_data)
        if root_tag:
            attrs = root_tag.group(0)
            # (?<![\w-]) rejects stroke-width etc.; (?![\d.%]) rejects
            # percentages and prevents matching only part of a number.
            width_match = re.search(
                r'(?<![\w-])width\s*=\s*["\']\s*(\d+(?:\.\d+)?)(?![\d.%])', attrs)
            height_match = re.search(
                r'(?<![\w-])height\s*=\s*["\']\s*(\d+(?:\.\d+)?)(?![\d.%])', attrs)

            if width_match and height_match:
                width = float(width_match.group(1))
                height = float(height_match.group(1))
                if width > 0 and height > 0:
                    return _fit(width, height)

    elif image_format == 'png' and isinstance(image_data, str):
        # The first 24 bytes of a PNG are: 8-byte signature, 4-byte chunk
        # length, b'IHDR', then big-endian width and height.
        # 32 base64 characters decode to exactly those 24 bytes.
        try:
            header = base64.b64decode(image_data[:32])
        except ValueError:  # binascii.Error is a ValueError subclass
            header = b''

        if (len(header) >= 24
                and header[:8] == b'\x89PNG\r\n\x1a\n'
                and header[12:16] == b'IHDR'):
            width, height = struct.unpack('>II', header[16:24])
            if width > 0 and height > 0:
                return _fit(width, height)

    # Default dimensions for JPEG, or when the size could not be determined
    default_width = min(400, max_width)
    default_height = 300
    return default_width, default_height

def create_image_element(image_data, image_format, x, y, max_width=CELL_WIDTH):
    """
    Create an SVG element that displays an output image.

    PNG/JPEG data becomes an ``<image>`` element with a base64 data URL.
    SVG data is parsed and embedded inline inside a translated ``<g>``; if it
    cannot be parsed, a grey placeholder rectangle of the same size is
    returned instead.

    Args:
        image_data: Base64 encoded data (for PNG/JPEG) or SVG string
        image_format: 'png', 'jpeg', or 'svg'
        x, y: Position coordinates
        max_width: Maximum width for the image

    Returns:
        tuple: (svg_element, width, height)
    """
    width, height = get_image_dimensions(image_data, image_format, max_width)

    if image_format != 'svg':
        # For PNG/JPEG, use image element with data URL
        img = ET.Element('image')
        img.set('x', str(x))
        img.set('y', str(y))
        img.set('width', str(width))
        img.set('height', str(height))

        mime_type = f'image/{image_format}'
        data_url = f'data:{mime_type};base64,{image_data}'
        img.set('href', data_url)

        return img, width, height

    try:
        svg_root = ET.fromstring(image_data)
    except (ET.ParseError, TypeError):
        # Unparseable (or non-string) SVG: draw a placeholder rectangle
        rect = ET.Element('rect')
        rect.set('x', str(x))
        rect.set('y', str(y))
        rect.set('width', str(width))
        rect.set('height', str(height))
        rect.set('fill', '#f0f0f0')
        rect.set('stroke', '#ccc')
        rect.set('stroke-width', '1')
        return rect, width, height

    # width/height only size the viewport. Without a viewBox the content is
    # not scaled to the new size (it would be cropped when we shrink the
    # image), so derive one from the original dimensions when it is missing.
    if svg_root.get('viewBox') is None:
        natural = [
            re.match(r'\s*(\d+(?:\.\d+)?)(?![\d.%])', svg_root.get(attr) or '')
            for attr in ('width', 'height')
        ]
        if all(natural) and all(float(m.group(1)) > 0 for m in natural):
            svg_root.set('viewBox', f'0 0 {natural[0].group(1)} {natural[1].group(1)}')

    # Force the computed display size (overrides any existing values)
    svg_root.set('width', str(width))
    svg_root.set('height', str(height))

    # Wrap the SVG in a group and position it
    group = ET.Element('g')
    group.set('transform', f'translate({x}, {y})')
    group.append(svg_root)
    return group, width, height

def simple_markdown_to_text(markdown_text):
    """
    Convert basic markdown formatting to plain text with simple styling hints.

    Each input line becomes one ``(text, style_info)`` tuple. Recognised:
    ``#`` to ``####`` headers, lines containing ``**`` (whole line bold),
    lines containing a ``*paired*`` emphasis span (whole line italic), empty
    lines, and fenced code blocks.

    A lone ``*`` (list bullet, multiplication sign) is left alone and does
    not turn the line italic. Lines inside a fenced block are emitted
    verbatim with type ``'code'`` and are not parsed as Markdown, so a
    ``# comment`` inside a fence is not rendered as a header. The fence
    marker lines themselves are dropped.

    Args:
        markdown_text (str): Raw markdown source.

    Returns:
        list: ``(text, style_info)`` tuples, where ``style_info`` is a dict
        with at least ``'type'`` and ``'size'``.
    """
    header_sizes = {1: 18, 2: 16, 3: 14, 4: 13}
    # An opening * followed by non-space, and a closing * preceded by non-space.
    emphasis = re.compile(r'\*(?=\S)(.+?)(?<=\S)\*')

    styled_lines = []
    in_code_block = False

    for raw_line in markdown_text.split('\n'):
        line = raw_line.strip()

        # Fence markers toggle code mode and are not rendered themselves
        if line.startswith('```'):
            in_code_block = not in_code_block
            continue

        if in_code_block:
            # Keep leading indentation of code; no markdown interpretation
            styled_lines.append((raw_line.rstrip(), {'type': 'code', 'size': 12}))
            continue

        # Headers
        header_level = next(
            (level for level in header_sizes if line.startswith('#' * level + ' ')),
            None,
        )
        if header_level is not None:
            styled_lines.append((
                line[header_level + 1:],
                {'type': f'h{header_level}', 'weight': 'bold', 'size': header_sizes[header_level]},
            ))
        # Bold text (simple case): drop the markers, mark the whole line bold
        elif '**' in line:
            styled_lines.append((line.replace('**', ''), {'type': 'bold', 'weight': 'bold', 'size': 12}))
        # Italic text: only when a real *paired* span exists
        elif emphasis.search(line):
            styled_lines.append((emphasis.sub(r'\1', line), {'type': 'italic', 'style': 'italic', 'size': 12}))
        # Empty lines
        elif line == '':
            styled_lines.append(('', {'type': 'empty', 'size': 12}))
        # Regular text
        else:
            styled_lines.append((line, {'type': 'normal', 'size': 12}))

    return styled_lines

def create_text_element(text, x, y, font_family='Menlo', font_size=12, fill='black', font_weight='normal', font_style='normal'):
    """
    Create an SVG ``<text>`` element.

    String content is sanitised first: ANSI escape sequences (colour codes
    in terminal-style output) are removed, followed by any remaining control
    characters that are not allowed in XML 1.0. ElementTree writes such
    characters through unescaped, which would make the saved SVG ill-formed.

    Args:
        text: Text content; empty/None content is rendered as a single space.
        x, y: Position of the text anchor.
        font_family, font_size, fill, font_weight: Text styling.
        font_style: Only written to the element when not 'normal'.

    Returns:
        xml.etree.ElementTree.Element: the ``<text>`` element.
    """
    if isinstance(text, str):
        # ANSI CSI sequences: ESC [ <params> <intermediates> <final byte>
        text = re.sub(r'\x1b\[[0-?]*[ -/]*[@-~]', '', text)
        # Control characters that XML 1.0 forbids (tab, LF, CR are legal)
        text = re.sub(r'[\x00-\x08\x0b\x0c\x0e-\x1f]', '', text)

    text_elem = ET.Element('text')
    text_elem.set('x', str(x))
    text_elem.set('y', str(y))
    text_elem.set('font-family', font_family)
    text_elem.set('font-size', f'{font_size}px')
    text_elem.set('fill', fill)
    text_elem.set('font-weight', font_weight)
    if font_style != 'normal':
        text_elem.set('font-style', font_style)
    text_elem.text = text if text else ' '
    return text_elem

def create_multiline_text(text, x, y, font_family='Menlo', font_size=12, fill='black', line_height=1.4, max_width=None):
    """
    Render text as a group of SVG ``<text>`` elements, one per line.

    When ``max_width`` is given, the text is first wrapped with
    ``wrap_text`` using an estimated character budget (a character is
    assumed to be about ``0.6 * font_size`` pixels wide).

    Args:
        text (str): Text to render
        x, y (float): Position of the first line
        font_family, font_size, fill: Text styling
        line_height (float): Line height multiplier
        max_width (int): Optional pixel width used for wrapping

    Returns:
        tuple: (text_group_element, total_height_used)
    """
    if max_width:
        # Rough monospace estimate: ~0.6 * font_size pixels per character
        budget = int(max_width / (font_size * 0.6))
        text = wrap_text(text, max_chars_per_line=budget, font_size=font_size)

    rows = text.split('\n')
    group = ET.Element('g')

    for index, row in enumerate(rows):
        # Blank rows still get an element (holding a space) to keep spacing
        content = row if row.strip() else ' '
        row_y = y + (index * font_size * line_height)
        group.append(create_text_element(content, x, row_y, font_family, font_size, fill))

    total_height = len(rows) * font_size * line_height
    return group, total_height

def create_markdown_cell_svg_group(cell_data, y_offset, font_family='Menlo', max_width=800):
    """
    Build the SVG group for a markdown cell starting at ``y_offset``.

    The cell is drawn as styled text only (no background, no border).
    Header lines use a sans-serif font stack and get a little extra space
    below them; all other lines use ``font_family``.

    Args:
        cell_data (dict): Jupyter cell dictionary
        y_offset (float): Y position at which the cell starts
        font_family (str): Font for non-header lines
        max_width (int): Accepted for signature parity; not used here

    Returns:
        tuple: (svg_group_element, height_used), or (None, 0) when the cell
        yields no markdown text.
    """
    markdown_text, _, _ = extract_cell_data(cell_data)
    if markdown_text is None:
        return None, 0

    # Layout constants
    left_margin = 20
    line_spacing = 1.5
    gap_after_cell = 15
    heading_font = 'system-ui, -apple-system, sans-serif'

    group = ET.Element('g')
    cursor_y = y_offset

    for content, style in simple_markdown_to_text(markdown_text):
        size = style.get('size', 12)
        is_heading = style['type'].startswith('h')

        group.append(create_text_element(
            content, left_margin, cursor_y + size,
            heading_font if is_heading else font_family,
            size, '#333',
            style.get('weight', 'normal'),
            style.get('style', 'normal'),
        ))

        # Advance by one line; headers get extra breathing room
        cursor_y += size * line_spacing
        if is_heading:
            cursor_y += size * 0.3

    return group, cursor_y - y_offset + gap_after_cell

def create_cell_svg_group(cell_data, y_offset, font_family='Menlo', font_size=12, max_width=800):
    """
    Create an SVG group element for a single cell at a given y offset.

    Markdown cells are delegated to ``create_markdown_cell_svg_group``. Code
    cells are drawn as: an execution-count label, a background rectangle,
    the Pygments-highlighted source, and then each output (wrapped text or
    image) stacked below.

    The per-line step passed to Pygments (``ystep``) is the same value used
    to compute the height of the code background, so the rectangle keeps
    matching the rendered text for cells of any length.

    Args:
        cell_data (dict): Jupyter cell dictionary
        y_offset (float): Y position to start this cell
        font_family (str): Font family to use
        font_size (int): Font size in pixels
        max_width (int): Maximum width for the cell (forwarded to the
            markdown renderer; the code background uses CELL_WIDTH)

    Returns:
        tuple: (svg_group_element, height_used), or (None, 0) if the cell
        has no extractable content.
    """
    # Markdown cells have their own renderer
    if cell_data.get('cell_type') == 'markdown':
        return create_markdown_cell_svg_group(cell_data, y_offset, font_family, max_width)

    code, output_items, execution_count = extract_cell_data(cell_data)

    if code is None:
        return None, 0

    # Constants for layout
    margin = 20
    execution_count_width = 60
    cell_padding = 12
    line_height = 1.4
    cell_spacing = 10  # Space between cells

    # Whole-pixel distance between code baselines. Pygments' ystep is an
    # integer option; using one value for both the formatter and the height
    # calculation prevents the background from drifting away from the text.
    line_step = max(1, round(font_size * line_height))

    # Create lexer and formatter for Python code
    lexer = PythonLexer()
    formatter = SvgFormatter(
        style='solarized-light',
        fontfamily=font_family,
        fontsize=f'{font_size}px',
        ystep=line_step,
        linenos=False,
        noclasses=True,
        nobackground=True
    )

    # Generate highlighted code SVG
    code_svg = highlight(code, lexer, formatter)
    code_root = ET.fromstring(code_svg)

    # Create group for this cell
    cell_group = ET.Element('g')

    # Calculate dimensions
    code_lines = len(code.split('\n'))
    code_height = code_lines * line_step

    # Start position within this cell
    current_y = y_offset

    # Execution count for input
    exec_count_text = f"[{execution_count or ' '}]:"
    exec_count_elem = create_text_element(
        exec_count_text, margin, current_y + font_size,
        font_family, font_size, CHILL_BROWN, 'normal'
    )
    cell_group.append(exec_count_elem)

    # Input code area with background
    code_x = margin + execution_count_width
    code_y = current_y

    # Background rectangle for input code (fixed CELL_WIDTH wide)
    input_bg = ET.Element('rect')
    input_bg.set('x', str(code_x - cell_padding))
    input_bg.set('y', str(code_y - cell_padding//2))
    input_bg.set('width', str(CELL_WIDTH))
    input_bg.set('height', str(code_height + cell_padding))
    input_bg.set('fill', SOLARIZED_BACKGROUND_COLOR)
    input_bg.set('stroke', CHILL_BROWN)
    input_bg.set('opacity', '0.2')
    input_bg.set('stroke-width', '1')
    cell_group.append(input_bg)

    # Add the syntax-highlighted code
    code_group = ET.Element('g')
    code_group.set('transform', f'translate({code_x}, {code_y})')

    # Extract the highlighted content from pygments SVG
    for elem in code_root:
        if elem.tag.endswith('g') or elem.tag.endswith('text'):
            code_group.append(elem)

    cell_group.append(code_group)

    # Move to output section
    input_output_spacing = cell_padding
    current_y += code_height + input_output_spacing

    # Handle outputs if they exist
    if output_items:
        # Execution count for output
        output_exec_count = create_text_element(
            exec_count_text, margin, current_y + OUTPUT_FONT_SIZE,
            font_family, OUTPUT_FONT_SIZE, CHILL_BROWN, 'normal'
        )
        cell_group.append(output_exec_count)

        # Process each output item, stacking them vertically
        for output_item in output_items:
            if output_item['type'] == 'text':
                # Handle text output with wrapping
                available_width = CELL_WIDTH - cell_padding * 2
                output_group, output_height = create_multiline_text(
                    output_item['content'], code_x, current_y + OUTPUT_FONT_SIZE,
                    font_family, OUTPUT_FONT_SIZE, '#333',
                    max_width=available_width
                )
                cell_group.append(output_group)
                current_y += output_height + cell_padding

            elif output_item['type'] == 'image':
                # Handle image output
                image_elem, img_width, img_height = create_image_element(
                    output_item['data'], output_item['format'],
                    code_x, current_y + cell_padding, max_width=CELL_WIDTH - cell_padding * 2
                )
                cell_group.append(image_elem)
                current_y += img_height + cell_padding * 2  # Extra spacing after images

    # Total cell height, plus spacing after the cell
    cell_height = current_y - y_offset
    cell_height += cell_spacing

    return cell_group, cell_height

In [2]:
def process_jupyter_notebook(notebook_path, output_path='notebook.svg', font_family='Menlo', font_size=12, max_width=900, split_sections=False):
    """
    Convert the code and markdown cells of a Jupyter notebook to SVG file(s).

    Args:
        notebook_path (str): Path to the .ipynb file
        output_path (str): Where to save the SVG file(s) (may include a directory)
        font_family (str): Font family to use
        font_size (int): Font size in pixels
        max_width (int): Maximum width of the SVG
        split_sections (bool): When true, one SVG is produced per section
            (sections start at `#` or `##` headers); otherwise a single SVG.

    Returns:
        str or list: The SVG content (single file) or a list of SVG
        contents (one per section).
    """
    import os

    with open(notebook_path, 'r', encoding='utf-8') as notebook_file:
        notebook = json.load(notebook_file)

    # Pick the renderer once, then call it with the shared arguments
    render = _create_section_svgs if split_sections else _create_single_svg
    return render(notebook, output_path, font_family, font_size, max_width)

def _is_section_header(cell):
    """
    Tell whether a cell opens a new section.

    A cell qualifies when it is a markdown cell and at least one of its
    lines (after stripping whitespace) starts with `# ` or `## `. Deeper
    headers (`###` and below) do not count.
    """
    if cell.get('cell_type') != 'markdown':
        return False

    markdown_text, _, _ = extract_cell_data(cell)
    if not markdown_text:
        return False

    # '### ' can never start with '# ' or '## ', so a tuple prefix test is enough
    return any(
        candidate.strip().startswith(('# ', '## '))
        for candidate in markdown_text.strip().split('\n')
    )

def _get_section_name(cell):
    """
    Return the title of the first `# ` or `## ` header line in a cell.

    Falls back to "untitled" when the cell has no text or no such header.
    """
    markdown_text, _, _ = extract_cell_data(cell)
    if not markdown_text:
        return "untitled"

    for raw in markdown_text.strip().split('\n'):
        stripped = raw.strip()
        for marker in ('# ', '## '):
            if stripped.startswith(marker):
                # Drop the marker, then any surrounding whitespace
                return stripped[len(marker):].strip()

    return "untitled"

def _sanitize_filename(name):
    """Convert section name to a safe filename."""
    import re
    # Remove or replace unsafe characters
    name = re.sub(r'[<>:"/\\|?*]', '_', name)
    # Replace spaces with underscores
    name = name.replace(' ', '_')
    # Remove multiple underscores
    name = re.sub(r'_+', '_', name)
    # Remove leading/trailing underscores
    name = name.strip('_')
    # Limit length
    name = name[:50]
    return name or "section"

def _create_section_svgs(notebook, output_path, font_family, font_size, max_width):
    """
    Write one SVG per notebook section and return their contents.

    Cells are grouped into sections: every header cell (see
    ``_is_section_header``) starts a new section and is included in it.
    Cells before the first header form a section named "introduction".
    Files are named ``<base>_<NN>_<section>.svg`` next to ``output_path``.

    Returns:
        list: SVG strings for the sections that produced content.
    """
    import os

    cells = notebook.get('cells', [])
    if not cells:
        print("No cells found in notebook")
        return []

    # Output directory and base filename come from output_path
    output_dir = os.path.dirname(output_path) or '.'
    base_name, _ = os.path.splitext(os.path.basename(output_path))

    sections = []
    pending_cells = []
    pending_name = "introduction"  # name for cells ahead of the first header

    for cell in cells:
        if not _is_section_header(cell):
            pending_cells.append(cell)
            continue

        # A header closes the section collected so far (if any) ...
        if pending_cells:
            sections.append({
                'name': pending_name,
                'cells': list(pending_cells),
                'number': len(sections)
            })

        # ... and opens a new one that starts with the header cell itself
        pending_name = _get_section_name(cell)
        pending_cells = [cell]

    # Close the trailing section
    if pending_cells:
        sections.append({
            'name': pending_name,
            'cells': list(pending_cells),
            'number': len(sections)
        })

    if not sections:
        print("No sections found - creating single file")
        return _create_single_svg(notebook, output_path, font_family, font_size, max_width)

    svg_contents = []
    created_files = []

    for section in sections:
        file_name = f"{base_name}_{section['number']:02d}_{_sanitize_filename(section['name'])}.svg"
        section_path = os.path.join(output_dir, file_name)

        rendered = _create_svg_from_cells(
            section['cells'], section_path, font_family, font_size, max_width
        )

        # Sections without renderable content return nothing and are skipped
        if rendered:
            svg_contents.append(rendered)
            created_files.append(section_path)

    print(f"Created {len(created_files)} section SVGs:")
    for file_path in created_files:
        print(f"  - {file_path}")

    return svg_contents

def _create_svg_from_cells(cells, output_path, font_family, font_size, max_width):
    """
    Render a list of cells into one SVG document and save it.

    Code and markdown cells with non-blank content are stacked vertically
    on a white background. The canvas is at least CANVAS_WIDTH wide.

    Returns:
        str: The SVG document, or None when no cell had content (in which
        case nothing is written).
    """
    import os

    margin = 20

    # Root element of the document
    svg = ET.Element('svg')
    svg.set('xmlns', 'http://www.w3.org/2000/svg')

    cursor_y = margin
    widest = 0
    rendered = 0

    for cell in cells:
        kind = cell.get('cell_type')
        if kind != 'code' and kind != 'markdown':
            continue

        content, _, _ = extract_cell_data(cell)
        if not (content and content.strip()):
            continue
        rendered += 1

        if kind == 'code':
            group, used_height = create_cell_svg_group(
                cell, cursor_y, font_family, font_size, max_width
            )
        else:
            group, used_height = create_markdown_cell_svg_group(
                cell, cursor_y, font_family, max_width
            )
        if group is None:
            continue

        svg.append(group)
        cursor_y += used_height

        # Width estimate from the longest line of the cell
        max_line_length = max(len(row) for row in content.split('\n'))
        if kind == 'code':
            estimated_width = margin + 60 + max_line_length * font_size * 0.6 + 40
        else:
            estimated_width = margin + max_line_length * 8 + 40
        widest = max(widest, min(estimated_width, max_width))

    if rendered == 0:
        print(f"No content found for section: {output_path}")
        return None

    # Final canvas size
    total_height = cursor_y + margin
    final_width = max(CANVAS_WIDTH, widest)

    svg.set('width', f'{final_width}px')
    svg.set('height', f'{total_height}px')
    svg.set('viewBox', f'0 0 {final_width} {total_height}')

    # White backdrop goes first so everything else draws on top of it
    backdrop = ET.Element('rect')
    for attribute, value in (
        ('x', '0'),
        ('y', '0'),
        ('width', str(final_width)),
        ('height', str(total_height)),
        ('fill', '#ffffff'),
    ):
        backdrop.set(attribute, value)
    svg.insert(0, backdrop)

    svg_string = '<?xml version="1.0" encoding="UTF-8"?>\n' + ET.tostring(svg, encoding='unicode')

    # Make sure the target directory exists, then write the file
    os.makedirs(os.path.dirname(output_path) or '.', exist_ok=True)
    with open(output_path, 'w', encoding='utf-8') as svg_file:
        svg_file.write(svg_string)

    return svg_string

def _create_single_svg(notebook, output_path, font_family, font_size, max_width):
    """
    Render every code and markdown cell of a notebook into a single SVG.

    Cells with non-blank content are stacked vertically on a white
    background; the canvas is at least CANVAS_WIDTH wide. The document is
    written to ``output_path`` and a short summary is printed.

    Returns:
        str: The SVG document, or None when the notebook has no code or
        markdown content (nothing is written in that case).
    """
    import os

    margin = 20

    # Root element of the document
    svg = ET.Element('svg')
    svg.set('xmlns', 'http://www.w3.org/2000/svg')

    cursor_y = margin
    widest = 0
    processed_cell_count = 0

    for cell in notebook.get('cells', []):
        kind = cell.get('cell_type')
        is_code = kind == 'code'
        if not is_code and kind != 'markdown':
            continue

        content, _, _ = extract_cell_data(cell)
        if not (content and content.strip()):
            continue
        processed_cell_count += 1

        # Build the SVG group for this cell
        if is_code:
            group, used_height = create_cell_svg_group(
                cell, cursor_y, font_family, font_size, max_width
            )
        else:
            group, used_height = create_markdown_cell_svg_group(
                cell, cursor_y, font_family, max_width
            )
        if group is None:
            continue

        svg.append(group)
        cursor_y += used_height

        # Width estimate from the longest line of the cell
        max_line_length = max(len(row) for row in content.split('\n'))
        if is_code:
            estimated_width = margin + 60 + max_line_length * font_size * 0.6 + 40
        else:
            estimated_width = margin + max_line_length * 8 + 40  # Rough character width
        widest = max(widest, min(estimated_width, max_width))

    if processed_cell_count == 0:
        print("No code or markdown cells found in notebook")
        return None

    # Final canvas size
    total_height = cursor_y + margin
    final_width = max(CANVAS_WIDTH, widest)

    svg.set('width', f'{final_width}px')
    svg.set('height', f'{total_height}px')
    svg.set('viewBox', f'0 0 {final_width} {total_height}')

    # White backdrop goes first so everything else draws on top of it
    backdrop = ET.Element('rect')
    for attribute, value in (
        ('x', '0'),
        ('y', '0'),
        ('width', str(final_width)),
        ('height', str(total_height)),
        ('fill', '#ffffff'),
    ):
        backdrop.set(attribute, value)
    svg.insert(0, backdrop)

    svg_string = '<?xml version="1.0" encoding="UTF-8"?>\n' + ET.tostring(svg, encoding='unicode')

    # Make sure the target directory exists, then write the file
    os.makedirs(os.path.dirname(output_path) or '.', exist_ok=True)
    with open(output_path, 'w', encoding='utf-8') as svg_file:
        svg_file.write(svg_string)

    print(f"Combined notebook SVG saved to: {output_path}")
    print(f"Processed {processed_cell_count} cells (code and markdown)")

    return svg_string

In [3]:
import base64
import os
from PIL import Image
import io
def extract_and_save_output_images(notebook_path, output_dir='output_images', image_prefix='img'):
    """
    Extract all output images from a Jupyter notebook and save them as PNG files.

    Only ``display_data`` and ``execute_result`` outputs of code cells are
    inspected. At most one PNG is written per output: when an output carries
    several raster representations of the same figure, the first one that
    decodes successfully is used (PNG is tried before JPEG). A saved output
    therefore consumes exactly one counter value, which is what
    create_image_reference_mapping assumes when it predicts filenames.

    Files are named ``{image_prefix}_{counter:03d}_cell{cell_index}.png``.
    Representations that fail to decode are reported on stdout and skipped.

    Args:
        notebook_path (str): Path to the .ipynb file
        output_dir (str): Directory to save the PNG files (created if missing)
        image_prefix (str): Prefix for image filenames

    Returns:
        list: List of saved image file paths
    """
    import json

    with open(notebook_path, 'r', encoding='utf-8') as f:
        notebook = json.load(f)

    os.makedirs(output_dir, exist_ok=True)
    saved_images = []
    image_counter = 0

    for cell_idx, cell in enumerate(notebook.get('cells', [])):
        if cell.get('cell_type') != 'code':
            continue

        for output_idx, output in enumerate(cell.get('outputs', [])):
            # Only these output types carry a MIME bundle under 'data'
            if output.get('output_type') not in ('display_data', 'execute_result'):
                continue

            data = output.get('data', {})

            # Candidate representations in order of preference
            for mime_type in ('image/png', 'image/jpeg', 'image/jpg'):
                if mime_type not in data:
                    continue

                image_data = data[mime_type]

                # Handle both string and list formats
                if isinstance(image_data, list):
                    image_data = ''.join(image_data)

                try:
                    # Decode base64 image data and open it with PIL
                    image_bytes = base64.b64decode(image_data)
                    image = Image.open(io.BytesIO(image_bytes))

                    # Always save as PNG for consistency
                    filename = f"{image_prefix}_{image_counter:03d}_cell{cell_idx}.png"
                    filepath = os.path.join(output_dir, filename)

                    # Transparency/palette modes and RGB are written as-is;
                    # every other mode is converted to RGB first.
                    if image.mode not in ('RGBA', 'LA', 'P', 'RGB'):
                        image = image.convert('RGB')
                    image.save(filepath, 'PNG')
                except Exception as e:
                    # Best effort: report the failure and try the next representation
                    print(f"Failed to process image in cell {cell_idx}, output {output_idx}: {e}")
                    continue

                saved_images.append(filepath)
                image_counter += 1
                print(f"Saved image: {filepath}")

                # One PNG per output: ignore remaining representations of this figure
                break

    print(f"Extracted {len(saved_images)} images to {output_dir}")
    return saved_images

def process_jupyter_notebook_with_png_extraction(notebook_path, output_path='notebook.svg', 
                                                font_family='Menlo', font_size=12, max_width=900, 
                                                split_sections=False, extract_images=True, 
                                                image_output_dir=None):
    """
    Render a notebook to SVG and, optionally, dump its output images as PNGs.

    The SVG rendering is delegated to process_jupyter_notebook; the PNG
    extraction is delegated to extract_and_save_output_images.

    Args:
        notebook_path (str): Path to the .ipynb file
        output_path (str): Path to save the SVG file(s)
        font_family (str): Font family to use
        font_size (int): Font size in pixels
        max_width (int): Maximum width of the SVG
        split_sections (bool): If True, create separate SVG files for each section
        extract_images (bool): If True, extract output images as PNG files
        image_output_dir (str): Directory for PNG images. When None, it is
            "<svg basename without extension>_images" inside the directory
            part of output_path ('.' if output_path has no directory part).

    Returns:
        dict: 'svg_content' holds whatever process_jupyter_notebook returned;
              'image_paths' holds the saved PNG paths (empty list when
              extract_images is False)
    """
    # SVG rendering always runs, regardless of the image option
    svg_content = process_jupyter_notebook(
        notebook_path, output_path, font_family, font_size, max_width, split_sections
    )

    if not extract_images:
        return {'svg_content': svg_content, 'image_paths': []}

    if image_output_dir is None:
        # Derive the image folder from the SVG destination
        parent_dir, leaf_name = os.path.split(output_path)
        stem = os.path.splitext(leaf_name)[0]
        image_output_dir = os.path.join(parent_dir or '.', f"{stem}_images")

    return {
        'svg_content': svg_content,
        'image_paths': extract_and_save_output_images(notebook_path, image_output_dir),
    }

def create_image_reference_mapping(notebook_path, image_output_dir, image_prefix='img'):
    """
    Create a mapping of cell indices to their corresponding PNG image files.
    This can help you manually reference the correct images in your book.

    The filenames are predicted from the notebook content alone, following the
    ``{image_prefix}_{counter:03d}_cell{cell_index}.png`` pattern with one name
    per ``display_data``/``execute_result`` output that contains a PNG or JPEG
    representation. Nothing is read from disk besides the notebook itself.

    Args:
        notebook_path (str): Path to the .ipynb file
        image_output_dir (str): Directory containing the PNG files. Accepted
            for interface compatibility; it is not read, and the returned
            names are bare filenames without a directory part.
        image_prefix (str): Filename prefix; pass the same value that was
            given to extract_and_save_output_images (default 'img').

    Returns:
        dict: Mapping of cell_index -> list of image filenames. Cells without
              image outputs are omitted.
    """
    import json

    with open(notebook_path, 'r', encoding='utf-8') as f:
        notebook = json.load(f)

    image_mime_types = ('image/png', 'image/jpeg', 'image/jpg')
    mapping = {}
    image_counter = 0  # Runs across the whole notebook, not per cell

    for cell_idx, cell in enumerate(notebook.get('cells', [])):
        if cell.get('cell_type') != 'code':
            continue

        cell_images = []
        for output in cell.get('outputs', []):
            if output.get('output_type') not in ('display_data', 'execute_result'):
                continue

            data = output.get('data', {})

            # One filename per image-bearing output, however many formats it has
            if any(mime_type in data for mime_type in image_mime_types):
                cell_images.append(f"{image_prefix}_{image_counter:03d}_cell{cell_idx}.png")
                image_counter += 1

        if cell_images:
            mapping[cell_idx] = cell_images

    return mapping

# Example usage function
def process_notebook_for_book(notebook_path, output_base_name=None):
    """
    Convenience function that processes a notebook with all the bells and whistles.

    Renders the notebook to per-section SVGs, extracts its output images as
    PNGs, and writes a JSON file mapping cell indices to the expected PNG
    filenames. A short summary is printed to stdout.

    Args:
        notebook_path (str): Path to the .ipynb file
        output_base_name (str): Base path (without extension) for all outputs.
            Defaults to the notebook's filename without its extension, which
            places the outputs relative to the current working directory.

    Returns:
        tuple: (result, mapping) where result is the dict returned by
               process_jupyter_notebook_with_png_extraction and mapping is the
               dict returned by create_image_reference_mapping
    """
    import json

    if output_base_name is None:
        output_base_name = os.path.splitext(os.path.basename(notebook_path))[0]

    # Process with section splitting and image extraction
    result = process_jupyter_notebook_with_png_extraction(
        notebook_path=notebook_path,
        output_path=f"{output_base_name}.svg",
        split_sections=True,
        extract_images=True
    )

    # Create reference mapping
    image_dir = f"{output_base_name}_images"
    mapping = create_image_reference_mapping(notebook_path, image_dir)

    # Save mapping as JSON for reference (integer cell indices become string keys)
    mapping_path = f"{output_base_name}_image_mapping.json"
    with open(mapping_path, 'w', encoding='utf-8') as f:
        json.dump(mapping, f, indent=2)

    # svg_content may be a list (one entry per section), a single string,
    # or None when nothing was rendered; do not report None as one file.
    svg_content = result['svg_content']
    if svg_content is None:
        svg_count = 0
    elif isinstance(svg_content, list):
        svg_count = len(svg_content)
    else:
        svg_count = 1

    print("\nProcessing complete!")
    print(f"SVG files: {svg_count}")
    print(f"PNG images: {len(result['image_paths'])}")
    print(f"Image mapping saved to: {mapping_path}")

    return result, mapping

In [4]:
# Render the perceptron notebook as one SVG per section into the code_renders folder.
in_path="/Users/stephen/ai_book/1_perceptron.ipynb"
out_path="/Users/stephen/Stephencwelch Dropbox/welch_labs/ai_book/1_perceptron/Design/Assets/code_renders/"

# extract_images defaults to True, so this call also saves the output PNGs into
# the "_images" subfolder of out_path. The trailing ';' keeps the notebook from
# echoing the returned dict.
process_jupyter_notebook_with_png_extraction(in_path, out_path, split_sections=True, max_width=900);
# Second extraction pass: writes the same output images again, directly into out_path.
image_paths = extract_and_save_output_images(in_path, out_path)

Created 5 section SVGs:
  - /Users/stephen/Stephencwelch Dropbox/welch_labs/ai_book/1_perceptron/Design/Assets/code_renders/_00_1._Perceptron_Supporting_Code.svg
  - /Users/stephen/Stephencwelch Dropbox/welch_labs/ai_book/1_perceptron/Design/Assets/code_renders/_01_1.1_Ts_vs_Js_-_Step_Through_Manually.svg
  - /Users/stephen/Stephencwelch Dropbox/welch_labs/ai_book/1_perceptron/Design/Assets/code_renders/_02_1.2_Ts_vs_Js_-_Step_through_in_automated_loop.svg
  - /Users/stephen/Stephencwelch Dropbox/welch_labs/ai_book/1_perceptron/Design/Assets/code_renders/_03_1.3_Two_input_perceptron_-_solvable_case..svg
  - /Users/stephen/Stephencwelch Dropbox/welch_labs/ai_book/1_perceptron/Design/Assets/code_renders/_04_Example_4_-_Two_input_perceptron_-_unsolvable_XOR_.svg
Saved image: /Users/stephen/Stephencwelch Dropbox/welch_labs/ai_book/1_perceptron/Design/Assets/code_renders/_images/img_000_cell4.png
Saved image: /Users/stephen/Stephencwelch Dropbox/welch_labs/ai_book/1_perceptron/Design/Assets

In [6]:
# Render the cats-vs-dogs notebook as one SVG per section into the code_renders folder.
in_path="/Users/stephen/ai_book/1_perceptron_cats_vs_dogs.ipynb"
out_path="/Users/stephen/Stephencwelch Dropbox/welch_labs/ai_book/1_perceptron/Design/Assets/code_renders/"

# extract_images defaults to True, so this call also saves the output PNGs into
# the "_images" subfolder of out_path. The trailing ';' keeps the notebook from
# echoing the returned dict.
process_jupyter_notebook_with_png_extraction(in_path, out_path, split_sections=True, max_width=900);
# Second extraction pass: writes the same output images again, directly into out_path.
image_paths = extract_and_save_output_images(in_path, out_path)

Created 1 section SVGs:
  - /Users/stephen/Stephencwelch Dropbox/welch_labs/ai_book/1_perceptron/Design/Assets/code_renders/_00_1.5_Cats_Vs_Dogs_Example.svg
Saved image: /Users/stephen/Stephencwelch Dropbox/welch_labs/ai_book/1_perceptron/Design/Assets/code_renders/_images/img_000_cell11.png
Extracted 1 images to /Users/stephen/Stephencwelch Dropbox/welch_labs/ai_book/1_perceptron/Design/Assets/code_renders/_images
Saved image: /Users/stephen/Stephencwelch Dropbox/welch_labs/ai_book/1_perceptron/Design/Assets/code_renders/img_000_cell11.png
Extracted 1 images to /Users/stephen/Stephencwelch Dropbox/welch_labs/ai_book/1_perceptron/Design/Assets/code_renders/


In [5]:
# in_path="/Users/stephen/ai_book/3_backpropagation.ipynb"
# out_path="/Users/stephen/Stephencwelch Dropbox/welch_labs/ai_book/3_backprop_2/graphics/code_renders/"
# # out_path="/home/stephen/book_chapter_3_1.svg"

# process_jupyter_notebook(in_path, out_path, split_sections=True);
# image_paths = extract_and_save_output_images(in_path, out_path)

In [6]:
# out_path='/Users/stephen/Stephencwelch Dropbox/welch_labs/ai_book/3_backprop_2/graphics/book_chapter_3_1.svg'
# process_jupyter_cell(example_cell_with_output, out_path)

---

In [7]:
# def process_jupyter_notebook(notebook_path, output_path='notebook.svg', font_family='Menlo', font_size=12, max_width=900, split_sections=False):
#     """
#     Process an entire Jupyter notebook and convert all code and markdown cells to SVG file(s).
    
#     Args:
#         notebook_path (str): Path to the .ipynb file
#         output_path (str): Path to save the SVG file(s) (can include directory)
#         font_family (str): Font family to use
#         font_size (int): Font size in pixels
#         max_width (int): Maximum width of the SVG
#         split_sections (bool): If True, create separate SVG files for each section (# or ## headers)
    
#     Returns:
#         str or list: SVG content as string (single file) or list of SVG contents (multiple files)
#     """
#     import os
    
#     with open(notebook_path, 'r', encoding='utf-8') as f:
#         notebook = json.load(f)
    
#     if not split_sections:
#         # Original behavior - single SVG file
#         return _create_single_svg(notebook, output_path, font_family, font_size, max_width)
    
#     # New behavior - split into sections
#     return _create_section_svgs(notebook, output_path, font_family, font_size, max_width)

# def _is_section_header(cell):
#     """Check if a markdown cell starts with # or ## (new section)."""
#     if cell.get('cell_type') != 'markdown':
#         return False
    
#     markdown_text, _, _ = extract_cell_data(cell)
#     if not markdown_text:
#         return False
    
#     # Check if any line starts with # or ## (but not ###, ####, etc.)
#     lines = markdown_text.strip().split('\n')
#     for line in lines:
#         line = line.strip()
#         if line.startswith('## ') and not line.startswith('### '):
#             return True
#         if line.startswith('# ') and not line.startswith('## '):
#             return True
    
#     return False

# def _get_section_name(cell):
#     """Extract section name from a header markdown cell."""
#     markdown_text, _, _ = extract_cell_data(cell)
#     if not markdown_text:
#         return "untitled"
    
#     lines = markdown_text.strip().split('\n')
#     for line in lines:
#         line = line.strip()
#         if line.startswith('# '):
#             return line[2:].strip()
#         elif line.startswith('## '):
#             return line[3:].strip()
    
#     return "untitled"

# def _sanitize_filename(name):
#     """Convert section name to a safe filename."""
#     import re
#     # Remove or replace unsafe characters
#     name = re.sub(r'[<>:"/\\|?*]', '_', name)
#     # Replace spaces with underscores
#     name = name.replace(' ', '_')
#     # Remove multiple underscores
#     name = re.sub(r'_+', '_', name)
#     # Remove leading/trailing underscores
#     name = name.strip('_')
#     # Limit length
#     name = name[:50]
#     return name or "section"

# def _create_section_svgs(notebook, output_path, font_family, font_size, max_width):
#     """Create separate SVG files for each section."""
#     import os
    
#     cells = notebook.get('cells', [])
#     if not cells:
#         print("No cells found in notebook")
#         return []
    
#     # Determine output directory and base filename
#     output_dir = os.path.dirname(output_path) if os.path.dirname(output_path) else '.'
#     base_name = os.path.splitext(os.path.basename(output_path))[0]
    
#     sections = []
#     current_section_cells = []
#     current_section_name = "introduction"  # Default name for cells before first header
#     section_counter = 0
    
#     # Group cells into sections
#     for i, cell in enumerate(cells):
#         if _is_section_header(cell):
#             # Save previous section if it has content
#             if current_section_cells:
#                 sections.append({
#                     'name': current_section_name,
#                     'cells': current_section_cells.copy(),
#                     'number': section_counter
#                 })
#                 section_counter += 1
            
#             # Start new section
#             current_section_name = _get_section_name(cell)
#             current_section_cells = [cell]  # Include the header cell
#         else:
#             current_section_cells.append(cell)
    
#     # Don't forget the last section
#     if current_section_cells:
#         sections.append({
#             'name': current_section_name,
#             'cells': current_section_cells.copy(),
#             'number': section_counter
#         })
    
#     if not sections:
#         print("No sections found - creating single file")
#         return _create_single_svg(notebook, output_path, font_family, font_size, max_width)
    
#     # Create SVG for each section
#     svg_contents = []
#     created_files = []
    
#     for section in sections:
#         # Create filename
#         safe_name = _sanitize_filename(section['name'])
#         section_filename = f"{base_name}_{section['number']:02d}_{safe_name}.svg"
#         section_path = os.path.join(output_dir, section_filename)
        
#         # Create SVG for this section
#         svg_content = _create_svg_from_cells(
#             section['cells'], section_path, font_family, font_size, max_width
#         )
        
#         if svg_content:
#             svg_contents.append(svg_content)
#             created_files.append(section_path)
    
#     print(f"Created {len(created_files)} section SVGs:")
#     for file_path in created_files:
#         print(f"  - {file_path}")
    
#     return svg_contents

# def _create_svg_from_cells(cells, output_path, font_family, font_size, max_width):
#     """Create an SVG from a list of cells."""
#     import os
    
#     # Create the main SVG container
#     svg = ET.Element('svg')
#     svg.set('xmlns', 'http://www.w3.org/2000/svg')
    
#     margin = 20
#     current_y = margin
#     actual_width = 0
#     processed_cell_count = 0
    
#     # Process each cell
#     for cell in cells:
#         cell_type = cell.get('cell_type')
        
#         if cell_type == 'code':
#             code, _, _ = extract_cell_data(cell)
#             if code and code.strip():
#                 processed_cell_count += 1
                
#                 cell_group, cell_height = create_cell_svg_group(
#                     cell, current_y, font_family, font_size, max_width
#                 )
                
#                 if cell_group is not None:
#                     svg.append(cell_group)
#                     current_y += cell_height
                    
#                     # Update actual width
#                     code_lines = code.split('\n')
#                     max_line_length = max(len(line) for line in code_lines) if code_lines else 0
#                     estimated_width = margin + 60 + max_line_length * font_size * 0.6 + 40
#                     actual_width = max(actual_width, min(estimated_width, max_width))
        
#         elif cell_type == 'markdown':
#             markdown_text, _, _ = extract_cell_data(cell)
#             if markdown_text and markdown_text.strip():
#                 processed_cell_count += 1
                
#                 cell_group, cell_height = create_markdown_cell_svg_group(
#                     cell, current_y, font_family, max_width
#                 )
                
#                 if cell_group is not None:
#                     svg.append(cell_group)
#                     current_y += cell_height
                    
#                     # Update actual width
#                     lines = markdown_text.split('\n')
#                     max_line_length = max(len(line) for line in lines) if lines else 0
#                     estimated_width = margin + max_line_length * 8 + 40
#                     actual_width = max(actual_width, min(estimated_width, max_width))
    
#     if processed_cell_count == 0:
#         print(f"No content found for section: {output_path}")
#         return None
    
#     # Final dimensions
#     total_height = current_y + margin
#     final_width = max(CANVAS_WIDTH, actual_width)
    
#     svg.set('width', f'{final_width}px')
#     svg.set('height', f'{total_height}px')
#     svg.set('viewBox', f'0 0 {final_width} {total_height}')
    
#     # Add background
#     overall_bg = ET.Element('rect')
#     overall_bg.set('x', '0')
#     overall_bg.set('y', '0')
#     overall_bg.set('width', str(final_width))
#     overall_bg.set('height', str(total_height))
#     overall_bg.set('fill', '#ffffff')
#     svg.insert(0, overall_bg)
    
#     # Convert to string
#     svg_string = ET.tostring(svg, encoding='unicode')
#     svg_string = '<?xml version="1.0" encoding="UTF-8"?>\n' + svg_string
    
#     # Save to file
#     output_dir = os.path.dirname(output_path) if os.path.dirname(output_path) else '.'
#     os.makedirs(output_dir, exist_ok=True)
    
#     with open(output_path, 'w', encoding='utf-8') as f:
#         f.write(svg_string)
    
#     return svg_string

# def _create_single_svg(notebook, output_path, font_family, font_size, max_width):
#     """Original single SVG creation logic."""
#     import os
    
#     # Create the main SVG container
#     svg = ET.Element('svg')
#     svg.set('xmlns', 'http://www.w3.org/2000/svg')
    
#     margin = 20
#     current_y = margin
#     actual_width = 0
    
#     # Process each cell (both code and markdown)
#     processed_cell_count = 0
#     for i, cell in enumerate(notebook.get('cells', [])):
#         cell_type = cell.get('cell_type')
        
#         if cell_type == 'code':
#             code, _, _ = extract_cell_data(cell)
#             if code and code.strip():
#                 processed_cell_count += 1
                
#                 # Create SVG group for this cell
#                 cell_group, cell_height = create_cell_svg_group(
#                     cell, current_y, font_family, font_size, max_width
#                 )
                
#                 if cell_group is not None:
#                     svg.append(cell_group)
#                     current_y += cell_height
                    
#                     # Update actual width based on content
#                     code_lines = code.split('\n')
#                     max_line_length = max(len(line) for line in code_lines) if code_lines else 0
#                     estimated_width = margin + 60 + max_line_length * font_size * 0.6 + 40
#                     actual_width = max(actual_width, min(estimated_width, max_width))
        
#         elif cell_type == 'markdown':
#             markdown_text, _, _ = extract_cell_data(cell)
#             if markdown_text and markdown_text.strip():
#                 processed_cell_count += 1
                
#                 # Create SVG group for this markdown cell
#                 cell_group, cell_height = create_markdown_cell_svg_group(
#                     cell, current_y, font_family, max_width
#                 )
                
#                 if cell_group is not None:
#                     svg.append(cell_group)
#                     current_y += cell_height
                    
#                     # Update actual width for markdown (simpler estimation)
#                     lines = markdown_text.split('\n')
#                     max_line_length = max(len(line) for line in lines) if lines else 0
#                     estimated_width = margin + max_line_length * 8 + 40  # Rough character width
#                     actual_width = max(actual_width, min(estimated_width, max_width))
    
#     if processed_cell_count == 0:
#         print("No code or markdown cells found in notebook")
#         return None
    
#     # Final dimensions
#     total_height = current_y + margin
#     final_width = max(CANVAS_WIDTH, actual_width)
    
#     svg.set('width', f'{final_width}px')
#     svg.set('height', f'{total_height}px')
#     svg.set('viewBox', f'0 0 {final_width} {total_height}')
    
#     # Add overall background
#     overall_bg = ET.Element('rect')
#     overall_bg.set('x', '0')
#     overall_bg.set('y', '0')
#     overall_bg.set('width', str(final_width))
#     overall_bg.set('height', str(total_height))
#     overall_bg.set('fill', '#ffffff')
#     svg.insert(0, overall_bg)
    
#     # Convert to string
#     svg_string = ET.tostring(svg, encoding='unicode')
#     svg_string = '<?xml version="1.0" encoding="UTF-8"?>\n' + svg_string
    
#     # Save to file
#     output_dir = os.path.dirname(output_path) if os.path.dirname(output_path) else '.'
#     os.makedirs(output_dir, exist_ok=True)
    
#     with open(output_path, 'w', encoding='utf-8') as f:
#         f.write(svg_string)
    
#     print(f"Combined notebook SVG saved to: {output_path}")
#     print(f"Processed {processed_cell_count} cells (code and markdown)")
    
#     return svg_string

In [8]:
# CHILL_BROWN='#948979'
# SOLARIZED_BACKGROUND_COLOR='#fdf4e0'
# CELL_WIDTH=600
# CANVAS_WIDTH=700
# OUTPUT_FONT_SIZE=11

# import json
# import re
# import base64
# import io
# from pygments import highlight
# from pygments.lexers import PythonLexer
# from pygments.formatters import SvgFormatter
# from pygments.styles import get_style_by_name
# import xml.etree.ElementTree as ET

# def wrap_text(text, max_chars_per_line=80, font_size=12):
#     """
#     Wrap long text to fit within specified character width.
    
#     Args:
#         text (str): Text to wrap
#         max_chars_per_line (int): Maximum characters per line
#         font_size (int): Font size (used to estimate character width)
    
#     Returns:
#         str: Text with newlines inserted for wrapping
#     """
#     if not text:
#         return text
    
#     lines = text.split('\n')
#     wrapped_lines = []
    
#     for line in lines:
#         if len(line) <= max_chars_per_line:
#             wrapped_lines.append(line)
#         else:
#             # Break long lines at word boundaries when possible
#             words = line.split(' ')
#             current_line = ''
            
#             for word in words:
#                 # If adding this word would exceed the limit
#                 if len(current_line + ' ' + word) > max_chars_per_line:
#                     if current_line:  # If we have content in current line
#                         wrapped_lines.append(current_line)
#                         current_line = word
#                     else:  # Word itself is longer than max_chars_per_line
#                         # Break the word itself
#                         while len(word) > max_chars_per_line:
#                             wrapped_lines.append(word[:max_chars_per_line])
#                             word = word[max_chars_per_line:]
#                         current_line = word
#                 else:
#                     if current_line:
#                         current_line += ' ' + word
#                     else:
#                         current_line = word
            
#             if current_line:
#                 wrapped_lines.append(current_line)
    
#     return '\n'.join(wrapped_lines)

# def extract_cell_data(cell):
#     """Extract code, outputs, and execution count from a Jupyter notebook cell."""
#     if cell.get('cell_type') == 'code':
#         # Extract source code
#         source = cell.get('source', [])
#         if isinstance(source, list):
#             code = ''.join(source)
#         else:
#             code = source
        
#         # Extract execution count
#         execution_count = cell.get('execution_count')
        
#         # Extract outputs
#         outputs = cell.get('outputs', [])
#         output_items = []  # Changed to list to handle multiple output types
        
#         for output in outputs:
#             if 'data' in output:
#                 # Handle different output types
#                 if 'text/plain' in output['data']:
#                     plain_text = output['data']['text/plain']
#                     if isinstance(plain_text, list):
#                         text_content = ''.join(plain_text)
#                     else:
#                         text_content = plain_text
#                     output_items.append({'type': 'text', 'content': text_content.rstrip()})
                
#                 # Handle image outputs
#                 if 'image/png' in output['data']:
#                     png_data = output['data']['image/png']
#                     # Remove any whitespace/newlines from base64 data
#                     png_data = ''.join(png_data.split()) if isinstance(png_data, str) else png_data
#                     output_items.append({'type': 'image', 'format': 'png', 'data': png_data})
                
#                 if 'image/jpeg' in output['data']:
#                     jpeg_data = output['data']['image/jpeg']
#                     jpeg_data = ''.join(jpeg_data.split()) if isinstance(jpeg_data, str) else jpeg_data
#                     output_items.append({'type': 'image', 'format': 'jpeg', 'data': jpeg_data})
                
#                 if 'image/svg+xml' in output['data']:
#                     svg_data = output['data']['image/svg+xml']
#                     if isinstance(svg_data, list):
#                         svg_data = ''.join(svg_data)
#                     output_items.append({'type': 'image', 'format': 'svg', 'data': svg_data})
                    
#             elif 'text' in output:
#                 # Handle direct text output
#                 text_content = output['text']
#                 if isinstance(text_content, list):
#                     text_content = ''.join(text_content)
#                 output_items.append({'type': 'text', 'content': text_content.rstrip()})
        
#         return code.strip(), output_items, execution_count
    
#     elif cell.get('cell_type') == 'markdown':
#         # Extract markdown source
#         source = cell.get('source', [])
#         if isinstance(source, list):
#             markdown_text = ''.join(source)
#         else:
#             markdown_text = source
        
#         return markdown_text.strip(), None, None
    
#     return None, None, None

# def get_image_dimensions(image_data, image_format, max_width=CELL_WIDTH):
#     """
#     Get or estimate image dimensions. For now, we'll use a simple approach.
#     In a more complete implementation, you might want to decode the image to get actual dimensions.
    
#     Args:
#         image_data: Base64 encoded image data or SVG string
#         image_format: 'png', 'jpeg', or 'svg'
#         max_width: Maximum allowed width
    
#     Returns:
#         tuple: (width, height)
#     """
#     if image_format == 'svg':
#         # Try to extract dimensions from SVG
#         try:
#             # Simple regex to find width/height attributes
#             width_match = re.search(r'width=["\'](\d+(?:\.\d+)?)', image_data)
#             height_match = re.search(r'height=["\'](\d+(?:\.\d+)?)', image_data)
            
#             if width_match and height_match:
#                 width = float(width_match.group(1))
#                 height = float(height_match.group(1))
                
#                 # Scale if too wide
#                 if width > max_width:
#                     scale = max_width / width
#                     width = max_width
#                     height = height * scale
                
#                 return int(width), int(height)
#         except:
#             pass
    
#     # Default dimensions for PNG/JPEG or when SVG parsing fails
#     # These could be improved by actually decoding the image
#     default_width = min(400, max_width)
#     default_height = 300
#     return default_width, default_height

# def create_image_element(image_data, image_format, x, y, max_width=CELL_WIDTH):
#     """
#     Create an SVG image element from base64 data or inline SVG.
    
#     Args:
#         image_data: Base64 encoded data (for PNG/JPEG) or SVG string
#         image_format: 'png', 'jpeg', or 'svg'
#         x, y: Position coordinates
#         max_width: Maximum width for the image
    
#     Returns:
#         tuple: (svg_element, width, height)
#     """
#     width, height = get_image_dimensions(image_data, image_format, max_width)
    
#     if image_format == 'svg':
#         # For SVG, we can embed it directly
#         try:
#             # Wrap the SVG in a group and position it
#             group = ET.Element('g')
#             group.set('transform', f'translate({x}, {y})')
            
#             # Parse the SVG content
#             svg_root = ET.fromstring(image_data)
            
#             # Set dimensions if not present
#             svg_root.set('width', str(width))
#             svg_root.set('height', str(height))
            
#             group.append(svg_root)
#             return group, width, height
            
#         except ET.ParseError:
#             # Fallback: create a placeholder rectangle
#             rect = ET.Element('rect')
#             rect.set('x', str(x))
#             rect.set('y', str(y))
#             rect.set('width', str(width))
#             rect.set('height', str(height))
#             rect.set('fill', '#f0f0f0')
#             rect.set('stroke', '#ccc')
#             rect.set('stroke-width', '1')
#             return rect, width, height
    
#     else:
#         # For PNG/JPEG, use image element with data URL
#         img = ET.Element('image')
#         img.set('x', str(x))
#         img.set('y', str(y))
#         img.set('width', str(width))
#         img.set('height', str(height))
        
#         # Create data URL
#         mime_type = f'image/{image_format}'
#         data_url = f'data:{mime_type};base64,{image_data}'
#         img.set('href', data_url)
        
#         return img, width, height

# def simple_markdown_to_text(markdown_text):
#     """
#     Convert basic markdown formatting to plain text with some simple styling hints.
#     Returns a list of (text, style_info) tuples.
#     """
#     lines = markdown_text.split('\n')
#     styled_lines = []
    
#     for line in lines:
#         line = line.strip()
        
#         # Headers
#         if line.startswith('# '):
#             styled_lines.append((line[2:], {'type': 'h1', 'weight': 'bold', 'size': 18}))
#         elif line.startswith('## '):
#             styled_lines.append((line[3:], {'type': 'h2', 'weight': 'bold', 'size': 16}))
#         elif line.startswith('### '):
#             styled_lines.append((line[4:], {'type': 'h3', 'weight': 'bold', 'size': 14}))
#         elif line.startswith('#### '):
#             styled_lines.append((line[5:], {'type': 'h4', 'weight': 'bold', 'size': 13}))
#         # Bold text (simple case)
#         elif '**' in line:
#             # Simple bold handling - just remove ** and mark as bold
#             cleaned_line = line.replace('**', '')
#             styled_lines.append((cleaned_line, {'type': 'bold', 'weight': 'bold', 'size': 12}))
#         # Italic text (simple case)
#         elif '*' in line and '**' not in line:
#             # Simple italic handling
#             cleaned_line = line.replace('*', '')
#             styled_lines.append((cleaned_line, {'type': 'italic', 'style': 'italic', 'size': 12}))
#         # Code blocks
#         elif line.startswith('```'):
#             continue  # Skip code block markers for now
#         # Empty lines
#         elif line == '':
#             styled_lines.append(('', {'type': 'empty', 'size': 12}))
#         # Regular text
#         else:
#             styled_lines.append((line, {'type': 'normal', 'size': 12}))
    
#     return styled_lines

# def create_text_element(text, x, y, font_family='Menlo', font_size=12, fill='black', font_weight='normal', font_style='normal'):
#     """Create an SVG text element."""
#     text_elem = ET.Element('text')
#     text_elem.set('x', str(x))
#     text_elem.set('y', str(y))
#     text_elem.set('font-family', font_family)
#     text_elem.set('font-size', f'{font_size}px')
#     text_elem.set('fill', fill)
#     text_elem.set('font-weight', font_weight)
#     if font_style != 'normal':
#         text_elem.set('font-style', font_style)
#     text_elem.text = text if text else ' '
#     return text_elem

# def create_multiline_text(text, x, y, font_family='Menlo', font_size=12, fill='black', line_height=1.4, max_width=None):
#     """
#     Create SVG text elements for multiline text with optional wrapping.
    
#     Args:
#         text (str): Text to render
#         x, y (float): Starting position
#         font_family, font_size, fill: Text styling
#         line_height (float): Line height multiplier
#         max_width (int): Maximum width in pixels for wrapping (optional)
    
#     Returns:
#         tuple: (text_group_element, total_height_used)
#     """
#     # Apply text wrapping if max_width is specified
#     if max_width:
#         # Estimate characters per line based on font size and width
#         # This is a rough approximation - monospace fonts are ~0.6 * font_size wide per character
#         chars_per_line = int(max_width / (font_size * 0.6))
#         text = wrap_text(text, max_chars_per_line=chars_per_line, font_size=font_size)
    
#     lines = text.split('\n')
#     text_group = ET.Element('g')
    
#     for i, line in enumerate(lines):
#         # Create text element for all lines, including empty ones
#         # For empty lines, we still need the element to maintain spacing
#         text_elem = create_text_element(
#             line if line.strip() else ' ',  # Use space for empty lines to maintain spacing
#             x, y + (i * font_size * line_height), 
#             font_family, font_size, fill
#         )
#         text_group.append(text_elem)
    
#     return text_group, len(lines) * font_size * line_height

# def create_markdown_cell_svg_group(cell_data, y_offset, font_family='Menlo', max_width=800):
#     """
#     Create an SVG group element for a markdown cell at a given y offset.
#     No background or borders - just styled text.
#     """
#     markdown_text, _, _ = extract_cell_data(cell_data)
    
#     if markdown_text is None:
#         return None, 0
    
#     # Create group for this cell
#     cell_group = ET.Element('g')
    
#     # Constants for layout
#     margin = 20
#     line_height = 1.5
#     cell_spacing = 15  # Space between cells
    
#     # Parse markdown into styled lines
#     styled_lines = simple_markdown_to_text(markdown_text)
    
#     current_y = y_offset
    
#     for text, style in styled_lines:
#         font_size = style.get('size', 12)
#         font_weight = style.get('weight', 'normal')
#         font_style = style.get('style', 'normal')
        
#         # Use a more readable font for markdown
#         markdown_font = 'system-ui, -apple-system, sans-serif' if style['type'].startswith('h') else font_family
        
#         # Create text element
#         text_elem = create_text_element(
#             text, margin, current_y + font_size,
#             markdown_font, font_size, '#333', font_weight, font_style
#         )
#         cell_group.append(text_elem)
        
#         # Calculate line height based on font size
#         current_y += font_size * line_height
        
#         # Add extra space after headers
#         if style['type'].startswith('h'):
#             current_y += font_size * 0.3
    
#     # Calculate total height
#     cell_height = current_y - y_offset + cell_spacing
    
#     return cell_group, cell_height

# def create_cell_svg_group(cell_data, y_offset, font_family='Menlo', font_size=12, max_width=800):
#     """
#     Create an SVG group element for a single cell at a given y offset.
    
#     Args:
#         cell_data (dict): Jupyter cell dictionary
#         y_offset (float): Y position to start this cell
#         font_family (str): Font family to use
#         font_size (int): Font size in pixels
#         max_width (int): Maximum width for the cell
    
#     Returns:
#         tuple: (svg_group_element, height_used)
#     """
    
#     # Check if this is a markdown cell
#     if cell_data.get('cell_type') == 'markdown':
#         return create_markdown_cell_svg_group(cell_data, y_offset, font_family, max_width)
    
#     code, output_items, execution_count = extract_cell_data(cell_data)  # Updated to use output_items
    
#     if code is None:
#         return None, 0
    
#     # Create lexer and formatter for Python code
#     lexer = PythonLexer()
#     formatter = SvgFormatter(
#         style='solarized-light',
#         fontfamily=font_family,
#         fontsize=f'{font_size}px',
#         linenos=False,
#         noclasses=True,
#         nobackground=True
#     )
    
#     # Generate highlighted code SVG
#     code_svg = highlight(code, lexer, formatter)
#     code_root = ET.fromstring(code_svg)
    
#     # Create group for this cell
#     cell_group = ET.Element('g')
    
#     # Constants for layout
#     margin = 20
#     execution_count_width = 60
#     cell_padding = 12
#     line_height = 1.4
#     cell_spacing = 10  # Space between cells
    
#     # Calculate dimensions
#     code_lines = len(code.split('\n'))
#     code_height = code_lines * font_size * line_height
    
#     # Start position within this cell
#     current_y = y_offset
    
#     # Execution count for input
#     exec_count_text = f"[{execution_count or ' '}]:"
#     exec_count_elem = create_text_element(
#         exec_count_text, margin, current_y + font_size,
#         font_family, font_size, CHILL_BROWN, 'normal'
#     )
#     cell_group.append(exec_count_elem)
    
#     # Input code area with background
#     code_x = margin + execution_count_width
#     code_y = current_y
    
#     # Create background rectangle for input code
#     code_width = min(max_width - code_x - margin, max(400, len(max(code.split('\n'), key=len)) * font_size * 0.6))
#     input_bg = ET.Element('rect')
#     input_bg.set('x', str(code_x - cell_padding))
#     input_bg.set('y', str(code_y - cell_padding//2))
#     input_bg.set('width', str(CELL_WIDTH))
#     input_bg.set('height', str(code_height + cell_padding))
#     input_bg.set('fill', SOLARIZED_BACKGROUND_COLOR)  
#     input_bg.set('stroke', CHILL_BROWN) 
#     input_bg.set('opacity', '0.2') 
#     input_bg.set('stroke-width', '1')
#     cell_group.append(input_bg)
    
#     # Add the syntax-highlighted code
#     code_group = ET.Element('g')
#     code_group.set('transform', f'translate({code_x}, {code_y})')
    
#     # Extract the highlighted content from pygments SVG
#     for elem in code_root:
#         if elem.tag.endswith('g') or elem.tag.endswith('text'):
#             code_group.append(elem)
    
#     cell_group.append(code_group)
    
#     # Move to output section
#     input_output_spacing = cell_padding
#     current_y += code_height + input_output_spacing
    
#     # Handle outputs if they exist
#     if output_items:  # Updated to check output_items instead of output_text
#         # Execution count for output
#         output_exec_count = create_text_element(
#             exec_count_text, margin, current_y + OUTPUT_FONT_SIZE,
#             font_family, OUTPUT_FONT_SIZE, CHILL_BROWN, 'normal'
#         )
#         cell_group.append(output_exec_count)
        
#         # Process each output item
#         for output_item in output_items:
#             if output_item['type'] == 'text':
#                 # Handle text output with wrapping
#                 available_width = CELL_WIDTH - cell_padding * 2
#                 output_group, output_height = create_multiline_text(
#                     output_item['content'], code_x, current_y + OUTPUT_FONT_SIZE,
#                     font_family, OUTPUT_FONT_SIZE, '#333', 
#                     max_width=available_width
#                 )
#                 cell_group.append(output_group)
#                 current_y += output_height + cell_padding
                
#             elif output_item['type'] == 'image':
#                 # Handle image output
#                 image_elem, img_width, img_height = create_image_element(
#                     output_item['data'], output_item['format'], 
#                     code_x, current_y + cell_padding, max_width=CELL_WIDTH - cell_padding * 2
#                 )
#                 cell_group.append(image_elem)
#                 current_y += img_height + cell_padding * 2  # Extra spacing after images
    
#     # Calculate the total cell height
#     cell_height = current_y - y_offset
    
#     # Add spacing after cell
#     cell_height += cell_spacing
        
#     return cell_group, cell_height

# def process_jupyter_notebook(notebook_path, output_path='notebook.svg', font_family='Menlo', font_size=12, max_width=900):
#     """
#     Process an entire Jupyter notebook and convert all code and markdown cells to a single SVG file.
    
#     Args:
#         notebook_path (str): Path to the .ipynb file
#         output_path (str): Path to save the combined SVG file
#         font_family (str): Font family to use
#         font_size (int): Font size in pixels
#         max_width (int): Upper bound on the estimated content width; the final SVG is at least CANVAS_WIDTH wide
    
#     Returns:
#         str: SVG content as string
#     """
#     import os
    
#     with open(notebook_path, 'r', encoding='utf-8') as f:
#         notebook = json.load(f)
    
#     # Create the main SVG container
#     svg = ET.Element('svg')
#     svg.set('xmlns', 'http://www.w3.org/2000/svg')
    
#     margin = 20
#     current_y = margin
#     actual_width = 0
    
#     # Process each cell (both code and markdown)
#     processed_cell_count = 0
#     for i, cell in enumerate(notebook.get('cells', [])):
#         cell_type = cell.get('cell_type')
        
#         if cell_type == 'code':
#             code, _, _ = extract_cell_data(cell)
#             if code and code.strip():
#                 processed_cell_count += 1
                
#                 # Create SVG group for this cell
#                 cell_group, cell_height = create_cell_svg_group(
#                     cell, current_y, font_family, font_size, max_width
#                 )
                
#                 if cell_group is not None:
#                     svg.append(cell_group)
#                     current_y += cell_height
                    
#                     # Update actual width based on content
#                     code_lines = code.split('\n')
#                     max_line_length = max(len(line) for line in code_lines) if code_lines else 0
#                     estimated_width = margin + 60 + max_line_length * font_size * 0.6 + 40
#                     actual_width = max(actual_width, min(estimated_width, max_width))
        
#         elif cell_type == 'markdown':
#             markdown_text, _, _ = extract_cell_data(cell)
#             if markdown_text and markdown_text.strip():
#                 processed_cell_count += 1
                
#                 # Create SVG group for this markdown cell
#                 cell_group, cell_height = create_markdown_cell_svg_group(
#                     cell, current_y, font_family, max_width
#                 )
                
#                 if cell_group is not None:
#                     svg.append(cell_group)
#                     current_y += cell_height
                    
#                     # Update actual width for markdown (simpler estimation)
#                     lines = markdown_text.split('\n')
#                     max_line_length = max(len(line) for line in lines) if lines else 0
#                     estimated_width = margin + max_line_length * 8 + 40  # Rough character width
#                     actual_width = max(actual_width, min(estimated_width, max_width))
    
#     if processed_cell_count == 0:
#         print("No code or markdown cells found in notebook")
#         return None
    
#     # Final dimensions
#     total_height = current_y + margin
#     final_width = max(CANVAS_WIDTH, actual_width)
    
#     svg.set('width', f'{final_width}px')
#     svg.set('height', f'{total_height}px')
#     svg.set('viewBox', f'0 0 {final_width} {total_height}')
    
#     # Add overall background
#     overall_bg = ET.Element('rect')
#     overall_bg.set('x', '0')
#     overall_bg.set('y', '0')
#     overall_bg.set('width', str(final_width))
#     overall_bg.set('height', str(total_height))
#     overall_bg.set('fill', '#ffffff')
#     svg.insert(0, overall_bg)
    
#     # Convert to string
#     svg_string = ET.tostring(svg, encoding='unicode')
#     svg_string = '<?xml version="1.0" encoding="UTF-8"?>\n' + svg_string
    
#     # Save to file
#     output_dir = os.path.dirname(output_path) if os.path.dirname(output_path) else '.'
#     os.makedirs(output_dir, exist_ok=True)
    
#     with open(output_path, 'w', encoding='utf-8') as f:
#         f.write(svg_string)
    
#     print(f"Combined notebook SVG saved to: {output_path}")
#     print(f"Processed {processed_cell_count} cells (code and markdown)")
    
#     return svg_string

In [9]:
# CHILL_BROWN='#948979'
# SOLARIZED_BACKGROUND_COLOR='#fdf4e0'
# CELL_WIDTH=600
# CANVAS_WIDTH=700
# OUTPUT_FONT_SIZE=11

# import json
# import re
# from pygments import highlight
# from pygments.lexers import PythonLexer
# from pygments.formatters import SvgFormatter
# from pygments.styles import get_style_by_name
# import xml.etree.ElementTree as ET

# def wrap_text(text, max_chars_per_line=80, font_size=12):
#     """
#     Wrap long text to fit within specified character width.
    
#     Args:
#         text (str): Text to wrap
#         max_chars_per_line (int): Maximum characters per line
#         font_size (int): Font size; not used by the wrapping logic, which counts characters only
    
#     Returns:
#         str: Text with newlines inserted for wrapping
#     """
#     if not text:
#         return text
    
#     lines = text.split('\n')
#     wrapped_lines = []
    
#     for line in lines:
#         if len(line) <= max_chars_per_line:
#             wrapped_lines.append(line)
#         else:
#             # Break long lines at word boundaries when possible
#             words = line.split(' ')
#             current_line = ''
            
#             for word in words:
#                 # If adding this word would exceed the limit
#                 if len(current_line + ' ' + word) > max_chars_per_line:
#                     if current_line:  # If we have content in current line
#                         wrapped_lines.append(current_line)
#                         current_line = word
#                     else:  # Word itself is longer than max_chars_per_line
#                         # Break the word itself
#                         while len(word) > max_chars_per_line:
#                             wrapped_lines.append(word[:max_chars_per_line])
#                             word = word[max_chars_per_line:]
#                         current_line = word
#                 else:
#                     if current_line:
#                         current_line += ' ' + word
#                     else:
#                         current_line = word
            
#             if current_line:
#                 wrapped_lines.append(current_line)
    
#     return '\n'.join(wrapped_lines)

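# def extract_cell_data(cell):
#     """
#     Extract the contents of a Jupyter notebook cell.
#     Code cells yield (stripped source, joined text output, execution count);
#     markdown cells yield (stripped markdown, None, None);
#     any other cell type yields (None, None, None).
#     """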
#     if cell.get('cell_type') == 'code':
#         # Extract source code
#         source = cell.get('source', [])
#         if isinstance(source, list):
#             code = ''.join(source)
#         else:
#             code = source
        
#         # Extract execution count
#         execution_count = cell.get('execution_count')
        
#         # Extract outputs
#         outputs = cell.get('outputs', [])
#         output_text = []
        
#         for output in outputs:
#             if 'data' in output:
#                 # Handle different output types
#                 if 'text/plain' in output['data']:
#                     plain_text = output['data']['text/plain']
#                     if isinstance(plain_text, list):
#                         # Join list items without adding extra newlines
#                         output_text.append(''.join(plain_text))
#                     else:
#                         output_text.append(plain_text)
#             elif 'text' in output:
#                 # Handle direct text output
#                 text_content = output['text']
#                 if isinstance(text_content, list):
#                     # Join list items without adding extra newlines
#                     output_text.append(''.join(text_content))
#                 else:
#                     output_text.append(text_content)
        
#         # Join all outputs and strip any trailing whitespace
#         final_output = ''.join(output_text).rstrip()
        
#         return code.strip(), final_output, execution_count
    
#     elif cell.get('cell_type') == 'markdown':
#         # Extract markdown source
#         source = cell.get('source', [])
#         if isinstance(source, list):
#             markdown_text = ''.join(source)
#         else:
#             markdown_text = source
        
#         return markdown_text.strip(), None, None
    
#     return None, None, None

# def simple_markdown_to_text(markdown_text):
#     """
#     Convert basic markdown formatting to plain text with some simple styling hints.
#     Returns a list of (text, style_info) tuples.
#     """
#     lines = markdown_text.split('\n')
#     styled_lines = []
    
#     for line in lines:
#         line = line.strip()
        
#         # Headers
#         if line.startswith('# '):
#             styled_lines.append((line[2:], {'type': 'h1', 'weight': 'bold', 'size': 18}))
#         elif line.startswith('## '):
#             styled_lines.append((line[3:], {'type': 'h2', 'weight': 'bold', 'size': 16}))
#         elif line.startswith('### '):
#             styled_lines.append((line[4:], {'type': 'h3', 'weight': 'bold', 'size': 14}))
#         elif line.startswith('#### '):
#             styled_lines.append((line[5:], {'type': 'h4', 'weight': 'bold', 'size': 13}))
#         # Bold text (simple case)
#         elif '**' in line:
#             # Simple bold handling - just remove ** and mark as bold
#             cleaned_line = line.replace('**', '')
#             styled_lines.append((cleaned_line, {'type': 'bold', 'weight': 'bold', 'size': 12}))
#         # Italic text (simple case)
#         elif '*' in line and '**' not in line:
#             # Simple italic handling
#             cleaned_line = line.replace('*', '')
#             styled_lines.append((cleaned_line, {'type': 'italic', 'style': 'italic', 'size': 12}))
#         # Code blocks
#         elif line.startswith('```'):
#             continue  # Skip code block markers for now
#         # Empty lines
#         elif line == '':
#             styled_lines.append(('', {'type': 'empty', 'size': 12}))
#         # Regular text
#         else:
#             styled_lines.append((line, {'type': 'normal', 'size': 12}))
    
#     return styled_lines

# def create_text_element(text, x, y, font_family='Menlo', font_size=12, fill='black', font_weight='normal', font_style='normal'):
#     """Create an SVG text element."""
#     text_elem = ET.Element('text')
#     text_elem.set('x', str(x))
#     text_elem.set('y', str(y))
#     text_elem.set('font-family', font_family)
#     text_elem.set('font-size', f'{font_size}px')
#     text_elem.set('fill', fill)
#     text_elem.set('font-weight', font_weight)
#     if font_style != 'normal':
#         text_elem.set('font-style', font_style)
#     text_elem.text = text if text else ' '
#     return text_elem

# def create_multiline_text(text, x, y, font_family='Menlo', font_size=12, fill='black', line_height=1.4, max_width=None):
#     """
#     Create SVG text elements for multiline text with optional wrapping.
    
#     Args:
#         text (str): Text to render
#         x, y (float): Starting position
#         font_family, font_size, fill: Text styling
#         line_height (float): Line height multiplier
#         max_width (int): Maximum width in pixels for wrapping (optional)
    
#     Returns:
#         tuple: (text_group_element, total_height_used)
#     """
#     # Apply text wrapping if max_width is specified
#     if max_width:
#         # Estimate characters per line based on font size and width
#         # This is a rough approximation - monospace fonts are ~0.6 * font_size wide per character
#         chars_per_line = int(max_width / (font_size * 0.6))
#         text = wrap_text(text, max_chars_per_line=chars_per_line, font_size=font_size)
    
#     lines = text.split('\n')
#     text_group = ET.Element('g')
    
#     for i, line in enumerate(lines):
#         # Create text element for all lines, including empty ones
#         # For empty lines, we still need the element to maintain spacing
#         text_elem = create_text_element(
#             line if line.strip() else ' ',  # Use space for empty lines to maintain spacing
#             x, y + (i * font_size * line_height), 
#             font_family, font_size, fill
#         )
#         text_group.append(text_elem)
    
#     return text_group, len(lines) * font_size * line_height

# def create_markdown_cell_svg_group(cell_data, y_offset, font_family='Menlo', max_width=800):
#     """
#     Create an SVG group element for a markdown cell at a given y offset.
#     No background or borders - just styled text.
#     """
#     markdown_text, _, _ = extract_cell_data(cell_data)
    
#     if markdown_text is None:
#         return None, 0
    
#     # Create group for this cell
#     cell_group = ET.Element('g')
    
#     # Constants for layout
#     margin = 20
#     line_height = 1.5
#     cell_spacing = 15  # Space between cells
    
#     # Parse markdown into styled lines
#     styled_lines = simple_markdown_to_text(markdown_text)
    
#     current_y = y_offset
    
#     for text, style in styled_lines:
#         font_size = style.get('size', 12)
#         font_weight = style.get('weight', 'normal')
#         font_style = style.get('style', 'normal')
        
#         # Use a more readable font for markdown
#         markdown_font = 'system-ui, -apple-system, sans-serif' if style['type'].startswith('h') else font_family
        
#         # Create text element
#         text_elem = create_text_element(
#             text, margin, current_y + font_size,
#             markdown_font, font_size, '#333', font_weight, font_style
#         )
#         cell_group.append(text_elem)
        
#         # Calculate line height based on font size
#         current_y += font_size * line_height
        
#         # Add extra space after headers
#         if style['type'].startswith('h'):
#             current_y += font_size * 0.3
    
#     # Calculate total height
#     cell_height = current_y - y_offset + cell_spacing
    
#     return cell_group, cell_height

# def create_cell_svg_group(cell_data, y_offset, font_family='Menlo', font_size=12, max_width=800):
#     """
#     Create an SVG group element for a single cell at a given y offset.
    
#     Args:
#         cell_data (dict): Jupyter cell dictionary
#         y_offset (float): Y position to start this cell
#         font_family (str): Font family to use
#         font_size (int): Font size in pixels
#         max_width (int): Maximum width for the cell
    
#     Returns:
#         tuple: (svg_group_element, height_used)
#     """
    
#     # Check if this is a markdown cell
#     if cell_data.get('cell_type') == 'markdown':
#         return create_markdown_cell_svg_group(cell_data, y_offset, font_family, max_width)
    
#     code, output_text, execution_count = extract_cell_data(cell_data)
    
#     if code is None:
#         return None, 0
    
#     # Create lexer and formatter for Python code
#     lexer = PythonLexer()
#     formatter = SvgFormatter(
#         style='solarized-light',
#         fontfamily=font_family,
#         fontsize=f'{font_size}px',
#         linenos=False,
#         noclasses=True,
#         nobackground=True
#     )
    
#     # Generate highlighted code SVG
#     code_svg = highlight(code, lexer, formatter)
#     code_root = ET.fromstring(code_svg)
    
#     # Create group for this cell
#     cell_group = ET.Element('g')
    
#     # Constants for layout
#     margin = 20
#     execution_count_width = 60
#     cell_padding = 12
#     line_height = 1.4
#     cell_spacing = 10  # Space between cells
    
#     # Calculate dimensions
#     code_lines = len(code.split('\n'))
#     code_height = code_lines * font_size * line_height
    
#     # Start position within this cell
#     current_y = y_offset
    
#     # Execution count for input
#     exec_count_text = f"[{execution_count or ' '}]:"
#     exec_count_elem = create_text_element(
#         exec_count_text, margin, current_y + font_size,
#         font_family, font_size, CHILL_BROWN, 'normal'
#     )
#     cell_group.append(exec_count_elem)
    
#     # Input code area with background
#     code_x = margin + execution_count_width
#     code_y = current_y
    
#     # Create background rectangle for input code
#     code_width = min(max_width - code_x - margin, max(400, len(max(code.split('\n'), key=len)) * font_size * 0.6))
#     input_bg = ET.Element('rect')
#     input_bg.set('x', str(code_x - cell_padding))
#     input_bg.set('y', str(code_y - cell_padding//2))
#     input_bg.set('width', str(CELL_WIDTH)) #str(code_width + 2 * cell_padding))
#     input_bg.set('height', str(code_height + cell_padding))
#     input_bg.set('fill', SOLARIZED_BACKGROUND_COLOR)  
#     input_bg.set('stroke', CHILL_BROWN) 
#     input_bg.set('opacity', '0.2') 
#     input_bg.set('stroke-width', '1')
#     cell_group.append(input_bg)
    
#     # Add the syntax-highlighted code
#     code_group = ET.Element('g')
#     code_group.set('transform', f'translate({code_x}, {code_y})')
    
#     # Extract the highlighted content from pygments SVG
#     for elem in code_root:
#         if elem.tag.endswith('g') or elem.tag.endswith('text'):
#             code_group.append(elem)
    
#     cell_group.append(code_group)
    
#     # Move to output section
#     input_output_spacing = cell_padding   # Vertical gap between the input code and the output section
#     current_y += code_height + input_output_spacing
    
#     # Handle outputs if they exist
#     if output_text and output_text.strip():
#         # Execution count for output
#         output_exec_count = create_text_element(
#             exec_count_text, margin, current_y + OUTPUT_FONT_SIZE,
#             font_family, OUTPUT_FONT_SIZE, CHILL_BROWN, 'normal'
#         )
#         cell_group.append(output_exec_count)
        
#         # Output text with wrapping - calculate available width for output
#         available_width = CELL_WIDTH - cell_padding * 2  # Account for padding
#         output_group, output_height = create_multiline_text(
#             output_text, code_x, current_y + OUTPUT_FONT_SIZE,
#             font_family, OUTPUT_FONT_SIZE, '#333', 
#             max_width=available_width  # Enable text wrapping for outputs
#         )
#         cell_group.append(output_group)
        
#         current_y += output_height + cell_padding  # Add some space after output
    
#     # NOW calculate the total cell height
#     cell_height = current_y - y_offset
    
#     # Add spacing after cell (this controls spacing between different cells)
#     cell_height += cell_spacing  # cell_spacing is set to 10 in the layout constants above
        
#     return cell_group, cell_height

# def process_jupyter_notebook(notebook_path, output_path='notebook.svg', font_family='Menlo', font_size=12, max_width=900):
#     """
#     Process an entire Jupyter notebook and convert all code and markdown cells to a single SVG file.
    
#     Args:
#         notebook_path (str): Path to the .ipynb file
#         output_path (str): Path to save the combined SVG file
#         font_family (str): Font family to use
#         font_size (int): Font size in pixels
#         max_width (int): Upper bound on the estimated content width; the final SVG is at least CANVAS_WIDTH wide
    
#     Returns:
#         str: SVG content as string
#     """
#     import os
    
#     with open(notebook_path, 'r', encoding='utf-8') as f:
#         notebook = json.load(f)
    
#     # Create the main SVG container
#     svg = ET.Element('svg')
#     svg.set('xmlns', 'http://www.w3.org/2000/svg')
    
#     margin = 20
#     current_y = margin
#     actual_width = 0
    
#     # Process each cell (both code and markdown)
#     processed_cell_count = 0
#     for i, cell in enumerate(notebook.get('cells', [])):
#         cell_type = cell.get('cell_type')
        
#         if cell_type == 'code':
#             code, _, _ = extract_cell_data(cell)
#             if code and code.strip():
#                 processed_cell_count += 1
                
#                 # Create SVG group for this cell
#                 cell_group, cell_height = create_cell_svg_group(
#                     cell, current_y, font_family, font_size, max_width
#                 )
                
#                 if cell_group is not None:
#                     svg.append(cell_group)
#                     current_y += cell_height
                    
#                     # Update actual width based on content
#                     code_lines = code.split('\n')
#                     max_line_length = max(len(line) for line in code_lines) if code_lines else 0
#                     estimated_width = margin + 60 + max_line_length * font_size * 0.6 + 40
#                     actual_width = max(actual_width, min(estimated_width, max_width))
        
#         elif cell_type == 'markdown':
#             markdown_text, _, _ = extract_cell_data(cell)
#             if markdown_text and markdown_text.strip():
#                 processed_cell_count += 1
                
#                 # Create SVG group for this markdown cell
#                 cell_group, cell_height = create_markdown_cell_svg_group(
#                     cell, current_y, font_family, max_width
#                 )
                
#                 if cell_group is not None:
#                     svg.append(cell_group)
#                     current_y += cell_height
                    
#                     # Update actual width for markdown (simpler estimation)
#                     lines = markdown_text.split('\n')
#                     max_line_length = max(len(line) for line in lines) if lines else 0
#                     estimated_width = margin + max_line_length * 8 + 40  # Rough character width
#                     actual_width = max(actual_width, min(estimated_width, max_width))
    
#     if processed_cell_count == 0:
#         print("No code or markdown cells found in notebook")
#         return None
    
#     # Final dimensions
#     total_height = current_y + margin
#     final_width = max(CANVAS_WIDTH, actual_width)
    
#     svg.set('width', f'{final_width}px')
#     svg.set('height', f'{total_height}px')
#     svg.set('viewBox', f'0 0 {final_width} {total_height}')
    
#     # Add overall background
#     overall_bg = ET.Element('rect')
#     overall_bg.set('x', '0')
#     overall_bg.set('y', '0')
#     overall_bg.set('width', str(final_width))
#     overall_bg.set('height', str(total_height))
#     overall_bg.set('fill', '#ffffff')
#     svg.insert(0, overall_bg)
    
#     # Convert to string
#     svg_string = ET.tostring(svg, encoding='unicode')
#     svg_string = '<?xml version="1.0" encoding="UTF-8"?>\n' + svg_string
    
#     # Save to file
#     output_dir = os.path.dirname(output_path) if os.path.dirname(output_path) else '.'
#     os.makedirs(output_dir, exist_ok=True)
    
#     with open(output_path, 'w', encoding='utf-8') as f:
#         f.write(svg_string)
    
#     print(f"Combined notebook SVG saved to: {output_path}")
#     print(f"Processed {processed_cell_count} cells (code and markdown)")
    
#     return svg_string

In [10]:
# CHILL_BROWN='#948979'
# SOLARIZED_BACKGROUND_COLOR='#fdf4e0'
# CELL_WIDTH=600
# CANVAS_WIDTH=700
# OUTPUT_FONT_SIZE=11

# import json
# import re
# from pygments import highlight
# from pygments.lexers import PythonLexer
# from pygments.formatters import SvgFormatter
# from pygments.styles import get_style_by_name
# import xml.etree.ElementTree as ET

# def extract_cell_data(cell):
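#     """Extract (text, output_text, execution_count) from a Jupyter notebook cell.
#     Code cells yield (code, joined text outputs, execution_count); markdown cells
#     yield (markdown_text, None, None); any other cell type yields (None, None, None).
#     """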
#     if cell.get('cell_type') == 'code':
#         # Extract source code
#         source = cell.get('source', [])
#         if isinstance(source, list):
#             code = ''.join(source)
#         else:
#             code = source
        
#         # Extract execution count
#         execution_count = cell.get('execution_count')
        
#         # Extract outputs
#         outputs = cell.get('outputs', [])
#         output_text = []
        
#         for output in outputs:
#             if 'data' in output:
#                 # Handle different output types
#                 if 'text/plain' in output['data']:
#                     plain_text = output['data']['text/plain']
#                     if isinstance(plain_text, list):
#                         # Join list items without adding extra newlines
#                         output_text.append(''.join(plain_text))
#                     else:
#                         output_text.append(plain_text)
#             elif 'text' in output:
#                 # Handle direct text output
#                 text_content = output['text']
#                 if isinstance(text_content, list):
#                     # Join list items without adding extra newlines
#                     output_text.append(''.join(text_content))
#                 else:
#                     output_text.append(text_content)
        
#         # Join all outputs and strip any trailing whitespace
#         final_output = ''.join(output_text).rstrip()
        
#         return code.strip(), final_output, execution_count
    
#     elif cell.get('cell_type') == 'markdown':
#         # Extract markdown source
#         source = cell.get('source', [])
#         if isinstance(source, list):
#             markdown_text = ''.join(source)
#         else:
#             markdown_text = source
        
#         return markdown_text.strip(), None, None
    
#     return None, None, None

# def simple_markdown_to_text(markdown_text):
#     """
#     Convert basic markdown formatting to plain text with some simple styling hints.
#     Returns a list of (text, style_info) tuples.
#     """
#     lines = markdown_text.split('\n')
#     styled_lines = []
    
#     for line in lines:
#         line = line.strip()
        
#         # Headers
#         if line.startswith('# '):
#             styled_lines.append((line[2:], {'type': 'h1', 'weight': 'bold', 'size': 18}))
#         elif line.startswith('## '):
#             styled_lines.append((line[3:], {'type': 'h2', 'weight': 'bold', 'size': 16}))
#         elif line.startswith('### '):
#             styled_lines.append((line[4:], {'type': 'h3', 'weight': 'bold', 'size': 14}))
#         elif line.startswith('#### '):
#             styled_lines.append((line[5:], {'type': 'h4', 'weight': 'bold', 'size': 13}))
#         # Bold text (simple case)
#         elif '**' in line:
#             # Simple bold handling - just remove ** and mark as bold
#             cleaned_line = line.replace('**', '')
#             styled_lines.append((cleaned_line, {'type': 'bold', 'weight': 'bold', 'size': 12}))
#         # Italic text (simple case)
#         elif '*' in line and '**' not in line:
#             # Simple italic handling
#             cleaned_line = line.replace('*', '')
#             styled_lines.append((cleaned_line, {'type': 'italic', 'style': 'italic', 'size': 12}))
#         # Code blocks
#         elif line.startswith('```'):
#             continue  # Skip code block markers for now
#         # Empty lines
#         elif line == '':
#             styled_lines.append(('', {'type': 'empty', 'size': 12}))
#         # Regular text
#         else:
#             styled_lines.append((line, {'type': 'normal', 'size': 12}))
    
#     return styled_lines

# def create_text_element(text, x, y, font_family='Menlo', font_size=12, fill='black', font_weight='normal', font_style='normal'):
#     """Create an SVG text element."""
#     text_elem = ET.Element('text')
#     text_elem.set('x', str(x))
#     text_elem.set('y', str(y))
#     text_elem.set('font-family', font_family)
#     text_elem.set('font-size', f'{font_size}px')
#     text_elem.set('fill', fill)
#     text_elem.set('font-weight', font_weight)
#     if font_style != 'normal':
#         text_elem.set('font-style', font_style)
#     text_elem.text = text if text else ' '
#     return text_elem

# def create_multiline_text(text, x, y, font_family='Menlo', font_size=12, fill='black', line_height=1.4):
#     """Create SVG text elements for multiline text."""
#     lines = text.split('\n')
#     text_group = ET.Element('g')
    
#     for i, line in enumerate(lines):
#         # Create text element for all lines, including empty ones
#         # For empty lines, we still need the element to maintain spacing
#         text_elem = create_text_element(
#             line if line.strip() else ' ',  # Use space for empty lines to maintain spacing
#             x, y + (i * font_size * line_height), 
#             font_family, font_size, fill
#         )
#         text_group.append(text_elem)
    
#     return text_group, len(lines) * font_size * line_height

# def create_markdown_cell_svg_group(cell_data, y_offset, font_family='Menlo', max_width=800):
#     """
#     Create an SVG group element for a markdown cell at a given y offset.
#     No background or borders - just styled text.
#     """
#     markdown_text, _, _ = extract_cell_data(cell_data)
    
#     if markdown_text is None:
#         return None, 0
    
#     # Create group for this cell
#     cell_group = ET.Element('g')
    
#     # Constants for layout
#     margin = 20
#     line_height = 1.5
#     cell_spacing = 15  # Space between cells
    
#     # Parse markdown into styled lines
#     styled_lines = simple_markdown_to_text(markdown_text)
    
#     current_y = y_offset
    
#     for text, style in styled_lines:
#         font_size = style.get('size', 12)
#         font_weight = style.get('weight', 'normal')
#         font_style = style.get('style', 'normal')
        
#         # Use a more readable font for markdown
#         markdown_font = 'system-ui, -apple-system, sans-serif' if style['type'].startswith('h') else font_family
        
#         # Create text element
#         text_elem = create_text_element(
#             text, margin, current_y + font_size,
#             markdown_font, font_size, '#333', font_weight, font_style
#         )
#         cell_group.append(text_elem)
        
#         # Calculate line height based on font size
#         current_y += font_size * line_height
        
#         # Add extra space after headers
#         if style['type'].startswith('h'):
#             current_y += font_size * 0.3
    
#     # Calculate total height
#     cell_height = current_y - y_offset + cell_spacing
    
#     return cell_group, cell_height

# def create_cell_svg_group(cell_data, y_offset, font_family='Menlo', font_size=12, max_width=800):
#     """
#     Create an SVG group element for a single cell at a given y offset.
    
#     Args:
#         cell_data (dict): Jupyter cell dictionary
#         y_offset (float): Y position to start this cell
#         font_family (str): Font family to use
#         font_size (int): Font size in pixels
#         max_width (int): Maximum width for the cell
    
#     Returns:
#         tuple: (svg_group_element, height_used)
#     """
    
#     # Check if this is a markdown cell
#     if cell_data.get('cell_type') == 'markdown':
#         return create_markdown_cell_svg_group(cell_data, y_offset, font_family, max_width)
    
#     code, output_text, execution_count = extract_cell_data(cell_data)
    
#     if code is None:
#         return None, 0
    
#     # Create lexer and formatter for Python code
#     lexer = PythonLexer()
#     formatter = SvgFormatter(
#         style='solarized-light',
#         fontfamily=font_family,
#         fontsize=f'{font_size}px',
#         linenos=False,
#         noclasses=True,
#         nobackground=True
#     )
    
#     # Generate highlighted code SVG
#     code_svg = highlight(code, lexer, formatter)
#     code_root = ET.fromstring(code_svg)
    
#     # Create group for this cell
#     cell_group = ET.Element('g')
    
#     # Constants for layout
#     margin = 20
#     execution_count_width = 60
#     cell_padding = 12
#     line_height = 1.4
#     cell_spacing = 10  # Space between cells
    
#     # Calculate dimensions
#     code_lines = len(code.split('\n'))
#     code_height = code_lines * font_size * line_height
    
#     # Start position within this cell
#     current_y = y_offset
    
#     # Execution count for input
#     exec_count_text = f"[{execution_count or ' '}]:"
#     exec_count_elem = create_text_element(
#         exec_count_text, margin, current_y + font_size,
#         font_family, font_size, CHILL_BROWN, 'normal'
#     )
#     cell_group.append(exec_count_elem)
    
#     # Input code area with background
#     code_x = margin + execution_count_width
#     code_y = current_y
    
#     # Create background rectangle for input code
#     code_width = min(max_width - code_x - margin, max(400, len(max(code.split('\n'), key=len)) * font_size * 0.6))
#     input_bg = ET.Element('rect')
#     input_bg.set('x', str(code_x - cell_padding))
#     input_bg.set('y', str(code_y - cell_padding//2))
#     input_bg.set('width', str(CELL_WIDTH)) #str(code_width + 2 * cell_padding))
#     input_bg.set('height', str(code_height + cell_padding))
#     input_bg.set('fill', SOLARIZED_BACKGROUND_COLOR)  
#     input_bg.set('stroke', CHILL_BROWN) 
#     input_bg.set('opacity', '0.2') 
#     input_bg.set('stroke-width', '1')
#     cell_group.append(input_bg)
    
#     # Add the syntax-highlighted code
#     code_group = ET.Element('g')
#     code_group.set('transform', f'translate({code_x}, {code_y})')
    
#     # Extract the highlighted content from pygments SVG
#     for elem in code_root:
#         if elem.tag.endswith('g') or elem.tag.endswith('text'):
#             code_group.append(elem)
    
#     cell_group.append(code_group)
    
#     # Move to output section
#     input_output_spacing = cell_padding   # SW noodling: gap between input code and output reuses cell_padding
#     current_y += code_height + input_output_spacing
    
#     # Handle outputs if they exist
#     if output_text and output_text.strip():
#         # Execution count for output
#         output_exec_count = create_text_element(
#             exec_count_text, margin, current_y + OUTPUT_FONT_SIZE,
#             font_family, OUTPUT_FONT_SIZE, CHILL_BROWN, 'normal'
#         )
#         cell_group.append(output_exec_count)
        
#         # Output text
#         output_group, output_height = create_multiline_text(
#             output_text, code_x, current_y + OUTPUT_FONT_SIZE,
#             font_family, OUTPUT_FONT_SIZE, '#333'
#         )
#         cell_group.append(output_group)
        
#         current_y += output_height + cell_padding  # Add some space after output
    
#     # NOW calculate the total cell height
#     cell_height = current_y - y_offset
    
#     # Add spacing after cell (this controls spacing between different cells)
#     cell_height += cell_spacing  # gap between cells (cell_spacing is 10 here, not 0)
        
#     return cell_group, cell_height

# def process_jupyter_notebook(notebook_path, output_path='notebook.svg', font_family='Menlo', font_size=12, max_width=900):
#     """
#     Process an entire Jupyter notebook and convert all code and markdown cells to a single SVG file.
    
#     Args:
#         notebook_path (str): Path to the .ipynb file
#         output_path (str): Path to save the combined SVG file
#         font_family (str): Font family to use
#         font_size (int): Font size in pixels
#         max_width (int): Maximum width of the SVG
    
#     Returns:
#         str | None: SVG content as string, or None when no non-empty code or markdown cell was found
#     """
#     import os
    
#     with open(notebook_path, 'r', encoding='utf-8') as f:
#         notebook = json.load(f)
    
#     # Create the main SVG container
#     svg = ET.Element('svg')
#     svg.set('xmlns', 'http://www.w3.org/2000/svg')
    
#     margin = 20
#     current_y = margin
#     actual_width = 0
    
#     # Process each cell (both code and markdown)
#     processed_cell_count = 0
#     for i, cell in enumerate(notebook.get('cells', [])):
#         cell_type = cell.get('cell_type')
        
#         if cell_type == 'code':
#             code, _, _ = extract_cell_data(cell)
#             if code and code.strip():
#                 processed_cell_count += 1
                
#                 # Create SVG group for this cell
#                 cell_group, cell_height = create_cell_svg_group(
#                     cell, current_y, font_family, font_size, max_width
#                 )
                
#                 if cell_group is not None:
#                     svg.append(cell_group)
#                     current_y += cell_height
                    
#                     # Update actual width based on content
#                     code_lines = code.split('\n')
#                     max_line_length = max(len(line) for line in code_lines) if code_lines else 0
#                     estimated_width = margin + 60 + max_line_length * font_size * 0.6 + 40
#                     actual_width = max(actual_width, min(estimated_width, max_width))
        
#         elif cell_type == 'markdown':
#             markdown_text, _, _ = extract_cell_data(cell)
#             if markdown_text and markdown_text.strip():
#                 processed_cell_count += 1
                
#                 # Create SVG group for this markdown cell
#                 cell_group, cell_height = create_markdown_cell_svg_group(
#                     cell, current_y, font_family, max_width
#                 )
                
#                 if cell_group is not None:
#                     svg.append(cell_group)
#                     current_y += cell_height
                    
#                     # Update actual width for markdown (simpler estimation)
#                     lines = markdown_text.split('\n')
#                     max_line_length = max(len(line) for line in lines) if lines else 0
#                     estimated_width = margin + max_line_length * 8 + 40  # Rough character width
#                     actual_width = max(actual_width, min(estimated_width, max_width))
    
#     if processed_cell_count == 0:
#         print("No code or markdown cells found in notebook")
#         return None
    
#     # Final dimensions
#     total_height = current_y + margin
#     final_width = max(CANVAS_WIDTH, actual_width)
    
#     svg.set('width', f'{final_width}px')
#     svg.set('height', f'{total_height}px')
#     svg.set('viewBox', f'0 0 {final_width} {total_height}')
    
#     # Add overall background
#     overall_bg = ET.Element('rect')
#     overall_bg.set('x', '0')
#     overall_bg.set('y', '0')
#     overall_bg.set('width', str(final_width))
#     overall_bg.set('height', str(total_height))
#     overall_bg.set('fill', '#ffffff')
#     svg.insert(0, overall_bg)
    
#     # Convert to string
#     svg_string = ET.tostring(svg, encoding='unicode')
#     svg_string = '<?xml version="1.0" encoding="UTF-8"?>\n' + svg_string
    
#     # Save to file
#     output_dir = os.path.dirname(output_path) if os.path.dirname(output_path) else '.'
#     os.makedirs(output_dir, exist_ok=True)
    
#     with open(output_path, 'w', encoding='utf-8') as f:
#         f.write(svg_string)
    
#     print(f"Combined notebook SVG saved to: {output_path}")
#     print(f"Processed {processed_cell_count} cells (code and markdown)")
    
#     return svg_string

In [11]:
# CHILL_BROWN='#948979'
# SOLARIZED_BACKGROUND_COLOR='#fdf4e0'
# CELL_WIDTH=600
# CANVAS_WIDTH=700
# OUTPUT_FONT_SIZE=11

# import json
# import re
# from pygments import highlight
# from pygments.lexers import PythonLexer
# from pygments.formatters import SvgFormatter
# from pygments.styles import get_style_by_name
# import xml.etree.ElementTree as ET

# def extract_cell_data(cell):
#     """Extract code, outputs, and execution count from a Jupyter notebook cell."""
#     if cell.get('cell_type') != 'code':
#         return None, None, None
    
#     # Extract source code
#     source = cell.get('source', [])
#     if isinstance(source, list):
#         code = ''.join(source)
#     else:
#         code = source
    
#     # Extract execution count
#     execution_count = cell.get('execution_count')
    
#     # Extract outputs
#     outputs = cell.get('outputs', [])
#     output_text = []
    
#     for output in outputs:
#         if 'data' in output:
#             # Handle different output types
#             if 'text/plain' in output['data']:
#                 plain_text = output['data']['text/plain']
#                 if isinstance(plain_text, list):
#                     # Join list items without adding extra newlines
#                     output_text.append(''.join(plain_text))
#                 else:
#                     output_text.append(plain_text)
#         elif 'text' in output:
#             # Handle direct text output
#             text_content = output['text']
#             if isinstance(text_content, list):
#                 # Join list items without adding extra newlines
#                 output_text.append(''.join(text_content))
#             else:
#                 output_text.append(text_content)
    
#     # Join all outputs and strip any trailing whitespace
#     final_output = ''.join(output_text).rstrip()
    
#     return code.strip(), final_output, execution_count

# def create_text_element(text, x, y, font_family='Menlo', font_size=12, fill='black', font_weight='normal'):
#     """Create an SVG text element."""
#     text_elem = ET.Element('text')
#     text_elem.set('x', str(x))
#     text_elem.set('y', str(y))
#     text_elem.set('font-family', font_family)
#     text_elem.set('font-size', f'{font_size}px')
#     text_elem.set('fill', fill)
#     text_elem.set('font-weight', font_weight)
#     text_elem.text = text
#     return text_elem

# def create_multiline_text(text, x, y, font_family='Menlo', font_size=12, fill='black', line_height=1.4):
#     """Create SVG text elements for multiline text."""
#     lines = text.split('\n')
#     text_group = ET.Element('g')
    
#     for i, line in enumerate(lines):
#         # Create text element for all lines, including empty ones
#         # For empty lines, we still need the element to maintain spacing
#         text_elem = create_text_element(
#             line if line.strip() else ' ',  # Use space for empty lines to maintain spacing
#             x, y + (i * font_size * line_height), 
#             font_family, font_size, fill
#         )
#         text_group.append(text_elem)
    
#     return text_group, len(lines) * font_size * line_height

# def create_cell_svg_group(cell_data, y_offset, font_family='Menlo', font_size=12, max_width=800):
#     """
#     Create an SVG group element for a single cell at a given y offset.
    
#     Args:
#         cell_data (dict): Jupyter cell dictionary
#         y_offset (float): Y position to start this cell
#         font_family (str): Font family to use
#         font_size (int): Font size in pixels
#         max_width (int): Maximum width for the cell
    
#     Returns:
#         tuple: (svg_group_element, height_used)
#     """
    
#     code, output_text, execution_count = extract_cell_data(cell_data)
    
#     if code is None:
#         return None, 0
    
#     # Create lexer and formatter for Python code
#     lexer = PythonLexer()
#     formatter = SvgFormatter(
#         style='solarized-light',
#         fontfamily=font_family,
#         fontsize=f'{font_size}px',
#         linenos=False,
#         noclasses=True,
#         nobackground=True
#     )
    
#     # Generate highlighted code SVG
#     code_svg = highlight(code, lexer, formatter)
#     code_root = ET.fromstring(code_svg)
    
#     # Create group for this cell
#     cell_group = ET.Element('g')
    
#     # Constants for layout
#     margin = 20
#     execution_count_width = 60
#     cell_padding = 12
#     line_height = 1.4
#     cell_spacing = 10  # Space between cells
    
#     # Calculate dimensions
#     code_lines = len(code.split('\n'))
#     code_height = code_lines * font_size * line_height
    
#     # Start position within this cell
#     current_y = y_offset
    
#     # Execution count for input
#     exec_count_text = f"[{execution_count or ' '}]:"
#     exec_count_elem = create_text_element(
#         exec_count_text, margin, current_y + font_size,
#         font_family, font_size, CHILL_BROWN, 'normal'
#     )
#     cell_group.append(exec_count_elem)
    
#     # Input code area with background
#     code_x = margin + execution_count_width
#     code_y = current_y
    
#     # Create background rectangle for input code
#     code_width = min(max_width - code_x - margin, max(400, len(max(code.split('\n'), key=len)) * font_size * 0.6))
#     input_bg = ET.Element('rect')
#     input_bg.set('x', str(code_x - cell_padding))
#     input_bg.set('y', str(code_y - cell_padding//2))
#     input_bg.set('width', str(CELL_WIDTH)) #str(code_width + 2 * cell_padding))
#     input_bg.set('height', str(code_height + cell_padding))
#     input_bg.set('fill', SOLARIZED_BACKGROUND_COLOR)  
#     input_bg.set('stroke', CHILL_BROWN) 
#     input_bg.set('opacity', '0.2') 
#     input_bg.set('stroke-width', '1')
#     cell_group.append(input_bg)
    
#     # Add the syntax-highlighted code
#     code_group = ET.Element('g')
#     code_group.set('transform', f'translate({code_x}, {code_y})')
    
#     # Extract the highlighted content from pygments SVG
#     for elem in code_root:
#         if elem.tag.endswith('g') or elem.tag.endswith('text'):
#             code_group.append(elem)
    
#     cell_group.append(code_group)
    
#     # Move to output section
#     input_output_spacing = cell_padding   # SW noodling: gap between input code and output reuses cell_padding
#     current_y += code_height + input_output_spacing
    
#     # Handle outputs if they exist
#     if output_text and output_text.strip():
#         # Execution count for output
#         output_exec_count = create_text_element(
#             exec_count_text, margin, current_y + OUTPUT_FONT_SIZE,
#             font_family, OUTPUT_FONT_SIZE, CHILL_BROWN, 'normal'
#         )
#         cell_group.append(output_exec_count)
        
#         # Output text
#         output_group, output_height = create_multiline_text(
#             output_text, code_x, current_y + OUTPUT_FONT_SIZE,
#             font_family, OUTPUT_FONT_SIZE, '#333'
#         )
#         cell_group.append(output_group)
        
#         current_y += output_height + cell_padding  # Add some space after output
    
#     # NOW calculate the total cell height
#     cell_height = current_y - y_offset
    
#     # Add spacing after cell (this controls spacing between different cells)
#     cell_height += cell_spacing  # gap between cells (cell_spacing is 10 here, not 0)
        
#     return cell_group, cell_height

# def process_jupyter_notebook(notebook_path, output_path='notebook.svg', font_family='Menlo', font_size=12, max_width=900):
#     """
#     Process an entire Jupyter notebook and convert all code cells to a single SVG file.
    
#     Args:
#         notebook_path (str): Path to the .ipynb file
#         output_path (str): Path to save the combined SVG file
#         font_family (str): Font family to use
#         font_size (int): Font size in pixels
#         max_width (int): Maximum width of the SVG
    
#     Returns:
#         str | None: SVG content as string, or None when no non-empty code cell was found
#     """
#     import os
    
#     with open(notebook_path, 'r', encoding='utf-8') as f:
#         notebook = json.load(f)
    
#     # Create the main SVG container
#     svg = ET.Element('svg')
#     svg.set('xmlns', 'http://www.w3.org/2000/svg')
    
#     margin = 20
#     current_y = margin
#     actual_width = 0
    
#     # Process each code cell
#     code_cell_count = 0
#     for i, cell in enumerate(notebook.get('cells', [])):
#         if cell.get('cell_type') == 'code':
#             code, _, _ = extract_cell_data(cell)
#             if code and code.strip():
#                 code_cell_count += 1
                
#                 # Create SVG group for this cell
#                 cell_group, cell_height = create_cell_svg_group(
#                     cell, current_y, font_family, font_size, max_width
#                 )
                
#                 if cell_group is not None:
#                     svg.append(cell_group)
#                     current_y += cell_height
                    
#                     # Update actual width based on content
#                     # This is a rough estimate - you might want to make this more precise
#                     code_lines = code.split('\n')
#                     max_line_length = max(len(line) for line in code_lines) if code_lines else 0
#                     estimated_width = margin + 60 + max_line_length * font_size * 0.6 + 40
#                     actual_width = max(actual_width, min(estimated_width, max_width))
    
#     if code_cell_count == 0:
#         print("No code cells found in notebook")
#         return None
    
#     # Final dimensions
#     total_height = current_y + margin
#     final_width = max(CANVAS_WIDTH, actual_width)
    
#     svg.set('width', f'{final_width}px')
#     svg.set('height', f'{total_height}px')
#     svg.set('viewBox', f'0 0 {final_width} {total_height}')
    
#     # Add overall background
#     overall_bg = ET.Element('rect')
#     overall_bg.set('x', '0')
#     overall_bg.set('y', '0')
#     overall_bg.set('width', str(final_width))
#     overall_bg.set('height', str(total_height))
#     overall_bg.set('fill', '#ffffff')
#     svg.insert(0, overall_bg)
    
#     # Convert to string
#     svg_string = ET.tostring(svg, encoding='unicode')
#     svg_string = '<?xml version="1.0" encoding="UTF-8"?>\n' + svg_string
    
#     # Save to file
#     output_dir = os.path.dirname(output_path) if os.path.dirname(output_path) else '.'
#     os.makedirs(output_dir, exist_ok=True)
    
#     with open(output_path, 'w', encoding='utf-8') as f:
#         f.write(svg_string)
    
#     print(f"Combined notebook SVG saved to: {output_path}")
#     print(f"Processed {code_cell_count} code cells")
    
#     return svg_string