## Notebook Converter 2

In [4]:
CHILL_BROWN='#948979'
SOLARIZED_BACKGROUND_COLOR='#fdf4e0'
CELL_WIDTH=600
CANVAS_WIDTH=700
OUTPUT_FONT_SIZE=11

import json
import re
from pygments import highlight
from pygments.lexers import PythonLexer
from pygments.formatters import SvgFormatter
from pygments.styles import get_style_by_name
import xml.etree.ElementTree as ET

def extract_cell_data(cell):
    """Extract code, outputs, and execution count from a Jupyter notebook cell."""
    if cell.get('cell_type') != 'code':
        return None, None, None
    
    # Extract source code
    source = cell.get('source', [])
    if isinstance(source, list):
        code = ''.join(source)
    else:
        code = source
    
    # Extract execution count
    execution_count = cell.get('execution_count')
    
    # Extract outputs
    outputs = cell.get('outputs', [])
    output_text = []
    
    for output in outputs:
        if 'data' in output:
            # Handle different output types
            if 'text/plain' in output['data']:
                plain_text = output['data']['text/plain']
                if isinstance(plain_text, list):
                    # Join list items without adding extra newlines
                    output_text.append(''.join(plain_text))
                else:
                    output_text.append(plain_text)
        elif 'text' in output:
            # Handle direct text output
            text_content = output['text']
            if isinstance(text_content, list):
                # Join list items without adding extra newlines
                output_text.append(''.join(text_content))
            else:
                output_text.append(text_content)
    
    # Join all outputs and strip any trailing whitespace
    final_output = ''.join(output_text).rstrip()
    
    return code.strip(), final_output, execution_count

def create_text_element(text, x, y, font_family='Menlo', font_size=12, fill='black', font_weight='normal'):
    """Create an SVG text element."""
    text_elem = ET.Element('text')
    text_elem.set('x', str(x))
    text_elem.set('y', str(y))
    text_elem.set('font-family', font_family)
    text_elem.set('font-size', f'{font_size}px')
    text_elem.set('fill', fill)
    text_elem.set('font-weight', font_weight)
    text_elem.text = text
    return text_elem

def create_multiline_text(text, x, y, font_family='Menlo', font_size=12, fill='black', line_height=1.4):
    """Create SVG text elements for multiline text."""
    lines = text.split('\n')
    text_group = ET.Element('g')
    
    for i, line in enumerate(lines):
        # Create text element for all lines, including empty ones
        # For empty lines, we still need the element to maintain spacing
        text_elem = create_text_element(
            line if line.strip() else ' ',  # Use space for empty lines to maintain spacing
            x, y + (i * font_size * line_height), 
            font_family, font_size, fill
        )
        text_group.append(text_elem)
    
    return text_group, len(lines) * font_size * line_height

def create_cell_svg_group(cell_data, y_offset, font_family='Menlo', font_size=12, max_width=800):
    """
    Create an SVG group element for a single cell at a given y offset.
    
    Args:
        cell_data (dict): Jupyter cell dictionary
        y_offset (float): Y position to start this cell
        font_family (str): Font family to use
        font_size (int): Font size in pixels
        max_width (int): Maximum width for the cell
    
    Returns:
        tuple: (svg_group_element, height_used)
    """
    
    code, output_text, execution_count = extract_cell_data(cell_data)
    
    if code is None:
        return None, 0
    
    # Create lexer and formatter for Python code
    lexer = PythonLexer()
    formatter = SvgFormatter(
        style='solarized-light',
        fontfamily=font_family,
        fontsize=f'{font_size}px',
        linenos=False,
        noclasses=True,
        nobackground=True
    )
    
    # Generate highlighted code SVG
    code_svg = highlight(code, lexer, formatter)
    code_root = ET.fromstring(code_svg)
    
    # Create group for this cell
    cell_group = ET.Element('g')
    
    # Constants for layout
    margin = 20
    execution_count_width = 60
    cell_padding = 12
    line_height = 1.4
    cell_spacing = 10  # Space between cells
    
    # Calculate dimensions
    code_lines = len(code.split('\n'))
    code_height = code_lines * font_size * line_height
    
    # Start position within this cell
    current_y = y_offset
    
    # Execution count for input
    exec_count_text = f"[{execution_count or ' '}]:"
    exec_count_elem = create_text_element(
        exec_count_text, margin, current_y + font_size,
        font_family, font_size, CHILL_BROWN, 'normal'
    )
    cell_group.append(exec_count_elem)
    
    # Input code area with background
    code_x = margin + execution_count_width
    code_y = current_y
    
    # Create background rectangle for input code
    code_width = min(max_width - code_x - margin, max(400, len(max(code.split('\n'), key=len)) * font_size * 0.6))
    input_bg = ET.Element('rect')
    input_bg.set('x', str(code_x - cell_padding))
    input_bg.set('y', str(code_y - cell_padding//2))
    input_bg.set('width', str(CELL_WIDTH)) #str(code_width + 2 * cell_padding))
    input_bg.set('height', str(code_height + cell_padding))
    input_bg.set('fill', SOLARIZED_BACKGROUND_COLOR)  
    input_bg.set('stroke', CHILL_BROWN) 
    input_bg.set('opacity', '0.2') 
    input_bg.set('stroke-width', '1')
    cell_group.append(input_bg)
    
    # Add the syntax-highlighted code
    code_group = ET.Element('g')
    code_group.set('transform', f'translate({code_x}, {code_y})')
    
    # Extract the highlighted content from pygments SVG
    for elem in code_root:
        if elem.tag.endswith('g') or elem.tag.endswith('text'):
            code_group.append(elem)
    
    cell_group.append(code_group)
    
    # Move to output section
    input_output_spacing = cell_padding   # SW Noodling
    current_y += code_height + input_output_spacing
    
    # Handle outputs if they exist
    if output_text and output_text.strip():
        # Execution count for output
        output_exec_count = create_text_element(
            exec_count_text, margin, current_y + OUTPUT_FONT_SIZE,
            font_family, OUTPUT_FONT_SIZE, CHILL_BROWN, 'normal'
        )
        cell_group.append(output_exec_count)
        
        # Output text
        output_group, output_height = create_multiline_text(
            output_text, code_x, current_y + OUTPUT_FONT_SIZE,
            font_family, OUTPUT_FONT_SIZE, '#333'
        )
        cell_group.append(output_group)
        
        current_y += output_height + cell_padding  # Add some space after output
    
    # NOW calculate the total cell height
    cell_height = current_y - y_offset
    
    # Add spacing after cell (this controls spacing between different cells)
    cell_height += cell_spacing  # This is your 0 value
        
    return cell_group, cell_height

def process_jupyter_notebook(notebook_path, output_path='notebook.svg', font_family='Menlo', font_size=12, max_width=900):
    """
    Process an entire Jupyter notebook and convert all code cells to a single SVG file.
    
    Args:
        notebook_path (str): Path to the .ipynb file
        output_path (str): Path to save the combined SVG file
        font_family (str): Font family to use
        font_size (int): Font size in pixels
        max_width (int): Maximum width of the SVG
    
    Returns:
        str: SVG content as string
    """
    import os
    
    with open(notebook_path, 'r', encoding='utf-8') as f:
        notebook = json.load(f)
    
    # Create the main SVG container
    svg = ET.Element('svg')
    svg.set('xmlns', 'http://www.w3.org/2000/svg')
    
    margin = 20
    current_y = margin
    actual_width = 0
    
    # Process each code cell
    code_cell_count = 0
    for i, cell in enumerate(notebook.get('cells', [])):
        if cell.get('cell_type') == 'code':
            code, _, _ = extract_cell_data(cell)
            if code and code.strip():
                code_cell_count += 1
                
                # Create SVG group for this cell
                cell_group, cell_height = create_cell_svg_group(
                    cell, current_y, font_family, font_size, max_width
                )
                
                if cell_group is not None:
                    svg.append(cell_group)
                    current_y += cell_height
                    
                    # Update actual width based on content
                    # This is a rough estimate - you might want to make this more precise
                    code_lines = code.split('\n')
                    max_line_length = max(len(line) for line in code_lines) if code_lines else 0
                    estimated_width = margin + 60 + max_line_length * font_size * 0.6 + 40
                    actual_width = max(actual_width, min(estimated_width, max_width))
    
    if code_cell_count == 0:
        print("No code cells found in notebook")
        return None
    
    # Final dimensions
    total_height = current_y + margin
    final_width = max(CANVAS_WIDTH, actual_width)
    
    svg.set('width', f'{final_width}px')
    svg.set('height', f'{total_height}px')
    svg.set('viewBox', f'0 0 {final_width} {total_height}')
    
    # Add overall background
    overall_bg = ET.Element('rect')
    overall_bg.set('x', '0')
    overall_bg.set('y', '0')
    overall_bg.set('width', str(final_width))
    overall_bg.set('height', str(total_height))
    overall_bg.set('fill', '#ffffff')
    svg.insert(0, overall_bg)
    
    # Convert to string
    svg_string = ET.tostring(svg, encoding='unicode')
    svg_string = '<?xml version="1.0" encoding="UTF-8"?>\n' + svg_string
    
    # Save to file
    output_dir = os.path.dirname(output_path) if os.path.dirname(output_path) else '.'
    os.makedirs(output_dir, exist_ok=True)
    
    with open(output_path, 'w', encoding='utf-8') as f:
        f.write(svg_string)
    
    print(f"Combined notebook SVG saved to: {output_path}")
    print(f"Processed {code_cell_count} code cells")
    
    return svg_string

In [5]:
# out_path='/Users/stephen/Stephencwelch Dropbox/welch_labs/ai_book/3_backprop_2/graphics/book_chapter_3_1.svg'
# process_jupyter_cell(example_cell_with_output, out_path)

In [6]:
in_path="book_chapter_3_1.ipynb"
out_path='/Users/stephen/Stephencwelch Dropbox/welch_labs/ai_book/3_backprop_2/graphics/book_chapter_3_1.svg'
process_jupyter_notebook(in_path, out_path)

Combined notebook SVG saved to: /Users/stephen/Stephencwelch Dropbox/welch_labs/ai_book/3_backprop_2/graphics/book_chapter_3_1.svg
Processed 4 code cells


'<?xml version="1.0" encoding="UTF-8"?>\n<svg xmlns:ns0="http://www.w3.org/2000/svg" xmlns="http://www.w3.org/2000/svg" width="700px" height="640.0px" viewBox="0 0 700 640.0"><rect x="0" y="0" width="700" height="640.0" fill="#ffffff" /><g><text x="20" y="32" font-family="Menlo" font-size="12px" fill="#948979" font-weight="normal">[1]:</text><rect x="68" y="14" width="600" height="129.6" fill="#fdf4e0" stroke="#948979" opacity="0.2" stroke-width="1" /><g transform="translate(80, 20)"><ns0:g font-family="Menlo" font-size="12px">\n<ns0:text x="0" y="12" xml:space="preserve"><ns0:tspan fill="#cb4b16">import</ns0:tspan><ns0:tspan fill="#657b83">\xa0</ns0:tspan><ns0:tspan fill="#268bd2">torch</ns0:tspan><ns0:tspan fill="#657b83" /></ns0:text>\n<ns0:text x="0" y="29" xml:space="preserve"><ns0:tspan fill="#657b83" /><ns0:tspan fill="#cb4b16">from</ns0:tspan><ns0:tspan fill="#657b83">\xa0</ns0:tspan><ns0:tspan fill="#268bd2">transformers</ns0:tspan><ns0:tspan fill="#657b83">\xa0</ns0:tspan><ns

---

In [14]:
CHILL_BROWN='#948979'
SOLARIZED_BACKGROUND_COLOR='#fdf4e0'

import json
import re
from pygments import highlight
from pygments.lexers import PythonLexer
from pygments.formatters import SvgFormatter
from pygments.styles import get_style_by_name
import xml.etree.ElementTree as ET

# def extract_cell_data(cell):
#     """Extract code, outputs, and execution count from a Jupyter notebook cell."""
#     if cell.get('cell_type') != 'code':
#         return None, None, None
    
#     # Extract source code
#     source = cell.get('source', [])
#     if isinstance(source, list):
#         code = ''.join(source)
#     else:
#         code = source
    
#     # Extract execution count
#     execution_count = cell.get('execution_count')
    
#     # Extract outputs
#     outputs = cell.get('outputs', [])
#     output_text = []
    
#     for output in outputs:
#         if 'data' in output:
#             # Handle different output types
#             if 'text/plain' in output['data']:
#                 plain_text = output['data']['text/plain']
#                 if isinstance(plain_text, list):
#                     output_text.extend(plain_text)
#                 else:
#                     output_text.append(plain_text)
#         elif 'text' in output:
#             # Handle direct text output
#             text_content = output['text']
#             if isinstance(text_content, list):
#                 output_text.extend(text_content)
#             else:
#                 output_text.append(text_content)
    
#     return code.strip(), '\n'.join(output_text).strip(), execution_count

def extract_cell_data(cell):
    """Extract code, outputs, and execution count from a Jupyter notebook cell."""
    if cell.get('cell_type') != 'code':
        return None, None, None
    
    # Extract source code
    source = cell.get('source', [])
    if isinstance(source, list):
        code = ''.join(source)
    else:
        code = source
    
    # Extract execution count
    execution_count = cell.get('execution_count')
    
    # Extract outputs
    outputs = cell.get('outputs', [])
    output_text = []
    
    for output in outputs:
        if 'data' in output:
            # Handle different output types
            if 'text/plain' in output['data']:
                plain_text = output['data']['text/plain']
                if isinstance(plain_text, list):
                    # Join list items without adding extra newlines
                    output_text.append(''.join(plain_text))
                else:
                    output_text.append(plain_text)
        elif 'text' in output:
            # Handle direct text output
            text_content = output['text']
            if isinstance(text_content, list):
                # Join list items without adding extra newlines
                output_text.append(''.join(text_content))
            else:
                output_text.append(text_content)
    
    # Join all outputs and strip any trailing whitespace
    final_output = ''.join(output_text).rstrip()
    
    return code.strip(), final_output, execution_count

def create_text_element(text, x, y, font_family='Menlo', font_size=12, fill='black', font_weight='normal'):
    """Create an SVG text element."""
    text_elem = ET.Element('text')
    text_elem.set('x', str(x))
    text_elem.set('y', str(y))
    text_elem.set('font-family', font_family)
    text_elem.set('font-size', f'{font_size}px')
    text_elem.set('fill', fill)
    text_elem.set('font-weight', font_weight)
    text_elem.text = text
    return text_elem

# def create_multiline_text(text, x, y, font_family='Menlo', font_size=12, fill='black', line_height=1.4):
#     """Create SVG text elements for multiline text."""
#     lines = text.split('\n')
#     text_group = ET.Element('g')
    
#     for i, line in enumerate(lines):
#         if line.strip():  # Only create elements for non-empty lines
#             text_elem = create_text_element(
#                 line, x, y + (i * font_size * line_height), 
#                 font_family, font_size, fill
#             )
#             text_group.append(text_elem)
    
#     return text_group, len(lines) * font_size * line_height

def create_multiline_text(text, x, y, font_family='Menlo', font_size=12, fill='black', line_height=1.4):
    """Create SVG text elements for multiline text."""
    lines = text.split('\n')
    text_group = ET.Element('g')
    
    for i, line in enumerate(lines):
        # Create text element for all lines, including empty ones
        # For empty lines, we still need the element to maintain spacing
        text_elem = create_text_element(
            line if line.strip() else ' ',  # Use space for empty lines to maintain spacing
            x, y + (i * font_size * line_height), 
            font_family, font_size, fill
        )
        text_group.append(text_elem)
    
    return text_group, len(lines) * font_size * line_height

def create_cell_svg_group(cell_data, y_offset, font_family='Menlo', font_size=12, max_width=800):
    """
    Create an SVG group element for a single cell at a given y offset.
    
    Args:
        cell_data (dict): Jupyter cell dictionary
        y_offset (float): Y position to start this cell
        font_family (str): Font family to use
        font_size (int): Font size in pixels
        max_width (int): Maximum width for the cell
    
    Returns:
        tuple: (svg_group_element, height_used)
    """
    
    code, output_text, execution_count = extract_cell_data(cell_data)
    
    if code is None:
        return None, 0
    
    # Create lexer and formatter for Python code
    lexer = PythonLexer()
    formatter = SvgFormatter(
        style='solarized-light',
        fontfamily=font_family,
        fontsize=f'{font_size}px',
        linenos=False,
        noclasses=True,
        nobackground=True
    )
    
    # Generate highlighted code SVG
    code_svg = highlight(code, lexer, formatter)
    code_root = ET.fromstring(code_svg)
    
    # Create group for this cell
    cell_group = ET.Element('g')
    
    # Constants for layout
    margin = 20
    execution_count_width = 60
    cell_padding = 12
    line_height = 1.4
    cell_spacing = 10  # Space between cells
    
    # Calculate dimensions
    code_lines = len(code.split('\n'))
    code_height = code_lines * font_size * line_height
    
    # Start position within this cell
    current_y = y_offset
    
    # Execution count for input
    exec_count_text = f"[{execution_count or ' '}]:"
    exec_count_elem = create_text_element(
        exec_count_text, margin, current_y + font_size,
        font_family, font_size, CHILL_BROWN, 'normal'
    )
    cell_group.append(exec_count_elem)
    
    # Input code area with background
    code_x = margin + execution_count_width
    code_y = current_y
    
    # Create background rectangle for input code
    code_width = min(max_width - code_x - margin, max(400, len(max(code.split('\n'), key=len)) * font_size * 0.6))
    input_bg = ET.Element('rect')
    input_bg.set('x', str(code_x - cell_padding))
    input_bg.set('y', str(code_y - cell_padding//2))
    input_bg.set('width', str(code_width + 2 * cell_padding))
    input_bg.set('height', str(code_height + cell_padding))
    input_bg.set('fill', SOLARIZED_BACKGROUND_COLOR)  
    input_bg.set('stroke', CHILL_BROWN) 
    input_bg.set('opacity', '0.2') 
    input_bg.set('stroke-width', '1')
    cell_group.append(input_bg)
    
    # Add the syntax-highlighted code
    code_group = ET.Element('g')
    code_group.set('transform', f'translate({code_x}, {code_y})')
    
    # Extract the highlighted content from pygments SVG
    for elem in code_root:
        if elem.tag.endswith('g') or elem.tag.endswith('text'):
            code_group.append(elem)
    
    cell_group.append(code_group)
    
    # Move to output section
    input_output_spacing = cell_padding   # SW Noodling
    current_y += code_height + input_output_spacing
    
    # Handle outputs if they exist
    if output_text and output_text.strip():
        # Execution count for output
        output_exec_count = create_text_element(
            exec_count_text, margin, current_y + font_size,
            font_family, font_size, CHILL_BROWN, 'normal'
        )
        cell_group.append(output_exec_count)
        
        # Output text
        output_group, output_height = create_multiline_text(
            output_text, code_x, current_y + font_size,
            font_family, font_size, '#333'
        )
        cell_group.append(output_group)
        
        current_y += output_height + cell_padding  # Add some space after output
    
    # NOW calculate the total cell height
    cell_height = current_y - y_offset
    
    # Add spacing after cell (this controls spacing between different cells)
    cell_height += cell_spacing  # This is your 0 value
        
    return cell_group, cell_height

def process_jupyter_notebook(notebook_path, output_path='notebook.svg', font_family='Menlo', font_size=12, max_width=900):
    """
    Process an entire Jupyter notebook and convert all code cells to a single SVG file.
    
    Args:
        notebook_path (str): Path to the .ipynb file
        output_path (str): Path to save the combined SVG file
        font_family (str): Font family to use
        font_size (int): Font size in pixels
        max_width (int): Maximum width of the SVG
    
    Returns:
        str: SVG content as string
    """
    import os
    
    with open(notebook_path, 'r', encoding='utf-8') as f:
        notebook = json.load(f)
    
    # Create the main SVG container
    svg = ET.Element('svg')
    svg.set('xmlns', 'http://www.w3.org/2000/svg')
    
    margin = 20
    current_y = margin
    actual_width = 0
    
    # Process each code cell
    code_cell_count = 0
    for i, cell in enumerate(notebook.get('cells', [])):
        if cell.get('cell_type') == 'code':
            code, _, _ = extract_cell_data(cell)
            if code and code.strip():
                code_cell_count += 1
                
                # Create SVG group for this cell
                cell_group, cell_height = create_cell_svg_group(
                    cell, current_y, font_family, font_size, max_width
                )
                
                if cell_group is not None:
                    svg.append(cell_group)
                    current_y += cell_height
                    
                    # Update actual width based on content
                    # This is a rough estimate - you might want to make this more precise
                    code_lines = code.split('\n')
                    max_line_length = max(len(line) for line in code_lines) if code_lines else 0
                    estimated_width = margin + 60 + max_line_length * font_size * 0.6 + 40
                    actual_width = max(actual_width, min(estimated_width, max_width))
    
    if code_cell_count == 0:
        print("No code cells found in notebook")
        return None
    
    # Final dimensions
    total_height = current_y + margin
    final_width = max(CANVAS_WIDTH, actual_width)
    
    svg.set('width', f'{final_width}px')
    svg.set('height', f'{total_height}px')
    svg.set('viewBox', f'0 0 {final_width} {total_height}')
    
    # Add overall background
    overall_bg = ET.Element('rect')
    overall_bg.set('x', '0')
    overall_bg.set('y', '0')
    overall_bg.set('width', str(final_width))
    overall_bg.set('height', str(total_height))
    overall_bg.set('fill', '#ffffff')
    svg.insert(0, overall_bg)
    
    # Convert to string
    svg_string = ET.tostring(svg, encoding='unicode')
    svg_string = '<?xml version="1.0" encoding="UTF-8"?>\n' + svg_string
    
    # Save to file
    output_dir = os.path.dirname(output_path) if os.path.dirname(output_path) else '.'
    os.makedirs(output_dir, exist_ok=True)
    
    with open(output_path, 'w', encoding='utf-8') as f:
        f.write(svg_string)
    
    print(f"Combined notebook SVG saved to: {output_path}")
    print(f"Processed {code_cell_count} code cells")
    
    return svg_string