# Notebook utilities

> Utilities for working with notebooks

In [None]:
#| default_exp nbuse

In [None]:
#| hide
from nbdev.showdoc import *

In [None]:
#| export
import ast
import inspect
import tomllib
import types
from pathlib import Path
from typing import Type, Any, Optional, Dict, Union, get_type_hints, get_origin, get_args

import nbformat
import tomli_w
from IPython.display import display, Markdown
from pydantic import BaseModel, Field


## Create markdown table from a Pydantic dataclass

Especially convenient for documentation that nbdev generates automatically from the notebooks.

In [None]:
#| export
def _format_type(type_hint: Any) -> str:
    """Format a type hint into a readable string."""
    if get_origin(type_hint) is not None:
        # Handle generic types like List[str], Optional[int], etc.
        origin = get_origin(type_hint)
        args = get_args(type_hint)
        
        if origin is Union:
            # Handle Optional (Union[X, None])
            if len(args) == 2 and args[1] is type(None):
                return f"Optional[{_format_type(args[0])}]"
            else:
                return f"Union[{', '.join(_format_type(arg) for arg in args)}]"
        
        # Handle other generic types
        origin_name = origin.__name__ if hasattr(origin, "__name__") else str(origin).replace("typing.", "")
        args_str = ", ".join(_format_type(arg) for arg in args)
        return f"{origin_name}[{args_str}]"
    
    # Handle non-generic types
    if hasattr(type_hint, "__name__"):
        return type_hint.__name__
    
    return str(type_hint).replace("typing.", "")

In [None]:
#| export
def _escape_table_cell(text: str) -> str:
    """
    Escape special characters in markdown table cells.
    The key is to escape pipe characters with HTML entity or backslash.
    """
    if not isinstance(text, str):
        text = str(text)
    
    # Replace pipe characters with HTML entity
    # This is the most reliable way to prevent them from being interpreted as column separators
    return text.replace("|", "\|")

In [None]:
#| export
def _default_cell(field_info: Any) -> str:
    """Return the text for the Default column ("..." for required fields)."""
    if field_info is None:
        return "..."

    is_required = getattr(field_info, "is_required", None)
    if callable(is_required):
        # Pydantic v2 FieldInfo exposes is_required()
        required = is_required()
    else:
        # Pydantic v1 ModelField exposes a `required` attribute
        required = bool(getattr(field_info, "required", False))
    if required:
        return "..."  # Pydantic's notation for required fields

    factory = getattr(field_info, "default_factory", None)
    if factory is not None:
        # The default is produced per instance; show the factory call instead
        factory_name = getattr(factory, "__name__", repr(factory))
        return _escape_table_cell(f"{factory_name}()")

    return _escape_table_cell(repr(field_info.default))


def pydantic_to_markdown_table(model_class: Type[BaseModel]) -> None:
    """
    Convert a Pydantic model class to a markdown table and display it in Jupyter notebook.

    The output consists of a `## <ClassName>` heading, the class docstring
    (if any) and a table with one row per public annotated attribute. The
    Details column uses the `Field(description=...)` text when present and
    otherwise falls back to a trailing `# comment` on the field's source line.

    Args:
        model_class: A Pydantic model class (subclass of BaseModel)

    Raises:
        TypeError: If `model_class` is not a subclass of BaseModel.
    """
    # isinstance guard: issubclass() itself raises on non-class arguments
    if not (isinstance(model_class, type) and issubclass(model_class, BaseModel)):
        raise TypeError("Input must be a Pydantic BaseModel class")

    md_name = f"## {model_class.__name__}\n"
    doc = inspect.getdoc(model_class)
    md_docstring = f"{doc}\n" if doc else ""

    # Get source code lines to extract comments; the source is unavailable
    # for classes that were not defined in a file or notebook cell
    try:
        source_lines = inspect.getsource(model_class).splitlines()
    except (OSError, TypeError):
        source_lines = []

    # Extract trailing comments from lines shaped like `name: type ... # comment`
    property_comments = {}
    for line in source_lines:
        code_part, hash_mark, comment_part = line.partition("#")
        name_part, colon, _ = code_part.partition(":")
        name_part = name_part.strip()
        if hash_mark and colon and name_part.isidentifier():
            property_comments[name_part] = comment_part.strip()

    # Get type hints and model fields
    type_hints = get_type_hints(model_class)

    # Handle both Pydantic v1 (`__fields__`) and v2 (`model_fields`)
    model_fields = getattr(model_class, "model_fields", None)
    if model_fields is None:
        model_fields = getattr(model_class, "__fields__", {})

    rows = []
    for field_name, field_type in type_hints.items():
        # Skip private fields
        if field_name.startswith('_'):
            continue

        field_info = model_fields.get(field_name) if model_fields else None

        type_str = _format_type(field_type)
        default_value = _default_cell(field_info)

        # Prefer the Field description. Pydantic v2 keeps it on the field
        # object itself, v1 on the nested `field_info` attribute.
        field_description = getattr(field_info, "description", None)
        if not field_description:
            nested_info = getattr(field_info, "field_info", None)
            field_description = getattr(nested_info, "description", None)

        if field_description:
            description = _escape_table_cell(field_description)
        elif field_name in property_comments:
            # Fallback to the source comment
            description = _escape_table_cell(property_comments[field_name])
        else:
            description = ""

        # For nested Pydantic models, add a reference note
        if isinstance(field_type, type) and issubclass(field_type, BaseModel):
            description = f"{description} (see `{field_type.__name__}` table)".strip()

        rows.append(f"| `{field_name}` | `{type_str}` | {default_value} | {description} |\n")

    table = (
        "\n| Variable | Type | Default | Details |\n"
        "|---|---|---|---|\n"
        + "".join(rows)
    )

    display(Markdown(md_name + md_docstring + table))

#### Example usage

In [None]:
class DummyChild(BaseModel):
    """A simple Pydantic model"""
    model_name: str = Field(..., description="Name or path of the model to use") # Name
    provider: str = Field(default="huggingface", description="Model provider (huggingface, openai, etc)")
    api_key_env_var: Optional[str] = Field(default=None, description="Environment variable name for API key")
    api_base_url: Optional[str] = Field(default=None, description="Base URL for API requests")
    temperature: float = Field(default=0.7, description="Temperature for generation")

# Example model for the table renderer, mixing required fields, defaulted fields and a nested model
class DummyParent(BaseModel):
    """Main configuration for a chat application"""
    # Field(...) marks a field as required
    app_name: str = Field(..., description="Name of the application")
    description: str = Field(default="", description="Description of the application")
    system_prompt: str = Field(..., description="System prompt for the LLM")
    # Nested model declared without Field(), so it has no description of its own
    model: DummyChild
    show_system_prompt: bool = Field(default=True, description="Whether to show system prompt in UI")
    show_context: bool = Field(default=True, description="Whether to show context in UI")

In [None]:
# Render the example parent model as a markdown table in the cell output
pydantic_to_markdown_table(DummyParent)

## DummyParent
Main configuration for a chat application

| Variable | Type | Default | Details |
|---|---|---|---|
| `app_name` | `str` | PydanticUndefined | Name of the application |
| `description` | `str` | '' | Description of the application |
| `system_prompt` | `str` | PydanticUndefined | System prompt for the LLM |
| `model` | `DummyChild` | PydanticUndefined |  (see `DummyChild` table) |
| `show_system_prompt` | `bool` | True | Whether to show system prompt in UI |
| `show_context` | `bool` | True | Whether to show context in UI |


## Create a tree structure from a Python dictionary

Function to clearly display the structure of a Python dictionary. The output only shows the keys.

In [None]:
#| export
def print_dict_structure(
    d: Dict, # The dictionary that will be pretty printed
    indent: int = 0 # The nesting depth of `d`; each level indents subkeys by two spaces
    ) -> None:
    """Print the keys of a (nested) dictionary as an indented tree.

    Only keys are printed. Values that are dictionaries are descended into
    recursively; all other values are ignored. Nothing is returned.
    """
    prefix = "  " * indent
    for key, value in d.items():
        print(f"{prefix}├── {key}")
        if isinstance(value, dict):
            print_dict_structure(value, indent + 1)

#### Example usage

In [None]:
# Sample nested dictionary; the leaf values are never printed, only the keys
movie_dict = {
    "The Big Lebowski": {
        "characters": {
            "Dude": "White Russian",
            "Walter": "Vietnam",
            "Donny": "Bowling"
        },
        "setting": {
            "location": "Bowling Alley",
            "object": "Rug"
        }
    },
    "Office Space": {
        "characters": {
            "Peter": "TPS report",
            "Milton": "Red stapler",
            "Lumbergh": "Memos"
        },
        "setting": {
            "company": "Initech",
            "object": "Printer"
        }
    },
    "The Princess Bride": {
        "characters": {
            "Westley": "Farm Boy",
            "Inigo": "Revenge"
        },
        "setting": {
            "location": "Cliffs of Insanity"
        }
    },
    "Labyrinth": {
        "characters": {
            "Sarah": "Labyrinth",
            "Jareth": "Goblin King"
        }
    }
}

# Print the key hierarchy, starting at indent level 0
print_dict_structure(movie_dict)

├── The Big Lebowski
  ├── characters
    ├── Dude
    ├── Walter
    ├── Donny
  ├── setting
    ├── location
    ├── object
├── Office Space
  ├── characters
    ├── Peter
    ├── Milton
    ├── Lumbergh
  ├── setting
    ├── company
    ├── object
├── The Princess Bride
  ├── characters
    ├── Westley
    ├── Inigo
  ├── setting
    ├── location
├── Labyrinth
  ├── characters
    ├── Sarah
    ├── Jareth


## Export notebook variables to toml

A function to create a TOML file from the contents of a Jupyter Notebook. This gives the application a simple interface for setting its parameters: there is no need to build a complete GUI, and users do not have to edit TOML files by hand, where they would miss the convenience and the inline explanations that a Jupyter Notebook can offer.

TOML can't handle None values. So if you want to set a value to None, just don't add that variable to the Jupyter Notebook or comment it out.

The function will replace any spaces in a heading with underscores.

In [None]:
#| export
def export_ipynb_toml(
    nb_path: Optional[str] = None,
    output_path: Optional[str] = None,
    namespace: Optional[Dict[str, Any]] = None
    ) -> None:
    """
    Export the content of a Jupyter notebook to a TOML file.

    This function reads the notebook file from disk, uses every level 2 (##)
    markdown heading as a group (spaces replaced by underscores), and stores
    all top-level variable assignments found in the code cells below a heading
    as key-value pairs within that group. Regular text markdown cells are
    ignored, as are code cells that appear before the first level 2 heading.

    Parameters:
    -----------
    nb_path : str
        Path to the notebook file. Required: the notebook path is not
        detected automatically, so passing None raises a ValueError.
    output_path : str, optional
        Path where the TOML file should be saved. If None, the TOML file will be
        saved in the same directory as the notebook with the same name but with
        a .toml extension.
    namespace : dict, optional
        Mapping used to look up the current value of each assigned variable.
        If None, the global namespace of the calling code is used, so that
        calling this function from a notebook picks up the notebook's
        variables even when the function itself was imported from a module.

    Returns:
    --------
    None

    Raises:
    -------
    ValueError
        If `nb_path` is None.
    """
    if nb_path is None:
        raise ValueError(
            "Please provide the nb_path parameter explicitly."
        )

    if namespace is None:
        # globals() would be the namespace of the module defining this
        # function; the caller's globals are where notebook variables live.
        frame = inspect.currentframe()
        caller = frame.f_back if frame is not None else None
        namespace = caller.f_globals if caller is not None else globals()
        del frame, caller  # avoid keeping frame reference cycles alive

    with open(nb_path, 'r', encoding='utf-8') as f:
        notebook = nbformat.read(f, as_version=4)

    if output_path is None:
        output_path = Path(nb_path).with_suffix('.toml')

    data = {}
    current_group = None

    for cell in notebook.cells:
        if cell.cell_type == 'markdown':
            for line in cell.source.split('\n'):
                if line.startswith('## '):
                    current_group = line[3:].strip().replace(' ', '_')

        elif cell.cell_type == 'code' and current_group is not None:
            try:
                tree = ast.parse(cell.source)
            except SyntaxError:
                # Skip cells that are not plain Python (e.g. malformed code)
                print(f"skipped: {cell.source}")
                continue

            group = data.setdefault(current_group, {})

            for node in tree.body:
                # Collect `a = ...` / `a = b = ...` and annotated `a: T = ...`
                if isinstance(node, ast.Assign):
                    targets = node.targets
                elif isinstance(node, ast.AnnAssign) and node.value is not None:
                    targets = [node.target]
                else:
                    continue

                for target in targets:
                    if not isinstance(target, ast.Name):
                        continue
                    key = target.id
                    if key.startswith('export_ipynb_'):
                        continue

                    # Prefer the live value so computed assignments (f-strings,
                    # function calls, ...) export their evaluated result; fall
                    # back to parsing the literal when the variable is not set.
                    value = namespace.get(key, None)
                    if value is None:
                        try:
                            value = ast.literal_eval(node.value)
                        except (ValueError, SyntaxError, TypeError):
                            print(f"Warning: Could not evaluate value for '{key}' in group '{current_group}'")
                            continue

                    if value is None:
                        # TOML has no null type; writing None would fail
                        print(f"Warning: Skipping '{key}' in group '{current_group}' because TOML cannot store None")
                        continue

                    group[key] = value

    # Serialize before opening the file so that a value TOML cannot represent
    # does not leave a truncated output file behind.
    toml_text = tomli_w.dumps(data)

    with open(output_path, 'wb') as f:
        # 'wb', because TOML files are UTF-8 and tomllib reads them in binary mode
        f.write(toml_text.encode('utf-8'))

    print(f"TOML file saved to: {output_path}")

#### Usage of `globals()` and `ast`

The ast (Abstract Syntax Tree) module in Python allows you to parse Python source code into its syntax tree representation. This lets you analyze and manipulate Python code programmatically.

`ast.parse(source)`: Parses Python code (as a string) into an AST tree.

`globals()` is a built-in function that returns the current global symbol table as a dictionary.

**Why use globals() first?**

_Dynamic or computed variables_: In Jupyter notebooks, variables can be set by code that isn’t just a literal assignment. For example:

```python
a = 1 + 2
b = some_function()
c = [i for i in range(5)]
```

- globals()['a'] will give 3.
- globals()['b'] will give the result of some_function().
- globals()['c'] will give the actual list [0, 1, 2, 3, 4].


**ast.literal_eval limitations**:

- ast.literal_eval can only evaluate simple literals (strings, numbers, lists, dicts, etc.), not arbitrary Python expressions or anything involving variables/functions.
- For example, it fails on a = 1 + 2, b = some_function(), or c = [i for i in range(5)].


**Notebook context:**

- In a notebook, users often assign variables dynamically, not just with literals.
- Using globals() ensures you get the actual value as it exists in the current kernel session, reflecting any computation or function calls.

**Why fall back to ast.literal_eval?**

For simple assignments:
- If the variable isn’t found in globals() (maybe the cell wasn’t run, or the variable was deleted), you can try to parse the value directly from the code if it’s a literal.
-   This works for things like x = 42 or y = "hello", but not for expressions or function calls.

**Summary**

- globals(): Gets the current, actual value in the notebook, including results of computations.
- ast.literal_eval: Only works for simple literals, but is a safe fallback if the variable isn’t in globals().


#### Write TOML files using binary

We write using binary, because tomli-w is designed to work the same way as the built-in tomllib, which only reads from binary streams.

This ensures consistent encoding (UTF-8) and avoids issues with text encodings across platforms.

#### Example usage

In [None]:
# Example top-level assignment, picked up by the exporter under the enclosing "## " heading
somewhere = "/home/jared/lost"

In [None]:
# Export this notebook's top-level assignments, grouped by "## " heading, to a TOML file
export_ipynb_toml("nbuse.ipynb", "../tests/variables_user.toml")

TOML file saved to: ../tests/variables_user.toml


Then read the TOML file back with the `tomllib` module:

In [None]:
# Open in binary mode ("rb") for tomllib.load
with open("../tests/variables_user.toml", "rb") as tml:
    usr_toml = tomllib.load(tml)

# Show the exported groups and their keys as a tree
print_dict_structure(usr_toml)

├── Create_markdown_table_from_a_Pydantic_dataclass
├── Create_a_tree_structure_from_a_Python_dictionary
  ├── movie_dict
    ├── The Big Lebowski
      ├── characters
        ├── Dude
        ├── Walter
        ├── Donny
      ├── setting
        ├── location
        ├── object
    ├── Office Space
      ├── characters
        ├── Peter
        ├── Milton
        ├── Lumbergh
      ├── setting
        ├── company
        ├── object
    ├── The Princess Bride
      ├── characters
        ├── Westley
        ├── Inigo
      ├── setting
        ├── location
    ├── Labyrinth
      ├── characters
        ├── Sarah
        ├── Jareth
├── Export_notebook_variables_to_toml
  ├── somewhere
├── Default_nbdev_code_cells


## Default nbdev code cells

In [None]:
#| hide
# Run nbdev's export step for this project
import nbdev; nbdev.nbdev_export()