# Avida Vizard Figures Notebook

A minimal template for creating visualizations with Vizard.

**Quick Reference:**
- `%cc KEYWORDS` - Show current state
- `%cc RESET` - Clear state, start fresh
- `%cc HELP` - Show help

**Basic syntax:**
```
%cc DATA mydata.csv PLOT bar X category Y value
%cc Create a scatter plot from mydata.csv with X col1 and Y col2
```

## Setup & Basics

### Load necessary imports (one-time setup)
Since Vizard defaults to IMPORT false, we load libraries once here.

In [1]:
import os, sys, subprocess, warnings, time, re
import altair as alt
import matplotlib.pyplot as plt
import seaborn as sns
import polars as pl
import pandas as pd
import numpy as np
from pathlib import Path

### Altair

In [2]:
# Select Altair's 'html' renderer for displaying charts in this notebook.
alt.renderers.enable('html')
# Alternative renderer kept for reference.
# NOTE(review): the "Critical" remark is not explained here -- confirm whether it still applies.
#alt.renderers.enable('default') # Critical

# Font family used for every font entry of the theme registered below.
# An empty string is passed through as-is to those entries.
fontFamily = '' # Seems better to leave empty and go with defaults
@alt.theme.register('bioinformatics_theme', enable=True)
def bioinformatics_theme():
    """
    Build the notebook-wide Altair theme.

    The decorator registers the theme under the name 'bioinformatics_theme'
    and enables it immediately. Every font entry reads the module-level
    ``fontFamily`` at the time the theme function is called.

    Returns
    -------
    alt.theme.ThemeConfig
        Default chart size (600 x 400) plus axis, title, legend and
        text-mark configuration.
    """
    # X and Y axes share the same settings; each axis gets its own copy.
    shared_axis = {
        'labelFontSize': 14,
        'titleFontSize': 18,
        'titleFontWeight': 100,
        'labelAngle': 0,
        'labelFont': fontFamily,
        'titleFont': fontFamily,
    }

    chart_title = {
        'fontSize': 25,
        'fontWeight': 200,
        'offset': 20,
        'font': fontFamily,
    }

    legend = {
        'labelFontSize': 14,
        'titleFontSize': 18,
        'symbolSize': 600,
        'titleFontWeight': 200,
        'labelFont': fontFamily,
        'titleFont': fontFamily,
    }

    # No default 'fontSize' is set for text marks; add one here if needed.
    text_mark = {
        'font': fontFamily,
        'fontWeight': 100,
    }

    config = {
        'axisX': dict(shared_axis),
        'axisY': dict(shared_axis),
        'title': chart_title,
        'legend': legend,
        'text': text_mark,
    }

    return alt.theme.ThemeConfig({
        'width': 600,
        'height': 400,
        'config': config,
    })

#### Fonts

In [3]:
# Candidate font families to preview, one small chart per font.
fonts_to_test = [
    # Widely installed named fonts
    'Arial',
    'Times New Roman',

    # Generic CSS families
    'sans-serif',
    'serif',
    'monospace',

    # Further common choices
    'Helvetica',
    'Times',
    'Georgia',

    # Fonts shipped with office suites (may be missing on some systems)
    'Calibri',
    'Cambria',
]

# Render the font's own name in that font so the result can be judged by eye.
charts = []
for font in fonts_to_test:
    sample_text = alt.Chart().mark_text(font=font, fontSize=14)
    placed_text = sample_text.encode(
        x=alt.value(100),
        y=alt.value(50),
        text=alt.value(f'{font}'),
    )
    chart = placed_text.properties(title=font[:15], width=180, height=80)
    charts.append(chart)

# Lay the previews out as a grid, four charts per row.
columns_per_row = 4
grid_rows = []
for row_start in range(0, len(charts), columns_per_row):
    row_charts = charts[row_start:row_start + columns_per_row]
    grid_rows.append(alt.hconcat(*row_charts))

alt.vconcat(*grid_rows)

In [4]:
# Lookup table from a friendly color name to its hex code.
# NOTE(review): presumably Vega's default categorical palette, going by the
# variable name -- confirm against the Vega color scheme reference.
vega_colors = dict(
    blue='#4c78a8',
    orange='#f58518',
    red='#e45756',
    teal='#72b7b2',
    green='#54a24b',
    yellow='#eeca3b',
    purple='#b279a2',
    pink='#ff9da6',
    brown='#9d755d',
    gray='#bab0ac',
)

### Utilities

In [5]:
def save_chart_alt(chart, title="", fig_path=None, save_as=None, file_format='png',
                   hide=False, scale_factor=2.0):
    """
    Save an Altair chart to disk and optionally display it.

    Useful for charts that were modified after creation (extra layers,
    annotations, concatenations) and now need to be written out.

    Parameters
    ----------
    chart : altair chart object
        Any object exposing ``save(path, ...)``; displayed via ``display``
        unless ``hide`` is True.

    title : str, default ""
        Chart title. Used to derive the file name when ``save_as`` is None.

    fig_path : str or None, default None
        Directory to save into. If None, the module-level ``fig_path``
        variable is used when defined, otherwise 'figures'. The directory
        is created (including parents) when it does not exist.

    save_as : str or None, default None
        File name without extension. If None, ``title`` is used. The name
        is passed through ``sanitize``; if nothing usable remains (empty
        title, or only punctuation), a timestamp-based name
        ``chart_YYMMDD_HHMMSS`` is used instead.

    file_format : str, default 'png'
        File extension / format handed to ``chart.save``,
        e.g. 'png', 'pdf', 'svg'.

    hide : bool, default False
        If True, the chart is saved but not displayed.
        If False, the chart is saved and then displayed.

    scale_factor : float, default 2.0
        Passed to ``chart.save`` for 'png' output only; other formats are
        saved without it.

    Returns
    -------
    None

    Examples
    --------
    >>> rule = alt.Chart().mark_rule().encode(x=alt.datum(1.5))
    >>> modified_chart = chart + rule
    >>>
    >>> # File name derived from the title
    >>> save_chart_alt(modified_chart, 'Sample Coverage')
    >>>
    >>> # Custom file name, no display
    >>> save_chart_alt(modified_chart, 'Sample Coverage',
    ...                save_as='coverage_with_line', hide=True)
    >>>
    >>> # PDF in a custom directory
    >>> save_chart_alt(modified_chart, 'Sample Coverage',
    ...                fig_path='analysis/plots', file_format='pdf')

    Notes
    -----
    - ``display`` is expected to be available in the calling environment
      (as it is in IPython/Jupyter sessions); it is only called when
      ``hide`` is False.
    """
    # Local imports: neither name is provided by the notebook's import cell
    # in the form used here, so the function brings its own.
    from datetime import datetime
    from pathlib import Path

    if fig_path is None:
        fig_path = globals().get('fig_path', 'figures')

    if save_as is None:
        save_as = title

    # Sanitizing can strip a name down to nothing (e.g. "" or "!!!");
    # fall back to a timestamp so the file never ends up named ".png".
    file_stem = sanitize(save_as) if save_as else ''
    if not file_stem:
        file_stem = f"chart_{datetime.now().strftime('%y%m%d_%H%M%S')}"

    # Make sure the target directory exists before asking Altair to write.
    out_dir = Path(fig_path)
    out_dir.mkdir(parents=True, exist_ok=True)
    path = str(out_dir / f'{file_stem}.{file_format}')

    if file_format == 'png':
        chart.save(path, scale_factor=scale_factor)
    else:
        chart.save(path)

    if not hide:
        display(chart)

def sanitize(title: str) -> str:
    """
    Turn *title* into a file-name-friendly token.

    Every character that is not an ASCII letter, digit or whitespace becomes
    an underscore, whitespace runs become one underscore, repeated
    underscores are collapsed, and leading/trailing underscores are removed.
    """
    # Applied in order; later rules clean up what earlier ones produce.
    substitutions = (
        (r'[^a-zA-Z0-9\s]', '_'),
        (r'\s+', '_'),
        (r'_+', '_'),
    )
    cleaned = title
    for pattern, replacement in substitutions:
        cleaned = re.sub(pattern, replacement, cleaned)
    return cleaned.strip('_')

## Vizard

In [6]:
%load_ext cc_jupyter 


🚀 Claude Code Magic loaded!
Features:
  • Full agentic Claude Code execution
  • Cell-based code approval workflow
  • Real-time message streaming
  • Session state preservation
  • Conversation continuity across cells

Usage:
  %cc <instructions>       # Continue with additional instructions (one-line)
  %%cc <instructions>      # Continue with additional instructions (multi-line)
  %cc_new (or %ccn)        # Start fresh conversation
  %cc --help               # Show available options and usage information

Context management:
  %cc --import <file>       # Add a file to be included in initial conversation messages
  %cc --add-dir <dir>       # Add a directory to Claude's accessible directories
  %cc --mcp-config <file>   # Set path to a .mcp.json file containing MCP server configurations
  %cc --cells-to-load <num> # The number of cells to load into a new conversation (default: all for first %cc, none for %cc_new)

Output:
  %cc --model <name>       # Model to use for Cl

In [7]:
%cc RESET

🧠 Claude model: claude-sonnet-4-5-20250929
💭 Claude: I'll reset the Vizard state to defaults.
⏺ Bash("rm -f .vizard_state.json && cat > .vizard_state.json << 'EOF'
{
  "ENGINE": "altair",
  "DF": "polars",
  "WIDTH": 600,
  "HEIGHT": 400,
  "FUNCTION": false,
  "IMPORT": false,
  "OUTPUT": "display"
}
EOF
")
💭 Claude: ✓ State reset to defaults
📍 Claude Code Session ID: a51871d5-9bdb-4ec4-ae71-17380c16358c


In [8]:
%cc --model sonnet

✅ Set model to sonnet. Use %cc_new to pick up the setting.


## Avida Bar Figure

In [9]:
def avida_bar_figure(df: pl.DataFrame, y_label: str,
                     colors: "list | tuple" = ('blue', 'orange'),
                     y_col: "str | None" = None) -> alt.LayerChart:
    """
    Create an Avida-style grouped bar chart with custom positioning and labels.

    Bars are placed at fixed x positions keyed by ``SampleID`` (four groups
    of three samples), with a vertical divider between the two halves,
    'cfDNA' / 'gDNA' captions near the top and 'UMBS' / 'SBS' captions
    below the baseline.

    Parameters
    ----------
    df : pl.DataFrame
        Must contain the columns ``SampleID``, ``Treatment`` and the y-value
        column. Rows whose ``SampleID`` is not one of the twelve known IDs
        (CU1-3, CA1-3, GU1-3, GA1-3) get a null x position.
    y_label : str
        Title of the y-axis (e.g., 'Coverage').
    colors : sequence of two str, default ('blue', 'orange')
        Colors for the 'UMBS' and 'SBS' treatments, in that order. Each
        entry may be a key of the ``vega_colors`` dict or any other color
        string (e.g. a hex code), which is passed through unchanged.
        Entries beyond the first two are ignored.
    y_col : str or None, default None
        Name of the column holding the bar heights. If None, the second
        column of ``df`` is used.

    Returns
    -------
    alt.LayerChart
        Layered Altair chart with bars, value labels, divider, and captions.

    Raises
    ------
    ValueError
        If fewer than two colors are given, a required column is missing,
        or the y column has no non-null values.
    """
    if len(colors) < 2:
        raise ValueError(
            f"colors must contain two entries (UMBS, SBS); got {len(colors)}"
        )
    # Translate friendly names to hex codes; unknown names pass through as-is.
    color_range = [vega_colors.get(name, name) for name in colors[:2]]

    # Resolve the y column (historical default: the second column).
    if y_col is None:
        if len(df.columns) < 2:
            raise ValueError(
                "df needs at least two columns when y_col is not given"
            )
        y_col = df.columns[1]

    missing = [name for name in ('SampleID', 'Treatment', y_col)
               if name not in df.columns]
    if missing:
        raise ValueError(f"df is missing required column(s): {missing}")

    # Fixed x slot per sample. The gaps (3, 7-9, 13) separate the groups
    # visually; the divider sits at x=8.
    sample_positions = {
        'CU1': 0, 'CU2': 1, 'CU3': 2,
        'CA1': 4, 'CA2': 5, 'CA3': 6,
        'GU1': 10, 'GU2': 11, 'GU3': 12,
        'GA1': 14, 'GA2': 15, 'GA3': 16,
    }
    # Build one chained when/then expression from the table. With no
    # otherwise() branch, samples not in the table evaluate to null.
    position_expr = None
    for sample_id, x_slot in sample_positions.items():
        is_sample = pl.col('SampleID') == sample_id
        if position_expr is None:
            position_expr = pl.when(is_sample).then(pl.lit(x_slot))
        else:
            position_expr = position_expr.when(is_sample).then(pl.lit(x_slot))
    df_positioned = df.with_columns(position_expr.alias('x_position'))

    # Leave 20% headroom above the tallest bar for labels and captions.
    max_val = df_positioned[y_col].max()
    if max_val is None:
        raise ValueError(f"column {y_col!r} has no non-null values to plot")
    y_domain_max = max_val * 1.2

    # Bars
    bars = alt.Chart(df_positioned).mark_bar(width=21).encode(
        x=alt.X('x_position:Q',
                title=None,
                axis=alt.Axis(labels=False, ticks=False, grid=False, domain=False),
                scale=alt.Scale(domain=[-0.3, 16.3])),
        y=alt.Y(f'{y_col}:Q',
                title=y_label,
                scale=alt.Scale(domain=[0, y_domain_max]),
                axis=alt.Axis(grid=False)),
        color=alt.Color('Treatment:N',
                        title='Treatment',
                        sort=['UMBS', 'SBS'],
                        scale=alt.Scale(domain=['UMBS', 'SBS'], range=color_range),
                        legend=None)
    )

    # Value labels, drawn just above each bar
    value_labels = alt.Chart(df_positioned).mark_text(dy=-8, fontSize=12).encode(
        x=alt.X('x_position:Q'),
        y=alt.Y(f'{y_col}:Q'),
        text=alt.Text(f'{y_col}:Q', format='.0f')
    )

    # Divider line between the two halves of the chart
    divider = alt.Chart(pl.DataFrame({'x': [8]})).mark_rule(
        color='gray',
        strokeWidth=1
    ).encode(x=alt.X('x:Q'))

    # DNA type captions, centered over each half
    top_label_y = y_domain_max * 0.95
    top_labels = alt.Chart(pl.DataFrame({
        'x': [3, 13],
        'y': [top_label_y, top_label_y],
        'label': ['cfDNA', 'gDNA']
    })).mark_text(fontSize=16).encode(
        x=alt.X('x:Q'),
        y=alt.Y('y:Q'),
        text='label:N'
    )

    # Treatment captions; the negative y puts them below the zero baseline,
    # standing in for the x-axis labels that are switched off above.
    treatment_label_y = -y_domain_max * 0.05
    treatment_labels = alt.Chart(pl.DataFrame({
        'x': [1, 5, 11, 15],
        'y': [treatment_label_y] * 4,
        'label': ['UMBS', 'SBS', 'UMBS', 'SBS']
    })).mark_text(fontSize=14).encode(
        x=alt.X('x:Q'),
        y=alt.Y('y:Q'),
        text='label:N'
    )

    # Compose chart
    chart = (bars + value_labels + divider + top_labels + treatment_labels).properties(
        width=600,
        height=400
    ).configure_view(
        stroke='gray',
        strokeWidth=2
    )

    return chart

## Publication Coverage Figure

In [10]:
%cc_new --add-dir aux

✅ Added /home/cdaniels/junk/dsllm/aux to accessible directories. Use %cc_new to pick up the setting.


In [11]:
# Sample IDs follow <DNA letter><treatment letter><replicate>:
# 'C'/'G' crossed with 'U'/'A', three replicates each (CU1 ... GA3).
samples = [
    f'{dna}{treatment}{replicate}'
    for dna in 'CG'
    for treatment in 'UA'
    for replicate in (1, 2, 3)
]

In [12]:
# Load the tab-separated metrics file, keep the sample ID and median target
# coverage (renamed to 'Coverage'), then derive two annotation columns from
# the sample ID:
#   - Dna_type:  'cfDNA' when the ID starts with 'C', otherwise 'gDNA'
#   - Treatment: 'UMBS' when the ID contains 'U', otherwise 'SBS'
df = (
    pl.read_csv('aux/capture.gatk.metrics', separator='\t')
    .select(
        pl.col('SampleID'),
        pl.col('MEDIAN_TARGET_COVERAGE').alias('Coverage'),
    )
    .with_columns(
        pl.when(pl.col('SampleID').str.starts_with('C'))
        .then(pl.lit('cfDNA'))
        .otherwise(pl.lit('gDNA'))
        .alias('Dna_type'),
        pl.when(pl.col('SampleID').str.contains('U'))
        .then(pl.lit('UMBS'))
        .otherwise(pl.lit('SBS'))
        .alias('Treatment'),
    )
)

df

SampleID,Coverage,Dna_type,Treatment
str,f64,str,str
"""CU1""",125.0,"""cfDNA""","""UMBS"""
"""CU2""",138.0,"""cfDNA""","""UMBS"""
"""CU3""",118.0,"""cfDNA""","""UMBS"""
"""CA1""",94.0,"""cfDNA""","""SBS"""
"""CA2""",111.0,"""cfDNA""","""SBS"""
…,…,…,…
"""GU2""",70.0,"""gDNA""","""UMBS"""
"""GU3""",94.0,"""gDNA""","""UMBS"""
"""GA1""",62.0,"""gDNA""","""SBS"""
"""GA2""",84.0,"""gDNA""","""SBS"""


In [13]:
# Build the coverage figure (UMBS in blue, SBS in orange) and show it as
# the cell's output.
chart = avida_bar_figure(df, y_label='Coverage', colors=['blue', 'orange'])
chart

In [15]:
# Write the figure as a PDF into the current directory and display it.
# save_as, hide and scale_factor are left at their defaults
# (None, False, 2.0), so the file name is derived from the title.
save_chart_alt(
    chart,
    title="Avida_Median_Base_Coverage_Depth",
    fig_path=".",
    file_format='pdf',
)