# 🦤 Dodo Source

[doit](https://github.com/pydoit/doit) is a simple, yet powerful task execution tool,
written in Python. The `DodoSource` can find the _tasks_ defined in a `dodo.py`, as well
as the _files_ it depends on.

This demo features a `ForceGraph` with a `DodoSource` for this project's own `dodo.py`,
along with a number of other Jupyter Widget libraries to demonstrate a full, interactive
application.

> **Hint**
>
> _For a "cleaner" notebook, see the [DodoApp](./DodoApp.ipynb) which uses `importnb` to
> present this application with fewer behind-the-scenes details._

In [None]:
import asyncio
import functools
import itertools
import subprocess
from pathlib import Path

import anyio.streams.text
import ipylab as L
import ipywidgets as W
import networkx as nx
import traitlets as T
from ipydatagrid import DataGrid
from ipyforcegraph import behaviors as B
from ipyforcegraph import graphs as G
from ipyforcegraph.behaviors import shapes as S
from ipyforcegraph.sources.dodo import DodoSource

## Create a Panel

Creating a single panel in the JupyterLab `main` area next to the notebook we're working
on makes it easy to see how the application develops.

In [None]:
# Handle to the running JupyterLab client; used below to add panels and run commands.
lab = L.JupyterFrontEnd()
# Vertical split panel that will hold the main views; it starts out empty.
split = L.SplitPanel(
    [], orientation="vertical", layout=dict(height="100%", overflow="hidden")
)
# Inline stylesheet scoped to the `.ipfg-dodo` class carried by `app` below.
app_style = W.HTML(
    """<style>
.ipfg-dodo {
    --jp-widgets-inline-width-short: auto;
}
</style>"""
)

# Root container of the application: the stylesheet followed by the split panel.
app = W.VBox(
    [app_style, split],
    _dom_classes=["ipfg-dodo"],
    layout=dict(display="flex", flex="1", flex_flow="column"),
)

# The JupyterLab panel wrapping the application, with the logo read from disk
# (path is relative to the current working directory) as its title icon.
panel = L.Panel([app], layout=dict(overflow="hidden", height="100%"))
ipfg_icon = L.Icon(
    name="ipyfg:logo", svgstr=Path("datasets/logo.svg").read_text(encoding="utf-8")
)
panel.title.icon = ipfg_icon

In [None]:
def show_app(area, **options):
    """Add the application panel to the JupyterLab shell and return it.

    :param area: shell area name, passed through to ``lab.shell.add``.
    :param options: extra keyword options, forwarded to the shell as one dict.
    :returns: the module-level ``panel``.
    """
    shell = lab.shell
    shell.add(panel, area, options)
    return panel

In [None]:
# Only open the panel when this code runs as the main module, so that importing
# it does not add a panel as a side effect.
if __name__ == "__main__":
    show_app("main", mode="split-right")

## Create the Graph

We know we want to show a graph.

In [None]:
# The graph widget; it becomes the only child of the split panel for now.
fg = G.ForceGraph(layout=dict(width="100%", height="100%", flex="3"))
split.children = [fg]

## Create the `DodoSource`

A `DodoSource` needs to know its `project_root` in order to find your `dodo.py`, and
establish the right current working directory.

> While `doit` has pluggable _loaders_, `DodoSource` only supports discovering a
> `dodo.py`

In [None]:
# Build the source from the parent directory. The listed columns are passed as
# `node_preserve_columns` (presumably kept across data updates -- confirm against
# DodoSource).
source = DodoSource(
    project_root="..", node_preserve_columns=["x", "y", "vx", "vy", "_selected"]
)
# One-way link: the panel's title label follows the `name` of `project_root`.
T.dlink((source, "project_root"), (panel.title, "label"), lambda x: x.name)
fg.source = source

> The app should now show a big pile of `nodes`, based on the underlying
> `pandas.DataFrame`, to be explored more fully below.

## Add Graph Behaviors

### Customize Node Shapes

In [None]:
# Append a node-shape behavior to the graph, keeping the behaviors already present.
node_shape = B.NodeShapes()
fg.behaviors = [*fg.behaviors, node_shape]

In [None]:
# One shape per kind of node; each is configured in its own section below.
file_shape = S.Rectangle()
task_shape = S.Text()
subtask_shape = S.Ellipse()

In [None]:
# Hand all three shapes to the behavior, in the order file, subtask, task.
node_shape.shapes = [file_shape, subtask_shape, task_shape]

#### Files as Boxes

In [None]:
# Size: 10 for file-like node types, 0 for everything else (the same template
# object is shared by width and height).
file_shape.width = file_shape.height = B.Nunjucks(
    "{% if node.type in ['file', 'directory', 'file_dep', 'targets'] %}10"
    "{% else %}0{% endif %}",
)
# Fill: grey for `file` nodes that exist, white otherwise.
file_shape.fill = B.Nunjucks(
    "{% if node.type == 'file' and node.exists %}grey{% else %}white{% endif %}",
)
# Outline color: orange when selected, black for `file` nodes, grey otherwise.
file_shape.stroke = B.Nunjucks(
    "{% if node._selected %}orange"
    "{% elif node.type == 'file' %}black"
    "{% else %}grey{% endif %}",
)
# Outline width: 4 when selected, 1 otherwise.
file_shape.stroke_width = B.Nunjucks(
    "{% if node._selected %}4" "{% else %}1{% endif %}",
)
# Dash pattern: `[]` for nodes that exist, `[1]` for those that do not.
file_shape.line_dash = B.Nunjucks(
    "{% if node.exists %}[]{% else %}[1]{% endif %}",
)

#### Tasks as Text

In [None]:
# Text: the node name, only for `task` nodes that are not a subtask of another task.
task_shape.text = B.Nunjucks(
    "{% if node.type == 'task' and not node.subtask_of %}{{ node.name }}{% endif %}",
)

# Outline color: orange when selected, blue for status `run`, red for status
# `error`; the template renders nothing for any other node.
task_shape.stroke = B.Nunjucks(
    "{% if node._selected %}orange"
    "{% elif node.status == 'run' %}blue"
    "{% elif node.status == 'error' %}red"
    "{% endif %}",
)
# Outline width: 4 when selected or in error, 1 otherwise. This is the only
# assignment to `stroke_width`: a later constant `1` used to overwrite this
# template, which left it without any effect.
task_shape.stroke_width = B.Nunjucks(
    "{% if node._selected or node.status == 'error' %}4" "{% else %}1{% endif %}",
)
task_shape.fill = "black"
task_shape.size = 20
task_shape.scale_on_zoom = False
# Dash pattern: `[1]` for tasks in error, `[]` otherwise.
task_shape.line_dash = B.Nunjucks(
    "{% if node.status == 'error' %}[1]{% else %}[]{% endif %}",
)

#### Subtasks as Circles

In [None]:
# Size: 10 for `task` nodes that are a subtask of another task, 0 otherwise (the
# same template object is shared by width and height).
subtask_shape.width = subtask_shape.height = B.Nunjucks(
    "{% if node.type == 'task' and node.subtask_of %}10{% else %}0{% endif %}",
)
# Fill: blue for status `up-to-date`, white for `run`, red for anything else.
subtask_shape.fill = B.Nunjucks(
    "{% if node.status == 'up-to-date' %}blue"
    "{% elif node.status == 'run' %}white"
    "{% else %}red{% endif %}",
)
# Outline color: orange when selected, blue for status `run`, red for status
# `error`; the template renders nothing for any other node.
subtask_shape.stroke = B.Nunjucks(
    "{% if node._selected %}orange"
    "{% elif node.status == 'run' %}blue"
    "{% elif node.status == 'error' %}red"
    "{% endif %}",
)
# Outline width: 4 when selected, 1 otherwise.
subtask_shape.stroke_width = B.Nunjucks(
    "{% if node._selected %}4" "{% else %}1{% endif %}",
)

## Customize Link Shapes

In [None]:
# Append a link-shape behavior to the graph, keeping the behaviors already present.
link_shape = B.LinkShapes()
fg.behaviors = [*fg.behaviors, link_shape]

In [None]:
# Link color: translucent red when `link.exists` is one of the listed values,
# translucent blue otherwise. The closing parenthesis of `rgba(` sits after the
# `endif` so that both branches render a balanced `rgba(r,g,b,a)` value.
link_shape.color = B.Nunjucks(
    "rgba({% if link.exists in [False, 'run'] %}150,0,0,0.125"
    "{% else %}0,0,150,0.125"
    "{% endif %})"
)

### Add `Tooltip` Behaviors

Tooltips provide a quick way to inspect parts of the graph.

In [None]:
# Append node and link tooltip behaviors; their labels are set in the next cell.
node_tooltip = B.NodeTooltip()
link_tooltip = B.LinkTooltip()
fg.behaviors = [*fg.behaviors, node_tooltip, link_tooltip]

In [None]:
# Node tooltip: "[<type> <detail>] <name>", then the node's doc and its paths.
# The detail is the status for tasks, "exists" for nodes that exist, and
# "missing" for file-like nodes that do not.
node_tooltip.label = B.Nunjucks(
    "[<b>{{ node.type }}</b>"
    "{% if node.type == 'task' %}"
    " {{  node.status}}"
    "{% elif node.exists %} exists"
    "{% elif node.type in ['file', 'directory', 'targets', 'file_dep'] %} missing"
    "{% endif %}"
    "] {{ node.name }}<br/>"
    "{{ node.doc }}{{ (node.paths or []) | join('\n') }}"
)
# Link tooltip: source name, then the link's type and `exists` value, then target name.
link_tooltip.label = B.Nunjucks(
    "{{ link.source.name }}<br/><b>[{{ link.type }} {{ link.exists }}]</b> <br/>{{ link.target.name }}"
)

### Toggle Graph Features

`doit` task graphs can quickly get _large_. Some graph features decrease (or increase)
the number of nodes or edges.

In [None]:
# Keyword arguments shared by the button rows of the application.
button_bar_style = dict(layout=dict(height="2.5em", overflow="hidden"))
view_toggles = W.HBox(**button_bar_style)
# Lay the row's children out in reverse order.
view_toggles.layout.flex_flow = "row-reverse"
# Insert the toggle row between the stylesheet and the split panel.
app.children = [app_style, view_toggles, split]

#### Show Directories

Directories can't be acted upon by `doit`, generally, and add a lot of extra edges, but
can be useful for more directly visualizing the project layout.

In [None]:
# Toggle button kept in two-way sync with the source's `show_directories` trait.
show_directories = W.ToggleButton(icon="folder", tooltip="Show Directories")
T.link((source, "show_directories"), (show_directories, "value"))
view_toggles.children = [*view_toggles.children, show_directories]

#### Group File Dependencies and Targets

A project with a lot of inter-related files will quickly get very dense. Grouping files
by their membership in tasks' `file_dep` or `target` reduces the node and edge count
significantly.

In [None]:
# Toggle button kept in two-way sync with the source's `show_files` trait.
show_files = W.ToggleButton(icon="project-diagram", tooltip="Show Files")
T.link((source, "show_files"), (show_files, "value"))
view_toggles.children = [*view_toggles.children, show_files]

> Now, hovering over each of the nodes and edges should show some data about them.

## Toggle Particles

Animated _particles_ are drawn on top of links, and are a more active mark than styled
links.

In [None]:
# Particle behavior, appended to the graph's existing behaviors.
lp = B.LinkParticles()
fg.behaviors = [*fg.behaviors, lp]
# Mode selector. The option labels are blank strings of different lengths, so the
# buttons are told apart by their icons and tooltips; the values are the mode names.
show_particle_paths = W.ToggleButtons(
    options=[("", ""), (" ", "upstream"), ("  ", "between"), ("   ", "downstream")],
    tooltips=["No particles", "Upstream", "Paths Between", "Downstreams"],
    icons=[
        "minus",
        "chevron-circle-up",
        "bezier-curve",
        "chevron-circle-down",
    ],
)
# Disabled, initially hidden button with a spinner icon, used as a busy indicator.
path_busy = W.Button(
    icon="circle-notch spin",
    disabled=True,
    button_style="info",
    layout=dict(visibility="hidden"),
)
# Text element that mirrors the selected mode value (one-way link).
particle_mode = W.HTML()
T.dlink((show_particle_paths, "value"), (particle_mode, "value"))
# Put the particle controls and a flexible spacer ahead of the existing toggles.
view_toggles.children = [
    show_particle_paths,
    particle_mode,
    path_busy,
    W.HTML(layout=dict(flex="1")),
    *view_toggles.children,
]

### Show Dependency Particles

Selecting two nodes shows the dependency paths between them, following `file_dep`,
`targets`, and `task_dep`. As these can rapidly overpower a graph, one mode at a time is
shown.

In [None]:
def find_link_indices(paths):
    """Return the sorted, unique index labels of links that lie on any of ``paths``.

    A link matches when its ``(source, target)`` pair, in either direction, is a
    pair of consecutive nodes in at least one of the paths.

    :param paths: iterable of node-id sequences.
    :returns: sorted list of labels taken from ``source.links.index``; empty when
        no path contributes a pair or no link matches.
    """
    # Collect every consecutive pair once, in both directions, so that each link
    # needs a single set lookup instead of one scan of the frame per path.
    wanted = set()
    for path in paths:
        for start, end in itertools.pairwise(path):
            wanted.add((start, end))
            wanted.add((end, start))
    if not wanted:
        return []

    links = source.links
    # ``to_list`` yields plain Python values, so the returned labels keep the
    # same type (and ``str()`` form) as ``index.to_list()`` would give.
    rows = zip(
        links.index.to_list(),
        links["source"].to_list(),
        links["target"].to_list(),
    )
    return sorted({label for label, start, end in rows if (start, end) in wanted})

In [None]:
def toggle_particles(*x):
    """Recompute which links carry particles for the current mode and selection.

    The mode is read from ``show_particle_paths`` and the selected nodes from
    ``node_selection``. In ``between`` mode the paths between the first two
    selected nodes are used (tried in both directions); in ``upstream`` and
    ``downstream`` mode the paths between the first selected node and each of its
    ancestors or descendants are used. Particles are hidden when there is no path.

    :param x: ignored; accepted so the function can be used as an observer.
    """
    mode = show_particle_paths.value
    path_busy.layout.visibility = "visible"
    try:
        # Hide the particles while the new paths are worked out; they stay
        # hidden when no path is found.
        lp.width = 0
        try:
            selected = node_selection.selected
        except NameError:
            # The selection behavior is created later in the notebook; until
            # then there is nothing selected.
            selected = []
        nxg = nx.convert_matrix.from_pandas_edgelist(
            source.links, create_using=nx.DiGraph
        )
        paths = []

        if mode == "between" and len(selected) >= 2:
            source_node = source.nodes.iloc[selected[0]].id
            target_node = source.nodes.iloc[selected[1]].id
            # A node without any link is not part of the graph; asking networkx
            # for its paths would raise.
            if source_node in nxg and target_node in nxg:
                paths = list(nx.all_simple_paths(nxg, source_node, target_node))
                if not paths:
                    paths = list(nx.all_simple_paths(nxg, target_node, source_node))

        if selected and mode in ["upstream", "downstream"]:
            source_node = source.nodes.iloc[selected[0]].id
            if source_node in nxg:
                relative_finder = nx.ancestors if mode == "upstream" else nx.descendants
                for target_node in relative_finder(nxg, source_node):
                    # Paths always run along the link direction.
                    node_pair = (
                        [source_node, target_node]
                        if mode == "downstream"
                        else [target_node, source_node]
                    )
                    paths += list(nx.all_simple_paths(nxg, *node_pair))

        particle_mode.value = f"{mode} ({len(paths)} paths)"
        if paths:
            link_idx = find_link_indices(paths)
            with lp.hold_trait_notifications():
                lp.color = {
                    "between": "orange",
                    "upstream": "red",
                    "downstream": "blue",
                }[mode]
                lp.speed = 0.01
                # Only links whose index is in the computed list get a width.
                lp.width = B.Nunjucks(
                    "{% if link.index in " + str(link_idx) + "%}5{% endif %}"
                )
                lp.density = 10
    finally:
        # Always clear the busy indicator, including when something above raised.
        path_busy.layout.visibility = "hidden"

In [None]:
# Recompute the particles whenever the selected mode changes.
show_particle_paths.observe(lambda *x: toggle_particles(), "value")

## Create some `DataGrid`s

[ipydatagrid](https://github.com/bloomberg/ipydatagrid) provides a rich `DataGrid`
class, which also speaks `pandas.DataFrame`. Since even a relatively small `doit` task
graph can be quite large, displaying the raw data in a grid provides a more readily
inspectable (and filterable) view.

In [None]:
# Layout shared by both grids: fill the full height of their container.
grid_opts = dict(
    layout=dict(height="100%", max_height="100%", min_height="100%"),
)
# One grid per data frame of the graph's source.
node_grid = DataGrid(fg.source.nodes, **grid_opts)
link_grid = DataGrid(fg.source.links, **grid_opts)
# The two grids share a fixed-height split panel, which joins the graph in the
# main split panel.
grid_panel = L.SplitPanel(
    [node_grid, link_grid],
    orientation="horizontal",
    layout=dict(overflow="hidden", height="400px", min_height="400px"),
)
split.children = [fg, grid_panel]

> The app should now contain the graph, with two grids below it.

## Add Node and Link Selection

Both `ForceGraph` and `DataGrid` support the concept of _selection_.

In [None]:
# Select whole rows in both grids.
node_grid.selection_mode = "row"
link_grid.selection_mode = "row"
# Selection behaviors for the graph, both configured with the `_selected` column name.
node_selection = B.NodeSelection(column_name="_selected")
link_selection = B.LinkSelection(column_name="_selected")
fg.behaviors = [*fg.behaviors, node_selection, link_selection]

# Recompute the particles whenever the node selection changes.
node_selection.observe(lambda *x: toggle_particles(), "selected")

> Nodes, links, and grid rows can now all be selected

## Link the Selections

While the graph and grids are now selectable, they are not expressed in the same format,
and there is no relationship between them. With `traitlets` observers (`observe`), we can
create a semi-stable, bidirectional behavior between them.

In [None]:
def link_grid_and_graph(grid, graph_selection, source, kind):
    """Keep a grid's row selection and a graph selection in step with each other.

    Two observers are registered: one turns the grid's selected row ranges into
    the data index labels stored in ``graph_selection.selected``, the other turns
    those labels back into single-row grid selections spanning every column.

    :param grid: the grid whose ``selections`` are observed and updated.
    :param graph_selection: the behavior whose ``selected`` is observed and updated.
    :param source: not used; kept so existing calls stay valid.
    :param kind: not used; kept so existing calls stay valid.
    """
    # The last selection applied to the graph, shared by both observers so that
    # a grid event which changes nothing does not write to the graph again.
    last_selected = set()

    def on_grid_select(*x):
        nonlocal last_selected
        if not grid.selections:
            graph_selection.selected = []
            return
        visible = grid.get_visible_data()
        selected = set()
        for selection in grid.selections:
            for i in range(selection["r1"], selection["r2"] + 1):
                # Row positions refer to the visible data; the row's name is its
                # index label in the underlying frame.
                selected.add(int(visible.iloc[i].name))
        if last_selected != selected:
            last_selected = selected
            graph_selection.selected = sorted(selected)

    grid.observe(on_grid_select, ["selections"])

    def on_graph_select(*x):
        nonlocal last_selected
        last_selected = set(graph_selection.selected)
        if not graph_selection.selected:
            grid.selections = []
            return
        visible = grid.get_visible_data()
        # First visible row position of every index label, built once instead of
        # searching the whole index for each selected item.
        row_of = {}
        for position, label in enumerate(visible.index.to_list()):
            row_of.setdefault(label, position)
        c2 = visible.shape[1] - 1
        selections = []
        for index in graph_selection.selected:
            r1 = row_of.get(index)
            if r1 is None:
                # The item is not among the visible rows; nothing to select.
                continue
            selections.append({"r1": r1, "r2": r1, "c1": 0, "c2": c2})
        grid.selections = selections

    graph_selection.observe(on_graph_select, ["selected"])

In [None]:
# Wire up each grid with the matching graph selection.
link_grid_and_graph(node_grid, node_selection, source, "nodes")
link_grid_and_graph(link_grid, link_selection, source, "links")

## Add a button bar

In [None]:
# Row of action buttons, placed below the split panel.
action_buttons = W.HBox(**button_bar_style)
app.children = [app_style, view_toggles, split, action_buttons]

### A button helper

This helper will react to the state of a given graph selection, showing when a command
is relevant.

In [None]:
def add_a_button(label, icon, row_filter, selection, items):
    """Append a button to the action row that reflects the current selection.

    The button is enabled, styled "primary" and labelled with a count while at
    least one selected row passes ``row_filter``; otherwise it is disabled and
    shows the plain label.

    :param label: base description of the button.
    :param icon: icon name of the button.
    :param row_filter: mapping of column name to the collection of accepted values.
    :param selection: object whose ``selected`` lists the row labels to check.
    :param items: name of the frame attribute on ``source`` to look rows up in.
    :returns: the new button.
    """
    button = W.Button(description=label, icon=icon)
    action_buttons.children = [*action_buttons.children, button]

    def passes(row):
        # A row passes when every filtered column holds one of its accepted values.
        return all(
            getattr(row, column) in accepted for column, accepted in row_filter.items()
        )

    def on_selection(*x):
        chosen = selection.selected
        frame = getattr(source, items)
        hits = [row_label for row_label in chosen if passes(frame.loc[row_label])]
        # Send the three changes to the front end as one update.
        with button.hold_sync():
            button.disabled = not hits
            button.button_style = "primary" if hits else ""
            button.description = f"{label} ({len(hits)})" if hits else label

    selection.observe(on_selection)
    # Set the initial state from whatever is selected right now.
    on_selection()
    return button


# Shorthand for buttons that follow the node selection and look rows up in the
# `nodes` frame.
add_node_button = functools.partial(
    add_a_button, selection=node_selection, items="nodes"
)
# Shorthand for buttons that follow the link selection and look rows up in the
# `links` frame.
add_link_button = functools.partial(
    add_a_button, selection=link_selection, items="links"
)

### Refresh the Graph

You might work somewhere else on the tasks, and want to reload the data.

In [None]:
def refresh():
    """Reload the source, then hand its current frames to the two grids."""
    source.refresh()
    # Nodes first, then links.
    for grid, frame_name in ((node_grid, "nodes"), (link_grid, "links")):
        grid.data = getattr(source, frame_name)

In [None]:
# The refresh button goes first in the action row.
refresh_button = W.Button(description="Refresh", icon="refresh")
action_buttons.children = [refresh_button, *action_buttons.children]

In [None]:
# The click handler receives the button, which `refresh` does not need.
refresh_button.on_click(lambda x: refresh())

### View Files

`ipylab` exposes the
[JupyterLab command](https://jupyterlab.readthedocs.io/en/stable/user/commands.html)
system, which allows for changing the state of the running client application from the
kernel.

In [None]:
# Enabled while at least one selected node is of type `file` and exists.
view_file_button = add_node_button(
    "Open Files", "folder-open", {"type": ["file"], "exists": [True]}
)


def on_view_click():
    """Ask JupyterLab to open every selected file node that exists.

    Each matching node's ``name`` is sent as the ``path`` argument of the
    ``filebrowser:open-path`` command.
    """
    cmd_id = "filebrowser:open-path"
    for i in node_selection.selected:
        node = source.nodes.iloc[i]
        if node.type == "file" and node.exists:
            lab.commands.execute(cmd_id, {"path": node["name"]})


# The click handler's arguments are not needed by `on_view_click`.
view_file_button.on_click(lambda *x: on_view_click())

### Run Tasks

In [None]:
# Enabled while at least one selected node is of type `task`.
run_task_button = add_node_button("Run", "play", {"type": ["task"]})

In [None]:
async def run_tasks_async(tasks, output_panel):
    """Run ``doit`` for ``tasks`` and stream its output into ``output_panel``.

    The command line is echoed first, the combined stdout/stderr of the process
    is appended as it arrives, and the return code is reported at the end. The
    data is then refreshed. The Run button is set back to "primary" whether or
    not the run succeeded.

    :param tasks: task names, passed as arguments to ``doit``.
    :param output_panel: panel whose first child is the output widget written to.
    """
    cmd_str = f"""doit {" ".join(tasks)}"""
    output_panel.children[0].append_stderr(f">>> {cmd_str}")

    async def watch_one(stream, name):
        # ``name`` is not used; it only labels the stream at the call site.
        async for line in anyio.streams.text.TextReceiveStream(stream):
            output = output_panel.children[0]
            output.append_stdout(line)
            if len(output.outputs) >= 100:
                # Start a fresh output widget in front once this one holds 100
                # entries; later lines go to the new first child.
                output_panel.children = [W.Output(), *output_panel.children]
            if process.returncode is not None:
                return

    kwargs = dict(
        stdout=subprocess.PIPE, stderr=subprocess.STDOUT, cwd=str(source.project_root)
    )
    try:
        async with await anyio.open_process(["doit", *tasks], **kwargs) as process:
            await asyncio.gather(watch_one(process.stdout, "stdout"), process.wait())
        output_panel.children[0].append_stderr(
            f"{cmd_str} returned {process.returncode}"
        )
        refresh()
    finally:
        # Do not leave the button in its "running" style when the process could
        # not be started or reading its output failed.
        run_task_button.button_style = "primary"

In [None]:
# Strong references to the runs in flight: the event loop only keeps weak
# references to tasks, so an unreferenced task may be garbage-collected before
# it finishes.
_running_tasks = set()


def on_run_click():
    """Run the selected tasks in the background, streaming output to a new panel.

    Does nothing when no selected node is a task. Otherwise a panel titled with
    the task names is added to the JupyterLab main area, the Run button is styled
    "warning", and ``run_tasks_async`` is scheduled on the running event loop.
    """
    tasks = [
        node["name"]
        for node in (source.nodes.iloc[i] for i in node_selection.selected)
        if node.type == "task"
    ]
    if not tasks:
        return

    output = W.Output()
    style = W.HTML(
        """<style>
            .ipfg-stream { display: flex; flex-direction: column-reverse; }
            .ipfg-stream .jp-OutputPrompt {display: none;}
        </style>"""
    )
    output_panel = L.Panel(
        [output, style],
        _dom_classes=["ipfg-stream"],
        layout=dict(overflow="scroll"),
    )
    output_panel.title.label = " ".join(tasks)
    output_panel.title.icon = ipfg_icon
    lab.shell.add(output_panel, "main", {"mode": "split-right"})

    run_task_button.button_style = "warning"
    task = asyncio.create_task(run_tasks_async(tasks, output_panel))
    _running_tasks.add(task)
    # Drop the reference again once the run is over.
    task.add_done_callback(_running_tasks.discard)

In [None]:
# The click handler's arguments are not needed by `on_run_click`.
run_task_button.on_click(lambda *x: on_run_click())

### Forget Tasks

If a task _thinks_ it's up-to-date, but you _know_ it isn't, it can be useful to
_forget_ a task, forcing it (and any dependent tasks) to be re-run.

In [None]:
# Enabled while at least one selected node is of type `task`.
forget_task_button = add_node_button("Forget", "eraser", {"type": ["task"]})

In [None]:
def forget_tasks():
    """Run ``doit forget`` for the selected tasks, then refresh the data.

    Does nothing when no selected node is a task. The Forget button is styled
    "warning" while the command runs, then "primary" on success or "danger" on
    any failure.
    """
    tasks = [
        row["name"]
        for row in (source.nodes.iloc[i] for i in node_selection.selected)
        if row.type == "task"
    ]
    if not tasks:
        return

    forget_task_button.button_style = "warning"
    command = ["doit", "forget", *tasks]
    try:
        subprocess.check_call(command, cwd=str(source.project_root))
    except Exception:
        # Any failure is only reported through the button's style.
        forget_task_button.button_style = "danger"
    else:
        forget_task_button.button_style = "primary"
    refresh()

In [None]:
# The click handler's arguments are not needed by `forget_tasks`.
forget_task_button.on_click(lambda *x: forget_tasks())