# Workflow diagram

In [None]:
import pkg_resources
import json
import os
from glob import glob

import matplotlib.pyplot as plt
import networkx as nx
import pydot
import numpy as np
from IPython.display import Image, display

In [None]:
# Names of the pipeline steps, as they appear in the "o2r_<name>.log" files.
# The order is significant: get_processes() reports each step by its 1-based
# position in this list, and create_graph() maps those positions to labels.
PROCESSES = [
    "motioncor2",
    "ctffind",
    "ctfsim",
    "imod_align",
    "imod_recon",
    "aretomo_recon",
    "savu_recon",
    "rlf_deconv",
]

In [None]:
def read_ipynb(filename):
    """Load a file from the ``RepoTemp.templates`` package and parse it as JSON.

    Judging by the function name, the files are expected to be ``.ipynb``
    notebooks, which are JSON documents.

    Args:
        filename: Name of the resource inside ``RepoTemp.templates``.

    Returns:
        The object produced by ``json.load`` for that file.
    """
    fn = pkg_resources.resource_filename("RepoTemp.templates", filename)

    # JSON text is UTF-8; pin the encoding so the result does not depend on
    # the platform's locale default (which is not UTF-8 on e.g. Windows).
    with open(fn, "r", encoding="utf-8") as f:
        return json.load(f)

In [None]:
def get_processes(plist):
    """Return the 1-based positions of the processes that have a log file.

    A process named ``name`` counts as present when a regular file called
    ``o2r_<name>.log`` exists in the current working directory.

    Args:
        plist: Sequence of process names to look for.

    Returns:
        A list of ints: for every process whose log file exists, its
        1-based position in *plist*, in the same order as *plist*.
    """
    # Resolve the working directory once; it does not change inside the loop.
    cwd = os.getcwd()

    node_list = []
    for idx, curr_proc in enumerate(plist, start=1):
        log_path = os.path.join(cwd, f"o2r_{curr_proc}.log")

        # Test the path directly instead of globbing it: glob would interpret
        # characters such as "[", "]", "*" or "?" in the working directory
        # name as a pattern and then fail to find a log that is there.
        if os.path.isfile(log_path):
            node_list.append(idx)

    return node_list

In [None]:
def create_graph(plist: list):
    """Render the workflow diagram for the given process IDs.

    Builds a left-to-right directed graph with one ``box3d`` node per entry
    of *plist* and one edge for every workflow step whose two endpoints are
    both in *plist*, then displays the graph inline as a PNG image.

    Args:
        plist: Process IDs to draw; each must be a key of ``proc_dict``
            below (1-8).

    Raises:
        KeyError: If *plist* contains an ID that is not in ``proc_dict``.
    """
    # Display label for each process ID.
    proc_dict = {
        1: "Motion Corr.",
        2: "CTF estimation",
        3: "CTF simulation",
        4: "TS alignment (IMOD)",
        5: "Reconstruction (IMOD)",
        6: "Reconstruction (Aretomo)",
        7: "Reconstruction (Savu)",
        8: "Deconvolution (RedLionfish)",
    }

    # Every possible (source, destination) step of the workflow.
    o2r_flow = [
        (1, 2), (2, 3), (3, 8),
        (1, 4), (4, 5), (5, 8),
        (4, 6), (6, 8),
        (4, 7), (7, 8),
    ]

    g = pydot.Dot(graph_type="digraph", rankdir="LR")

    # One node per requested process.
    for proc_id in plist:
        node = pydot.Node(proc_dict[proc_id])
        node.set_shape("box3d")
        g.add_node(node)

    # Keep only the steps whose endpoints were both requested.
    # (The edge list is ``o2r_flow``; the name ``overall_flow`` used here
    # previously was undefined and raised NameError on every call.)
    selected = set(plist)
    for src, dst in o2r_flow:
        if src in selected and dst in selected:
            g.add_edge(pydot.Edge(proc_dict[src], proc_dict[dst]))

    g_plot = Image(g.create_png())
    display(g_plot)

In [None]:
# Find the steps that have a log file in the working directory, then draw them.
nodes = get_processes(plist=PROCESSES)
create_graph(plist=nodes)