In [None]:
# Environment setup: install the `graphviz` Python package for the running kernel.
# `%pip` (rather than `!pip`) is used so the install targets the kernel's own
# environment. The install is unpinned -- consider pinning a version for
# reproducible reruns.
# NOTE(review): the pip package provides only the Python bindings; rendering
# also needs the Graphviz `dot` executable on PATH (see the graphviz package
# installation docs). The two commented-out prints are optional diagnostics
# for exactly that: uncomment them if rendering fails.
import sys, shutil
#print("python:", sys.executable)  # sanity check: which interpreter this kernel uses
%pip install -q graphviz          
#print("dot on PATH:", shutil.which("dot"))  # sanity check: None means no `dot` executable was found

In [None]:
from graphviz import Digraph

# Appearance shared by every node: rounded, white-filled boxes with a
# heavier-than-default black outline and 12 pt Helvetica text.
NODE_STYLE = {
    'shape': 'box',
    'style': 'rounded,filled',
    'fillcolor': 'white',
    'color': 'black',
    'fontname': 'Helvetica',
    'fontsize': '12',
    'penwidth': '1.5',
}

# Appearance shared by every edge, matched to the node outline and font.
EDGE_STYLE = {
    'arrowhead': 'normal',
    'color': 'black',
    'fontname': 'Helvetica',
    'fontsize': '12',
    'penwidth': '1.5',
}

# The diagram object every later statement adds nodes and edges to.
# 'pdf' is only the graph's default output format; the render call at the end
# of this cell passes its own format.
dot = Digraph(name='CONSORT_Diagram', format='pdf')

# Graph-level layout: top-to-bottom flow, orthogonal connectors, 300 dpi.
dot.attr(rankdir='TB', splines='ortho')
dot.attr('graph', dpi='300')

# Install the shared defaults for all nodes and edges declared afterwards.
dot.attr('node', **NODE_STYLE)
dot.attr('edge', **EDGE_STYLE)

# --- Cohort flow: source encounters (A) -> encounters with complete data (B) ---
# Each count is declared exactly once so the box labels and the exclusion
# arithmetic below are derived from the same numbers and cannot drift apart.
N_SOURCE_ENCOUNTERS = 733142     # shown in box A
N_COMPLETE_ENCOUNTERS = 515286   # shown in box B (total)
N_COMPLETE_EMERGENCY = 171814    # shown in box B (emergency split)
N_COMPLETE_INPATIENT = 343559    # shown in box B (inpatient split)
# NOTE(review): 171,814 + 343,559 = 515,373, which is 87 more than the
# 515,286 reported as the total. Confirm against the data whether the two
# categories can overlap or whether one of the three figures is mistyped.

# Box A: the starting population. Labels use "N = <count with thousands separators>".
dot.node(
    'A',
    'Emergency and inpatient encounters enriched for hypercapnic\n'
    'respiratory failure in TriNetX database (USA, 2022)\n'
    f'N = {N_SOURCE_ENCOUNTERS:,}'
)

# Box B: encounters that passed the data-completeness filter, with the
# emergency / inpatient split on the last line.
dot.node(
    'B',
    'Adequate data completeness\n'
    f'N = {N_COMPLETE_ENCOUNTERS:,}\n'
    f'(N = {N_COMPLETE_EMERGENCY:,} emergency; N = {N_COMPLETE_INPATIENT:,} inpatient)'
)

# Encounters dropped between A and B (217,856).
removed_encounters = N_SOURCE_ENCOUNTERS - N_COMPLETE_ENCOUNTERS

# Arrow A -> B, annotated with the exclusion reason and count.
# The leading spaces in the label are kept byte-for-byte (presumably a manual
# nudge away from the arrow -- confirm). The graph uses splines='ortho', and
# Graphviz documents that ortho routing does not handle edge labels in dot;
# if the label renders detached from the arrow, `xlabel` is the suggested
# alternative.
dot.edge(
    'A',
    'B',
    label=f'   Not all required data elements present\n(n = {removed_encounters:,} encounters removed)'
)

# Box C: the "simulate case definitions" step, fed directly by box B through
# an unlabeled arrow.
simulate_label = 'Simulate hypercapnic respiratory failure case definitions'
dot.node('C', label=simulate_label)
dot.edge('B', 'C')

# The 10 study arms and the count displayed for each, written as ordered
# (arm name, count) pairs. The resulting dict keeps this order, which is the
# order the arm nodes and their incoming edges are declared in further down.
# NOTE(review): 'Adler' and 'Wilson' carry the identical count 18,118 --
# confirm this is a genuine coincidence and not a copy-paste slip.
arms = dict([
    ('Adler', 18118),
    ('Thille', 16263),
    ('Ouanes-Besbes', 37336),
    ('Calvo', 6566),
    ('Bülbül', 57813),
    ('Meservey', 29009),
    ('Vonderbank', 86137),
    ('Wilson', 18118),
    ('Cavalot', 58481),
    ('Chung', 55454),
])

# Declare one box per study arm inside an anonymous subgraph pinned to
# rank='same', so all arm boxes share a single horizontal rank.
# Each box is labelled "<arm name>" over "N = <count with thousands separators>".
with dot.subgraph() as arm_row:
    arm_row.attr(rank='same')
    for arm_label, arm_n in arms.items():
        arm_row.node(arm_label, f"{arm_label}\nN = {arm_n:,}")

# Fan out from box C to every arm. This stays a separate loop after the
# subgraph so the edges are added to the parent graph, not the subgraph.
for arm_name in arms:
    dot.edge('C', arm_name)

# Write the diagram to disk as TIFF (the explicit format here is used for this
# call instead of the 'pdf' default set on the graph) and open the result in
# the system's default viewer. The path of the rendered file is left as the
# cell's final expression so it is displayed as the cell output.
tiff_path = dot.render(filename='consort_diagram', format='tiff', view=True)
tiff_path