## Setup and Graph Libraries

In [None]:
import requests
import json
import pandas as pd
import numpy as np
from datetime import datetime
import warnings
warnings.filterwarnings('ignore')

# Base URL that every request in this notebook is built from
API_BASE = 'http://localhost:3001/api'

# Startup banner: confirmation line followed by the configured endpoint
print(
    "✓ Graph Analytics Notebook Initialized",
    f"API Base: {API_BASE}",
    sep="\n",
)

## Section 1: Create Protocol DAG

Build a directed acyclic graph representing lab protocol steps and dependencies

In [None]:
# Create a protocol DAG
# JSON body sent to the protocol-dag endpoint.
dag_params = {
    'protocol': 'plasmid-prep',
    'template': 'crispr-cloning',
}

# The timeout keeps the cell from blocking indefinitely if the API server
# is unreachable (requests applies no timeout by default).
response = requests.post(
    f'{API_BASE}/v1/graphs/protocol-dag',
    json=dag_params,
    timeout=30,
)
# Surface HTTP errors here, with the status code, instead of as a confusing
# KeyError / JSON decode error on the line below.
response.raise_for_status()

# Later cells read graph_data, so it stays a notebook-level name.
graph_data = response.json()['graph']

print(f"✓ Protocol DAG Created: {graph_data['graphId']}")
print(f"Protocol: {graph_data['protocol']}")
print(f"Nodes (steps): {len(graph_data['nodes'])}")
print(f"Edges (dependencies): {len(graph_data['edges'])}")
print(f"\nGraph Type: {graph_data['graphType']}")

## Section 2: Analyze Graph Structure

In [None]:
# Analyze nodes
print("PROTOCOL STEPS (Nodes):")
print("-" * 60)
nodes_df = pd.DataFrame(graph_data['nodes'])
# Use the compact id/label view only when BOTH columns exist. Checking for
# 'id' alone raised KeyError on node records that carry no 'label'.
if {'id', 'label'}.issubset(nodes_df.columns):
    print(nodes_df[['id', 'label']].to_string(index=False))
else:
    # Unknown node schema: show every column rather than guessing.
    print(nodes_df.to_string(index=False))

print("\n\nSTEP DEPENDENCIES (Edges):")
print("-" * 60)
edges_df = pd.DataFrame(graph_data['edges'])
if len(edges_df) > 0:
    print(edges_df.to_string(index=False))
else:
    print("No explicit dependencies found")

## Section 3: Calculate Graph Centrality

Identify critical steps using centrality measures

In [None]:
# Get centrality metrics
# The graph id is sent both in the URL path and as a query parameter,
# exactly as before.
centrality_params = {'graphId': graph_data['graphId']}

# The timeout prevents the cell from hanging if the API server is unreachable.
response = requests.get(
    f'{API_BASE}/v1/graphs/{graph_data["graphId"]}/centrality',
    params=centrality_params,
    timeout=30,
)
# Report HTTP failures directly instead of via a KeyError on 'centrality'.
response.raise_for_status()

centrality_data = response.json()['centrality']

print("CENTRALITY ANALYSIS:")
print("=" * 60)

# Both per-node metrics are reported the same way: a heading followed by
# the five highest-scoring nodes. A metric that is missing or empty prints
# only its heading.
for heading, metric in (
    ("Degree Centrality (step connectivity)", 'degree'),
    ("Betweenness Centrality (critical pathway nodes)", 'betweenness'),
):
    print(f"\n{heading}:")
    scores = centrality_data.get(metric)
    if scores:
        top_nodes = sorted(scores.items(), key=lambda x: x[1], reverse=True)[:5]
        for node, score in top_nodes:
            print(f"  {node}: {score:.3f}")

print("\nClustering Coefficient:")
print(f"  Network clustering: {centrality_data.get('clusteringCoefficient', 'N/A')}")

## Section 4: Shortest Path Analysis

Find optimal paths through the protocol workflow

In [None]:
# Find shortest paths if nodes exist
if len(graph_data['nodes']) >= 2:
    # Endpoints are the first and last node in the list; placeholder ids are
    # used when a boundary node has no 'id' key.
    start_node = graph_data['nodes'][0].get('id', 'step-1')
    end_node = graph_data['nodes'][-1].get('id', 'step-final')

    path_params = {
        'from': start_node,
        'to': end_node,
    }

    # The timeout prevents the cell from hanging if the API server is
    # unreachable.
    response = requests.get(
        f'{API_BASE}/v1/graphs/{graph_data["graphId"]}/paths',
        params=path_params,
        timeout=30,
    )
    # Report HTTP failures directly rather than as "No paths found" or a
    # JSON decode error.
    response.raise_for_status()

    path_data = response.json().get('paths', {})

    print(f"SHORTEST PATHS: {start_node} → {end_node}")
    print("=" * 60)
    # Treat a missing, null, or empty 'shortestPaths' list the same as a
    # missing 'paths' object, so the header is never left without a body.
    shortest_paths = path_data.get('shortestPaths') if path_data else None
    if shortest_paths:
        # Show at most the first three paths returned.
        for i, path in enumerate(shortest_paths[:3], 1):
            # "Length" counts the entries in the path as returned by the API.
            print(f"\nPath {i} (Length {len(path)}):")
            print(" → ".join(path))
    else:
        print("No paths found")
else:
    print("Not enough nodes for path analysis")

## Section 5: Create Instrument Network

Model lab instrument dependencies and job flows

In [None]:
# Create instrument network
# JSON body sent to the instrument-network endpoint.
instr_params = {
    'protocol': 'plasmid-prep',
    'instruments': ['thermocycler', 'centrifuge', 'magnetic-stand', 'incubator'],
}

# The timeout prevents the cell from hanging if the API server is unreachable.
response = requests.post(
    f'{API_BASE}/v1/graphs/instrument-network',
    json=instr_params,
    timeout=30,
)
# Report HTTP failures directly instead of via a KeyError on 'graph'.
response.raise_for_status()

instr_graph = response.json()['graph']

print(f"✓ Instrument Network Created: {instr_graph['graphId']}")
print(f"Instruments (nodes): {len(instr_graph.get('nodes', []))}")
print(f"Job flows (edges): {len(instr_graph.get('edges', []))}")

# Display instrument details
if instr_graph.get('nodes'):
    print("\nINSTRUMENT DETAILS:")
    instr_df = pd.DataFrame(instr_graph['nodes'])
    if 'id' in instr_df.columns and 'label' in instr_df.columns:
        print(instr_df[['id', 'label']].to_string(index=False))
    else:
        # Same fallback as the protocol-step table: show every column rather
        # than leaving the header with nothing under it.
        print(instr_df.to_string(index=False))

## Section 6: Sample Lineage Graph

Track sample ancestry through the workflow

In [None]:
# Create sample lineage graph
# JSON body sent to the sample-lineage endpoint.
lineage_params = {
    'protocol': 'plasmid-prep',
    'numSamples': 3,
    'trackMetadata': True,
}

# The timeout prevents the cell from hanging if the API server is unreachable.
response = requests.post(
    f'{API_BASE}/v1/graphs/sample-lineage',
    json=lineage_params,
    timeout=30,
)
# Report HTTP failures directly instead of via a KeyError on 'graph'.
response.raise_for_status()

lineage_graph = response.json()['graph']

print(f"✓ Sample Lineage Created: {lineage_graph['graphId']}")
print(f"Sample nodes: {len(lineage_graph.get('nodes', []))}")
print(f"Lineage edges: {len(lineage_graph.get('edges', []))}")

# Display sample tracking
if lineage_graph.get('nodes'):
    print("\nSAMPLE TRACKING:")
    # Keeps every node whose string form contains "sample" (case-insensitive).
    # NOTE(review): this matches key names as well as values, so a field such
    # as 'sampleId' on every node would select all nodes. Confirm against the
    # real node schema.
    sample_nodes = [n for n in lineage_graph['nodes'] if 'sample' in str(n).lower()]
    if sample_nodes:
        # Not printed here; kept as a notebook-level name for inspection.
        sample_df = pd.DataFrame(sample_nodes)
        print(f"Found {len(sample_nodes)} sample tracking nodes")

## Key Insights

From the graph analysis:
1. **Critical steps**: Steps with high centrality require careful control
2. **Workflow efficiency**: Shortest paths show ideal sequencing
3. **Instrument bottlenecks**: The instrument network exposes usage patterns and shared dependencies
4. **Sample tracking**: Complete lineage enables traceability

Use these insights to optimize protocol planning and resource allocation.