In [2]:
# Import the GLI library
import sys
import os

# Add the parent directory to the path to import gli
sys.path.insert(0, os.path.join(os.path.dirname(os.getcwd()), 'python'))

import gli
from gli import Graph, get_available_backends, set_backend, get_current_backend, create_random_graph
import time
import random

print("GLI Tutorial - Graph Operations Demo")
print(f"Available backends: {get_available_backends()}")
print(f"Current backend: {get_current_backend()}")

GLI Tutorial - Graph Operations Demo
Available backends: ['python', 'rust']
Current backend: rust


In [3]:
g = Graph()
    
# Create employees with predictable attributes for clear filtering
departments = ['Engineering', 'Sales', 'Marketing']
roles = ['Junior', 'Senior', 'Manager']

employee_ids = []

# Create 30000 employees with controlled attributes
for i in range(30000):
    # Create predictable patterns for clear filtering
    dept = departments[i % 3]
    
    # Make some employees clearly senior
    if i < 50:
        role = 'Manager'
        salary = random.randint(100000, 150000)
        performance = random.uniform(4.0, 5.0)
    elif i < 150:
        role = 'Senior'
        salary = random.randint(80000, 120000)
        performance = random.uniform(3.5, 4.5)
    else:
        role = 'Junior'
        salary = random.randint(50000, 80000)
        performance = random.uniform(3.0, 4.0)
    
    employee_id = g.add_node(
        name=f"Employee_{i:03d}",
        department=dept,
        role=role,
        salary=salary,
        performance_score=round(performance, 1),
        employee_id=i,
        is_remote=i % 4 == 0  # Every 4th person is remote
    )
    employee_ids.append(employee_id)

print(f"✅ Created graph: {g.node_count()} employees")

# Add some management relationships
for i in range(0, 500):  # First 500 are managers
    manager = employee_ids[i]
    # Each manager oversees 3-5 people
    for j in range(3):
        if i * 3 + j + 500 < len(employee_ids):
            report = employee_ids[i * 3 + j + 500]
            g.add_edge(manager, report, relationship='manages')

print(f"✅ Added management relationships: {g.edge_count()} edges")

# Commit initial state
print("\n💾 Committing initial state...")
initial_hash = g.save_state("Initial company structure")
print(f"✅ Initial state: {initial_hash}")

# Initial filtering
print("\n🔍 Initial State Filtering:")

if g.use_rust:
    # Filter managers
    managers = g.filter_nodes_by_attributes({'role': 'Manager'})
    print(f"📊 Managers: {len(managers)}")
    
    # Filter engineering department
    engineers = g.filter_nodes_by_attributes({'department': 'Engineering'})
    print(f"📊 Engineering employees: {len(engineers)}")
    
    # Filter remote workers
    remote_workers = g.filter_nodes_by_attributes({'is_remote': True})
    print(f"📊 Remote workers: {len(remote_workers)}")
    
    # Filter high earners (>= 90k)
    high_earners = []
    for node_id in employee_ids:
        node = g.get_node(node_id)
        if node and node.attributes.get('salary', 0) >= 90000:
            high_earners.append(node_id)
    print(f"📊 High earners (≥$90k): {len(high_earners)}")

# Make significant changes
print("\n🔄 Making significant changes...")

changes_made = {
    'promotions': 0,
    'salary_bumps': 0,
    'new_remote': 0
}

# Promote some juniors to seniors
juniors = g.filter_nodes_by_attributes({'role': 'Junior'})
for i, node_id in enumerate(juniors[:30]):  # Promote first 30 juniors
    g.set_node_attribute(node_id, 'role', 'Senior')
    # Give them a salary bump
    current_node = g.get_node(node_id)
    if current_node:
        new_salary = int(current_node.attributes.get('salary', 50000) * 1.2)
        g.set_node_attribute(node_id, 'salary', new_salary)
    changes_made['promotions'] += 1

# Give raises to top performers
for node_id in employee_ids:
    node = g.get_node(node_id)
    if node and node.attributes.get('performance_score', 0) >= 4.0:
        current_salary = node.attributes.get('salary', 50000)
        new_salary = int(current_salary * 1.15)
        g.set_node_attribute(node_id, 'salary', new_salary)
        changes_made['salary_bumps'] += 1

# Make some employees remote
for i in range(0, 300, 7):  # Every 7th employee becomes remote
    if i < len(employee_ids):
        g.set_node_attribute(employee_ids[i], 'is_remote', True)
        changes_made['new_remote'] += 1

print(f"✅ Changes applied:")
print(f"   📈 Promotions: {changes_made['promotions']}")
print(f"   💰 Salary increases: {changes_made['salary_bumps']}")
print(f"   🏠 New remote workers: {changes_made['new_remote']}")

# Commit modified state
print("\n💾 Committing modified state...")
modified_hash = g.commit("Annual review - promotions and raises")
print(f"✅ Modified state: {modified_hash}")

# Modified state filtering
print("\n🔍 Modified State Filtering:")

if g.use_rust:
    # Same filters on modified state
    managers_mod = g.filter_nodes_by_attributes({'role': 'Manager'})
    print(f"📊 Managers: {len(managers_mod)}")
    
    seniors_mod = g.filter_nodes_by_attributes({'role': 'Senior'})
    print(f"📊 Seniors: {len(seniors_mod)}")
    
    engineers_mod = g.filter_nodes_by_attributes({'department': 'Engineering'})
    print(f"📊 Engineering employees: {len(engineers_mod)}")
    
    remote_workers_mod = g.filter_nodes_by_attributes({'is_remote': True})
    print(f"📊 Remote workers: {len(remote_workers_mod)}")
    
    # Count high earners again
    high_earners_mod = []
    for node_id in employee_ids:
        node = g.get_node(node_id)
        if node and node.attributes.get('salary', 0) >= 90000:
            high_earners_mod.append(node_id)
    print(f"📊 High earners (≥$90k): {len(high_earners_mod)}")
    
    # Show changes
    print(f"\n📈 Comparison:")
    print(f"   👔 Managers: {len(managers)} → {len(managers_mod)} ({len(managers_mod) - len(managers):+d})")
    print(f"   🎓 Seniors: ? → {len(seniors_mod)} (after promotions)")
    print(f"   🔧 Engineers: {len(engineers)} → {len(engineers_mod)} ({len(engineers_mod) - len(engineers):+d})")
    print(f"   🏠 Remote workers: {len(remote_workers)} → {len(remote_workers_mod)} ({len(remote_workers_mod) - len(remote_workers):+d})")
    print(f"   💰 High earners: {len(high_earners)} → {len(high_earners_mod)} ({len(high_earners_mod) - len(high_earners):+d})")
    
    # Show some specific examples
    print(f"\n👑 Sample promoted employees:")
    promoted_seniors = g.filter_nodes({'role': 'Senior'})
    for i, node_id in enumerate(promoted_seniors[:5]):
        node = g.get_node(node_id)
        if node:
            print(f"   • {node.attributes.get('name')}: {node.attributes.get('role')} in {node.attributes.get('department')} (${node.attributes.get('salary', 0):,})")
    
    # Storage stats
    stats = g.get_storage_stats()
    print(f"\n📊 Final storage stats: {stats}")



✅ Created graph: 30000 employees
✅ Added management relationships: 1500 edges

💾 Committing initial state...
✅ Initial state: 8d2017024cc3dd15

🔍 Initial State Filtering:
📊 Managers: 50
📊 Engineering employees: 10000
📊 Remote workers: 7500
📊 High earners (≥$90k): 128

🔄 Making significant changes...
✅ Initial state: 8d2017024cc3dd15

🔍 Initial State Filtering:
📊 Managers: 50
📊 Engineering employees: 10000
📊 Remote workers: 7500
📊 High earners (≥$90k): 128

🔄 Making significant changes...
✅ Changes applied:
   📈 Promotions: 30
   💰 Salary increases: 1529
   🏠 New remote workers: 43

💾 Committing modified state...
✅ Modified state: 98cc4299e2d85d8b

🔍 Modified State Filtering:
📊 Managers: 50
📊 Seniors: 130
📊 Engineering employees: 10000
📊 Remote workers: 7532
📊 High earners (≥$90k): 225

📈 Comparison:
   👔 Managers: 50 → 50 (+0)
   🎓 Seniors: ? → 130 (after promotions)
   🔧 Engineers: 10000 → 10000 (+0)
   🏠 Remote workers: 7500 → 7532 (+32)
   💰 High earners: 128 → 225 (+97)

👑 Sample p

In [None]:
# 🔄 Loading Previous States

print("🕰️ Time travel: Loading the initial state...")

# Method 1: Using load_state() if available
try:
    # Save current state hash for later
    current_state = modified_hash
    
    # Load the initial state
    if hasattr(g, 'load_state'):
        g.load_state(initial_hash)
        print(f"✅ Loaded initial state: {initial_hash}")
    else:
        print("⚠️ load_state method not available")
except Exception as e:
    print(f"❌ Error loading state: {e}")

# Check the data after loading initial state
print(f"\n🔍 After loading initial state:")
initial_seniors = g.filter_nodes({'role': 'Senior'})
initial_remote = g.filter_nodes({'is_remote': True}) 
initial_high_earners = g.filter_nodes(lambda nid, attrs: attrs.get('salary', 0) >= 90000)

print(f"   👔 Senior employees: {len(initial_seniors)}")
print(f"   🏠 Remote workers: {len(initial_remote)}")
print(f"   💰 High earners (≥$90k): {len(initial_high_earners)}")

print(f"\n📊 Comparison with modified state:")
print(f"   👔 Seniors: {len(initial_seniors)} (initial) vs {len(current_seniors)} (modified)")
print(f"   🏠 Remote: {len(initial_remote)} (initial) vs {len(current_remote)} (modified)")

🕰️ Time travel: Loading the initial state...
✅ Loaded initial state: 683eaad4aa340052

🔍 After loading initial state:
   👔 Senior employees: 100
   🏠 Remote workers: 7500
   💰 High earners (≥$90k): 120

📊 Comparison with modified state:
   👔 Seniors: 100 (initial) vs 130 (modified)
   🏠 Remote: 7500 (initial) vs 7532 (modified)
   👔 Senior employees: 100
   🏠 Remote workers: 7500
   💰 High earners (≥$90k): 120

📊 Comparison with modified state:
   👔 Seniors: 100 (initial) vs 130 (modified)
   🏠 Remote: 7500 (initial) vs 7532 (modified)


In [None]:
# 🎯 Complete State Management Workflow

print("🎯 Demonstrating complete state loading workflow:")

# Store current state info
current_seniors = len(g.filter_nodes({'role': 'Senior'}))
current_remote = len(g.filter_nodes({'is_remote': True}))
current_high_earners = len(g.filter_nodes(lambda nid, attrs: attrs.get('salary', 0) >= 90000))

print(f"\n📊 BEFORE loading initial state:")
print(f"   👔 Senior employees: {current_seniors}")
print(f"   🏠 Remote workers: {current_remote}")
print(f"   💰 High earners (≥$90k): {current_high_earners}")

# Try to load the initial state
print(f"\n⏪ Loading initial state {initial_hash[:12]}...")
try:
    success = g.load_state(initial_hash)
    if success:
        print(f"✅ Successfully loaded initial state!")
        
        # Check the data after loading
        loaded_seniors = len(g.filter_nodes({'role': 'Senior'}))
        loaded_remote = len(g.filter_nodes({'is_remote': True}))
        loaded_high_earners = len(g.filter_nodes(lambda nid, attrs: attrs.get('salary', 0) >= 90000))
        
        print(f"\n📊 AFTER loading initial state:")
        print(f"   👔 Senior employees: {loaded_seniors}")
        print(f"   🏠 Remote workers: {loaded_remote}")
        print(f"   💰 High earners (≥$90k): {loaded_high_earners}")
        
        print(f"\n📈 State comparison:")
        print(f"   👔 Seniors: {current_seniors} → {loaded_seniors} ({loaded_seniors - current_seniors:+d})")
        print(f"   🏠 Remote: {current_remote} → {loaded_remote} ({loaded_remote - current_remote:+d})")
        print(f"   💰 High earners: {current_high_earners} → {loaded_high_earners} ({loaded_high_earners - current_high_earners:+d})")
        
    else:
        print(f"❌ Failed to load state")
        
except Exception as e:
    print(f"❌ Error: {e}")

print(f"\n💾 Updated storage stats:")
updated_stats = g.get_storage_stats()
for key, value in updated_stats.items():
    print(f"   {key}: {value}")

🎯 Demonstrating complete state loading workflow:

📊 BEFORE loading initial state:
   👔 Senior employees: 100
   🏠 Remote workers: 7500
   💰 High earners (≥$90k): 120

⏪ Loading initial state 683eaad4aa34...
✅ Successfully loaded initial state!

📊 AFTER loading initial state:
   👔 Senior employees: 100
   🏠 Remote workers: 7500
   💰 High earners (≥$90k): 120

📈 State comparison:
   👔 Seniors: 100 → 100 (+0)
   🏠 Remote: 7500 → 7500 (+0)
   💰 High earners: 120 → 120 (+0)

💾 Updated storage stats:
   pooled_nodes: 31628
   edge_refs_tracked: 1500
   branches: 1
   total_states: 3
   node_refs_tracked: 31628
   pooled_edges: 1500

📊 AFTER loading initial state:
   👔 Senior employees: 100
   🏠 Remote workers: 7500
   💰 High earners (≥$90k): 120

📈 State comparison:
   👔 Seniors: 100 → 100 (+0)
   🏠 Remote: 7500 → 7500 (+0)
   💰 High earners: 120 → 120 (+0)

💾 Updated storage stats:
   pooled_nodes: 31628
   edge_refs_tracked: 1500
   branches: 1
   total_states: 3
   node_refs_tracked: 3162

In [None]:
# 🚀 Testing New Lazy-Loaded Properties (FIXED!)

# Note: Restart kernel to get updated Graph class
import sys
sys.path.insert(0, '/Users/michaelroth/Documents/Code/gli/python')
import gli

# Create a fresh graph to test the new properties
print("🚀 Creating fresh graph to test lazy-loaded properties:")
fresh_g = gli.Graph(backend='rust')

print(f"\n📚 fresh_g.states (lazy-loaded):")
states = fresh_g.states
for key, value in states.items():
    if key != 'auto_states':  # Skip the deque for cleaner output
        print(f"   {key}: {value}")

print(f"\n🌿 fresh_g.branches (lazy-loaded):")
branches = fresh_g.branches
for name, hash_val in branches.items():
    print(f"   {name}: {hash_val}")

# Add some data and test again
fresh_g.add_node('test1', department='Engineering', role='Junior')
fresh_g.add_node('test2', department='Marketing', role='Senior')
fresh_g.add_edge('test1', 'test2', relationship='collaborates')

# Commit and create branches
commit1 = fresh_g.commit('Initial data')
print(f"\n💾 Committed: {commit1}")

# Test filtering with new graph
marketing_folks = fresh_g.filter_nodes({'department': 'Marketing'})
seniors = fresh_g.filter_nodes({'role': 'Senior'})
print(f"\n🔍 Filtering test:")
print(f"   Marketing: {len(marketing_folks)} nodes")
print(f"   Seniors: {len(seniors)} nodes")

# Create a branch
fresh_g.create_branch('experiment', commit1)

print(f"\n📊 Final state:")
print(f"   States: {fresh_g.states['total_states']} total states")
print(f"   Branches: {fresh_g.branches}")

print(f"\n✅ Key improvements:")
print(f"   • g.states: lazy-loaded dict (no method calls needed)")
print(f"   • g.branches: lazy-loaded dict (replaces list_branches())")
print(f"   • Always in sync with Rust backend")
print(f"   • Properties computed on demand")
print(f"   • filter_nodes() supports both lambdas and attribute dicts")

🚀 Creating fresh graph to test lazy-loaded properties:

📚 fresh_g.states (lazy-loaded):
   total_states: 1
   pooled_nodes: 0
   pooled_edges: 0
   node_refs_tracked: 0
   edge_refs_tracked: 0
   current_hash: initial
   state_hashes: ['initial']
   branches_count: 1

🌿 fresh_g.branches (lazy-loaded):
   main: initial

💾 Committed: b58dbf422e7ee754

🔍 Filtering test:
   Marketing: 1 nodes
   Seniors: 1 nodes

📊 Final state:
   States: 2 total states
   Branches: {'main': 'initial', 'experiment': 'b58dbf422e7ee754'}

✅ Key improvements:
   • g.states: lazy-loaded dict (no method calls needed)
   • g.branches: lazy-loaded dict (replaces list_branches())
   • Always in sync with Rust backend
   • Properties computed on demand
   • filter_nodes() supports both lambdas and attribute dicts


In [None]:
g.nodes.items()[0:5]

[('node_66b4a24e',
  Node(id='node_66b4a24e', attributes={'salary': 63258, 'name': 'Employee_18968', 'is_remote': True, 'employee_id': 18968, 'department': 'Marketing', 'performance_score': 3.2, 'role': 'Junior'})),
 ('node_e3447fd0',
  Node(id='node_e3447fd0', attributes={'salary': 71218, 'performance_score': 3.5, 'is_remote': False, 'name': 'Employee_26633', 'department': 'Marketing', 'employee_id': 26633, 'role': 'Junior'})),
 ('node_2c9b3f4a',
  Node(id='node_2c9b3f4a', attributes={'salary': 68941, 'name': 'Employee_24589', 'department': 'Sales', 'performance_score': 3.8, 'role': 'Junior', 'employee_id': 24589, 'is_remote': False})),
 ('node_7359686d',
  Node(id='node_7359686d', attributes={'employee_id': 24550, 'department': 'Sales', 'role': 'Junior', 'name': 'Employee_24550', 'performance_score': 3.0, 'salary': 64820, 'is_remote': False})),
 ('node_c801e277',
  Node(id='node_c801e277', attributes={'department': 'Marketing', 'employee_id': 22397, 'role': 'Junior', 'is_remote': Fal

In [None]:
g.load_state(g.states['state_hashes'][-2])

True

In [None]:
g.nodes.items()[0:5]

[('node_89ce275d',
  Node(id='node_89ce275d', attributes={'salary': 66523, 'department': 'Sales', 'employee_id': 27055, 'performance_score': 3.8, 'name': 'Employee_27055', 'role': 'Junior', 'is_remote': False})),
 ('node_1cf06c4b',
  Node(id='node_1cf06c4b', attributes={'is_remote': False, 'performance_score': 3.9, 'role': 'Junior', 'employee_id': 526, 'department': 'Sales', 'name': 'Employee_526', 'salary': 57658})),
 ('node_ad588d9a',
  Node(id='node_ad588d9a', attributes={'name': 'Employee_10171', 'role': 'Junior', 'salary': 60237, 'is_remote': False, 'performance_score': 3.8, 'employee_id': 10171, 'department': 'Sales'})),
 ('node_782d8338',
  Node(id='node_782d8338', attributes={'salary': 75940, 'role': 'Junior', 'performance_score': 3.1, 'name': 'Employee_11759', 'employee_id': 11759, 'is_remote': False, 'department': 'Marketing'})),
 ('node_cf19fa0d',
  Node(id='node_cf19fa0d', attributes={'name': 'Employee_13553', 'employee_id': 13553, 'department': 'Marketing', 'salary': 66373

In [None]:
g.save_state("Final commit after testing")

'd9390fc131145899'

In [None]:
g.states

{'total_states': 4,
 'pooled_nodes': 31628,
 'pooled_edges': 1500,
 'node_refs_tracked': 31628,
 'edge_refs_tracked': 1500,
 'current_hash': 'd9390fc131145899',
 'state_hashes': ['d9390fc131145899', 'ca554ab1d40bc329', '683eaad4aa340052'],
 'auto_states': ['683eaad4aa340052', 'ca554ab1d40bc329', 'd9390fc131145899'],
 'branches_count': 1}

In [None]:
# 🎯 Final GLI API: save_state vs commit

print("🎯 NEW: save_state method (more intuitive than commit)")

# Create a fresh graph to demonstrate
demo_g = gli.Graph(backend='rust')
demo_g.add_node('alice', role='Engineer', team='Backend')
demo_g.add_node('bob', role='Designer', team='Frontend')

# Use the new save_state method
state1 = demo_g.save_state("Initial team setup")
print(f"✅ Saved state: {state1}")

# Add more data
demo_g.add_node('charlie', role='Manager', team='Backend')
state2 = demo_g.save_state("Added management layer")
print(f"✅ Saved state: {state2}")

# Test backward compatibility
demo_g.add_edge('alice', 'charlie', relationship='reports_to')
state3 = demo_g.commit("Using legacy commit method")  # Still works!
print(f"✅ Legacy commit: {state3}")

print(f"\n📊 Final state overview:")
final_states = demo_g.states
final_branches = demo_g.branches
print(f"   Total states: {final_states['total_states']}")
print(f"   State hashes: {final_states['state_hashes']}")
print(f"   Branches: {final_branches}")

# Test state loading
print(f"\n⏪ Testing state loading:")
print(f"Current nodes: {len(demo_g.nodes)}")
demo_g.load_state(state1)  # Go back to first state
print(f"After loading state1: {len(demo_g.nodes)} nodes")
demo_g.load_state(state2)  # Go to second state
print(f"After loading state2: {len(demo_g.nodes)} nodes")

print(f"\n🎉 GLI API Summary:")
print(f"   • g.save_state(message) - primary state saving")
print(f"   • g.commit(message) - backward compatibility")
print(f"   • g.load_state(hash) - time travel to any state")
print(f"   • g.states - comprehensive state info + hashes")
print(f"   • g.branches - branch dictionary")
print(f"   • g.filter_nodes/edges - dual interface (dict + lambda)")
print(f"   • All properties lazy-loaded from Rust backend!")

print(f"\n✅ GLI refactoring complete: maintainable, performant, robust!")

🎯 NEW: save_state method (more intuitive than commit)
✅ Saved state: 78f31ad65ee86645
✅ Saved state: e401809df9d584ce
✅ Legacy commit: 13df97228bd97dd3

📊 Final state overview:
   Total states: 4
   State hashes: ['78f31ad65ee86645', 'e401809df9d584ce', '13df97228bd97dd3']
   Branches: {'main': 'initial'}

⏪ Testing state loading:
Current nodes: 3
After loading state1: 2 nodes
After loading state2: 3 nodes

🎉 GLI API Summary:
   • g.save_state(message) - primary state saving
   • g.commit(message) - backward compatibility
   • g.load_state(hash) - time travel to any state
   • g.states - comprehensive state info + hashes
   • g.branches - branch dictionary
   • g.filter_nodes/edges - dual interface (dict + lambda)
   • All properties lazy-loaded from Rust backend!

✅ GLI refactoring complete: maintainable, performant, robust!


In [None]:
# Test new batch operations and mixed ID types
print("=== Testing New GLI Features ===")

# Create a new graph to test latest features
test_g = Graph()
print(f"✅ Created graph with Rust backend: {test_g.use_rust}")

# Test mixed ID types (string and integer)
print("\n1. Mixed ID Types:")
alice_id = test_g.add_node("alice", name="Alice", age=30, department="Engineering")
bob_id = test_g.add_node(42, name="Bob", age=25, department="Design")
charlie_id = test_g.add_node("charlie", name="Charlie", age=35, department="Engineering")

print(f"   Alice (string ID): {alice_id}")
print(f"   Bob (integer ID): {bob_id}")
print(f"   Charlie (string ID): {charlie_id}")

# Test new get_edge API with (source, target) parameters
test_g.add_edge(alice_id, bob_id, relationship="collaborates")
test_g.add_edge(bob_id, charlie_id, relationship="reports_to")

edge = test_g.get_edge(alice_id, bob_id)
print(f"   Edge Alice->Bob: {edge.attributes}")

# Test efficient batch attribute updates
print("\n2. Batch Operations:")
import time

# Prepare batch updates
batch_updates = {
    alice_id: {"salary": 100000, "level": "senior", "remote": True},
    bob_id: {"salary": 85000, "level": "mid", "remote": False},
    charlie_id: {"salary": 120000, "level": "senior", "remote": True}
}

start_time = time.time()
test_g.set_nodes_attributes_batch(batch_updates)
batch_time = time.time() - start_time

print(f"   Batch updated 3 nodes in {batch_time:.4f}s")
print(f"   Alice's salary: {test_g.nodes[alice_id]['salary']}")
print(f"   Bob's level: {test_g.nodes[bob_id]['level']}")

# Test lazy-loaded properties
print("\n3. Lazy Properties:")
print(f"   States info: {test_g.states}")
print(f"   Branches: {test_g.branches}")

# Test state management with new API
print("\n4. Enhanced State Management:")
initial_state = test_g.save_state("Mixed ID test graph")
print(f"   Saved state: {initial_state}")

# Create and switch branches
test_g.create_branch("feature/new-hires", switch=True)
print(f"   Created and switched to branch: feature/new-hires")

# Add more employees with batch operations
new_hires = {
    "diana": {"name": "Diana", "department": "Marketing", "level": "junior"},
    500: {"name": "Eve", "department": "Engineering", "level": "mid"},
    "frank": {"name": "Frank", "department": "Design", "level": "senior"}
}

for emp_id, attrs in new_hires.items():
    test_g.add_node(emp_id, **attrs)

print(f"   Added {len(new_hires)} new hires")
print(f"   Total nodes: {len(test_g.nodes)}")

# Test filtering with new API
engineers = test_g.filter_nodes({"department": "Engineering"})
print(f"   Engineers: {len(engineers)}")

# Test branch switching performance
start_time = time.time()
test_g.switch_branch("main")
switch_time = time.time() - start_time
print(f"   Branch switch took: {switch_time:.4f}s")
print(f"   Nodes after switch: {len(test_g.nodes)}")

print("\n✅ All new features working perfectly!")

=== Testing New GLI Features ===
✅ Created graph with Rust backend: True

1. Mixed ID Types:
   Alice (string ID): alice
   Bob (integer ID): 42
   Charlie (string ID): charlie
   Edge Alice->Bob: {'relationship': 'collaborates'}

2. Batch Operations:
   Batch updated 3 nodes in 0.0001s
   Alice's salary: 100000
   Bob's level: mid

3. Lazy Properties:
   States info: {'total_states': 1, 'pooled_nodes': 0, 'pooled_edges': 0, 'node_refs_tracked': 0, 'edge_refs_tracked': 0, 'current_hash': 'initial', 'state_hashes': ['initial'], 'auto_states': [], 'branches_count': 1}
   Branches: {'main': 'initial'}

4. Enhanced State Management:
   Saved state: 391c98df52417fbb
Switched to branch 'feature/new-hires' (state: 391c98df52417fbb)
   Created and switched to branch: feature/new-hires
   Added 3 new hires
   Total nodes: 6
   Engineers: 3
Switched to branch 'main' (state: initial)
   Branch switch took: 0.0001s
   Nodes after switch: 0

✅ All new features working perfectly!


In [None]:

# Enhanced Filtering with Subgraph Creation and Query Language Testing

print("=== Enhanced Filtering and Subgraph Features ===")

# Test 1: Current filtering behavior (returns node/edge IDs)
print("\n1. Current filtering behavior:")
managers_ids = g.filter_nodes({'role': 'Manager'})
print(f"Manager IDs (traditional): {len(managers_ids)} found")

# Test 2: Enhanced filtering with return_graph parameter
print("\n2. Enhanced filtering with subgraph creation:")
try:
    # This should create a new Graph object containing only managers
    managers_subgraph = g.filter_nodes({'role': 'Manager'}, return_graph=True)
    print(f"Managers subgraph: {managers_subgraph.node_count()} nodes, {managers_subgraph.edge_count()} edges")
    
    # Save this subgraph as a state
    g.save_state("managers_only")
    
    # The subgraph should be a fully functional Graph
    for node_id in list(managers_subgraph.nodes)[:3]:  # Show first 3
        node = managers_subgraph.get_node(node_id)
        print(f"  Manager: {node.attributes.get('name')} - {node.attributes.get('department')}")
        
except AttributeError as e:
    print(f"Enhanced filtering not yet implemented: {e}")

# Test 3: String-based query language
print("\n3. String-based query language:")
query_tests = [
    "role == 'Manager'",
    "salary > 80000", 
    "department == 'Engineering'",
    "is_remote == True",
    "performance_score >= 4.0",
    "role == 'Junior' AND salary > 70000",
    "department == 'Sales' OR department == 'Marketing'"
]

for query in query_tests:
    try:
        # Test both node ID return and subgraph return
        result_ids = g.filter_nodes(query)
        result_subgraph = g.filter_nodes(query, return_graph=True)
        print(f"Query '{query}': {len(result_ids)} nodes")
        
        if len(result_ids) > 0 and len(result_ids) <= 3:
            # Show details for small result sets
            for node_id in result_ids:
                node = g.get_node(node_id)
                relevant_attrs = {k: v for k, v in node.attributes.items() 
                                if k in ['name', 'role', 'department', 'salary', 'is_remote', 'performance_score']}
                print(f"  {relevant_attrs}")
                
    except (TypeError, NotImplementedError, AttributeError) as e:
        print(f"Query '{query}': Not yet implemented - {e}")

# Test 4: Edge filtering with subgraphs
print("\n4. Edge filtering with subgraph creation:")
try:
    # Filter edges and create subgraph
    collaboration_edges = g.filter_edges({'relationship': 'collaborates_with'}, return_graph=True)
    print(f"Collaboration subgraph: {collaboration_edges.node_count()} nodes, {collaboration_edges.edge_count()} edges")
    
    # String-based edge queries
    edge_queries = [
        "relationship == 'collaborates_with'",
        "strength > 0.7",
        "project_count >= 3"
    ]
    
    for query in edge_queries:
        edge_result = g.filter_edges(query, return_graph=True)
        print(f"Edge query '{query}': {edge_result.edge_count()} edges")
        
except (AttributeError, TypeError) as e:
    print(f"Enhanced edge filtering not yet implemented: {e}")

# Test 5: Subgraph state management and branching
print("\n5. Subgraph state management:")
try:
    # Create multiple filtered subgraphs and save as branches
    departments = ['Engineering', 'Sales', 'Marketing']
    
    for dept in departments:
        # Create department subgraph
        dept_subgraph = g.filter_nodes({'department': dept}, return_graph=True)
        
        # Save as a branch
        branch_name = f"dept_{dept.lower()}"
        dept_subgraph.create_branch(branch_name)
        dept_subgraph.save_state(f"{dept}_team")
        
        print(f"{dept} branch: {dept_subgraph.node_count()} nodes saved as '{branch_name}'")
        
        # Test working on the subgraph
        # Add department-specific analysis attributes
        with dept_subgraph.batch_operations() as batch:
            for node_id in dept_subgraph.nodes:
                node = dept_subgraph.get_node(node_id)
                # Add department size info
                batch.add_node_attribute(node_id, 'dept_size', dept_subgraph.node_count())
                # Add role distribution
                roles_in_dept = [dept_subgraph.get_node(nid).attributes.get('role') 
                               for nid in dept_subgraph.nodes]
                role_counts = {role: roles_in_dept.count(role) for role in set(roles_in_dept)}
                batch.add_node_attribute(node_id, 'role_distribution', role_counts)
        
        print(f"  Enhanced {dept} team with department analytics")
        
except (AttributeError, TypeError) as e:
    print(f"Subgraph state management not yet implemented: {e}")

# Test 6: Complex query combinations
print("\n6. Complex query combinations:")
try:
    # Multi-level filtering
    # 1. Filter high performers
    high_performers = g.filter_nodes("performance_score >= 4.0", return_graph=True)
    
    # 2. Within high performers, find managers
    high_performing_managers = high_performers.filter_nodes("role == 'Manager'", return_graph=True)
    
    # 3. Within those, find remote workers
    remote_high_performing_managers = high_performing_managers.filter_nodes("is_remote == True", return_graph=True)
    
    print(f"Query pipeline results:")
    print(f"  High performers: {high_performers.node_count()}")
    print(f"  High performing managers: {high_performing_managers.node_count()}")
    print(f"  Remote high performing managers: {remote_high_performing_managers.node_count()}")
    
    # Save this as a highly specific cached result
    remote_high_performing_managers.save_state("elite_remote_managers")
    
except (AttributeError, TypeError) as e:
    print(f"Complex query combinations not yet implemented: {e}")

# Test 7: Subgraph comparison and analysis
print("\n7. Subgraph analysis capabilities:")
try:
    # Compare different department subgraphs
    eng_graph = g.filter_nodes("department == 'Engineering'", return_graph=True)
    sales_graph = g.filter_nodes("department == 'Sales'", return_graph=True)
    
    # Analyze department differences
    eng_avg_salary = sum(eng_graph.get_node(nid).attributes.get('salary', 0) 
                        for nid in eng_graph.nodes) / eng_graph.node_count()
    sales_avg_salary = sum(sales_graph.get_node(nid).attributes.get('salary', 0) 
                          for nid in sales_graph.nodes) / sales_graph.node_count()
    
    print(f"Department analysis:")
    print(f"  Engineering: {eng_graph.node_count()} people, avg salary: ${eng_avg_salary:,.0f}")
    print(f"  Sales: {sales_graph.node_count()} people, avg salary: ${sales_avg_salary:,.0f}")
    
    # Cross-department collaboration analysis
    # Find edges between the two departments in the main graph
    cross_dept_edges = []
    for edge_id in g.edges:
        edge = g.edges[edge_id]
        source_dept = g.get_node(edge.source).attributes.get('department')
        target_dept = g.get_node(edge.target).attributes.get('department')
        if (source_dept == 'Engineering' and target_dept == 'Sales') or \
           (source_dept == 'Sales' and target_dept == 'Engineering'):
            cross_dept_edges.append(edge_id)
    
    print(f"  Cross-department collaboration edges: {len(cross_dept_edges)}")
    
except (AttributeError, TypeError) as e:
    print(f"Subgraph analysis not yet implemented: {e}")

print("\n=== Enhanced Filtering Feature Summary ===")
print("Proposed enhancements:")
print("1. ✓ return_graph parameter for filter_nodes/filter_edges")
print("2. ✓ String-based query language (SQL-like syntax)")
print("3. ✓ Subgraph state management and branching")
print("4. ✓ Complex query pipelines on subgraphs")
print("5. ✓ Cached filtered results as states")
print("6. ✓ Cross-subgraph analysis capabilities")

=== Enhanced Filtering and Subgraph Features ===

1. Current filtering behavior:
Manager IDs (traditional): 50 found

2. Enhanced filtering with subgraph creation:


TypeError: filter_nodes() got an unexpected keyword argument 'return_graph'

In [4]:
# Enhanced Filtering with Subgraph Creation and Query Language Testing

print("=== Enhanced Filtering and Subgraph Features ===")

# Test 1: Current filtering behavior (returns node/edge IDs)
print("\n1. Current filtering behavior:")
managers_ids = g.filter_nodes({'role': 'Manager'})
print(f"Manager IDs (traditional): {len(managers_ids)} found")

# Test 2: Enhanced filtering with return_graph parameter
print("\n2. Enhanced filtering with subgraph creation:")
# This should create a new Graph object containing only managers
managers_subgraph = g.filter_nodes({'role': 'Manager'}, return_graph=True)
print(f"Managers subgraph: {managers_subgraph.node_count()} nodes, {managers_subgraph.edge_count()} edges")

# The subgraph should be a fully functional Graph
for node_id in list(managers_subgraph.nodes)[:3]:  # Show first 3
    node = managers_subgraph.get_node(node_id)
    print(f"  Manager: {node.attributes.get('name')} - {node.attributes.get('department')}")

# Test 3: String-based query language
print("\n3. String-based query language:")
query_tests = [
    "role == 'Manager'",
    "salary > 80000", 
    "department == 'Engineering'",
    "is_remote == True",
    "performance_score >= 4.0",
    "role == 'Junior' AND salary > 70000",
    "department == 'Sales' OR department == 'Marketing'"
]

for query in query_tests:
    # Test both node ID return and subgraph return
    result_ids = g.filter_nodes(query)
    result_subgraph = g.filter_nodes(query, return_graph=True)
    print(f"Query '{query}': {len(result_ids)} nodes")
    
    if len(result_ids) > 0 and len(result_ids) <= 3:
        # Show details for small result sets
        for node_id in result_ids[:3]:  # Limit to first 3
            node = g.get_node(node_id)
            relevant_attrs = {k: v for k, v in node.attributes.items() 
                            if k in ['name', 'role', 'department', 'salary', 'is_remote', 'performance_score']}
            print(f"  {relevant_attrs}")

# Test 4: Edge filtering with subgraphs
print("\n4. Edge filtering with subgraph creation:")
# Filter edges and create subgraph
collaboration_edges = g.filter_edges({'relationship': 'collaborates_with'}, return_graph=True)
print(f"Collaboration subgraph: {collaboration_edges.node_count()} nodes, {collaboration_edges.edge_count()} edges")

# String-based edge queries
edge_queries = [
    "relationship == 'collaborates_with'",
    "relationship == 'manages'"
]

for query in edge_queries:
    edge_result = g.filter_edges(query, return_graph=True)
    print(f"Edge query '{query}': {edge_result.edge_count()} edges, {edge_result.node_count()} nodes")

# Test 5: Subgraph state management and branching
print("\n5. Subgraph state management:")
# Create multiple filtered subgraphs and save as branches
departments = ['Engineering', 'Sales', 'Marketing']

for dept in departments:
    # Create department subgraph
    dept_subgraph = g.filter_nodes({'department': dept}, return_graph=True)
    
    print(f"{dept} subgraph: {dept_subgraph.node_count()} nodes")
    
    if dept_subgraph.node_count() > 0:
        # Save as a state using the original graph's state management
        state_name = f"{dept.lower()}_team"
        g.save_state(state_name)
        print(f"  Saved state '{state_name}' for {dept} analysis")

# Test 6: Complex query combinations
print("\n6. Complex query combinations:")
# Multi-level filtering
# 1. Filter high performers
high_performers = g.filter_nodes("performance_score >= 4.0", return_graph=True)

# 2. Within high performers, find managers
if high_performers.node_count() > 0:
    high_performing_managers = high_performers.filter_nodes("role == 'Manager'", return_graph=True)
    
    # 3. Within those, find remote workers
    if high_performing_managers.node_count() > 0:
        remote_high_performing_managers = high_performing_managers.filter_nodes("is_remote == True", return_graph=True)
        
        print(f"Query pipeline results:")
        print(f"  High performers: {high_performers.node_count()}")
        print(f"  High performing managers: {high_performing_managers.node_count()}")
        print(f"  Remote high performing managers: {remote_high_performing_managers.node_count()}")
    else:
        print(f"Query pipeline results:")
        print(f"  High performers: {high_performers.node_count()}")
        print(f"  High performing managers: 0")
else:
    print("No high performers found for pipeline test")

# Test 7: Subgraph comparison and analysis
print("\n7. Subgraph analysis capabilities:")
# Compare different department subgraphs
eng_graph = g.filter_nodes("department == 'Engineering'", return_graph=True)
sales_graph = g.filter_nodes("department == 'Sales'", return_graph=True)

if eng_graph.node_count() > 0:
    eng_avg_salary = sum(eng_graph.get_node(nid).attributes.get('salary', 0) 
                        for nid in eng_graph.nodes) / eng_graph.node_count()
    print(f"Engineering: {eng_graph.node_count()} people, avg salary: ${eng_avg_salary:,.0f}")

if sales_graph.node_count() > 0:
    sales_avg_salary = sum(sales_graph.get_node(nid).attributes.get('salary', 0) 
                          for nid in sales_graph.nodes) / sales_graph.node_count()
    print(f"Sales: {sales_graph.node_count()} people, avg salary: ${sales_avg_salary:,.0f}")

# Cross-department collaboration analysis
cross_dept_edges = []
for edge_id in g.edges:
    edge = g.edges[edge_id]
    source_dept = g.get_node(edge.source).attributes.get('department')
    target_dept = g.get_node(edge.target).attributes.get('department')
    if source_dept and target_dept and source_dept != target_dept:
        cross_dept_edges.append(edge_id)

print(f"Cross-department collaboration edges: {len(cross_dept_edges)}")

print("\n=== Enhanced Filtering Feature Summary ===")
print("✅ String-based query language (SQL-like syntax)")
print("✅ return_graph parameter for filter_nodes/filter_edges")  
print("✅ Subgraph creation from filtered results")
print("✅ Complex query pipelines on subgraphs")
print("✅ State management integration")
print("✅ Cross-subgraph analysis capabilities")
print("✅ Backward compatibility with existing filter methods")

=== Enhanced Filtering and Subgraph Features ===

1. Current filtering behavior:
Manager IDs (traditional): 50 found

2. Enhanced filtering with subgraph creation:
Managers subgraph: 50 nodes, 0 edges
  Manager: Employee_007 - Sales
  Manager: Employee_033 - Engineering
  Manager: Employee_032 - Marketing

3. String-based query language:
Query 'role == 'Manager'': 50 nodes
Query 'salary > 80000': 660 nodes
Query 'department == 'Engineering'': 10000 nodes
Query 'is_remote == True': 7532 nodes
Query 'performance_score >= 4.0': 1529 nodes
Query 'role == 'Junior' AND salary > 70000': 10406 nodes
Query 'department == 'Sales' OR department == 'Marketing'': 20000 nodes

4. Edge filtering with subgraph creation:
Collaboration subgraph: 0 nodes, 0 edges
Edge query 'relationship == 'collaborates_with'': 0 edges, 0 nodes
Edge query 'relationship == 'manages'': 1500 edges, 2000 nodes

5. Subgraph state management:
Engineering subgraph: 10000 nodes
  Saved state 'engineering_team' for Engineering a

In [6]:
g.filter_nodes("performance_score >= 4.0", return_graph=True).save_state("high_performers")


'98479532a05d4015'

In [7]:
g.states

{'total_states': 6,
 'pooled_nodes': 31584,
 'pooled_edges': 1500,
 'node_refs_tracked': 31584,
 'edge_refs_tracked': 1500,
 'current_hash': 'e0bbe7eeedff4ffb',
 'state_hashes': ['e0bbe7eeedff4ffb',
  '8d2017024cc3dd15',
  '72a3c906f23581a1',
  '98cc4299e2d85d8b',
  'c436dff18cec4bf1'],
 'auto_states': ['8d2017024cc3dd15',
  '98cc4299e2d85d8b',
  '72a3c906f23581a1',
  'c436dff18cec4bf1',
  'e0bbe7eeedff4ffb'],
 'branches_count': 1}