# DreamStreets: AI-Powered Street Network Analysis

### OpenAI Open Model Hackathon Submission

Transforming natural language queries into sophisticated network analysis using GPT-OSS-120b.

---

## 1. System Setup

In [1]:
# Standard library
import json
import math
import os
import time
from collections import deque
from pathlib import Path
from typing import Dict, Any, List

# Data manipulation and analysis
import pandas as pd
import numpy as np
import geopandas as gpd

# Database
import duckdb

# Network analysis
import networkx as nx
import osmnx as ox

# Visualization
import matplotlib.pyplot as plt
%matplotlib inline
import pydeck as pdk

# LangChain and LLM
from langchain_ollama import ChatOllama
from langchain_core.tools import tool
from langchain_core.messages import HumanMessage, SystemMessage
from langgraph.prebuilt import create_react_agent

# Rich console output
from rich.console import Console
from rich.table import Table
from rich.panel import Panel
from rich.markdown import Markdown
from rich.text import Text
from rich import box

# Jupyter display
from IPython.display import display, Markdown as IPMarkdown, HTML, clear_output

# Configure OSMnx
ox.settings.use_cache = True
ox.settings.log_console = True

# Initialize console for Rich output
console = Console()

  backends = _get_backends("networkx.backends")
  backend_info.update(_get_backends("networkx.backend_info", load_and_call=True))


In [2]:
# Shared, mutable session state read and written by every cell below.
state = dict(
    graph=None,                     # set by initialize_environment()
    db=None,                        # DuckDB connection, set by initialize_environment()
    schema={},                      # node/edge counts plus per-table column names
    tool_history=deque(maxlen=5),   # rolling log: only the 5 newest tool calls are kept
    attempted_queries=set(),        # NOTE(review): not used in the visible cells
    last_errors={},                 # NOTE(review): not used in the visible cells
)

In [3]:
def get_recent_results(n: int = 2) -> str:
    """Summarise the most recent tool calls for use as prompt context.

    Args:
        n: Maximum number of history entries to include (oldest first,
            newest last). Non-positive values yield the placeholder text.

    Returns:
        One ``"<tool>: <summary>"`` line per entry, joined with newlines, or
        a fixed placeholder sentence when there is nothing to report.
    """
    history = state['tool_history']
    # Guard n <= 0 explicitly: the slice [-0:] is the *whole* list and a
    # negative n would drop the oldest entries instead of limiting the count.
    if not history or n <= 0:
        return "No previous analysis available."

    return "\n".join(
        f"{entry['tool']}: {entry['summary']}" for entry in list(history)[-n:]
    )

def diagnose_error(error: str, code: str) -> str:
    """Map an exception message to a short corrective hint.

    Rules are checked in order and the first match wins, so the specific
    ``name 'G' is not defined`` rule must stay ahead of the generic
    ``is not defined`` rule.

    Args:
        error: The exception message text to inspect.
        code: The generated code/query that failed. Currently unused; kept so
            existing callers keep working.

    Returns:
        A one-line hint, or a generic fallback when no rule matches.
    """
    if "is not in the graph" in error:
        return "Node IDs in the graph are STRINGS. Use str(node_id) or '5340680144' not 5340680144."
    if "name 'G' is not defined" in error:
        return "G should be accessed directly without checks"
    if "got an unexpected keyword argument 'keys'" in error:
        return "MultiDiGraph.edges() doesn't support keys=True. Use G.edges(data=True) instead."
    if "is not defined" in error:
        return "Variable not persisting in exec scope. Ensure all code is in ONE continuous block."
    if "unsupported operand type" in error:
        return "Type mismatch - ensure numeric attributes are floats"
    # NOTE(review): str(KeyError('x')) is just "'x'", so this rule only fires
    # when the caller passes text that includes the exception class name.
    if "KeyError" in error:
        return "Attribute not found - check available node/edge attributes"
    if "Referenced column" in error and "not found" in error:
        return "Column doesn't exist in table. Check actual table schema first"
    # Compare lower-case against lower-case; the previous mixed-case needle
    # could never occur in error.lower(), which made this hint unreachable.
    if "st_setsrid" in error.lower():
        return "DuckDB doesn't have ST_SetSRID. Use ST_Point directly"
    if "list indices must be integers" in error:
        return "Indexing error - check data structure types"
    return "Check code syntax and variable usage"

## 2. Environment Initialization

In [4]:
def _coerce_numeric_strings(attrs, only_keys=None):
    """Replace, in place, string values that parse as floats with floats.

    When ``only_keys`` is given, only those keys are considered.
    """
    for key, value in attrs.items():
        if not isinstance(value, str):
            continue
        if only_keys is not None and key not in only_keys:
            continue
        try:
            # Re-assigning an existing key while iterating is safe: the
            # dict's size does not change.
            attrs[key] = float(value)
        except (ValueError, TypeError):
            pass  # genuinely textual attribute (e.g. a street name) - leave it


def initialize_environment(graphml_path: str = 'chinatown.graphml', db_path: str = 'chinatown.duckdb'):
    """Load the street graph and open the database, then publish both in ``state``.

    GraphML attribute values that arrive as strings are converted to floats
    (every edge attribute that parses; ``x``, ``y`` and ``street_count`` on
    nodes). ``state`` is only updated once loading has succeeded, so a failed
    call never leaves a new graph paired with the previous database.

    Args:
        graphml_path: Path of the GraphML file to read.
        db_path: Path of the DuckDB database file to open read-write.

    Returns:
        ``True`` on success.

    Raises:
        Exception: Any error from loading the graph or opening the database is
            printed and re-raised unchanged.
    """
    print("\n🚀 Initializing DreamStreets Analysis System...")

    db = None
    try:
        graph = nx.read_graphml(graphml_path)

        # CRITICAL FIX: Convert ALL numeric edge attributes from string to float
        for _, _, attrs in graph.edges(data=True):
            _coerce_numeric_strings(attrs)

        # Also convert node attributes
        for _, attrs in graph.nodes(data=True):
            _coerce_numeric_strings(attrs, only_keys=('x', 'y', 'street_count'))

        # Release the connection left over from an earlier call before opening
        # a new one; it was previously leaked on every re-initialisation.
        previous_db = state.get('db')
        if previous_db is not None:
            previous_db.close()
            state['db'] = None

        db = duckdb.connect(db_path, read_only=False)
        db.execute("INSTALL spatial; LOAD spatial;")

        # Get exact schema
        schema = {
            'nodes': graph.number_of_nodes(),
            'edges': graph.number_of_edges(),
            'tables': {}
        }

        # Get table schemas (best effort: a table that cannot be inspected is
        # simply left out of the schema).
        for table in ('nodes', 'edges', 'pois'):
            try:
                cols = db.execute(f"PRAGMA table_info({table})").fetchdf()
                schema['tables'][table] = cols['name'].tolist()
            except Exception as table_error:
                print(f"⚠️ Skipping table '{table}': {table_error}")

        # Commit everything together now that nothing else can fail.
        state['graph'] = graph
        state['db'] = db
        state['schema'] = schema

        print(f"📊 Network: {state['schema']['nodes']} nodes, {state['schema']['edges']} edges")
        print(f"📁 Database tables: {list(state['schema']['tables'].keys())}")
        print("✅ All numeric attributes converted from strings")

    except Exception as e:
        # Do not leak a freshly opened connection that never reached `state`.
        if db is not None and state.get('db') is not db:
            db.close()
        print(f"❌ Initialization error: {e}")
        raise

    return True

# Load the default Chinatown graph and database (the function's default
# arguments) so the analysis tools defined below have data to work with.
initialize_environment()


🚀 Initializing DreamStreets Analysis System...
📊 Network: 139 nodes, 274 edges
📁 Database tables: ['nodes', 'edges', 'pois']
✅ All numeric attributes converted from strings


True

## 3. AI-Powered Analysis Tools

In [5]:
@tool
def network_analyst(task: str) -> str:
    """
    Analyzes street network topology using NetworkX algorithms.
    
    USE THIS TOOL WHEN:
    - Computing network metrics (centrality, connectivity, clustering)
    - Finding shortest paths between intersections
    - Analyzing network structure and topology
    - Calculating accessibility metrics
    - Identifying critical nodes or edges
    
    DO NOT USE WHEN:
    - Looking up specific places or POIs
    - Querying facility information
    - Needing exact addresses or names
    """
    # NOTE: the docstring above is presumably what @tool exposes to the agent
    # as the tool description, so its wording is part of runtime behaviour.
    #
    # Flow: ask the model for NetworkX code that sets FINAL_RESULT, execute it
    # against state['graph'], and retry up to three times. Each retry carries
    # the previous error and its diagnosis back to the model.
    print(f"\n📊 Network Analyst processing: '{task}'")
    
    llm = ChatOllama(model="gpt-oss:120b", temperature=0.1)
    
    # Get recent context
    recent_context = get_recent_results()
    
    # Error text and hint from the previous attempt, fed into the next prompt.
    last_error = None
    last_hint = None
    
    for attempt in range(3):
        if attempt > 0:
            print(f"   🔄 Retry {attempt}/2 with enhanced guidance")
        
        # Build prompt with progressive enhancement
        prompt = f"""
You are an expert Python programmer specializing in NetworkX library for graph analysis.

EXACT GRAPH SCHEMA:
- Graph object named 'G' is a MultiDiGraph with {state['schema']['nodes']} nodes and {state['schema']['edges']} edges
- G EXISTS in the global namespace - DO NOT check for it, just use it directly
- ALL nodes represent street intersections (not facilities or POIs)
- Node IDs are STRINGS like '5340680144' NOT integers
- Node attributes: 'y' (lat), 'x' (lon), 'street_count' (float)
- Edge attributes: 'length' (meters, float), 'name' (string), 'highway' (string)

RECENT CONTEXT:
{recent_context}

TASK: {task}

CRITICAL RULES:
1. Write ALL code as ONE CONTINUOUS BLOCK - no blank lines, no separate sections
2. NEVER split variable definitions from their usage
3. Node IDs are ALWAYS strings: use '5340680144' not 5340680144
4. Set FINAL_RESULT at the END of your code block
5. Keep results concise - top 5-10 items, not all {state['schema']['nodes']} nodes

TEMPLATE TO FOLLOW:
# Everything in one continuous block
metric = nx.some_algorithm(G, weight='length')
sorted_items = sorted(metric.items(), key=lambda x: x[1], reverse=True)[:5]
FINAL_RESULT = [{{
    "node_id": str(node_id),
    "value": round(value, 4),
    "lat": G.nodes[node_id].get('y', 0),
    "lon": G.nodes[node_id].get('x', 0)
}} for node_id, value in sorted_items]

Provide ONLY executable Python code. No explanations, no markdown, no blank lines."""

        if attempt == 1:
            prompt += """

DEBUGGING HINTS:
- If you see "name 'X' is not defined", you split the code incorrectly
- Write EVERYTHING in one block like this (NO BLANK LINES):
source = '5340680144'
dists = nx.single_source_dijkstra_path_length(G, source, weight='length')
sorted_dists = sorted(dists.items(), key=lambda x: x[1])[:5]
FINAL_RESULT = [{"node": n, "dist": d} for n, d in sorted_dists]
"""

        if attempt == 2:
            prompt += """

USE THIS EXACT PATTERN (copy and modify):
# NO BLANK LINES, ALL ONE BLOCK
centrality = nx.degree_centrality(G)
top = sorted(centrality.items(), key=lambda x: x[1], reverse=True)[:5]
FINAL_RESULT = [{"node": str(n), "score": round(s, 4), "lat": G.nodes[n]['y'], "lon": G.nodes[n]['x']} for n, s in top]
"""
        
        # Without this the retry would re-ask the same question blind; give
        # the model the concrete failure so it can actually correct itself.
        if last_error is not None:
            prompt += (
                "\n\nPREVIOUS ATTEMPT FAILED:\n"
                f"- Error: {last_error}\n"
                f"- Hint: {last_hint}\n"
            )
        
        # Bound before the try so the except handler can always reference it,
        # even when llm.invoke() itself raises (previously an UnboundLocalError).
        code = ""
        try:
            response = llm.invoke(prompt)
            code = response.content.strip().replace('```python', '').replace('```', '')
            
            # FIX: Remove ALL blank lines to ensure single block execution
            lines = [line for line in code.split('\n') if line.strip()]
            code = '\n'.join(lines)
            
            # Remove import statements
            code = '\n'.join([line for line in code.split('\n') 
                            if 'import networkx' not in line and 'from networkx' not in line])
            
            print(f"   📝 Generated code length: {len(code)} chars")
            print(f"   🔧 Code preview: {code[:200]}...")
            
            # FIX: Create a wrapper to ensure all variables stay in scope
            wrapped_code = f"""
# All variables defined here
{code}
# Ensure FINAL_RESULT exists
if 'FINAL_RESULT' not in locals():
    FINAL_RESULT = None
"""
            
            # FIX: Execute with both globals and locals merged
            exec_namespace = {
                'nx': nx,
                'G': state['graph'],
                'json': json,
                'math': math,
                'list': list,
                'dict': dict,
                'str': str,
                'float': float,
                'int': int,
                'round': round,
                'sorted': sorted,
                'len': len,
                'min': min,
                'max': max,
                'sum': sum,
                'enumerate': enumerate,
                'FINAL_RESULT': None,
                '__builtins__': __builtins__
            }
            
            # SECURITY NOTE: this runs model-generated code with full builtins
            # and no sandbox. Only use with a trusted local model and trusted
            # queries; it is flagged here rather than silently replaced.
            # Execute in single namespace
            exec(wrapped_code, exec_namespace, exec_namespace)
            
            result = exec_namespace.get('FINAL_RESULT')
            
            if result is not None:
                print(f"   ✅ FINAL_RESULT type: {type(result)}")
                print(f"   ✅ FINAL_RESULT preview: {str(result)[:200]}")
                
                # Store in history
                state['tool_history'].append({
                    'tool': 'network_analyst',
                    'summary': f"Analyzed {task[:50]}",
                    'result': result
                })
                return f"Analysis complete: {json.dumps(result, default=str)}"
            else:
                raise ValueError("FINAL_RESULT was not set")
                
        except Exception as e:
            error_msg = str(e)
            print(f"   ❌ Execution error: {error_msg}")
            
            if attempt < 2:
                last_error = error_msg
                last_hint = diagnose_error(error_msg, code)
                print(f"   🔍 Diagnosis: {last_hint}")
                continue
            else:
                return f"Network analysis failed: {error_msg}. Try simplifying the query."
    
    return "Network analysis could not be completed"

In [6]:
@tool
def database_analyst(task: str) -> str:
    """
    Queries POIs and performs spatial database operations.
    
    USE THIS TOOL WHEN:
    - Finding specific places (shops, hospitals, restaurants, etc.)
    - Calculating distances to/from POIs
    - Counting facilities by type
    - Spatial queries (within distance, nearest neighbor)
    - Filtering POIs by attributes
    
    DO NOT USE WHEN:
    - Computing graph algorithms
    - Analyzing network topology
    - Working only with intersection data
    """
    # NOTE: the docstring above is presumably what @tool exposes to the agent
    # as the tool description, so its wording is part of runtime behaviour.
    #
    # Flow: ask the model for one DuckDB query, run it on state['db'], and
    # retry once. The retry carries the database error and its diagnosis back
    # to the model instead of re-sending the identical prompt.
    print(f"\n🔍 Database Analyst processing: '{task}'")
    
    llm = ChatOllama(model="gpt-oss:120b", temperature=0.1)
    
    recent_context = get_recent_results()
    
    # Error text and hint from the previous attempt, fed into the next prompt.
    last_error = None
    last_hint = None
    
    for attempt in range(2):
        if attempt > 0:
            print(f"   🔄 Retry with simpler query approach")
        
        prompt = f"""
You are an expert in DuckDB SQL with spatial extensions.

EXACT DATABASE SCHEMA:

Table 'nodes' (street intersections ONLY - NO facilities here):
- node_id: VARCHAR (e.g., '5340680144')
- lat: DOUBLE
- lon: DOUBLE  
- street_count: INTEGER
- geom: GEOMETRY

Table 'pois' (ALL facilities and amenities are HERE):
- lat: DOUBLE
- lon: DOUBLE
- geom: GEOMETRY
- amenity: VARCHAR (values include: 'hospital', 'clinic', 'restaurant', 'school', etc.)
- building: VARCHAR
- name: VARCHAR
NOTE: No 'shop', 'cuisine', 'neighborhood' columns exist

MEDICAL FACILITIES are in POIs table where:
- amenity = 'hospital' OR amenity = 'clinic' OR amenity = 'health_center'

RECENT CONTEXT:
{recent_context}

TASK: {task}

Write a SINGLE, SIMPLE SQL query.
For medical facilities: SELECT * FROM pois WHERE amenity IN ('hospital', 'clinic', 'health_center')
For nearest to point: ORDER BY ST_Distance(geom, ST_Point(lon, lat)) LIMIT 1

Provide ONLY the SQL query. No explanations."""
        
        if last_error is not None:
            prompt += (
                "\n\nPREVIOUS ATTEMPT FAILED:\n"
                f"- Error: {last_error}\n"
                f"- Hint: {last_hint}\n"
            )
        
        # Bound before the try so the handler can pass it to diagnose_error()
        # even when llm.invoke() itself raises.
        sql = ""
        try:
            response = llm.invoke(prompt)
            # Strip again after removing the fences: they usually leave a
            # leading/trailing newline behind.
            sql = response.content.strip().replace('```sql', '').replace('```', '').strip()
            
            print(f"   📝 Generated SQL length: {len(sql)} chars")
            preview = sql[:200] + "..." if len(sql) > 200 else sql
            print(f"   🔧 SQL preview: {preview}")
            
            # SECURITY NOTE: model-generated SQL runs unreviewed on a
            # connection that initialize_environment() opens read-write.
            result_df = state['db'].execute(sql).fetchdf()
            
            print(f"   ✅ Query returned {len(result_df)} rows")
            if not result_df.empty:
                print(f"   ✅ Columns: {list(result_df.columns)[:5]}")
            
            state['tool_history'].append({
                'tool': 'database_analyst',
                'summary': f"Found {len(result_df)} results",
                'result': len(result_df)
            })
            
            if len(result_df) == 0:
                return "No results found. The requested amenity type may not exist in this dataset."
            elif len(result_df) > 20:
                # Large result sets are truncated to keep the agent's context small.
                return f"Found {len(result_df)} results. First 10:\n{result_df.head(10).to_string()}"
            else:
                return f"Results ({len(result_df)} rows):\n{result_df.to_string()}"
                
        except Exception as e:
            error_msg = str(e)
            print(f"   ❌ Query error: {error_msg}")
            
            if attempt == 0:
                # diagnose_error() already carries DuckDB-specific hints
                # (unknown column, ST_SetSRID); use them for the retry.
                last_error = error_msg
                last_hint = diagnose_error(error_msg, sql)
                print(f"   🔍 Diagnosis: {last_hint}")
                continue
            else:
                return f"Database query failed: {error_msg}"
    
    return "Database query could not be completed"

In [7]:
def analyze(query: str):
    """Answer a natural-language question with the two-tool ReAct agent.

    Prints a banner, the agent's final answer, the elapsed time and the tools
    that were used, and returns the final answer text. The shared tool
    history is cleared first so each query starts from a clean slate.
    """
    rule = '=' * 70
    print(f"\n{rule}\n🌐 Street Network Analysis\n{rule}")
    print(f"📋 Query: {query}")
    print(rule)

    model = ChatOllama(model="gpt-oss:120b", temperature=0.1)

    # Clear history for new query
    history = state['tool_history']
    history.clear()

    # Build context
    enhanced_query = f"""
SYSTEM STATE:
- Graph 'G' is loaded with {state['schema']['nodes']} nodes and {state['schema']['edges']} edges
- Database has tables: {list(state['schema']['tables'].keys())}
- All numeric attributes (length, x, y, street_count) are floats
- Node IDs are STRINGS (e.g., '5340680144')

AVAILABLE TOOLS:
1. network_analyst: For graph algorithms, centrality, paths, network metrics
   - Works with the street network graph G
   - Returns JSON with computed metrics
   
2. database_analyst: For finding places, counting facilities, spatial queries
   - Queries the POIs table for amenities and buildings
   - Returns query results as tables

USER QUERY: {query}

Analyze the query and provide actionable insights with specific numbers.
"""

    print("\n🤔 Analyzing...\n")

    # Create and run agent
    agent = create_react_agent(model, [network_analyst, database_analyst])

    started = time.time()
    try:
        outcome = agent.invoke(
            {"messages": [HumanMessage(content=enhanced_query)]},
            config={"recursion_limit": 25}
        )
        final_answer = outcome["messages"][-1].content
    except Exception as exc:
        if "recursion limit" not in str(exc).lower():
            final_answer = f"❌ Analysis error: {str(exc)}"
        else:
            # The agent ran out of steps: report whatever the tools managed
            # to record before the limit was hit.
            found = [f"- {entry['tool']}: {entry['summary']}" for entry in history]
            partial = "\n".join(found) if found else 'No successful tool calls completed.'
            final_answer = f"""⚠️ Analysis incomplete after maximum attempts.

Partial results found:
{partial}

Try a simpler or more specific query."""

    elapsed = time.time() - started

    print("\n" + rule)
    print("🎯 ANALYSIS RESULT")
    print(rule)
    print(final_answer)
    print(f"\n⏱️  Time: {elapsed:.1f} seconds")

    if history:
        print("\n📊 Tools used:")
        for entry in history:
            print(f"   - {entry['tool']}: {entry['summary']}")

    return final_answer

---
## Part 1: Urban Planning in Chinatown, NYC
Dense urban network analysis for business location optimization.

### Query 1: Optimal Coffee Shop Location

In [8]:
# Banner printed through the Rich console so the [..] markup tags are styled.
console.print("[bold cyan]DreamStreets[/bold cyan] - Powered by [yellow]GPT-OSS-120b[/yellow]")
your_query = "I want to open a coffee shop. Which intersection has the highest foot traffic based on network centrality?"

# Run the agent: analyze() prints the full report itself and also returns the
# final answer text, kept here in `result`.
result = analyze(your_query)


🌐 Street Network Analysis
📋 Query: I want to open a coffee shop. Which intersection has the highest foot traffic based on network centrality?

🤔 Analyzing...


📊 Network Analyst processing: 'compute betweenness centrality for all nodes'
   📝 Generated code length: 308 chars
   🔧 Code preview: metric = nx.betweenness_centrality(G, weight='length')
sorted_items = sorted(metric.items(), key=lambda x: x[1], reverse=True)[:5]
FINAL_RESULT = [{"node_id": str(node_id), "value": round(value, 4), "...
   ✅ FINAL_RESULT type: <class 'list'>
   ✅ FINAL_RESULT preview: [{'node_id': '42427316', 'value': 0.2034, 'lat': 40.7180188, 'lon': -73.9999527}, {'node_id': '5161246307', 'value': 0.1943, 'lat': 40.7161492, 'lon': -73.9961338}, {'node_id': '42435451', 'value': 0.

📊 Network Analyst processing: 'compute degree centrality for all nodes'
   📝 Generated code length: 286 chars
   🔧 Code preview: metric = nx.degree_centrality(G)
sorted_items = sorted(metric.items(), key=lambda x: x[1], reverse=True)

### Query 2: Critical Transit Bottleneck

In [9]:
# Ask which single intersection matters most for connectivity; analyze()
# prints the report and returns the final answer text.
result = analyze(
    "Which intersection is the most critical bottleneck - where its closure would "
    "disconnect the most nodes from the network?"
)


🌐 Street Network Analysis
📋 Query: Which intersection is the most critical bottleneck - where its closure would disconnect the most nodes from the network?

🤔 Analyzing...


📊 Network Analyst processing: 'Identify node whose removal disconnects the most nodes, and report the number of nodes disconnected.'
   📝 Generated code length: 474 chars
   🔧 Code preview: total=len(G)
results={}
for n in G.nodes:
    H=G.copy()
    H.remove_node(n)
    comps=list(nx.weakly_connected_components(H))
    largest=max((len(c) for c in comps),default=0)
    disconnected=(tot...
   ✅ FINAL_RESULT type: <class 'list'>
   ✅ FINAL_RESULT preview: [{'node_id': '42440798', 'value': 1, 'lat': 40.7156185, 'lon': -73.99425}, {'node_id': '42440825', 'value': 1, 'lat': 40.7205482, 'lon': -74.0034713}, {'node_id': '588455736', 'value': 1, 'lat': 40.71

🎯 ANALYSIS RESULT
**Result – no “network‑wide” bottleneck**

I ran a full articulation‑point analysis on the 139‑node, 274‑edge street graph.  
For every node I me

---
## Part 2: Humanitarian Response in Cox's Bazar

Emergency planning for the world's largest refugee camp.

In [10]:
# Switch to Cox's Bazar data: drop tool results recorded for the previous
# dataset, then reload state['graph'], state['db'] and state['schema'] from
# the Cox's Bazar files.
state['tool_history'].clear()
initialize_environment('coxs_bazar.graphml', 'coxs_bazar.duckdb')


🚀 Initializing DreamStreets Analysis System...
📊 Network: 153 nodes, 378 edges
📁 Database tables: ['nodes', 'edges', 'pois']
✅ All numeric attributes converted from strings


True

### Context: World's Largest Refugee Camp
- **Population**: ~1 million Rohingya refugees
- **Challenge**: Monsoon flooding isolates communities
- **Need**: Strategic placement of emergency resources

### Query 3: Emergency Evacuation Center Placement

In [11]:
# Ask for the best evacuation-centre site by closeness centrality; analyze()
# prints the report and returns the final answer text.
result = analyze(
    "If we need to build an emergency evacuation center accessible to the maximum population, "
    "which intersection should we choose based on closeness centrality?"
)


🌐 Street Network Analysis
📋 Query: If we need to build an emergency evacuation center accessible to the maximum population, which intersection should we choose based on closeness centrality?

🤔 Analyzing...


📊 Network Analyst processing: 'compute closeness centrality for all nodes and return top 5 nodes with highest centrality'
   📝 Generated code length: 308 chars
   🔧 Code preview: metric = nx.closeness_centrality(G, distance='length')
sorted_items = sorted(metric.items(), key=lambda x: x[1], reverse=True)[:5]
FINAL_RESULT = [{"node_id": str(node_id), "value": round(value, 4), "...
   ✅ FINAL_RESULT type: <class 'list'>
   ✅ FINAL_RESULT preview: [{'node_id': '5340680144', 'value': 0.002, 'lat': 21.2143361, 'lon': 92.1666239}, {'node_id': '1741257277', 'value': 0.002, 'lat': 21.2141413, 'lon': 92.1670948}, {'node_id': '5239695068', 'value': 0.

🎯 ANALYSIS RESULT
**Recommendation – Best Intersection for an Emergency Evacuation Center**

| Rank | Intersection (node ID) | Closeness Ce

### Query 4: Flood Response Priority Intersections

In [12]:
# Ask for the articulation points to prioritise during flooding; analyze()
# prints the report and returns the final answer text.
result = analyze(
    "During flooding, which intersections should be prioritized for emergency supply distribution "
    "to reach isolated communities? Find articulation points that connect separated areas."
)


🌐 Street Network Analysis
📋 Query: During flooding, which intersections should be prioritized for emergency supply distribution to reach isolated communities? Find articulation points that connect separated areas.

🤔 Analyzing...


📊 Network Analyst processing: 'find articulation points'
   📝 Generated code length: 320 chars
   🔧 Code preview: ap=set(nx.articulation_points(G.to_undirected()))
metric={n:G.degree(n) for n in ap}
sorted_items=sorted(metric.items(),key=lambda x:x[1],reverse=True)[:5]
FINAL_RESULT=[{"node_id":str(node_id),"value...
   ✅ FINAL_RESULT type: <class 'list'>
   ✅ FINAL_RESULT preview: [{'node_id': '5239666860', 'value': 8, 'lat': 21.2129011, 'lon': 92.1678763}, {'node_id': '5340680017', 'value': 8, 'lat': 21.2135787, 'lon': 92.165865}, {'node_id': '5191404438', 'value': 8, 'lat': 2

📊 Network Analyst processing: 'connected components after removing each articulation point'
   📝 Generated code length: 466 chars
   🔧 Code preview: art_points = list(nx.articulatio

### Visualizing Critical Infrastructure

In [None]:
# Rank every intersection by length-weighted betweenness centrality and keep
# the five highest-scoring ones as the "critical" set.
graph = state['graph']
centrality = nx.betweenness_centrality(graph, weight='length')
top_nodes = sorted(centrality.items(), key=lambda item: item[1], reverse=True)[:5]

# Plot the network using each node's x/y attributes as its position.
plt.figure(figsize=(12, 10))
pos = {node: (attrs['x'], attrs['y']) for node, attrs in graph.nodes(data=True)}

# Background layer: the whole network, drawn faintly.
nx.draw_networkx_edges(graph, pos, edge_color='gray', alpha=0.3, width=0.5)
nx.draw_networkx_nodes(graph, pos, node_size=5, node_color='lightgray', alpha=0.5)

# Foreground layer: the critical intersections in red.
critical_nodes = [node for node, _ in top_nodes]
nx.draw_networkx_nodes(
    graph,
    pos,
    nodelist=critical_nodes,
    node_size=100,
    node_color='red',
    alpha=0.9,
)

plt.title("Critical Intersections for Emergency Response (Red = High Priority)")
plt.axis('off')
plt.show()

# Text summary of the same five nodes with their coordinates.
print("Top 5 Critical Intersections:")
for node, score in top_nodes:
    lat = graph.nodes[node]['y']
    lon = graph.nodes[node]['x']
    print(f"  Node {node}: Score {score:.4f} at ({lat:.6f}, {lon:.6f})")

---
## Impact & Innovation

### Why GPT-OSS-120b?
- **Complex Reasoning**: Understands abstract concepts like "accessibility" and "isolation"
- **Algorithm Selection**: Chooses appropriate graph algorithms based on context
- **Self-Correction**: Debugs generated code automatically
- **Offline-Capable**: Runs locally without an internet connection, which is critical for field deployment

### Real-World Applications
- **Urban Planning**: Optimize business locations, transit systems
- **Humanitarian Aid**: Emergency response, resource distribution
- **Infrastructure**: Identify critical vulnerabilities
- **Accessibility**: Ensure services reach all communities

### From Code to Impact
The traditional approach requires writing complex NetworkX algorithms by hand.  
DreamStreets instead lets planners ask their questions in natural language.

**One model. Infinite applications. From coffee shops to saving lives.**

---

GitHub: [DreamStreets Repository](https://github.com/yourusername/dreamstreets)  
Contact: your.email@example.com