# Network Analysis for Active Transportation

This tutorial demonstrates how to analyze transportation network data for active transportation research using the CISD package.

We'll cover:
1. Generating synthetic transportation network data
2. Visualizing network data
3. Analyzing infrastructure interventions on networks
4. Performing causal inference on networked data
5. Planning bike-friendly routes
6. Simulating strategic improvements to the bike network

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Import CISD package components
from cisd.spatial_neighborhood_generator import generate_synthetic_infrastructure_network

# For better visualization
plt.style.use('seaborn-whitegrid')
%matplotlib inline

# Try to import networkx
try:
    import networkx as nx
    HAS_NETWORKX = True
except ImportError:
    HAS_NETWORKX = False
    print("For network analysis capabilities, install networkx: pip install networkx")

## 1. Generating Synthetic Transportation Network Data

First, we'll generate a synthetic transportation network with nodes (intersections) and edges (road segments).

In [None]:
if HAS_NETWORKX:
    # Build the synthetic transportation network used by the rest of the notebook
    network_data = generate_synthetic_infrastructure_network(
        n_nodes=40,           # Number of intersections
        edge_density=0.1,     # Connectivity of the network
        treatment_share=0.3,  # Proportion of edges receiving treatment
        seed=42,
    )

    # Unpack the pieces of the returned mapping that later cells rely on
    nodes_df, edges_df, treatment_edges = (
        network_data[key] for key in ('nodes', 'edges', 'treatment_edges')
    )

    node_count, edge_count = len(nodes_df), len(edges_df)
    print(f"Generated network with {node_count} nodes and {edge_count} edges")
    print(f"Number of treated edges: {len(treatment_edges)}")

    # Preview the first few rows of each table
    for caption, table in (("\nSample nodes:", nodes_df), ("\nSample edges:", edges_df)):
        print(caption)
        display(table.head(3))
else:
    print("Skipping network generation (requires networkx)")

### Exploring the network dataset

In [None]:
if HAS_NETWORKX:
    # Numeric summaries of the node and edge tables
    for heading, table in (
        ("Node attributes summary:", nodes_df),
        ("\nEdge attributes summary:", edges_df),
    ):
        print(heading)
        print(table.describe())

    # One count plot per categorical column: road types, then intersection types
    count_plots = (
        ('road_type', edges_df, 'Distribution of Road Types'),
        ('intersection_type', nodes_df, 'Distribution of Intersection Types'),
    )
    for column, table, plot_title in count_plots:
        plt.figure(figsize=(10, 6))
        sns.countplot(x=column, data=table)
        plt.title(plot_title)
        plt.ylabel('Count')
        plt.grid(True, alpha=0.3)
        plt.show()

## 2. Visualizing the Transportation Network

Let's create a visualization of our network, highlighting treated edges.

In [None]:
if HAS_NETWORKX:
    # Assemble an undirected networkx graph from the node and edge tables
    G = nx.Graph()

    # Nodes carry their plotting position plus two descriptive attributes
    node_attribute_names = ('intersection_type', 'population_density')
    for _, node_row in nodes_df.iterrows():
        G.add_node(
            node_row['node_id'],
            pos=(node_row['x'], node_row['y']),
            **{name: node_row[name] for name in node_attribute_names},
        )

    # Edges copy every analysis column onto the graph
    edge_attribute_names = (
        'length', 'road_type', 'treatment', 'bike_lane', 'sidewalk', 'traffic_volume',
    )
    for _, edge_row in edges_df.iterrows():
        G.add_edge(
            edge_row['source'],
            edge_row['target'],
            **{name: edge_row[name] for name in edge_attribute_names},
        )

    # Node positions, reused by every drawing cell below
    pos = nx.get_node_attributes(G, 'pos')

    plt.figure(figsize=(12, 10))

    # Split the edges by treatment flag in a single pass
    untreated_edges, treated_edges = [], []
    for u, v, treatment_flag in G.edges(data='treatment'):
        if treatment_flag == 0:
            untreated_edges.append((u, v))
        elif treatment_flag == 1:
            treated_edges.append((u, v))

    # Untreated edges: thin grey. Treated edges: thick green, drawn on top.
    nx.draw_networkx_edges(G, pos, edgelist=untreated_edges, width=1, alpha=0.5, edge_color='grey')
    nx.draw_networkx_edges(G, pos, edgelist=treated_edges, width=3, edge_color='green')

    # Colour nodes by intersection type; any type not listed is drawn orange
    # (the legend below labels orange as roundabout)
    colour_by_type = {'signalized': 'red', 'unsignalized': 'blue'}
    node_colors = [
        colour_by_type.get(intersection_type, 'orange')
        for _, intersection_type in G.nodes(data='intersection_type')
    ]
    nx.draw_networkx_nodes(G, pos, node_size=80, node_color=node_colors, alpha=0.7)

    # Label only the first ten nodes to avoid clutter
    labels = {node: str(node) for node in list(G)[:10]}
    nx.draw_networkx_labels(G, pos, labels=labels, font_size=10)

    plt.title('Transportation Network with Treated Edges (Green)')
    plt.axis('off')
    plt.tight_layout()
    plt.show()

    # Separate small figure acting as a legend for the node colours
    legend_entries = [('red', 'Signalized'), ('blue', 'Unsignalized'), ('orange', 'Roundabout')]
    plt.figure(figsize=(8, 2))
    plt.scatter([0, 1, 2], [1, 1, 1], color=[colour for colour, _ in legend_entries], s=100)
    for x_position, (_, caption) in enumerate(legend_entries):
        plt.text(x_position, 0.8, caption, ha='center')
    plt.title('Intersection Types')
    plt.axis('off')
    plt.show()

## 3. Analyzing Infrastructure Treatment Effects

Now let's compare traffic volume on treated and untreated edges, and on edges with and without bike lanes.

In [None]:
if HAS_NETWORKX:
    def _volume_boxplot(group_column, plot_title, x_label):
        """Show a box plot of edge traffic volume split by ``group_column``."""
        plt.figure(figsize=(10, 6))
        sns.boxplot(x=group_column, y='traffic_volume', data=edges_df)
        plt.title(plot_title)
        plt.xlabel(x_label)
        plt.ylabel('Traffic Volume')
        plt.grid(True, alpha=0.3)
        plt.show()

    # Treated vs. untreated edges
    _volume_boxplot('treatment', 'Traffic Volume by Treatment Status', 'Treatment (0=No, 1=Yes)')

    # Group means and the relative difference, expressed as a percentage
    treated_mask = edges_df['treatment'] == 1
    untreated_mask = edges_df['treatment'] == 0
    treated_volume = edges_df.loc[treated_mask, 'traffic_volume'].mean()
    untreated_volume = edges_df.loc[untreated_mask, 'traffic_volume'].mean()
    volume_gap = untreated_volume - treated_volume
    volume_reduction = volume_gap / untreated_volume * 100

    print(f"Mean traffic volume on untreated roads: {untreated_volume:.1f}")
    print(f"Mean traffic volume on treated roads: {treated_volume:.1f}")
    print(f"Traffic volume reduction: {volume_reduction:.1f}%")

    # Edges with vs. without a bike lane
    _volume_boxplot('bike_lane', 'Traffic Volume by Bike Lane Presence', 'Bike Lane (0=No, 1=Yes)')

### Network metrics and accessibility analysis

In [None]:
if HAS_NETWORKX:
    # Headline size and density figures for the graph
    print("Basic network metrics:")
    for metric_line in (
        f"Number of nodes: {G.number_of_nodes()}",
        f"Number of edges: {G.number_of_edges()}",
        f"Network density: {nx.density(G):.4f}",
    ):
        print(metric_line)

    # Mean degree across all nodes
    degrees = [degree for _, degree in G.degree()]
    print(f"Average node degree: {np.mean(degrees):.2f}")

    # Length-weighted average shortest path; networkx raises NetworkXError
    # here, which this cell reports as a disconnected network
    try:
        avg_path = nx.average_shortest_path_length(G, weight='length')
    except nx.NetworkXError:
        print("Network not connected, can't calculate average path length")
    else:
        print(f"Average shortest path length: {avg_path:.2f} meters")

    # Length-weighted betweenness centrality per node (reused by the plot below)
    print("\nCalculating betweenness centrality...")
    bc = nx.betweenness_centrality(G, weight='length')

    # Attach the scores to the node table, matching on node_id
    nodes_df['betweenness'] = nodes_df['node_id'].map(bc)

    # Five highest-scoring intersections
    ranked_nodes = nodes_df.sort_values(by='betweenness', ascending=False)
    top_intersections = ranked_nodes.iloc[:5]
    print("Top 5 intersections by betweenness centrality:")
    display(top_intersections[['node_id', 'intersection_type', 'betweenness']])

### Visualize betweenness centrality

In [None]:
if HAS_NETWORKX:
    plt.figure(figsize=(12, 10))

    # Background: every edge in light grey
    nx.draw_networkx_edges(G, pos, width=1, alpha=0.5, edge_color='grey')

    # Marker area grows with betweenness (the +20 keeps zero-score nodes
    # visible); marker colour encodes population density
    node_sizes = [20 + 10000 * bc[node] for node in G]
    node_colors = [density for _, density in G.nodes(data='population_density')]

    nodes = nx.draw_networkx_nodes(
        G,
        pos,
        node_size=node_sizes,
        node_color=node_colors,
        cmap='viridis',
        alpha=0.7,
    )

    # Colour scale for population density
    plt.colorbar(nodes, label='Population Density')

    # Label only the five most central intersections
    top_5_nodes = list(top_intersections['node_id'])
    labels = {node: str(node) for node in top_5_nodes}
    nx.draw_networkx_labels(G, pos, labels=labels, font_size=12, font_weight='bold')

    plt.title('Intersection Betweenness Centrality (Node Size) and Population Density (Color)')
    plt.axis('off')
    plt.tight_layout()
    plt.show()

## 4. Network-Based Causal Inference

Now we'll analyze how network position affects treatment outcomes.

In [None]:
if HAS_NETWORKX:
    from collections import defaultdict

    # Work on a copy so the original edge table stays untouched
    analysis_df = edges_df.copy()

    edge_labels = list(edges_df.index)
    edge_sources = list(edges_df['source'])
    edge_targets = list(edges_df['target'])

    # Map each node to the index labels of all edges touching it
    edge_adjacency = defaultdict(set)
    for edge_label, u, v in zip(edge_labels, edge_sources, edge_targets):
        edge_adjacency[u].add(edge_label)
        edge_adjacency[v].add(edge_label)

    # Treatment flag keyed by index *label*. The adjacency sets hold labels,
    # so looking them up positionally (iloc) would only be correct for a
    # default 0..n-1 index.
    treated_by_label = dict(zip(edge_labels, edges_df['treatment'] == 1))

    # For each edge, count the treated edges sharing either endpoint,
    # excluding the edge itself
    adjacent_treated_counts = []
    for edge_label, u, v in zip(edge_labels, edge_sources, edge_targets):
        neighbouring_edges = (edge_adjacency[u] | edge_adjacency[v]) - {edge_label}
        adjacent_treated_counts.append(
            sum(1 for neighbour in neighbouring_edges if treated_by_label[neighbour])
        )

    # The list is in row order, so it aligns with analysis_df positionally
    analysis_df['adjacent_treated'] = adjacent_treated_counts

    # Display results
    print("Distribution of adjacent treated edges:")
    print(analysis_df['adjacent_treated'].value_counts().sort_index())

### Analyze spillover effects

In [None]:
if HAS_NETWORKX:
    # Mean traffic volume for each count of adjacent treated edges
    spillover_analysis = analysis_df.groupby('adjacent_treated')['traffic_volume'].mean().reset_index()

    # Plot the relationship
    plt.figure(figsize=(10, 6))
    sns.barplot(x='adjacent_treated', y='traffic_volume', data=spillover_analysis)
    plt.title('Mean Traffic Volume by Number of Adjacent Treated Edges')
    plt.xlabel('Number of Adjacent Treated Edges')
    plt.ylabel('Mean Traffic Volume')
    plt.grid(True, alpha=0.3)
    plt.show()

    # Same relationship, split by the edge's own treatment status.
    # catplot is figure-level and creates its own figure (sized by
    # height/aspect), so no plt.figure() call precedes it; one would only
    # leave an empty extra figure in the output.
    g = sns.catplot(
        data=analysis_df,
        x='adjacent_treated',
        y='traffic_volume',
        hue='treatment',
        kind='bar',
        palette=['blue', 'green'],
        # No confidence intervals.
        # NOTE(review): seaborn >= 0.12 deprecates `ci` in favour of
        # `errorbar=None`; switch once older seaborn need not be supported.
        ci=None,
        height=5,
        aspect=1.5
    )
    g.set_axis_labels("Number of Adjacent Treated Edges", "Mean Traffic Volume")
    g.legend.set_title("Treated")
    plt.title('Spillover Effects: Traffic Volume by Treatment Status and Adjacent Treated Edges')
    plt.tight_layout()
    plt.show()

### Run regression to estimate spillover effects

In [None]:
if HAS_NETWORKX:
    try:
        import statsmodels.api as sm
        import statsmodels.formula.api as smf

        def _fit_ols(model_formula):
            """Fit an OLS model for ``model_formula`` on analysis_df."""
            return smf.ols(formula=model_formula, data=analysis_df).fit()

        # Baseline: own treatment, neighbouring treatment and road controls.
        # NOTE(review): assumes analysis_df has a `speed_limit` column; no
        # other cell in this notebook reads it, so confirm it exists.
        formula = "traffic_volume ~ treatment + adjacent_treated + road_type + speed_limit"
        model = _fit_ols(formula)
        print(model.summary())

        # Same specification plus the treatment x adjacent_treated interaction
        formula_interaction = "traffic_volume ~ treatment + adjacent_treated + treatment:adjacent_treated + road_type + speed_limit"
        model_interaction = _fit_ols(formula_interaction)

        print("\nModel with interaction term:")
        print(model_interaction.summary())

    except ImportError:
        print("Regression analysis requires statsmodels: pip install statsmodels")

## 5. Bike-Friendly Route Planning

Let's define a bike-friendly edge weight and use it to find routes in our network.

In [None]:
if HAS_NETWORKX:
    # Create a copy of the graph for routing so G itself keeps its attributes
    G_routing = G.copy()

    # Give every edge a 'bike_weight' that favours bike-friendly segments
    for u, v, data in G_routing.edges(data=True):
        # Base weight is the road length
        weight = data['length']

        # Penalize high traffic volumes (+100% per 1000 units of volume)
        weight *= 1 + data['traffic_volume'] / 1000

        # Big discount for bike lanes
        if data['bike_lane'] == 1:
            weight *= 0.5

        # Some discount for sidewalks (relevant for walking)
        if data['sidewalk'] == 1:
            weight *= 0.8

        G_routing[u][v]['bike_weight'] = weight

    # Pick the endpoints from the largest connected component so a route is
    # guaranteed to exist; shortest_path raises NetworkXNoPath otherwise.
    # Graph iteration order is kept so the seeded draw is reproducible.
    largest_component = max(nx.connected_components(G_routing), key=len)
    routable_nodes = [node for node in G_routing.nodes() if node in largest_component]
    if len(routable_nodes) < 2:
        raise ValueError("Need at least two connected nodes to plan a route")

    # Draw two distinct positions instead of sampling the NodeView directly:
    # np.random.choice needs a 1-D sequence, and indexing returns the
    # original node objects. Change the seed to sample a different pair.
    rng = np.random.default_rng(42)
    source_position, target_position = rng.choice(len(routable_nodes), size=2, replace=False)
    source = routable_nodes[source_position]
    target = routable_nodes[target_position]

    # Find shortest path using standard distance weights
    standard_path = nx.shortest_path(G_routing, source=source, target=target, weight='length')

    # Find bike-friendly path using bike weights
    bike_friendly_path = nx.shortest_path(G_routing, source=source, target=target, weight='bike_weight')

    # Print results
    print(f"Finding routes from node {source} to node {target}")
    print(f"Standard path: {standard_path}")
    print(f"Bike-friendly path: {bike_friendly_path}")

    def _path_total(path, attribute):
        """Sum an edge attribute over consecutive node pairs of ``path``."""
        return sum(G_routing[a][b][attribute] for a, b in zip(path[:-1], path[1:]))

    # Physical length of each route
    standard_length = _path_total(standard_path, 'length')
    bike_length = _path_total(bike_friendly_path, 'length')

    # Number of bike-lane segments on each route
    standard_bike_lanes = _path_total(standard_path, 'bike_lane')
    bike_friendly_bike_lanes = _path_total(bike_friendly_path, 'bike_lane')

    print(f"\nStandard path length: {standard_length:.0f} meters with {standard_bike_lanes} bike lanes")
    print(f"Bike-friendly path length: {bike_length:.0f} meters with {bike_friendly_bike_lanes} bike lanes")

### Visualize the two routes

In [None]:
if HAS_NETWORKX:
    # Turn each node path into its list of consecutive (u, v) edges
    standard_path_edges = list(zip(standard_path[:-1], standard_path[1:]))
    bike_path_edges = list(zip(bike_friendly_path[:-1], bike_friendly_path[1:]))

    # Create figure
    plt.figure(figsize=(12, 10))

    # Draw all edges lightly
    nx.draw_networkx_edges(G_routing, pos, width=1, alpha=0.2, edge_color='grey')

    # Draw the standard path wide and translucent so it stays visible where
    # the bike-friendly path runs over the same segments
    nx.draw_networkx_edges(G_routing, pos, edgelist=standard_path_edges, width=6, alpha=0.6, edge_color='blue')

    # Draw the bike-friendly path on top
    nx.draw_networkx_edges(G_routing, pos, edgelist=bike_path_edges, width=3, edge_color='green')

    # Draw all nodes lightly
    nx.draw_networkx_nodes(G_routing, pos, node_size=30, node_color='grey', alpha=0.5)

    # Highlight source and target
    nx.draw_networkx_nodes(G_routing, pos, nodelist=[source], node_size=100, node_color='lime')
    nx.draw_networkx_nodes(G_routing, pos, nodelist=[target], node_size=100, node_color='red')

    # Add labels
    node_labels = {source: 'Start', target: 'End'}
    nx.draw_networkx_labels(G_routing, pos, labels=node_labels, font_size=12)

    # Legend from empty proxy lines only. The edge draws above carry no
    # label, so each route appears once, and empty data keeps the proxies
    # from adding a point at (0, 0) to the axes.
    plt.plot([], [], color='blue', linewidth=6, alpha=0.6, label='Standard Route')
    plt.plot([], [], color='green', linewidth=3, label='Bike-Friendly Route')
    plt.legend()

    plt.title('Standard vs. Bike-Friendly Route Comparison')
    plt.axis('off')
    plt.tight_layout()
    plt.show()

## 6. Infrastructure Intervention Analysis

Finally, let's simulate how adding more bike lanes would affect network connectivity for cyclists.

In [None]:
if HAS_NETWORKX:
    total_nodes = G.number_of_nodes()

    def _components_and_largest(graph):
        """Return (list of connected components, the largest component)."""
        components = list(nx.connected_components(graph))
        return components, max(components, key=len, default=set())

    def _coverage_percent(component):
        """Share of all nodes in G that belong to ``component``, in percent."""
        return len(component) / total_nodes * 100 if total_nodes else 0.0

    # Build a network holding every node but only the edges with a bike lane
    G_bike = nx.Graph()
    G_bike.add_nodes_from(G.nodes(data=True))
    bike_edges = [(u, v) for u, v, d in G.edges(data=True) if d['bike_lane'] == 1]
    for u, v in bike_edges:
        G_bike.add_edge(u, v, **G[u][v])

    # Check connectivity
    connected_components, largest_component = _components_and_largest(G_bike)
    print(f"Bike network has {len(connected_components)} connected components")
    print(f"Largest component has {len(largest_component)} nodes")

    # Percentage of nodes reachable inside the largest bike-lane component
    bike_network_coverage = _coverage_percent(largest_component)
    print(f"Bike network covers {bike_network_coverage:.1f}% of the total network")

    # Simulate adding new bike lanes strategically:
    # candidates are ranked by length-weighted edge betweenness centrality
    edge_betweenness = nx.edge_betweenness_centrality(G, weight='length')
    sorted_edges = sorted(edge_betweenness.items(), key=lambda item: item[1], reverse=True)

    # Create a copy of the bike graph
    G_improved = G_bike.copy()

    # Add bike lanes to top edges until coverage exceeds 90% or 10 lanes are
    # added. The check runs before each addition, so nothing is added when
    # the existing network already meets the target.
    added_edges = 0
    coverage = bike_network_coverage
    # The score is unpacked as `_`: naming it `bc` would overwrite the node
    # betweenness dict `bc` that the betweenness plot cell relies on.
    for (u, v), _ in sorted_edges:
        if coverage > 90 or added_edges >= 10:
            break

        # Skip if already in the bike network
        if G_improved.has_edge(u, v):
            continue

        G_improved.add_edge(u, v, **G[u][v])
        added_edges += 1
        coverage = _coverage_percent(_components_and_largest(G_improved)[1])

    # Calculate final connectivity
    final_components, final_largest = _components_and_largest(G_improved)
    final_coverage = _coverage_percent(final_largest)

    print(f"\nAfter adding {added_edges} strategic bike lanes:")
    print(f"Bike network has {len(final_components)} connected components")
    print(f"Largest component has {len(final_largest)} nodes")
    print(f"Bike network covers {final_coverage:.1f}% of the total network")

### Visualize the improved bike network

In [None]:
if HAS_NETWORKX:
    # Edges present in the improved network but not in the original bike network
    new_edges = [edge for edge in G_improved.edges() if not G_bike.has_edge(*edge)]

    def _draw_bike_panel(panel_number, panel_title, proposed_edges=None):
        """Draw one side of the before/after comparison."""
        plt.subplot(1, 2, panel_number)

        # All roads faintly in the background, existing bike lanes in green
        nx.draw_networkx_edges(G, pos, width=1, alpha=0.2, edge_color='grey')
        nx.draw_networkx_edges(G_bike, pos, width=2, edge_color='green')

        # Proposed lanes in red (right-hand panel only)
        if proposed_edges is not None:
            nx.draw_networkx_edges(G, pos, edgelist=proposed_edges, width=2, edge_color='red')

        nx.draw_networkx_nodes(G, pos, node_size=30, alpha=0.5)

        plt.title(panel_title)
        plt.axis('off')

    plt.figure(figsize=(15, 8))
    _draw_bike_panel(1, f'Original Bike Network ({bike_network_coverage:.1f}% coverage)')
    _draw_bike_panel(2, f'Improved Bike Network ({final_coverage:.1f}% coverage)', new_edges)
    plt.tight_layout()
    plt.show()

    # Stand-alone legend figure built from one dummy line per category
    legend_rows = (
        (0, {'color': 'green', 'linewidth': 2, 'label': 'Existing Bike Lanes'}),
        (1, {'color': 'red', 'linewidth': 2, 'label': 'Proposed New Bike Lanes'}),
        (2, {'color': 'grey', 'linewidth': 1, 'alpha': 0.2, 'label': 'Regular Roads'}),
    )
    plt.figure(figsize=(8, 1))
    for x_position, line_style in legend_rows:
        plt.plot([x_position], [0], **line_style)
    plt.legend(ncol=3, loc='center')
    plt.axis('off')
    plt.show()

## 7. Conclusion

In this tutorial, we've explored how to work with transportation network data for active transportation research using the CISD package. 

We covered:
1. Generating synthetic transportation network data
2. Visualizing and analyzing transportation networks
3. Understanding the impacts of bike infrastructure
4. Analyzing spillover effects from infrastructure interventions
5. Finding bike-friendly routes
6. Strategically improving bike network connectivity

These techniques can be used to analyze real-world transportation networks and plan effective active transportation interventions.