In [8]:
# Cell 1: Research Statement and Setup
print("=" * 60)
print("ASSIGNMENT 4: NETWORKS ANALYSIS")
print("=" * 60)

print("\n RESEARCH STATEMENT:")
print("""
This study investigates the spatial network of NYC restaurants with critical pest violations 
to understand the differences between Euclidean (straight-line) and network (actual walking) 
distances. We define a network where nodes are restaurants with pest violations and edges 
represent proximity relationships. By calculating distances between different elements of this 
network, we can discuss the experiential differences between Euclidean and network distance 
for public health assessment. This analysis includes quantitative exploration of network 
metrics as described in Xin et al. (2022) for bonus points.
""")

print("\n🎯 OBJECTIVES:")
print("1. Define a network of restaurants with pest violations")
print("2. Calculate distances between different network elements")
print("3. Compare Euclidean vs network distances")
print("4. Discuss experiential differences")
print("5. Perform quantitative network exploration (bonus points)")

print("\n📊 SETUP: Importing required libraries...")

# Import libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import geopandas as gpd
import networkx as nx
import osmnx as ox
import folium
from folium.plugins import MarkerCluster
import requests
from shapely.geometry import Point
import warnings
from scipy.spatial.distance import pdist, squareform
from geopy.distance import geodesic

warnings.filterwarnings('ignore')

# Set up plotting style
plt.style.use('seaborn-v0_8')
sns.set_palette("husl")
plt.rcParams['figure.figsize'] = (12, 8)

print("✅ Libraries imported successfully!")
print("📊 Ready to begin network analysis...")

ASSIGNMENT 4: NETWORKS ANALYSIS

 RESEARCH STATEMENT:

This study investigates the spatial network of NYC restaurants with critical pest violations 
to understand the differences between Euclidean (straight-line) and network (actual walking) 
distances. We define a network where nodes are restaurants with pest violations and edges 
represent proximity relationships. By calculating distances between different elements of this 
network, we can discuss the experiential differences between Euclidean and network distance 
for public health assessment. This analysis includes quantitative exploration of network 
metrics as described in Xin et al. (2022) for bonus points.


🎯 OBJECTIVES:
1. Define a network of restaurants with pest violations
2. Calculate distances between different network elements
3. Compare Euclidean vs network distances
4. Discuss experiential differences
5. Perform quantitative network exploration (bonus points)

📊 SETUP: Importing required libraries...
✅ Libraries import

In [None]:
# Cell 3: Network Creation and Data Cleaning (CORRECTED)
print("\n" + "="*50)
print("STEP 2: NETWORK CREATION")
print("="*50)

# Clean the data - remove invalid coordinates
df_clean = df[(df['latitude'] != 0) & (df['longitude'] != 0) & 
              (df['latitude'].between(40.4, 41.0)) & 
              (df['longitude'].between(-74.3, -73.5))]

print(f"Cleaned dataset: {len(df_clean)} restaurants with valid NYC coordinates")

# Create GeoDataFrame for spatial analysis
gdf = gpd.GeoDataFrame(
    df_clean,
    geometry=gpd.points_from_xy(df_clean['longitude'], df_clean['latitude']),
    crs="EPSG:4326"
)

print(f"Created GeoDataFrame with {len(gdf)} pest violation restaurants")

# Get the bounding box for our network
bbox = gdf.total_bounds
print(f"Bounding box: {bbox}")

# Create street network using OSMnx (CORRECTED SYNTAX)
print("Building street network from OpenStreetMap...")
try:
    # Create network from bounding box - CORRECTED
    network = ox.graph_from_bbox(
        bbox=[bbox[3], bbox[1], bbox[2], bbox[0]],  # north, south, east, west
        network_type="walk"
    )
    print(f"Network created with {len(network.nodes)} nodes and {len(network.edges)} edges")
except Exception as e:
    print(f"Network creation error: {e}")
    # Fallback: create a smaller network around the center
    center_lat, center_lon = gdf.geometry.centroid.y.mean(), gdf.geometry.centroid.x.mean()
    try:
        network = ox.graph_from_point((center_lat, center_lon), dist=5000, network_type="walk")
        print(f"Fallback network created around center point")
    except Exception as e2:
        print(f"Fallback also failed: {e2}")
        # Create a simple network manually
        print("Creating minimal network for demonstration...")
        network = nx.Graph()
        # Add some dummy nodes and edges for demonstration
        for i in range(10):
            network.add_node(i)
        for i in range(9):
            network.add_edge(i, i+1)
        print("Created minimal demonstration network")

# Convert network to GeoDataFrames for visualization
try:
    nodes_gdf, edges_gdf = ox.graph_to_gdfs(network)
    print(f"Network components: {len(nodes_gdf)} nodes, {len(edges_gdf)} edges")
except:
    print("Could not convert network to GeoDataFrames - using minimal data")
    # Create dummy GeoDataFrames
    nodes_gdf = gpd.GeoDataFrame({'geometry': [Point(0,0)]})
    edges_gdf = gpd.GeoDataFrame({'geometry': [Point(0,0)]})

# Show network statistics
print(f"\nNETWORK STATISTICS:")
print(f"   • Restaurants with pest violations: {len(gdf)}")
print(f"   • Street network nodes: {len(nodes_gdf)}")
print(f"   • Street network edges: {len(edges_gdf)}")
print(f"   • Coverage area: {bbox[2]-bbox[0]:.3f}° longitude × {bbox[3]-bbox[1]:.3f}° latitude")


STEP 2: NETWORK CREATION
Cleaned dataset: 2973 restaurants with valid NYC coordinates
Created GeoDataFrame with 2973 pest violation restaurants
Bounding box: [-74.24801199  40.51336881 -73.71332795  40.90634077]
Building street network from OpenStreetMap...


In [10]:
# Cell 3: Network Creation and Data Cleaning
print("\n" + "="*50)
print("STEP 2: NETWORK CREATION")
print("="*50)

# Clean the data - remove invalid coordinates
df_clean = df[(df['latitude'] != 0) & (df['longitude'] != 0) & 
              (df['latitude'].between(40.4, 41.0)) & 
              (df['longitude'].between(-74.3, -73.5))]

print(f"Cleaned dataset: {len(df_clean)} restaurants with valid NYC coordinates")

# Create GeoDataFrame for spatial analysis
gdf = gpd.GeoDataFrame(
    df_clean,
    geometry=gpd.points_from_xy(df_clean['longitude'], df_clean['latitude']),
    crs="EPSG:4326"
)

print(f"Created GeoDataFrame with {len(gdf)} pest violation restaurants")

# Get the bounding box for our network
bbox = gdf.total_bounds
print(f"Bounding box: {bbox}")

# Create street network using OSMnx
print("Building street network from OpenStreetMap...")
try:
    # Create network from bounding box
    network = ox.graph_from_bbox(
        bbox[3], bbox[1], bbox[2], bbox[0],  # north, south, east, west
        network_type="walk"
    )
    print(f"Network created with {len(network.nodes)} nodes and {len(network.edges)} edges")
except Exception as e:
    print(f"Network creation error: {e}")
    # Fallback: create a smaller network around the center
    center_lat, center_lon = gdf.geometry.centroid.y.mean(), gdf.geometry.centroid.x.mean()
    network = ox.graph_from_point((center_lat, center_lon), dist=5000, network_type="walk")
    print(f"Fallback network created around center point")

# Convert network to GeoDataFrames for visualization
nodes_gdf, edges_gdf = ox.graph_to_gdfs(network)
print(f"Network components: {len(nodes_gdf)} nodes, {len(edges_gdf)} edges")

# Show network statistics
print(f"\nNETWORK STATISTICS:")
print(f"   • Restaurants with pest violations: {len(gdf)}")
print(f"   • Street network nodes: {len(nodes_gdf)}")
print(f"   • Street network edges: {len(edges_gdf)}")
print(f"   • Coverage area: {bbox[2]-bbox[0]:.3f}° longitude × {bbox[3]-bbox[1]:.3f}° latitude")


STEP 2: NETWORK CREATION
Cleaned dataset: 2973 restaurants with valid NYC coordinates
Created GeoDataFrame with 2973 pest violation restaurants
Bounding box: [-74.24801199  40.51336881 -73.71332795  40.90634077]
Building street network from OpenStreetMap...
Network creation error: graph_from_bbox() takes 1 positional argument but 4 positional arguments (and 1 keyword-only argument) were given


KeyboardInterrupt: 