# In [None]:
"""
NOTEBOOK 00: Setup & Configuration (PRECISION FIXED)
Generates tiles with precise coordinates (14+ decimal places) to avoid overlap
"""

from google.colab import drive
from pathlib import Path
import json
import numpy as np

# Mount Google Drive so the pipeline folder below is reachable from Colab.
drive.mount('/content/drive')

# ============================================================
# CONFIGURATION
# ============================================================

# Root folder of the pipeline on the mounted Drive; the JSON files written
# later in this notebook are placed underneath it.
DRIVE_BASE = Path('/content/drive/MyDrive/NYU/VizMLProject/manhattan_pipeline')

# Opening banner: rule, title, rule, then one blank line.
print(
    "=" * 60,
    "üó∫Ô∏è  MANHATTAN PIPELINE SETUP (PRECISION FIX)",
    "=" * 60 + "\n",
    sep="\n",
)

# # Create folder structure
# folders = [
#     'config',
#     'notebooks',
#     'raw_output',
#     'web_ready/tiles'
# ]

# for folder in folders:
#     (DRIVE_BASE / folder).mkdir(parents=True, exist_ok=True)

# print(f"‚úÖ Created structure at: {DRIVE_BASE}\n")

# ============================================================
# PRECISE TILE SIZING
# ============================================================

print("=" * 60)
print("TILE GRID CALCULATION (HIGH PRECISION)")
print(f"{'=' * 60}\n")

# Bounding box of Manhattan in decimal degrees, kept at full float precision.
MANHATTAN_BOUNDS = {
    "north": 40.882013588150486,   # Inwood
    "south": 40.700066184466224,   # Battery Park
    "west": -74.02007877826929,    # Hudson River
    "east": -73.90702547912598     # East River
}

# Edge length of one tile in meters; the per-axis degree sizes derive from it.
TARGET_TILE_SIZE_METERS = 750

# Flat approximation used by this notebook near NYC's latitude:
# one degree of latitude is taken as 111 km, one degree of longitude as 85 km.
METERS_PER_DEG_LAT = 111000
METERS_PER_DEG_LON = 85000

TILE_SIZE_LAT = TARGET_TILE_SIZE_METERS / METERS_PER_DEG_LAT  # approx. 0.00676 deg
TILE_SIZE_LON = TARGET_TILE_SIZE_METERS / METERS_PER_DEG_LON  # approx. 0.00882 deg

print(f"Target tile size: {TARGET_TILE_SIZE_METERS}m")
print(f"Tile size (latitude):  {TILE_SIZE_LAT:.15f}¬∞")
print(f"Tile size (longitude): {TILE_SIZE_LON:.15f}¬∞")
print()

# Extent of the bounding box along each axis, in degrees.
manhattan_height = MANHATTAN_BOUNDS['north'] - MANHATTAN_BOUNDS['south']
manhattan_width = MANHATTAN_BOUNDS['east'] - MANHATTAN_BOUNDS['west']

# Estimated grid size: whole tiles that fit, plus one for the partial tile
# at the far edge. This is an estimate; it is not derived from the tiles
# that are actually generated.
num_rows = 1 + int(manhattan_height / TILE_SIZE_LAT)
num_cols = 1 + int(manhattan_width / TILE_SIZE_LON)

print("Manhattan dimensions:")
print(f"  Height: {manhattan_height:.15f}¬∞ ({manhattan_height * 111:.1f}km)")
print(f"  Width:  {manhattan_width:.15f}¬∞ ({manhattan_width * 85:.1f}km)")
print(f"\nEstimated grid: {num_rows} rows √ó {num_cols} cols = {num_rows * num_cols} tiles\n")

# ============================================================
# GENERATE PRECISE TILE GRID
# ============================================================

print("=" * 60)
print("GENERATING TILES WITH HIGH PRECISION")
print(f"{'=' * 60}\n")

tiles = []
tile_id = 0  # running id; advanced only for tiles that are kept

# Sweep the bounding box south -> north (rows) and, within each row,
# west -> east (columns), advancing by one tile size per step.
row_idx = 0
lat_start = MANHATTAN_BOUNDS['south']
lon_start = MANHATTAN_BOUNDS['west']

while lat_start < MANHATTAN_BOUNDS['north']:
    col_idx = 0
    lon_start = MANHATTAN_BOUNDS['west']

    while lon_start < MANHATTAN_BOUNDS['east']:
        # The south/west corner is the current sweep position; the
        # north/east corner is one tile further, clipped to the bounding box.
        tile_south, tile_west = lat_start, lon_start
        tile_north = min(tile_south + TILE_SIZE_LAT, MANHATTAN_BOUNDS['north'])
        tile_east = min(tile_west + TILE_SIZE_LON, MANHATTAN_BOUNDS['east'])

        actual_height = tile_north - tile_south
        actual_width = tile_east - tile_west

        # A clipped edge tile is kept only if it retains at least half the
        # target size on both axes. Thinner slivers are dropped, so the strip
        # they would have covered is not represented by any tile; the grid
        # position (col_idx) is still consumed.
        tall_enough = actual_height >= TILE_SIZE_LAT * 0.5
        wide_enough = actual_width >= TILE_SIZE_LON * 0.5

        if tall_enough and wide_enough:
            # Coordinates are stored as raw floats, with no rounding.
            tiles.append({
                "id": f"manhattan_tile_{tile_id}",
                "name": f"Manhattan Tile {tile_id} (R{row_idx}C{col_idx})",
                "row": row_idx,
                "col": col_idx,
                "bounds": {
                    "north": tile_north,
                    "south": tile_south,
                    "west": tile_west,
                    "east": tile_east
                },
                # tile2net format: "north, west, south, east"
                "location": f"{tile_north}, {tile_west}, {tile_south}, {tile_east}",
                "center": {
                    "lat": (tile_north + tile_south) / 2,
                    "lon": (tile_east + tile_west) / 2
                },
                "size_deg": {
                    "lat": actual_height,
                    "lon": actual_width
                },
                # Same 111 km / 85 km per-degree approximation as the sizing step.
                "size_m": {
                    "lat": actual_height * 111000,
                    "lon": actual_width * 85000
                }
            })
            tile_id += 1

        # Advance one column whether or not the tile was kept.
        col_idx += 1
        lon_start += TILE_SIZE_LON

    # Advance one row.
    lat_start += TILE_SIZE_LAT
    row_idx += 1

print(f"‚úÖ Generated {len(tiles)} tiles\n")

# ============================================================
# VALIDATE NO OVERLAP
# ============================================================

print("=" * 60)
print("üîç VALIDATING NO OVERLAP")
print(f"{'=' * 60}\n")

overlap_found = False

# Compare every unordered pair of tiles exactly once.
for i, first in enumerate(tiles):
    b1 = first['bounds']
    for second in tiles[i + 1:]:
        b2 = second['bounds']

        # Two intervals overlap when each one starts strictly before the
        # other ends; tiles that merely share an edge do not count.
        lat_overlap = b1['north'] > b2['south'] and b1['south'] < b2['north']
        lon_overlap = b1['east'] > b2['west'] and b1['west'] < b2['east']

        # Rectangles intersect only if they overlap on both axes.
        if not (lat_overlap and lon_overlap):
            continue

        print(f"‚ö†Ô∏è  OVERLAP: {first['id']} and {second['id']}")
        overlap_found = True

if not overlap_found:
    print("‚úÖ No overlaps detected!\n")

# ============================================================
# VALIDATE GAPS
# ============================================================

print("="*60)
print("üîç CHECKING FOR GAPS")
print("="*60 + "\n")

# Index tiles by grid position once, so each neighbor lookup is a dict access
# instead of a scan over the whole tile list. setdefault keeps the first tile
# seen for a position, matching what a first-match scan would return.
tiles_by_cell = {}
for t in tiles:
    tiles_by_cell.setdefault((t['row'], t['col']), t)

# Each entry: (row step, col step, this tile's edge, neighbor's facing edge).
neighbor_checks = (
    (0, 1, 'east', 'west'),     # right neighbor
    (1, 0, 'north', 'south'),   # top neighbor
)

gaps_found = False

for tile in tiles:
    for row_step, col_step, own_edge, facing_edge in neighbor_checks:
        neighbor = tiles_by_cell.get((tile['row'] + row_step, tile['col'] + col_step))

        # Only existing neighbors are compared. A grid position with no tile
        # (beyond the grid edge, or a tile dropped during generation) is not
        # reported here.
        if neighbor is None:
            continue

        # Positive gap: the tiles are separated. Negative: they overlap.
        gap = neighbor['bounds'][facing_edge] - tile['bounds'][own_edge]
        if abs(gap) > 1e-10:  # Allow tiny floating point errors
            print(f"‚ö†Ô∏è  GAP: {gap:.15f}¬∞ between {tile['id']} and {neighbor['id']}")
            gaps_found = True

if not gaps_found:
    print("‚úÖ No gaps detected!\n")

# ============================================================
# SAVE CONFIGURATION
# ============================================================

# Everything written to tiles_manhattan.json: the sizing parameters, the
# bounding box and the full tile list.
tiles_config = {
    "project": "Manhattan Sidewalk Timeline",
    "tile_size_deg": {
        "lat": TILE_SIZE_LAT,
        "lon": TILE_SIZE_LON
    },
    "tile_size_meters": TARGET_TILE_SIZE_METERS,
    "total_tiles": len(tiles),
    # NOTE: these are the estimated grid dimensions from the sizing step,
    # not counts derived from the generated tiles.
    "grid_dimensions": {
        "rows": num_rows,
        "cols": num_cols
    },
    "manhattan_bounds": MANHATTAN_BOUNDS,
    "tiles": tiles
}

# Make sure the target folder exists before writing; open(..., 'w') does not
# create missing parent directories.
config_dir = DRIVE_BASE / 'config'
config_dir.mkdir(parents=True, exist_ok=True)

config_file = config_dir / 'tiles_manhattan.json'
with open(config_file, 'w') as f:
    json.dump(tiles_config, f, indent=2)

print(f"‚úÖ Saved: {config_file}\n")

# Create processing log: an empty per-tile record with no update timestamp yet.
processing_log = {
    "tiles": {},
    "last_updated": None
}

log_file = config_dir / 'processing_log.json'
with open(log_file, 'w') as f:
    # Serialize the log dict itself. Passing the Path object here raises
    # TypeError because a Path is not JSON serializable.
    json.dump(processing_log, f, indent=2)

print(f"‚úÖ Saved: {log_file}\n")

# ============================================================
# PREVIEW SAMPLE TILES
# ============================================================

print("=" * 60)
print("üìã SAMPLE TILES (showing precision)")
print(f"{'=' * 60}\n")

# Show the first three tiles with coordinates printed to 15 decimal places.
for i, tile in enumerate(tiles[:3]):
    bounds = tile['bounds']
    size_m = tile['size_m']

    print(f"Tile {i} ({tile['name']}):")
    print("   Bounds:")
    # Labels carry their own padding so the values line up in one column.
    for label, edge in (
        ("North: ", 'north'),
        ("South: ", 'south'),
        ("West:  ", 'west'),
        ("East:  ", 'east'),
    ):
        print(f"      {label}{bounds[edge]:.15f}")
    print(f"   Location string: {tile['location']}")
    print(f"   Size: {size_m['lat']:.0f}m √ó {size_m['lon']:.0f}m")
    print()

# ============================================================
# SUMMARY
# ============================================================

print("="*60)
print("‚úÖ SETUP COMPLETE!")
print("="*60 + "\n")

print("Key improvements:")
print("  ‚Ä¢ Precise coordinates (15 decimal places)")

# Report the overlap and gap lines from the validation flags set earlier,
# so the summary cannot claim a clean grid when a check flagged a problem.
if overlap_found:
    print("  ‚Ä¢ WARNING: overlapping tiles were detected (see validation output above)")
else:
    print("  ‚Ä¢ No overlap between tiles")

if gaps_found:
    print("  ‚Ä¢ WARNING: gaps between adjacent tiles were detected (see validation output above)")
else:
    print("  ‚Ä¢ No gaps in coverage")

print(f"  ‚Ä¢ Accurate {TARGET_TILE_SIZE_METERS}m √ó {TARGET_TILE_SIZE_METERS}m tiles")
print(f"  ‚Ä¢ Total: {len(tiles)} tiles covering Manhattan")
print()
print("Next: Run 01_tile2net_extract.ipynb")
print("      Should now get consistent 4√ó4 grids (16 tiles each)")