In [5]:
# ==========================================
#   STEP 1: SETUP WITH PYTHON 3.12 FIX
# ==========================================
import os
import subprocess
import sys

# Cell purpose: provision the runtime for tile2net. It installs the system
# GDAL packages, upgrades the Python build tooling, clones tile2net, relaxes
# the "<3.12" Python constraint in its pyproject.toml, installs the Python
# dependencies and finally installs tile2net itself in editable mode.
print("üîß --- 1. SYSTEM SETUP (GDAL) --- üîß")
subprocess.run(["apt-get", "update", "-qq"], check=True)
subprocess.run(["apt-get", "install", "-y", "-qq", "gdal-bin", "libgdal-dev"], check=True)

print("üîß --- 2. BUILD TOOLS SETUP --- üîß")
subprocess.run([sys.executable, "-m", "pip", "install", "--upgrade", "pip", "setuptools", "wheel"], check=True)

# Best-effort: install the Python GDAL bindings at the version reported by
# gdal-config. A failure here is reported but does not stop the cell.
try:
    gdal_version = subprocess.check_output(["gdal-config", "--version"]).decode("utf-8").strip()
    print(f"   System GDAL Version: {gdal_version}")
    subprocess.run([sys.executable, "-m", "pip", "install", f"gdal=={gdal_version}"], check=True)
except Exception as e:
    print(f"‚ö†Ô∏è GDAL Config failed: {e}")

print("üîß --- 3. CLONE REPOSITORY --- üîß")
# The cleanup, the pyproject patch and the editable install below all address
# the checkout by this absolute path, so clone INTO it explicitly instead of
# into whatever the current working directory happens to be.
repo_dir = "/content/tile2net"
if os.path.exists(repo_dir):
    import shutil
    shutil.rmtree(repo_dir)
subprocess.run(["git", "clone", "https://github.com/VIDA-NYU/tile2net.git", repo_dir], check=True)

# --- NEW STEP: HACK PYPROJECT.TOML ---
print("üîß --- 3b. PATCH PYTHON VERSION CHECK --- üîß")
pyproject_path = os.path.join(repo_dir, "pyproject.toml")
if os.path.exists(pyproject_path):
    # TOML files are UTF-8, so do not depend on the locale's default encoding.
    with open(pyproject_path, 'r', encoding="utf-8") as f:
        content = f.read()

    # Change "<3.12" to "<3.13" so it accepts Colab's Python 3.12
    if "<3.12" in content:
        print("   Found strict version constraint. Relaxing it...")
        content = content.replace("<3.12", "<3.13")
        with open(pyproject_path, 'w', encoding="utf-8") as f:
            f.write(content)
        print("   ‚úì Version constraint updated.")
    else:
        print("   Constraint not found (might be in setup.cfg). Proceeding...")
else:
    print("‚ö†Ô∏è pyproject.toml not found. Proceeding cautiously.")
# ---------------------------------------

print("üîß --- 4. INSTALL PYTHON DEPENDENCIES --- üîß")
# Installed up front because tile2net itself is installed with --no-deps below.
deps = [
    "numpy<2.0",
    "pandas<2.3",
    "argh<0.27",
    "scipy",
    "scikit-image",
    "geopandas",
    "rasterio",
    "shapely",
    "torch",
    "torchvision"
]
subprocess.run([sys.executable, "-m", "pip", "install"] + deps, check=True)

print("üîß --- 5. INSTALL TILE2NET --- üîß")
# Now that we patched the config file, this should succeed
cmd = [
    sys.executable, "-m", "pip", "install",
    "--no-build-isolation",
    "--no-deps",
    "-e", repo_dir
]

# Output is captured so that only the tail of stderr is shown on failure.
result = subprocess.run(cmd, capture_output=True, text=True)

if result.returncode != 0:
    print("‚ùå INSTALLATION FAILED:")
    print("\n".join(result.stderr.splitlines()[-20:]))
    sys.exit(1)
else:
    print("‚úÖ SUCCESS! Tile2Net installed.")
    print("‚ö†Ô∏è IMPORTANT: Go to 'Runtime' > 'Restart Session' NOW.")

üîß --- 1. SYSTEM SETUP (GDAL) --- üîß
üîß --- 2. BUILD TOOLS SETUP --- üîß
   System GDAL Version: 3.8.4
üîß --- 3. CLONE REPOSITORY --- üîß
üîß --- 3b. PATCH PYTHON VERSION CHECK --- üîß
   Found strict version constraint. Relaxing it...
   ‚úì Version constraint updated.
üîß --- 4. INSTALL PYTHON DEPENDENCIES --- üîß
üîß --- 5. INSTALL TILE2NET --- üîß
‚úÖ SUCCESS! Tile2Net installed.
‚ö†Ô∏è IMPORTANT: Go to 'Runtime' > 'Restart Session' NOW.


In [1]:
# ==========================================
#   STEP 1.5: FIX MISSING DEPENDENCIES
# ==========================================
import subprocess
import sys

# Cell purpose: install the libraries that were still missing after the
# initial setup: one system package first, then the Python packages.
print("üîß --- INSTALLING MISSING LIBRARIES --- üîß")

# System library for RTree
apt_install = ["apt-get", "install", "-y", "-qq", "libspatialindex-dev"]
subprocess.run(apt_install, check=True)

# Python packages. "numpy<2.0" is repeated here so that installing osmnx
# cannot pull numpy up to 2.x.
missing_deps = ["osmnx", "networkx", "rtree", "tqdm", "requests", "affine", "numpy<2.0"]

print(f"Installing: {', '.join(missing_deps)}...")
pip_install = [sys.executable, "-m", "pip", "install", *missing_deps]
subprocess.run(pip_install, check=True)

print("\n‚úÖ Missing dependencies installed.")
print("üëâ Now try running Cell 2 (Verify Installation) again.")

üîß --- INSTALLING MISSING LIBRARIES --- üîß
Installing: osmnx, networkx, rtree, tqdm, requests, affine, numpy<2.0...

‚úÖ Missing dependencies installed.
üëâ Now try running Cell 2 (Verify Installation) again.


In [2]:
# ==========================================
#   STEP 1.6: PATCH BROKEN LIBRARY FILES
# ==========================================
import os

def apply_argh_hotfix(source_text):
    """Return ``(patched_text, changed)`` for one module's source code.

    Two edits are applied:

    * every ``argh.ArghNamespace`` is replaced by ``argparse.Namespace``;
    * ``import argparse`` is added when the text does not contain it.

    The import is placed after the last ``from __future__ import ...``
    statement when the module has one, because Python rejects a module in
    which an ordinary import precedes a future import. Without a future
    import it is prepended at the very top.

    ``changed`` is True when at least one of the two edits was made.
    """
    needs_import = "import argparse" not in source_text
    needs_rename = "argh.ArghNamespace" in source_text

    patched_text = source_text
    if needs_rename:
        patched_text = patched_text.replace("argh.ArghNamespace", "argparse.Namespace")

    if needs_import:
        lines = patched_text.splitlines(keepends=True)
        insert_at = 0
        for idx, line in enumerate(lines):
            if line.startswith("from __future__ import"):
                end = idx
                # A parenthesised future import may span several lines.
                if "(" in line and ")" not in line:
                    while end + 1 < len(lines) and ")" not in lines[end]:
                        end += 1
                insert_at = end + 1
        # Guard against a final line without a trailing newline.
        if insert_at and not lines[insert_at - 1].endswith("\n"):
            lines[insert_at - 1] += "\n"
        lines.insert(insert_at, "import argparse\n")
        patched_text = "".join(lines)

    return patched_text, needs_import or needs_rename


print("üöë --- APPLYING HOTFIX FOR ARGH/ARGPARSE --- üöë")

# List of files known to cause this specific error in tile2net
files_to_patch = [
    "/content/tile2net/src/tile2net/raster/generate/commandline.py",
    "/content/tile2net/src/tile2net/namespace.py"
]

for file_path in files_to_patch:
    if not os.path.exists(file_path):
        print(f"   ‚ö†Ô∏è Warning: Could not find {file_path}")
        continue

    print(f"Checking {os.path.basename(file_path)}...")
    # Python source files are UTF-8 by default; do not depend on the locale.
    with open(file_path, "r", encoding="utf-8") as f:
        content = f.read()

    content, patched = apply_argh_hotfix(content)

    if patched:
        with open(file_path, "w", encoding="utf-8") as f:
            f.write(content)
        print(f"   ‚úÖ Patched successfully.")
    else:
        print(f"   ‚ÑπÔ∏è No patches needed (already fixed).")

print("\nDone. You can now run the Example script.")

üöë --- APPLYING HOTFIX FOR ARGH/ARGPARSE --- üöë
Checking commandline.py...
   ‚úÖ Patched successfully.
Checking namespace.py...
   ‚úÖ Patched successfully.

Done. You can now run the Example script.


In [3]:
# ==========================================
#   STEP 1.7: INSTALL CENTERLINE
# ==========================================
import subprocess
import sys

# Cell purpose: install the one package that the earlier dependency lists
# left out.
print("üîß --- INSTALLING MISSING DEPENDENCY --- üîß")

# Network generation requires 'centerline'.
centerline_install = [sys.executable, "-m", "pip", "install", "centerline"]
subprocess.run(centerline_install, check=True)

print("\n‚úÖ Installed 'centerline'.")
print("üëâ You can now Re-Run 'STEP 2: RUN OFFICIAL EXAMPLE.SH'")

üîß --- INSTALLING MISSING DEPENDENCY --- üîß

‚úÖ Installed 'centerline'.
üëâ You can now Re-Run 'STEP 2: RUN OFFICIAL EXAMPLE.SH'


In [None]:
"""
NOTEBOOK 01: tile2net Extraction
Processes multiple tiles using tile2net and saves to Google Drive

Configuration:
- TILES_TO_PROCESS: List of positions (indices) in the config's `tiles` list to process (or "all")
- YEARS: List of years to extract
- BATCH_SIZE: How many tiles to process before saving a checkpoint (the checkpoint code is currently commented out)
"""

import subprocess
import json
from pathlib import Path
from datetime import datetime
from google.colab import drive

# ============================================================
# CONFIGURATION
# ============================================================

# Drive is mounted first because every path below lives under /content/drive.
drive.mount('/content/drive')

# Locations inside the mounted Drive
DRIVE_BASE = Path('/content/drive/MyDrive/NYU/VizMLProject/manhattan_pipeline')
CONFIG_PATH = DRIVE_BASE / 'config/tiles_manhattan.json'
LOG_PATH = DRIVE_BASE / 'config/processing_log.json'  # only referenced by the commented-out log/checkpoint code
RAW_OUTPUT = DRIVE_BASE / 'raw_output'

# What to process
# TILES_TO_PROCESS is either the string "all" or a list of integers that are
# used as positions in the `tiles` list loaded below.
TILES_TO_PROCESS = list(range(312, 332))
# Years to extract for every selected tile; trim this list for a quick validation run.
YEARS = [2024, 2022, 2020, 2018, 2016, 2014, 2012, 2010, 2008, 2006, 2004]
BATCH_SIZE = 5  # checkpoint interval in tiles; only referenced by the commented-out checkpoint code

rule = "=" * 60
print(rule)
print("üó∫Ô∏è  TILE2NET EXTRACTION PIPELINE")
print(rule + "\n")

# Tile definitions come from the JSON config stored on Drive.
config = json.loads(CONFIG_PATH.read_text())
tiles = config['tiles']

# # Load processing log
# with open(LOG_PATH, 'r') as f:
#     log = json.load(f)

tiles_label = 'ALL' if TILES_TO_PROCESS == 'all' else TILES_TO_PROCESS

print(f"üìã Configuration:")
print(f"   Total tiles available: {len(tiles)}")
print(f"   Years to process: {YEARS}")
print(f"   Tiles to process: {tiles_label}")
print()

# ============================================================
# STEP 1: PATCH TILE2NET
# ============================================================
# Adds one ArcGis source class per historical year to tile2net's source.py so
# that the sources named 'nyc_<year>' exist. The patch is skipped when the file
# already mentions 'nyc_2012'.


def find_class_end(source_lines, class_header):
    """Return the index at which new top-level code can follow a class.

    Looks for the first line containing ``class_header`` and then for the
    first later line that is non-blank, starts in column 0 and is not a
    comment, i.e. the first top-level statement after the class body.
    Inserting at that index can never land inside the class body.

    Returns ``len(source_lines)`` when the class runs to the end of the file
    and ``None`` when ``class_header`` does not occur at all.
    """
    for header_idx, text in enumerate(source_lines):
        if class_header in text:
            break
    else:
        return None

    for idx in range(header_idx + 1, len(source_lines)):
        text = source_lines[idx]
        if text.strip() and not text[0].isspace() and not text.startswith('#'):
            return idx
    return len(source_lines)


print("="*60)
print("STEP 1: PATCHING TILE2NET")
print("="*60 + "\n")

sources_file = Path("/content/tile2net/src/tile2net/raster/source.py")

with open(sources_file, 'r') as f:
    lines = f.readlines()

# The generated classes are named 'nyc_<year>', so this marker shows that a
# previous run already inserted them.
already_patched = any('nyc_2012' in line for line in lines)

if already_patched:
    print("‚úÖ Already patched!\n")
else:
    insert_line = find_class_end(lines, 'class NewYorkCity(ArcGis):')

    if insert_line is None:
        # Do not fail silently: without the patch the 'nyc_<year>' sources
        # requested later do not exist.
        print("WARNING: 'class NewYorkCity(ArcGis):' not found in "
              f"{sources_file}; historical sources were NOT added.\n")
    else:
        # Year -> name of the ArcGIS tile service for that year.
        year_configs = {
            2022: 'NYC_Orthos_2022',
            2020: 'NYC_Orthos_-_2020',
            2018: 'NYC_Orthos_2018',
            2016: 'NYC_Orthos_2016',
            2014: 'NYC_Orthos_2014',
            2012: 'NYC_Orthos_2012',
            2010: 'NYC_Orthos_2010',
            2008: 'NYC_Orthos_2008',
            2006: 'NYC_Orthos_2006',
            2004: 'NYC_Ortho_2004',
        }

        new_classes = []
        for year, service_name in year_configs.items():
            # The template starts in column 0 on purpose: it is written to
            # source.py as top-level code.
            class_code = f'''

class NewYorkCity{year}(ArcGis):
    server = 'https://tiles.arcgis.com/tiles/yG5s3afENB5iO9fj/arcgis/rest/services/{service_name}/MapServer'
    name = 'nyc_{year}'
    keyword = 'New York City', 'City of New York'
    year = {year}
'''
            new_classes.append(class_code)

        # All generated classes are inserted as one chunk right after the
        # NewYorkCity class.
        lines.insert(insert_line, ''.join(new_classes))

        with open(sources_file, 'w') as f:
            f.writelines(lines)

        print(f"‚úÖ Patched with {len(year_configs)} historical sources\n")

# ============================================================
# STEP 2: PROCESS TILES
# ============================================================
# For every selected tile and every year, runs
# `tile2net generate | tile2net inference` and records "success" or "failed"
# per year in `results[tile_id]`.
import shlex  # local import: quotes the values interpolated into the bash pipeline

print("="*60)
print("STEP 2: PROCESSING TILES")
print("="*60 + "\n")

# Determine which tiles to process. The entries of TILES_TO_PROCESS are
# positions in the `tiles` list, not tile['id'] values.
if TILES_TO_PROCESS == "all":
    tiles_to_process = tiles
else:
    tiles_to_process = [tiles[i] for i in TILES_TO_PROCESS if i < len(tiles)]
    # Report dropped indices instead of skipping them silently.
    out_of_range = [i for i in TILES_TO_PROCESS if i >= len(tiles)]
    if out_of_range:
        print(f"WARNING: skipping tile indices beyond the {len(tiles)} available tiles: {out_of_range}\n")

total_operations = len(tiles_to_process) * len(YEARS)
current_op = 0

results = {}

for tile_idx, tile in enumerate(tiles_to_process):
    tile_id = tile['id']
    location = tile['location']

    print(f"\n{'='*60}")
    print(f"üó∫Ô∏è  TILE {tile_idx + 1}/{len(tiles_to_process)}: {tile_id}")
    print(f"{'='*60}")

    tile_results = {}

    for year in YEARS:
        current_op += 1
        progress = (current_op / total_operations) * 100

        print(f"\nüìÖ {year} [{progress:.1f}% complete]")

        # Output directory on Drive
        output_dir = RAW_OUTPUT / tile_id / str(year)
        output_dir.mkdir(parents=True, exist_ok=True)

        project_name = f"{tile_id}_{year}"

        # 2024 uses the source named "nyc"; every other year uses "nyc_<year>".
        source = "nyc" if year == 2024 else f"nyc_{year}"

        # Build the shell pipeline. Every interpolated value is quoted so that
        # spaces, quotes or `$` in the config cannot break or alter the command.
        generate_cmd = " ".join([
            "python -m tile2net generate",
            "-l", shlex.quote(str(location)),
            "-o", shlex.quote(str(output_dir)),
            "-n", shlex.quote(project_name),
            "--source", shlex.quote(source),
        ])
        # `pipefail` makes the pipeline fail when `generate` fails; without it
        # bash reports only the exit status of `inference`.
        cmd = [
            "bash", "-c",
            f"set -o pipefail; {generate_cmd} | python -m tile2net inference"
        ]

        print(f"   Source: {source}")
        print(f"   Output: {output_dir}/{project_name}")
        print("   ‚è≥ Running pipeline...")

        result = subprocess.run(
            cmd,
            cwd="/content/tile2net",
            capture_output=True,
            text=True
        )

        if result.returncode == 0:
            print(f"   ‚úÖ Success!")

            # Verify output exists
            network_dir = output_dir / project_name / "polygons"
            tiles_dir = output_dir / project_name / "tiles" / "stitched"

            has_polygons = network_dir.exists() and any(network_dir.rglob("*.shp"))
            has_tiles = tiles_dir.exists() and any(tiles_dir.rglob("*.png"))

            if has_polygons:
                print(f"   üìä Polygons: ‚úì")
            else:
                print(f"   WARNING: no .shp files found under {network_dir}")
            if has_tiles:
                print(f"   üñºÔ∏è  Tiles: ‚úì")
            else:
                print(f"   WARNING: no .png files found under {tiles_dir}")

            tile_results[year] = "success"
        else:
            print(f"   ‚ùå Failed")
            # Show the END of stderr, which is where the final error message is.
            error_msg = result.stderr.strip()[-300:] if result.stderr else "Unknown error"
            print(f"   Error: ...{error_msg}")
            tile_results[year] = "failed"

    results[tile_id] = tile_results

    # # Save checkpoint every BATCH_SIZE tiles
    # if (tile_idx + 1) % BATCH_SIZE == 0:
    #     log['tiles_processed'].append({
    #         'tile_id': tile_id,
    #         'timestamp': datetime.now().isoformat(),
    #         'results': tile_results
    #     })
    #     with open(LOG_PATH, 'w') as f:
    #         json.dump(log, f, indent=2)
    #     print(f"\nüíæ Checkpoint saved ({tile_idx + 1} tiles processed)")

# ============================================================
# STEP 3: FINAL SUMMARY
# ============================================================
# Reports how many tiles had at least one successful year, followed by one
# line per tile with its count of successful years.

print("\n" + "="*60)
print("üìä EXTRACTION COMPLETE")
print("="*60 + "\n")

# A tile counts as processed as soon as one of its years succeeded.
successful_tiles = 0
for per_year in results.values():
    if "success" in per_year.values():
        successful_tiles += 1
total_tiles = len(results)

print(f"‚úÖ Successfully processed: {successful_tiles}/{total_tiles} tiles\n")

for tile_id, tile_results in results.items():
    outcomes = list(tile_results.values())
    success_count = outcomes.count("success")
    total_years = len(outcomes)
    # Only a tile with every year successful gets the check mark.
    if success_count == total_years:
        status = "‚úÖ"
    else:
        status = "‚ö†Ô∏è"
    print(f"{status} {tile_id}: {success_count}/{total_years} years")

# # Update final log
# log['last_updated'] = datetime.now().isoformat()

# with open(LOG_PATH, 'w') as f:
#     json.dump(log, f, indent=2)

print(f"\nüìÅ Raw output saved to: {RAW_OUTPUT}")
print(f"\nüí° Next: Run 02_prepare_for_web.ipynb to create web-ready data")