In [21]:
import json
import os
from collections import deque

import matplotlib.patches as patches
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

def tile_to_lat_lon(tile_x, tile_y, zoom):
    """
    Map slippy-map tile coordinates to geographic coordinates.

    Integer (tile_x, tile_y) yields the tile's north-west corner;
    fractional values address points inside a tile.

    Returns:
        (lat, lon) in degrees.
    """
    # Number of tiles along each axis at this zoom level.
    tiles_per_axis = 2.0 ** zoom
    longitude = tile_x / tiles_per_axis * 360.0 - 180.0
    # Inverse Web-Mercator projection of the tile row.
    mercator_y = np.pi * (1 - 2 * tile_y / tiles_per_axis)
    latitude = np.degrees(np.arctan(np.sinh(mercator_y)))
    return latitude, longitude

def find_yard_squares(df):
    """
    Return the visited tiles whose N, E, S and W neighbours are all visited.

    A tile counts as visited when its 'inside_polygon' value equals 1.
    The result is a list of (tile_x, tile_y) tuples in the row order of df;
    it is empty when nothing is visited.
    """
    is_visited = df['inside_polygon'] == 1
    inside = df.loc[is_visited, ['tile_x', 'tile_y']].to_numpy()

    if inside.shape[0] == 0:
        return []

    # Hash-based membership test for the neighbour checks below.
    present = {tuple(row) for row in inside}

    return [
        (cx, cy)
        for cx, cy in inside
        if all(
            neighbour in present
            for neighbour in (
                (cx, cy - 1),  # North
                (cx + 1, cy),  # East
                (cx, cy + 1),  # South
                (cx - 1, cy),  # West
            )
        )
    ]

def find_connected_components(squares):
    """
    Group squares into 4-connected components via breadth-first search.

    Args:
        squares: Iterable of (x, y) tuples.

    Returns:
        A list of components (each a list of (x, y) tuples), ordered from
        the largest component to the smallest.
    """
    if not squares:
        return []

    lookup = set(squares)
    pending = lookup.copy()  # squares not yet placed in a component
    groups = []

    while pending:
        # Seed a new component with any square that is still pending.
        seed = next(iter(pending))
        pending.remove(seed)
        frontier = deque([seed])
        members = []

        while frontier:
            cx, cy = frontier.popleft()
            members.append((cx, cy))

            # Visit order: North, East, South, West.
            for adjacent in ((cx, cy - 1), (cx + 1, cy), (cx, cy + 1), (cx - 1, cy)):
                if adjacent in pending:
                    # Claim the square as it is queued so it is enqueued once.
                    frontier.append(adjacent)
                    pending.remove(adjacent)

        groups.append(members)

    # Largest component first; the sort is stable for equal sizes.
    return sorted(groups, key=len, reverse=True)

def compute_yard_statistics(component, zoom):
    """
    Summarise one yard: tile count, tile-index bounds and lat/lon bounding box.

    Args:
        component: List of (tile_x, tile_y) tuples making up the yard.
        zoom: Zoom level of the tile grid.

    Returns:
        Dict with 'num_squares', the tile index bounds, the original
        'tiles' list, the lat/lon bounding box and its midpoint
        ('center_lat', 'center_lon').
    """
    grid = np.array(component)
    col_x, col_y = grid[:, 0], grid[:, 1]

    x_lo, x_hi = int(col_x.min()), int(col_x.max())
    y_lo, y_hi = int(col_y.min()), int(col_y.max())

    # North-west corner of the first tile and south-east corner of the last
    # one (hence the +1 on the max indices).
    north, west = tile_to_lat_lon(x_lo, y_lo, zoom)
    south, east = tile_to_lat_lon(x_hi + 1, y_hi + 1, zoom)

    return {
        'num_squares': len(component),
        'tile_x_min': x_lo,
        'tile_x_max': x_hi,
        'tile_y_min': y_lo,
        'tile_y_max': y_hi,
        'tiles': component,
        'lat_min': south,
        'lat_max': north,
        'lon_min': west,
        'lon_max': east,
        'center_lat': (north + south) / 2,
        'center_lon': (west + east) / 2,
    }

def visualize_yards(df, yard_squares, components, zoom, region, title_prefix="Squadrat"):
    """
    Draw every visited tile and highlight the tiles that belong to a yard.

    Args:
        df: Grid DataFrame with 'tile_x', 'tile_y' and 'inside_polygon'
            columns; rows with inside_polygon == 1 are drawn as visited.
        yard_squares: Unused; kept so existing callers keep working.
        components: List of yards, each a list of (tile_x, tile_y) tuples.
            May be empty, in which case only visited tiles are drawn.
        zoom: Zoom level of the tile grid.
        region: Region name shown in the title.
        title_prefix: Leading text of the figure title.

    Returns:
        The matplotlib Figure. It is neither shown nor saved here.
    """
    fig, ax = plt.subplots(figsize=(12, 10))

    visited_tiles = df[df['inside_polygon'] == 1][['tile_x', 'tile_y']].values

    def draw_tile(tile_x, tile_y, **style):
        # Each rectangle is built from the tile's own NW and SE corners, so
        # neighbouring tiles share edges exactly even though the height of a
        # tile in degrees changes with latitude.
        lat_nw, lon_nw = tile_to_lat_lon(tile_x, tile_y, zoom)
        lat_se, lon_se = tile_to_lat_lon(tile_x + 1, tile_y + 1, zoom)
        ax.add_patch(patches.Rectangle((lon_nw, lat_se),
                                       lon_se - lon_nw, lat_nw - lat_se,
                                       **style))

    # All visited tiles in light beige/peach
    for tile_x, tile_y in visited_tiles:
        draw_tile(tile_x, tile_y, linewidth=1, edgecolor='#6B4C9A',
                  facecolor='#FFE4C4', alpha=0.7)

    # Yard tiles on top, in orange
    for component in components:
        for tile_x, tile_y in component:
            draw_tile(tile_x, tile_y, linewidth=2, edgecolor='#6B4C9A',
                      facecolor='tab:orange', alpha=0.8)

    # Label each yard with its tile count
    for component in components:
        if len(component) == 0:
            continue
        tiles = np.array(component)
        # Tile indices address NW corners; +0.5 moves the label to the
        # middle of the mean tile instead of its corner.
        label_x = tiles[:, 0].mean() + 0.5
        label_y = tiles[:, 1].mean() + 0.5
        label_lat, label_lon = tile_to_lat_lon(label_x, label_y, zoom)

        ax.text(label_lon, label_lat, str(len(component)),
                fontsize=14, fontweight='bold', color='tab:red',
                ha='center', va='center',
                bbox=dict(boxstyle='round,pad=0.3', facecolor='white',
                          edgecolor='tab:red', alpha=0.8, linewidth=2))

    # Set labels and title
    ax.set_xlabel('Longitude', fontsize=12)
    ax.set_ylabel('Latitude', fontsize=12)
    ax.set_title(f'{title_prefix} (Zoom {zoom}) - {region}',
                 fontsize=14, fontweight='bold')

    if len(visited_tiles) > 0:
        # View = bounding box of the visited tiles plus a one-tile margin.
        x_min, y_min = visited_tiles.min(axis=0)
        x_max, y_max = visited_tiles.max(axis=0)
        lat_top, lon_left = tile_to_lat_lon(x_min - 1, y_min - 1, zoom)
        lat_bottom, lon_right = tile_to_lat_lon(x_max + 2, y_max + 2, zoom)
        ax.set_xlim(lon_left, lon_right)
        ax.set_ylim(lat_bottom, lat_top)
        # Stretch latitude by 1/cos(lat) at the middle of the view so tiles
        # look roughly square on a plain lon/lat plot.
        mid_lat = (lat_top + lat_bottom) / 2
        aspect = 1 / np.cos(np.radians(mid_lat))
    else:
        # Nothing to derive a latitude from.
        aspect = 'equal'

    # Add grid
    ax.grid(True, alpha=0.3, color='gray', linewidth=0.5)
    ax.set_aspect(aspect)
    plt.tight_layout()

    return fig

def process_grid(csv_file, output_prefix, region, min_yard_size=10):
    """
    Compute the yards of one grid CSV and write statistics, tiles and a figure.

    The CSV is expected to provide 'zoom', 'tile_x', 'tile_y' and
    'inside_polygon' columns. The zoom level is taken from the first row.

    Files written (directories are created when missing):
        output/{output_prefix}_yards_stats.json     - one entry per yard
        output/{output_prefix}_{yard_name}_tiles.csv - tiles of each yard
        figure/{region}_{output_prefix}.png          - map of the yards

    Args:
        csv_file: Path of the grid CSV to read.
        output_prefix: Prefix used in the output file names.
        region: Region name, used in the figure title and file name.
        min_yard_size: Yards with fewer squares than this are dropped.

    Returns:
        A list of per-yard statistics dicts (largest yard first), or None
        when no yard of at least min_yard_size squares exists.
    """
    print(f"\nProcessing {csv_file}...")
    print("=" * 60)

    # Read the CSV
    df = pd.read_csv(csv_file)
    zoom = df['zoom'].iloc[0]

    total_visited = (df['inside_polygon'] == 1).sum()
    print(f"Total tiles in grid: {len(df)}")
    print(f"Visited tiles (squadrats): {total_visited}")

    # Find yard squares (squares with all 4 neighbors)
    yard_squares = find_yard_squares(df)
    print(f"Tiles with all 4 neighbors: {len(yard_squares)}")

    if not yard_squares:
        print("No yard found! (No squares have all 4 neighbors)")
        return None

    # Find connected components among yard squares
    components = find_connected_components(yard_squares)
    print(f"\nFound {len(components)} yard component(s) before filtering")

    # Filter out yards smaller than minimum size
    components = [comp for comp in components if len(comp) >= min_yard_size]

    if not components:
        print(f"No yards found with at least {min_yard_size} squares!")
        return None

    print(f"Found {len(components)} yard(s) with at least {min_yard_size} squares")

    # Compute statistics for each yard; the largest is "Yard", the rest "Yard_<n>"
    yards_stats = []
    for idx, component in enumerate(components):
        yard_name = "Yard" if idx == 0 else f"Yard_{idx}"
        stats = compute_yard_statistics(component, zoom)
        stats['name'] = yard_name
        stats['rank'] = idx + 1
        yards_stats.append(stats)

        print(f"\n{yard_name}:")
        print(f"  Number of squares: {stats['num_squares']}")
        print(f"  Tile bounds: X=[{stats['tile_x_min']}, {stats['tile_x_max']}], "
              f"Y=[{stats['tile_y_min']}, {stats['tile_y_max']}]")
        print(f"  Center: ({stats['center_lat']:.6f}, {stats['center_lon']:.6f})")

    # Make sure the target directory exists before anything is written, so a
    # missing folder does not discard the work done above.
    os.makedirs('output', exist_ok=True)

    # Save statistics to JSON: the tile list is replaced by its length
    stats_for_json = []
    for stats in yards_stats:
        summary = {key: value for key, value in stats.items() if key != 'tiles'}
        summary['num_tiles'] = len(stats['tiles'])
        stats_for_json.append(summary)

    json_file = f'output/{output_prefix}_yards_stats.json'
    with open(json_file, 'w') as f:
        json.dump(stats_for_json, f, indent=2)
    print(f"\nStatistics saved to: {json_file}")

    # Save tiles for each yard to CSV
    for stats in yards_stats:
        yard_name = stats['name']
        tiles_df = pd.DataFrame(stats['tiles'], columns=['tile_x', 'tile_y'])
        tiles_df['zoom'] = zoom
        tiles_df['yard_name'] = yard_name

        tiles_csv = f'output/{output_prefix}_{yard_name}_tiles.csv'
        tiles_df.to_csv(tiles_csv, index=False)
        print(f"Tiles for {yard_name} saved to: {tiles_csv}")

    # Create visualization; the title depends on which grid file was given
    title_prefix = "Yards" if "squadrat_" in csv_file.lower() else "Yardinhos"
    fig = visualize_yards(df, yard_squares, components, zoom, region, title_prefix)

    # Save figure
    os.makedirs('figure', exist_ok=True)
    output_file = f'figure/{region}_{output_prefix}.png'
    fig.savefig(output_file, dpi=150, bbox_inches='tight')
    print(f"\nVisualization saved to: {output_file}")

    # Close this figure explicitly rather than whatever is "current".
    plt.close(fig)

    return yards_stats

def _print_grid_summary(label, yards_stats, min_yard_size):
    """Print the summary lines for one grid (main yard plus secondary yards)."""
    if not yards_stats:
        print(f"\n{label}: No yard found with at least {min_yard_size} squares")
        return

    print(f"\n{label}:")
    print(f"  Main Yard: {yards_stats[0]['num_squares']} squares")
    if len(yards_stats) > 1:
        print(f"  Secondary yards: {len(yards_stats) - 1}")
        for stats in yards_stats[1:]:
            print(f"    {stats['name']}: {stats['num_squares']} squares")


def main():
    """
    Compute yards for the squadrat and squadratinho grids of one region.

    Reads output/{grid}_grid_mask_{REGION}.csv for each grid, runs
    process_grid on it and prints a summary of the yards found.
    """
    # Define region (Briancon, Nyon, Paris)
    REGION = "Nyon"
    # Minimum yard size threshold
    MIN_YARD_SIZE = 10

    print("\n" + "=" * 60)
    print("YARD COMPUTATION")
    print("=" * 60)
    print("\nA yard consists of connected squares where each square")
    print("has all 4 neighbors (North, East, South, West) present.")
    print(f"Minimum yard size: {MIN_YARD_SIZE} squares\n")

    # (summary label, file/output prefix) for each grid, in processing order.
    # NOTE: the zoom levels in the labels are fixed text, not read from the data.
    grids = [
        ("Squadrat Grid (Zoom 14)", 'squadrat'),
        ("Squadratinho Grid (Zoom 17)", 'squadratinho'),
    ]

    # Process both grids
    results = []
    for label, prefix in grids:
        yards_stats = process_grid(
            f'output/{prefix}_grid_mask_{REGION}.csv',
            prefix,
            region=REGION,
            min_yard_size=MIN_YARD_SIZE
        )
        results.append((label, yards_stats))

    # Summary
    print("\n" + "=" * 60)
    print("SUMMARY")
    print("=" * 60)
    print(f"(Only showing yards with at least {MIN_YARD_SIZE} squares)")

    for label, yards_stats in results:
        _print_grid_summary(label, yards_stats, MIN_YARD_SIZE)

    print("\n" + "=" * 60)

if __name__ == "__main__":
    main()


YARD COMPUTATION

A yard consists of connected squares where each square
has all 4 neighbors (North, East, South, West) present.
Minimum yard size: 10 squares


Processing output/squadrat_grid_mask_Nyon.csv...
Total tiles in grid: 680
Visited tiles (squadrats): 177
Tiles with all 4 neighbors: 84

Found 8 yard component(s) before filtering
Found 1 yard(s) with at least 10 squares

Yard:
  Number of squares: 71
  Tile bounds: X=[8469, 8480], Y=[5795, 5811]
  Center: (46.384681, 6.218262)

Statistics saved to: output/squadrat_yards_stats.json
Tiles for Yard saved to: output/squadrat_Yard_tiles.csv

Visualization saved to: figure/Nyon_squadrat.png

Processing output/squadratinho_grid_mask_Nyon.csv...
Total tiles in grid: 39864
Visited tiles (squadrats): 3230
Tiles with all 4 neighbors: 753

Found 111 yard component(s) before filtering
Found 10 yard(s) with at least 10 squares

Yard:
  Number of squares: 289
  Tile bounds: X=[67792, 67817], Y=[46413, 46441]
  Center: (46.385774, 6.231995)
