# Generate tiles for Cloud Factory

In [1]:
# Import functions
%run inference-functions.ipynb import load_data, get_bounds, visualize_tile_predvOSM, assign_cbgs_by_coverage
%run ADU_permit_matching-polygon-pipeline.ipynb import load_sources, get_tile_dicts_all_years

In [2]:
import json
import geopandas as gpd
import glob
import matplotlib.pyplot as plt
import numpy as np
import os
import pandas as pd
import pyproj
from pyproj import Geod
import rasterio
import rasterio.plot
from rasterio.transform import from_bounds
import rasterio.features
import seaborn as sns
import shapely
from shapely.geometry import box, Polygon
from shapely.ops import orient
from tqdm import tqdm

tqdm.pandas()

In [3]:
# Paths
# Every project path is derived from OAK_FP so that relocating the data root
# only requires editing this one constant.
OAK_FP = '/oak/stanford/groups/deho/building_compliance/'
SHAPEFILE_FP = os.path.join(OAK_FP, 'shapefiles')
TIF_FP, INFERENCES_DIR, _ = load_sources()
OUTPUT_FP = os.path.join(OAK_FP, 'outputs', 'cbg-inference-2020')

YEARS = ['2016', '2018', '2020']
TEMPORAL_OUTPUT_FP = os.path.join(OAK_FP, 'outputs', 'Temporal-Error-Analysis')

# Aligned tiles specifically for Cloud Factory
# 2016/2018 tiles live in per-year `cf_tiles/` folders (the trailing '' keeps the
# trailing slash of the original paths); 2020 tiles live in `phase2_superresx2`.
NAIP_FP = os.path.join(OAK_FP, 'san_jose_naip_512')
CF_IMG_FP = {
    '2016': os.path.join(NAIP_FP, '2016', 'cf_tiles', ''),
    '2018': os.path.join(NAIP_FP, '2018', 'cf_tiles', ''),
    '2020': os.path.join(NAIP_FP, 'phase2_superresx2')
}

# CF Params
# CF_ROUND selects which tile-selection cell below is active (1 or 2).
CF_ROUND = 1
assert CF_ROUND in [1, 2], 'CF_ROUND must be 1 or 2, got {}'.format(CF_ROUND)

In [4]:
# Base layers: load_data returns the CBG and zoning frames; the processed OSM
# footprints and the CBG aggregate are read from the 2020 inference output folder.
cbg_sj, zoning = load_data(
    shapefile_fp=SHAPEFILE_FP,
    oak_fp=OAK_FP,
)

osm_buildings_fp = os.path.join(OUTPUT_FP, 'osm_building_processed')
building_footprints_osm = gpd.read_file(osm_buildings_fp)

cbg_aggregate_fp = os.path.join(OUTPUT_FP, 'cbg_aggregate')
cbg_footprints = gpd.read_file(cbg_aggregate_fp)

[INFO] Restricting zoning data to R-1, R-2 and R-M (ex. R-MH)


In [5]:
# Tile-bounds lookup, indexed by year string (e.g. '2020').
# The second return value is not used in this notebook.
tile_bounds_dict_all, _ = get_tile_dicts_all_years(
    oak_fp=OAK_FP,
    inferences_dir=INFERENCES_DIR,
)

100%|██████████| 3192/3192 [00:00<00:00, 11121.77it/s]
100%|██████████| 3192/3192 [00:00<00:00, 11947.87it/s]
100%|██████████| 3074/3074 [00:00<00:00, 12450.38it/s]


In [6]:
# Inferred building footprints, one GeoDataFrame per year in YEARS
building_footprints_infer = dict()
for year in YEARS:
    inferred_buildings_fp = os.path.join(
        OAK_FP,
        'outputs/cbg-inference-{}/inference_building_processed'.format(year),
    )
    building_footprints_infer[year] = gpd.read_file(inferred_buildings_fp)

## Define tiles (CF Round 1, CF Round 2)

In [7]:
if CF_ROUND == 1:
    # Round 1: take the tiles from the checked-sample CSV whose sampling
    # rationale is 'CBG'
    checked_tiles_fp = os.path.join(
        OAK_FP, 'outputs', 'cbg-inference-2020', 'Error-Analysis', 'sampled_tiles_checked.csv'
    )
    checked_tiles = pd.read_csv(checked_tiles_fp)

    is_cbg_sample = checked_tiles['sample_rationale'] == 'CBG'
    checked_tiles = checked_tiles.loc[is_cbg_sample]

    # Unique tile names plus an independent copy used by the cells below
    tile_names = checked_tiles['file'].unique()
    selected_tiles = checked_tiles.copy()

In [8]:
if CF_ROUND == 2:
    # Round 2: 140 CBG-stratified randomly sampled tiles, read from the Round2 CSV
    round2_tiles_fp = os.path.join(
        OAK_FP, 'outputs', 'Temporal-Error-Analysis', 'Round2', 'sampled_tiles.csv'
    )
    selected_tiles = pd.read_csv(round2_tiles_fp)
    tile_names = selected_tiles['file'].unique()

In [9]:
# Attach a footprint geometry to every selected tile, using the 2020 tile bounds.
# Tiles missing from the 2020 lookup get a None geometry instead of raising.
tile_bounds_2020 = tile_bounds_dict_all['2020']

tile_gdf = selected_tiles[['file', 'GEOID']].copy()
tile_gdf['geometry'] = tile_gdf.file.progress_apply(
        # Direct membership test on the lookup: avoids rebuilding and scanning
        # a list of all keys for every row.
        lambda name: get_bounds(tile_bounds_2020, name) if name in tile_bounds_2020 else None
)
tile_gdf = gpd.GeoDataFrame(tile_gdf, crs='EPSG:4326')

100%|██████████| 71/71 [00:00<00:00, 325.71it/s]


## Generate visualizations for selected tiles

In [13]:
# Visualize OSM and predictions on tiles
def visualize_tile_predvOSM_CF(tile_bounds_dict, building_footprints_infer, building_footprints_osm, 
                            file_name, img_fp, tif_fp, final_output_fp, year, save=False, small=True 
                            ):
    """Plot inferred vs. OSM building outlines on top of one tile image.

    Inferred outlines are drawn in red (on top) and OSM outlines in blue.
    The figure is either saved as a PNG or shown inline.

    Parameters
    ----------
    tile_bounds_dict : lookup passed to ``get_bounds`` together with ``file_name``
        to obtain the tile footprint used for clipping.
    building_footprints_infer, building_footprints_osm : GeoDataFrame
        Building footprints; both are clipped to the tile. When ``small`` is
        True they must carry a ``small`` column (rows with ``small == 1`` are kept).
    file_name : str
        Tile name without extension.
    img_fp : str
        Directory holding the tile ``.npy`` arrays. If it contains a ``train``
        sub-directory, the array is searched in ``{train,val,test}/images/``;
        otherwise it is read as ``<file_name>.npy`` for year '2020' and
        ``<file_name>_<year>.npy`` for any other year.
    tif_fp : str
        Directory holding ``<file_name>.tif``; only its bounds are used, to
        georeference the image for plotting.
    final_output_fp : str
        Directory where PNGs are written when ``save`` is True (created if missing).
    year : str
        Year label used in the input and output file names.
    save : bool
        Save the figure instead of showing it. If the output PNG for the
        requested mode already exists, the tile is skipped.
    small : bool
        True: plot only small buildings, output ``<file_name>-small_<year>.png``.
        False: plot all buildings, output ``<file_name>-comp_<year>.png``.

    Returns
    -------
    None

    Raises
    ------
    FileNotFoundError
        If the tile array is not found in any of the train/val/test folders.
    """
    # Skip work only when we would overwrite an existing output of the SAME mode.
    output_kind = 'small' if small else 'comp'
    output_path = os.path.join(final_output_fp, '{}-{}_{}.png'.format(file_name, output_kind, year))
    if save and os.path.exists(output_path):
        return None

    # Locate the image array
    if os.path.exists(os.path.join(img_fp, 'train')):
        # Data split across train, val and test (the 2020 layout). No break:
        # as before, the last split containing the tile wins.
        img_file = None
        for dirname in ['train', 'val', 'test']:
            candidate = os.path.join(img_fp, dirname, 'images', '{}.npy'.format(file_name))
            if os.path.exists(candidate):
                img_file = candidate
        if img_file is None:
            raise FileNotFoundError(
                'No image array for tile {} under {} (train/val/test)'.format(file_name, img_fp))
    elif year == '2020':
        # Unsplit folder, 2020 naming (no year suffix)
        img_file = os.path.join(img_fp, '{}.npy'.format(file_name))
    else:
        # Unsplit folder, year-suffixed naming (2016 and 2018 data)
        img_file = os.path.join(img_fp, '{}_{}.npy'.format(file_name, year))

    tile_img = np.load(img_file)

    # Clip both building layers to the tile footprint
    tile_bounds = get_bounds(tile_bounds_dict, file_name)
    infer_tile = gpd.clip(building_footprints_infer, tile_bounds)
    osm_tile = gpd.clip(building_footprints_osm, tile_bounds)

    # Only the raster bounds are needed; close the dataset right away.
    with rasterio.open(os.path.join(tif_fp, '{}.tif'.format(file_name))) as raster:
        raster_bounds = raster.bounds
    # The array is moved from axis-2-last to bands-first for plotting below,
    # so axis 0 is rows (height) and axis 1 is columns (width).
    # from_bounds expects (west, south, east, north, width, height).
    img_height, img_width = tile_img.shape[0], tile_img.shape[1]
    t = from_bounds(*raster_bounds, img_width, img_height)

    # Keep only small buildings when requested
    if small:
        infer_tile = infer_tile.loc[infer_tile.small == 1]
        osm_tile = osm_tile.loc[osm_tile.small == 1]

    # Replace footprints by their exterior rings so only outlines are drawn
    infer_outline = infer_tile.copy()
    infer_outline.geometry = infer_outline.geometry.exterior

    osm_outline = osm_tile.copy()
    osm_outline.geometry = osm_outline.geometry.exterior

    # Plot inference vs OSM on top of the image
    fig, ax = plt.subplots(figsize=(15, 15))
    try:
        rasterio.plot.show(np.moveaxis(tile_img, 2, 0), transform=t, ax=ax)
        # Outlines are reprojected to EPSG:26910 before plotting
        # NOTE(review): presumably the CRS of the .tif bounds -- confirm
        if len(infer_outline) > 0:
            infer_outline.to_crs('EPSG:26910').plot(ax=ax, alpha=1, color='red', zorder=2)
        if len(osm_outline) > 0:
            osm_outline.to_crs('EPSG:26910').plot(ax=ax, alpha=1, color='blue', zorder=1)
        ax.axis('off')
        if save:
            os.makedirs(final_output_fp, exist_ok=True)
            fig.savefig(output_path, bbox_inches='tight')
        else:
            plt.show()
    finally:
        # Always release this figure, even if plotting fails mid-way
        plt.close(fig)

In [14]:
# Generate visualizations
for year in ['2018']:
    # One output folder per round and year; create it up front so saving cannot
    # fail on a missing directory.
    viz_output_fp = os.path.join(TEMPORAL_OUTPUT_FP, 'Round{}'.format(CF_ROUND), 'CF_Visualizations', year)
    os.makedirs(viz_output_fp, exist_ok=True)

    # Only the tile names are needed here, so the GeoDataFrame is not reprojected.
    for file in tqdm(tile_gdf['file']):
        visualize_tile_predvOSM_CF(
            tile_bounds_dict=tile_bounds_dict_all['2020'], # Use tile bounds from 2020
            building_footprints_infer=building_footprints_infer[year], # Use predictions for specific year
            building_footprints_osm=building_footprints_osm,
            file_name=file,
            img_fp=CF_IMG_FP[year], # Use npy arrays for specific year
            tif_fp=TIF_FP['2020'], # Use tile bounds from 2020
            final_output_fp=viz_output_fp,
            save=True, small=True,
            year=year)

100%|██████████| 71/71 [03:17<00:00,  2.78s/it]


In [10]:
# Generate and save tile centroids
# Tiles without bounds in the 2020 lookup carry a None geometry (see the cell
# that builds tile_gdf); give them a None centroid instead of failing on `.centroid`.
tile_gdf['centroid'] = tile_gdf.geometry.progress_apply(
    lambda geom: geom.centroid if geom is not None else None
)

# Write one row per tile (file name + centroid) into the round's output folder,
# creating the folder if it does not exist yet.
centroid_output_dir = os.path.join(TEMPORAL_OUTPUT_FP, 'Round{}'.format(CF_ROUND))
os.makedirs(centroid_output_dir, exist_ok=True)

tile_gdf[['file', 'centroid']].to_csv(
    os.path.join(centroid_output_dir, 'tile_centroids.csv'), 
    index=False)

100%|██████████| 152/152 [00:00<00:00, 17336.55it/s]


# Playground

In [None]:
# Quick look at the 2016 Cloud Factory array for a single tile.
# The tile name must be a string (the bare identifier raised NameError).
tile = 'm_3712142_nw_10_060_20200525_173'
tile_img_2016 = np.load(os.path.join(CF_IMG_FP['2016'], '{}_2016.npy'.format(tile)))

# Show the loaded array, not the tile name
plt.imshow(tile_img_2016)
plt.show()