# Generate tiles for Cloud Factory

In [50]:
# Import functions
# NOTE(review): `%run` executes the helper notebook and makes its definitions available here.
# The trailing `import load_data, ...` tokens are not a Python import statement -- presumably
# they are only forwarded to the executed notebook as command-line style arguments, so they
# serve as a list of the helpers this notebook relies on (confirm against the IPython `%run` docs).
%run inference-functions.ipynb import load_data, get_bounds, visualize_tile_predvOSM, assign_cbgs_by_coverage

In [51]:
import json
import geopandas as gpd
import glob
import matplotlib.pyplot as plt
import numpy as np
import os
import pandas as pd
import pyproj
from pyproj import Geod
import rasterio
import rasterio.plot
from rasterio.transform import from_bounds
import rasterio.features
import seaborn as sns
import shapely
from shapely.geometry import box, Polygon
from shapely.ops import orient
from tqdm import tqdm

tqdm.pandas()

In [52]:
# Paths -- every location is derived from the shared project root (OAK_FP)
OAK_FP = '/oak/stanford/groups/deho/building_compliance/'
SHAPEFILE_FP = os.path.join(OAK_FP, 'shapefiles')
YEARS = ['2016', '2018', '2020']

# Raw NAIP GeoTIFF tiles, keyed by imagery year (2020 sits directly under the NAIP root)
TIF_FP = {
    '2016': os.path.join(OAK_FP, 'san_jose_naip_512', '2016', 'raw_tif'),
    '2018': os.path.join(OAK_FP, 'san_jose_naip_512', '2018', 'raw_tif'),
    '2020': os.path.join(OAK_FP, 'san_jose_naip_512', 'raw_tif'),
}

# Model inference outputs, keyed by imagery year (the 2020 entry keeps its trailing slash)
INFERENCES_DIR = {
    '2016': os.path.join(OAK_FP, 'san_jose_naip_512', '2016', 'infer'),
    '2018': os.path.join(OAK_FP, 'san_jose_naip_512', '2018', 'infer'),
    '2020': os.path.join(OAK_FP, 'san_jose_naip_512', 'phase2_superresx2', 'infer', ''),
}

# Output directories
OUTPUT_FP = os.path.join(OAK_FP, 'outputs', 'cbg-inference-2020')
TEMPORAL_OUTPUT_FP = os.path.join(OAK_FP, 'outputs', 'Temporal-Error-Analysis')

In [53]:
# Census block groups and zoning come from the shared helper; the processed OSM footprints
# and the CBG aggregate are read back from the 2020 inference output directory.
cbg_sj, zoning = load_data(oak_fp=OAK_FP, shapefile_fp=SHAPEFILE_FP)
building_footprints_osm, cbg_footprints = (
    gpd.read_file(os.path.join(OUTPUT_FP, layer_name))
    for layer_name in ('osm_building_processed', 'cbg_aggregate')
)

[INFO] Restricting zoning data to R-1, R-2 and R-M (ex. R-MH)


In [54]:
# Directories holding the .npy image tiles, keyed by year (there is no 2018 entry).
# The 2016 path keeps its trailing slash.
IMG_FP = {
    '2016': os.path.join(OAK_FP, 'san_jose_naip_512', '2016', 'cf_tiles', ''),
    '2020': os.path.join(OAK_FP, 'san_jose_naip_512/phase2_superresx2'),
}


In [55]:
# Load tile dict for 2020 only
# A loop-local path name is used so the OUTPUT_FP constant from the paths cell is not
# rebound as a side effect of running this cell.
tile_bounds_dict_all = {}
for year in ['2020']:
    year_output_fp = os.path.join(OAK_FP, 'outputs', 'cbg-inference-{}'.format(year))
    with open(os.path.join(year_output_fp, 'tile_bounds.json'), "r") as f:
        tile_bounds_dict = json.load(f)
    # Keyed by year so other years can be added to the loop without changing consumers
    tile_bounds_dict_all[year] = tile_bounds_dict

In [56]:
# Load inferred buildings for all years: one processed footprint layer per entry in YEARS
building_footprints_infer = {}
for year in YEARS:
    building_footprints_infer[year] = gpd.read_file(
        os.path.join(
            OAK_FP,
            'outputs',
            'cbg-inference-{}'.format(year),
            'inference_building_processed',
        )
    )

In [57]:
# Get CBG tile names: read the manually checked sample and keep only the rows that were
# sampled with the 'CBG' rationale
checked_tiles = pd.read_csv(
    os.path.join(OAK_FP, 'outputs', 'cbg-inference-2020', 'Error-Analysis', 'sampled_tiles_checked.csv')
).loc[lambda checked: checked['sample_rationale'] == 'CBG']
tile_names = checked_tiles['file'].unique()

In [17]:
# Preview the first rows of the checked tiles (already restricted to sample_rationale == 'CBG')
checked_tiles.head()

Unnamed: 0,file,GEOID,sample_rationale,false_pos_area,false_neg_area,FP_detached_area,FP_attached_area,false_pos_perc,false_neg_perc,FP_detached_perc,...,n_osm(small),list_infer(small),Check,n_infer(small)_groundTruth,n_GT_missed(small),n_GT_missedOSM(small),Notes,n_infer(small)_groundTruth_2ndcheck,n_GT_missed(small)_2ndcheck,n_GT_missedOSM(small)_2ndcheck
0,m_3712141_nw_10_060_20200525_385,60855062033,CBG,0.0,0.0,0.0,0.0,,,,...,0,[],Nathan,0.0,0.0,0.0,,,,
3,m_3712141_nw_10_060_20200525_390,60855063011,CBG,150.668292,59.261588,16.483612,134.184679,26.938958,10.595762,2.947212,...,5,"[[(-121.9664834283035, 37.32168068446622)], [(...",Nathan,4.0,0.0,,DISCARD -- buildings are weird and large,4.0,0.0,
4,m_3712141_ne_10_060_20200525_359,60855016023,CBG,1760.713617,1216.862605,141.877614,1618.836004,24.687363,17.061905,1.989298,...,34,"[[(-121.87326994548212, 37.329435842327754)], ...",Nathan,23.0,8.0,2.0,,,,
5,m_3712141_ne_10_060_20200525_232,60855002003,CBG,5313.53453,1548.480732,4180.9733,1132.56123,48.152179,14.032603,37.888711,...,22,"[[(-121.89850293673905, 37.345966478096926)], ...",Nathan,39.0,6.0,11.0,part of tile has no OSM annotations,,,
6,m_3712142_sw_10_060_20200525_251,60855031272,CBG,0.0,0.0,0.0,0.0,,,,...,0,[],Nathan,0.0,0.0,0.0,,,,


In [58]:
# Build a GeoDataFrame of the checked tiles: one row per tile with its bounds geometry.
# Tiles that are missing from the 2020 bounds dict get a None geometry instead of raising.
tile_gdf = checked_tiles[['file', 'GEOID']].copy()
tile_gdf['geometry'] = tile_gdf.file.progress_apply(
    # Direct dict membership test; no need to materialise the key list for every row
    lambda name: get_bounds(tile_bounds_dict_all['2020'], name)
    if name in tile_bounds_dict_all['2020'] else None
)
tile_gdf = gpd.GeoDataFrame(tile_gdf, crs='EPSG:4326')

100%|██████████| 71/71 [00:00<00:00, 8556.61it/s]


In [46]:
# Visualize OSM and predictions on tiles
def _exterior_outlines(footprints):
    """Return a copy of `footprints` whose geometries are replaced by their exterior rings."""
    outlines = footprints.copy()
    outlines.geometry = outlines.geometry.exterior
    return outlines


def _plot_footprints_on_tile(tile_img, transform, infer_outlines, osm_outlines, black_poly, save_fp=None):
    """Draw one figure: the tile image with inferred (red) and OSM (blue) outlines on top.

    The figure is written to `save_fp` when given, otherwise shown, and is always closed
    afterwards so repeated calls do not accumulate open figures.
    """
    fig, ax = plt.subplots(figsize=(15, 15))
    # rasterio expects channels first; the tile array is stored channels last
    rasterio.plot.show(np.moveaxis(tile_img, 2, 0), transform=transform, ax=ax)
    if len(infer_outlines) > 0:
        infer_outlines.to_crs('EPSG:26910').plot(ax=ax, alpha=1, color='red', zorder=2)
    if len(osm_outlines) > 0:
        osm_outlines.to_crs('EPSG:26910').plot(ax=ax, alpha=1, color='blue', zorder=1)
    if black_poly is not None:
        # Drawn last (highest zorder) so it hides everything outside the cropping geometry
        black_poly.to_crs('EPSG:26910').plot(ax=ax, alpha=1, color='black', zorder=3)
    ax.axis('off')
    if save_fp is not None:
        fig.savefig(save_fp, bbox_inches='tight')
    else:
        plt.show()
    plt.close(fig)


# Visualize OSM and predictions on tiles
def visualize_tile_predvOSM_CF(tile_bounds_dict, building_footprints_infer, building_footprints_osm,
                            file_name, img_fp, tif_fp, final_output_fp, year, save=False, small=True,
                            cropping=False):
    """Plot inferred vs. OSM building outlines on top of a single image tile.

    One figure shows all buildings; if `small` is set, a second figure shows only the
    footprints whose `small` column equals 1. Overlays are reprojected to EPSG:26910
    before being drawn on the raster transform (presumably the CRS of the .tif -- confirm).

    Parameters
    ----------
    tile_bounds_dict : mapping handed to `get_bounds` together with `file_name`.
    building_footprints_infer, building_footprints_osm : GeoDataFrames that are clipped
        to the tile bounds. They need a `small` column when `small` is True.
    file_name : tile name without extension.
    img_fp : directory with the .npy tiles. Either it contains `train`/`val`/`test`
        sub-folders (each with an `images` folder), or the tiles sit directly in it,
        named `<file_name>.npy` for year '2020' and `<file_name>_<year>.npy` otherwise.
    tif_fp : directory containing `<file_name>.tif`, used only for its bounds.
    final_output_fp : directory the PNGs are written to when `save` is True.
    year : year label (string) used to pick the .npy name and to name the PNGs.
    save : write PNGs instead of showing the figures.
    small : also produce the small-buildings figure.
    cropping : optional GeoDataFrame; everything on the tile outside its union is
        blacked out. `False`, `None` or an empty frame disable cropping.

    Raises
    ------
    FileNotFoundError
        If no .npy file for the tile exists in any of the train/val/test folders.
    """
    # Locate the image array
    if os.path.exists(os.path.join(img_fp, 'train')):
        # For 2020 data which is split across train, val and test.
        # No early exit: if a tile exists in several splits, the last one wins (as before).
        img_file = None
        for dirname in ['train', 'val', 'test']:
            candidate = os.path.join(img_fp, dirname, 'images', '{}.npy'.format(file_name))
            if os.path.exists(candidate):
                img_file = candidate
        if img_file is None:
            raise FileNotFoundError(
                'No .npy image for tile {} under the train/val/test folders of {}'.format(file_name, img_fp))
    else:
        # For data which is not split into train/val/test
        if year == '2020':
            img_file = os.path.join(img_fp, '{}.npy'.format(file_name))
        else:
            img_file = os.path.join(img_fp, '{}_{}.npy'.format(file_name, year))

    # Get image, OSM and inference
    tile_img = np.load(img_file)
    tile_bounds = get_bounds(tile_bounds_dict, file_name)
    infer_tile = gpd.clip(building_footprints_infer, tile_bounds)
    osm_tile = gpd.clip(building_footprints_osm, tile_bounds)

    # Get raster bounds for plotting purposes; the dataset is closed as soon as they are read
    with rasterio.open(os.path.join(tif_fp, '{}.tif'.format(file_name))) as raster:
        raster_bounds = raster.bounds
    # from_bounds takes (west, south, east, north, width, height): width is the number of
    # columns (axis 1) and height the number of rows (axis 0) of the channels-last array
    t = from_bounds(*raster_bounds, width=tile_img.shape[1], height=tile_img.shape[0])

    # Black out everything beyond a given geometry (skipped for False/None/empty cropping)
    black_poly = None
    if cropping is not None and not isinstance(cropping, bool) and len(cropping) > 0:
        outside = tile_bounds.difference(cropping.to_crs('EPSG:4326').geometry.unary_union)
        black_poly = gpd.GeoDataFrame(geometry=[outside], crs='EPSG:4326')

    # Plot inference vs OSM
    comp_fp = os.path.join(final_output_fp, '{}-comp_{}.png'.format(file_name, year)) if save else None
    _plot_footprints_on_tile(
        tile_img, t, _exterior_outlines(infer_tile), _exterior_outlines(osm_tile), black_poly,
        save_fp=comp_fp)

    # Plot small buildings
    if small:
        infer_tile_small = infer_tile.loc[infer_tile.small == 1]
        osm_tile_small = osm_tile.loc[osm_tile.small == 1]
        small_fp = os.path.join(final_output_fp, '{}-small_{}.png'.format(file_name, year)) if save else None
        _plot_footprints_on_tile(
            tile_img, t, _exterior_outlines(infer_tile_small), _exterior_outlines(osm_tile_small),
            black_poly, save_fp=small_fp)

In [49]:
# Generate the CF visualizations for the year(s) listed in the loop (currently 2020 only)
cf_visualizations_fp = os.path.join(TEMPORAL_OUTPUT_FP, 'CF_Visualizations')
# savefig does not create missing directories, so make sure the target exists first
os.makedirs(cf_visualizations_fp, exist_ok=True)
for year in ['2020']:
    # Only the tile names are needed here, so tile_gdf is not reprojected
    for tile_file in tqdm(tile_gdf.file):
        visualize_tile_predvOSM_CF(
            tile_bounds_dict=tile_bounds_dict_all['2020'], # Use tile bounds from 2020
            building_footprints_infer=building_footprints_infer[year], # Use predictions for specific year
            building_footprints_osm=building_footprints_osm,
            file_name=tile_file,
            img_fp=IMG_FP[year], # Use npy arrays for specific year
            tif_fp=TIF_FP['2020'], # Use tile rasters from 2020
            final_output_fp=cf_visualizations_fp,
            save=True, small=True,
            cropping=gpd.GeoDataFrame(geometry=[]), # Empty frame: no cropping
            year=year)

100%|██████████| 71/71 [22:09<00:00, 18.73s/it]


In [60]:
# Preview the tile table: tile file name, GEOID and the tile's bounds geometry
tile_gdf.head()

Unnamed: 0,file,GEOID,geometry
0,m_3712141_nw_10_060_20200525_385,60855062033,"POLYGON ((-121.98562 37.32182, -121.98562 37.3..."
3,m_3712141_nw_10_060_20200525_390,60855063011,"POLYGON ((-121.96828 37.32167, -121.96828 37.3..."
4,m_3712141_ne_10_060_20200525_359,60855016023,"POLYGON ((-121.87449 37.32696, -121.87449 37.3..."
5,m_3712141_ne_10_060_20200525_232,60855002003,"POLYGON ((-121.89852 37.34379, -121.89852 37.3..."
6,m_3712142_sw_10_060_20200525_251,60855031272,"POLYGON ((-121.83959 37.27853, -121.83959 37.2..."


In [61]:
# Generate tile centroids
# Tiles without known bounds carry a None geometry (see how tile_gdf['geometry'] is built),
# so they get a None centroid instead of raising AttributeError.
tile_gdf['centroid'] = tile_gdf.geometry.progress_apply(
    lambda geom: geom.centroid if geom is not None else None
)

100%|██████████| 71/71 [00:00<00:00, 7391.30it/s]


In [63]:
# Save the tile name and centroid columns (without the index) to the temporal analysis outputs
tile_gdf[['file', 'centroid']].to_csv(os.path.join(TEMPORAL_OUTPUT_FP, 'tile_centroids.csv'), index=False)

In [None]:
# Sanity check: load one 2016 tile array and display it
tile = 'm_3712142_nw_10_060_20200525_173'  # tile name is a string, not an identifier
x = np.load(os.path.join(IMG_FP['2016'], '{}_2016.npy'.format(tile)))
plt.imshow(x)  # show the loaded array, not the tile name
plt.show()