# Error analysis on San Jose Census Block Groups

In [1]:
# Import functions
%run inference-functions.ipynb import load_data, get_bounds, visualize_tile_predvOSM, assign_cbgs_by_coverage

In [14]:
import json
import geopandas as gpd
import glob
import matplotlib.pyplot as plt
import numpy as np
import os
import pandas as pd
import pyproj
from pyproj import Geod
import rasterio
import rasterio.plot
from rasterio.transform import from_bounds
import rasterio.features
import seaborn as sns
import shapely
from shapely.geometry import box, Polygon, Point
from shapely.ops import orient
from tqdm import tqdm
import pickle
import matplotlib.image as mpimg
import matplotlib.patches as patches

tqdm.pandas()

In [2]:
cf_fp = '/oak/stanford/groups/deho/building_compliance/CloudFactory/'
cf_exports = os.path.join(cf_fp, 'exports', 'annotations_toy')
cf_images = os.path.join(cf_fp, 'images')

In [3]:
with open(os.path.join(cf_images, "img_metadata.p"), "rb") as output_file:
    img_metadata = pickle.load(output_file)

In [33]:
img_metadata['28412011']

{'lons': [595862.25,
  595862.55,
  595862.85,
  595863.15,
  595863.45,
  595863.75,
  595864.05,
  595864.35,
  595864.65,
  595864.95,
  595865.25,
  595865.55,
  595865.85,
  595866.15,
  595866.45,
  595866.75,
  595867.05,
  595867.35,
  595867.65,
  595867.95,
  595868.25,
  595868.55,
  595868.85,
  595869.15,
  595869.45,
  595869.75,
  595870.05,
  595870.35,
  595870.65,
  595870.95,
  595871.25,
  595871.55,
  595871.85,
  595872.15,
  595872.45,
  595872.75,
  595873.05,
  595873.35,
  595873.65,
  595873.95,
  595874.25,
  595874.55,
  595874.85,
  595875.15,
  595875.45,
  595875.75,
  595876.05,
  595876.35,
  595876.65,
  595876.95,
  595877.25,
  595877.55,
  595877.85,
  595878.15,
  595878.45,
  595878.75,
  595879.05,
  595879.35,
  595879.65,
  595879.95,
  595880.25,
  595880.55,
  595880.85,
  595881.15,
  595881.45,
  595881.75,
  595882.05,
  595882.35,
  595882.65,
  595882.95,
  595883.25,
  595883.55,
  595883.85,
  595884.15,
  595884.45,
  595884.75,
  59

In [43]:
p = Point(img_metadata['28412011']['lons'][round(len(img_metadata['28412011']['lons'])/2)], img_metadata['28412011']['lats'][round(len(img_metadata['28412011']['lats'])/2)])

wgs84 = pyproj.CRS('EPSG:26910')
utm = pyproj.CRS('EPSG:4326')
project = pyproj.Transformer.from_crs(wgs84, utm, always_xy=True).transform

# convert
p_4326 = shapely.ops.transform(project, p)

In [44]:
print(p_4326)

POINT (-121.9180526816877 37.29847744935833)


In [None]:
37.2985436687817, -121.9181972799668 

In [None]:
 37.29847744935833, -121.9180526816877

In [80]:
df = pd.DataFrame(columns=['apn', 'year', 'geometry'])

for i in os.listdir(cf_exports):
    apn = i.split('.')[0]
    image = mpimg.imread(os.path.join(cf_images, 'round1', f'{apn}.png'))
    
    assert apn in img_metadata
    
    lons = img_metadata[apn]['lons']
    lats = img_metadata[apn]['lats']
    
    with open(os.path.join(cf_exports, i), 'r') as f:
        parcel_label = json.load(f)
        
        # get image height and width
        height = parcel_label['metadata']['system']['height']
        width = parcel_label['metadata']['system']['width']
        
        # height will not have any buffer
        assert len(lats)*2 == height
        buffer = width - len(lons)*2

        if parcel_label['annotated']:
            assert parcel_label["annotationsCount"] > 0
            assert parcel_label["annotationsCount"] == len(parcel_label['annotations'])
            show = False
#             fig, ax = plt.subplots()
#             ax.imshow(image)

            for label in parcel_label['annotations']:
                # ignore all labels that are "blank"
                if label['label'] == 'small_building':
                    # i think there are some mislabels -- small_buildings not having coordinate key
                    if 'coordinates' in label:
                        coords = label['coordinates']
                        assert len(coords) == 1

                        coords = coords[0]
                        
                        show = True
                        coords_raw0 = [(round(i['x']), round(i['y'])) for i in coords]
                        x = [round(i['x']) for i in coords]
                        min_x = min(x)
                        max_x = max(x)
                        y = [round(i['y']) for i in coords]
                        min_y = min(y)
                        max_y = max(y)
                        rect = patches.Rectangle((min_x, min_y), max_x-min_x, max_y-min_y, linewidth=1, edgecolor='r', facecolor='none')
#                         ax.add_patch(rect)
                        
                        
                        coords_raw = [(round(i['x']), round(i['y'])-len(lats)) for i in coords]
#                         print(coords_raw)
                        is_2016 = True
                        test_x = [i[0] > len(lons) for i in coords_raw]
                        test_y = [i[1] <= len(lats) for i in coords_raw]
                        
                        if all(test_x):
                            is_2016 = False
                            # if 2020, then adjust x by buffer
                            coords_raw = [(i[0]-len(lons)-buffer, i[1]) for i in coords_raw]
                        else:
                            if any(test_x):
                                print('Wrong behavior')
                        
                        # this is to ensure that the pixel count starts from 1 rather than 0
                        est_g0 = [i[0] > 3 for i in coords_raw] + [i[1] > 0 for i in coords_raw]
                        
                        assert all(test_y)
                        assert all(est_g0)
                        
                                
                        # convert pixel coords into 26910
#                         coords_26910 = [[lons[i[0]-1], lats[i[1]]-1] for i in coords_raw]
                        
    
                        """
                        There is a systematic error when we displace by 1 like above -- not sure why tbh!
                        - x-4 and keeping y as is leads to the most accurate behavior, but this is hacky
                        """
                        coords_26910 = [[lons[i[0]-4], lats[i[1]]] for i in coords_raw]
                        polygon_26910 = Polygon(coords_26910)
                        
                        wgs84 = pyproj.CRS('EPSG:26910')
                        utm = pyproj.CRS('EPSG:4326')
                        project = pyproj.Transformer.from_crs(wgs84, utm, always_xy=True).transform

                        # convert
                        polygon_4326 = shapely.ops.transform(project, polygon_26910)
                        
                        ## NEED TO CHECK IF COORDS ARE ACCURATE
#                         print(polygon_4326)
                        year = '2016' if is_2016 else '2020'
                        df.loc[len(df)] = [apn, year, polygon_4326]
#             if show:
#                 plt.show()
#             else:
#                 plt.clf()           
                            
        else:
            print(apn)
            assert parcel_label['annotationsCount'] == 0

1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
2
2


In [82]:
print(df.iloc[18]['geometry'])

POLYGON ((-121.9028749302738 37.31363996610875, -121.9028452509674 37.31358560985851, -121.9028996471418 37.31356988991337, -121.9029259414548 37.3136242147656, -121.9028749302738 37.31363996610875))


In [None]:
37.31363996610875 -121.9028749302738 

In [None]:
  37.30927522765177 -121.9411210900947

In [None]:
 37.29853195270901 -121.9181974477803

In [None]:
37.31444714883764 -121.896721866559 