In [None]:
%load_ext autoreload
%autoreload 2 

import maup
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt
from ast import literal_eval
from tqdm import tqdm
import warnings
from shapely.geometry import MultiPolygon
from collections import defaultdict

In [None]:
warnings.filterwarnings('ignore', 'GeoSeries.isna', UserWarning)

In [None]:
# Input locations; every dataset lives under the same relative data directory.
data_dir = '../data'
bg_2010_path = f'{data_dir}/tl_2010_55_bg10'
bg_2020_path = f'{data_dir}/tl_2020_55_bg'
ward_2020_path = f'{data_dir}/WI_2020_wards'
submissions_path = f'{data_dir}/wi_submissions_20210820_no_media_markets.csv'

# Working CRS as a PROJ string: UTM zone 16 on WGS84, units in metres.
crs = ' '.join([
  '+proj=utm',
  '+zone=16',
  '+ellps=WGS84',
  '+datum=WGS84',
  '+units=m',
  '+no_defs',
])

In [None]:
# Read the submissions CSV and promote the column pandas labelled
# 'Unnamed: 0' to the frame's index under the name 'id'.
submissions_df = (
  pd.read_csv(submissions_path)
  .rename({'Unnamed: 0': 'id'}, axis='columns')
  .set_index('id')
)

In [None]:
def _parse_tiles(value):
  """Return one submission's tiles as a Python object.

  The CSV stores each tile collection as its Python literal text, which is
  parsed with ast.literal_eval. Values that are already parsed containers are
  returned unchanged, so re-running this cell no longer fails on its own
  output. Anything else is still handed to literal_eval and raises as before.
  """
  if isinstance(value, (list, tuple, set, dict)):
    return value
  return literal_eval(value)


submissions_df['tiles'] = submissions_df['tiles'].apply(_parse_tiles)

In [None]:
# 2010 block groups: indexed by GEOID10 and reprojected to the working CRS.
bg_2010_gdf = (
  gpd.read_file(bg_2010_path)
  .set_index('GEOID10')
  .to_crs(crs)
)
# Force string labels on the index.
bg_2010_gdf.index = bg_2010_gdf.index.astype(str)

In [None]:
# 2020 block groups, indexed by GEOID.
# NOTE(review): unlike the 2010 block groups and the wards, this layer is not
# reprojected to `crs` and its index is not cast to str - confirm that is
# intended before combining it with the other layers.
bg_2020_source = gpd.read_file(bg_2020_path)
bg_2020_gdf = bg_2020_source.set_index('GEOID')

In [None]:
# 2020 wards: indexed by the 'Code-2' column and reprojected to the working CRS.
ward_gdf = (
  gpd.read_file(ward_2020_path)
  .set_index('Code-2')
  .to_crs(crs)
)
# Force string labels on the index.
ward_gdf.index = ward_gdf.index.astype(str)

In [None]:
# Overlay the wards on the 2010 block groups. The result is keyed by
# (ward, block group) pairs; the following cell turns it into the
# ward -> block-group lookup.
#
# The earlier maup.assign-based lookup was removed: its inputs (bg_to_ward,
# ward_to_bg_single) were no longer computed, so the loops over them raised
# NameError on a fresh kernel, and their result was overwritten anyway.
with maup.progress():
  inter = maup.intersections(ward_gdf, bg_2010_gdf)

In [None]:
# A block group is linked to a ward when their intersection covers more than
# this share of the ward's own area.
min_ward_share = 0.01

# Ward areas are loop-invariant: compute them once instead of pulling a full
# row out of ward_gdf for every intersection.
ward_area = ward_gdf.geometry.area

ward_to_bg = defaultdict(set)
for (ward, bg), geom in inter.items():
  if geom.area > min_ward_share * ward_area.at[ward]:
    ward_to_bg[ward].add(bg)

In [None]:
# Copy the ward index into an ordinary column; the plotting loop passes
# column='idx' to GeoDataFrame.plot so wards are coloured by that value.
ward_gdf['idx'] = ward_gdf.index

In [None]:
# Share of a community's total area used as the `area_cutoff` handed to
# maup.assign.
assign_cutoff_share = 0.005
# Submissions whose first tile id has this length are not ward-based and are
# skipped (presumably 2010 block-group GEOIDs such as '551332035004' - confirm).
non_ward_id_len = 12


def _plot_overlay(bg_ids, coi_geom, title):
  """Plot selected 2010 block groups in grey beneath a community's wards.

  bg_ids   -- index labels of the bg_2010_gdf rows to draw
  coi_geom -- the community's ward rows, coloured by their 'idx' column
  title    -- axes title
  """
  fig, ax = plt.subplots(dpi=150)
  bg_2010_gdf.loc[bg_2010_gdf.index.isin(bg_ids)].plot(
    ax=ax, color='#ccc', alpha=0.5, edgecolor='black', linewidth=1.5)
  coi_geom.plot(ax=ax, alpha=0.6, column='idx', edgecolor='red', linewidth=1)
  ax.set_title(title)
  plt.show()
  # Close this specific figure so figures do not accumulate across iterations.
  plt.close(fig)


fail_count = 0
# Series.items() has no len(), so pass the total to get a real progress bar.
for idx, tiles in tqdm(submissions_df['tiles'].items(), total=len(submissions_df)):
  if not tiles or len(tiles[0]) == non_ward_id_len:
    continue

  # Ward-based submission.
  coi_geom = ward_gdf[ward_gdf.index.isin(tiles)]

  # View 1: block groups linked to the wards through the precomputed lookup.
  # .get() keeps unknown wards from being inserted into the defaultdict.
  mapped_bgs = set().union(*(ward_to_bg.get(ward, ()) for ward in tiles))
  _plot_overlay(mapped_bgs, coi_geom, 'Mapping')

  coi_dissolved = maup.autorepair(coi_geom.dissolve())

  try:
    merged = coi_dissolved.iloc[0]
    if isinstance(merged, MultiPolygon):
      # Multi-part community: assign against each part as its own target.
      targets = gpd.GeoDataFrame({'geometry': list(merged.geoms)})
      targets.crs = coi_geom.crs
    else:
      targets = coi_dissolved
    cutoff = assign_cutoff_share * targets.area.sum()
    # NOTE(review): confirm the installed maup.assign accepts `area_cutoff`;
    # if it does not, this call raises TypeError and every ward submission is
    # counted as a failure below.
    assignment = maup.assign(bg_2010_gdf, targets, area_cutoff=cutoff)

    # View 2: block groups that received an assignment to the dissolved shape.
    _plot_overlay(assignment.dropna().index, coi_geom, 'Dissolving')

  except TypeError as ex:
    print('Failed to maup.\n', ex)
    fail_count += 1

In [None]:
# Scratch check: length of an example id. The main loop treats submissions
# whose first tile id has length 12 as non-ward submissions.
len('551332035004')