In [None]:
# --- Imports: standard library, third-party, then project-local ------------
import warnings

import geopandas as gpd
import matplotlib.pyplot as pyplot
import numpy as np
import pandas as pd
# Use the public IPython.display API and import `display` explicitly, so the
# cell does not rely on IPython injecting `display` into builtins.
from IPython.display import HTML, display

from normalization import normalize_od, normalized_remained, normalized_moved, top_k_destinations
from viz import build_od_map_flat, build_od_map_pdk

# --- Notebook-wide configuration --------------------------------------------
# Inject CSS so that rendered DataFrame tables are horizontally centred.
display(HTML("""
<style>
table.dataframe {
    margin-left: auto !important;
    margin-right: auto !important;
    /* optional: keep the table width no wider than its contents */
    width: auto;
}
</style>
"""))

# Keep displayed frames compact.
pd.set_option("display.max_columns", 10)
pd.set_option("display.max_rows", 15)

# Silence the warning whose message matches the geographic-CRS centroid pattern.
warnings.filterwarnings("ignore", message="Geometry is in a geographic CRS.*centroid.*")

## Load and normalize OD matrix

In [None]:
# Inputs and tunables for this stage, named once instead of repeated as literals.
OD_CSV_PATH = 'OD_BASELINE_MAR_31_HOMES.csv'
ORIGIN_COL = 'zipcode_origin'
DEST_COL = 'zipcode_dest'
COUNT_COL = 'count'
PCT_COL = 'percentage'
DP_EPSILON = 2  # differential-privacy budget (eps) passed to the normalization helpers
TOP_K = 5

# Raw origin-destination table.
od_matrix = pd.read_csv(OD_CSV_PATH)

# Normalized OD, with DP budget eps=DP_EPSILON.
od_perc = normalize_od(od_matrix, ORIGIN_COL, DEST_COL, COUNT_COL, diff_privacy_eps=DP_EPSILON)

# Percentages that remained. No diff_privacy_eps is passed here, so the helper's
# default applies (the original note says "no DP" -- confirm in normalization.py).
rem_perc = normalized_remained(od_matrix, ORIGIN_COL, DEST_COL, COUNT_COL)

# Percentages that moved, normalized WITH DP budget eps=DP_EPSILON.
# NOTE(review): the previous comment here said "no DP", which contradicted the
# diff_privacy_eps argument actually passed; confirm that DP is intended.
moved_perc = normalized_moved(od_matrix, ORIGIN_COL, DEST_COL, COUNT_COL, diff_privacy_eps=DP_EPSILON)

# Top-k destination ZIP codes by (possibly-noisy) percentage.
top5_dest = top_k_destinations(moved_perc, DEST_COL, PCT_COL, k=TOP_K)

## ZIP code polygons for California (CA)

In [None]:
# ZIP codes that appear on either end of a flow. The maps below are given both
# the origin and the destination column, so polygons are kept for both; filtering
# on destinations alone would drop any ZIP that only ever appears as an origin.
od_zipcodes = set(od_matrix.zipcode_origin) | set(od_matrix.zipcode_dest)

# Read the California ZIP-code polygons and keep only those present in the OD table.
# NOTE(review): assumes `geography_id` has the same dtype as the ZIP columns read
# from the CSV; a str/int mismatch would make `isin` match nothing -- confirm.
california_zipcodes = (
    gpd.read_file('zip_codes_ca.geojson')
    .loc[lambda zips: zips.geography_id.isin(od_zipcodes)]
    .reset_index(drop=True)
)

## Summary and map

In [None]:
# Show the eight rows with the highest `percentage`, indexed by (origin, destination).
(
    od_perc
    .sort_values(by='percentage', ascending=False)
    .set_index(['zipcode_origin', 'zipcode_dest'])
    .head(8)
)

In [None]:
# Flat OD map drawn from the normalized table `od_perc`, weighting each flow by
# its `percentage` column; polygons are matched on `geography_id`.
flat_map = build_od_map_flat(
    od_df=od_perc,
    polygons=california_zipcodes,
    id_col="geography_id",
    origin="zipcode_origin",
    dest="zipcode_dest",
    weight="percentage",
)
flat_map

In [None]:
# Build the second OD map (no legend) from the "moved" percentages.
# NOTE(review): min_flow=2 presumably hides flows below 2 in `percentage` units --
# confirm against viz.build_od_map_pdk.
deck = build_od_map_pdk(
    california_zipcodes,
    moved_perc,
    id_col="geography_id",
    origin="zipcode_origin",
    dest="zipcode_dest",
    pct="percentage",
    min_flow=2,
)
deck