# Unmatched parcels

A significant number of parcels did not match between the ZTRAX and GP16 data, possibly because of parcel splits. This notebook builds a table documenting the unmatched parcels by county; the unmatched parcels should also be spot-checked to determine why they failed to match.

In [None]:
%matplotlib inline
%config InlineBackend.figure_format = 'retina'

import os

import geopandas as gp
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Database connection URI handed to pandas when running SQL. Set the DB_URI
# environment variable to point the notebook at a different server or user;
# the fallback is the original local development database.
DB_URI = os.environ.get('DB_URI', 'postgresql://matthewc@localhost/matthewc')

In [None]:
# Per-county parcel counts for the selected residential land-use codes:
# `total` counts every row, `nonnull_puma` counts only rows whose puma is not NULL
# (count(column) skips NULLs in SQL).
COUNTY_COUNTS_SQL = '''
SELECT "Main_County", count(puma) AS nonnull_puma, count(*) AS total
    FROM diss.zasmt
    WHERE "Building_PropertyLandUseStndCode" IN ('RR101', 'RR999', 'VL101') -- RR are SFH, VL101 is residential vacant
    GROUP BY "Main_County";
'''

county_counts = pd.read_sql(COUNTY_COUNTS_SQL, con=DB_URI)

In [None]:
# Add a grand-total row beneath the per-county rows.
# DataFrame.append was removed in pandas 2.0, so the row is built as a one-row
# frame and attached with pd.concat instead.
# Any existing 'Total' row is dropped first so that re-running this cell does not
# stack a second total (and double the sums).
county_rows = county_counts[county_counts.Main_County != 'Total']
total_row = pd.DataFrame([{
    'Main_County': 'Total',
    'nonnull_puma': county_rows.nonnull_puma.sum(),
    'total': county_rows.total.sum()
}])
county_counts = pd.concat([county_rows, total_row], ignore_index=True)

In [None]:
# Parcels with a NULL puma = all parcels minus those with a non-null puma.
county_counts['null_puma'] = county_counts['total'].sub(county_counts['nonnull_puma'])

In [None]:
# Share of parcels with a NULL puma, as a display string with one decimal place (e.g. '12.3%').
pct_null = county_counts['null_puma'].div(county_counts['total']).mul(100).round(1)
county_counts['percent_null'] = pct_null.astype('str') + '%'

In [None]:
# Title-case the county names for presentation.
county_counts['Main_County'] = county_counts['Main_County'].str.title()

In [None]:
# Inspect the working table before it is formatted for publication.
county_counts

In [None]:
# Presentation table: keep only the reported columns, render the integer counts
# with thousands separators, and swap in human-readable column headings.
display_columns = {
    'Main_County': 'County',
    'total': 'Total single-family parcels',
    'null_puma': 'Unmatched single-family parcels',
    'percent_null': 'Percent unmatched'
}

res = county_counts[list(display_columns)].copy()
for count_col in ('total', 'null_puma'):
    # ',d' requires integer values; it inserts commas as thousands separators.
    res[count_col] = res[count_col].map('{:,d}'.format)
res = res.rename(columns=display_columns)

res

In [None]:
# Print LaTeX source for the table (print gives raw text suitable for copy/paste).
# escape=True is passed explicitly: the percentage strings contain '%', which
# starts a comment in LaTeX, and pandas >= 2.0 no longer escapes special
# characters by default. Older pandas versions already escaped by default, so
# their output is unchanged.
print(res.to_latex(index=False, escape=True))