In [1]:
import geopandas as gpd
import pandas as pd

In [2]:
# Download the October 2019 draft and unzip
# https://app.box.com/s/j0w6wibvrtk5wfcvedwj8ud2yhhng337

v1_file_path = 'file/path/here'

v1 = gpd.read_file(
    v1_file_path
).dropna(subset=['geometry', 'PHASE_2_UID'])

In [3]:
# Download the January 2020 draft and unzip
# https://app.box.com/s/af8defnrglqdwnqihhiulnnvozvg5164

v2_file_path = 'file/path/here'

v2 = gpd.read_file(
    v2_file_path
).dropna(subset=['geometry'])

In [4]:
# areas in v1 that have no id in v2

v1.PHASE_2_UID.astype(int).isin(v2.PHASE_2_UI).value_counts().rename({
    True: 'ID in v2',
    False: 'ID does not appear in v2'
})

ID in v2                    235908
ID does not appear in v2        15
Name: PHASE_2_UID, dtype: int64

In [5]:
dat_version_merge = v2.set_index('PHASE_2_UI').join(
    v1.assign(
        PHASE_2_UI = lambda x: x['PHASE_2_UID'].astype(int)
    ).set_index('PHASE_2_UI')[[
        'NEW_ZONE_V4', 'BENCHMARK_ZONE', 'ZONING_ZTY'
    ]],
    
    how='inner'
)

In [6]:
dat_version_diff = dat_version_merge.NEW_ZONE_V4.value_counts().rename(
    'v1'
).to_frame().join(
    dat_version_merge.LDC_ZONE_S.value_counts().rename('v2'),
    how='outer'
).assign(
    version_diff = lambda x: x['v2'] - x['v1'],
    version_diff_pct_change = lambda x: (x['v2'] - x['v1']) / x['v1']
).sort_values('version_diff')

In [7]:
dat_version_diff.head(10)

Unnamed: 0,v1,v2,version_diff,version_diff_pct_change
R4,17272.0,12352.0,-4920.0,-0.284854
R2A,118719.0,115232.0,-3487.0,-0.029372
RM1,7464.0,6229.0,-1235.0,-0.165461
MS3,1097.0,244.0,-853.0,-0.777575
MS2B,1317.0,471.0,-846.0,-0.642369
MU5A,1833.0,1039.0,-794.0,-0.43317
MU5B,1391.0,1050.0,-341.0,-0.245147
RR,2300.0,1992.0,-308.0,-0.133913
RM1-HD,303.0,156.0,-147.0,-0.485149
MU3,633.0,528.0,-105.0,-0.165877


In [8]:
"""
https://app.box.com/s/eh45jzf8tocyk7bcjp799s284e8fl9rj >> ABOUT field

R4
Missing middle residential house scale that allows four units on a property

RM1
Missing middle that allows 6 units on a property
"""

dat_version_diff.loc[['R4', 'RM1']][[
    'v1',
    'v2'
]].sum().astype(int).rename('total').to_frame().assign(
    change = lambda x: x['total'].diff(),
    pct_change = lambda x: x['total'].pct_change().apply('{:.2%}'.format)
)

Unnamed: 0,total,change,pct_change
v1,24736,,nan%
v2,18581,-6155.0,-24.88%


In [9]:
# Zone classifications that increased in number

dat_version_diff.sort_values('version_diff', ascending=False).head(10)

Unnamed: 0,v1,v2,version_diff,version_diff_pct_change
R2B,18803.0,24277.0,5474.0,0.291124
R2C,12394.0,14297.0,1903.0,0.153542
R3,300.0,1771.0,1471.0,4.903333
F25,4784.0,6116.0,1332.0,0.278428
R1,118.0,323.0,205.0,1.737288
RM2,2115.0,2195.0,80.0,0.037825
TOD,1157.0,1165.0,8.0,0.006914
R2C-HD,1.0,8.0,7.0,7.0
R2B-H,113.0,119.0,6.0,0.053097
P,2347.0,2351.0,4.0,0.001704


In [10]:
# Count number of times one zone converted to another

dat_version_merge.assign(
    zones_match = lambda x: x['NEW_ZONE_V4'] == x['LDC_ZONE_S'],
    conversion = lambda x: x['NEW_ZONE_V4'] + ' > ' + x['LDC_ZONE_S']
).query(
    'zones_match == False'
).conversion.value_counts().to_frame()

Unnamed: 0,conversion
R4 > R2B,6366
MU5B-A > MU5B-Q,2792
R2A > R4,2382
MU5A-A > MU5A-Q,1940
MU3-A > MU3-Q,1524
R4 > R3,1461
R2A > R2C,1091
MU5A > MU5A-V,780
RM1 > R4,776
R2B > R4,637
