In [None]:
%matplotlib inline
%config InlineBackend.figure_format = 'retina'

import pandas as pd
import numpy as np
from warnings import warn

In [None]:
def weighted_percentile(vals, percentiles, weights):
    """Compute weighted percentiles of ``vals`` by linear interpolation.

    Each observation is placed at the midpoint of its own share of the
    normalized cumulative weight, and the requested percentiles are linearly
    interpolated between those points (the approach used by wquantiles:
    https://github.com/nudomarinero/wquantiles/blob/master/wquantiles.py).

    Parameters
    ----------
    vals : array-like
        One-dimensional numeric observations (pandas Series, NumPy array,
        list, ...). Values are paired with ``weights`` by position; any
        pandas index is ignored.
    percentiles : scalar or array-like
        Percentile(s) to compute, on a 0-100 scale. Requests that fall before
        the first or after the last centered weight return the smallest or
        largest observation respectively (``np.interp`` clamping).
    weights : array-like
        One weight per observation, same length as ``vals``.

    Returns
    -------
    float or numpy.ndarray
        A scalar when ``percentiles`` is a scalar, otherwise an array with
        one entry per requested percentile.

    Raises
    ------
    ValueError
        If ``vals`` and ``weights`` differ in length, if nothing is left
        after dropping missing values, or if the weights sum to zero.

    Warns
    -----
    UserWarning
        If any observation or weight is missing; those pairs are dropped.
    """
    # Work on plain arrays so that every step below is positional and the
    # function accepts lists and ndarrays as well as Series.
    vals = np.asarray(vals)
    weights = np.asarray(weights)

    if len(vals) != len(weights):
        raise ValueError('values and weights arrays are not same length!')

    # An observation is unusable if either its value or its weight is missing.
    nas = pd.isnull(vals) | pd.isnull(weights)

    nnas = np.sum(nas)
    if nnas > 0:
        warn(f'found {nnas} NAs in data, dropping them')

    vals = vals[~nas].astype(float)
    weights = weights[~nas].astype(float)

    if len(vals) == 0:
        raise ValueError('no non-NA observations to compute percentiles from')

    total_weight = np.sum(weights)
    if total_weight == 0:
        raise ValueError('weights sum to zero, cannot compute percentiles')

    weights = weights / total_weight

    # A stable sort keeps tied values in input order, so the result is
    # reproducible (tie order changes the centered cumulative weights).
    order = np.argsort(vals, kind='stable')
    vals = vals[order]
    weights = weights[order]

    cum_weights = np.cumsum(weights)
    fractions = np.asarray(percentiles, dtype=float) / 100

    # center weights, i.e. put the point value halfway through the weight
    centered_cum_weights = cum_weights - 0.5 * weights
    return np.interp(fractions, centered_cum_weights, vals)

In [None]:
# One row per `serial` for the Los Angeles-Long Beach-Anaheim metro: rented
# units with at least one person record (numprec >= 1) and gross rent above
# 200, excluding mobile homes/trailers and boats/tents/vans.
# NOTE(review): presumably `ipums` holds one row per person, so GROUP BY serial
# with MIN() collapses it to one row per household -- confirm against the schema.
ipums_rent_query = """
SELECT serial, MIN(hhwt) AS hhwt, MIN(rentgrs) AS rentgrs FROM ipums
WHERE met2013 = 'Los Angeles-Long Beach-Anaheim, CA'
AND numprec >= 1
AND ownershp = 'Rented'
AND rentgrs > 200
AND unitsstr NOT IN ('Mobile home or trailer', 'Boat, tent, van, other')
GROUP BY serial
"""
# SQLAlchemy-style SQLite URL; the four slashes denote an absolute path.
ipums_db_url = 'sqlite:////Volumes/Pheasant Ridge/IPUMS/scag_sorting_5yr/scag_sorting_5yr.db'

ipums_rents = pd.read_sql(sql=ipums_rent_query, con=ipums_db_url)

In [None]:
# Rent computed the way ZORI does it: the household-weighted mean of the
# middle quintile, i.e. rents between the weighted 40th and 60th percentiles
# (both bounds inclusive).
low = weighted_percentile(ipums_rents['rentgrs'], 40, ipums_rents['hhwt'])
high = weighted_percentile(ipums_rents['rentgrs'], 60, ipums_rents['hhwt'])
mid_quintile = ipums_rents.loc[ipums_rents['rentgrs'].between(low, high)]
mean_rent = np.average(mid_quintile['rentgrs'], weights=mid_quintile['hhwt'])
print(f'{low=} {high=} {mean_rent=}')

In [None]:
# Zillow's estimated rent for the LA metro area throughout 2020 is 2545 +/- 1.
# Dividing it by the IPUMS mid-quintile mean gives the factor that makes IPUMS
# rents comparable to Zillow's.
zillow_la_rent_2020 = 2545
scale_factor = zillow_la_rent_2020 / mean_rent
print(f'{scale_factor=}')

In [None]:
# Household-weighted median (50th percentile) gross rent, shown as cell output.
weighted_percentile(ipums_rents['rentgrs'], 50, ipums_rents['hhwt'])