In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
from bisect import bisect
import matplotlib.pyplot as plt
from functools import cache

sns.set_theme()
%matplotlib inline

In [None]:
# Load the results table; later cells read its `simulation`, `income` and `price` columns.
df = pd.read_csv('final_results_1000.csv')

# Constants

In [None]:
# Scenario parameters. Rates are stored as fractions (0.20 means 20%).
downpayment = 0.20     # fraction of the price paid up front
interest = 0.05        # interest rate, compounded once per year of the mortgage
mortgage_duration = 30 # years
rofr = 0.1             # fractional reduction in sales price
spillover = 0.1        # fraction of sales that don't go through
off_market = 0.1       # fraction of sales off market
sfh = 1                # include single family homes in OPA? 1 = yes, 0 = no
                       # NOTE(review): `eligible_sfh` is currently hard-coded to ones, so this flag has no effect yet
dti = 0.36             # maximum debt-to-income ratio (yearly mortgage payment / income) for the mortgage to be approved
ami = 149_600          # area median income (AMI) in dollars; denominator of `p_ami`

funding = 10_000_000   # pot of funding (dollars)

In [None]:
# Total number of rows, number of distinct simulations, and the average number of rows per simulation.
n = len(df)
num_sims = len(set(df.simulation))
num_per_sim = n / num_sims

In [None]:
# Normalise column names to lower snake_case so they can be used as attributes (e.g. `df.some_column`).
# NOTE(review): the cell above already reads `df.simulation` before this rename runs, so the CSV
# header is presumably lower-case already — confirm.
df.columns = [c.lower().replace(' ', '_') for c in df.columns]

# Computations

In [None]:
# Seed NumPy's global generator so the random eligibility draws below are reproducible on
# Restart & Run All. pandas `.sample()` calls without a `random_state` draw from the same
# global generator, so they become reproducible as well.
np.random.seed(0)

# Income expressed as a percentage of the area median income.
df['p_ami'] = df.income / ami * 100
# Yearly payment: the financed share of the discounted price, grown by compound interest over
# the full term and then spread evenly across the years of the mortgage.
df['mortgage'] = (1 - downpayment) * ((1 + interest) ** mortgage_duration) * (1 - rofr) * df.price / mortgage_duration
# A purchase is viable when the yearly payment stays within the allowed debt-to-income ratio.
df['viable'] = (df.mortgage / df.income <= dti)

# Each row is independently knocked out with probability `spillover` / `off_market`.
df['eligible_spillover'] = 1 - (np.random.random(n) < spillover) # 0 = not eligible, 1 = still eligible
df['eligible_market'] = 1 - (np.random.random(n) < off_market)   # 0 = not eligible, 1 = still eligible
df['eligible_sfh'] = np.ones(n) # TODO: update this

# 1 only when every eligibility flag is set and the purchase is viable, otherwise 0.
df['viable_eligible'] = df.eligible_spillover * df.eligible_market * df.eligible_sfh * df.viable

In [2]:
def get_category_coarse(x):
    """Map a percent-of-AMI value to one of three income brackets.

    Values below 80 are 'Low Income', values from 80 up to (but excluding) 160
    are 'Middle Income', and values of 160 or more are 'Upper Income'.
    """
    labels = ('Low Income', 'Middle Income', 'Upper Income')
    upper_bounds = (80, 160)
    # bisect counts how many bounds are <= x, which is exactly the bracket index.
    return labels[bisect(upper_bounds, x)]

def get_category(x):
    """Map a percent-of-AMI value to one of five income brackets.

    The bracket boundaries are 30, 50, 80 and 160; a value equal to a boundary
    belongs to the bracket above it.
    """
    labels = (
        'Extremely Low Income',
        'Very Low Income',
        'Low Income',
        'Middle Income',
        'Upper Income',
    )
    upper_bounds = (30, 50, 80, 160)
    # bisect counts how many bounds are <= x, which is exactly the bracket index.
    return labels[bisect(upper_bounds, x)]

In [None]:
# Label every row with its fine (five-level) and coarse (three-level) income bracket,
# both derived from income as a percentage of AMI.
df['income_category'] = df.p_ami.apply(get_category)
df['income_category_coarse'] = df.p_ami.apply(get_category_coarse)

# Groups

In [None]:
# Share of rows that are both viable and eligible, computed separately for each simulation.
# The column is selected before aggregating: averaging the whole frame would also try to
# average the string-valued income category columns, which raises on pandas >= 2.0.
ve = df.groupby('simulation').viable_eligible.mean()
# Mean and spread of that share across simulations.
m, s = ve.mean(), ve.std()

print(f'viable & eligible: {m:.2%}')
print(f'95% confidence interval (viable & eligible): {m - 2*s:.2%}, {m + 2*s:.2%}')

# Funding optimization

First we filter to the sales that were eligible but not viable, i.e. the households for whom the only barrier to purchase is income...

In [None]:
# Keep the rows that passed every eligibility flag...
potential = df[df.eligible_spillover * df.eligible_market * df.eligible_sfh == 1]
# ...and of those, only the ones that are not viable (the mortgage exceeds the allowed share of income).
potential = potential[potential.viable_eligible == 0]

In [None]:
# Average income of the eligible-but-not-viable rows, computed separately for each simulation.
# The column is selected before aggregating: averaging the whole frame would also try to
# average the string-valued income category columns, which raises on pandas >= 2.0.
avg_income_unaffordable = potential.groupby('simulation').income.mean()
# Mean and spread of the per-simulation averages.
m, s = avg_income_unaffordable.mean(), avg_income_unaffordable.std()

print(f"avg income for people who couldn't buy: {m}")
print(f"95% confidence interval (avg income for people who couldn't buy): {m - 2*s:.0f}, {m + 2*s:.0f}")

...next we try to allocate funding to these folks

## Surgical optimization

We calculate the gap in income needed to get to the viable threshold and fill that.

In [None]:
# Ids of the simulations that have at least one row in `potential`; the allocation functions loop over these.
sim_ids = set(potential.simulation)

In [None]:
def count_sum_to_thresh(df, thresh):
    """Count how many leading rows of `df` can be funded with a budget of `thresh`.

    Rows are taken in their existing order (callers sort by `gap` first) and
    their `gap` values are accumulated; counting stops at the first row whose
    running total exceeds `thresh`.

    Parameters
    ----------
    df : DataFrame with a numeric `gap` column.
    thresh : total budget available.

    Returns
    -------
    (num_helped, helped_rows) : the number of leading rows whose cumulative gap
        stays within `thresh`, and those rows as a slice of `df`.

    The count is 0 for an empty frame or a negative budget, and `len(df)` when
    the budget covers every row.
    """
    running_total = df.gap.cumsum().to_numpy()
    over_budget = running_total > thresh
    if over_budget.any():
        # argmax gives the position of the first row that no longer fits,
        # which equals the number of rows before it.
        num_helped = int(over_budget.argmax())
    else:
        # The budget never runs out (also covers the empty frame).
        num_helped = len(running_total)

    return num_helped, df.iloc[:num_helped]

In [None]:
@cache
def surgical(funding):
    """Spend `funding` on the smallest income gaps first, separately in every simulation.

    Reads the module-level `potential` frame (which must carry a `gap` column)
    and `sim_ids`. Results are memoised per `funding` value, so a later rebuild
    of `potential` is not picked up for a value that was already computed.

    Returns (counts, helped): an array holding one count per simulation, and
    the concatenation of the rows selected in each simulation.
    """
    outcomes = [
        count_sum_to_thresh(
            potential[potential.simulation == sid].sort_values('gap'),
            funding,
        )
        for sid in sim_ids
    ]

    counts = np.array([count for count, _ in outcomes])
    helped = pd.concat([rows for _, rows in outcomes])

    return counts, helped

## Surgical equitable optimization
AMI < 80% (households at exactly 80% of AMI fall in the next bracket, matching the income categories)

In [None]:
@cache
def surgical_equitable(funding):
    """Like the surgical allocation, but restricted to rows with `p_ami < 80`.

    Reads the module-level `potential` frame (which must carry a `gap` column)
    and `sim_ids`. Within every simulation the smallest gaps are funded first.
    Results are memoised per `funding` value, so a later rebuild of `potential`
    is not picked up for a value that was already computed.

    Returns (counts, helped): an array holding one count per simulation, and
    the concatenation of the rows selected in each simulation.
    """
    outcomes = []
    for sid in sim_ids:
        in_scope = (potential.simulation == sid) & (potential.p_ami < 80)
        candidates = potential[in_scope].sort_values('gap')
        outcomes.append(count_sum_to_thresh(candidates, funding))

    counts = np.array([count for count, _ in outcomes])
    helped = pd.concat([rows for _, rows in outcomes])

    return counts, helped

## Thanos equitable optimization

In [None]:
def count_sum_to_thresh_thanos(a, thresh, full_df):
    """Count how many leading entries of `a` can be funded with a budget of `thresh`.

    The costs in `a` are accumulated in order; counting stops at the first
    entry whose running total exceeds `thresh`.

    Parameters
    ----------
    a : one-dimensional sequence of costs, aligned row-for-row with `full_df`.
    thresh : total budget available.
    full_df : DataFrame whose leading rows are returned as the helped rows.

    Returns
    -------
    (num_helped, helped_rows) : the number of leading entries whose cumulative
        cost stays within `thresh`, and the matching leading rows of `full_df`.

    The count is 0 for empty input or a negative budget, and `len(a)` when the
    budget covers every entry.
    """
    running_total = np.cumsum(np.asarray(a, dtype=float))
    over_budget = running_total > thresh
    if over_budget.any():
        # argmax gives the position of the first entry that no longer fits,
        # which equals the number of entries before it.
        num_helped = int(over_budget.argmax())
    else:
        # The budget never runs out (also covers empty input).
        num_helped = len(running_total)

    return num_helped, full_df.iloc[:num_helped]

In [None]:
@cache
def thanos_equitable(funding):
    """Allocate `funding` within a random half of the rows that have `p_ami < 80`.

    For every simulation in `sim_ids`, the matching rows of the module-level
    `potential` frame are filtered to `p_ami < 80`, a random 50% sample is
    drawn, and the sample is sorted by `gap`. Each gap is divided by 0.63 to
    get its cost before the budget is applied.

    Results are memoised per `funding` value, so repeated calls reuse the same
    random sample, and a later rebuild of `potential` is not picked up for a
    value that was already computed.

    Returns (counts, helped): an array holding one count per simulation, and
    the concatenation of the rows selected in each simulation.
    """
    outcomes = []
    for sid in sim_ids:
        in_scope = (potential.simulation == sid) & (potential.p_ami < 80)
        sampled = potential[in_scope].sample(frac = 0.5)  # thanos
        sampled = sampled.sort_values('gap')
        # https://web.archive.org/web/20200321115234id_/https://www.innovations.harvard.edu/sites/default/files/hpd_0202_stegman.pdf
        cost = sampled.gap / 0.63
        outcomes.append(count_sum_to_thresh_thanos(cost, funding, sampled))

    counts = np.array([count for count, _ in outcomes])
    helped = pd.concat([rows for _, rows in outcomes])

    return counts, helped

## Thanos optimization

In [None]:
def thanos(funding):
    """Allocate `funding` within a random half of the candidate rows of each simulation.

    For every simulation in `sim_ids`, a random 50% sample of the matching rows
    of the module-level `potential` frame is drawn and sorted by `gap`. Each gap
    is divided by 0.63 to get its cost before the budget is applied.

    This function is not memoised, so every call draws a fresh sample.

    Returns (counts, helped): an array holding one count per simulation, and
    the concatenation of the rows selected in each simulation.
    """
    outcomes = []
    for sid in sim_ids:
        sampled = potential[potential.simulation == sid].sample(frac = 0.5)  # thanos
        sampled = sampled.sort_values('gap')
        # https://web.archive.org/web/20200321115234id_/https://www.innovations.harvard.edu/sites/default/files/hpd_0202_stegman.pdf
        cost = sampled.gap / 0.63
        outcomes.append(count_sum_to_thresh_thanos(cost, funding, sampled))

    counts = np.array([count for count, _ in outcomes])
    helped = pd.concat([rows for _, rows in outcomes])

    return counts, helped

# Plotting

In [None]:
# Rows that passed every eligibility flag but are not viable.
potential = df[df.eligible_spillover * df.eligible_market * df.eligible_sfh == 1]
# Take an explicit copy so the column added below is written to an independent frame
# rather than to a slice of `df` (avoids pandas' SettingWithCopyWarning).
potential = potential[potential.viable_eligible == 0].copy()
# Extra income needed for the yearly mortgage payment to meet the debt-to-income limit.
potential['gap'] = potential.mortgage / dti - potential.income

In [None]:
# Funding levels (dollars) compared in the plots below.
funding_options = [0, 2_000_000, 5_000_000, 10_000_000, 20_000_000]

In [None]:
# For each strategy, the number helped averaged over simulations at every funding level.
# The 'Human' labels correspond to the thanos variants.
d = {
    'Surgical': [surgical(f)[0].mean() for f in funding_options],
    'Surgical (Equitable Focus)': [surgical_equitable(f)[0].mean() for f in funding_options],
    'Human': [thanos(f)[0].mean() for f in funding_options],
    'Human (Equitable Focus)': [thanos_equitable(f)[0].mean() for f in funding_options]
}

In [None]:
# Add the funding levels so they can serve as the x-axis index of the plot.
d['funding'] = funding_options

In [None]:
# Line plot of the average number helped per simulation against funding, one line per strategy.
plt.rcParams['figure.figsize'] = [10, 5]

sns.lineplot(data=pd.DataFrame(d).set_index('funding'), markers=True)

plt.xlabel('funding')
plt.ylabel('number of renters who need funding that can now afford')
plt.gcf().patch.set_facecolor('white')
plt.xticks(funding_options)

# Saved under its own name: a later figure cell writes 'g.png', which would
# otherwise overwrite this figure on disk.
plt.savefig('g_num_helped.png', dpi=300, bbox_inches='tight')

In [None]:
def perc_low_income(people_helped):
    """Percentage of the 'Low Income' rows in `potential` that appear in `people_helped`.

    Parameters
    ----------
    people_helped : DataFrame with an `income_category` column.

    Returns
    -------
    The percentage (0-100). It is 0 when no 'Low Income' row was helped, and
    NaN when the module-level `potential` frame has no 'Low Income' rows at
    all, because the percentage is undefined in that case.
    """
    label = 'Low Income'
    num_potential = (potential.income_category == label).sum()
    if num_potential == 0:
        return float('nan')
    # Counting matches directly, instead of dividing two value_counts() results,
    # yields 0 rather than NaN or KeyError when the label is missing.
    num_helped = (people_helped.income_category == label).sum()
    return num_helped / num_potential * 100

In [None]:
# For each strategy, the percentage of 'Low Income' candidates helped at every funding level.
# The 'Human' labels correspond to the thanos variants.
d = {
    'funding': funding_options,
    'Surgical': [perc_low_income(surgical(f)[1]) for f in funding_options],
    'Surgical (Equitable Focus)': [perc_low_income(surgical_equitable(f)[1]) for f in funding_options],
    'Human': [perc_low_income(thanos(f)[1]) for f in funding_options],
    'Human (Equitable Focus)': [perc_low_income(thanos_equitable(f)[1]) for f in funding_options]
}

In [None]:
# Pin the first point of every series (funding_options[0], i.e. zero funding) to 0.
# Presumably this replaces an undefined percentage when nobody is helped — confirm.
for k in ('Surgical', 'Surgical (Equitable Focus)', 'Human', 'Human (Equitable Focus)'):
    d[k][0] = 0

# Line plot of the percentage of low-income candidates helped against funding, one line per strategy.
plt.rcParams['figure.figsize'] = [10, 5]

sns.lineplot(data=pd.DataFrame(d).set_index('funding'), markers=True)

plt.xlabel('funding')
plt.ylabel('% of low income renters who need funding & can afford')
plt.gcf().patch.set_facecolor('white')
plt.xticks(funding_options)

plt.savefig('g.png', dpi=300, bbox_inches='tight')

# Matrix

In [None]:
# DO NOTHING
# Scenario with no price reduction and no spillover, and half of all sales off market.
# Rates are stored as fractions (0.20 means 20%).

downpayment = 0.20     # fraction of the price paid up front
interest = 0.055       # interest rate, compounded once per year of the mortgage
mortgage_duration = 30 # years
rofr = 0               # fractional reduction in sales price
spillover = 0          # fraction of sales that don't go through
off_market = 0.5       # fraction of sales off market
dti = 0.36             # maximum debt-to-income ratio (yearly mortgage payment / income) for the mortgage to be approved
ami = 149_600          # area median income (AMI) in dollars
funding = 2_000_000    # pot of funding (dollars)

# Reload the raw results so every derived column is rebuilt under this scenario's parameters.
df = pd.read_csv('final_results_1000.csv')
n = len(df)
sim_ids = set(df.simulation)
num_sims = len(sim_ids)
num_per_sim = n / num_sims

df['p_ami'] = df.income / ami * 100
df['mortgage'] = (1 - downpayment) * ((1 + interest) ** mortgage_duration) * (1 - rofr) * df.price / mortgage_duration
df['viable'] = (df.mortgage / df.income <= dti)

df['eligible_spillover'] = 1 - (np.random.random(n) < spillover) # 0 = not eligible, 1 = still eligible
df['eligible_market'] = 1 - (np.random.random(n) < off_market)   # 0 = not eligible, 1 = still eligible
df['eligible_sfh'] = np.ones(n) # TODO: update this

df['viable_eligible'] = df.eligible_spillover * df.eligible_market * df.eligible_sfh * df.viable
# Note: this scenario uses the coarse (three-level) brackets for `income_category`.
df['income_category'] = df.p_ami.apply(get_category_coarse)

potential = df[df.eligible_spillover * df.eligible_market * df.eligible_sfh == 1]
# Explicit copy so the `gap` column is written to an independent frame rather than a slice of `df`.
potential = potential[potential.viable_eligible == 0].copy()
potential['gap'] = potential.mortgage / dti - potential.income

# The memoised strategies are keyed on `funding` only. Drop results computed from the previous
# `potential`, otherwise a funding level that was already evaluated returns stale numbers.
surgical.cache_clear()
surgical_equitable.cache_clear()
thanos_equitable.cache_clear()

nh, ph = thanos(funding)
nhe, phe = thanos_equitable(funding)

# Viable & eligible purchases per simulation without any funding. The column is selected
# before aggregating so the string-valued columns are not summed as well.
no_funding = df.groupby('simulation').viable_eligible.sum()

# Average share of rows per simulation that can buy once the funded households are added.
p_ev = (no_funding + nh).mean() / num_per_sim


In [None]:
# Number of candidate (eligible but not viable) rows in each income category.
potential.income_category.value_counts()

In [None]:
# Number of rows in each income category across the whole table.
df.income_category.value_counts()

In [None]:
# Percentage of 'Low Income' candidates reached by the thanos allocation computed above.
perc_low_income(ph)

# 🪦

In [None]:
# Rows that passed every eligibility flag but are not viable.
potential = df[df.eligible_spillover * df.eligible_market * df.eligible_sfh == 1]
# Take an explicit copy so the column added below is written to an independent frame
# rather than to a slice of `df` (avoids pandas' SettingWithCopyWarning).
potential = potential[potential.viable_eligible == 0].copy()
# Extra income needed for the yearly mortgage payment to meet the debt-to-income limit.
potential['gap'] = potential.mortgage / dti - potential.income

In [None]:
# Run the thanos allocation with the current `funding`: one count per simulation plus the rows helped.
num_helped, people_helped = thanos(funding)

In [None]:
# Mean and standard deviation of the number helped across simulations.
m, s = num_helped.mean(), num_helped.std()

print(f"num helped with ${funding}: {m}")
# The denominator is the average number of rows per simulation.
print(f"% helped with ${funding}: {m / (n / len(sim_ids)):.2%}")
print(f"95% confidence interval (num helped with ${funding}): {m - 2*s:.0f}, {m + 2*s:.0f}")

In [None]:
# Mean and standard deviation of income over all helped rows, pooled across simulations.
# NOTE(review): the band printed below is mean ± 2 standard deviations of individual incomes,
# i.e. a spread of incomes rather than a confidence interval for the mean.
m, s = people_helped.income.mean(), people_helped.income.std()

print(f"avg income of people helped with funding: {m:.0f}")
print(f"95% confidence interval (avg income of people helped with funding): {m - 2*s:.0f}, {m + 2*s:.0f}")

In [None]:
# Percentage of candidates helped in each income category, smallest first.
# Categories with no helped rows come out as NaN because the two counts are aligned by label.
(people_helped.income_category.value_counts() / potential.income_category.value_counts() * 100).sort_values()