# Hockey Stick analysis

In [1]:
from pathlib import Path
from io import BytesIO

import requests
import pandas as pd
import numpy as np
import altair as alt

In [2]:
# Project directories, all derived from the parent of the current working
# directory (assumes the notebook is launched from a sub-folder of the
# project root -- TODO confirm).
BASE_DIR = Path.cwd().parent
DATA_DIR = BASE_DIR / "data"
INPUT_DIR = DATA_DIR / "raw"
OUTPUT_DIR = DATA_DIR / "processed"
LOCAL_SHP_DIR = BASE_DIR / "shp"

X value

In [3]:
# X inputs (Gardena). Presumably the predicted median household income at
# age 35 and the 2016 median household income, in dollars -- verify source.
gardena_pred_medhhinc_35yo = 30_000
gardena_medhhinc_2016 = 40_000

Y value

In [4]:
# Y inputs (Fremont). Presumably the predicted median household income at
# age 35 and the 2016 median household income, in dollars -- verify source.
fremont_pred_medhhinc_35yo = 46_000
fremont_medhhinc_2016 = 120_000

Offsets

Initial step at age 2

Formula:

```
Base Offset = 0.62

Sub Offset = 0.89

X = low opportunity neighborhood
Y = high opportunity neighborhood

initial move = (Y-X) * Base_Offset + X
```

In [5]:
# Multiplier applied to the base gain for a move at age 2; used as the
# default starting offset of estimate_income_gain.
SUB_OFFSET = 0.89

In [6]:
def calc_casual_move(x, y, offset = 1, base_offset = 0.62):
    """Estimate the outcome of moving from neighborhood X to neighborhood Y.

    Computes ``offset * ((y - x) * base_offset) + x``: a share of the gap
    between the two neighborhoods is added on top of the origin value.

    Parameters
    ----------
    x : number
        Value for the origin (low opportunity) neighborhood.
    y : number
        Value for the destination (high opportunity) neighborhood.
    offset : number, optional
        Extra multiplier applied to the gain. The default of 1 leaves the
        gain undiscounted; 0 returns ``x`` unchanged.
    base_offset : number, optional
        Share of the ``y - x`` gap that is credited to the move. Defaults
        to 0.62, the value that was previously hard-coded in the body.

    Returns
    -------
    number
        ``x`` plus the (discounted) share of the gap between ``y`` and ``x``.
    """
    # Keep the original evaluation order so floating-point results are unchanged.
    return offset * ((y - x) * base_offset) + x

Causal estimated start move (top dotted line in figure)

In [7]:
# Gain from X (Gardena) to Y (Fremont) on the 2016 values, with the offset
# left at its default of 1.
calc_casual_move(x=gardena_medhhinc_2016, y=fremont_medhhinc_2016)

89600.0

Bottom start move (bottom dotted line in figure): the unchanged value of X

In [8]:
# Baseline with no move: X itself (the Gardena 2016 value).
gardena_medhhinc_2016

40000

In [9]:
def estimate_income_gain(x, y, initial_offset = SUB_OFFSET):
    """Build the income estimate for each age (2 through 23) at which a move happens.

    The offset passed to ``calc_casual_move`` starts at ``initial_offset`` for
    age 2 and is lowered by 0.025 for every following age.

    Returns a list of dicts, one per age in ascending order, each holding the
    keys ``"age"`` and ``"income"``.
    """
    # amount taken off the offset for each age after the first
    step_per_year = 0.025
    first_age = 2
    current_offset = initial_offset
    rows = []
    for age in range(first_age, 24):
        # the first age uses the initial offset untouched; the subtraction is
        # cumulative so every later age builds on the previous offset
        if age > first_age:
            current_offset -= step_per_year
        rows.append({"age": age, "income": calc_casual_move(x, y, current_offset)})
    return rows

### Pred Income

In [10]:
# Gain computed with the offset left at its default of 1; drawn as the upper
# reference line in the chart below.
predicted_income_moved_at_birth = calc_casual_move(
    x=gardena_pred_medhhinc_35yo,
    y=fremont_pred_medhhinc_35yo,
)
predicted_income_moved_at_birth

39920.0

In [11]:
# Baseline (X): the Gardena value, drawn as the lower reference line in the chart.
gardena_pred_medhhinc_35yo

30000

In [12]:
# One row per move age, computed with the default starting offset.
prediction_data = pd.DataFrame(
    data=estimate_income_gain(
        x=gardena_pred_medhhinc_35yo,
        y=fremont_pred_medhhinc_35yo,
    )
)
prediction_data.head()

Unnamed: 0,age,income
0,2,38828.8
1,3,38580.8
2,4,38332.8
3,5,38084.8
4,6,37836.8


In [13]:
# Chart dimensions; `width` is also used to position the text labels.
width = 800
height = 600

# Dashed gray horizontal reference line; its y position is set per layer.
rule_line = alt.Chart().mark_rule(
    strokeDash=[12, 6],
    strokeWidth=1,
    color="gray"
)

# Label for a reference line. dx shifts the text right by half the chart width
# plus 10 -- presumably from a horizontally centered default position, which
# would place it just past the right edge (verify against the rendered figure).
text_anno = alt.Chart().mark_text(
    baseline="middle",
    align="left",
    color="gray",
    fontSize=16,
    dx=(width/2) + 10
)

# The layers are wrapped in parentheses so that .properties() is applied to
# the combined layered chart. Without them, method-call precedence attaches
# the title/width/height to the last text layer only.
(
    alt.Chart(prediction_data).mark_point().encode(
        alt.X('age',
            scale=alt.Scale(),
            axis=alt.Axis(title="Age of Child when Parents Move"),
        ),
        alt.Y('income',
            scale=alt.Scale(
                zero=False,
                domainMin=29000,
                domainMax=41000
            ),
            axis=alt.Axis(format="$,f", title="Average Income at Age 35"),
        ),
    )
    # upper reference line and label
    + rule_line.encode(
        y=alt.datum(predicted_income_moved_at_birth)
    )
    + text_anno.encode(
        y=alt.datum(predicted_income_moved_at_birth),
        text=alt.datum("Fremont")
    )
    # lower reference line and label
    + rule_line.encode(
        y=alt.datum(gardena_pred_medhhinc_35yo)
    )
    + text_anno.encode(
        y=alt.datum(gardena_pred_medhhinc_35yo),
        text=alt.datum("Gardena")
    )
).properties(
    title="Income gain from moving to a 'better' neighborhood",
    width=width,
    height=height
)