## Predicting residential mobility 

In [209]:
import random
import math
import pandas as pd

In [231]:
# Scenario configuration for one station area. Read as a module-level global
# by samplePerson() and simulateMovingIndividual().
station_attributes = {
    "year_open": 2019,  # enters the mobility model directly as the "year" covariate
    "population": 10000,  # number of synthetic residents drawn by the simulation loop
    "dwellings_total": 4000,  # multiplied by the built_pre shares to get per-type values
    "dwellings_occupied": 3700,  # not read by any code visible in this section
    # Relative weights per age band, passed to random.choices(). Keys must follow
    # "age_<low>_<high>": the sampler parses both bounds from the key and draws a
    # uniform integer age in [low, high].
    # NOTE(review): these weights sum to 1.02, not 1.0. random.choices() treats
    # weights as relative, so sampling still works, but every band is implicitly
    # rescaled. It also looks odd that 100-104 (0.006) outweighs 95-99 (0.004);
    # confirm against the source data.
    "age": {
        "age_0_19": 0.15,
        "age_20_24": 0.05,
        "age_25_29": 0.05,
        "age_30_34": 0.08,
        "age_35_39": 0.08,
        "age_40_44": 0.08,
        "age_45_49": 0.08,
        "age_50_54": 0.08,
        "age_55_59": 0.08,
        "age_60_64": 0.07,
        "age_65_69": 0.06,
        "age_70_74": 0.05,
        "age_75_79": 0.04,
        "age_80_84": 0.03,
        "age_85_89": 0.02,
        "age_90_94": 0.01,
        "age_95_99": 0.004,
        "age_100_104": 0.006
    },
    # Only "sex_f" is read by the sampler (probability that sex == 1, female).
    "sex": {
        "sex_m": 0.51,
        "sex_f": 0.49
    },
    # Only "low_inc_yes" is read by the sampler (probability that low_inc == 1).
    "low_inc": {
        "low_inc_yes": 0.2,
        "low_inc_no": 0.8
    },
    # Only "marital_yes" is read by the sampler; anyone drawn from the
    # "age_0_19" band is forced to not married regardless of this value.
    "marital": {
        "marital_yes": 0.4, # probability of being married
        "marital_no": 0.6
    },
    # Dwelling mix before the change. The four type entries are multiplied by
    # dwellings_total in the model; "built_pre_avgbedrooms" is not read by any
    # code visible in this section.
    "built_pre": {
        "built_pre_avgbedrooms": 1.7,
        "built_pre_singledetached": 0.4,
        "built_pre_rowtownsemi": 0.2,
        "built_pre_aptsmall": 0.2,
        "built_pre_aptlarge": 0.2
    },
    # Change in the built form; values are fed to the model unscaled.
    "built_change": {
        # The base model expressed these on a 0-100 (percent) scale, not 0-1,
        # so enter 20 for a 20% change.
        "built_change_avgbedrooms": 20,
        "built_change_singledetached": 0,
        "built_change_rowtownsemi": 20,
        "built_change_aptsmall": 20,
        "built_change_aptlarge": 10
    }
}

In [196]:
def samplePerson(attributes=None, rng=None):
    """Draw one synthetic resident from the station-area marginal distributions.

    Income, sex, age band and marital status are sampled independently of one
    another, with a single exception: a person drawn from the "age_0_19" band
    is always returned as not married.

    Parameters
    ----------
    attributes : dict, optional
        Scenario description containing the "low_inc", "sex", "age" and
        "marital" sub-dicts. Defaults to the module-level
        ``station_attributes`` so existing zero-argument calls keep working.
    rng : random.Random, optional
        Source of randomness. Defaults to the global ``random`` module; pass a
        seeded ``random.Random`` instance for a reproducible draw.

    Returns
    -------
    dict
        ``{"age": int, "sex": 0|1, "low_inc": 0|1, "marital": 0|1}`` where
        sex 1 = female, low_inc 1 = low income, marital 1 = married.
    """
    if attributes is None:
        attributes = station_attributes
    if rng is None:
        rng = random

    # The draw order (low_inc, sex, age band, age, marital) is kept fixed so a
    # given seed always yields the same person.
    low_inc = int(rng.random() < attributes["low_inc"]["low_inc_yes"])
    sex = int(rng.random() < attributes["sex"]["sex_f"])

    # Pick an age band by its relative weight, then a uniform integer age
    # inside the band. Band keys look like "age_<low>_<high>".
    age_weights = attributes["age"]
    age_group = rng.choices(
        population=list(age_weights.keys()),
        weights=list(age_weights.values()),
        k=1
    )[0]
    low, high = (int(bound) for bound in age_group.replace("age_", "").split("_"))
    age = rng.randint(low, high)

    # The marital draw is always consumed, even when it is overridden below, so
    # the random stream does not depend on the age band that was picked.
    marital = int(rng.random() < attributes["marital"]["marital_yes"])
    if age_group == "age_0_19":
        marital = 0

    return {
        "age": age, # numeric
        "sex": sex, # 1 (female) or 0 (male)
        "low_inc": low_inc, # 1 (yes) or 0 (no)
        "marital": marital # 1 (married) or 0 (not married)
    }
    
samplePerson()

{'age': 25, 'sex': 0, 'low_inc': 0, 'marital': 1}

In [221]:
def simulateMovingIndividual(sample_person, attributes=None, rng=None):
    """Simulate whether one person moves, using a logistic model.

    The linear predictor is the intercept plus one coefficient-times-covariate
    term per person characteristic and per station-area characteristic. The
    move probability is the logistic transform of that predictor, and the
    move/stay outcome is a single Bernoulli draw with that probability.

    Parameters
    ----------
    sample_person : dict
        Must provide numeric "age", "sex", "low_inc" and "marital" entries, as
        produced by ``samplePerson``.
    attributes : dict, optional
        Scenario description providing "year_open", "dwellings_total",
        "built_pre" and "built_change". Defaults to the module-level
        ``station_attributes`` so existing one-argument calls keep working.
    rng : random.Random, optional
        Source of randomness. Defaults to the global ``random`` module; pass a
        seeded ``random.Random`` instance for a reproducible draw.

    Returns
    -------
    tuple
        ``(p, move)``: the move probability as a float and the simulated
        outcome as 1 (moved) or 0 (stayed).
    """
    if attributes is None:
        attributes = station_attributes
    if rng is None:
        rng = random

    dwellings_total = attributes["dwellings_total"]
    built_pre = attributes["built_pre"]
    built_change = attributes["built_change"]

    # NOTE(review): the coefficients are hard-coded; they are presumably the
    # fitted estimates of the "base model" mentioned next to built_change in
    # station_attributes. Their provenance is not recorded here.
    b_intercept = 40.85782

    # (label, coefficient, covariate value), listed in the order the terms are
    # summed so the floating-point result does not depend on a reshuffle.
    terms = [
        ("low_inc", 0.28904, sample_person["low_inc"]),
        ("age", -0.0267, sample_person["age"]),
        ("year", -0.01969, attributes["year_open"]),
        ("sex", -0.02405, sample_person["sex"]),
        ("marital", -0.02962, sample_person["marital"]),
        # Pre-existing stock: share of each dwelling type times total dwellings.
        ("built_pre_singledetached", 0.00027,
         dwellings_total * built_pre["built_pre_singledetached"]),
        ("built_pre_rowtownsemi", 0.00015,
         dwellings_total * built_pre["built_pre_rowtownsemi"]),
        ("built_pre_aptsmall", 0.00015,
         dwellings_total * built_pre["built_pre_aptsmall"]),
        ("built_pre_aptlarge", 0.00015,
         dwellings_total * built_pre["built_pre_aptlarge"]),
        # Change terms are used exactly as configured, with no rescaling.
        ("built_change_avgbedrooms", 0.01673,
         built_change["built_change_avgbedrooms"]),
        ("built_change_singledetached", 0.00212,
         built_change["built_change_singledetached"]),
        ("built_change_rowtownsemi", -0.00018,
         built_change["built_change_rowtownsemi"]),
        ("built_change_aptsmall", 0.00013,
         built_change["built_change_aptsmall"]),
        ("built_change_aptlarge", 0.00035,
         built_change["built_change_aptlarge"]),
    ]

    z = b_intercept
    for _label, coefficient, value in terms:
        z = z + coefficient * value

    # Logistic transform. math.exp raises OverflowError once -z is large
    # (roughly above 709); the probability is 0 to double precision there.
    try:
        p = 1 / (1 + math.exp(-z))
    except OverflowError:
        p = 0.0

    # Bernoulli draw: move with probability p.
    move = int(rng.random() < p)

    return p, move

simulateMovingIndividual(samplePerson())


(0.39368226831805, 0)

In [229]:
# Simulate one move/stay outcome per resident of the station area.
SEED = 42  # fixed seed so Restart & Run All reproduces the same synthetic population
random.seed(SEED)

results = []
for _ in range(station_attributes["population"]):
    sample_person = samplePerson()
    move_p, move_b = simulateMovingIndividual(sample_person)
    # Build a new record instead of adding keys to the sampled person in place.
    results.append({**sample_person, "move_p": move_p, "move_b": move_b})

# One row per simulated person: sampled traits, move probability, outcome.
df = pd.DataFrame(results)
df

Unnamed: 0,age,sex,low_inc,marital,move_p,move_b
0,44,1,1,0,0.789680,1
1,32,1,0,0,0.794842,0
2,41,1,0,0,0.752885,1
3,56,0,0,1,0.669957,1
4,75,0,1,1,0.620043,0
...,...,...,...,...,...,...
9995,55,0,1,0,0.741418,0
9996,19,0,0,0,0.848837,1
9997,24,0,0,0,0.830899,1
9998,10,0,0,0,0.877160,1


In [230]:
# Move outcomes summarised separately for low-income (1) and other (0) residents.
# as_index=False keeps "low_inc" as an ordinary column of the result.
income_groups = df.groupby("low_inc", as_index=False)
by_low_inc = income_groups.agg(
    total_people=pd.NamedAgg(column="move_b", aggfunc="count"),
    total_moved=pd.NamedAgg(column="move_b", aggfunc="sum"),
    # Mean of a 0/1 column, i.e. the share that moved, on a 0-1 scale.
    percent_moved=pd.NamedAgg(column="move_b", aggfunc="mean"),
)
by_low_inc

Unnamed: 0,low_inc,total_people,total_moved,percent_moved
0,0,8025,5734,0.714517
1,1,1975,1532,0.775696
