###  urbanchoice_interaction

#### 导入

In [9]:

# %load_ext autoreload
# %autoreload 2
import models, utils
import orca

# Run a single iteration (iteration value 2010) of the enabled orca steps.
# Only the two hedonic price steps and the household relocation step are
# active; every other step below is commented out and therefore not executed.
orca.run([
    "rsh_simulate",              # residential sales hedonic
    "nrh_simulate",              # non-residential rent hedonic

    "households_relocation",     # households relocation model
#     "hlcm_simulate",            # households location choice
#     "households_transition",     # households transition

#     "jobs_relocation",           # jobs relocation model
#     "elcm_simulate",             # employment location choice
#     "jobs_transition",           # jobs transition

#     "feasibility",               # compute development feasibility
#     "residential_developer",     # build residential buildings
#     "non_residential_developer", # build non-residential buildings
], iter_vars=[2010])


Running iteration 1 with iteration value 2010
Running step 'rsh_simulate'
count    1.407800e+05
mean     4.271533e+03
std      3.744239e+05
min      1.204550e-04
25%      1.899947e+02
50%      3.179589e+02
75%      5.150183e+02
max      1.210341e+08
dtype: float64
Time to execute step 'rsh_simulate': 0.45 s
Running step 'nrh_simulate'
Filling column job_category with value service (331 values)
count    10803.000000
mean        21.214628
std          6.859405
min          6.841714
25%         17.920246
50%         21.384064
75%         25.575510
max         47.570487
dtype: float64
Time to execute step 'nrh_simulate': 0.56 s
Running step 'households_relocation'
Total agents: 345588
Total currently unplaced: 0
Assinging for relocation...
Total currently unplaced: 17279
Time to execute step 'households_relocation': 0.06 s
Total time to execute iteration 1 with iteration value 2010: 1.07 s


### 需要的函数

In [17]:
# 定义一些需要的函数
def to_frame(tables, cfg, additional_columns=None):
    """Assemble one NA-checked DataFrame holding the columns a model needs.

    Parameters
    ----------
    tables : list
        Table objects exposing ``name`` and ``to_frame``; ``None`` entries
        are dropped. When more than one table remains they are merged with
        ``orca.merge_tables`` using the first table's name as the target.
    cfg : str
        Path to the model's YAML configuration. It selects the model class
        (via ``yaml_to_class``) and, through ``columns_used()``, the columns
        to pull from the tables.
    additional_columns : list of str, optional
        Extra column names to request on top of the ones the model uses.
        ``None`` (the default) means no extra columns.

    Returns
    -------
    DataFrame
        The assembled frame after it has passed through ``deal_with_nas``.

    Raises
    ------
    AssertionError
        Propagated from ``deal_with_nas`` when the frame holds NaN or +/-inf.
    """
    # ``None`` replaces the former shared mutable ``[]`` default; copying the
    # argument also keeps the caller's list out of the concatenation below.
    extra_columns = [] if additional_columns is None else list(additional_columns)

    cfg = yaml_to_class(cfg).from_yaml(str_or_buffer=cfg)
    tables = [t for t in tables if t is not None]
    columns = misc.column_list(tables, cfg.columns_used()) + extra_columns
    if len(tables) > 1:
        df = orca.merge_tables(target=tables[0].name,
                               tables=tables, columns=columns)
    else:
        df = tables[0].to_frame(columns)
    df = deal_with_nas(df)
    return df


def yaml_to_class(cfg):
    """Return the model class named by the ``model_type`` key of a YAML file.

    Parameters
    ----------
    cfg : str
        Path to a YAML configuration file containing a ``model_type`` entry.

    Returns
    -------
    type
        One of ``RegressionModel``, ``SegmentedRegressionModel``,
        ``MNLDiscreteChoiceModel`` or ``SegmentedMNLDiscreteChoiceModel``.

    Raises
    ------
    KeyError
        If ``model_type`` is missing from the file or is not one of the four
        supported values.
    """
    import yaml

    # Read inside a context manager so the file handle is closed right away
    # instead of being left for the garbage collector.
    with open(cfg) as config_file:
        model_type = yaml.safe_load(config_file)["model_type"]
    return {
        "regression": RegressionModel,
        "segmented_regression": SegmentedRegressionModel,
        "discretechoice": MNLDiscreteChoiceModel,
        "segmented_discretechoice": SegmentedMNLDiscreteChoiceModel
    }[model_type]
def get_run_filename():
    """Return the path ``<runs dir>/run<N>.h5`` for the current run number.

    The directory comes from ``misc.runs_dir()`` and ``N`` from
    ``misc.get_run_number()``.
    """
    runs_directory = misc.runs_dir()
    file_name = "run%d.h5" % misc.get_run_number()
    return os.path.join(runs_directory, file_name)


def change_store(store_name):
    """Register a read-only HDFStore as the orca ``"store"`` injectable.

    Parameters
    ----------
    store_name : str
        File name of the store, resolved relative to ``misc.data_dir()``.
    """
    store_path = os.path.join(misc.data_dir(), store_name)
    read_only_store = pd.HDFStore(store_path, mode="r")
    orca.add_injectable("store", read_only_store)


def change_scenario(scenario):
    """Switch the orca ``"scenario"`` injectable to ``scenario``.

    The name must be a key of the ``"scenario_inputs"`` injectable; otherwise
    the assertion below fails. A confirmation line is printed before the
    injectable is replaced.
    """
    known_scenarios = orca.get_injectable("scenario_inputs")
    assert scenario in known_scenarios, "Invalid scenario name"
    message = "Changing scenario to '%s'" % scenario
    print(message)
    orca.add_injectable("scenario", scenario)


def conditional_upzone(scenario, attr_name, upzone_name):
    """Return a zoning attribute, raised by the scenario's upzoning if any.

    Parameters
    ----------
    scenario : str
        Key into the ``"scenario_inputs"`` injectable.
    attr_name : str
        Column read from the baseline zoning table.
    upzone_name : str
        Column read from the scenario's zoning table (NaNs dropped).

    Returns
    -------
    Series
        The baseline column unchanged for the ``"baseline"`` scenario;
        otherwise the row-wise maximum of the baseline and upzone columns.
    """
    inputs_by_scenario = orca.get_injectable("scenario_inputs")
    baseline_table_name = inputs_by_scenario["baseline"]["zoning_table_name"]
    baseline_values = orca.get_table(baseline_table_name)[attr_name]

    # Nothing to combine for the baseline itself.
    if scenario == "baseline":
        return baseline_values

    scenario_table_name = inputs_by_scenario[scenario]["zoning_table_name"]
    upzone_values = orca.get_table(scenario_table_name)[upzone_name].dropna()
    side_by_side = pd.concat([baseline_values, upzone_values], axis=1)
    return side_by_side.max(skipna=True, axis=1)


def enable_logging():
    """Set urbansim's log level to INFO and call ``logutil.log_to_stream()``."""
    from urbansim.utils import logutil

    info_level = logutil.logging.INFO
    logutil.set_log_level(info_level)
    logutil.log_to_stream()


def deal_with_nas(df):
    """Fail loudly if ``df`` contains any NaN or infinite value.

    Infinite values are first converted to NaN. Each column with missing
    entries is reported on stdout, and if any such column exists an
    ``AssertionError`` is raised after all columns have been checked.

    Returns
    -------
    DataFrame
        The frame with +/-inf replaced by NaN (only reached when nothing is
        missing).
    """
    total_rows = len(df)
    df = df.replace([np.inf, -np.inf], np.nan)

    found_missing = False
    for column_name in df.columns:
        present = df[column_name].count()
        if present != total_rows:
            found_missing = True
            print("Found %d nas or inf (out of %d) in column %s" %
                  (total_rows - present, total_rows, column_name))

    assert not found_missing, "NAs were found in dataframe, please fix"
    return df


def fill_nas_from_config(dfname, df):
    """Fill missing values in ``df`` following the ``fillna_config`` injectable.

    Parameters
    ----------
    dfname : str
        Key into the ``"fillna_config"`` injectable; the entry maps column
        names to ``(fill type, dtype)`` pairs.
    df : DataFrame
        Frame to fill. Its columns are reassigned in place, and the same
        object is returned.

    Returns
    -------
    DataFrame
        ``df`` with each configured column filled and cast to its dtype.

    Raises
    ------
    AssertionError
        If a configured fill type is not ``"zero"``, ``"mode"`` or
        ``"median"``.
    """
    df_cnt = len(df)   # number of rows in df
    fillna_config = orca.get_injectable("fillna_config")
    fillna_config_df = fillna_config[dfname]
    for fname in fillna_config_df:
        filltyp, dtyp = fillna_config_df[fname]
        s_cnt = df[fname].count()
        fill_cnt = df_cnt - s_cnt
        if filltyp == "zero":
            val = 0
        elif filltyp == "mode":
            # Most frequent non-missing value.
            val = df[fname].dropna().value_counts().idxmax()
        elif filltyp == "median":
            # quantile() defaults to q=0.5, i.e. the median.
            val = df[fname].dropna().quantile()
        else:
            # Raise explicitly rather than `assert 0`: assert statements are
            # removed under `python -O`, which would let the loop continue
            # with a stale `val` from the previous column (or hit NameError).
            raise AssertionError("Fill type not found!")
        print("Filling column {} with value {} ({} values)"
              .format(fname, val, fill_cnt))
        df[fname] = df[fname].fillna(val).astype(dtyp)
    return df

import abc
@abc.abstractmethod
def columns_used(self):
    """Placeholder with an empty body; calling it returns ``None``."""
    return None

#### 获取家庭表

In [18]:
# Load the households table and show the rows whose building_id is -1
# (presumably the households left unplaced by the relocation step -- confirm).
households = orca.get_table('households')
df = households.to_frame()
df[df['building_id']==-1]


Unnamed: 0_level_0,building_id,building_type_id,income,persons,tenure,income_quartile,zone_id
household_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
7,-1,1,27310,1,2,0,
77,-1,2,0,1,1,0,
79,-1,1,8500,1,1,0,
131,-1,1,14000,1,2,0,
141,-1,2,0,1,1,0,
...,...,...,...,...,...,...,...
2630553,-1,3,150700,1,1,3,
2630554,-1,3,8100,1,1,0,
2630582,-1,3,8500,1,1,0,
2630598,-1,3,137000,1,1,3,


##### 从上面那个表可以看出，应该是有17279个家庭是需要进行区位选择的

#### 重点

In [19]:
from __future__ import print_function

import os

import numpy as np
import orca
import pandas as pd
from urbansim.models import RegressionModel, SegmentedRegressionModel, \
    MNLDiscreteChoiceModel, SegmentedMNLDiscreteChoiceModel, \
    GrowthRateTransition
from urbansim.developer import sqftproforma, developer
from urbansim.utils import misc

# Inputs for the household location choice: the model config file plus the
# chooser (households) and alternative (buildings, zones) tables.
cfg = misc.config("hlcm.yaml")
choosers = orca.get_table('households')
nodes = orca.get_table('zones')
buildings = orca.get_table('buildings')

# NOTE(review): same table as `nodes` above; both names refer to 'zones'.
zones = orca.get_table('zones')

# Column names used by the cells below.
out_fname= "building_id"
supply_fname = "residential_units" # column of the buildings table (noted by the author as a computed column)
vacant_fname =  "vacant_residential_units" # column of the buildings table (noted by the author as a computed column)

#### choosers_df

In [20]:
# Household columns used by the model config, plus the current building_id.
choosers_df = to_frame([choosers],cfg,additional_columns = ["building_id"])
choosers_df

Unnamed: 0_level_0,building_id,income_quartile
household_id,Unnamed: 1_level_1,Unnamed: 2_level_1
1,136325,0
4,133982,0
5,25633,0
6,132596,0
7,-1,0
...,...,...
2630612,74346,0
2630613,78154,0
2630614,130536,1
2630615,137578,0


#### locations_df

In [21]:
# Buildings merged with zones: the columns used by the model config plus the
# supply and vacancy columns named above.
locations_df = to_frame([buildings, nodes], cfg,
                            [supply_fname, vacant_fname])
locations_df

Unnamed: 0_level_0,zone_id,residential_units,residential_sales_price,vacant_residential_units,zone_id_buildings,general_type,unit_sqft,population,ave_lot_sqft,poor,hhsize,sum_residential_units,ave_unit_sqft,zone_id_zones,sfdu,jobs,ave_income,renters
building_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
66515,103.0,3,304,1.0,103.0,Residential,1585.000000,8.604838,7.293847,6.906755,1.098612,7.951559,7.153834,103,1.386294,6.259581,11.156265,7.583756
65547,103.0,1,48,0.0,103.0,Residential,3314.000000,8.604838,7.293847,6.906755,1.098612,7.951559,7.153834,103,1.386294,6.259581,11.156265,7.583756
66514,103.0,3,324,-2.0,103.0,Residential,1650.000000,8.604838,7.293847,6.906755,1.098612,7.951559,7.153834,103,1.386294,6.259581,11.156265,7.583756
66655,103.0,8,934,0.0,103.0,Residential,941.500000,8.604838,7.293847,6.906755,1.098612,7.951559,7.153834,103,1.386294,6.259581,11.156265,7.583756
65727,103.0,6,1579,0.0,103.0,Residential,739.666687,8.604838,7.293847,6.906755,1.098612,7.951559,7.153834,103,1.386294,6.259581,11.156265,7.583756
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1855320,14.0,1,0,1.0,14.0,Office,170061.000000,4.110874,9.034048,3.401197,0.693147,6.131226,7.696667,14,0.000000,9.366831,10.452505,1.791759
1854424,14.0,1,0,1.0,14.0,Office,996760.000000,4.110874,9.034048,3.401197,0.693147,6.131226,7.696667,14,0.000000,9.366831,10.452505,1.791759
1853798,14.0,1,0,1.0,14.0,Office,523385.000000,4.110874,9.034048,3.401197,0.693147,6.131226,7.696667,14,0.000000,9.366831,10.452505,1.791759
1855107,42.0,1,0,1.0,42.0,Industrial,50672.000000,0.000000,12.454458,0.000000,1.098612,1.098612,7.103939,42,0.000000,4.962845,11.057866,0.000000


In [22]:
# List the columns present in the merged locations frame.
locations_df.columns

Index(['zone_id', 'residential_units', 'residential_sales_price',
       'vacant_residential_units', 'zone_id_buildings', 'general_type',
       'unit_sqft', 'population', 'ave_lot_sqft', 'poor', 'hhsize',
       'sum_residential_units', 'ave_unit_sqft', 'zone_id_zones', 'sfdu',
       'jobs', 'ave_income', 'renters'],
      dtype='object')

#### available_units,vacant_units,overfull buildings

In [23]:
# Per-building supply and vacancy series taken from the buildings table.
available_units = buildings[supply_fname]
vacant_units = buildings[vacant_fname]

# Report total supply, the number of choosers, and how many buildings have a
# negative vacancy count (reported here as "overfull").
print("There are {} total available units\n"
      "    and {} total choosers\n"
      "    but there are {} overfull buildings"
      .format(available_units.sum(), len(choosers),
              len(vacant_units[vacant_units < 0])))

There are 374133 total available units
    and 345588 total choosers
    but there are 43020 overfull buildings


#### units

In [24]:
# Keep only buildings that still have at least one vacant unit.
vacant_units = vacant_units[vacant_units > 0]
# Repeat each building's row once per vacant unit, so every row of `units`
# stands for one individual vacant unit; reset_index() moves building_id into
# a column and gives the units a fresh 0..n-1 index.
units = locations_df.loc[np.repeat(vacant_units.index.values,
                         vacant_units.values.astype('int'))].reset_index()
units

Unnamed: 0,building_id,zone_id,residential_units,residential_sales_price,vacant_residential_units,zone_id_buildings,general_type,unit_sqft,population,ave_lot_sqft,poor,hhsize,sum_residential_units,ave_unit_sqft,zone_id_zones,sfdu,jobs,ave_income,renters
0,34,171.0,15,814,10.0,171.0,Residential,1189.133301,7.873217,8.125186,5.863631,1.098612,6.846943,7.375569,171,0.693147,6.131226,11.307143,7.444833
1,34,171.0,15,814,10.0,171.0,Residential,1189.133301,7.873217,8.125186,5.863631,1.098612,6.846943,7.375569,171,0.693147,6.131226,11.307143,7.444833
2,34,171.0,15,814,10.0,171.0,Residential,1189.133301,7.873217,8.125186,5.863631,1.098612,6.846943,7.375569,171,0.693147,6.131226,11.307143,7.444833
3,34,171.0,15,814,10.0,171.0,Residential,1189.133301,7.873217,8.125186,5.863631,1.098612,6.846943,7.375569,171,0.693147,6.131226,11.307143,7.444833
4,34,171.0,15,814,10.0,171.0,Residential,1189.133301,7.873217,8.125186,5.863631,1.098612,6.846943,7.375569,171,0.693147,6.131226,11.307143,7.444833
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
109354,1855329,22.0,1,0,1.0,22.0,Office,3631.043457,6.952729,10.310913,5.796058,0.693147,7.020191,7.166893,22,0.000000,8.051660,10.927753,1.386294
109355,1855330,22.0,1,0,1.0,22.0,Office,3333.000000,6.952729,10.310913,5.796058,0.693147,7.020191,7.166893,22,0.000000,8.051660,10.927753,1.386294
109356,1855334,163.0,1,0,1.0,163.0,Office,3631.043457,8.233238,7.886821,6.651572,1.386294,7.060476,7.198557,163,3.258097,5.105945,11.128747,7.786136
109357,1855345,159.0,1,0,1.0,159.0,Hotel,3631.043457,8.042056,7.962597,6.246107,1.098612,7.160846,7.178164,159,1.098612,3.737670,11.167699,7.598399


#### temporarily empty units，buildings total in the region

In [25]:
# Report the number of vacant units and the number of buildings that hold
# them (vacant_units was filtered to positive counts in the previous cell).
print("    for a total of {} temporarily empty units\n"
          "    in {} buildings total in the region"
          .format(int(vacant_units.sum()), len(vacant_units)))

    for a total of 109359 temporarily empty units
    in 50861 buildings total in the region


#### movers

In [26]:
# Movers are the choosers whose building_id is -1.
movers = choosers_df[choosers_df[out_fname] == -1]
movers

Unnamed: 0_level_0,building_id,income_quartile
household_id,Unnamed: 1_level_1,Unnamed: 2_level_1
7,-1,0
77,-1,0
79,-1,0
131,-1,0
141,-1,0
...,...,...
2630553,-1,3
2630554,-1,0
2630582,-1,0
2630598,-1,3


In [27]:
# When there are more movers than vacant units, keep only as many movers as
# there are units.
if len(movers) > vacant_units.sum():
    print("WARNING: Not enough locations for movers\n"
          "    reducing locations to size of movers for performance gain")
    # head() requires an integer row count; the vacancy column holds floats,
    # so its sum must be cast (as the other cells already do).
    movers = movers.head(int(vacant_units.sum()))

#### new_units，分配（区位选择）

In [28]:
# Run the configured choice model: movers are the choosers, the vacant units
# are the alternatives. Only the first element of the returned pair is kept.
new_units, _ = yaml_to_class(cfg).predict_from_cfg(movers, units, cfg)

Assigned 17279 choosers to new units


In [29]:
new_units  # first column (the index) is the household id; the second is the index into the units table

7          64119
77         58848
79         82403
131        82563
141        83652
           ...  
2630000    82511
2630062    47603
2630424    95431
2630553    50658
2630598    17168
Length: 17279, dtype: int64

In [48]:
units.loc[new_units.values] # pick out the units that were chosen

Unnamed: 0,building_id,zone_id,residential_sales_price,residential_units,vacant_residential_units,zone_id_buildings,general_type,unit_sqft,zone_id_zones,poor,jobs,population,renters,ave_income,ave_unit_sqft,ave_lot_sqft,sum_residential_units,hhsize,sfdu
87602,139527,43.0,3558,241,152.0,43.0,Residential,674.676331,43,6.723832,6.251904,8.576970,7.361375,11.290419,7.330405,7.324603,8.149313,0.693147,0.000000
61838,130421,9.0,356,189,16.0,9.0,Residential,459.894165,9,8.484670,8.678461,8.793915,5.129899,9.908525,6.381195,8.033955,8.478660,0.693147,0.000000
80062,138399,3.0,3357,28,18.0,3.0,Residential,156.642853,3,5.762051,6.708084,6.242223,4.094345,10.098273,6.541992,7.283731,6.313548,0.693147,0.000000
56176,128164,74.0,26896858,300,136.0,74.0,Residential,1.436667,74,7.395108,7.030857,8.031060,5.891644,10.275086,7.208041,7.322571,8.023225,0.693147,3.044522
84598,138924,17.0,5324569,191,134.0,17.0,Residential,6.251309,17,7.041412,8.461892,8.293049,6.289716,11.002117,7.103939,8.160232,8.329899,0.693147,1.386294
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
50588,118695,55.0,954,12,2.0,55.0,Residential,880.250000,55,6.450470,5.786897,8.024207,7.246368,11.050906,7.244942,7.823843,7.290293,1.098612,0.000000
20598,62647,104.0,3338,22,5.0,104.0,Residential,420.500000,104,6.496775,7.106606,8.091321,6.725034,11.074436,6.988567,6.908172,7.524021,1.098612,0.693147
83542,138883,16.0,7,67,53.0,16.0,Residential,9177.671875,16,6.030685,8.928905,7.415777,6.291569,11.052492,7.090910,8.584115,7.637234,0.693147,0.000000
22271,64418,105.0,655,36,27.0,105.0,Residential,794.444458,105,7.593878,6.668228,8.649799,7.265430,10.714440,6.867974,6.756799,8.157944,1.098612,1.386294


In [49]:
# Building id of each chosen unit (the values shown are building ids).
# A single .loc[rows, column] lookup replaces the chained
# .loc[rows][column], which built the full intermediate frame first.
units.loc[new_units.values, out_fname]

87602    139527
61838    130421
80062    138399
56176    128164
84598    138924
          ...  
50588    118695
20598     62647
83542    138883
22271     64418
11705     44745
Name: building_id, Length: 17279, dtype: int64

In [50]:
# Array of the building ids assigned to the movers, read with one
# .loc[rows, column] lookup instead of chained indexing.
units.loc[new_units.values, out_fname].values

array([139527, 130421, 138399, ..., 138883,  64418,  44745], dtype=int64)

In [51]:
# NOTE(review): `new_buildings` is first assigned in a later cell, so this
# cell raises NameError on a fresh kernel (Restart & Run All); the output
# captured below comes from an earlier run.
new_buildings

91         129855
101         70464
127        128164
197        112397
206         77398
            ...  
2628531    130927
2628936    128099
2629645     83664
2630215    106858
2630409    111908
Length: 17279, dtype: int64

In [52]:
# Map each mover (household id, taken from new_units' index) to the building
# id of the unit it chose. A single .loc[rows, column] lookup replaces the
# chained .loc[rows][column], which built the full intermediate frame first.
new_buildings = pd.Series(units.loc[new_units.values, out_fname].values,
                          index=new_units.index)
new_buildings

80         139527
272        130421
274        138399
300        128164
336        138924
            ...  
2629241    118695
2629700     62647
2630003    138883
2630125     64418
2630295     44745
Length: 17279, dtype: int64

In [53]:
# Write the newly assigned building ids into the building_id column of the
# households table.
choosers.update_col_from_series(out_fname, new_buildings, cast=True)

#### 更新choosers表

In [54]:
# Display the households table after the building_id update.
choosers.to_frame()

Unnamed: 0_level_0,building_id,building_type_id,income,persons,tenure,income_quartile,zone_id
household_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1,136325,1,27310,1,2,0,38.0
4,133982,2,25000,1,2,0,44.0
5,25633,1,3100,1,1,0,170.0
6,132596,2,7200,1,1,0,45.0
7,57108,1,27310,1,2,0,164.0
...,...,...,...,...,...,...,...
2630612,74346,3,690,1,1,0,148.0
2630613,78154,3,24650,1,1,0,141.0
2630614,130536,3,58000,1,1,1,10.0
2630615,137578,3,15400,1,1,0,22.0


In [56]:
# NOTE(review): `df` is rebound here; it held an earlier households snapshot.
df = choosers.to_frame()
df[df['building_id'] == -1] # an empty result shows that, after the location choice, every household has been assigned a building id

Unnamed: 0_level_0,building_id,building_type_id,income,persons,tenure,income_quartile,zone_id
household_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
