Skip to content

Commit

Permalink
Merge d7dbb95 into ec95b55
Browse files Browse the repository at this point in the history
  • Loading branch information
fscottfoti committed Jul 28, 2014
2 parents ec95b55 + d7dbb95 commit 32e723c
Show file tree
Hide file tree
Showing 7 changed files with 122 additions and 166 deletions.
1 change: 0 additions & 1 deletion urbansim/developer/sqftproforma.py
Original file line number Diff line number Diff line change
Expand Up @@ -554,7 +554,6 @@ def lookup(self, form, df, only_built=True):
df['max_far_from_dua'] = df.max_dua * df.ave_unit_size / self.config.building_efficiency
df['min_max_fars'] = df[['min_max_fars', 'max_far_from_dua']].min(axis=1)

df['min_max_fars'] = df.min_max_fars.fillna(0)
if only_built:
df = df.query('min_max_fars > 0 and parcel_size > 0')

Expand Down
14 changes: 14 additions & 0 deletions urbansim/models/lcm.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
import toolz

from . import util
from ..exceptions import ModelEvaluationError
from ..urbanchoice import interaction, mnl
from ..utils import yamlio
from ..utils.logutil import log_start_finish
Expand Down Expand Up @@ -238,6 +239,13 @@ def fit(self, choosers, alternatives, current_choice):
choosers, alternatives, self.sample_size, current_choice)
model_design = dmatrix(
self.str_model_expression, data=merged, return_type='dataframe')

if len(merged) != model_design.as_matrix().shape[0]:
raise ModelEvaluationError(
'Estimated data does not have the same length as input. '
'This suggests there are null values in one or more of '
'the input columns.')

self.log_likelihoods, self.fit_parameters = mnl.mnl_estimate(
model_design.as_matrix(), chosen, self.sample_size)
self.fit_parameters.index = model_design.columns
Expand Down Expand Up @@ -336,6 +344,12 @@ def predict(self, choosers, alternatives, debug=False):
model_design = dmatrix(
self.str_model_expression, data=merged, return_type='dataframe')

if len(merged) != model_design.as_matrix().shape[0]:
raise ModelEvaluationError(
'Simulated data does not have the same length as input. '
'This suggests there are null values in one or more of '
'the input columns.')

coeffs = [self.fit_parameters['Coefficient'][x]
for x in model_design.columns]

Expand Down
9 changes: 9 additions & 0 deletions urbansim/models/regression.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,13 @@ def fit_model(df, filters, model_expression):
"""
df = util.apply_filter_query(df, filters)
model = smf.ols(formula=model_expression, data=df)

if len(model.exog) != len(df):
raise ModelEvaluationError(
'Estimated data does not have the same length as input. '
'This suggests there are null values in one or more of '
'the input columns.')

with log_start_finish('statsmodels OLS fit', logger):
return model.fit()

Expand Down Expand Up @@ -325,6 +332,8 @@ class instance for use during prediction.
self.fit_parameters = _model_fit_to_table(fit)
if debug:
index = util.apply_filter_query(data, self.fit_filters).index
assert len(fit.model.exog) == len(index), "The estimate data is"
"unequal in length to the original dataframe, usually caused by nans"
df = pd.DataFrame(
fit.model.exog, columns=fit.model.exog_names, index=index)
df[fit.model.endog_names] = fit.model.endog
Expand Down
14 changes: 14 additions & 0 deletions urbansim/sim/simulation.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,10 @@

import pandas as pd
import toolz
import time
import logging

logger = logging.getLogger(__name__)

_TABLES = {}
_COLUMNS = {}
Expand Down Expand Up @@ -45,6 +49,14 @@ def columns(self):
"""
return list(self._frame.columns) + _list_columns_for_table(self.name)

@property
def local_columns(self):
    """
    Columns held directly by this table's wrapped ``_frame``.

    Unlike `columns`, the result does not include the extra names that
    `columns` appends from ``_list_columns_for_table(self.name)``.
    """
    return list(self._frame.columns)

@property
def index(self):
"""
Expand Down Expand Up @@ -489,4 +501,6 @@ def run(models, years=None):
for model_name in models:
print('Running model {}'.format(model_name))
model = get_model(model_name)
t1 = time.time()
model(year=year)
logger.debug("Time to execute model = %.3fs" % (time.time()-t1))
105 changes: 82 additions & 23 deletions urbansim/models/yamlmodelrunner.py → urbansim/sim/yamlmodelrunner.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,27 @@
import numpy as np
import yaml
import pandas as pd
from numpy import random
from urbansim.utils import misc
import urbansim.sim.simulation as sim
from urbansim.models import RegressionModel, SegmentedRegressionModel, \
MNLLocationChoiceModel, SegmentedMNLLocationChoiceModel, \
GrowthRateTransition


# this is a single place to deal with nas
def deal_with_nas_for_est_or_sim(df, subset=None):
    """
    Drop every row of a DataFrame that contains a missing value.

    Parameters
    ----------
    df : DataFrame
        The data to clean before estimation or simulation.
    subset : iterable of str, optional
        Column names to keep. Names that are not columns of `df` are
        silently ignored. When omitted, all columns of `df` are kept.

    Returns
    -------
    DataFrame
        `df` (restricted to `subset` when given) without the rows that
        held at least one null value. The number of dropped rows is
        printed when it is non-zero.
    """
    if subset is not None:
        # Build a concrete list: filter() is lazy on Python 3, and an
        # explicit list keeps the column selection unambiguous.
        present = [col for col in subset if col in df.columns]
        df = df[present]
    rows_before = len(df)
    df = df.dropna(how='any')
    rows_dropped = rows_before - len(df)
    if rows_dropped:
        # Single parenthesised argument prints identically on Python 2 and 3.
        print("Dropped %d rows because they contained nas" % rows_dropped)
    return df


def hedonic_estimate(df, cfgname):
"""
Parameters
Expand All @@ -21,10 +36,12 @@ def hedonic_estimate(df, cfgname):
model_type = yaml.load(open(cfg))["model_type"]
if model_type == "regression":
hm = RegressionModel.from_yaml(str_or_buffer=cfg)
df = deal_with_nas_for_est_or_sim(df, hm.columns_used())
print hm.fit(df, debug=True).summary()
est_data = {"est_data": hm.est_data}
if model_type == "segmented_regression":
hm = SegmentedRegressionModel.from_yaml(str_or_buffer=cfg)
df = deal_with_nas_for_est_or_sim(df, hm.columns_used())
hm.min_segment_size = 10
for k, v in hm.fit(df, debug=True).items():
print "REGRESSION RESULTS FOR SEGMENT %s\n" % str(k)
Expand All @@ -35,16 +52,16 @@ def hedonic_estimate(df, cfgname):
return est_data


def hedonic_simulate(df, cfgname, outdf, outfname):
def hedonic_simulate(df, cfgname, outdf_name, outfname):
"""
Parameters
----------
df : DataFrame
The dataframe which contains the columns to use for the estimation.
cfgname : string
The name of the yaml config file which describes the hedonic model.
outdf : DataFrame
The dataframe to write the simulated price/rent to.
outdf_name : string
The name of the dataframe to write the simulated price/rent to.
outfname : string
The column name to write the simulated price/rent to.
"""
Expand All @@ -53,12 +70,28 @@ def hedonic_simulate(df, cfgname, outdf, outfname):
model_type = yaml.load(open(cfg))["model_type"]
if model_type == "regression":
hm = RegressionModel.from_yaml(str_or_buffer=cfg)
df = deal_with_nas_for_est_or_sim(df, hm.columns_used())
if model_type == "segmented_regression":
hm = SegmentedRegressionModel.from_yaml(str_or_buffer=cfg)
df = deal_with_nas_for_est_or_sim(df, hm.columns_used())
hm.min_segment_size = 10
price_or_rent = hm.predict(df)
print price_or_rent.describe()
outdf.loc[price_or_rent.index.values, outfname] = price_or_rent
print
s = sim.get_table(outdf_name).get_column(outfname)
s.loc[price_or_rent.index.values] = price_or_rent
sim.add_column(outdf_name, outfname, s)


def _to_frame_get_fields(model_type, model, output_fname, df):
    """
    Materialize only the columns a location choice model needs and drop
    the rows that contain nulls.

    Parameters
    ----------
    model_type : string
        The model type read from the yaml config; the value
        "segmented_locationchoice" additionally pulls in the model's
        segmentation column.
    model : object
        The model; must provide `columns_used()` and, for segmented
        models, a `segmentation_col` attribute.
    output_fname : string
        The name of the column holding the chosen/simulated location,
        which is kept alongside the model's own columns.
    df : object
        Anything exposing `to_frame(columns)` (presumably a table wrapper
        from the simulation framework -- confirm against callers).

    Returns
    -------
    DataFrame
        The requested columns, without rows containing nulls.
    """
    add_flds = [output_fname]
    if model_type == "segmented_locationchoice":
        add_flds.append(model.segmentation_col)
    flds = model.columns_used() + add_flds
    # Single-argument parenthesised prints emit the same text on Python 2
    # and Python 3; the original print statements are Python-2-only syntax.
    print("The following fields are used by this model: %s" % (flds,))
    print("")
    df = df.to_frame(flds)
    return deal_with_nas_for_est_or_sim(df)


def lcm_estimate(choosers, chosen_fname, alternatives, cfgname):
Expand All @@ -84,10 +117,14 @@ def lcm_estimate(choosers, chosen_fname, alternatives, cfgname):
model_type = yaml.load(open(cfg))["model_type"]
if model_type == "locationchoice":
lcm = MNLLocationChoiceModel.from_yaml(str_or_buffer=cfg)
choosers = _to_frame_get_fields(model_type, lcm, chosen_fname, choosers)
alternatives = deal_with_nas_for_est_or_sim(alternatives, lcm.columns_used())
lcm.fit(choosers, alternatives, choosers[chosen_fname])
lcm.report_fit()
elif model_type == "segmented_locationchoice":
lcm = SegmentedMNLLocationChoiceModel.from_yaml(str_or_buffer=cfg)
choosers = _to_frame_get_fields(model_type, lcm, chosen_fname, choosers)
alternatives = deal_with_nas_for_est_or_sim(alternatives, lcm.columns_used())
lcm.fit(choosers, alternatives, choosers[chosen_fname])
for k, v in lcm._group.models.items():
print "LCM RESULTS FOR SEGMENT %s\n" % str(k)
Expand Down Expand Up @@ -116,11 +153,11 @@ def get_vacant_units(choosers, location_fname, locations, supply_fname):
representing the number of agents that can be located at that location.
"""
vacant_units = locations[supply_fname].sub(
choosers.groupby(location_fname).size(), fill_value=0)
choosers[location_fname].value_counts(), fill_value=0)
print "There are %d total available units" % locations[supply_fname].sum()
print " and %d total choosers" % len(choosers.index)
print " but there are %d overfull buildings" % \
len(vacant_units[vacant_units < 0].index)
len(vacant_units[vacant_units < 0])
vacant_units = vacant_units[vacant_units > 0]
alternatives = locations.loc[np.repeat(vacant_units.index,
vacant_units.values.astype('int'))] \
Expand All @@ -135,12 +172,13 @@ def _print_number_unplaced(df, fieldname="building_id"):
Just an internal function to use to compute and print info on the number
of unplaced agents.
"""
counts = df[fieldname].isnull().value_counts()
counts = (df[fieldname] == -1).value_counts()
count = 0 if True not in counts else counts[True]
print "Total currently unplaced: %d" % count


def lcm_simulate(choosers, locations, cfgname, outdf, output_fname):
def lcm_simulate(choosers, locations, cfgname, outdf_name, output_fname,
location_ratio=2.0):
"""
Simulate the location choices for the specified choosers
Expand All @@ -154,12 +192,16 @@ def lcm_simulate(choosers, locations, cfgname, outdf, output_fname):
cfgname : string
The name of the yaml config file from which to read the location
choice model.
outdf : DataFrame
The dataframe to write the simulated location to.
outdf_name : string
The name of the dataframe to write the simulated location to.
outfname : string
The column name to write the simulated location to.
location_ratio : float
Above the location ratio (default of 2.0) of locations to choosers, the
locations will be sampled to meet this ratio (for performance reasons).
"""
print "Running location choice model simulation\n"
outdf = sim.get_table(outdf_name)
cfg = misc.config(cfgname)
model_type = yaml.load(open(cfg))["model_type"]

Expand All @@ -168,14 +210,28 @@ def lcm_simulate(choosers, locations, cfgname, outdf, output_fname):
elif model_type == "segmented_locationchoice":
lcm = SegmentedMNLLocationChoiceModel.from_yaml(str_or_buffer=cfg)

movers = choosers[choosers[output_fname].isnull()]
choosers = _to_frame_get_fields(model_type, lcm, output_fname, choosers)

movers = choosers[choosers[output_fname] == -1]

locations = deal_with_nas_for_est_or_sim(locations, lcm.columns_used()+[output_fname])

if len(locations) > len(movers) * location_ratio:
print "Location ratio exceeded: %d locations and only %d choosers" % \
(len(locations), len(movers))
idxes = random.choice(locations.index, size=len(movers) * location_ratio,
replace=False)
locations = locations.loc[idxes]
print " after sampling %d locations are available\n" % len(locations)

new_units = lcm.predict(movers, locations, debug=True)
print "Assigned %d choosers to new units" % len(new_units.index)
if len(new_units) == 0:
return
outdf[output_fname].loc[new_units.index] = \
s = sim.get_table(outdf_name).get_column(output_fname)
s.loc[new_units.index] = \
locations.loc[new_units.values][output_fname].values
sim.add_column(outdf_name, output_fname, s)
_print_number_unplaced(outdf, output_fname)

if model_type == "locationchoice":
Expand All @@ -195,30 +251,32 @@ def simple_relocation(choosers, relocation_rate, fieldname='building_id'):
"""
Parameters
----------
choosers : DataFrame
A dataframe of people which might be relocating.
choosers_name : string
A name of the dataframe of people which might be relocating.
relocation_rate : float
A number less than one describing the percent of rows to mark for
relocation.
fieldname : string
The field name in the choosers dataframe to set to np.nan for those
The field name in the choosers dataframe to set to -1 for those
rows to mark for relocation.
"""
print "Running relocation\n"
choosers_name = choosers
choosers = sim.get_table(choosers)
print "Total agents: %d" % len(choosers[fieldname])
_print_number_unplaced(choosers, fieldname)
chooser_ids = np.random.choice(choosers.index, size=int(relocation_rate *
len(choosers)), replace=False)
choosers[fieldname].loc[chooser_ids] = np.nan
s = choosers[fieldname]
print "Assinging for relocation..."
s.loc[chooser_ids] = -1
sim.add_column(choosers_name, fieldname, s)
_print_number_unplaced(choosers, fieldname)


def simple_transition(dset, dfname, rate):
def simple_transition(dfname, rate):
"""
Parameters
----------
choosers : dataset
The dataset object, in order to write the resulting transitioned
dataframe
dfname : string
The name of the dataframe in the dataset to read and write the
dataframe.
Expand All @@ -227,8 +285,9 @@ def simple_transition(dset, dfname, rate):
transition model.
"""
transition = GrowthRateTransition(rate)
df = dset.fetch(dfname)
tbl = sim.get_table(dfname)
df = tbl.to_frame(tbl.local_columns)
print "%d agents before transition" % len(df.index)
df, added, copied, removed = transition.transition(df, None)
print "%d agents after transition" % len(df.index)
dset.save_tmptbl(dfname, df)
sim.add_table(dfname, df)
Loading

0 comments on commit 32e723c

Please sign in to comment.