Skip to content

Commit

Permalink
mix of change to developer and lcms
Browse files Browse the repository at this point in the history
Unfortunately I didn't check in my developer changes before I started working on debug info in the regressions and pdfs.  So this push includes both.

At any rate, this is a basically working developer model.

As well as a version of yamlmodelrunner, which returns debug information for the regressions and simulations.
  • Loading branch information
fscottfoti committed Jun 13, 2014
1 parent 61be2e8 commit e3b254d
Show file tree
Hide file tree
Showing 7 changed files with 116 additions and 39 deletions.
10 changes: 4 additions & 6 deletions urbansim/developer/developer.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ def __init__(self, feasibility):
def max_form(f, colname):
"""
Assumes dataframe with hierarchical columns with first index equal to the
use and second index equal to the attribtue
use and second index equal to the attribute
e.g. f.columns equal to:
mixedoffice building_cost
Expand Down Expand Up @@ -118,14 +118,13 @@ def pick(self, form, target_units, parcel_size, ave_unit_size,
df = df[df.max_profit_far > 0]
df["parcel_size"] = parcel_size
df = df[df.parcel_size < max_parcel_size]
df['new_sqft'] = df.parcel_size * df.max_profit_far
ave_unit_size[ave_unit_size < min_unit_size] = min_unit_size
df['new_units'] = np.round(df.new_sqft / ave_unit_size)
df['residential_units'] = np.round(df.building_sqft / ave_unit_size)
df['current_units'] = current_units
df['net_units'] = df.new_units - df.current_units
df['net_units'] = df.residential_units - df.current_units
df = df[df.net_units > 0]

print "Describe of net units\n", df.net_units.describe()
# print "Describe of net units\n", df.net_units.describe()
print "Sum of net units that are profitable", df.net_units.sum()
if df.net_units.sum() < target_units:
print "WARNING THERE WERE NOT ENOUGH PROFITABLE UNITS TO MATCH DEMAND"
Expand Down Expand Up @@ -155,6 +154,5 @@ def merge(old_df, new_df):
maxind = np.max(old_df.index.values)
new_df.index = new_df.index + maxind + 1
concat_df = pd.concat([old_df, new_df], verify_integrity=True)
print concat_df.index.name
concat_df.index.name = 'building_id'
return concat_df
26 changes: 18 additions & 8 deletions urbansim/developer/sqftproforma.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ def _reset_defaults(self):
self.parcel_sizes = [10000.0]
self.fars = [.1, .25, .5, .75, 1.0, 1.5, 1.8, 2.0, 3.0, 4.0, 5.0, 7.0, 9.0, 11.0]
self.uses = ['retail', 'industrial', 'office', 'residential']
self.residential_uses = [False, False, False, True]
self.forms = {
'retail': {
"retail": 1.0
Expand Down Expand Up @@ -185,10 +186,13 @@ def _convert_types(self):
"""
self.fars = np.array(self.fars)
self.parking_rates = np.array([self.parking_rates[use] for use in self.uses])
self.res_ratios = {}
assert len(self.uses) == len(self.residential_uses)
for k, v in self.forms.iteritems():
self.forms[k] = np.array([self.forms[k].get(use, 0.0) for use in self.uses])
# normalize if not already
self.forms[k] /= self.forms[k].sum()
self.res_ratios[k] = pd.Series(self.forms[k])[self.residential_uses].sum()
self.costs = np.transpose(np.array([self.costs[use] for use in self.uses]))

@property
Expand Down Expand Up @@ -298,7 +302,7 @@ def _generate_lookup(self):
building_bulk = orig_bulk - parkingstalls * \
c.parking_sqft_d[parking_config]

df['build'] = building_bulk
df['building_sqft'] = building_bulk

parkingstalls = building_bulk * \
np.sum(uses_distrib * c.parking_rates) / c.sqft_per_rate
Expand All @@ -309,11 +313,11 @@ def _generate_lookup(self):
df['spaces'] = parkingstalls

if parking_config == 'underground':
df['parksqft'] = parkingstalls * \
df['park_sqft'] = parkingstalls * \
c.parking_sqft_d[parking_config]
stories = building_bulk / c.tiled_parcel_sizes
if parking_config == 'deck':
df['parksqft'] = parkingstalls * \
df['park_sqft'] = parkingstalls * \
c.parking_sqft_d[parking_config]
stories = ((building_bulk + parkingstalls *
c.parking_sqft_d[parking_config]) /
Expand All @@ -322,17 +326,17 @@ def _generate_lookup(self):
stories = building_bulk / \
(c.tiled_parcel_sizes - parkingstalls *
c.parking_sqft_d[parking_config])
df['parksqft'] = parkingstalls * \
df['park_sqft'] = parkingstalls * \
c.parking_sqft_d[parking_config]
# not all fars support surface parking
stories[np.where(stories < 0.0)] = np.nan

df['total_sqft'] = df.build + df.parksqft
df['total_sqft'] = df.building_sqft + df.park_sqft
stories /= c.parcel_coverage
df['stories'] = stories
df['build_cost_sqft'] = self._building_cost(uses_distrib, stories)

df['build_cost'] = df.build_cost_sqft * df.build
df['build_cost'] = df.build_cost_sqft * df.building_sqft
df['park_cost'] = parking_cost
df['cost'] = df.build_cost + df.park_cost

Expand Down Expand Up @@ -444,7 +448,7 @@ def lookup(self, form, df, only_built=True):
A dataframe which is indexed by the parcel ids that were passed, with the
following columns.
building_size : Series, float
building_sqft : Series, float
The number of square feet for the building to build. Keep in mind
this includes parking and common space. Will need a helpful function
to convert from gross square feet to actual usable square feet in
Expand Down Expand Up @@ -513,7 +517,7 @@ def twod_get(indexes, arr):
return arr[indexes, np.arange(indexes.size)].astype('float')

outdf = pd.DataFrame({
'building_size': twod_get(maxprofitind, building_bulks),
'building_sqft': twod_get(maxprofitind, building_bulks),
'building_cost': twod_get(maxprofitind, building_costs),
'total_cost': twod_get(maxprofitind, total_costs),
'building_revenue': twod_get(maxprofitind, building_revenue),
Expand All @@ -524,6 +528,12 @@ def twod_get(indexes, arr):
if only_built:
outdf = outdf.query('max_profit > 0')

resratio = c.res_ratios[form]
nonresratio = 1.0 - resratio
outdf["residential_sqft"] = outdf.building_sqft * c.building_efficiency * resratio
outdf["non_residential_sqft"] = outdf.building_sqft * c.building_efficiency * nonresratio
outdf["stories"] = outdf["max_profit_far"] / c.parcel_coverage

return outdf

def _debug_output(self):
Expand Down
47 changes: 38 additions & 9 deletions urbansim/models/lcm.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,7 @@ def __init__(self, model_expression, sample_size,
self.estimation_sample_size = estimation_sample_size
self.choice_column = choice_column
self.name = name if name is not None else 'MNLLocationChoiceModel'
self.sim_pdf = None

self.log_likelihoods = None
self.fit_parameters = None
Expand Down Expand Up @@ -271,7 +272,7 @@ def report_fit(self):

print(tbl)

def predict(self, choosers, alternatives):
def predict(self, choosers, alternatives, debug=False):
"""
Choose from among alternatives for a group of agents.
Expand All @@ -283,6 +284,10 @@ def predict(self, choosers, alternatives):
agent probabilities of choosing alternatives.
alternatives : pandas.DataFrame
Table describing the things from which agents are choosing.
debug : bool
If debug is set to true, we'll set the variable "sim_pdf" on
the object to store the probabilities for mapping of the
outcome.
Returns
-------
Expand All @@ -299,22 +304,33 @@ def predict(self, choosers, alternatives):
alternatives = util.apply_filter_query(
alternatives, self.alts_predict_filters)

if len(choosers) == 0:
return pd.Series()

# TODO: only using 1st item in choosers for determining probabilities.
# Need to expand options around this.
num_choosers = 1
_, merged, _ = interaction.mnl_interaction_dataset(
choosers.head(1), alternatives, len(alternatives))
choosers.head(num_choosers), alternatives, len(alternatives))
merged = util.apply_filter_query(
merged, self.interaction_predict_filters)
model_design = dmatrix(
self.str_model_expression, data=merged, return_type='dataframe')

coeffs = [self.fit_parameters['Coefficient'][x] for x in model_design.columns]

# probabilities are returned from mnl_simulate as a 2d array
# and need to be flatted for use in unit_choice.
probabilities = mnl.mnl_simulate(
model_design.as_matrix(),
self.fit_parameters['Coefficient'].values,
coeffs,
numalts=len(merged), returnprobs=True).flatten()

if debug:
# when we're not doing 1st item of choosers, this will break!
assert num_choosers == 1
self.sim_pdf = pd.Series(probabilities, index=alternatives.index)

# figure out exactly which things from which choices are drawn
alt_choices = (
merged[self.choice_column] if self.choice_column else merged.index)
Expand Down Expand Up @@ -460,8 +476,9 @@ def _iter_groups(self, data):
"""
groups = data.groupby(self.segmentation_col)

for name in self.models:
yield name, groups.get_group(name)
for name, group in groups:
print("Returning group %s" % str(name))
yield name, group

def fit(self, choosers, alternatives, current_choice):
"""
Expand Down Expand Up @@ -500,7 +517,7 @@ def fitted(self):
return (all(m.fitted for m in self.models.values())
if self.models else False)

def predict(self, choosers, alternatives):
def predict(self, choosers, alternatives, debug=False):
"""
Choose from among alternatives for a group of agents after
segmenting the `choosers` table.
Expand All @@ -514,6 +531,10 @@ def predict(self, choosers, alternatives):
Must have a column matching the .segmentation_col attribute.
alternatives : pandas.DataFrame
Table describing the things from which agents are choosing.
debug : bool
If debug is set to true, we'll set the variable "sim_pdf" on
the object to store the probabilities for mapping of the
outcome.
Returns
-------
Expand All @@ -526,7 +547,7 @@ def predict(self, choosers, alternatives):
results = []

for name, df in self._iter_groups(choosers):
choices = self.models[name].predict(df, alternatives)
choices = self.models[name].predict(df, alternatives, debug=debug)
# remove chosen alternatives
alternatives = alternatives.loc[~alternatives.index.isin(choices)]
results.append(choices)
Expand Down Expand Up @@ -725,7 +746,7 @@ def fitted(self):
"""
return self._group.fitted

def predict(self, choosers, alternatives):
def predict(self, choosers, alternatives, debug=False):
"""
Choose from among alternatives for a group of agents after
segmenting the `choosers` table.
Expand All @@ -739,6 +760,10 @@ def predict(self, choosers, alternatives):
Must have a column matching the .segmentation_col attribute.
alternatives : pandas.DataFrame
Table describing the things from which agents are choosing.
debug : bool
If debug is set to true, we'll set the variable "sim_pdf" on
the object to store the probabilities for mapping of the
outcome.
Returns
-------
Expand All @@ -752,7 +777,11 @@ def predict(self, choosers, alternatives):
choosers, self.choosers_predict_filters)
alternatives = util.apply_filter_query(
alternatives, self.alts_predict_filters)
return self._group.predict(choosers, alternatives)

if len(choosers) == 0:
return pd.Series()

return self._group.predict(choosers, alternatives, debug=debug)

def _process_model_dict(self, d):
"""
Expand Down
5 changes: 3 additions & 2 deletions urbansim/models/regression.py
Original file line number Diff line number Diff line change
Expand Up @@ -153,7 +153,7 @@ def predict(self, data):
"""
model_design = dmatrix(self._rhs, data=data, return_type='dataframe')
return model_design.dot(self.params).values
return model_design.dot(self.params.loc[model_design.columns]).values


def _model_fit_to_table(fit):
Expand Down Expand Up @@ -311,7 +311,8 @@ class instance for use during prediction.
self.model_fit = fit
self.fit_parameters = _model_fit_to_table(fit)
if debug:
df = pd.DataFrame(fit.model.exog, columns=fit.model.exog_names, index=data.index)
index = util.apply_filter_query(data, self.fit_filters).index
df = pd.DataFrame(fit.model.exog, columns=fit.model.exog_names, index=index)
df[fit.model.endog_names] = fit.model.endog
df["fittedvalues"] = fit.fittedvalues
df["residuals"] = fit.resid
Expand Down
27 changes: 22 additions & 5 deletions urbansim/models/yamlmodelrunner.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,8 @@ def hedonic_estimate(df, cfgname):
model_type = yaml.load(open(cfg))["model_type"]
if model_type == "regression":
hm = RegressionModel.from_yaml(str_or_buffer=cfg)
print hm.fit(df).summary()
est_data = hm.est_data
print hm.fit(df, debug=True).summary()
est_data = {"est_data": hm.est_data}
if model_type == "segmented_regression":
hm = SegmentedRegressionModel.from_yaml(str_or_buffer=cfg)
hm.min_segment_size = 10
Expand Down Expand Up @@ -162,17 +162,34 @@ def lcm_simulate(choosers, locations, cfgname, outdf, output_fname):
print "Running location choice model simulation\n"
cfg = misc.config(cfgname)
model_type = yaml.load(open(cfg))["model_type"]

if model_type == "locationchoice":
lcm = MNLLocationChoiceModel.from_yaml(str_or_buffer=cfg)
elif model_type == "segmented_locationchoice":
lcm = SegmentedMNLLocationChoiceModel.from_yaml(str_or_buffer=cfg)

movers = choosers[choosers[output_fname].isnull()]
new_units = lcm.predict(movers, locations)

new_units = lcm.predict(movers, locations, debug=True)
print "Assigned %d choosers to new units" % len(new_units.index)
if len(new_units) == 0:
return
outdf[output_fname].loc[new_units.index] = \
locations.loc[new_units.values][output_fname].values
_print_number_unplaced(outdf, output_fname)

if model_type == "locationchoice":
sim_pdf = {"sim_pdf": lcm.sim_pdf}
elif model_type == "segmented_locationchoice":
sim_pdf = {name: lcm._group.models[name].sim_pdf for name in lcm._group.models}

# go back to the buildings from units
sim_pdf = pd.concat(sim_pdf.values(), keys=sim_pdf.keys(), axis=1)
sim_pdf.index = locations[output_fname]
sim_pdf = sim_pdf.groupby(level=0).first()

return sim_pdf


def simple_relocation(choosers, relocation_rate, fieldname='building_id'):
"""
Expand All @@ -189,8 +206,8 @@ def simple_relocation(choosers, relocation_rate, fieldname='building_id'):
"""
print "Running relocation\n"
_print_number_unplaced(choosers, fieldname)
chooser_ids = np.random.choice(choosers.index, size=relocation_rate *
len(choosers.index), replace=False)
chooser_ids = np.random.choice(choosers.index, size=int(relocation_rate *
len(choosers)), replace=False)
choosers[fieldname].loc[chooser_ids] = np.nan
_print_number_unplaced(choosers, fieldname)

Expand Down
6 changes: 5 additions & 1 deletion urbansim/utils/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,8 +85,12 @@ def df(self):
return self.dset.fetch(self.name)

def build_df(obj, flds=None):
flds = None
if flds is None:
flds = obj.flds
if obj.flds is None:
return obj.df
else:
flds = obj.flds
columns = [getattr(obj, fld) for fld in flds]
df = pd.concat(columns, axis=1)
df.columns = flds
Expand Down
Loading

0 comments on commit e3b254d

Please sign in to comment.