Skip to content

Commit

Permalink
Merge e2f58de into da26815
Browse files Browse the repository at this point in the history
  • Loading branch information
fscottfoti committed Jun 20, 2014
2 parents da26815 + e2f58de commit dd30346
Show file tree
Hide file tree
Showing 8 changed files with 158 additions and 53 deletions.
49 changes: 34 additions & 15 deletions urbansim/developer/developer.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ def __init__(self, feasibility):
def max_form(f, colname):
"""
Assumes dataframe with hierarchical columns with first index equal to the
use and second index equal to the attribtue
use and second index equal to the attribute
e.g. f.columns equal to:
mixedoffice building_cost
Expand All @@ -33,17 +33,26 @@ def max_form(f, colname):
df = f.stack(level=0)[[colname]].stack().unstack(level=1).reset_index(level=1, drop=True)
return df.idxmax(axis=1)

def keep_form_with_max_profit(self):
def keep_form_with_max_profit(self, forms=None):
"""
This converts the dataframe, which shows all profitable forms,
to the form with the greatest profit, so that more profitable
forms outcompete less profitable forms.
Parameters
----------
forms: list of strings
List of forms which compete with each other. Can leave some out.
"""
f = self.dset.feasibility
f = self.feasibility

if forms is not None:
f = f[forms]

mu = self.max_form(f, "max_profit")
indexes = [tuple(x) for x in mu.reset_index().values]
df = f.stack(level=0).loc[indexes]
df.index.names = ["parcel_id", "use"]
df.index.names = ["parcel_id", "form"]
df = df.reset_index(level=1)
return df

Expand All @@ -67,23 +76,25 @@ def compute_units_to_build(num_agents, num_units, target_vacancy):
print "Number of agent spaces: %d" % num_units
assert target_vacancy < 1.0
target_units = max(num_agents / (1 - target_vacancy) - num_units, 0)
print "Current vacancy = %.2f" % (1 - num_agents / num_units)
print "Current vacancy = %.2f" % (1 - num_agents / float(num_units))
print "Target vacancy = %.2f, target of new units = %d" % (target_vacancy, target_units)
return target_units

def pick(self, form, target_units, parcel_size, ave_unit_size,
current_units, max_parcel_size=200000, min_unit_size=400,
drop_after_build=True):
drop_after_build=True, residential=True):
"""
Choose the buildings from the list that are feasible to build in
order to match the specified demand.
Parameters
----------
form : string
One of the building forms from the pro forma specification -
form : string or list
One or more of the building forms from the pro forma specification -
e.g. "residential" or "mixedresidential" - these are configuration
parameters pass previously to the pro forma.
parameters passed previously to the pro forma. If more than one form
is passed the forms compete with each other (based on profitability)
for which one gets built in order to meet demand.
target_units : int
The number of units to build. For non-residential buildings this
should be passed as the number of job spaces that need to be created.
Expand All @@ -110,22 +121,31 @@ def pick(self, form, target_units, parcel_size, ave_unit_size,
Whether or not to drop parcels from consideration after they
have been chosen for development. Usually this is true so as
to not develop the same parcel twice.
residential: bool
If creating non-residential buildings set this to false and developer
will fill in non_residential_units rather than residential_units
"""

df = self.feasibility[form]
if isinstance(form, list):
df = self.keep_form_with_max_profit(form)
else:
df = self.feasibility[form]

# feasible buildings only for this building type
df = df[df.max_profit_far > 0]
df["parcel_size"] = parcel_size
df = df[df.parcel_size < max_parcel_size]
df['new_sqft'] = df.parcel_size * df.max_profit_far
ave_unit_size[ave_unit_size < min_unit_size] = min_unit_size
df['new_units'] = np.round(df.new_sqft / ave_unit_size)
df['current_units'] = current_units
df['net_units'] = df.new_units - df.current_units
if residential:
df['residential_units'] = np.round(df.building_sqft / ave_unit_size)
df['net_units'] = df.residential_units - df.current_units
else:
df['non_residential_units'] = np.round(df.building_sqft / ave_unit_size)
df['net_units'] = df.non_residential_units - df.current_units
df = df[df.net_units > 0]

print "Describe of net units\n", df.net_units.describe()
# print "Describe of net units\n", df.net_units.describe()
print "Sum of net units that are profitable", df.net_units.sum()
if df.net_units.sum() < target_units:
print "WARNING THERE WERE NOT ENOUGH PROFITABLE UNITS TO MATCH DEMAND"
Expand Down Expand Up @@ -155,6 +175,5 @@ def merge(old_df, new_df):
maxind = np.max(old_df.index.values)
new_df.index = new_df.index + maxind + 1
concat_df = pd.concat([old_df, new_df], verify_integrity=True)
print concat_df.index.name
concat_df.index.name = 'building_id'
return concat_df
26 changes: 18 additions & 8 deletions urbansim/developer/sqftproforma.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ def _reset_defaults(self):
self.parcel_sizes = [10000.0]
self.fars = [.1, .25, .5, .75, 1.0, 1.5, 1.8, 2.0, 3.0, 4.0, 5.0, 7.0, 9.0, 11.0]
self.uses = ['retail', 'industrial', 'office', 'residential']
self.residential_uses = [False, False, False, True]
self.forms = {
'retail': {
"retail": 1.0
Expand Down Expand Up @@ -185,10 +186,13 @@ def _convert_types(self):
"""
self.fars = np.array(self.fars)
self.parking_rates = np.array([self.parking_rates[use] for use in self.uses])
self.res_ratios = {}
assert len(self.uses) == len(self.residential_uses)
for k, v in self.forms.iteritems():
self.forms[k] = np.array([self.forms[k].get(use, 0.0) for use in self.uses])
# normalize if not already
self.forms[k] /= self.forms[k].sum()
self.res_ratios[k] = pd.Series(self.forms[k])[self.residential_uses].sum()
self.costs = np.transpose(np.array([self.costs[use] for use in self.uses]))

@property
Expand Down Expand Up @@ -298,7 +302,7 @@ def _generate_lookup(self):
building_bulk = orig_bulk - parkingstalls * \
c.parking_sqft_d[parking_config]

df['build'] = building_bulk
df['building_sqft'] = building_bulk

parkingstalls = building_bulk * \
np.sum(uses_distrib * c.parking_rates) / c.sqft_per_rate
Expand All @@ -309,11 +313,11 @@ def _generate_lookup(self):
df['spaces'] = parkingstalls

if parking_config == 'underground':
df['parksqft'] = parkingstalls * \
df['park_sqft'] = parkingstalls * \
c.parking_sqft_d[parking_config]
stories = building_bulk / c.tiled_parcel_sizes
if parking_config == 'deck':
df['parksqft'] = parkingstalls * \
df['park_sqft'] = parkingstalls * \
c.parking_sqft_d[parking_config]
stories = ((building_bulk + parkingstalls *
c.parking_sqft_d[parking_config]) /
Expand All @@ -322,17 +326,17 @@ def _generate_lookup(self):
stories = building_bulk / \
(c.tiled_parcel_sizes - parkingstalls *
c.parking_sqft_d[parking_config])
df['parksqft'] = parkingstalls * \
df['park_sqft'] = parkingstalls * \
c.parking_sqft_d[parking_config]
# not all fars support surface parking
stories[np.where(stories < 0.0)] = np.nan

df['total_sqft'] = df.build + df.parksqft
df['total_sqft'] = df.building_sqft + df.park_sqft
stories /= c.parcel_coverage
df['stories'] = stories
df['build_cost_sqft'] = self._building_cost(uses_distrib, stories)

df['build_cost'] = df.build_cost_sqft * df.build
df['build_cost'] = df.build_cost_sqft * df.building_sqft
df['park_cost'] = parking_cost
df['cost'] = df.build_cost + df.park_cost

Expand Down Expand Up @@ -444,7 +448,7 @@ def lookup(self, form, df, only_built=True):
A dataframe which is indexed by the parcel ids that were passed, with the
following columns.
building_size : Series, float
building_sqft : Series, float
The number of square feet for the building to build. Keep in mind
this includes parking and common space. Will need a helpful function
to convert from gross square feet to actual usable square feet in
Expand Down Expand Up @@ -513,7 +517,7 @@ def twod_get(indexes, arr):
return arr[indexes, np.arange(indexes.size)].astype('float')

outdf = pd.DataFrame({
'building_size': twod_get(maxprofitind, building_bulks),
'building_sqft': twod_get(maxprofitind, building_bulks),
'building_cost': twod_get(maxprofitind, building_costs),
'total_cost': twod_get(maxprofitind, total_costs),
'building_revenue': twod_get(maxprofitind, building_revenue),
Expand All @@ -524,6 +528,12 @@ def twod_get(indexes, arr):
if only_built:
outdf = outdf.query('max_profit > 0')

resratio = c.res_ratios[form]
nonresratio = 1.0 - resratio
outdf["residential_sqft"] = outdf.building_sqft * c.building_efficiency * resratio
outdf["non_residential_sqft"] = outdf.building_sqft * nonresratio
outdf["stories"] = outdf["max_profit_far"] / c.parcel_coverage

return outdf

def _debug_output(self):
Expand Down
47 changes: 38 additions & 9 deletions urbansim/models/lcm.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,7 @@ def __init__(self, model_expression, sample_size,
self.estimation_sample_size = estimation_sample_size
self.choice_column = choice_column
self.name = name if name is not None else 'MNLLocationChoiceModel'
self.sim_pdf = None

self.log_likelihoods = None
self.fit_parameters = None
Expand Down Expand Up @@ -271,7 +272,7 @@ def report_fit(self):

print(tbl)

def predict(self, choosers, alternatives):
def predict(self, choosers, alternatives, debug=False):
"""
Choose from among alternatives for a group of agents.
Expand All @@ -283,6 +284,10 @@ def predict(self, choosers, alternatives):
agent probabilities of choosing alternatives.
alternatives : pandas.DataFrame
Table describing the things from which agents are choosing.
debug : bool
If debug is set to true, we'll set the variable "sim_pdf" on
the object to store the probabilities for mapping of the
outcome.
Returns
-------
Expand All @@ -299,22 +304,33 @@ def predict(self, choosers, alternatives):
alternatives = util.apply_filter_query(
alternatives, self.alts_predict_filters)

if len(choosers) == 0:
return pd.Series()

# TODO: only using 1st item in choosers for determining probabilities.
# Need to expand options around this.
num_choosers = 1
_, merged, _ = interaction.mnl_interaction_dataset(
choosers.head(1), alternatives, len(alternatives))
choosers.head(num_choosers), alternatives, len(alternatives))
merged = util.apply_filter_query(
merged, self.interaction_predict_filters)
model_design = dmatrix(
self.str_model_expression, data=merged, return_type='dataframe')

coeffs = [self.fit_parameters['Coefficient'][x] for x in model_design.columns]

# probabilities are returned from mnl_simulate as a 2d array
# and need to be flattened for use in unit_choice.
probabilities = mnl.mnl_simulate(
model_design.as_matrix(),
self.fit_parameters['Coefficient'].values,
coeffs,
numalts=len(merged), returnprobs=True).flatten()

if debug:
# when we're not doing 1st item of choosers, this will break!
assert num_choosers == 1
self.sim_pdf = pd.Series(probabilities, index=alternatives.index)

# figure out exactly which things from which choices are drawn
alt_choices = (
merged[self.choice_column] if self.choice_column else merged.index)
Expand Down Expand Up @@ -460,8 +476,9 @@ def _iter_groups(self, data):
"""
groups = data.groupby(self.segmentation_col)

for name in self.models:
yield name, groups.get_group(name)
for name, group in groups:
print("Returning group %s" % str(name))
yield name, group

def fit(self, choosers, alternatives, current_choice):
"""
Expand Down Expand Up @@ -500,7 +517,7 @@ def fitted(self):
return (all(m.fitted for m in self.models.values())
if self.models else False)

def predict(self, choosers, alternatives):
def predict(self, choosers, alternatives, debug=False):
"""
Choose from among alternatives for a group of agents after
segmenting the `choosers` table.
Expand All @@ -514,6 +531,10 @@ def predict(self, choosers, alternatives):
Must have a column matching the .segmentation_col attribute.
alternatives : pandas.DataFrame
Table describing the things from which agents are choosing.
debug : bool
If debug is set to true, we'll set the variable "sim_pdf" on
the object to store the probabilities for mapping of the
outcome.
Returns
-------
Expand All @@ -526,7 +547,7 @@ def predict(self, choosers, alternatives):
results = []

for name, df in self._iter_groups(choosers):
choices = self.models[name].predict(df, alternatives)
choices = self.models[name].predict(df, alternatives, debug=debug)
# remove chosen alternatives
alternatives = alternatives.loc[~alternatives.index.isin(choices)]
results.append(choices)
Expand Down Expand Up @@ -738,7 +759,7 @@ def fitted(self):
"""
return self._group.fitted

def predict(self, choosers, alternatives):
def predict(self, choosers, alternatives, debug=False):
"""
Choose from among alternatives for a group of agents after
segmenting the `choosers` table.
Expand All @@ -752,6 +773,10 @@ def predict(self, choosers, alternatives):
Must have a column matching the .segmentation_col attribute.
alternatives : pandas.DataFrame
Table describing the things from which agents are choosing.
debug : bool
If debug is set to true, we'll set the variable "sim_pdf" on
the object to store the probabilities for mapping of the
outcome.
Returns
-------
Expand All @@ -765,7 +790,11 @@ def predict(self, choosers, alternatives):
choosers, self.choosers_predict_filters)
alternatives = util.apply_filter_query(
alternatives, self.alts_predict_filters)
return self._group.predict(choosers, alternatives)

if len(choosers) == 0:
return pd.Series()

return self._group.predict(choosers, alternatives, debug=debug)

def _process_model_dict(self, d):
"""
Expand Down
1 change: 1 addition & 0 deletions urbansim/models/regression.py
Original file line number Diff line number Diff line change
Expand Up @@ -311,6 +311,7 @@ class instance for use during prediction.
self.model_fit = fit
self.fit_parameters = _model_fit_to_table(fit)
if debug:
index = util.apply_filter_query(data, self.fit_filters).index
df = pd.DataFrame(
fit.model.exog, columns=fit.model.exog_names, index=data.index)
df[fit.model.endog_names] = fit.model.endog
Expand Down
Loading

0 comments on commit dd30346

Please sign in to comment.