Merge e2f58de into da26815

UDST · Jun 20, 2014 · dd30346 · dd30346
2 parents da26815 + e2f58de
commit dd30346
Show file tree

Hide file tree

Showing 8 changed files with 158 additions and 53 deletions.
diff --git a/urbansim/developer/developer.py b/urbansim/developer/developer.py
@@ -14,7 +14,7 @@ def __init__(self, feasibility):
     def max_form(f, colname):
         """
         Assumes dataframe with hierarchical columns with first index equal to the
-        use and second index equal to the attribtue
+        use and second index equal to the attribute
 
         e.g. f.columns equal to:
         mixedoffice   building_cost
@@ -33,17 +33,26 @@ def max_form(f, colname):
         df = f.stack(level=0)[[colname]].stack().unstack(level=1).reset_index(level=1, drop=True)
         return df.idxmax(axis=1)
 
-    def keep_form_with_max_profit(self):
+    def keep_form_with_max_profit(self, forms=None):
         """
         This converts the dataframe, which shows all profitable forms,
         to the form with the greatest profit, so that more profitable
         forms outcompete less profitable forms.
+
+        Parameters
+        ----------
+        forms: list of strings
+            List of forms which compete with each other.  Can leave some out.
         """
-        f = self.dset.feasibility
+        f = self.feasibility
+
+        if forms is not None:
+            f = f[forms]
+
         mu = self.max_form(f, "max_profit")
         indexes = [tuple(x) for x in mu.reset_index().values]
         df = f.stack(level=0).loc[indexes]
-        df.index.names = ["parcel_id", "use"]
+        df.index.names = ["parcel_id", "form"]
         df = df.reset_index(level=1)
         return df
 
@@ -67,23 +76,25 @@ def compute_units_to_build(num_agents, num_units, target_vacancy):
         print "Number of agent spaces: %d" % num_units
         assert target_vacancy < 1.0
         target_units = max(num_agents / (1 - target_vacancy) - num_units, 0)
-        print "Current vacancy = %.2f" % (1 - num_agents / num_units)
+        print "Current vacancy = %.2f" % (1 - num_agents / float(num_units))
         print "Target vacancy = %.2f, target of new units = %d" % (target_vacancy, target_units)
         return target_units
 
     def pick(self, form, target_units, parcel_size, ave_unit_size,
              current_units, max_parcel_size=200000, min_unit_size=400,
-             drop_after_build=True):
+             drop_after_build=True, residential=True):
         """
         Choose the buildings from the list that are feasible to build in
         order to match the specified demand.
 
         Parameters
         ----------
-        form : string
-            One of the building forms from the pro forma specification -
+        form : string or list
+            One or more of the building forms from the pro forma specification -
             e.g. "residential" or "mixedresidential" - these are configuration
-            parameters pass previously to the pro forma.
+            parameters passed previously to the pro forma.  If more than one form
+            is passed the forms compete with each other (based on profitability)
+            for which one gets built in order to meet demand.
         target_units : int
             The number of units to build.  For non-residential buildings this
             should be passed as the number of job spaces that need to be created.
@@ -110,22 +121,31 @@ def pick(self, form, target_units, parcel_size, ave_unit_size,
             Whether or not to drop parcels from consideration after they
             have been chosen for development.  Usually this is true so as
             to not develop the same parcel twice.
+        residential: bool
+            If creating non-residential buildings set this to false and developer
+            will fill in non_residential_units rather than residential_units
         """
 
-        df = self.feasibility[form]
+        if isinstance(form, list):
+            df = self.keep_form_with_max_profit(form)
+        else:
+            df = self.feasibility[form]
 
         # feasible buildings only for this building type
         df = df[df.max_profit_far > 0]
         df["parcel_size"] = parcel_size
         df = df[df.parcel_size < max_parcel_size]
-        df['new_sqft'] = df.parcel_size * df.max_profit_far
         ave_unit_size[ave_unit_size < min_unit_size] = min_unit_size
-        df['new_units'] = np.round(df.new_sqft / ave_unit_size)
         df['current_units'] = current_units
-        df['net_units'] = df.new_units - df.current_units
+        if residential:
+            df['residential_units'] = np.round(df.building_sqft / ave_unit_size)
+            df['net_units'] = df.residential_units - df.current_units
+        else:
+            df['non_residential_units'] = np.round(df.building_sqft / ave_unit_size)
+            df['net_units'] = df.non_residential_units - df.current_units
         df = df[df.net_units > 0]
 
-        print "Describe of net units\n", df.net_units.describe()
+        # print "Describe of net units\n", df.net_units.describe()
         print "Sum of net units that are profitable", df.net_units.sum()
         if df.net_units.sum() < target_units:
             print "WARNING THERE WERE NOT ENOUGH PROFITABLE UNITS TO MATCH DEMAND"
@@ -155,6 +175,5 @@ def merge(old_df, new_df):
         maxind = np.max(old_df.index.values)
         new_df.index = new_df.index + maxind + 1
         concat_df = pd.concat([old_df, new_df], verify_integrity=True)
-        print concat_df.index.name
         concat_df.index.name = 'building_id'
         return concat_df
diff --git a/urbansim/developer/sqftproforma.py b/urbansim/developer/sqftproforma.py
@@ -8,6 +8,7 @@ def _reset_defaults(self):
         self.parcel_sizes = [10000.0]
         self.fars = [.1, .25, .5, .75, 1.0, 1.5, 1.8, 2.0, 3.0, 4.0, 5.0, 7.0, 9.0, 11.0]
         self.uses = ['retail', 'industrial', 'office', 'residential']
+        self.residential_uses = [False, False, False, True]
         self.forms = {
             'retail': {
                 "retail": 1.0
@@ -185,10 +186,13 @@ def _convert_types(self):
         """
         self.fars = np.array(self.fars)
         self.parking_rates = np.array([self.parking_rates[use] for use in self.uses])
+        self.res_ratios = {}
+        assert len(self.uses) == len(self.residential_uses)
         for k, v in self.forms.iteritems():
             self.forms[k] = np.array([self.forms[k].get(use, 0.0) for use in self.uses])
             # normalize if not already
             self.forms[k] /= self.forms[k].sum()
+            self.res_ratios[k] = pd.Series(self.forms[k])[self.residential_uses].sum()
         self.costs = np.transpose(np.array([self.costs[use] for use in self.uses]))
 
     @property
@@ -298,7 +302,7 @@ def _generate_lookup(self):
                         building_bulk = orig_bulk - parkingstalls * \
                             c.parking_sqft_d[parking_config]
 
-                df['build'] = building_bulk
+                df['building_sqft'] = building_bulk
 
                 parkingstalls = building_bulk * \
                     np.sum(uses_distrib * c.parking_rates) / c.sqft_per_rate
@@ -309,11 +313,11 @@ def _generate_lookup(self):
                 df['spaces'] = parkingstalls
 
                 if parking_config == 'underground':
-                    df['parksqft'] = parkingstalls * \
+                    df['park_sqft'] = parkingstalls * \
                         c.parking_sqft_d[parking_config]
                     stories = building_bulk / c.tiled_parcel_sizes
                 if parking_config == 'deck':
-                    df['parksqft'] = parkingstalls * \
+                    df['park_sqft'] = parkingstalls * \
                         c.parking_sqft_d[parking_config]
                     stories = ((building_bulk + parkingstalls *
                                 c.parking_sqft_d[parking_config]) /
@@ -322,17 +326,17 @@ def _generate_lookup(self):
                     stories = building_bulk / \
                         (c.tiled_parcel_sizes - parkingstalls *
                          c.parking_sqft_d[parking_config])
-                    df['parksqft'] = parkingstalls * \
+                    df['park_sqft'] = parkingstalls * \
                         c.parking_sqft_d[parking_config]
                     # not all fars support surface parking
                     stories[np.where(stories < 0.0)] = np.nan
 
-                df['total_sqft'] = df.build + df.parksqft
+                df['total_sqft'] = df.building_sqft + df.park_sqft
                 stories /= c.parcel_coverage
                 df['stories'] = stories
                 df['build_cost_sqft'] = self._building_cost(uses_distrib, stories)
 
-                df['build_cost'] = df.build_cost_sqft * df.build
+                df['build_cost'] = df.build_cost_sqft * df.building_sqft
                 df['park_cost'] = parking_cost
                 df['cost'] = df.build_cost + df.park_cost
 
@@ -444,7 +448,7 @@ def lookup(self, form, df, only_built=True):
         A dataframe which is indexed by the parcel ids that were passed, with the
         following columns.
 
-        building_size : Series, float
+        building_sqft : Series, float
             The number of square feet for the building to build.  Keep in mind
             this includes parking and common space.  Will need a helpful function
             to convert from gross square feet to actual usable square feet in
@@ -513,7 +517,7 @@ def twod_get(indexes, arr):
             return arr[indexes, np.arange(indexes.size)].astype('float')
 
         outdf = pd.DataFrame({
-            'building_size': twod_get(maxprofitind, building_bulks),
+            'building_sqft': twod_get(maxprofitind, building_bulks),
             'building_cost': twod_get(maxprofitind, building_costs),
             'total_cost': twod_get(maxprofitind, total_costs),
             'building_revenue': twod_get(maxprofitind, building_revenue),
@@ -524,6 +528,12 @@ def twod_get(indexes, arr):
         if only_built:
             outdf = outdf.query('max_profit > 0')
 
+        resratio = c.res_ratios[form]
+        nonresratio = 1.0 - resratio
+        outdf["residential_sqft"] = outdf.building_sqft * c.building_efficiency * resratio
+        outdf["non_residential_sqft"] = outdf.building_sqft * nonresratio
+        outdf["stories"] = outdf["max_profit_far"] / c.parcel_coverage
+
         return outdf
 
     def _debug_output(self):

diff --git a/urbansim/models/lcm.py b/urbansim/models/lcm.py
@@ -124,6 +124,7 @@ def __init__(self, model_expression, sample_size,
         self.estimation_sample_size = estimation_sample_size
         self.choice_column = choice_column
         self.name = name if name is not None else 'MNLLocationChoiceModel'
+        self.sim_pdf = None
 
         self.log_likelihoods = None
         self.fit_parameters = None
@@ -271,7 +272,7 @@ def report_fit(self):
 
         print(tbl)
 
-    def predict(self, choosers, alternatives):
+    def predict(self, choosers, alternatives, debug=False):
         """
         Choose from among alternatives for a group of agents.
 
@@ -283,6 +284,10 @@ def predict(self, choosers, alternatives):
             agent probabilities of choosing alternatives.
         alternatives : pandas.DataFrame
             Table describing the things from which agents are choosing.
+        debug : bool
+            If debug is set to true, we'll set the variable "sim_pdf" on
+            the object to store the probabilities for mapping of the
+            outcome.
 
         Returns
         -------
@@ -299,22 +304,33 @@ def predict(self, choosers, alternatives):
         alternatives = util.apply_filter_query(
             alternatives, self.alts_predict_filters)
 
+        if len(choosers) == 0:
+            return pd.Series()
+
         # TODO: only using 1st item in choosers for determining probabilities.
         # Need to expand options around this.
+        num_choosers = 1
         _, merged, _ = interaction.mnl_interaction_dataset(
-            choosers.head(1), alternatives, len(alternatives))
+            choosers.head(num_choosers), alternatives, len(alternatives))
         merged = util.apply_filter_query(
             merged, self.interaction_predict_filters)
         model_design = dmatrix(
             self.str_model_expression, data=merged, return_type='dataframe')
 
+        coeffs = [self.fit_parameters['Coefficient'][x] for x in model_design.columns]
+
         # probabilities are returned from mnl_simulate as a 2d array
         # and need to be flattened for use in unit_choice.
         probabilities = mnl.mnl_simulate(
             model_design.as_matrix(),
-            self.fit_parameters['Coefficient'].values,
+            coeffs,
             numalts=len(merged), returnprobs=True).flatten()
 
+        if debug:
+            # when we're not doing 1st item of choosers, this will break!
+            assert num_choosers == 1
+            self.sim_pdf = pd.Series(probabilities, index=alternatives.index)
+
         # figure out exactly which things from which choices are drawn
         alt_choices = (
             merged[self.choice_column] if self.choice_column else merged.index)
@@ -460,8 +476,9 @@ def _iter_groups(self, data):
         """
         groups = data.groupby(self.segmentation_col)
 
-        for name in self.models:
-            yield name, groups.get_group(name)
+        for name, group in groups:
+            print("Returning group %s" % str(name))
+            yield name, group
 
     def fit(self, choosers, alternatives, current_choice):
         """
@@ -500,7 +517,7 @@ def fitted(self):
         return (all(m.fitted for m in self.models.values())
                 if self.models else False)
 
-    def predict(self, choosers, alternatives):
+    def predict(self, choosers, alternatives, debug=False):
         """
         Choose from among alternatives for a group of agents after
         segmenting the `choosers` table.
@@ -514,6 +531,10 @@ def predict(self, choosers, alternatives):
             Must have a column matching the .segmentation_col attribute.
         alternatives : pandas.DataFrame
             Table describing the things from which agents are choosing.
+        debug : bool
+            If debug is set to true, we'll set the variable "sim_pdf" on
+            the object to store the probabilities for mapping of the
+            outcome.
 
         Returns
         -------
@@ -526,7 +547,7 @@ def predict(self, choosers, alternatives):
         results = []
 
         for name, df in self._iter_groups(choosers):
-            choices = self.models[name].predict(df, alternatives)
+            choices = self.models[name].predict(df, alternatives, debug=debug)
             # remove chosen alternatives
             alternatives = alternatives.loc[~alternatives.index.isin(choices)]
             results.append(choices)
@@ -738,7 +759,7 @@ def fitted(self):
         """
         return self._group.fitted
 
-    def predict(self, choosers, alternatives):
+    def predict(self, choosers, alternatives, debug=False):
         """
         Choose from among alternatives for a group of agents after
         segmenting the `choosers` table.
@@ -752,6 +773,10 @@ def predict(self, choosers, alternatives):
             Must have a column matching the .segmentation_col attribute.
         alternatives : pandas.DataFrame
             Table describing the things from which agents are choosing.
+        debug : bool
+            If debug is set to true, we'll set the variable "sim_pdf" on
+            the object to store the probabilities for mapping of the
+            outcome.
 
         Returns
         -------
@@ -765,7 +790,11 @@ def predict(self, choosers, alternatives):
             choosers, self.choosers_predict_filters)
         alternatives = util.apply_filter_query(
             alternatives, self.alts_predict_filters)
-        return self._group.predict(choosers, alternatives)
+
+        if len(choosers) == 0:
+            return pd.Series()
+
+        return self._group.predict(choosers, alternatives, debug=debug)
 
     def _process_model_dict(self, d):
         """

diff --git a/urbansim/models/regression.py b/urbansim/models/regression.py
@@ -311,6 +311,7 @@ class instance for use during prediction.
         self.model_fit = fit
         self.fit_parameters = _model_fit_to_table(fit)
         if debug:
+            index = util.apply_filter_query(data, self.fit_filters).index
             df = pd.DataFrame(
                 fit.model.exog, columns=fit.model.exog_names, index=data.index)
             df[fit.model.endog_names] = fit.model.endog