Initial pass at a SegmentedMNLLocationChoiceModel class.

UDST · May 29, 2014 · f852d8b · f852d8b
1 parent 007c3d3
commit f852d8b
Show file tree

Hide file tree

Showing 3 changed files with 198 additions and 1 deletion.
diff --git a/urbansim/models/lcm.py b/urbansim/models/lcm.py
@@ -530,3 +530,167 @@ def predict(self, choosers, alternatives):
             results.append(choices)
 
         return pd.concat(results)
+
+
+class SegmentedMNLLocationChoiceModel(object):
+    """
+    An MNL LCM group that allows segments to have different model expressions
+    but otherwise share configurations.
+
+    Parameters
+    ----------
+    segmentation_col
+        Name of column in the choosers table that will be used for groupby.
+    sample_size : int
+        Number of choices to sample for estimating the model.
+    choosers_fit_filters : list of str, optional
+        Filters applied to choosers table before fitting the model.
+    choosers_predict_filters : list of str, optional
+        Filters applied to the choosers table before calculating
+        new data points.
+    alts_fit_filters : list of str, optional
+        Filters applied to the alternatives table before fitting the model.
+    alts_predict_filters : list of str, optional
+        Filters applied to the alternatives table before calculating
+        new data points.
+    interaction_predict_filters : list of str, optional
+        Filters applied to the merged choosers/alternatives table
+        before predicting agent choices.
+    estimation_sample_size : int, optional, whether to sample choosers
+        during estimation (needs to be applied after choosers_fit_filters)
+    choice_column : optional
+        Name of the column in the `alternatives` table that choosers
+        should choose. e.g. the 'building_id' column. If not provided
+        the alternatives index is used.
+    default_model_expr : str, iterable, or dict, optional
+        A patsy model expression. Should contain only a right-hand side.
+
+    """
+    def __init__(self, segmentation_col, sample_size,
+                 choosers_fit_filters=None, choosers_predict_filters=None,
+                 alts_fit_filters=None, alts_predict_filters=None,
+                 interaction_predict_filters=None,
+                 estimation_sample_size=None,
+                 choice_column=None, default_model_expr=None):
+        self.segmentation_col = segmentation_col
+        self.sample_size = sample_size
+        self.choosers_fit_filters = choosers_fit_filters
+        self.choosers_predict_filters = choosers_predict_filters
+        self.alts_fit_filters = alts_fit_filters
+        self.alts_predict_filters = alts_predict_filters
+        self.interaction_predict_filters = interaction_predict_filters
+        self.estimation_sample_size = estimation_sample_size
+        self.choice_column = choice_column
+        self.default_model_expr = default_model_expr
+        self._group = MNLLocationChoiceModelGroup(segmentation_col)
+
+    def add_segment(self, name, model_expression=None):
+        """
+        Add a new segment with its own model expression.
+
+        Parameters
+        ----------
+        name
+            Segment name. Must match a segment in the groupby of the data.
+        model_expression : str or dict, optional
+            A patsy model expression that can be used with statsmodels.
+            Should contain both the left- and right-hand sides.
+            If not given the default model will be used, which must not be
+            None.
+
+        """
+        if not model_expression:
+            if not self.default_model_expr:
+                raise ValueError(
+                    'No default model available, '
+                    'you must supply a model experssion.')
+            model_expression = self.default_model_expr
+
+        # we'll take care of some of the filtering this side before
+        # segmentation
+        self._group.add_model_from_params(
+            name=name,
+            model_expression=model_expression,
+            sample_size=self.sample_size,
+            choosers_fit_filters=None,
+            choosers_predict_filters=None,
+            alts_fit_filters=None,
+            alts_predict_filters=None,
+            interaction_predict_filters=self.interaction_predict_filters,
+            estimation_sample_size=self.estimation_sample_size,
+            choice_column=self.choice_column)
+
+    def fit(self, choosers, alternatives, current_choice):
+        """
+        Fit and save models based on given data after segmenting
+        the `choosers` table. Segments that have not already been explicitly
+        added will be automatically added with default model.
+
+        Parameters
+        ----------
+        choosers : pandas.DataFrame
+            Table describing the agents making choices, e.g. households.
+            Must have a column with the same name as the .segmentation_col
+            attribute.
+        alternatives : pandas.DataFrame
+            Table describing the things from which agents are choosing,
+            e.g. buildings.
+        current_choice
+            Name of column in `choosers` that indicates which alternative
+            they have currently chosen.
+
+        Returns
+        -------
+        log_likelihoods : dict of dict
+            Keys will be model names and values will be dictionaries of
+            log-liklihood values as returned by MNLLocationChoiceModel.fit.
+
+        """
+        unique = choosers[self.segmentation_col].unique()
+
+        for x in unique:
+            if x not in self._group.models:
+                self.add_segment(x)
+
+        choosers = util.apply_filter_query(choosers, self.choosers_fit_filters)
+        alternatives = util.apply_filter_query(
+            alternatives, self.alts_fit_filters)
+
+        return self._group.fit(choosers, alternatives, current_choice)
+
+    @property
+    def fitted(self):
+        """
+        Whether models for all segments have been fit.
+
+        """
+        return self._group.fitted
+
+    def predict(self, choosers, alternatives):
+        """
+        Choose from among alternatives for a group of agents after
+        segmenting the `choosers` table.
+
+        Parameters
+        ----------
+        choosers : pandas.DataFrame
+            Table describing the agents making choices, e.g. households.
+            Only the first item in this table is used for determining
+            agent probabilities of choosing alternatives.
+            Must have a column matching the .segmentation_col attribute.
+        alternatives : pandas.DataFrame
+            Table describing the things from which agents are choosing.
+
+        Returns
+        -------
+        choices : pandas.Series
+            Mapping of chooser ID to alternative ID. Some choosers
+            will map to a nan value when there are not enough alternatives
+            for all the choosers.
+
+        """
+        choosers = util.apply_filter_query(
+            choosers, self.choosers_predict_filters)
+        alternatives = util.apply_filter_query(
+            alternatives, self.alts_predict_filters)
+        return self._group.predict(choosers, alternatives)
diff --git a/urbansim/models/regression.py b/urbansim/models/regression.py
@@ -685,6 +685,10 @@ def fit(self, data):
 
     @property
     def fitted(self):
+        """
+        Whether models for all segments have been fit.
+
+        """
         return self._group.fitted
 
     def predict(self, data):

diff --git a/urbansim/models/tests/test_lcm.py b/urbansim/models/tests/test_lcm.py
@@ -140,7 +140,7 @@ def test_mnl_lcm_repeated_alts(choosers, alternatives):
 
 def test_mnl_lcm_group(grouped_choosers, alternatives):
     model_exp = 'var2 + var1:var3'
-    sample_size = 2
+    sample_size = 4
 
     group = lcm.MNLLocationChoiceModelGroup('group')
     group.add_model_from_params('x', model_exp, sample_size)
@@ -157,3 +157,32 @@ def test_mnl_lcm_group(grouped_choosers, alternatives):
 
     assert len(choices.unique()) == len(choices)
     assert choices.isin(alternatives.index).all()
+
+
+def test_mnl_lcm_segmented_raises():
+    group = lcm.SegmentedMNLLocationChoiceModel('group', 2)
+
+    with pytest.raises(ValueError):
+        group.add_segment('x')
+
+
+def test_mnl_lcm_segmented(grouped_choosers, alternatives):
+    model_exp = 'var2 + var1:var3'
+    sample_size = 4
+
+    group = lcm.SegmentedMNLLocationChoiceModel(
+        'group', sample_size, default_model_expr=model_exp)
+    group.add_segment('x')
+    group.add_segment('y', 'var3 + var1:var2')
+
+    assert group.fitted is False
+    logliks = group.fit(grouped_choosers, alternatives, 'thing_id')
+    assert group.fitted is True
+
+    assert 'x' in logliks and 'y' in logliks
+    assert isinstance(logliks['x'], dict) and isinstance(logliks['y'], dict)
+
+    choices = group.predict(grouped_choosers, alternatives)
+
+    assert len(choices.unique()) == len(choices)
+    assert choices.isin(alternatives.index).all()