Add unit_choice function for choosing amongst alternatives based on probabilities.
UDST · Apr 28, 2014 · 74a8aa5 · 74a8aa5
1 parent feb5b78
commit 74a8aa5
Show file tree

Hide file tree

Showing 2 changed files with 97 additions and 5 deletions.
diff --git a/urbansim/models/lcm.py b/urbansim/models/lcm.py
@@ -1,13 +1,71 @@
-from __future__ import print_function
+from __future__ import print_function, division
 
 import numpy as np
+import pandas as pd
 from patsy import dmatrix
 from prettytable import PrettyTable
 
 from . import util
 from ..urbanchoice import interaction, mnl
 
 
+def unit_choice(chooser_ids, alternative_ids, probabilities):
+    """
+    Have a set of choosers choose from among alternatives according
+    to a probability distribution. Choice is binary: each
+    alternative can only be chosen once.
+
+    Alternatives are drawn without replacement with `np.random.choice`,
+    so the result depends on NumPy's random state.
+
+    Parameters
+    ----------
+    chooser_ids : array_like
+        Array of IDs of the agents that are making choices. These
+        become the index of the returned Series.
+    alternative_ids : array_like
+        Array of IDs of alternatives among which agents are making choices.
+    probabilities : array_like
+        The probability that an agent will choose an alternative.
+        Must be the same shape as `alternative_ids`. Unavailable
+        alternatives should have a probability of 0. Values are
+        rescaled internally so that they sum to 1.
+
+    Returns
+    -------
+    choices : pandas.Series
+        Mapping of chooser ID to alternative ID. Some choosers
+        will map to a nan value when there are not enough alternatives
+        for all the choosers, and every chooser maps to nan when
+        the probabilities sum to 0.
+
+    """
+    chooser_ids = np.asanyarray(chooser_ids)
+    alternative_ids = np.asanyarray(alternative_ids)
+    probabilities = np.asanyarray(probabilities)
+
+    # every chooser starts out unassigned (nan); assignments are filled
+    # in by chooser ID further down
+    choices = pd.Series([np.nan] * len(chooser_ids), index=chooser_ids)
+
+    if probabilities.sum() == 0:
+        # return all nan if there are no available units
+        return choices
+
+    # probabilities need to sum to 1 for np.random.choice
+    probabilities = probabilities / probabilities.sum()
+
+    # need to see if there are as many available alternatives as choosers
+    # (an alternative counts as available when its probability is nonzero)
+    n_available = np.count_nonzero(probabilities)
+    n_choosers = len(chooser_ids)
+    n_to_choose = n_choosers if n_choosers < n_available else n_available
+
+    # draw without replacement so that no alternative is handed out twice
+    chosen = np.random.choice(
+        alternative_ids, size=n_to_choose, replace=False, p=probabilities)
+
+    # if there are fewer available units than choosers we need to pick
+    # which choosers get a unit. The check below is also true when the
+    # two counts are equal, in which case the draw only reorders the
+    # choosers and everyone still gets a unit.
+    if n_to_choose == n_available:
+        chooser_ids = np.random.choice(
+            chooser_ids, size=n_to_choose, replace=False)
+
+    # assign by chooser ID; choosers that were not picked keep their nan
+    choices[chooser_ids] = chosen
+
+    return choices
+
+
 class LocationChoiceModel(object):
     """
     A location choice model with the ability to store an estimated

diff --git a/urbansim/models/tests/test_lcm.py b/urbansim/models/tests/test_lcm.py
@@ -1,7 +1,8 @@
+import numpy.testing as npt
 import pandas as pd
 import pytest
 
-from .. import LocationChoiceModel
+from .. import lcm
 
 
 @pytest.fixture
@@ -19,13 +20,46 @@ def alternatives():
         index=range(10))
 
 
+def test_unit_choice_uniform(choosers, alternatives):
+    # Equal weight on every alternative: the result must be indexed by
+    # the choosers and every chooser must hold a valid alternative ID.
+    # NOTE(review): the final assert fails on any nan, so this presumes
+    # the fixtures provide at least as many alternatives as choosers.
+    probabilities = [1] * len(alternatives)
+    choices = lcm.unit_choice(
+        choosers.index, alternatives.index, probabilities)
+    npt.assert_array_equal(choices.index, choosers.index)
+    assert choices.isin(alternatives.index).all()
+
+
+def test_unit_choice_some_zero(choosers, alternatives):
+    # Only the alternatives at positions 1, 3, 4, 6 and 9 have a nonzero
+    # probability, so those are the only IDs that may be chosen.
+    # NOTE(review): the exact-match assert expects all five to be used,
+    # which presumes the choosers fixture has five rows -- confirm.
+    probabilities = [0, 1, 0, 1, 1, 0, 1, 0, 0, 1]
+    choices = lcm.unit_choice(
+        choosers.index, alternatives.index, probabilities)
+    npt.assert_array_equal(choices.index, choosers.index)
+    npt.assert_array_equal(sorted(choices.values), [1, 3, 4, 6, 9])
+
+
+def test_unit_choice_not_enough(choosers, alternatives):
+    # Only two alternatives (5 and 7) are available: both must be
+    # assigned and the remaining choosers must be left as nan.
+    # NOTE(review): expecting three nulls presumes five choosers.
+    probabilities = [0, 0, 0, 0, 0, 1, 0, 1, 0, 0]
+    choices = lcm.unit_choice(
+        choosers.index, alternatives.index, probabilities)
+    npt.assert_array_equal(choices.index, choosers.index)
+    assert choices.isnull().sum() == 3
+    npt.assert_array_equal(sorted(choices[~choices.isnull()]), [5, 7])
+
+
+def test_unit_choice_none_available(choosers, alternatives):
+    # All probabilities are zero, so nothing can be chosen and every
+    # chooser must come back as nan.
+    probabilities = [0] * len(alternatives)
+    choices = lcm.unit_choice(
+        choosers.index, alternatives.index, probabilities)
+    npt.assert_array_equal(choices.index, choosers.index)
+    assert choices.isnull().all()
+
+
 def test_lcm(choosers, alternatives):
-    lcm = LocationChoiceModel(
+    # the module is now imported as `lcm`, so the instance is named
+    # `model` to avoid shadowing it
+    model = lcm.LocationChoiceModel(
         ['var3 != 15'], ['var2 != 14'], 'var2 + var1:var3', 5,
         name='Test LCM')
-    loglik = lcm.fit(choosers, alternatives, choosers.thing_id)
+    loglik = model.fit(choosers, alternatives, choosers.thing_id)
 
     # hard to test things exactly because there's some randomness
     # involved, but can at least do a smoke test.
     assert len(loglik) == 3
-    assert len(lcm.fit_results) == 2
+    assert len(model.fit_results) == 2