Move targets/relocation rates filtering logic to models.util

The controls table and relocation rates tables share common formats for specifying which agents a total or rate applies to. This moves some common handling of those formats to models.util for sharing between transition and relocation code.
UDST · May 5, 2014 · a6215dc · a6215dc
1 parent ab2dd50
commit a6215dc
Show file tree

Hide file tree

Showing 4 changed files with 113 additions and 51 deletions.
diff --git a/urbansim/models/relocation.py b/urbansim/models/relocation.py
@@ -1,5 +1,3 @@
-import numbers
-
 import numpy as np
 import pandas as pd
 
@@ -8,40 +6,6 @@
 PROB_COL = 'probability_of_relocating'
 
 
-def _filterize(name, value):
-    """
-    Turn a `name` and `value` into a string expression compatible
-    the ``DataFrame.query`` method.
-
-    Parameters
-    ----------
-    name : str
-        Should be the name of a column in the table to which the
-        filter will be applied.
-
-        A suffix of '_max' will result in a "less than" filter,
-        a suffix of '_min' will result in a "greater than or equal to" filter,
-        and no recognized suffix will result in an "equal to" filter.
-    value : any
-        Value side of filter for comparison to column values.
-
-    Returns
-    -------
-    filter_exp : str
-
-    """
-    if name.endswith('_min'):
-        name = name[:-4]
-        comp = '>='
-    elif name.endswith('_max'):
-        name = name[:-4]
-        comp = '<'
-    else:
-        comp = '=='
-
-    return '{} {} {!r}'.format(name, comp, value)
-
-
 def find_movers(choosers, rates):
     """
     Returns an array of the indexes of the `choosers` that are slated
@@ -85,12 +49,7 @@ def find_movers(choosers, rates):
         np.zeros(len(choosers)), index=choosers.index)
 
     for _, row in rates.iterrows():
-        filters = [_filterize(name, val)
-                   for name, val in row.iteritems()
-                   if (name != PROB_COL and
-                       (not isinstance(val, numbers.Number) or
-                        not np.isnan(val)))]
-        indexes = util.apply_filter_query(choosers, filters).index
+        indexes = util.filter_table(choosers, row, ignore={PROB_COL}).index
         relocation_rates.loc[indexes] = row[PROB_COL]
 
     movers = relocation_rates.index[

diff --git a/urbansim/models/tests/test_relocation.py b/urbansim/models/tests/test_relocation.py
@@ -27,15 +27,6 @@ def rates():
          'probability_of_relocating': [1, 1, 1]})
 
 
-@pytest.mark.parametrize('name, val, filter_exp', [
-    ('x', 1, 'x == 1'),
-    ('x', 'a', "x == 'a'"),
-    ('y_min', 2, 'y >= 2'),
-    ('z_max', 3, 'z < 3')])
-def test_filterize(name, val, filter_exp):
-    assert relo._filterize(name, val) == filter_exp
-
-
 def test_find_movers(choosers, rates):
     movers = relo.find_movers(choosers, rates)
     npt.assert_array_equal(movers, ['a', 'c', 'e'])

diff --git a/urbansim/models/tests/test_util.py b/urbansim/models/tests/test_util.py
@@ -1,3 +1,4 @@
+import numpy as np
 import pandas as pd
 import pytest
 from pandas.util import testing as pdt
@@ -13,6 +14,27 @@ def test_df():
         index=['a', 'b', 'c', 'd', 'e'])
 
 
+@pytest.fixture
+def choosers():
+    return pd.DataFrame(
+        {'var1': range(5),
+         'var2': range(5, 10),
+         'var3': ['q', 'w', 'e', 'r', 't'],
+         'building_id': range(100, 105)},
+        index=['a', 'b', 'c', 'd', 'e'])
+
+
+@pytest.fixture
+def rates():
+    return pd.DataFrame(
+        {'var1_min': [np.nan, np.nan, np.nan],
+         'var1_max': [1, np.nan, np.nan],
+         'var2_min': [np.nan, 7, np.nan],
+         'var2_max': [np.nan, 8, np.nan],
+         'var3': [np.nan, np.nan, 't'],
+         'probability_of_relocating': [1, 1, 1]})
+
+
 def test_apply_filter_query(test_df):
     filters = ['col1 < 3', 'col2 > 6']
     filtered = util.apply_filter_query(test_df, filters)
@@ -45,3 +67,18 @@ def test_apply_filter_query_no_filter(test_df):
     filtered = util.apply_filter_query(test_df, filters)
     expected = test_df
     pdt.assert_frame_equal(filtered, expected)
+
+
+@pytest.mark.parametrize('name, val, filter_exp', [
+    ('x', 1, 'x == 1'),
+    ('x', 'a', "x == 'a'"),
+    ('y_min', 2, 'y >= 2'),
+    ('z_max', 3, 'z < 3')])
+def test_filterize(name, val, filter_exp):
+    assert util._filterize(name, val) == filter_exp
+
+
+def test_filter_table(choosers, rates):
+    filtered = util.filter_table(
+        choosers, rates.iloc[1], ignore={'probability_of_relocating'})
+    pdt.assert_frame_equal(filtered, choosers.iloc[[2]])
diff --git a/urbansim/models/util.py b/urbansim/models/util.py
@@ -1,3 +1,9 @@
+import numbers
+
+import numpy as np
+import pandas as pd
+
+
 def apply_filter_query(df, filters=None):
     """
     Use the DataFrame.query method to filter a table down to the
@@ -21,3 +27,72 @@ def apply_filter_query(df, filters=None):
         return df.query(query)
     else:
         return df
+
+
+def _filterize(name, value):
+    """
+    Turn a `name` and `value` into a string expression compatible
+    with the ``DataFrame.query`` method.
+
+    Parameters
+    ----------
+    name : str
+        Should be the name of a column in the table to which the
+        filter will be applied.
+
+        A suffix of '_max' will result in a "less than" filter,
+        a suffix of '_min' will result in a "greater than or equal to" filter,
+        and no recognized suffix will result in an "equal to" filter.
+    value : any
+        Value side of filter for comparison to column values.
+
+    Returns
+    -------
+    filter_exp : str
+
+    """
+    if name.endswith('_min'):
+        name = name[:-4]
+        comp = '>='
+    elif name.endswith('_max'):
+        name = name[:-4]
+        comp = '<'
+    else:
+        comp = '=='
+
+    return '{} {} {!r}'.format(name, comp, value)
+
+
+def filter_table(table, filter_series, ignore=None):
+    """
+    Filter a table based on a set of restrictions given in a
+    Series of column name / filter parameter pairs. The column
+    names can have suffixes `_min` and `_max` to indicate
+    "greater than or equal to" and "less than" constraints.
+
+    Parameters
+    ----------
+    table : pandas.DataFrame
+        Table to filter.
+    filter_series : pandas.Series
+        Series of column name / value pairs of filter constraints.
+        Columns that end with '_max' will be used to create
+        "less than" filters, columns that end with '_min' will be
+        used to create "greater than or equal to" filters.
+        A column with no suffix will be used to make an 'equal to' filter.
+    ignore : set or sequence of str, optional
+        Column names that should not be used for filtering.
+
+    Returns
+    -------
+    filtered : pandas.DataFrame
+
+    """
+    ignore = ignore if ignore else set()
+
+    filters = [_filterize(name, val)
+               for name, val in filter_series.iteritems()
+               if not (name in ignore or
+                       (isinstance(val, numbers.Number) and
+                        np.isnan(val)))]
+    return apply_filter_query(table, filters)