Skip to content

Commit

Permalink
Move targets/relocation rates filtering logic to models.util
Browse files Browse the repository at this point in the history
The controls table and relocation rates tables share common formats
for specifying which agents a total or rate applies to. This moves
some common handling of those formats to models.util for sharing
between transition and relocation code.
  • Loading branch information
jiffyclub committed May 5, 2014
1 parent ab2dd50 commit a6215dc
Show file tree
Hide file tree
Showing 4 changed files with 113 additions and 51 deletions.
43 changes: 1 addition & 42 deletions urbansim/models/relocation.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
import numbers

import numpy as np
import pandas as pd

Expand All @@ -8,40 +6,6 @@
PROB_COL = 'probability_of_relocating'


def _filterize(name, value):
"""
Turn a `name` and `value` into a string expression compatible
the ``DataFrame.query`` method.
Parameters
----------
name : str
Should be the name of a column in the table to which the
filter will be applied.
A suffix of '_max' will result in a "less than" filter,
a suffix of '_min' will result in a "greater than or equal to" filter,
and no recognized suffix will result in an "equal to" filter.
value : any
Value side of filter for comparison to column values.
Returns
-------
filter_exp : str
"""
if name.endswith('_min'):
name = name[:-4]
comp = '>='
elif name.endswith('_max'):
name = name[:-4]
comp = '<'
else:
comp = '=='

return '{} {} {!r}'.format(name, comp, value)


def find_movers(choosers, rates):
"""
Returns an array of the indexes of the `choosers` that are slated
Expand Down Expand Up @@ -85,12 +49,7 @@ def find_movers(choosers, rates):
np.zeros(len(choosers)), index=choosers.index)

for _, row in rates.iterrows():
filters = [_filterize(name, val)
for name, val in row.iteritems()
if (name != PROB_COL and
(not isinstance(val, numbers.Number) or
not np.isnan(val)))]
indexes = util.apply_filter_query(choosers, filters).index
indexes = util.filter_table(choosers, row, ignore={PROB_COL}).index
relocation_rates.loc[indexes] = row[PROB_COL]

movers = relocation_rates.index[
Expand Down
9 changes: 0 additions & 9 deletions urbansim/models/tests/test_relocation.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,15 +27,6 @@ def rates():
'probability_of_relocating': [1, 1, 1]})


@pytest.mark.parametrize(
    'name, val, filter_exp',
    [
        ('x', 1, 'x == 1'),
        ('x', 'a', "x == 'a'"),
        ('y_min', 2, 'y >= 2'),
        ('z_max', 3, 'z < 3'),
    ])
def test_filterize(name, val, filter_exp):
    """Each name/value pair must render to the expected query expression."""
    rendered = relo._filterize(name, val)
    assert rendered == filter_exp


def test_find_movers(choosers, rates):
movers = relo.find_movers(choosers, rates)
npt.assert_array_equal(movers, ['a', 'c', 'e'])
Expand Down
37 changes: 37 additions & 0 deletions urbansim/models/tests/test_util.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import numpy as np
import pandas as pd
import pytest
from pandas.util import testing as pdt
Expand All @@ -13,6 +14,27 @@ def test_df():
index=['a', 'b', 'c', 'd', 'e'])


@pytest.fixture
def choosers():
    """Five-row table of agents, indexed 'a' through 'e'."""
    columns = {
        'var1': range(5),
        'var2': range(5, 10),
        'var3': list('qwert'),
        'building_id': range(100, 105),
    }
    return pd.DataFrame(columns, index=list('abcde'))


@pytest.fixture
def rates():
    """
    Three-row rates table. NaN marks a constraint that is not set
    for that row.

    """
    nan = np.nan
    columns = {
        'var1_min': [nan, nan, nan],
        'var1_max': [1, nan, nan],
        'var2_min': [nan, 7, nan],
        'var2_max': [nan, 8, nan],
        'var3': [nan, nan, 't'],
        'probability_of_relocating': [1, 1, 1],
    }
    return pd.DataFrame(columns)


def test_apply_filter_query(test_df):
filters = ['col1 < 3', 'col2 > 6']
filtered = util.apply_filter_query(test_df, filters)
Expand Down Expand Up @@ -45,3 +67,18 @@ def test_apply_filter_query_no_filter(test_df):
filtered = util.apply_filter_query(test_df, filters)
expected = test_df
pdt.assert_frame_equal(filtered, expected)


@pytest.mark.parametrize(
    'name, val, filter_exp',
    [
        ('x', 1, 'x == 1'),
        ('x', 'a', "x == 'a'"),
        ('y_min', 2, 'y >= 2'),
        ('z_max', 3, 'z < 3'),
    ])
def test_filterize(name, val, filter_exp):
    """Each name/value pair must render to the expected query expression."""
    rendered = util._filterize(name, val)
    assert rendered == filter_exp


def test_filter_table(choosers, rates):
    """
    The second rates row restricts var2 to >= 7 and < 8, which only
    the third chooser satisfies.

    """
    constraints = rates.iloc[1]
    expected = choosers.iloc[[2]]
    result = util.filter_table(
        choosers, constraints, ignore={'probability_of_relocating'})
    pdt.assert_frame_equal(result, expected)
75 changes: 75 additions & 0 deletions urbansim/models/util.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,9 @@
import numbers

import numpy as np
import pandas as pd


def apply_filter_query(df, filters=None):
"""
Use the DataFrame.query method to filter a table down to the
Expand All @@ -21,3 +27,72 @@ def apply_filter_query(df, filters=None):
return df.query(query)
else:
return df


def _filterize(name, value):
"""
Turn a `name` and `value` into a string expression compatible
the ``DataFrame.query`` method.
Parameters
----------
name : str
Should be the name of a column in the table to which the
filter will be applied.
A suffix of '_max' will result in a "less than" filter,
a suffix of '_min' will result in a "greater than or equal to" filter,
and no recognized suffix will result in an "equal to" filter.
value : any
Value side of filter for comparison to column values.
Returns
-------
filter_exp : str
"""
if name.endswith('_min'):
name = name[:-4]
comp = '>='
elif name.endswith('_max'):
name = name[:-4]
comp = '<'
else:
comp = '=='

return '{} {} {!r}'.format(name, comp, value)


def filter_table(table, filter_series, ignore=None):
    """
    Filter a table based on a set of restrictions given in a
    Series of column name / filter parameter pairs. The column
    names can have suffixes `_min` and `_max` to indicate
    "greater than or equal to" and "less than" constraints.

    Parameters
    ----------
    table : pandas.DataFrame
        Table to filter.
    filter_series : pandas.Series
        Series of column name / value pairs of filter constraints.
        Names that end with '_max' will be used to create
        "less than" filters, names that end with '_min' will be
        used to create "greater than or equal to" filters.
        A name with no suffix will be used to make an "equal to" filter.
        Numeric NaN values are treated as "no constraint" and skipped.
    ignore : sequence of str, optional
        Names in `filter_series` that should not be used for filtering.

    Returns
    -------
    filtered : pandas.DataFrame
        Result of passing the generated filters to `apply_filter_query`.

    """
    ignore = ignore if ignore else set()

    # ``Series.items`` is used instead of ``Series.iteritems``; the latter
    # was removed in pandas 2.0.
    filters = [_filterize(name, val)
               for name, val in filter_series.items()
               if not (name in ignore or
                       (isinstance(val, numbers.Number) and
                        np.isnan(val)))]
    return apply_filter_query(table, filters)

0 comments on commit a6215dc

Please sign in to comment.