Move apply_filter_query function to models.util module for easy sharing between model classes.
UDST · Apr 18, 2014 · 11913c6 · 11913c6
1 parent 4193f12
commit 11913c6
Show file tree

Hide file tree

Showing 5 changed files with 84 additions and 69 deletions.
diff --git a/urbansim/models/hedonic.py b/urbansim/models/hedonic.py
@@ -2,34 +2,10 @@
 import pandas as pd
 import statsmodels.formula.api as smf
 
+from . import util
 from .. exceptions import ModelEvaluationError
 
 
-def apply_filter_query(df, filters=None):
-    """
-    Use the DataFrame.query method to filter a table down to the
-    desired rows.
-
-    Parameters
-    ----------
-    df : pandas.DataFrame
-    filters : list of str, optional
-        List of filters to apply. Will be joined together with
-        ' and ' and passed to DataFrame.query.
-        If not supplied no filtering will be done.
-
-    Returns
-    -------
-    filtered_df : pandas.DataFrame
-
-    """
-    if filters:
-        query = ' and '.join(filters)
-        return df.query(query)
-    else:
-        return df
-
-
 def fit_model(df, filters, model_expression):
     """
     Use statsmodels to construct a model relation.
@@ -50,7 +26,7 @@ def fit_model(df, filters, model_expression):
     fit : statsmodels.regression.linear_model.OLSResults
 
     """
-    df = apply_filter_query(df, filters)
+    df = util.apply_filter_query(df, filters)
     model = smf.ols(formula=model_expression, data=df)
     return model.fit()
 
@@ -81,7 +57,7 @@ def predict(df, filters, model_fit, ytransform=None):
         after applying filters.
 
     """
-    df = apply_filter_query(df, filters)
+    df = util.apply_filter_query(df, filters)
     sim_data = model_fit.predict(df)
 
     if len(sim_data) != len(df):

diff --git a/urbansim/models/lcm.py b/urbansim/models/lcm.py
@@ -1,6 +1,7 @@
 import numpy as np
 from patsy import dmatrix
 
+from . import util
 from ..urbanchoice import interaction, mnl
 
 
@@ -11,10 +12,11 @@ class LocationChoiceModel(object):
 
     Parameters
     ----------
-    fit_filters : list of str
-        Filters applied before fitting the model.
-    predict_filters : list of str
-        Filters applied before calculating new data points.
+    alts_fit_filters : list of str
+        Filters applied to the alternatives table before fitting the model.
+    alts_predict_filters : list of str
+        Filters applied to the alternatives table before calculating
+        new data points.
     model_expression : str
         A patsy model expression. Should contain only a right-hand side.
     sample_size : int
@@ -24,10 +26,10 @@ class LocationChoiceModel(object):
         in output.
 
     """
-    def __init__(self, fit_filters, predict_filters, model_expression,
-                 sample_size, name=None):
-        self.fit_filters = fit_filters
-        self.predict_filters = predict_filters
+    def __init__(self, alts_fit_filters, alts_predict_filters,
+                 model_expression, sample_size, name=None):
+        self.alts_fit_filters = alts_fit_filters
+        self.alts_predict_filters = alts_predict_filters
         # LCMs never have a constant
         self.model_expression = model_expression + ' - 1'
         self.sample_size = sample_size
@@ -59,6 +61,8 @@ def fit(self, choosers, alternatives, current_choice):
             Log-liklihood ratio
 
         """
+        alternatives = util.apply_filter_query(
+            alternatives, self.alts_fit_filters)
         _, merged, chosen = interaction.mnl_interaction_dataset(
             choosers, alternatives, self.sample_size, current_choice)
         model_design = dmatrix(

diff --git a/urbansim/models/tests/test_hedonic.py b/urbansim/models/tests/test_hedonic.py
@@ -22,40 +22,6 @@ def groupby_df(test_df):
     return test_df
 
 
-def test_apply_filter_query(test_df):
-    filters = ['col1 < 3', 'col2 > 6']
-    filtered = hedonic.apply_filter_query(test_df, filters)
-    expected = pd.DataFrame(
-        {'col1': [2], 'col2': [7]},
-        index=['c'])
-    pdt.assert_frame_equal(filtered, expected)
-
-
-def test_apply_filter_query_empty(test_df):
-    filters = ['col1 < 1', 'col2 > 8']
-    filtered = hedonic.apply_filter_query(test_df, filters)
-    expected = pd.DataFrame(
-        {'col1': [], 'col2': []},
-        index=[])
-    pdt.assert_frame_equal(filtered, expected)
-
-
-def test_apply_filter_query_or(test_df):
-    filters = ['col1 < 1 or col2 > 8']
-    filtered = hedonic.apply_filter_query(test_df, filters)
-    expected = pd.DataFrame(
-        {'col1': [0, 4], 'col2': [5, 9]},
-        index=['a', 'e'])
-    pdt.assert_frame_equal(filtered, expected)
-
-
-def test_apply_filter_query_no_filter(test_df):
-    filters = []
-    filtered = hedonic.apply_filter_query(test_df, filters)
-    expected = test_df
-    pdt.assert_frame_equal(filtered, expected)
-
-
 def test_fit_model(test_df):
     filters = []
     model_exp = 'col1 ~ col2'

diff --git a/urbansim/models/tests/test_util.py b/urbansim/models/tests/test_util.py
@@ -0,0 +1,47 @@
+import pandas as pd
+import pytest
+from pandas.util import testing as pdt
+
+from .. import util
+
+
+@pytest.fixture
+def test_df():
+    """Five-row frame: col1 is 0..4, col2 is 5..9, index 'a'..'e'."""
+    labels = ['a', 'b', 'c', 'd', 'e']
+    columns = {'col1': range(5), 'col2': range(5, 10)}
+    return pd.DataFrame(columns, index=labels)
+
+
+def test_apply_filter_query(test_df):
+    # Both conditions must hold, which leaves only row 'c'.
+    expected = pd.DataFrame(
+        {'col1': [2], 'col2': [7]}, index=['c'])
+    result = util.apply_filter_query(
+        test_df, ['col1 < 3', 'col2 > 6'])
+    pdt.assert_frame_equal(result, expected)
+
+
+def test_apply_filter_query_empty(test_df):
+    # No row satisfies both conditions, so the result has no rows.
+    expected = pd.DataFrame(
+        {'col1': [], 'col2': []}, index=[])
+    result = util.apply_filter_query(
+        test_df, ['col1 < 1', 'col2 > 8'])
+    pdt.assert_frame_equal(result, expected)
+
+
+def test_apply_filter_query_or(test_df):
+    # A single filter string may itself contain an 'or' clause.
+    expected = pd.DataFrame(
+        {'col1': [0, 4], 'col2': [5, 9]}, index=['a', 'e'])
+    result = util.apply_filter_query(
+        test_df, ['col1 < 1 or col2 > 8'])
+    pdt.assert_frame_equal(result, expected)
+
+
+def test_apply_filter_query_no_filter(test_df):
+    """An empty filter list leaves the table unchanged."""
+    no_filters = []
+    result = util.apply_filter_query(test_df, no_filters)
+    pdt.assert_frame_equal(result, test_df)
diff --git a/urbansim/models/util.py b/urbansim/models/util.py
@@ -0,0 +1,23 @@
+def apply_filter_query(df, filters=None):
+    """
+    Filter a table down to the desired rows by way of the
+    DataFrame.query method.
+
+    Parameters
+    ----------
+    df : pandas.DataFrame
+        Table to filter.
+    filters : list of str, optional
+        Query expressions to apply. They are combined with ' and '
+        into a single expression that is passed to DataFrame.query.
+        When omitted or empty, `df` is returned as is.
+
+    Returns
+    -------
+    filtered_df : pandas.DataFrame
+
+    """
+    if not filters:
+        # Nothing to apply; hand the table back untouched.
+        return df
+    return df.query(' and '.join(filters))