Skip to content

Commit

Permalink
Move apply_filter_query function to models.util module for easy shari…
Browse files Browse the repository at this point in the history
…ng between model classes.
  • Loading branch information
jiffyclub committed Apr 18, 2014
1 parent 4193f12 commit 11913c6
Show file tree
Hide file tree
Showing 5 changed files with 84 additions and 69 deletions.
30 changes: 3 additions & 27 deletions urbansim/models/hedonic.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,34 +2,10 @@
import pandas as pd
import statsmodels.formula.api as smf

from . import util
from .. exceptions import ModelEvaluationError


def apply_filter_query(df, filters=None):
    """
    Use the DataFrame.query method to filter a table down to the
    desired rows.

    Parameters
    ----------
    df : pandas.DataFrame
    filters : list of str or str, optional
        List of filters to apply. Each filter is wrapped in parentheses,
        then all of them are joined together with ' and ' and passed to
        DataFrame.query. A single string is treated as one filter.
        If not supplied (or empty) no filtering will be done.

    Returns
    -------
    filtered_df : pandas.DataFrame
        The result of DataFrame.query, or `df` itself when there are
        no filters.

    """
    if not filters:
        return df

    if isinstance(filters, str):
        # A bare string would otherwise be joined character by character.
        filters = [filters]

    # Parenthesize every filter so that one containing 'or' cannot change
    # how it groups with the other filters ('a or b and c' would otherwise
    # be evaluated as 'a or (b and c)').
    query = ' and '.join('({})'.format(f) for f in filters)
    return df.query(query)


def fit_model(df, filters, model_expression):
"""
Use statsmodels to construct a model relation.
Expand All @@ -50,7 +26,7 @@ def fit_model(df, filters, model_expression):
fit : statsmodels.regression.linear_model.OLSResults
"""
df = apply_filter_query(df, filters)
df = util.apply_filter_query(df, filters)
model = smf.ols(formula=model_expression, data=df)
return model.fit()

Expand Down Expand Up @@ -81,7 +57,7 @@ def predict(df, filters, model_fit, ytransform=None):
after applying filters.
"""
df = apply_filter_query(df, filters)
df = util.apply_filter_query(df, filters)
sim_data = model_fit.predict(df)

if len(sim_data) != len(df):
Expand Down
19 changes: 11 additions & 8 deletions urbansim/models/lcm.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import numpy as np
from patsy import dmatrix

from . import util
from ..urbanchoice import interaction, mnl


Expand All @@ -11,10 +12,11 @@ class LocationChoiceModel(object):
Parameters
----------
fit_filters : list of str
Filters applied before fitting the model.
predict_filters : list of str
Filters applied before calculating new data points.
alts_fit_filters : list of str
Filters applied to the alternatives table before fitting the model.
alts_predict_filters : list of str
Filters applied to the alternatives table before calculating
new data points.
model_expression : str
A patsy model expression. Should contain only a right-hand side.
sample_size : int
Expand All @@ -24,10 +26,10 @@ class LocationChoiceModel(object):
in output.
"""
def __init__(self, fit_filters, predict_filters, model_expression,
sample_size, name=None):
self.fit_filters = fit_filters
self.predict_filters = predict_filters
def __init__(self, alts_fit_filters, alts_predict_filters,
model_expression, sample_size, name=None):
self.alts_fit_filters = alts_fit_filters
self.alts_predict_filters = alts_predict_filters
# LCMs never have a constant
self.model_expression = model_expression + ' - 1'
self.sample_size = sample_size
Expand Down Expand Up @@ -59,6 +61,7 @@ def fit(self, choosers, alternatives, current_choice):
Log-likelihood ratio
"""
alternatives = util.apply_filter_query(self.alts_fit_filters)
_, merged, chosen = interaction.mnl_interaction_dataset(
choosers, alternatives, self.sample_size, current_choice)
model_design = dmatrix(
Expand Down
34 changes: 0 additions & 34 deletions urbansim/models/tests/test_hedonic.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,40 +22,6 @@ def groupby_df(test_df):
return test_df


def test_apply_filter_query(test_df):
    """Two filters combine so that only row 'c' of the fixture remains."""
    result = hedonic.apply_filter_query(test_df, ['col1 < 3', 'col2 > 6'])
    pdt.assert_frame_equal(
        result,
        pd.DataFrame({'col1': [2], 'col2': [7]}, index=['c']))


def test_apply_filter_query_empty(test_df):
    """Filters that no row satisfies yield an empty frame."""
    no_rows = pd.DataFrame({'col1': [], 'col2': []}, index=[])
    result = hedonic.apply_filter_query(test_df, ['col1 < 1', 'col2 > 8'])
    pdt.assert_frame_equal(result, no_rows)


def test_apply_filter_query_or(test_df):
    """A single filter using 'or' keeps rows matching either condition."""
    either = ['col1 < 1 or col2 > 8']
    wanted = pd.DataFrame(
        {'col1': [0, 4], 'col2': [5, 9]}, index=['a', 'e'])
    pdt.assert_frame_equal(
        hedonic.apply_filter_query(test_df, either), wanted)


def test_apply_filter_query_no_filter(test_df):
    """An empty filter list leaves the frame unchanged."""
    result = hedonic.apply_filter_query(test_df, [])
    pdt.assert_frame_equal(result, test_df)


def test_fit_model(test_df):
filters = []
model_exp = 'col1 ~ col2'
Expand Down
47 changes: 47 additions & 0 deletions urbansim/models/tests/test_util.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
import pandas as pd
import pytest
from pandas.util import testing as pdt

from .. import util


@pytest.fixture
def test_df():
    """Five-row frame indexed 'a'..'e' with col1 = 0..4 and col2 = 5..9."""
    columns = {'col1': range(5), 'col2': range(5, 10)}
    row_labels = ['a', 'b', 'c', 'd', 'e']
    return pd.DataFrame(columns, index=row_labels)


def test_apply_filter_query(test_df):
    """Two filters combine so that only row 'c' of the fixture remains."""
    wanted = pd.DataFrame({'col1': [2], 'col2': [7]}, index=['c'])
    result = util.apply_filter_query(test_df, ['col1 < 3', 'col2 > 6'])
    pdt.assert_frame_equal(result, wanted)


def test_apply_filter_query_empty(test_df):
    """Filters that no row satisfies yield an empty frame."""
    result = util.apply_filter_query(test_df, ['col1 < 1', 'col2 > 8'])
    pdt.assert_frame_equal(
        result,
        pd.DataFrame({'col1': [], 'col2': []}, index=[]))


def test_apply_filter_query_or(test_df):
    """A single filter using 'or' keeps rows matching either condition."""
    result = util.apply_filter_query(test_df, ['col1 < 1 or col2 > 8'])
    wanted = pd.DataFrame(
        {'col1': [0, 4], 'col2': [5, 9]}, index=['a', 'e'])
    pdt.assert_frame_equal(result, wanted)


def test_apply_filter_query_no_filter(test_df):
    """An empty filter list leaves the frame unchanged."""
    pdt.assert_frame_equal(util.apply_filter_query(test_df, []), test_df)
23 changes: 23 additions & 0 deletions urbansim/models/util.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
def apply_filter_query(df, filters=None):
    """
    Use the DataFrame.query method to filter a table down to the
    desired rows.

    Parameters
    ----------
    df : pandas.DataFrame
    filters : list of str or str, optional
        List of filters to apply. Each filter is wrapped in parentheses,
        then all of them are joined together with ' and ' and passed to
        DataFrame.query. A single string is treated as one filter.
        If not supplied (or empty) no filtering will be done.

    Returns
    -------
    filtered_df : pandas.DataFrame
        The result of DataFrame.query, or `df` itself when there are
        no filters.

    """
    if not filters:
        return df

    if isinstance(filters, str):
        # A bare string would otherwise be joined character by character.
        filters = [filters]

    # Parenthesize every filter so that one containing 'or' cannot change
    # how it groups with the other filters ('a or b and c' would otherwise
    # be evaluated as 'a or (b and c)').
    query = ' and '.join('({})'.format(f) for f in filters)
    return df.query(query)

0 comments on commit 11913c6

Please sign in to comment.