Merge branch 'master' into developer
Conflicts:
	urbansim/models/regression.py
fscottfoti committed Jun 20, 2014
2 parents e9f6316 + da26815 commit e2f58de
Showing 8 changed files with 121 additions and 19 deletions.
25 changes: 21 additions & 4 deletions urbansim/models/lcm.py
@@ -587,14 +587,16 @@ class SegmentedMNLLocationChoiceModel(object):
the alternatives index is used.
default_model_expr : str, iterable, or dict, optional
A patsy model expression. Should contain only a right-hand side.
name : str, optional
An optional string used to identify the model in output.
"""
def __init__(self, segmentation_col, sample_size,
choosers_fit_filters=None, choosers_predict_filters=None,
alts_fit_filters=None, alts_predict_filters=None,
interaction_predict_filters=None,
estimation_sample_size=None,
choice_column=None, default_model_expr=None):
choice_column=None, default_model_expr=None, name=None):
self.segmentation_col = segmentation_col
self.sample_size = sample_size
self.choosers_fit_filters = choosers_fit_filters
@@ -606,6 +608,8 @@ def __init__(self, segmentation_col, sample_size,
self.choice_column = choice_column
self.default_model_expr = default_model_expr
self._group = MNLLocationChoiceModelGroup(segmentation_col)
self.name = (name if name is not None else
'SegmentedMNLLocationChoiceModel')

@classmethod
def from_yaml(cls, yaml_str=None, str_or_buffer=None):
@@ -639,7 +643,8 @@ def from_yaml(cls, yaml_str=None, str_or_buffer=None):
cfg['interaction_predict_filters'],
cfg['estimation_sample_size'],
cfg['choice_column'],
default_model_expr)
default_model_expr,
cfg['name'])

if "models" not in cfg:
cfg["models"] = {}
@@ -729,6 +734,14 @@ def fit(self, choosers, alternatives, current_choice):

unique = choosers[self.segmentation_col].unique()

# Remove any existing segments that may no longer have counterparts
# in the data. This can happen when loading a saved model and then
# calling this method with data that no longer has segments that
# were there the last time this was called.
gone = set(self._group.models) - set(unique)
for g in gone:
del self._group.models[g]

for x in unique:
if x not in self._group.models:
self.add_segment(x)
@@ -823,6 +836,7 @@ def to_dict(self):
"""
return {
'model_type': 'segmented_locationchoice',
'name': self.name,
'segmentation_col': self.segmentation_col,
'sample_size': self.sample_size,
'choosers_fit_filters': self.choosers_fit_filters,
@@ -836,8 +850,11 @@ def to_dict(self):
'model_expression': self.default_model_expr,
},
'fitted': self.fitted,
'models': {yamlio.to_scalar_safe(name): self._process_model_dict(m.to_dict())
for name, m in self._group.models.items()}
'models': {
yamlio.to_scalar_safe(name):
self._process_model_dict(m.to_dict())
for name, m in self._group.models.items()
}
}

def to_yaml(self, str_or_buffer=None):
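The stale-segment cleanup added to fit() above is just a set difference over the group's model keys. A minimal standalone sketch of the behavior, with a plain dict and made-up labels standing in for self._group.models and real segment values:

# Plain-dict sketch of the cleanup in SegmentedMNLLocationChoiceModel.fit();
# 'a' and 'b' are stale segments from an earlier fit, 'x' and 'y' are the
# labels present in the current data.
models = {'a': 1, 'b': 2, 'x': 3}
unique = ['x', 'y']

gone = set(models) - set(unique)
for g in gone:
    del models[g]  # drops 'a' and 'b'

for x in unique:
    if x not in models:
        models[x] = object()  # stands in for self.add_segment(x)

assert sorted(models) == ['x', 'y']
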
33 changes: 27 additions & 6 deletions urbansim/models/regression.py
@@ -312,7 +312,8 @@ class instance for use during prediction.
self.fit_parameters = _model_fit_to_table(fit)
if debug:
index = util.apply_filter_query(data, self.fit_filters).index
df = pd.DataFrame(fit.model.exog, columns=fit.model.exog_names, index=index)
df = pd.DataFrame(
fit.model.exog, columns=fit.model.exog_names, index=data.index)
df[fit.model.endog_names] = fit.model.endog
df["fittedvalues"] = fit.fittedvalues
df["residuals"] = fit.resid
@@ -589,18 +590,24 @@ class SegmentedRegressionModel(object):
the results reflect actual price.
By default no transformation is applied.
min_segment_size : int, optional
Segments with this many members or fewer will be skipped.
name : str, optional
A name used to identify the model in output.
"""
def __init__(
self, segmentation_col, fit_filters=None, predict_filters=None,
default_model_expr=None, default_ytransform=None, min_segment_size=0):
default_model_expr=None, default_ytransform=None,
min_segment_size=0, name=None):
self.segmentation_col = segmentation_col
self._group = RegressionModelGroup(segmentation_col)
self.fit_filters = fit_filters
self.predict_filters = predict_filters
self.default_model_expr = default_model_expr
self.default_ytransform = default_ytransform
self.min_segment_size = min_segment_size
self.name = name if name is not None else 'SegmentedRegressionModel'

@classmethod
def from_yaml(cls, yaml_str=None, str_or_buffer=None):
@@ -628,7 +635,8 @@ def from_yaml(cls, yaml_str=None, str_or_buffer=None):
seg = cls(
cfg['segmentation_col'], cfg['fit_filters'],
cfg['predict_filters'], default_model_expr,
YTRANSFORM_MAPPING[default_ytransform])
YTRANSFORM_MAPPING[default_ytransform], cfg['min_segment_size'],
cfg['name'])

if "models" not in cfg:
cfg["models"] = {}
@@ -703,8 +711,17 @@ def fit(self, data, debug=False):
unique = data[self.segmentation_col].unique()
value_counts = data[self.segmentation_col].value_counts()

# Remove any existing segments that may no longer have counterparts
# in the data. This can happen when loading a saved model and then
# calling this method with data that no longer has segments that
# were there the last time this was called.
gone = set(self._group.models) - set(unique)
for g in gone:
del self._group.models[g]

for x in unique:
if x not in self._group.models and value_counts[x] > self.min_segment_size:
if x not in self._group.models and \
value_counts[x] > self.min_segment_size:
self.add_segment(x)

return self._group.fit(data, debug=debug)
@@ -774,16 +791,20 @@ def to_dict(self):
"""
return {
'model_type': 'segmented_regression',
'name': self.name,
'segmentation_col': self.segmentation_col,
'fit_filters': self.fit_filters,
'predict_filters': self.predict_filters,
'min_segment_size': self.min_segment_size,
'default_config': {
'model_expression': self.default_model_expr,
'ytransform': YTRANSFORM_MAPPING[self.default_ytransform]
},
'fitted': self.fitted,
'models': {yamlio.to_scalar_safe(name): self._process_model_dict(m.to_dict())
for name, m in self._group.models.items()}
'models': {
yamlio.to_scalar_safe(name):
self._process_model_dict(m.to_dict())
for name, m in self._group.models.items()}
}

def to_yaml(self, str_or_buffer=None):
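A hedged usage sketch of the new constructor arguments on the regression side; the segmentation column, model expression, and name below are invented for illustration:

# Invented example: segment home prices by building type.
from urbansim.models.regression import SegmentedRegressionModel

model = SegmentedRegressionModel(
    segmentation_col='building_type',
    default_model_expr='np.log1p(price) ~ sqft',
    min_segment_size=10,  # segments with 10 or fewer rows are skipped
    name='price_model')   # serialized by to_dict()/to_yaml()
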
18 changes: 17 additions & 1 deletion urbansim/models/tests/test_lcm.py
@@ -194,12 +194,13 @@ def test_mnl_lcm_segmented_yaml(grouped_choosers, alternatives):
sample_size = 4

group = lcm.SegmentedMNLLocationChoiceModel(
'group', sample_size, default_model_expr=model_exp)
'group', sample_size, default_model_expr=model_exp, name='test_seg')
group.add_segment('x')
group.add_segment('y', 'var3 + var1:var2')

expected_dict = {
'model_type': 'segmented_locationchoice',
'name': 'test_seg',
'segmentation_col': 'group',
'sample_size': sample_size,
'choosers_fit_filters': None,
@@ -255,3 +256,18 @@ def test_mnl_lcm_segmented_yaml(grouped_choosers, alternatives):

new_seg = lcm.SegmentedMNLLocationChoiceModel.from_yaml(group.to_yaml())
assert new_seg.fitted is True


def test_segmented_lcm_removes_old_models(grouped_choosers, alternatives):
model_exp = 'var2 + var1:var3'
sample_size = 4

group = lcm.SegmentedMNLLocationChoiceModel(
'group', sample_size, default_model_expr=model_exp)
group.add_segment('a')
group.add_segment('b')
group.add_segment('c')

group.fit(grouped_choosers, alternatives, 'thing_id')

assert sorted(group._group.models.keys()) == ['x', 'y']
17 changes: 16 additions & 1 deletion urbansim/models/tests/test_regression.py
@@ -320,15 +320,18 @@ def test_SegmentedRegressionModel_explicit(groupby_df):
def test_SegmentedRegressionModel_yaml(groupby_df):
seg = regression.SegmentedRegressionModel(
'group', fit_filters=['col1 not in [2]'],
predict_filters=['group != "z"'], default_model_expr='col1 ~ col2')
predict_filters=['group != "z"'], default_model_expr='col1 ~ col2',
min_segment_size=5000, name='test_seg')
seg.add_segment('x')
seg.add_segment('y', 'np.exp(col2) ~ np.exp(col1)', np.log)

expected_dict = {
'model_type': 'segmented_regression',
'name': 'test_seg',
'segmentation_col': 'group',
'fit_filters': ['col1 not in [2]'],
'predict_filters': ['group != "z"'],
'min_segment_size': 5000,
'default_config': {
'model_expression': 'col1 ~ col2',
'ytransform': None
@@ -385,3 +388,15 @@ def test_SegmentedRegressionModel_yaml(groupby_df):

new_seg = regression.SegmentedRegressionModel.from_yaml(seg.to_yaml())
assert new_seg.fitted is True


def test_SegmentedRegressionModel_removes_gone_segments(groupby_df):
seg = regression.SegmentedRegressionModel(
'group', default_model_expr='col1 ~ col2')
seg.add_segment('a')
seg.add_segment('b')
seg.add_segment('c')

seg.fit(groupby_df)

assert sorted(seg._group.models.keys()) == ['x', 'y']
19 changes: 19 additions & 0 deletions urbansim/models/tests/test_transition.py
@@ -301,3 +301,22 @@ def test_transition_model(basic_df, grow_targets_filters, totals_col, year):
assert added.isin(new.index).all()
assert not added.isin(basic_df.index).any()
npt.assert_array_equal(added.values, [basic_df.index.values.max() + 1])


def test_tabular_transition_add_and_remove():
data = pd.DataFrame(
{'a': ['x', 'x', 'y', 'y', 'y', 'y', 'y', 'y', 'z', 'z']})

totals = pd.DataFrame(
{'a': ['x', 'y', 'z'],
'total': [3, 1, 10]},
index=[2112, 2112, 2112])

tran = transition.TabularTotalsTransition(totals, 'total')
model = transition.TransitionModel(tran)

new, added, _ = model.transition(data, 2112)

assert len(new) == totals.total.sum()
assert added.is_unique is True
assert new.index.is_unique is True
10 changes: 10 additions & 0 deletions urbansim/models/tests/test_util.py
@@ -71,6 +71,16 @@ def test_apply_filter_query_no_filter(test_df):
pdt.assert_frame_equal(filtered, expected)


def test_apply_filter_query_str(test_df):
filters = 'col1 < 3'
filtered = util.apply_filter_query(test_df, filters)
expected = pd.DataFrame(
{'col1': [0, 1, 2],
'col2': [5, 6, 7]},
index=['a', 'b', 'c'])
pdt.assert_frame_equal(filtered, expected)


@pytest.mark.parametrize('name, val, filter_exp', [
('x', 1, 'x == 1'),
('x', 'a', "x == 'a'"),
8 changes: 4 additions & 4 deletions urbansim/models/transition.py
@@ -5,8 +5,6 @@
"""
from __future__ import division

import itertools

import numpy as np
import pandas as pd

@@ -264,7 +262,7 @@ def transition(self, data, year):
copied_indexes = []
removed_indexes = []

# since we're looping over descrete segments we need to track
# since we're looping over discrete segments we need to track
# out here where their new indexes will begin
starting_index = data.index.values.max() + 1

@@ -273,7 +271,9 @@
nrows = self._calc_nrows(len(subset), row[self._config_column])
updated, added, copied, removed = \
add_or_remove_rows(subset, nrows, starting_index)
starting_index = starting_index + nrows + 1
if nrows > 0:
# only update the starting index if rows were added
starting_index = starting_index + nrows
segments.append(updated)
added_indexes.append(added)
copied_indexes.append(copied)
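The old bookkeeping advanced starting_index by nrows + 1 for every segment, even when a segment shrank (negative nrows) or was unchanged, so index values could be skipped or collide. The fix advances by exactly nrows, and only when rows were added. A toy sketch of the corrected arithmetic (not the actual add_or_remove_rows call):

# Per-segment row-count changes; negative means rows were removed.
starting_index = 11  # e.g. data.index.values.max() + 1
assigned = []
for nrows in [3, -2, 0, 4]:
    if nrows > 0:
        assigned.extend(range(starting_index, starting_index + nrows))
        starting_index += nrows

assert assigned == [11, 12, 13, 14, 15, 16, 17]  # contiguous, no collisions
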
10 changes: 7 additions & 3 deletions urbansim/models/util.py
@@ -18,9 +18,10 @@ def apply_filter_query(df, filters=None):
Parameters
----------
df : pandas.DataFrame
filters : list of str, optional
filters : list of str or str, optional
List of filters to apply. Will be joined together with
' and ' and passed to DataFrame.query.
' and ' and passed to DataFrame.query. A string will be passed
straight to DataFrame.query.
If not supplied no filtering will be done.
Returns
@@ -29,7 +30,10 @@
"""
if filters:
query = ' and '.join(filters)
if isinstance(filters, str):
query = filters
else:
query = ' and '.join(filters)
return df.query(query)
else:
return df
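A short usage sketch of the two filter forms apply_filter_query now accepts (toy frame and filters):

import pandas as pd
from urbansim.models import util

df = pd.DataFrame({'col1': [0, 1, 2, 3], 'col2': [5, 6, 7, 8]})

util.apply_filter_query(df, 'col1 < 3')                # str: passed straight to .query
util.apply_filter_query(df, ['col1 < 3', 'col2 > 5'])  # list: joined with ' and '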
