Skip to content

Commit

Permalink
Add YAML IO for SegmentedRegressionModel
Browse files Browse the repository at this point in the history
  • Loading branch information
jiffyclub committed May 28, 2014
1 parent f83f376 commit dcd1e24
Show file tree
Hide file tree
Showing 2 changed files with 192 additions and 4 deletions.
115 changes: 115 additions & 0 deletions urbansim/models/regression.py
Original file line number Diff line number Diff line change
Expand Up @@ -514,6 +514,11 @@ def fit(self, data):
return {name: self.models[name].fit(df)
for name, df in self._iter_groups(data)}

@property
def fitted(self):
    """
    Whether every model held by this group has been fit.

    Returns
    -------
    bool
        False when the group holds no models. Otherwise the result of
        ``all`` over each model's ``fitted`` attribute.

    """
    # An empty group is reported as not fitted rather than vacuously True.
    if not self.models:
        return False
    return all(model.fitted for model in self.models.values())

def predict(self, data):
"""
Predict new data for each group in the segmentation.
Expand Down Expand Up @@ -573,6 +578,45 @@ def __init__(
self.default_model_expr = default_model_expr
self.default_ytransform = default_ytransform

@classmethod
def from_yaml(cls, yaml_str=None, str_or_buffer=None):
    """
    Create a SegmentedRegressionModel instance from a saved YAML
    configuration. Arguments are mutually exclusive.

    Parameters
    ----------
    yaml_str : str, optional
        A YAML string from which to load model.
    str_or_buffer : str or file like, optional
        File name or buffer from which to load YAML.

    Returns
    -------
    SegmentedRegressionModel

    """
    cfg = yamlio.yaml_to_dict(yaml_str, str_or_buffer)

    default_model_expr = cfg['default_config']['model_expression']
    default_ytransform = cfg['default_config']['ytransform']

    # Pass everything after the segmentation column by keyword so each
    # value reaches the intended constructor argument. A positional call
    # that omits the predict filters shifts the model expression and the
    # ytransform into the wrong slots and drops the saved predict filters.
    seg = cls(
        cfg['segmentation_col'],
        fit_filters=cfg['fit_filters'],
        predict_filters=cfg.get('predict_filters'),
        default_model_expr=default_model_expr,
        default_ytransform=YTRANSFORM_MAPPING[default_ytransform])

    for m in cfg['models'].values():
        # Segments only store values that differ from the defaults, so
        # fill in whatever was omitted when the configuration was saved.
        m['model_expression'] = m.get(
            'model_expression', default_model_expr)
        # Keep the transform as its serialized name: this dict is written
        # back out as YAML for RegressionModel.from_yaml, which is
        # expected to read the same form RegressionModel.to_dict writes.
        m['ytransform'] = m.get('ytransform', default_ytransform)
        # Filtering is applied by the segmented model itself, not by the
        # individual segment models.
        m['fit_filters'] = None
        m['predict_filters'] = None
        reg = RegressionModel.from_yaml(yamlio.convert_to_yaml(m, None))
        seg._group.add_model(reg)

    return seg

def add_segment(self, name, model_expression=None, ytransform='default'):
"""
Add a new segment with its own model expression and ytransform.
Expand Down Expand Up @@ -635,6 +679,10 @@ def fit(self, data):

return self._group.fit(data)

@property
def fitted(self):
    """
    Whether the segment models have been fit, as reported by the
    ``fitted`` attribute of the underlying model group.

    """
    group = self._group
    return group.fitted

def predict(self, data):
"""
Predict new data for each group in the segmentation.
Expand All @@ -654,3 +702,70 @@ def predict(self, data):
"""
data = util.apply_filter_query(data, self.predict_filters)
return self._group.predict(data)

def _process_model_dict(self, d):
    """
    Remove redundant items from a model's configuration dict.

    The model type and the fit/predict filters are always dropped.
    The model expression and ytransform are dropped only when they match
    this instance's defaults.

    Parameters
    ----------
    d : dict
        Modified in place.

    Returns
    -------
    dict
        Modified `d`.

    """
    for key in ('model_type', 'fit_filters', 'predict_filters'):
        del d[key]

    if d['model_expression'] == self.default_model_expr:
        del d['model_expression']

    # The per-model dict is expected to carry the serialized transform
    # name, while ``self.default_ytransform`` is the value that
    # ``to_dict`` maps through YTRANSFORM_MAPPING before writing it out.
    # Compare the two in the same (serialized) form so a segment using
    # the default transform is recognized as redundant.
    if d['ytransform'] == YTRANSFORM_MAPPING[self.default_ytransform]:
        del d['ytransform']

    return d

def to_dict(self):
    """
    Build a dict describing this instance that is suitable for
    conversion to YAML.

    Returns
    -------
    dict
        Top-level settings, the default model configuration, the overall
        fitted flag, and one trimmed configuration dict per segment model
        keyed by segment name.

    """
    config = {'model_type': 'segmented_regression'}
    config['segmentation_col'] = self.segmentation_col
    config['fit_filters'] = self.fit_filters
    config['predict_filters'] = self.predict_filters
    config['default_config'] = {
        'model_expression': self.default_model_expr,
        'ytransform': YTRANSFORM_MAPPING[self.default_ytransform]
    }
    config['fitted'] = self.fitted

    # Each segment's dict is trimmed of entries that merely repeat the
    # defaults stored above.
    segment_configs = {}
    for name, model in self._group.models.items():
        segment_configs[name] = self._process_model_dict(model.to_dict())
    config['models'] = segment_configs

    return config

def to_yaml(self, str_or_buffer=None):
    """
    Save a model representation to YAML.

    Parameters
    ----------
    str_or_buffer : str or file like, optional
        By default a YAML string is returned. If a string is
        given here the YAML will be written to that file.
        If an object with a ``.write`` method is given the
        YAML will be written to that object.

    Returns
    -------
    j : str
        YAML string if `str_or_buffer` is not given.

    """
    config = self.to_dict()
    return yamlio.convert_to_yaml(config, str_or_buffer)
81 changes: 77 additions & 4 deletions urbansim/models/tests/test_regression.py
Original file line number Diff line number Diff line change
Expand Up @@ -142,7 +142,9 @@ def test_RegressionModelGroup(groupby_df):
assert isinstance(hmg.models['y'], regression.RegressionModel)
assert hmg.models['y'].name == 'y'

assert hmg.fitted is False
fits = hmg.fit(groupby_df)
assert hmg.fitted is True
assert isinstance(fits['x'], RegressionResultsWrapper)
assert isinstance(fits['y'], RegressionResultsWrapper)

Expand Down Expand Up @@ -281,7 +283,9 @@ def test_SegmentedRegressionModel_raises(groupby_df):
def test_SegmentedRegressionModel(groupby_df):
seg = regression.SegmentedRegressionModel(
'group', default_model_expr='col1 ~ col2')
assert seg.fitted is False
fits = seg.fit(groupby_df)
assert seg.fitted is True

assert 'x' in fits and 'y' in fits
assert isinstance(fits['x'], RegressionResultsWrapper)
Expand All @@ -293,16 +297,85 @@ def test_SegmentedRegressionModel(groupby_df):


def test_SegmentedRegressionModel_explicit(groupby_df):
seg = regression.SegmentedRegressionModel('group')
seg = regression.SegmentedRegressionModel(
'group', fit_filters=['col1 not in [2]'],
predict_filters=['group != "z"'])
seg.add_segment('x', 'col1 ~ col2')
seg.add_segment('y', 'col2 ~ col1')
seg.add_segment('y', 'np.exp(col2) ~ np.exp(col1)', np.log)
fits = seg.fit(groupby_df)

assert 'x' in fits and 'y' in fits
assert isinstance(fits['x'], RegressionResultsWrapper)

test_data = pd.DataFrame(
{'group': ['x', 'y'], 'col1': [-5, 100], 'col2': [0.5, 10.5]})
{'group': ['x', 'z', 'y'],
'col1': [-5, 42, 100],
'col2': [0.5, 42, 10.5]})
predicted = seg.predict(test_data)

pdt.assert_series_equal(predicted.sort_index(), pd.Series([-4.5, 105]))
pdt.assert_series_equal(
predicted.sort_index(), pd.Series([-4.5, 105], index=[0, 2]))


def test_SegmentedRegressionModel_yaml(groupby_df):
    """
    Check the YAML output of a SegmentedRegressionModel both before and
    after fitting.
    """
    seg = regression.SegmentedRegressionModel(
        'group', fit_filters=['col1 not in [2]'],
        predict_filters=['group != "z"'], default_model_expr='col1 ~ col2')
    seg.add_segment('x')
    seg.add_segment('y', 'np.exp(col2) ~ np.exp(col1)', np.log)

    segments = ('x', 'y')
    # Fit outputs and the type each is expected to have once fitted.
    fit_outputs = (
        ('fit_parameters', dict),
        ('fit_rsquared', float),
        ('fit_rsquared_adj', float),
    )

    expected_x = {
        'name': 'x',
        'fitted': False,
        'fit_parameters': None,
        'fit_rsquared': None,
        'fit_rsquared_adj': None
    }
    expected_y = {
        'name': 'y',
        'model_expression': 'np.exp(col2) ~ np.exp(col1)',
        'ytransform': 'np.log',
        'fitted': False,
        'fit_parameters': None,
        'fit_rsquared': None,
        'fit_rsquared_adj': None
    }
    expected_dict = {
        'model_type': 'segmented_regression',
        'segmentation_col': 'group',
        'fit_filters': ['col1 not in [2]'],
        'predict_filters': ['group != "z"'],
        'default_config': {
            'model_expression': 'col1 ~ col2',
            'ytransform': None
        },
        'fitted': False,
        'models': {'x': expected_x, 'y': expected_y}
    }

    assert yaml.load(seg.to_yaml()) == expected_dict

    seg.fit(groupby_df)

    # The fitted values are only type-checked, so remove them from the
    # expected dict and pop them from the actual one before comparing.
    expected_dict['fitted'] = True
    for segment in segments:
        expected_segment = expected_dict['models'][segment]
        expected_segment['fitted'] = True
        for key, _ in fit_outputs:
            del expected_segment[key]

    actual_dict = yaml.load(seg.to_yaml())
    for segment in segments:
        actual_segment = actual_dict['models'][segment]
        for key, expected_type in fit_outputs:
            assert isinstance(actual_segment.pop(key), expected_type)

    assert actual_dict == expected_dict

0 comments on commit dcd1e24

Please sign in to comment.