Skip to content

Commit

Permalink
save debug info for regression
Browse files Browse the repository at this point in the history
  • Loading branch information
fscottfoti committed Jun 7, 2014
1 parent ec9f23e commit 48e61c8
Show file tree
Hide file tree
Showing 2 changed files with 27 additions and 6 deletions.
26 changes: 21 additions & 5 deletions urbansim/models/regression.py
Original file line number Diff line number Diff line change
Expand Up @@ -235,6 +235,7 @@ def __init__(self, fit_filters, predict_filters, model_expression,
self.name = name or 'RegressionModel'
self.model_fit = None
self.fit_parameters = None
self.est_data = None

@classmethod
def from_yaml(cls, yaml_str=None, str_or_buffer=None):
Expand Down Expand Up @@ -285,7 +286,7 @@ def str_model_expression(self):
return util.str_model_expression(
self.model_expression, add_constant=True)

def fit(self, data):
def fit(self, data, debug=False):
"""
Fit the model to data and store/return the results.
Expand All @@ -294,6 +295,10 @@ def fit(self, data):
data : pandas.DataFrame
Data to use for fitting the model. Must contain all the
columns referenced by the `model_expression`.
debug : bool
If debug is set to true, this sets the attribute "est_data"
to a dataframe with the actual data used for estimation of
this model.
Returns
-------
Expand All @@ -305,6 +310,12 @@ class instance for use during prediction.
fit = fit_model(data, self.fit_filters, self.str_model_expression)
self.model_fit = fit
self.fit_parameters = _model_fit_to_table(fit)
if debug:
df = pd.DataFrame(fit.model.exog, columns=fit.model.exog_names, index=data.index)
df[fit.model.endog_names] = fit.model.endog
df["fittedvalues"] = fit.fittedvalues
df["residuals"] = fit.resid
self.est_data = df
return fit

@property
Expand Down Expand Up @@ -496,22 +507,25 @@ def _iter_groups(self, data):
for name in self.models:
yield name, groups.get_group(name)

def fit(self, data):
def fit(self, data, debug=False):
"""
Fit each of the models in the group.
Parameters
----------
data : pandas.DataFrame
Must have a column with the same name as `segmentation_col`.
debug : bool
If set to true (default false), the debug parameter is passed
to the fit method of each model in the group.
Returns
-------
fits : dict of statsmodels.regression.linear_model.OLSResults
Keys are the segment names.
"""
return {name: self.models[name].fit(df)
return {name: self.models[name].fit(df, debug=debug)
for name, df in self._iter_groups(data)}

@property
Expand Down Expand Up @@ -661,7 +675,7 @@ def add_segment(self, name, model_expression=None, ytransform='default'):
self._group.add_model_from_params(
name, None, None, model_expression, ytransform)

def fit(self, data):
def fit(self, data, debug=False):
"""
Fit each segment. Segments that have not already been explicitly
added will be automatically added with default model and ytransform.
Expand All @@ -670,6 +684,8 @@ def fit(self, data):
----------
data : pandas.DataFrame
Must have a column with the same name as `segmentation_col`.
debug : bool
If set to true, debug is passed to the fit method of each model.
Returns
-------
Expand All @@ -686,7 +702,7 @@ def fit(self, data):
if x not in self._group.models and value_counts[x] > self.min_segment_size:
self.add_segment(x)

return self._group.fit(data)
return self._group.fit(data, debug=debug)

@property
def fitted(self):
Expand Down
7 changes: 6 additions & 1 deletion urbansim/models/yamlmodelrunner.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,13 +22,17 @@ def hedonic_estimate(df, cfgname):
if model_type == "regression":
hm = RegressionModel.from_yaml(str_or_buffer=cfg)
print hm.fit(df).summary()
est_data = hm.est_data
if model_type == "segmented_regression":
hm = SegmentedRegressionModel.from_yaml(str_or_buffer=cfg)
for k, v in hm.fit(df).items():
hm.min_segment_size = 10
for k, v in hm.fit(df, debug=True).items():
print "REGRESSION RESULTS FOR SEGMENT %s\n" % str(k)
print v.summary()
print
est_data = {name: hm._group.models[name].est_data for name in hm._group.models}
hm.to_yaml(str_or_buffer=cfg)
return est_data


def hedonic_simulate(df, cfgname, outdf, outfname):
Expand All @@ -51,6 +55,7 @@ def hedonic_simulate(df, cfgname, outdf, outfname):
hm = RegressionModel.from_yaml(str_or_buffer=cfg)
if model_type == "segmented_regression":
hm = SegmentedRegressionModel.from_yaml(str_or_buffer=cfg)
hm.min_segment_size = 10
price_or_rent = hm.predict(df)
print price_or_rent.describe()
outdf.loc[price_or_rent.index.values, outfname] = price_or_rent
Expand Down

0 comments on commit 48e61c8

Please sign in to comment.