save debug info for regression

UDST · Jun 7, 2014 · 48e61c8 · 48e61c8
1 parent ec9f23e
commit 48e61c8
Show file tree

Hide file tree

Showing 2 changed files with 27 additions and 6 deletions.
diff --git a/urbansim/models/regression.py b/urbansim/models/regression.py
@@ -235,6 +235,7 @@ def __init__(self, fit_filters, predict_filters, model_expression,
         self.name = name or 'RegressionModel'
         self.model_fit = None
         self.fit_parameters = None
+        self.est_data = None
 
     @classmethod
     def from_yaml(cls, yaml_str=None, str_or_buffer=None):
@@ -285,7 +286,7 @@ def str_model_expression(self):
         return util.str_model_expression(
             self.model_expression, add_constant=True)
 
-    def fit(self, data):
+    def fit(self, data, debug=False):
         """
         Fit the model to data and store/return the results.
 
@@ -294,6 +295,10 @@ def fit(self, data):
         data : pandas.DataFrame
             Data to use for fitting the model. Must contain all the
             columns referenced by the `model_expression`.
+        debug : bool
+            If debug is set to true, this sets the attribute "est_data"
+            to a dataframe with the actual data used for estimation of
+            this model.
 
         Returns
         -------
@@ -305,6 +310,12 @@ class instance for use during prediction.
         fit = fit_model(data, self.fit_filters, self.str_model_expression)
         self.model_fit = fit
         self.fit_parameters = _model_fit_to_table(fit)
+        if debug:
+            df = pd.DataFrame(fit.model.exog, columns=fit.model.exog_names, index=data.index)
+            df[fit.model.endog_names] = fit.model.endog
+            df["fittedvalues"] = fit.fittedvalues
+            df["residuals"] = fit.resid
+            self.est_data = df
         return fit
 
     @property
@@ -496,22 +507,25 @@ def _iter_groups(self, data):
         for name in self.models:
             yield name, groups.get_group(name)
 
-    def fit(self, data):
+    def fit(self, data, debug=False):
         """
         Fit each of the models in the group.
 
         Parameters
         ----------
         data : pandas.DataFrame
             Must have a column with the same name as `segmentation_col`.
+        debug : bool
+            If set to true (default false) will pass the debug parameter
+            to model estimation.
 
         Returns
         -------
         fits : dict of statsmodels.regression.linear_model.OLSResults
             Keys are the segment names.
 
         """
-        return {name: self.models[name].fit(df)
+        return {name: self.models[name].fit(df, debug=debug)
                 for name, df in self._iter_groups(data)}
 
     @property
@@ -661,7 +675,7 @@ def add_segment(self, name, model_expression=None, ytransform='default'):
         self._group.add_model_from_params(
             name, None, None, model_expression, ytransform)
 
-    def fit(self, data):
+    def fit(self, data, debug=False):
         """
         Fit each segment. Segments that have not already been explicitly
         added will be automatically added with default model and ytransform.
@@ -670,6 +684,8 @@ def fit(self, data):
         ----------
         data : pandas.DataFrame
             Must have a column with the same name as `segmentation_col`.
+        debug : bool
+            If set to true will pass debug to the fit method of each model.
 
         Returns
         -------
@@ -686,7 +702,7 @@ def fit(self, data):
             if x not in self._group.models and value_counts[x] > self.min_segment_size:
                 self.add_segment(x)
 
-        return self._group.fit(data)
+        return self._group.fit(data, debug=debug)
 
     @property
     def fitted(self):

diff --git a/urbansim/models/yamlmodelrunner.py b/urbansim/models/yamlmodelrunner.py
@@ -22,13 +22,17 @@ def hedonic_estimate(df, cfgname):
     if model_type == "regression":
         hm = RegressionModel.from_yaml(str_or_buffer=cfg)
         print hm.fit(df).summary()
+        est_data = hm.est_data
     if model_type == "segmented_regression":
         hm = SegmentedRegressionModel.from_yaml(str_or_buffer=cfg)
-        for k, v in hm.fit(df).items():
+        hm.min_segment_size = 10
+        for k, v in hm.fit(df, debug=True).items():
             print "REGRESSION RESULTS FOR SEGMENT %s\n" % str(k)
             print v.summary()
             print
+        est_data = {name: hm._group.models[name].est_data for name in hm._group.models}
     hm.to_yaml(str_or_buffer=cfg)
+    return est_data
 
 
 def hedonic_simulate(df, cfgname, outdf, outfname):
@@ -51,6 +55,7 @@ def hedonic_simulate(df, cfgname, outdf, outfname):
         hm = RegressionModel.from_yaml(str_or_buffer=cfg)
     if model_type == "segmented_regression":
         hm = SegmentedRegressionModel.from_yaml(str_or_buffer=cfg)
+        hm.min_segment_size = 10
     price_or_rent = hm.predict(df)
     print price_or_rent.describe()
     outdf.loc[price_or_rent.index.values, outfname] = price_or_rent