Skip to content

Commit

Permalink
Adding new HedonicModel class and urbansim.models module
Browse files Browse the repository at this point in the history
Going to start representing individual model types with configurable
classes.
  • Loading branch information
jiffyclub committed Apr 9, 2014
1 parent 29c9fc8 commit cebe3ce
Show file tree
Hide file tree
Showing 2 changed files with 160 additions and 0 deletions.
1 change: 1 addition & 0 deletions urbansim/models/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
from .hedonic import HedonicModel
159 changes: 159 additions & 0 deletions urbansim/models/hedonic.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,159 @@
import numpy as np
import pandas as pd
import statsmodels.formula.api as smf


def apply_filter_query(df, filters):
    """
    Use the DataFrame.query method to filter a table down to the
    desired rows.

    Parameters
    ----------
    df : pandas.DataFrame
    filters : list of str, str, or None
        Filters to apply. Each filter is wrapped in parentheses and
        the results are joined with ' and ' before being passed to
        DataFrame.query, so every filter must hold for a row to be kept.
        A single string is treated as one filter. If empty or None
        no filtering is done.

    Returns
    -------
    filtered_df : pandas.DataFrame
        The rows of `df` matching every filter. When there are no
        filters `df` itself is returned (not a copy).

    """
    # DataFrame.query cannot evaluate an empty expression, so "no filters"
    # has to short-circuit to the unfiltered table.
    if not filters:
        return df

    # A bare string would otherwise be joined character by character.
    if isinstance(filters, str):
        filters = [filters]

    # Parenthesise each filter so one containing `or` is not regrouped by
    # operator precedence once combined with its neighbours.
    query = ' and '.join('({})'.format(f) for f in filters)
    return df.query(query)


def fit_model(df, filters, model_expression):
    """
    Estimate an ordinary least squares model with statsmodels.

    Parameters
    ----------
    df : pandas.DataFrame
        Table to fit against. Needs every column named in
        `model_expression`.
    filters : list of str
        Row filters applied to `df` before the fit.
    model_expression : str
        A patsy formula usable with statsmodels, including both the
        left- and right-hand sides.

    Returns
    -------
    fit : statsmodels.regression.linear_model.OLSResults

    """
    filtered = apply_filter_query(df, filters)
    return smf.ols(formula=model_expression, data=filtered).fit()


def predict(df, filters, model_fit, ytransform=None):
    """
    Compute predicted dependent values for new data from a fitted model.

    Parameters
    ----------
    df : pandas.DataFrame
    filters : list of str
        Row filters applied to `df` before predicting.
    model_fit : statsmodels.regression.linear_model.OLSResults
        A previously estimated model.
    ytransform : callable, optional
        Called on the predicted output before it is returned. For
        instance, a model that predicts the log of price could use
        ``ytransform=np.exp`` to get results in actual price.
        No transformation is applied by default.

    Returns
    -------
    result : pandas.Series
        Predicted values, indexed like `df` after the filters
        have been applied.

    """
    subset = apply_filter_query(df, filters)
    raw = model_fit.predict(subset)
    values = ytransform(raw) if ytransform else raw
    return pd.Series(values, index=subset.index)


class HedonicModel(object):
    """
    A hedonic (regression) model with the ability to store an
    estimated model and predict new data based on the model.

    Parameters
    ----------
    fit_filters : list of str
        Filters applied before fitting the model.
    predict_filters : list of str
        Filters applied before calculating new data points.
    model_expression : str
        A patsy model expression that can be used with statsmodels.
        Should contain both the left- and right-hand sides.
    ytransform : callable, optional
        A function to call on the array of predicted output.
        For example, if the model relation is predicting the log
        of price, you might pass ``ytransform=np.exp`` so that
        the results reflect actual price.
        By default no transformation is applied.
    name : str, optional
        Optional descriptive name for this model that may be used
        in output. Falls back to 'HedonicModel' when not given.

    """
    def __init__(self, fit_filters, predict_filters, model_expression,
                 ytransform=None, name=None):
        self.fit_filters = fit_filters
        self.predict_filters = predict_filters
        self.model_expression = model_expression
        self.ytransform = ytransform
        self.name = name or 'HedonicModel'
        # Stays None until fit_model() has been called.
        self.model_fit = None

    def fit_model(self, data):
        """
        Fit the model to data and store/return the results.

        Parameters
        ----------
        data : pandas.DataFrame
            Data to use for fitting the model. Must contain all the
            columns referenced by the `model_expression`.

        Returns
        -------
        fit : statsmodels.regression.linear_model.OLSResults
            This is returned for inspection, but also stored on the
            class instance for use during prediction.

        """
        # Inside a method body the bare name resolves to the module-level
        # fit_model function, not to this method.
        fit = fit_model(data, self.fit_filters, self.model_expression)
        self.model_fit = fit
        return fit

    def predict(self, data):
        """
        Predict a new data set based on an estimated model.

        Parameters
        ----------
        data : pandas.DataFrame
            Data to use for prediction. Must contain all the columns
            referenced by the right-hand side of the `model_expression`.

        Returns
        -------
        result : pandas.Series
            Predicted values as a pandas Series. Will have the index of
            `data` after applying filters.

        Raises
        ------
        RuntimeError
            If called before `fit_model` has stored an estimated model.

        """
        # Fail early with a clear message instead of letting None reach
        # the prediction code and surface as an opaque AttributeError.
        if self.model_fit is None:
            raise RuntimeError(
                '{} has not been fit; call fit_model first.'.format(
                    self.name))

        return predict(
            data, self.predict_filters, self.model_fit, self.ytransform)

0 comments on commit cebe3ce

Please sign in to comment.