Skip to content

Commit

Permalink
Merge 7f72490 into 29c9fc8
Browse files Browse the repository at this point in the history
  • Loading branch information
jiffyclub committed May 1, 2014
2 parents 29c9fc8 + 7f72490 commit 42c46b5
Show file tree
Hide file tree
Showing 17 changed files with 1,408 additions and 6 deletions.
1 change: 1 addition & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
'numpy>=1.8.0',
'pandas>=0.13.1',
'patsy>=0.2.1',
'prettytable>=0.7.2',
'pyyaml>=3.10',
'scipy>=0.13.3',
'shapely>=1.3.0',
Expand Down
2 changes: 2 additions & 0 deletions urbansim/exceptions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
class ModelEvaluationError(Exception):
    """Exception type dedicated to model evaluation problems."""
2 changes: 2 additions & 0 deletions urbansim/models/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
from .regression import RegressionModel, RegressionModelGroup
from .lcm import MNLLocationChoiceModel
238 changes: 238 additions & 0 deletions urbansim/models/lcm.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,238 @@
from __future__ import print_function, division

import numpy as np
import pandas as pd
from patsy import dmatrix
from prettytable import PrettyTable

from . import util
from ..urbanchoice import interaction, mnl


def unit_choice(chooser_ids, alternative_ids, probabilities):
    """
    Assign alternatives to choosers by weighted random draw.

    Every alternative can be handed out at most once, so when there are
    more choosers than available alternatives a random subset of the
    choosers ends up without a choice.

    Parameters
    ----------
    chooser_ids : 1d array_like
        IDs of the agents making choices.
    alternative_ids : 1d array_like
        IDs of the alternatives the agents choose among.
    probabilities : 1d array_like
        Weight of each alternative, aligned with `alternative_ids`.
        A weight of 0 marks an alternative as unavailable. The weights
        are rescaled to sum to 1 before sampling.

    Returns
    -------
    choices : pandas.Series
        Indexed by chooser ID and holding the chosen alternative ID,
        or nan for choosers that did not receive an alternative.

    """
    chooser_ids = np.asanyarray(chooser_ids)
    alternative_ids = np.asanyarray(alternative_ids)
    probabilities = np.asanyarray(probabilities)

    n_choosers = len(chooser_ids)

    # start from "nobody chose anything"; winners are filled in below
    choices = pd.Series([np.nan] * n_choosers, index=chooser_ids)

    total_weight = probabilities.sum()
    if total_weight == 0:
        # no alternative is available, so every chooser stays nan
        return choices

    # np.random.choice insists on weights that sum to 1
    weights = probabilities / total_weight

    # only alternatives with non-zero weight can be drawn
    n_available = np.count_nonzero(weights)
    n_to_choose = min(n_choosers, n_available)

    picked_alternatives = np.random.choice(
        alternative_ids, size=n_to_choose, replace=False, p=weights)

    if n_choosers >= n_available:
        # every available alternative is being handed out, so draw
        # which choosers are the ones to receive them
        receiving_choosers = np.random.choice(
            chooser_ids, size=n_to_choose, replace=False)
    else:
        receiving_choosers = chooser_ids

    choices[receiving_choosers] = picked_alternatives

    return choices


class MNLLocationChoiceModel(object):
    """
    A location choice model with the ability to store an estimated
    model and predict new data based on the model.
    Based on multinomial logit.

    Parameters
    ----------
    alts_fit_filters : list of str
        Filters applied to the alternatives table before fitting the model.
    alts_predict_filters : list of str
        Filters applied to the alternatives table before calculating
        new data points.
    model_expression : str
        A patsy model expression. Should contain only a right-hand side.
        ' - 1' is appended so that the design matrix has no intercept.
    sample_size : int
        Number of choices to sample for estimating the model.
    choice_column : optional
        Name of the column in the `alternatives` table that choosers
        should choose. e.g. the 'building_id' column. If not provided
        the alternatives index is used.
    name : optional
        Optional descriptive name for this model that may be used
        in output. Defaults to 'MNLLocationChoiceModel'.

    """
    def __init__(self, alts_fit_filters, alts_predict_filters,
                 model_expression, sample_size, choice_column=None, name=None):
        self.alts_fit_filters = alts_fit_filters
        self.alts_predict_filters = alts_predict_filters
        # LCMs never have a constant
        self.model_expression = model_expression + ' - 1'
        self.sample_size = sample_size
        self.choice_column = choice_column
        self.name = name or 'MNLLocationChoiceModel'

        # populated by `fit`
        self._log_lks = None
        self._model_columns = None
        self.fit_results = None

    def fit(self, choosers, alternatives, current_choice):
        """
        Fit and save model parameters based on given data.

        Parameters
        ----------
        choosers : pandas.DataFrame
            Table describing the agents making choices, e.g. households.
        alternatives : pandas.DataFrame
            Table describing the things from which agents are choosing,
            e.g. buildings.
        current_choice : pandas.Series
            A Series describing the `alternatives` currently chosen
            by the `choosers`. Should have an index matching `choosers`
            and values matching the index of `alternatives`.

        Returns
        -------
        log_likelihoods
            The first value returned by `mnl.mnl_estimate`. `report_fit`
            reads it as, in order:

            null_ll : float
                Null log-likelihood
            conv_ll : float
                Log-likelihood at convergence
            ll_ratio : float
                Log-likelihood ratio

        """
        alternatives = util.apply_filter_query(
            alternatives, self.alts_fit_filters)
        _, merged, chosen = interaction.mnl_interaction_dataset(
            choosers, alternatives, self.sample_size, current_choice)
        model_design = dmatrix(
            self.model_expression, data=merged, return_type='dataframe')
        self._model_columns = model_design.columns  # used for report
        # `.values` instead of `.as_matrix()`: same array, but `as_matrix`
        # no longer exists in pandas >= 1.0
        fit, results = mnl.mnl_estimate(
            model_design.values, chosen, self.sample_size)
        self._log_lks = fit
        self.fit_results = results
        return fit

    @property
    def fitted(self):
        """
        True if model is ready for prediction.

        """
        results = self.fit_results
        if results is None:
            return False
        # Test by length rather than truthiness: `bool()` raises on
        # array-like results, while an empty result still counts as
        # "not fitted".
        try:
            return len(results) > 0
        except TypeError:
            # unsized but present (e.g. an iterator)
            return True

    def assert_fitted(self):
        """
        Raises `RuntimeError` if the model is not ready for prediction.

        """
        if not self.fitted:
            raise RuntimeError('Model has not been fit.')

    @property
    def coefficients(self):
        """
        Model coefficients as a list.

        The coefficient is the first item of each row of `fit_results`.

        Raises
        ------
        RuntimeError
            If the model has not been fit.

        """
        self.assert_fitted()
        return [x[0] for x in self.fit_results]

    def report_fit(self):
        """
        Print a report of the fit results.

        Prints the three log-likelihood figures followed by a table with
        one row per model component. If the model has not been fit a
        short notice is printed instead.

        """
        if not self.fitted:
            print('Model not yet fit.')
            return

        print('Null Log-liklihood: {}'.format(self._log_lks[0]))
        print('Log-liklihood at convergence: {}'.format(self._log_lks[1]))
        print('Log-liklihood Ratio: {}\n'.format(self._log_lks[2]))

        tbl = PrettyTable(
            ['Component', 'Coefficient', 'Std. Error', 'T-Score'])
        tbl.align['Component'] = 'l'
        for c, x in zip(self._model_columns, self.fit_results):
            # tuple() so that rows stored as lists/arrays concatenate too
            tbl.add_row((c,) + tuple(x))

        print(tbl)

    def predict(self, choosers, alternatives):
        """
        Choose from among alternatives for a group of agents.

        Parameters
        ----------
        choosers : pandas.DataFrame
            Table describing the agents making choices, e.g. households.
            Only the first item in this table is used for determining
            agent probabilities of choosing alternatives.
        alternatives : pandas.DataFrame
            Table describing the things from which agents are choosing.

        Returns
        -------
        choices : pandas.Series
            Mapping of chooser ID to alternative ID. Some choosers
            will map to a nan value when there are not enough alternatives
            for all the choosers.

        Raises
        ------
        RuntimeError
            If the model has not been fit.

        """
        self.assert_fitted()

        alternatives = util.apply_filter_query(
            alternatives, self.alts_predict_filters)

        # TODO: only using 1st item in choosers for determining probabilities.
        # Need to expand options around this.
        # NOTE(review): `sample_size` is passed here as well, so choices are
        # presumably drawn from a sample of the alternatives rather than
        # from all of them -- confirm against mnl_interaction_dataset.
        _, merged, _ = interaction.mnl_interaction_dataset(
            choosers.head(1), alternatives, self.sample_size)
        model_design = dmatrix(
            self.model_expression, data=merged, return_type='dataframe')

        # probabilities are returned from mnl_simulate as a 2d array
        # and need to be flattened for use in unit_choice.
        # `.values` instead of `.as_matrix()`, which pandas >= 1.0 dropped.
        probabilities = mnl.mnl_simulate(
            model_design.values, self.coefficients,
            numalts=self.sample_size, returnprobs=True).flatten()

        # figure out exactly which things from which choices are drawn
        alt_choices = (
            merged[self.choice_column] if self.choice_column else merged.index)

        return unit_choice(
            choosers.index, alt_choices, probabilities)
Loading

0 comments on commit 42c46b5

Please sign in to comment.