Skip to content

Commit

Permalink
tests of mnl_simulate compared to R's mlogit
Browse files Browse the repository at this point in the history
  • Loading branch information
jiffyclub committed Nov 22, 2014
1 parent e4ed2a4 commit 6a25b9a
Show file tree
Hide file tree
Showing 4 changed files with 91 additions and 12 deletions.
9 changes: 9 additions & 0 deletions urbansim/urbanchoice/tests/data/fish_choosers.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
"","mode","income","alt","price","catch","chid"
"265.beach",FALSE,2083.3332,"beach",5.922,0.0678,265
"265.boat",TRUE,2083.3332,"boat",5.922,0.0276,265
"265.charter",FALSE,2083.3332,"charter",30.922,0.0029,265
"265.pier",FALSE,2083.3332,"pier",5.922,0.0503,265
"1058.beach",FALSE,3749.9999,"beach",76.176,0.2537,1058
"1058.boat",FALSE,3749.9999,"boat",12.696,0.0531,1058
"1058.charter",TRUE,3749.9999,"charter",37.696,0.0052,1058
"1058.pier",FALSE,3749.9999,"pier",76.176,0.1498,1058
10 changes: 10 additions & 0 deletions urbansim/urbanchoice/tests/data/mnl_tests.R
Original file line number Diff line number Diff line change
Expand Up @@ -4,19 +4,24 @@ data("Fishing", package = "mlogit")
# Reshape the wide-format Fishing data set for mlogit (columns 2-9 vary by
# alternative, 'mode' holds the choice) and save it as a CSV test fixture.
Fish = mlogit.data(Fishing, varying = c(2:9), shape = "wide", choice = "mode")
write.csv(Fish, file='fish.csv')

# Keep only the rows for choice ids 265 and 1058 and save them separately;
# this small subset is what the fitted models are asked to predict on below.
fish_choosers = Fish[Fish$chid == 265 | Fish$chid == 1058,]
write.csv(fish_choosers, file='fish_choosers.csv')

# Banner lines to visually separate sections of the printed output.
print('******************')
print('******************')

# Model 1: price and catch, with the intercept removed (`- 1`).
mnl = mlogit(mode ~ price + catch - 1, data=Fish)
summary(mnl)
# Fitted coefficients for the model above.
print(mnl$coefficients)
# Model predictions for the two selected choosers.
print(predict(mnl, newdata=fish_choosers))

print('******************')
print('******************')

# Model 2: income interactions plus catch * price (main effects and their
# interaction), again without an intercept.
mnl = mlogit(mode ~ price:income + catch:income + catch * price - 1, data=Fish)
summary(mnl)
# Fitted coefficients for the model above.
print(mnl$coefficients)
# Model predictions for the two selected choosers.
print(predict(mnl, newdata=fish_choosers))

print('******************')
print('******************')
Expand All @@ -25,16 +30,21 @@ data('TravelMode', package='AER')
# Wrap the long-format TravelMode data set for mlogit ('choice' is the choice
# column, 'mode' names the alternative, columns 3-7 vary by alternative) and
# save it as a CSV test fixture.
TravelMode = mlogit.data(TravelMode, shape='long', choice='choice', varying=c(3:7), alt.var='mode')
write.csv(TravelMode, file='travel_mode.csv')

# Keep only the rows for individuals 107 and 182 and save them separately;
# this small subset is what the fitted models are asked to predict on below.
travel_choosers = TravelMode[TravelMode$individual == 107 | TravelMode$individual == 182,]
write.csv(travel_choosers, file='travel_choosers.csv')

# Banner lines to visually separate sections of the printed output.
print('******************')
print('******************')

# Model 1: wait, travel and vcost, with the intercept removed (`- 1`).
mnl = mlogit(choice ~ wait + travel + vcost - 1, data=TravelMode)
summary(mnl)
# Fitted coefficients for the model above.
print(mnl$coefficients)
# Model predictions for the two selected individuals.
print(predict(mnl, newdata=travel_choosers))

print('******************')
print('******************')

# Model 2: wait and travel plus income interactions with vcost and gcost,
# again without an intercept.
mnl = mlogit(choice ~ wait + travel + income:vcost + income:gcost - 1, data=TravelMode)
summary(mnl)
# Fitted coefficients for the model above.
print(mnl$coefficients)
# Model predictions for the two selected individuals.
print(predict(mnl, newdata=travel_choosers))
9 changes: 9 additions & 0 deletions urbansim/urbanchoice/tests/data/travel_choosers.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
"","individual","mode","choice","wait","vcost","travel","gcost","income","size"
"107.air","107","air",FALSE,69,108,180,128,35,1
"107.train","107","train",FALSE,34,89,901,187,35,1
"107.bus","107","bus",FALSE,35,44,891,141,35,1
"107.car","107","car",TRUE,0,34,720,112,35,1
"182.air","182","air",FALSE,69,59,121,72,26,1
"182.train","182","train",FALSE,34,31,386,73,26,1
"182.bus","182","bus",FALSE,35,25,431,72,26,1
"182.car","182","car",TRUE,0,14,270,43,26,1
75 changes: 63 additions & 12 deletions urbansim/urbanchoice/tests/test_mnl.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
from __future__ import division

import os.path

import numpy as np
import numpy.testing as npt
import pandas as pd
import pytest
Expand All @@ -15,42 +18,71 @@ def num_alts():

# Each parameter tuple is:
#   (estimation data file, chooser subset file, model formula, choice column,
#    expected coefficients, expected per-chooser probabilities)
# The expected values are the reference numbers this test suite compares
# against (per the commit description, taken from R's mlogit).
@pytest.fixture(scope='module', params=[
    ('fish.csv',
     'fish_choosers.csv',
     'price + catch - 1',
     'mode',
     pd.Series([-0.02047652, 0.95309824], index=['price', 'catch']),
     pd.DataFrame([
         [0.2849598, 0.2742482, 0.1605457, 0.2802463],
         [0.1498991, 0.4542377, 0.2600969, 0.1357664]],
         columns=['beach', 'boat', 'charter', 'pier'])),
    ('fish.csv',
     'fish_choosers.csv',
     'price:income + catch:income + catch * price - 1',
     'mode',
     pd.Series([
         9.839876e-01, -2.659466e-02, 6.933946e-07, -1.324231e-04,
         7.646750e-03],
         index=[
             'catch', 'price', 'price:income', 'catch:income', 'catch:price']),
     pd.DataFrame([
         [0.2885868, 0.2799776, 0.1466286, 0.2848070],
         [0.1346205, 0.4855238, 0.2593983, 0.1204575]],
         columns=['beach', 'boat', 'charter', 'pier'])),
    ('travel_mode.csv',
     'travel_choosers.csv',
     'wait + travel + vcost - 1',
     'choice',
     pd.Series([
         -0.033976668, -0.002192951, 0.008890669],
         index=['wait', 'travel', 'vcost']),
     pd.DataFrame([
         [0.2776876, 0.1584818, 0.1049530, 0.4588777],
         [0.1154490, 0.1653297, 0.1372684, 0.5819528]],
         columns=['air', 'train', 'bus', 'car'])),
    ('travel_mode.csv',
     'travel_choosers.csv',
     'wait + travel + income:vcost + income:gcost - 1',
     'choice',
     pd.Series([
         -3.307586e-02, -2.518762e-03, 1.601746e-04, 3.745822e-05],
         index=['wait', 'travel', 'income:vcost', 'income:gcost']),
     pd.DataFrame([
         [0.2862046, 0.1439074, 0.1044490, 0.4654390],
         [0.1098313, 0.1597317, 0.1344395, 0.5959975]],
         columns=['air', 'train', 'bus', 'car']))])
def test_data(request):
    """Return the current parametrized test case as a dict.

    Keys:
        'data': file name of the full estimation data set.
        'choosers': file name of the chooser subset used for simulation.
        'formula': model formula string.
        'column': name of the column holding the observed choice.
        'est_expected': pd.Series of expected coefficients, indexed by term.
        'sim_expected': pd.DataFrame of expected probabilities, one row per
            chooser and one column per alternative.
    """
    data, choosers, form, col, est_expected, sim_expected = request.param
    return {
        'data': data,
        'choosers': choosers,
        'formula': form,
        'column': col,
        'est_expected': est_expected,
        'sim_expected': sim_expected
    }


@pytest.fixture
def df(test_data):
    """Load the full estimation data set for the current test case.

    The file name comes from ``test_data['data']`` and is resolved relative
    to the ``data`` directory that sits next to this test module.
    """
    filen = os.path.join(os.path.dirname(__file__), 'data', test_data['data'])
    return pd.read_csv(filen)


@pytest.fixture
def choosers(test_data):
    """Load the chooser subset CSV named by ``test_data['choosers']``.

    The file is looked up in the ``data`` directory next to this module.
    """
    data_dir = os.path.join(os.path.dirname(__file__), 'data')
    csv_path = os.path.join(data_dir, test_data['choosers'])
    return pd.read_csv(csv_path)


Expand All @@ -65,6 +97,12 @@ def dm(df, test_data):
return dmatrix(test_data['formula'], data=df, return_type='dataframe')


@pytest.fixture
def choosers_dm(choosers, test_data):
    """Build the design matrix (as a DataFrame) for the chooser subset.

    Uses the current test case's formula with ``dmatrix``, which is imported
    outside the portion of the file shown here.
    """
    formula = test_data['formula']
    return dmatrix(formula, data=choosers, return_type='dataframe')


@pytest.fixture
def fit_coeffs(dm, chosen, num_alts):
log_like, fit = mnl.mnl_estimate(dm.as_matrix(), chosen, num_alts)
Expand All @@ -74,11 +112,24 @@ def fit_coeffs(dm, chosen, num_alts):
def test_mnl_estimate(dm, chosen, num_alts, test_data):
    """Check estimated coefficients against the expected reference values.

    The fitted coefficients are labelled with the design-matrix column names
    and aligned with ``test_data['est_expected']`` so the comparison does not
    depend on term order.
    """
    # .values instead of DataFrame.as_matrix(), which newer pandas removed.
    _, fit = mnl.mnl_estimate(dm.values, chosen, num_alts)
    result = pd.Series(fit.Coefficient.values, index=dm.columns)
    result, expected = result.align(test_data['est_expected'])
    npt.assert_allclose(result.values, expected.values, rtol=1e-4)


def test_mnl_simulate(dm, fit_coeffs, num_alts, test_data, choosers_dm):
    """Check simulated choice probabilities.

    First a sanity check with identical alternatives, then a comparison of
    the probabilities for the chooser subset against
    ``test_data['sim_expected']``.
    """
    # check that if all the alternatives have the same numbers
    # we get an even probability distribution
    data = np.array(
        [[10 ** (x + 1) for x in range(len(dm.columns))]] * num_alts)

    probs = mnl.mnl_simulate(
        data, fit_coeffs, num_alts, returnprobs=True)

    npt.assert_allclose(probs, [[1 / num_alts] * num_alts])

    # now test with real data
    # (.values instead of DataFrame.as_matrix(), which newer pandas removed)
    probs = mnl.mnl_simulate(
        choosers_dm.values, fit_coeffs, num_alts, returnprobs=True)
    results = pd.DataFrame(probs, columns=test_data['sim_expected'].columns)
    results, expected = results.align(test_data['sim_expected'])
    npt.assert_allclose(results.values, expected.values, rtol=1e-4)

0 comments on commit 6a25b9a

Please sign in to comment.