# Experimental

This notebook demonstrates the experimental next-generation Larch model,
based on xarray data formats and the new numba choice model back end.

In [1]:
import larch.numba as lx
from larch import P, X

/Users/jeffnewman/LocalGit/sharrow_pro/sharrow_pro/announce.py:4: ExtremelyDangerousMission: 

  good_news_everyone( ### sharrow advanced features are available ### )

### larch.numba is experimental, and not feature-complete ###
 the first time you import on a new system, this package will
 compile optimized binaries for your machine, which may take 
 a little while, please be patient 

OMP: Info #271: omp_set_nested routine deprecated, please use omp_set_max_active_levels instead.


In [2]:
# Load the bundled Exampville example in its 'datapool' form.
# cache_dir is presumably the local directory used for sharrow's cached
# artifacts — confirm against the larch/sharrow docs.
pool = lx.examples.EXAMPVILLE('datapool', cache_dir="_sharrow_cache_")

In [3]:
pool

<larch.dataset.DataPool>
 subspaces: hh, pp, od, do

In [4]:
# Integer codes for the five mode alternatives, bound in a single unpacking so
# the name-to-code pairing can be read off at a glance.
DA, SR, Walk, Bike, Transit = 1, 2, 3, 4, 5

In [5]:
# Build the choice model on top of the data pool, then give it a readable title.
# NOTE(review): the title says "Work Tour", but the first estimation in this
# notebook runs on all 20739 cases; the TOURPURP==1 filter is only applied
# afterwards — confirm the title is intended.
m = lx.Model(datapool=pool)
m.title = "Exampville Work Tour Mode Choice v1"

In [6]:
# Utility functions, one per mode alternative.  Every term is a parameter (P)
# times a data expression (X); each expression adds the `od` and `do` subspace
# values of the same variable (presumably the outbound and return legs — confirm).
#
# Shared parameters: InVehTime (DA, SR, Transit), Cost (DA, SR, Transit) and
# NonMotorTime (Walk, Bike).  DA carries no ASC_* or LogIncome:* term, so those
# parameters on the other modes are expressed relative to DA.
#
# NOTE(review): the inline "dollars per mile" remarks state units that cannot be
# verified from this notebook — confirm against the data dictionary.
m.utility_co[DA] = (
        + P.InVehTime * X("od.AUTO_TIME + do.AUTO_TIME")
        + P.Cost * X("od.AUTO_COST + do.AUTO_COST") # dollars per mile
)

# Same time and cost variables as DA, but the cost term is scaled by 0.5.
m.utility_co[SR] = (
        + P.ASC_SR
        + P.InVehTime * X("od.AUTO_TIME + do.AUTO_TIME")
        + P.Cost * X("od.AUTO_COST + do.AUTO_COST") * 0.5 # dollars per mile, half share
        + P("LogIncome:SR") * X("log(INCOME)")
)

# Walk and Bike use their own travel times with the common NonMotorTime parameter.
m.utility_co[Walk] = (
        + P.ASC_Walk
        + P.NonMotorTime * X("od.WALK_TIME + do.WALK_TIME")
        + P("LogIncome:Walk") * X("log(INCOME)")
)

m.utility_co[Bike] = (
        + P.ASC_Bike
        + P.NonMotorTime * X("od.BIKE_TIME + do.BIKE_TIME")
        + P("LogIncome:Bike") * X("log(INCOME)")
)

# Transit separates in-vehicle (IVTT) from out-of-vehicle (OVTT) time and uses
# the fare as its cost variable.
m.utility_co[Transit] = (
        + P.ASC_Transit
        + P.InVehTime * X("od.TRANSIT_IVTT + do.TRANSIT_IVTT")
        + P.OutVehTime * X("od.TRANSIT_OVTT + do.TRANSIT_OVTT")
        + P.Cost * X("od.TRANSIT_FARE + do.TRANSIT_FARE")
        + P("LogIncome:Transit") * X('log(INCOME)')
)

In [7]:
# Nesting structure: Car = {DA, SR}, NonMotor = {Walk, Bike}, Motor = {Car, Transit}.
# Car has to be created before Motor because Motor lists it as a child.
Car = m.graph.new_node(
    parameter='Mu:Car',
    children=[DA, SR],
    name='Car',
)
NonMotor = m.graph.new_node(
    parameter='Mu:NonMotor',
    children=[Walk, Bike],
    name='NonMotor',
)
Motor = m.graph.new_node(
    parameter='Mu:Motor',
    children=[Car, Transit],
    name='Motor',
)
m.graph  # last expression of the cell: shows the resulting graph

In [8]:
# The chosen alternative is read from the TOURMODE variable; its values are
# assumed to match the DA..Transit integer codes — TODO confirm.
m.choice_co_code = 'TOURMODE'

# Availability condition for each alternative, keyed by mode code.
# NOTE(review): WALK_TIME, BIKE_TIME and TRANSIT_FARE appear here without a
# subspace prefix, while the utility functions reference od.* and do.* —
# confirm which table the unprefixed names resolve to.
m.availability_co_vars = {
    DA: 'AGE >= 16',
    SR: '1',  # constant expression, i.e. no restriction
    Walk: 'WALK_TIME < 60',
    Bike: 'BIKE_TIME < 60',
    Transit: 'TRANSIT_FARE>0',
}


In [9]:
# Summarize the data expressions the specification refers to: utility terms (co),
# the choice variable, and the availability conditions.
m.required_data()

key,value
co,"['log(INCOME)',  'od.AUTO_COST + do.AUTO_COST',  'od.AUTO_TIME + do.AUTO_TIME',  'od.BIKE_TIME + do.BIKE_TIME',  'od.TRANSIT_FARE + do.TRANSIT_FARE',  'od.TRANSIT_IVTT + do.TRANSIT_IVTT',  'od.TRANSIT_OVTT + do.TRANSIT_OVTT',  'od.WALK_TIME + do.WALK_TIME']"
choice_co_code,'TOURMODE'
avail_co,"{1: 'AGE >= 16',  2: '1',  3: 'WALK_TIME < 60',  4: 'BIKE_TIME < 60',  5: 'TRANSIT_FARE>0'}"


The model is connected to a data pool to be used for estimation.  The 
first time we call the `loglike` function, it takes a few seconds to
compile and run.

In [10]:
%time m.loglike() # this takes a few seconds on the initial run

CPU times: user 30.5 s, sys: 1.74 s, total: 32.2 s
Wall time: 7.42 s


-28846.81581153095

In [11]:
%time m.loglike() # but only milliseconds on re-run

CPU times: user 46.9 ms, sys: 3.61 ms, total: 50.6 ms
Wall time: 4.56 ms


-28846.81581153095

In [12]:
# Presumably the gradient of the log likelihood with respect to the parameters
# (the result is a 15-element array) — confirm.
%time m.d_loglike()

CPU times: user 453 ms, sys: 15 ms, total: 468 ms
Wall time: 43.2 ms


array([-5.029350e+03, -2.692350e+03, -1.721100e+03, -2.211183e+03,  1.670510e+04,  1.269521e+05, -5.506872e+04,
       -3.030432e+04, -1.933050e+04, -2.412038e+04,  5.825186e+03,  2.928708e+02, -3.319736e+03, -3.471825e+05,
       -2.262344e+05])

In [14]:
# Bound the parameters before estimating: the minimum/maximum columns of the
# estimation result show -30.0 and 30.0 for every parameter.
m.set_cap(30)

Other than the compile time, the optimization function now runs very fast.

In [15]:
# Estimate the model with the 'slsqp' optimization method; the result object is
# kept in `r` so the following cells can inspect it.
r = m.maximize_loglike(method='slsqp')

Unnamed: 0,value,initvalue,nullvalue,minimum,maximum,holdfast,note,best
ASC_Bike,1.020781,0.0,0.0,-30.0,30.0,0,,1.020781
ASC_SR,2.989585,0.0,0.0,-30.0,30.0,0,,2.989585
ASC_Transit,8.508747,0.0,0.0,-30.0,30.0,0,,8.508747
ASC_Walk,7.473492,0.0,0.0,-30.0,30.0,0,,7.473492
Cost,-0.177504,0.0,0.0,-30.0,30.0,0,,-0.177504
InVehTime,-0.067607,0.0,0.0,-30.0,30.0,0,,-0.067607
LogIncome:Bike,-0.364865,0.0,0.0,-30.0,30.0,0,,-0.364865
LogIncome:SR,-0.422204,0.0,0.0,-30.0,30.0,0,,-0.422204
LogIncome:Transit,-0.696068,0.0,0.0,-30.0,30.0,0,,-0.696068
LogIncome:Walk,-0.454837,0.0,0.0,-30.0,30.0,0,,-0.454837


In [16]:
r

Unnamed: 0_level_0,0
Unnamed: 0_level_1,0
ASC_Bike,1.020781
ASC_SR,2.989585
ASC_Transit,8.508747
ASC_Walk,7.473492
Cost,-0.177504
InVehTime,-0.067607
LogIncome:Bike,-0.364865
LogIncome:SR,-0.422204
LogIncome:Transit,-0.696068
LogIncome:Walk,-0.454837

Unnamed: 0,0
ASC_Bike,1.020781
ASC_SR,2.989585
ASC_Transit,8.508747
ASC_Walk,7.473492
Cost,-0.177504
InVehTime,-0.067607
LogIncome:Bike,-0.364865
LogIncome:SR,-0.422204
LogIncome:Transit,-0.696068
LogIncome:Walk,-0.454837

Unnamed: 0,0
ASC_Bike,0.000971
ASC_SR,-0.022883
ASC_Transit,-0.002079
ASC_Walk,0.001642
Cost,0.029123
InVehTime,-0.00767
LogIncome:Bike,0.009175
LogIncome:SR,-0.245893
LogIncome:Transit,-0.0221
LogIncome:Walk,0.014978


In [17]:
r.loglike

-8047.006193925807

In [26]:
r.n_cases

20739

The new data structure allows for filtering or replacing the main datapool table.

In [23]:
# Keep only the cases with TOURPURP equal to 1 (presumably the work-tour purpose
# code — confirm), then display the filtered table.
worktours = pool.main.query_cases('TOURPURP==1')
worktours

In [24]:
# Point the model at the filtered table instead of the pool's full main table.
m.datapool_source = worktours

In [25]:
m.n_cases

7564

In [27]:
m.loglike()  # expected on the filtered (worktours) data: -3527.6797690247113

-3527.6797690247113

In [28]:
assert m.n_cases == 7564

In [29]:
m.n_cases

7564

In [30]:
# Remove the override; the next loglike() returns the full-sample value again,
# so the model falls back to the pool's own main table.
del m.datapool_source

In [31]:
m.loglike()

-8047.006193925807

In [32]:
# Re-apply the filtered table to check that switching sources is repeatable.
m.datapool_source = worktours

In [33]:
m.loglike()

-3527.6797690247113

In [34]:
# Check the return type: loglike() yields a NumPy scalar, not a built-in float.
type(m.loglike())


numpy.float64

In [35]:
%timeit m.loglike()

1.17 ms ± 20.5 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
