# Biogeme Basics: Probit Model

In [None]:
import pandas  as pd
import biogeme.database  as db
import biogeme.biogeme  as bio

**Import Swissmetro data**

In [None]:
# Read the tab-separated Swissmetro file into a DataFrame.
# Note: the DataFrame is bound to the name `pandas`; the library itself was
# imported under the alias `pd`, so the two do not clash.
pandas = pd.read_csv(
    "data/swissmetro.dat",
    sep='\t',
)

# Hand the DataFrame to Biogeme under the database name "data/swissmetro".
database = db.Database("data/swissmetro", pandas)

**Use column names as variables**

In [None]:
from headers import *

**Exclude some unwanted entries**

Remove observations where Swissmetro was chosen (CHOICE == 2) or CHOICE == 0, where PURPOSE is neither 1 nor 3, and where one of the two alternatives (train, car) is not available (rows with SP == 0 count as unavailable)

In [None]:
# Availability indicators that are forced to zero whenever SP == 0.
CAR_AV_SP = DefineVariable('CAR_AV_SP', CAR_AV * (SP != 0), database)
TRAIN_AV_SP = DefineVariable('TRAIN_AV_SP', TRAIN_AV * (SP != 0), database)

# Each term below evaluates to a non-zero value for rows that must be dropped.
# The partial sums are combined in the same left-to-right order as a single
# chained `+` expression would be.
alternative_unavailable = (TRAIN_AV_SP == 0) + (CAR_AV_SP == 0)
swissmetro_chosen = CHOICE == 2
other_purpose_or_no_choice = (PURPOSE != 1) * (PURPOSE != 3) + (CHOICE == 0)

# A row is excluded as soon as at least one of the conditions holds.
exclude = (
    alternative_unavailable + swissmetro_chosen + other_purpose_or_no_choice
) > 0
database.remove(exclude)

**Define cost variables (set to zero when GA is not 0)**

In [None]:
# Costs are multiplied by the indicator (GA == 0), so they are zero for every
# observation whose GA value is non-zero.
TRAIN_COST = TRAIN_CO * (GA == 0)
SM_COST = SM_CO * (GA == 0)

**Rescale some data**

In [None]:
def _define_scaled(name, expression):
    """Call DefineVariable for `name` and `expression` on the shared `database`."""
    return DefineVariable(name, expression, database)


# Divide travel times and costs by 100. The variables are defined in this
# fixed order: train, Swissmetro, car.
TRAIN_TT_SCALED = _define_scaled('TRAIN_TT_SCALED', TRAIN_TT / 100.0)
TRAIN_COST_SCALED = _define_scaled('TRAIN_COST_SCALED', TRAIN_COST / 100)
SM_TT_SCALED = _define_scaled('SM_TT_SCALED', SM_TT / 100.0)
SM_COST_SCALED = _define_scaled('SM_COST_SCALED', SM_COST / 100)
CAR_TT_SCALED = _define_scaled('CAR_TT_SCALED', CAR_TT / 100)
CAR_CO_SCALED = _define_scaled('CAR_CO_SCALED', CAR_CO / 100)

**Create parameters to be estimated**

`Beta`
1. name of parameter
2. default value for the parameter
3. lower bound
4. upper bound
5. status flag: 0 if the parameter is to be estimated, 1 if it is kept fixed at its default value

In [None]:
# Three unknown parameters, each starting at 0 with no lower or upper bound.
# The final argument is the status flag; per the Biogeme documentation,
# 0 means the parameter is estimated.
ASC_CAR = Beta('ASC_CAR', 0, None, None, 0)  # constant entering the car utility
B_TIME = Beta('B_TIME', 0, None, None, 0)    # coefficient on scaled travel time
B_COST = Beta('B_COST', 0, None, None, 0)    # coefficient on scaled cost

**Define the utility functions**

\begin{align}
V_1 & = \beta_{time}X_{Train_{TT}} + \beta_{cost}X_{Train_{cost}}\\
V_3 & = \beta_{Car} + \beta_{time}X_{Car_{TT}} + \beta_{cost}X_{Car_{cost}}\\
\end{align}

In [None]:
# Utility of the train alternative (alternative 1): time and cost only.
V1 = (
    B_TIME * TRAIN_TT_SCALED
    + B_COST * TRAIN_COST_SCALED
)

# Utility of the car alternative (alternative 3): constant plus time and cost.
V3 = (
    ASC_CAR
    + B_TIME * CAR_TT_SCALED
    + B_COST * CAR_CO_SCALED
)

**Associate choice probabilities with alternatives**

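Create a python dictionary with the choice probability of each alternative. In this binary probit model, the probability of an alternative is the normal CDF (`bioNormalCdf`) of its utility minus the utility of the other alternative.

No dictionary with the availability of choices is needed here: observations where either alternative is unavailable have already been excluded.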

In [None]:
# Binary probit: the probability of an alternative is bioNormalCdf applied to
# its utility minus the utility of the competing alternative.
prob_train = bioNormalCdf(V1 - V3)
prob_car = bioNormalCdf(V3 - V1)

# Keys are the alternative identifiers that are looked up with CHOICE.
P = {1: prob_train, 3: prob_car}

**Define the model**

In [None]:
# Select, for each observation, the probability stored under its CHOICE key,
# then take the logarithm to get that observation's log likelihood term.
chosen_probability = Elem(P, CHOICE)
logprob = log(chosen_probability)

**Define the Biogeme object**

* Give the database with all variables
* Give the log likelihood model

In [None]:
# Combine the prepared database with the log likelihood expression.
biogeme = bio.BIOGEME(database, logprob)

# The model name is reused further down as the prefix when collecting the
# generated output files.
biogeme.modelName = "swissmetro_probit_basic"

**Estimate the model**

1. An `.html` file can be generated with a report of the results; it can be opened with a browser
2. A `.pickle` file can also be generated with a snapshot of the results. This file can then be used in other scripts

In [None]:
# Output switches: request the HTML report, skip the pickle snapshot.
biogeme.generateHtml = True
biogeme.generatePickle = False

# Run the estimation and keep the results object for the following cells.
results = biogeme.estimate()

# NOTE(review): pickle generation is switched off above, yet the pickle file
# name is still printed; presumably it shows an empty value. Confirm.
estimation_data = results.data
print(f"HTML file:    {estimation_data.htmlFileName}")
print(f"Pickle file:  {estimation_data.pickleFileName}")

**Print results**

In [None]:
# Estimated values keyed by parameter name.
betas = results.getBetaValues()

# One line per parameter: name padded to 10 characters, value with 3
# significant digits.
for beta_name, beta_value in betas.items():
    print(f"{beta_name:10}=\t{beta_value:.3g}")

**Get the general statistics**

In [None]:
# General statistics keyed by a descriptive label; only the first element of
# each entry is printed.
gs = results.getGeneralStatistics()

for label, entry in gs.items():
    # Pad the label to 45 characters so the printed values line up.
    print(f"{label.ljust(45)}= {entry[0]}")

**Clean up output files**

In [None]:
import glob
import os

# Collect every file in the working directory whose name starts with the
# model name, leaving out the notebook itself. The prefix is escaped so that
# glob metacharacters in a model name are matched literally.
notebook_name = biogeme.modelName + '.ipynb'
result_files = glob.glob(glob.escape(biogeme.modelName) + '*')
result_files = [x for x in result_files if x != notebook_name]
if result_files:
    result_dir = "results"
    # Create the destination folder on first use; moving a file into a
    # directory that does not exist raises FileNotFoundError.
    os.makedirs(result_dir, exist_ok=True)
    print('Moving the following files:')
    for result_file in result_files:
        print('\t', result_file)
        # os.replace overwrites an existing destination file on every
        # platform, whereas os.rename raises on Windows in that case.
        os.replace(result_file, os.path.join(result_dir, result_file))