In [1]:
import matplotlib.pyplot as plt
import pickle
import numpy as np
import pandas as pd
import biogeme.vns as vns
import biogeme.database as db

# Pareto reader

This notebook is used to display the information contained in the Pareto set generated by the algorithm, and saved in the pickle file. 

We need the following just to know the sample size. 

In [2]:
df = pd.read_csv('swissmetro.dat', sep='\t')

database = db.Database('swissmetro', df)

globals().update(database.variables)

exclude = ((Choice == -1) + (CostCarCHF < 0)) > 0
database.remove(exclude)

sampleSize = database.getSampleSize()
sampleSize

NameError: name 'Choice' is not defined

The information about the approximation of the Pareto set is saved regularly by the algorithm in a pickle file. We first open it. 

In [None]:
pickleFile = 'swissmetroPareto.pickle'

The first argument is the largest size of neighborhood used by the algorithm. It is irrelevant when the algorithm is not executed. Here, we simply display the Pareto solutions. So it is not needed.

In [None]:
pareto = vns.paretoClass(_, archiveInputFile=pickleFile)

In [None]:
print(f'Number of pareto solutions: {len(pareto.pareto)}')

We calculate the AIC and the BIC for each non dominated  model.

In [None]:
def AIC(k, LL):
    """Calculate the Aikaike Information Criterion for a model with k parameters and final log liklelihood LL"""
    return 2 * k - 2 * LL

In [None]:
def BIC(k, LL):
    """Calculate the Bayesian Information Criterion for a model with k parameters and final log liklelihood LL"""
    return k * np.log(sampleSize) - 2 * LL


## List of Pareto solutions: performance measures

For each model in the Pareto set, we display:

- an id,
- the negative log likelihood,
- the number of parameters,
- the Bayesian information criterion (BIC), 
- the Akaike informatoin criterion (AIC).

The models with the best BIC (B*) and the best AIC (A*) are identified. If the same model has both the best BIC and AIC, it is labeled (**). 

In [None]:
bestBIC = None
bestAIC = None
summary = []
besti = -1
bestj = -1
for p in pareto.pareto:
    res = ''
    for t, r in zip(p.objectivesNames, p.objectives):
        res += f'{t}: {r}\t'
    L = p.objectives[0]
    k = p.objectives[1]
    A = AIC(k, -L)
    B = BIC(k, -L)
    res += f'BIC = {B:.3f}\tAIC = {A:.3f}'
    summary.append(res)
    if bestBIC is None or B < bestBIC:
        bestBIC = B
        besti = len(summary) - 1
    if bestAIC is None or A < bestAIC:
        bestAIC = A
        bestj = len(summary) - 1
for i in range(len(summary)):
    if i == besti:
        if i == bestj:
            pre = f'{i+1:3} **'
        else:
            pre = f'{i+1:3} B*'
    elif i == bestj:
        pre = f'{i+1:3} A*'
    else:
        pre = f'{i+1:3}   '
    print(f'{pre} {summary[i]}')

## List of Pareto solutions: model specifications

For each model in the Pareto set, we provide a description of the model specification.

In [None]:
counter = 0 
for p in pareto.pareto:
    counter += 1
    print(f'*************** Model {counter} ************************')
    print(p)
    print('\n')
    

## List of Pareto solutions: illustration

The plot below illustrates all models considered by the algorithm. Each model corresponds to one point in the graph. 

- The x-coordinate corresponds to the negative log likelihood of the model, and the y-coordinate to the number of parameters. 
- The larger circles correspond to all models that are not dominated. They are in the Pareto set.
- The crosses corresponds to model that happened to be non dominated at some point during the course of the algorithm, but have been removed from the Pareto set afterwards, as a dominating model has been identified.
- Finally, the small dots corresponds to models that have been considered, but rejected because dominated by another model already in the set.

In [None]:
objectives = list(pareto.pareto)[0].objectivesNames
objectives

In [None]:
par_obj = [p.objectives for p in pareto.pareto]
par_x, par_y = zip(*par_obj)

In [None]:
con_obj = [p.objectives for p in pareto.considered]
con_x, con_y = zip(*con_obj)

In [None]:
rem_obj = [p.objectives for p in pareto.removed]
rem_x, rem_y = zip(*rem_obj)

In [None]:
x_buffer = 10
y_buffer = 0.1

In [None]:
plt.axis([min(par_x)-x_buffer,
          max(par_x)+x_buffer,
          min(par_y)-y_buffer,
          max(par_y)+y_buffer])
plt.plot(par_x, par_y, 'o', label='Pareto')
plt.plot(rem_x, rem_y, 'x', label='Removed')
plt.plot(con_x, con_y, ',', label='Considered')
plt.xlabel(objectives[0])
plt.ylabel(objectives[1])
plt.legend()