# 潜在クラスロジットモデル(MNLの2クラス混合)の推定を行うためのコード
準備したデータを用いてBiogemeで交通手段選択モデルを推定します．

In [2]:
# import library
import numpy as np
import pandas as pd

import biogeme.database as db
from biogeme.biogeme import BIOGEME
from biogeme.expressions import Beta, log, Variable
from biogeme import models
from biogeme.models import logit
from biogeme.results_processing import get_pandas_estimated_parameters

  from tqdm.autonotebook import tqdm


## データの読み込み&加工
準備したデータを読み込んで以下の加工を行います．
1. 着目する交通手段だけに限定したデータにする(今回は鉄道・幹線バス・乗用車等)．
2. Biogemeは数値データしか読み込めない(ヘッダー行以外に文字列があると読み込めない)ので，文字列を含む列を削除する．あわせて，今回使わない船舶・航空の所要時間・費用の列も削除する．

In [4]:
# Load the prepared trip table. The file is Shift_JIS encoded because it
# carries Japanese text labels (zone, purpose and mode names).
df = pd.read_csv(
    "../data/data.csv",
    encoding='shift_jis',
)
df.head()

Unnamed: 0,O_code,O_name,D_code,D_name,purpose_code,purpose_name,mode_code,mode_name,sex_code,sex_name,...,car_time,bus_time,ship_time,rail_time,air_time,car_cost,bus_cost,ship_cost,rail_cost,air_cost
0,1,道北,48,道東,1,仕事,1,航空,1.0,Male,...,304.3508,515.8,,397.3,306.3,2900.0,5450.0,,13310.0,28740.0
1,1,道北,48,道東,1,仕事,1,航空,1.0,Male,...,304.3508,515.8,,397.3,306.3,2900.0,5450.0,,13310.0,28740.0
2,1,道北,48,道東,1,仕事,1,航空,1.0,Male,...,304.3508,515.8,,397.3,306.3,2900.0,5450.0,,13310.0,28740.0
3,1,道北,48,道東,1,仕事,2,鉄道,1.0,Male,...,304.3508,515.8,,397.3,306.3,2900.0,5450.0,,13310.0,28740.0
4,1,道北,48,道東,1,仕事,2,鉄道,1.0,Male,...,304.3508,515.8,,397.3,306.3,2900.0,5450.0,,13310.0,28740.0


In [5]:
# Keep only the records whose chosen mode is one of the three alternatives
# modelled below (trunk-line bus, rail, private car), renumber the rows, and
# remove the text-label columns together with the ship/air time and cost
# columns, which are not used by the model.
df = (
    df[df['mode_name'].isin(["幹線バス", "鉄道", "乗用車等"])]
    .reset_index(drop=True)
    .drop(
        columns=[
            'O_name', 'D_name', 'purpose_name', 'mode_name', 'sex_name',
            'ship_time', 'ship_cost', 'air_time', 'air_cost',
        ]
    )
)

df.head()

Unnamed: 0,O_code,D_code,purpose_code,mode_code,sex_code,age_code,num,car_time,bus_time,rail_time,car_cost,bus_cost,rail_cost
0,1,48,1,2,1.0,30.0,2.0,304.3508,515.8,397.3,2900.0,5450.0,13310.0
1,1,48,1,2,1.0,40.0,2.0,304.3508,515.8,397.3,2900.0,5450.0,13310.0
2,1,48,1,2,2.0,30.0,3.0,304.3508,515.8,397.3,2900.0,5450.0,13310.0
3,1,48,3,2,2.0,40.0,1.0,304.3508,515.8,397.3,2900.0,5450.0,13310.0
4,1,48,1,4,1.0,20.0,1.0,304.3508,515.8,397.3,2900.0,5450.0,13310.0


In [6]:
# Discard every record that has a missing value in any remaining column,
# then renumber the rows from zero.
df = df.dropna(axis=0, how='any').reset_index(drop=True)
df.head()

Unnamed: 0,O_code,D_code,purpose_code,mode_code,sex_code,age_code,num,car_time,bus_time,rail_time,car_cost,bus_cost,rail_cost
0,1,48,1,2,1.0,30.0,2.0,304.3508,515.8,397.3,2900.0,5450.0,13310.0
1,1,48,1,2,1.0,40.0,2.0,304.3508,515.8,397.3,2900.0,5450.0,13310.0
2,1,48,1,2,2.0,30.0,3.0,304.3508,515.8,397.3,2900.0,5450.0,13310.0
3,1,48,3,2,2.0,40.0,1.0,304.3508,515.8,397.3,2900.0,5450.0,13310.0
4,1,48,1,4,1.0,20.0,1.0,304.3508,515.8,397.3,2900.0,5450.0,13310.0


In [7]:
# Expand the table so that each record appears `num` times, then renumber
# the rows from zero.
# NOTE(review): `num` is stored as float (e.g. 2.0); confirm it only holds
# whole numbers, since a fractional count cannot be expressed as row repeats.
df = (
    df.loc[df.index.repeat(df['num'])]
    .reset_index(drop=True)
)
df.head()

Unnamed: 0,O_code,D_code,purpose_code,mode_code,sex_code,age_code,num,car_time,bus_time,rail_time,car_cost,bus_cost,rail_cost
0,1,48,1,2,1.0,30.0,2.0,304.3508,515.8,397.3,2900.0,5450.0,13310.0
1,1,48,1,2,1.0,30.0,2.0,304.3508,515.8,397.3,2900.0,5450.0,13310.0
2,1,48,1,2,1.0,40.0,2.0,304.3508,515.8,397.3,2900.0,5450.0,13310.0
3,1,48,1,2,1.0,40.0,2.0,304.3508,515.8,397.3,2900.0,5450.0,13310.0
4,1,48,1,2,2.0,30.0,3.0,304.3508,515.8,397.3,2900.0,5450.0,13310.0


## Biogemeによる推定
### データの読み込み

In [9]:
# Wrap the cleaned, all-numeric DataFrame in a Biogeme database named
# 'junryudou'; the expressions defined below refer to its columns by name.
database = db.Database ('junryudou', df)

### 各列の数値の変数への変換

In [11]:
# Bind each database column to a Biogeme Variable so that it can be used
# inside model expressions.

# Trip and traveller attributes; `mode` is the observed choice.
origin, destination, purpose, mode, sex, age = (
    Variable(column)
    for column in (
        'O_code', 'D_code', 'purpose_code', 'mode_code', 'sex_code', 'age_code',
    )
)

# Travel time of each alternative.
car_time, bus_time, rail_time = (
    Variable(column) for column in ('car_time', 'bus_time', 'rail_time')
)

# Travel cost of each alternative.
car_cost, bus_cost, rail_cost = (
    Variable(column) for column in ('car_cost', 'bus_cost', 'rail_cost')
)

In [12]:
# Rescale the level-of-service variables: times are divided by 10 and costs
# by 100 (presumably to keep the coefficient magnitudes comparable -- the
# estimated B_time / B_cost therefore refer to these rescaled units).
car_time_scaled, rail_time_scaled, bus_time_scaled = (
    travel_time / 10 for travel_time in (car_time, rail_time, bus_time)
)
car_cost_scaled, rail_cost_scaled, bus_cost_scaled = (
    travel_cost / 100 for travel_cost in (car_cost, rail_cost, bus_cost)
)

## パラメータの定義

In [14]:
# Unknown parameters of the utility functions.
# Signature: parameter = Beta('name', value, lowerBound, upperBound, status)
#   value  -- starting value (0 for all parameters here)
#   bounds -- None means unbounded
#   status -- 0: estimated, 1: held fixed at `value`

# Alternative-specific constants; bus is the reference alternative.
ASC_car = Beta('ASC_car', 0, None, None, 0)
ASC_rail = Beta('ASC_rail', 0, None, None, 0)
ASC_bus = Beta ('ASC_bus', 0, None, None, 1)  # fixed, not estimated
# Generic (shared across alternatives) coefficients of scaled time and cost.
B_time = Beta ('B_time', 0, None, None, 0)
B_cost = Beta ('B_cost', 0, None, None, 0)

## クラスメンバーシップの定義
prob_class1はクラス1に所属する確率

In [16]:
# Class-membership probabilities of the two latent classes.
# prob_class1 is estimated, starting at 0.5 and bounded to [0, 1];
# class 2 receives the remaining probability mass.
prob_class1 = Beta('prob_class1', 0.5, 0, 1, 0)
prob_class2 = 1 - prob_class1

## 効用関数の定義
### クラス1は費用に対してのみ感度がある

In [18]:
# Class 1 utilities: alternative-specific constant plus the scaled cost term
# only -- travel time does not enter, so this class is cost-sensitive only.
V_car_class1 = ASC_car + B_cost * car_cost_scaled
V_rail_class1 = ASC_rail + B_cost * rail_cost_scaled
V_bus_class1 = ASC_bus + B_cost * bus_cost_scaled

### クラス2は時間と費用に対する感度がある

In [19]:
# Class 2 utilities: alternative-specific constant plus both the scaled time
# and the scaled cost terms. The constants and B_cost are the same parameters
# as in class 1; only the B_time term distinguishes the two classes.
V_car_class2 = ASC_car + B_time * car_time_scaled + B_cost * car_cost_scaled
V_rail_class2 = ASC_rail + B_time * rail_time_scaled + B_cost * rail_cost_scaled
V_bus_class2 = ASC_bus + B_time * bus_time_scaled + B_cost * bus_cost_scaled

## 選択結果との対応付け

In [21]:
# Map each alternative id (the values taken by `mode_code`) to its utility,
# separately for each latent class.
# 5: private car, 2: rail, 4: trunk-line bus
V_class_1 = {5: V_car_class1, 2: V_rail_class1, 4: V_bus_class1}
V_class_2 = {5: V_car_class2, 2: V_rail_class2, 4: V_bus_class2}

## 選択可能性の設定

In [23]:
# Availability of each alternative id (5: car, 2: rail, 4: bus); the constant
# 1 marks all three alternatives as available to every observation.
av = {5: 1, 2: 1, 4: 1}

## モデルの定義と推定

In [25]:
# Logit probability of the observed mode, conditional on belonging to each
# latent class.
choice_probability_class_1 = logit(V_class_1, av, mode)
choice_probability_class_2 = logit(V_class_2, av, mode)

# Unconditional choice probability: the class-conditional probabilities
# weighted by the class-membership probabilities.
prob = prob_class1 * choice_probability_class_1 + prob_class2 * choice_probability_class_2

# Contribution of one observation to the log likelihood.
log_probability = log(prob)

In [26]:
# Build the estimation object from the database and the per-observation
# log-likelihood expression.
the_biogeme = BIOGEME(database, log_probability)
# Name under which the results are reported.
the_biogeme.model_name = 'LatentClass'
# Null log likelihood for the given availabilities; it is the baseline that
# the summary uses for the likelihood ratio test and rho-square.
the_biogeme.calculate_null_loglikelihood(av)

-2435276.28249398

In [27]:
# Run the estimation; `results` holds the estimation output used below.
results = the_biogeme.estimate()

## 推定結果の出力

In [48]:
# Overall fit statistics: sample size, null/final log likelihood,
# rho-square and information criteria.
print(results.short_summary())

# Table of the estimated parameters, which the summary above does not
# include. Left as the last expression so the notebook renders the DataFrame.
get_pandas_estimated_parameters(estimation_results=results)

Results for model LatentClass
Nbr of parameters:		5
Sample size:			2216684
Excluded data:			0
Null log likelihood:		-2435276
Final log likelihood:		-1080552
Likelihood ratio test (null):		2709448
Rho square (null):			0.556
Rho bar square (null):			0.556
Akaike Information Criterion:	2161114
Bayesian Information Criterion:	2161177

