# MXL推定を行うためのコード
準備したデータを用いてBiogemeで交通手段選択モデルを推定します．

In [69]:
# import library
import numpy as np
import pandas as pd

# -- Imports
import numpy as np
import pandas as pd

import biogeme.database as db
from biogeme.expressions import Beta, Variable, bioDraws, MonteCarlo, log
from biogeme import models
from biogeme.biogeme import BIOGEME

## データの読み込み&加工
準備したデータを読み込んで以下の加工を行います．
1. 着目する交通手段だけに限定したデータにする(今回は鉄道・幹線バス・乗用車等)．
2. Biogemeのデータベースには数値データしか読み込めないため，文字列を含む列(地域名・目的名・交通手段名など)を削除する．あわせて，対象外の船舶・航空の所要時間・費用の列も削除する．
3. 欠損値を含む行を削除する．
4. `num`列の値の回数だけ各行を複製する．

In [72]:
# Load the prepared trip records; the file is decoded as Shift_JIS.
df = pd.read_csv(
    "../data/data.csv",
    encoding='shift_jis',
)
display(df)

Unnamed: 0,O_code,O_name,D_code,D_name,purpose_code,purpose_name,mode_code,mode_name,sex_code,sex_name,...,car_time,bus_time,ship_time,rail_time,air_time,car_cost,bus_cost,ship_cost,rail_cost,air_cost
0,1,道北,48,道東,1,仕事,1,航空,1.0,Male,...,304.3508,515.8,,397.3,306.3,2900.0,5450.0,,13310.0,28740.0
1,1,道北,48,道東,1,仕事,1,航空,1.0,Male,...,304.3508,515.8,,397.3,306.3,2900.0,5450.0,,13310.0,28740.0
2,1,道北,48,道東,1,仕事,1,航空,1.0,Male,...,304.3508,515.8,,397.3,306.3,2900.0,5450.0,,13310.0,28740.0
3,1,道北,48,道東,1,仕事,2,鉄道,1.0,Male,...,304.3508,515.8,,397.3,306.3,2900.0,5450.0,,13310.0,28740.0
4,1,道北,48,道東,1,仕事,2,鉄道,1.0,Male,...,304.3508,515.8,,397.3,306.3,2900.0,5450.0,,13310.0,28740.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
78832,47,沖縄,46,鹿児島,3,私用・帰省,3,幹線旅客船,1.0,Male,...,,,,,261.5,,,,,25680.0
78833,47,沖縄,46,鹿児島,3,私用・帰省,3,幹線旅客船,2.0,Female,...,,,,,261.5,,,,,25680.0
78834,47,沖縄,46,鹿児島,3,私用・帰省,3,幹線旅客船,2.0,Female,...,,,,,261.5,,,,,25680.0
78835,47,沖縄,46,鹿児島,4,その他,3,幹線旅客船,2.0,Female,...,,,,,261.5,,,,,25680.0


In [73]:
# Keep only the trips made by the three modelled modes
# (intercity bus, rail, private car) and renumber the rows.
df2 = df[df['mode_name'].isin(["幹線バス", "鉄道", "乗用車等"])].reset_index(drop=True)

# Remove the text-valued label columns and the ship/air attributes that the
# model does not use.
df3 = df2.drop(
    columns=[
        'O_name', 'D_name', 'purpose_name', 'mode_name', 'sex_name',
        'ship_time', 'ship_cost', 'air_time', 'air_cost',
    ]
)

display(df3)

Unnamed: 0,O_code,D_code,purpose_code,mode_code,sex_code,age_code,num,car_time,bus_time,rail_time,car_cost,bus_cost,rail_cost
0,1,48,1,2,1.0,30.0,2.0,304.3508,515.8,397.3,2900.0,5450.0,13310.0
1,1,48,1,2,1.0,40.0,2.0,304.3508,515.8,397.3,2900.0,5450.0,13310.0
2,1,48,1,2,2.0,30.0,3.0,304.3508,515.8,397.3,2900.0,5450.0,13310.0
3,1,48,3,2,2.0,40.0,1.0,304.3508,515.8,397.3,2900.0,5450.0,13310.0
4,1,48,1,4,1.0,20.0,1.0,304.3508,515.8,397.3,2900.0,5450.0,13310.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
49234,46,45,0,5,2.0,30.0,178.0,154.2540,276.9,208.4,4110.0,2950.0,4390.0
49235,46,45,0,5,2.0,40.0,114.0,154.2540,276.9,208.4,4110.0,2950.0,4390.0
49236,46,45,0,5,2.0,50.0,94.0,154.2540,276.9,208.4,4110.0,2950.0,4390.0
49237,46,45,0,5,2.0,60.0,68.0,154.2540,276.9,208.4,4110.0,2950.0,4390.0


In [74]:
# Discard every record that has a missing value in any remaining column,
# then renumber the rows from 0.
df4 = (
    df3
    .dropna()
    .reset_index(drop=True)
)
display(df4)

Unnamed: 0,O_code,D_code,purpose_code,mode_code,sex_code,age_code,num,car_time,bus_time,rail_time,car_cost,bus_cost,rail_cost
0,1,48,1,2,1.0,30.0,2.0,304.3508,515.8,397.3,2900.0,5450.0,13310.0
1,1,48,1,2,1.0,40.0,2.0,304.3508,515.8,397.3,2900.0,5450.0,13310.0
2,1,48,1,2,2.0,30.0,3.0,304.3508,515.8,397.3,2900.0,5450.0,13310.0
3,1,48,3,2,2.0,40.0,1.0,304.3508,515.8,397.3,2900.0,5450.0,13310.0
4,1,48,1,4,1.0,20.0,1.0,304.3508,515.8,397.3,2900.0,5450.0,13310.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
44478,46,45,0,5,2.0,30.0,178.0,154.2540,276.9,208.4,4110.0,2950.0,4390.0
44479,46,45,0,5,2.0,40.0,114.0,154.2540,276.9,208.4,4110.0,2950.0,4390.0
44480,46,45,0,5,2.0,50.0,94.0,154.2540,276.9,208.4,4110.0,2950.0,4390.0
44481,46,45,0,5,2.0,60.0,68.0,154.2540,276.9,208.4,4110.0,2950.0,4390.0


In [75]:
# Expand the aggregated records: a row whose `num` is k becomes k identical
# rows. `num` is stored as float, so the repeat counts are cast to integers
# explicitly before repeating the row labels.
df5 = (
    df4
    .loc[df4.index.repeat(df4['num'].astype('int64'))]
    .reset_index(drop=True)
)
display(df5)

Unnamed: 0,O_code,D_code,purpose_code,mode_code,sex_code,age_code,num,car_time,bus_time,rail_time,car_cost,bus_cost,rail_cost
0,1,48,1,2,1.0,30.0,2.0,304.3508,515.8,397.3,2900.0,5450.0,13310.0
1,1,48,1,2,1.0,30.0,2.0,304.3508,515.8,397.3,2900.0,5450.0,13310.0
2,1,48,1,2,1.0,40.0,2.0,304.3508,515.8,397.3,2900.0,5450.0,13310.0
3,1,48,1,2,1.0,40.0,2.0,304.3508,515.8,397.3,2900.0,5450.0,13310.0
4,1,48,1,2,2.0,30.0,3.0,304.3508,515.8,397.3,2900.0,5450.0,13310.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2216679,46,45,0,5,2.0,70.0,108.0,154.2540,276.9,208.4,4110.0,2950.0,4390.0
2216680,46,45,0,5,2.0,70.0,108.0,154.2540,276.9,208.4,4110.0,2950.0,4390.0
2216681,46,45,0,5,2.0,70.0,108.0,154.2540,276.9,208.4,4110.0,2950.0,4390.0
2216682,46,45,0,5,2.0,70.0,108.0,154.2540,276.9,208.4,4110.0,2950.0,4390.0


## Biogemeによる推定
### データの読み込み

In [77]:
# Build the Biogeme database from a reproducible random subsample of the
# expanded data (fixed random_state) -- presumably to keep the simulated
# estimation fast; confirm the intended sample size before a final run.
df5_small = df5.sample(n=500, random_state=1223)  # n is 500 rows here; raise it (e.g. to 50,000) for a larger sample
database = db.Database('junryudou', df5_small)

# To estimate on the full expanded data set instead, build the database from df5.

### 各列の数値の変数への変換

In [79]:
# Bind each database column to a Biogeme Variable expression.

# Trip and traveller attributes
origin = Variable('O_code')
destination = Variable('D_code')
purpose = Variable('purpose_code')
mode = Variable('mode_code')  # chosen alternative
sex = Variable('sex_code')
age = Variable('age_code')

# Travel time of each alternative
car_time = Variable('car_time')
bus_time = Variable('bus_time')
rail_time = Variable('rail_time')

# Travel cost of each alternative
car_cost = Variable('car_cost')
bus_cost = Variable('bus_cost')
rail_cost = Variable('rail_cost')

## パラメータの定義

In [81]:
# Signature reminder: Beta('name', initial value, lower bound, upper bound, status)

# Alternative-specific constants; bus is the reference alternative.
ASC_car = Beta('ASC_car', 0, None, None, 0)
ASC_rail = Beta('ASC_rail', 0, None, None, 0)
ASC_bus = Beta('ASC_bus', 0, None, None, 1)  # status 1: held fixed, not estimated

# Coefficients shared by all alternatives.
B_time = Beta('B_time', 0, None, None, 0)
B_cost = Beta('B_cost', 0, None, None, 0)

# Scale (spread) of the random part of the time coefficient.
B_time_s = Beta('b_time_s', 1, None, None, 0)

# Random time coefficient: B_time plus B_time_s times a draw of type
# 'NORMAL_MLHS' supplied by Biogeme during Monte-Carlo integration.
B_time_rnd = B_time + B_time_s * bioDraws('b_time_rnd', 'NORMAL_MLHS')

## 効用関数の定義

In [83]:
# Utility of each alternative: constant + random time coefficient * time
# + cost coefficient * cost.
V_car = (
    ASC_car
    + B_time_rnd * car_time
    + B_cost * car_cost
)
V_rail = (
    ASC_rail
    + B_time_rnd * rail_time
    + B_cost * rail_cost
)
V_bus = (
    ASC_bus
    + B_time_rnd * bus_time
    + B_cost * bus_cost
)

## 選択結果との対応付け

In [87]:
# Map each value of `mode_code` to the utility of the matching alternative.
V = {
    5: V_car,
    2: V_rail,
    4: V_bus,
}

## 選択可能性の設定

In [91]:
# Availability per alternative code: all three are treated as always available.
av = {
    5: 1,
    2: 1,
    4: 1,
}

## モデルの定義と推定

In [95]:
# Logit probability of the chosen alternative, conditional on one draw of the
# random time coefficient.
conditional_probability = models.logit(V, av, mode)

# Integrate the conditional probability over the draws by Monte-Carlo
# simulation; its log is each observation's log-likelihood contribution.
log_probability = log(MonteCarlo(conditional_probability))

# NOTE(review): the recorded output of this cell warns that passing `seed` to
# the constructor is deprecated in favour of the MonteCarlo section of
# biogeme.toml. It is kept here so the run stays reproducible without an
# extra parameter file.
the_biogeme = BIOGEME(database, log_probability, number_of_draws=100, seed=1223)

# The recorded output also reports that the default model name was used, i.e.
# assigning `model_name` had no effect on the installed Biogeme. Older releases
# appear to read `modelName` instead (TODO confirm against the installed
# version), so set whichever attribute this object actually exposes.
name_attribute = 'model_name' if hasattr(the_biogeme, 'model_name') else 'modelName'
setattr(the_biogeme, name_attribute, 'MXL')

results = the_biogeme.estimate()

The use of argument seed in the constructor of the BIOGEME object is deprecated and will be removed in future versions of Biogeme. Instead, define parameter seed in section MonteCarlo of the .toml parameter file. The default file name is biogeme.toml
You have not defined a name for the model. The output files are named from the model name. The default is [biogemeModelDefaultName]


## 結果の出力

In [None]:
# Show the estimation results; `results` is produced by the_biogeme.estimate()
# in the model-definition cell, so it is not re-estimated here.
print(results)