使用R语言实现多值选择模型

In [4]:
# 导入数据集
library(haven)
library(tidyverse)

mlogit <- read_dta('stata_data/mlogit.dta')
mlogit

── [1mAttaching core tidyverse packages[22m ──────────────────────── tidyverse 2.0.0 ──
[32m✔[39m [34mdplyr    [39m 1.1.4     [32m✔[39m [34mreadr    [39m 2.1.5
[32m✔[39m [34mforcats  [39m 1.0.0     [32m✔[39m [34mstringr  [39m 1.5.1
[32m✔[39m [34mggplot2  [39m 3.5.1     [32m✔[39m [34mtibble   [39m 3.2.1
[32m✔[39m [34mlubridate[39m 1.9.4     [32m✔[39m [34mtidyr    [39m 1.3.1
[32m✔[39m [34mpurrr    [39m 1.0.2     
── [1mConflicts[22m ────────────────────────────────────────── tidyverse_conflicts() ──
[31m✖[39m [34mdplyr[39m::[32mfilter()[39m masks [34mdfidx[39m::filter(), [34mstats[39m::filter()
[31m✖[39m [34mdplyr[39m::[32mlag()[39m    masks [34mstats[39m::lag()
[36mℹ[39m Use the ]8;;http://conflicted.r-lib.org/conflicted package]8;; to force all conflicts to become errors


[38;5;246m# A tibble: 200 × 5[39m
      id female     ice_cream      video puzzle
   [3m[38;5;246m<dbl>[39m[23m [3m[38;5;246m<dbl+lbl>[39m[23m  [3m[38;5;246m<dbl+lbl>[39m[23m      [3m[38;5;246m<dbl>[39m[23m  [3m[38;5;246m<dbl>[39m[23m
[38;5;250m 1[39m    70 0[38;5;246m [male][39m   1[38;5;246m [chocolate][39m     47     57
[38;5;250m 2[39m   121 1[38;5;246m [female][39m 2[38;5;246m [vanilla][39m       63     61
[38;5;250m 3[39m    86 0[38;5;246m [male][39m   3[38;5;246m [strawberry][39m    58     31
[38;5;250m 4[39m   141 0[38;5;246m [male][39m   3[38;5;246m [strawberry][39m    53     56
[38;5;250m 5[39m   172 0[38;5;246m [male][39m   2[38;5;246m [vanilla][39m       53     61
[38;5;250m 6[39m   113 0[38;5;246m [male][39m   2[38;5;246m [vanilla][39m       63     61
[38;5;250m 7[39m    50 0[38;5;246m [male][39m   2[38;5;246m [vanilla][39m       53     61
[38;5;250m 8[39m    11 0[38;5;246m [male][39m   2[38;5;246m [vanil

In [62]:
mlogit_idx <- mlogit |> 
  expand_grid(alt = c(1, 2, 3)) |> 
  mutate(
    choice = if_else(alt == ice_cream, 1, 0)
  ) |> 
  relocate(alt, .after = id) |> 
  relocate(female, .after = ice_cream) |> 
  relocate(choice, .after = ice_cream) |> 
  dfidx(
    idx = c("id", "alt")
  )

mlogit_idx

[38;5;246m# A tibble: 600 × 6[39m
[38;5;246m# Index:    200 (id) x 3 (alt)[39m
[38;5;246m# Balanced: yes[39m
   idx   ice_cream     choice female     video puzzle
   [3m[38;5;246m<idx>[39m[23m [3m[38;5;246m<dbl+lbl>[39m[23m      [3m[38;5;246m<dbl>[39m[23m [3m[38;5;246m<dbl+lbl>[39m[23m  [3m[38;5;246m<dbl>[39m[23m  [3m[38;5;246m<dbl>[39m[23m
[38;5;250m 1[39m 1:1   1[38;5;246m [chocolate][39m      1 1[38;5;246m [female][39m    39     41
[38;5;250m 2[39m 1:2   1[38;5;246m [chocolate][39m      0 1[38;5;246m [female][39m    39     41
[38;5;250m 3[39m 1:3   1[38;5;246m [chocolate][39m      0 1[38;5;246m [female][39m    39     41
[38;5;250m 4[39m 2:1   2[38;5;246m [vanilla][39m        0 1[38;5;246m [female][39m    42     41
[38;5;250m 5[39m 2:2   2[38;5;246m [vanilla][39m        1 1[38;5;246m [female][39m    42     41
[38;5;250m 6[39m 2:3   2[38;5;246m [vanilla][39m        0 1[38;5;246m [female][39m    42     41
[38;5;250m 7[

In [73]:
fit_mlogit <- mlogit(
  choice ~ 0 | female + video + puzzle,
  data = mlogit_idx,
  reflevel = 1  # 指定参考组
)

summary(fit_mlogit)


Call:
mlogit(formula = choice ~ 0 | female + video + puzzle, data = mlogit_idx, 
    reflevel = 1, method = "nr")

Frequencies of alternatives:choice
    1     2     3 
0.235 0.475 0.290 

nr method
5 iterations, 0h:0m:0s 
g'(-H)^-1g = 2.64E-05 
successive function values within tolerance limits 

Coefficients :
               Estimate Std. Error z-value  Pr(>|z|)    
(Intercept):2 -1.912254   1.127256 -1.6964 0.0898140 .  
(Intercept):3 -5.969578   1.437546 -4.1526 3.287e-05 ***
female:2      -0.816620   0.390981 -2.0886 0.0367400 *  
female:3      -0.849482   0.448212 -1.8953 0.0580570 .  
video:2        0.023565   0.020975  1.1235 0.2612330    
video:3        0.046487   0.025100  1.8521 0.0640176 .  
puzzle:2       0.038924   0.019517  1.9944 0.0461055 *  
puzzle:3       0.081928   0.023834  3.4375 0.0005872 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Log-Likelihood: -194.03
McFadden R^2:  0.078581 
Likelihood ratio test : chisq = 33.095 (p.value = 1.005

In [72]:
fit_mlogit <- mlogit(
  choice ~ 0 | female + video + puzzle,
  data = mlogit_idx,
  reflevel = 1,  # 指定参考组
  probit = TRUE
)

summary(fit_mlogit)


Call:
mlogit(formula = choice ~ 0 | female + video + puzzle, data = mlogit_idx, 
    reflevel = 1, probit = TRUE)

Frequencies of alternatives:choice
    1     2     3 
0.235 0.475 0.290 

bfgs method
40 iterations, 0h:0m:3s 
g'(-H)^-1g = 9.45E-08 
gradient close to zero 

Coefficients :
                Estimate Std. Error z-value Pr(>|z|)  
(Intercept):2  -1.208479   1.095679 -1.1030   0.2700  
(Intercept):3 -10.363668  39.327822 -0.2635   0.7921  
female:2       -0.513220   0.262189 -1.9574   0.0503 .
female:3       -0.846756   1.946420 -0.4350   0.6635  
video:2         0.012256   0.015594  0.7859   0.4319  
video:3         0.064677   0.221979  0.2914   0.7708  
puzzle:2        0.027317   0.022312  1.2243   0.2208  
puzzle:3        0.119920   0.401844  0.2984   0.7654  
2.3             0.811869   4.648165  0.1747   0.8613  
3.3             3.217398  12.957392  0.2483   0.8039  
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Log-Likelihood: -193.83
McFadden R^2:

# 多项Logit模型

In [None]:
library(nnet)

fit_mlogit <- multinom(
  ice_cream ~ female + video + puzzle,
  data = mlogit,
)

summary(fit_mlogit)

# weights:  15 (8 variable)
initial  value 219.722458 
iter  10 value 194.078046
final  value 194.034851 
converged


警告信息:
In model.matrix.default(Terms, m, contrasts) :
  non-list contrasts argument ignored


Call:
multinom(formula = ice_cream ~ female + video + puzzle, data = mlogit, 
    contrasts = "vanilla")

Coefficients:
  (Intercept)     female      video     puzzle
2   -1.912305 -0.8166207 0.02356541 0.03892473
3   -5.969577 -0.8494647 0.04648473 0.08192982

Std. Errors:
  (Intercept)    female      video     puzzle
2    1.127259 0.3909821 0.02097473 0.01951656
3    1.437546 0.4482127 0.02510004 0.02383389

Residual Deviance: 388.0697 
AIC: 404.0697 

In [20]:
confint(fit_mlogit)

, , 2

                    2.5 %      97.5 %
(Intercept) -4.1216923355  0.29708277
female      -1.5829316096 -0.05030985
video       -0.0175443071  0.06467513
puzzle       0.0006729767  0.07717649

, , 3

                  2.5 %      97.5 %
(Intercept) -8.78711492 -3.15203940
female      -1.72794551  0.02901607
video       -0.00271045  0.09567991
puzzle       0.03521626  0.12864339


In [21]:
library(questionr)
odds.ratio(fit_mlogit)

                      OR      2.5 % 97.5 %         p    
2/(Intercept) 0.14773949 0.01621705 1.3459  0.089806 .  
2/female      0.44192251 0.20537214 0.9509  0.036740 *  
2/video       1.02384527 0.98260870 1.0668  0.261219    
2/puzzle      1.03969222 1.00067320 1.0802  0.046103 *  
3/(Intercept) 0.00255532 0.00015269 0.0428 3.287e-05 ***
3/female      0.42764378 0.17764901 1.0294  0.058062 .  
3/video       1.04758208 0.99729322 1.1004  0.064029 .  
3/puzzle      1.08537964 1.03584369 1.1373  0.000587 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

In [45]:
mlogit_data <- mlogit.data(
  data = mlogit,
  alt.var = ice_cream,
  shape = "short",
  id.var = id,
)

: [1m[33mError[39m:[22m
[33m![39m 找不到对象'ice_cream'

In [None]:
# 再使用mlogit包实现


: [1m[33mError[39m:[22m
[33m![39m 意外的',' 于
"  mutate(
    choice = if_else(alt == ,"

# 多项Probit模型

In [24]:
library(mlogit)
data("ModeCanada", package = "mlogit")

ModeCanada

   case   alt choice dist   cost ivt ovt freq income urban noalt
1     1 train      0   83  28.25  50  66    4     45     0     2
2     1   car      1   83  15.77  61   0    0     45     0     2
3     2 train      0   83  28.25  50  66    4     25     0     2
4     2   car      1   83  15.77  61   0    0     25     0     2
5     3 train      0   83  28.25  50  66    4     70     0     2
6     3   car      1   83  15.77  61   0    0     70     0     2
7     4 train      0   83  28.25  50  66    4     70     0     2
8     4   car      1   83  15.77  61   0    0     70     0     2
9     5 train      0   83  28.25  50  66    4     55     0     2
10    5   car      1   83  15.77  61   0    0     55     0     2
11    6 train      0   83  28.25  50  66    4     70     0     2
12    6   car      1   83  15.77  61   0    0     70     0     2
13    7 train      0   83  28.25  50  66    4     15     0     2
14    7   car      1   83  15.77  61   0    0     15     0     2
15    8 train      0   83

# 条件Logit模型

, , 2

                    2.5 %      97.5 %
(Intercept) -4.1216923355  0.29708277
female      -1.5829316096 -0.05030985
video       -0.0175443071  0.06467513
puzzle       0.0006729767  0.07717649

, , 3

                  2.5 %      97.5 %
(Intercept) -8.78711492 -3.15203940
female      -1.72794551  0.02901607
video       -0.00271045  0.09567991
puzzle       0.03521626  0.12864339


In [47]:
library(tidyverse)

In [50]:
data("Train", package = "mlogit")
glimpse(Train)

Rows: 2,929
Columns: 11
$ id        [3m[38;5;246m<int>[39m[23m 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, …
$ choiceid  [3m[38;5;246m<int>[39m[23m 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10…
$ choice    [3m[38;5;246m<fct>[39m[23m A, A, A, B, B, B, B, B, A, A, A, A, B, B, B, B, A, B, B, A, …
$ price_A   [3m[38;5;246m<dbl>[39m[23m 2400, 2400, 2400, 4000, 2400, 4000, 2400, 2400, 4000, 2400, …
$ time_A    [3m[38;5;246m<dbl>[39m[23m 150, 150, 115, 130, 150, 115, 150, 115, 115, 150, 108, 93, 1…
$ change_A  [3m[38;5;246m<dbl>[39m[23m 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
$ comfort_A [3m[38;5;246m<dbl>[39m[23m 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 1, 1, …
$ price_B   [3m[38;5;246m<dbl>[39m[23m 4000, 3200, 4000, 3200, 3200, 2400, 3200, 3200, 3200, 4000, …
$ time_B    [3m[38;5;246m<dbl>[39m[23m 150, 130, 115, 150, 150, 130, 115, 150, 130, 115, 121, 93, 9…
$ change_B  [3m[38;5;246m<dbl