# Dependencies loading

In [43]:
# Remove every (non-hidden) object from the global environment.
# NOTE: this does not detach packages or reset options set earlier in the
# session.
rm(list = ls())

# Environment variables for this session: LANG and the Java runtime location.
# NOTE(review): JAVA_HOME is a machine-specific Windows path -- confirm it is
# still needed by the sourced modules.
Sys.setenv(
  LANG = "en",
  JAVA_HOME = "C:\\Program Files\\Java\\jre1.8.0_261"
)

# External packages
library(dplyr)
library(olsrr)
library(stargazer)
library(readr)

# Project-local helpers
# diagnostic_tests.R: diagnostic tests
source("../../modules/r_modules/diagnostic_tests.R")
# feature_engineering.R: initial transformations for dataset
source("../../modules/r_modules/feature_engineering.R")

# Input and output directories (relative to the working directory)
path_read <- "../../../data/main_research/processed_data"
path_write <- "../../../results/main_research/ols_regression"

# warn = 0 is R's default: warnings are collected and reported once the
# top-level call returns.
options(warn = 0)

# Dataset loading and application of initial transformation

In [46]:
# Read the processed dataset from the input directory.
df <- read_csv(file.path(path_read, "final_dataset.csv"))

# Apply the project's initial transformations (defined in the sourced
# feature_engineering.R module) to obtain the modelling dataset.
data <- initial_transformations(df)

Parsed with column specification:
cols(
  .default = col_double(),
  country_name = col_character(),
  country_text_id = col_character(),
  continent = col_character(),
  sub_region = col_character(),
  income_group = col_character()
)
See spec(...) for full column specifications.


In [47]:
# Show the names of the transformed dataset so the variables used in the
# model formulas below can be checked against it.
names(data)

# 1) OLS with all variables

In [48]:
# Specification 1, outcome panback: the 13 non-categorical covariates plus the
# region_geo_* and income-group dummies.
# region_geo_10 is not in the formula (presumably the reference region --
# confirm).
model_panback_1 <- lm(
  formula = panback ~
    rule + polyarchy + education +
    ethnic_frac + ling_frac + relig_frac +
    gdp_pc_log + gini_log + inflation + trade_gdp_log +
    oil + mineral + density_log +
    region_geo_1 + region_geo_2 + region_geo_3 + region_geo_4 +
    region_geo_5 + region_geo_6 + region_geo_7 + region_geo_8 +
    region_geo_9 + region_geo_11 + region_geo_12 + region_geo_13 +
    region_geo_14 + region_geo_15 + region_geo_16 + region_geo_17 +
    region_geo_18 + region_geo_19 +
    low_income + lower_middle_income + upper_middle_income,
  data = data
)

# Print the diagnostic report from the project's helper for this fit.
diagnostic_tests(model_panback_1)

VIF for collinearity of variables:

             Variables  Tolerance       VIF
1                 rule 0.15010304  6.662090
2            polyarchy 0.15913494  6.283975
3            education 0.16680912  5.994876
4          ethnic_frac 0.28626638  3.493250
5            ling_frac 0.26444696  3.781477
6           relig_frac 0.40896407  2.445203
7           gdp_pc_log 0.04448605 22.478956
8             gini_log 0.38402674  2.603985
9            inflation 0.69951989  1.429552
10       trade_gdp_log 0.50668483  1.973613
11                 oil 0.43749276  2.285752
12             mineral 0.54735257  1.826976
13         density_log 0.36611410  2.731389
14        region_geo_1 0.41922635  2.385346
15        region_geo_2 0.35172072  2.843165
16        region_geo_3 0.42363046  2.360548
17        region_geo_4 0.40986661  2.439818
18        region_geo_5 0.55790298  1.792426
19        region_geo_6 0.20119126  4.970395
20        region_geo_7 0.31895943  3.135195
21        region_geo_8 0.24804778  4.031

In [49]:
# Specification 1, outcome pandem: the 13 non-categorical covariates plus the
# region_geo_* and income-group dummies.
# region_geo_10 is not in the formula (presumably the reference region --
# confirm).
model_pandem_1 <- lm(
  formula = pandem ~
    rule + polyarchy + education +
    ethnic_frac + ling_frac + relig_frac +
    gdp_pc_log + gini_log + inflation + trade_gdp_log +
    oil + mineral + density_log +
    region_geo_1 + region_geo_2 + region_geo_3 + region_geo_4 +
    region_geo_5 + region_geo_6 + region_geo_7 + region_geo_8 +
    region_geo_9 + region_geo_11 + region_geo_12 + region_geo_13 +
    region_geo_14 + region_geo_15 + region_geo_16 + region_geo_17 +
    region_geo_18 + region_geo_19 +
    low_income + lower_middle_income + upper_middle_income,
  data = data
)

# Print the diagnostic report from the project's helper for this fit.
diagnostic_tests(model_pandem_1)

VIF for collinearity of variables:

             Variables  Tolerance       VIF
1                 rule 0.15010304  6.662090
2            polyarchy 0.15913494  6.283975
3            education 0.16680912  5.994876
4          ethnic_frac 0.28626638  3.493250
5            ling_frac 0.26444696  3.781477
6           relig_frac 0.40896407  2.445203
7           gdp_pc_log 0.04448605 22.478956
8             gini_log 0.38402674  2.603985
9            inflation 0.69951989  1.429552
10       trade_gdp_log 0.50668483  1.973613
11                 oil 0.43749276  2.285752
12             mineral 0.54735257  1.826976
13         density_log 0.36611410  2.731389
14        region_geo_1 0.41922635  2.385346
15        region_geo_2 0.35172072  2.843165
16        region_geo_3 0.42363046  2.360548
17        region_geo_4 0.40986661  2.439818
18        region_geo_5 0.55790298  1.792426
19        region_geo_6 0.20119126  4.970395
20        region_geo_7 0.31895943  3.135195
21        region_geo_8 0.24804778  4.031

In [50]:
# Specification 1, outcome pandem_dis: the 13 non-categorical covariates plus
# the region_geo_* and income-group dummies.
# region_geo_10 is not in the formula (presumably the reference region --
# confirm).
model_pandem_dis_1 <- lm(
  formula = pandem_dis ~
    rule + polyarchy + education +
    ethnic_frac + ling_frac + relig_frac +
    gdp_pc_log + gini_log + inflation + trade_gdp_log +
    oil + mineral + density_log +
    region_geo_1 + region_geo_2 + region_geo_3 + region_geo_4 +
    region_geo_5 + region_geo_6 + region_geo_7 + region_geo_8 +
    region_geo_9 + region_geo_11 + region_geo_12 + region_geo_13 +
    region_geo_14 + region_geo_15 + region_geo_16 + region_geo_17 +
    region_geo_18 + region_geo_19 +
    low_income + lower_middle_income + upper_middle_income,
  data = data
)

# Print the diagnostic report from the project's helper for this fit.
diagnostic_tests(model_pandem_dis_1)

VIF for collinearity of variables:

             Variables  Tolerance       VIF
1                 rule 0.15010304  6.662090
2            polyarchy 0.15913494  6.283975
3            education 0.16680912  5.994876
4          ethnic_frac 0.28626638  3.493250
5            ling_frac 0.26444696  3.781477
6           relig_frac 0.40896407  2.445203
7           gdp_pc_log 0.04448605 22.478956
8             gini_log 0.38402674  2.603985
9            inflation 0.69951989  1.429552
10       trade_gdp_log 0.50668483  1.973613
11                 oil 0.43749276  2.285752
12             mineral 0.54735257  1.826976
13         density_log 0.36611410  2.731389
14        region_geo_1 0.41922635  2.385346
15        region_geo_2 0.35172072  2.843165
16        region_geo_3 0.42363046  2.360548
17        region_geo_4 0.40986661  2.439818
18        region_geo_5 0.55790298  1.792426
19        region_geo_6 0.20119126  4.970395
20        region_geo_7 0.31895943  3.135195
21        region_geo_8 0.24804778  4.031

In [51]:
# Plain-text comparison table for the three specification-1 models; columns
# follow the argument order (panback, pandem, pandem_dis).
stargazer(
  model_panback_1,
  model_pandem_1,
  model_pandem_dis_1,
  type = "text"
)


                                    Dependent variable:     
                               -----------------------------
                                panback   pandem  pandem_dis
                                  (1)      (2)       (3)    
------------------------------------------------------------
rule                             0.067    0.033     0.027   
                                (0.068)  (0.077)   (0.073)  
                                                            
polyarchy                        0.091   -0.239**  -0.216** 
                                (0.082)  (0.092)   (0.086)  
                                                            
education                       -0.008   -0.0003    -0.003  
                                (0.007)  (0.008)   (0.007)  
                                                            
ethnic_frac                      0.057   -0.0003    0.013   
                                (0.059)  (0.067)   (0.063)  
                       

# 2) OLS with all variables excluding polyarchy

In [52]:
# Specification 2, outcome panback: same as specification 1 but without
# polyarchy. region_geo_10 is not in the formula (presumably the reference
# region -- confirm).
model_panback_2 <- lm(
  formula = panback ~
    rule + education +
    ethnic_frac + ling_frac + relig_frac +
    gdp_pc_log + gini_log + inflation + trade_gdp_log +
    oil + mineral + density_log +
    region_geo_1 + region_geo_2 + region_geo_3 + region_geo_4 +
    region_geo_5 + region_geo_6 + region_geo_7 + region_geo_8 +
    region_geo_9 + region_geo_11 + region_geo_12 + region_geo_13 +
    region_geo_14 + region_geo_15 + region_geo_16 + region_geo_17 +
    region_geo_18 + region_geo_19 +
    low_income + lower_middle_income + upper_middle_income,
  data = data
)

# Print the diagnostic report from the project's helper for this fit.
diagnostic_tests(model_panback_2)

VIF for collinearity of variables:

             Variables  Tolerance       VIF
1                 rule 0.28285014  3.535441
2            education 0.17497560  5.715083
3          ethnic_frac 0.29488908  3.391106
4            ling_frac 0.27043950  3.697685
5           relig_frac 0.41003201  2.438834
6           gdp_pc_log 0.04721080 21.181594
7             gini_log 0.38637900  2.588132
8            inflation 0.70129773  1.425928
9        trade_gdp_log 0.50849922  1.966571
10                 oil 0.43780758  2.284108
11             mineral 0.54772722  1.825726
12         density_log 0.36711495  2.723942
13        region_geo_1 0.46925523  2.131036
14        region_geo_2 0.39957943  2.502631
15        region_geo_3 0.49557158  2.017872
16        region_geo_4 0.42857153  2.333333
17        region_geo_5 0.56259451  1.777479
18        region_geo_6 0.22568068  4.431040
19        region_geo_7 0.34577648  2.892042
20        region_geo_8 0.25186381  3.970400
21        region_geo_9 0.45905305  2.178

In [53]:
# Specification 2, outcome pandem: same as specification 1 but without
# polyarchy. region_geo_10 is not in the formula (presumably the reference
# region -- confirm).
model_pandem_2 <- lm(
  formula = pandem ~
    rule + education +
    ethnic_frac + ling_frac + relig_frac +
    gdp_pc_log + gini_log + inflation + trade_gdp_log +
    oil + mineral + density_log +
    region_geo_1 + region_geo_2 + region_geo_3 + region_geo_4 +
    region_geo_5 + region_geo_6 + region_geo_7 + region_geo_8 +
    region_geo_9 + region_geo_11 + region_geo_12 + region_geo_13 +
    region_geo_14 + region_geo_15 + region_geo_16 + region_geo_17 +
    region_geo_18 + region_geo_19 +
    low_income + lower_middle_income + upper_middle_income,
  data = data
)

# Print the diagnostic report from the project's helper for this fit.
diagnostic_tests(model_pandem_2)

VIF for collinearity of variables:

             Variables  Tolerance       VIF
1                 rule 0.28285014  3.535441
2            education 0.17497560  5.715083
3          ethnic_frac 0.29488908  3.391106
4            ling_frac 0.27043950  3.697685
5           relig_frac 0.41003201  2.438834
6           gdp_pc_log 0.04721080 21.181594
7             gini_log 0.38637900  2.588132
8            inflation 0.70129773  1.425928
9        trade_gdp_log 0.50849922  1.966571
10                 oil 0.43780758  2.284108
11             mineral 0.54772722  1.825726
12         density_log 0.36711495  2.723942
13        region_geo_1 0.46925523  2.131036
14        region_geo_2 0.39957943  2.502631
15        region_geo_3 0.49557158  2.017872
16        region_geo_4 0.42857153  2.333333
17        region_geo_5 0.56259451  1.777479
18        region_geo_6 0.22568068  4.431040
19        region_geo_7 0.34577648  2.892042
20        region_geo_8 0.25186381  3.970400
21        region_geo_9 0.45905305  2.178

In [54]:
# Specification 2, outcome pandem_dis: same as specification 1 but without
# polyarchy. region_geo_10 is not in the formula (presumably the reference
# region -- confirm).
model_pandem_dis_2 <- lm(
  formula = pandem_dis ~
    rule + education +
    ethnic_frac + ling_frac + relig_frac +
    gdp_pc_log + gini_log + inflation + trade_gdp_log +
    oil + mineral + density_log +
    region_geo_1 + region_geo_2 + region_geo_3 + region_geo_4 +
    region_geo_5 + region_geo_6 + region_geo_7 + region_geo_8 +
    region_geo_9 + region_geo_11 + region_geo_12 + region_geo_13 +
    region_geo_14 + region_geo_15 + region_geo_16 + region_geo_17 +
    region_geo_18 + region_geo_19 +
    low_income + lower_middle_income + upper_middle_income,
  data = data
)

# Print the diagnostic report from the project's helper for this fit.
diagnostic_tests(model_pandem_dis_2)

VIF for collinearity of variables:

             Variables  Tolerance       VIF
1                 rule 0.28285014  3.535441
2            education 0.17497560  5.715083
3          ethnic_frac 0.29488908  3.391106
4            ling_frac 0.27043950  3.697685
5           relig_frac 0.41003201  2.438834
6           gdp_pc_log 0.04721080 21.181594
7             gini_log 0.38637900  2.588132
8            inflation 0.70129773  1.425928
9        trade_gdp_log 0.50849922  1.966571
10                 oil 0.43780758  2.284108
11             mineral 0.54772722  1.825726
12         density_log 0.36711495  2.723942
13        region_geo_1 0.46925523  2.131036
14        region_geo_2 0.39957943  2.502631
15        region_geo_3 0.49557158  2.017872
16        region_geo_4 0.42857153  2.333333
17        region_geo_5 0.56259451  1.777479
18        region_geo_6 0.22568068  4.431040
19        region_geo_7 0.34577648  2.892042
20        region_geo_8 0.25186381  3.970400
21        region_geo_9 0.45905305  2.178

In [55]:
# Plain-text comparison table for the three specification-2 models (polyarchy
# excluded); columns follow the argument order.
stargazer(
  model_panback_2,
  model_pandem_2,
  model_pandem_dis_2,
  type = "text"
)


                                    Dependent variable:     
                               -----------------------------
                                panback   pandem  pandem_dis
                                  (1)      (2)       (3)    
------------------------------------------------------------
rule                            0.119**  -0.105*   -0.097*  
                                (0.050)  (0.058)   (0.054)  
                                                            
education                       -0.006    -0.005    -0.006  
                                (0.007)  (0.008)   (0.007)  
                                                            
ethnic_frac                      0.046    0.029     0.040   
                                (0.058)  (0.067)   (0.063)  
                                                            
ling_frac                        0.026    0.0003    -0.027  
                                (0.053)  (0.061)   (0.057)  
                       

# 3) OLS with all variables excluding rule

In [56]:
# Specification 3, outcome panback: same as specification 1 but without rule.
# region_geo_10 is not in the formula (presumably the reference region --
# confirm).
model_panback_3 <- lm(
  formula = panback ~
    polyarchy + education +
    ethnic_frac + ling_frac + relig_frac +
    gdp_pc_log + gini_log + inflation + trade_gdp_log +
    oil + mineral + density_log +
    region_geo_1 + region_geo_2 + region_geo_3 + region_geo_4 +
    region_geo_5 + region_geo_6 + region_geo_7 + region_geo_8 +
    region_geo_9 + region_geo_11 + region_geo_12 + region_geo_13 +
    region_geo_14 + region_geo_15 + region_geo_16 + region_geo_17 +
    region_geo_18 + region_geo_19 +
    low_income + lower_middle_income + upper_middle_income,
  data = data
)

# Print the diagnostic report from the project's helper for this fit.
diagnostic_tests(model_panback_3)

VIF for collinearity of variables:

             Variables  Tolerance       VIF
1            polyarchy 0.29986962  3.334783
2            education 0.16855131  5.932912
3          ethnic_frac 0.29282588  3.414999
4            ling_frac 0.26495523  3.774222
5           relig_frac 0.40989102  2.439673
6           gdp_pc_log 0.04745336 21.073323
7             gini_log 0.38405501  2.603794
8            inflation 0.72876769  1.372179
9        trade_gdp_log 0.50792632  1.968789
10                 oil 0.45989172  2.174425
11             mineral 0.55132158  1.813823
12         density_log 0.36611883  2.731354
13        region_geo_1 0.43871990  2.279359
14        region_geo_2 0.36481896  2.741086
15        region_geo_3 0.45162257  2.214238
16        region_geo_4 0.42915117  2.330181
17        region_geo_5 0.55896260  1.789028
18        region_geo_6 0.20257902  4.936345
19        region_geo_7 0.32860771  3.043142
20        region_geo_8 0.24874212  4.020228
21        region_geo_9 0.44407220  2.251

In [57]:
# Specification 3, outcome pandem: same as specification 1 but without rule.
# region_geo_10 is not in the formula (presumably the reference region --
# confirm).
model_pandem_3 <- lm(
  formula = pandem ~
    polyarchy + education +
    ethnic_frac + ling_frac + relig_frac +
    gdp_pc_log + gini_log + inflation + trade_gdp_log +
    oil + mineral + density_log +
    region_geo_1 + region_geo_2 + region_geo_3 + region_geo_4 +
    region_geo_5 + region_geo_6 + region_geo_7 + region_geo_8 +
    region_geo_9 + region_geo_11 + region_geo_12 + region_geo_13 +
    region_geo_14 + region_geo_15 + region_geo_16 + region_geo_17 +
    region_geo_18 + region_geo_19 +
    low_income + lower_middle_income + upper_middle_income,
  data = data
)

# Print the diagnostic report from the project's helper for this fit.
diagnostic_tests(model_pandem_3)

VIF for collinearity of variables:

             Variables  Tolerance       VIF
1            polyarchy 0.29986962  3.334783
2            education 0.16855131  5.932912
3          ethnic_frac 0.29282588  3.414999
4            ling_frac 0.26495523  3.774222
5           relig_frac 0.40989102  2.439673
6           gdp_pc_log 0.04745336 21.073323
7             gini_log 0.38405501  2.603794
8            inflation 0.72876769  1.372179
9        trade_gdp_log 0.50792632  1.968789
10                 oil 0.45989172  2.174425
11             mineral 0.55132158  1.813823
12         density_log 0.36611883  2.731354
13        region_geo_1 0.43871990  2.279359
14        region_geo_2 0.36481896  2.741086
15        region_geo_3 0.45162257  2.214238
16        region_geo_4 0.42915117  2.330181
17        region_geo_5 0.55896260  1.789028
18        region_geo_6 0.20257902  4.936345
19        region_geo_7 0.32860771  3.043142
20        region_geo_8 0.24874212  4.020228
21        region_geo_9 0.44407220  2.251

In [58]:
# Specification 3, outcome pandem_dis: same as specification 1 but without
# rule. region_geo_10 is not in the formula (presumably the reference region
# -- confirm).
model_pandem_dis_3 <- lm(
  formula = pandem_dis ~
    polyarchy + education +
    ethnic_frac + ling_frac + relig_frac +
    gdp_pc_log + gini_log + inflation + trade_gdp_log +
    oil + mineral + density_log +
    region_geo_1 + region_geo_2 + region_geo_3 + region_geo_4 +
    region_geo_5 + region_geo_6 + region_geo_7 + region_geo_8 +
    region_geo_9 + region_geo_11 + region_geo_12 + region_geo_13 +
    region_geo_14 + region_geo_15 + region_geo_16 + region_geo_17 +
    region_geo_18 + region_geo_19 +
    low_income + lower_middle_income + upper_middle_income,
  data = data
)

# Print the diagnostic report from the project's helper for this fit.
diagnostic_tests(model_pandem_dis_3)

VIF for collinearity of variables:

             Variables  Tolerance       VIF
1            polyarchy 0.29986962  3.334783
2            education 0.16855131  5.932912
3          ethnic_frac 0.29282588  3.414999
4            ling_frac 0.26495523  3.774222
5           relig_frac 0.40989102  2.439673
6           gdp_pc_log 0.04745336 21.073323
7             gini_log 0.38405501  2.603794
8            inflation 0.72876769  1.372179
9        trade_gdp_log 0.50792632  1.968789
10                 oil 0.45989172  2.174425
11             mineral 0.55132158  1.813823
12         density_log 0.36611883  2.731354
13        region_geo_1 0.43871990  2.279359
14        region_geo_2 0.36481896  2.741086
15        region_geo_3 0.45162257  2.214238
16        region_geo_4 0.42915117  2.330181
17        region_geo_5 0.55896260  1.789028
18        region_geo_6 0.20257902  4.936345
19        region_geo_7 0.32860771  3.043142
20        region_geo_8 0.24874212  4.020228
21        region_geo_9 0.44407220  2.251

In [59]:
# Plain-text comparison table for the three specification-3 models (rule
# excluded); columns follow the argument order.
stargazer(
  model_panback_3,
  model_pandem_3,
  model_pandem_dis_3,
  type = "text"
)


                                    Dependent variable:     
                               -----------------------------
                               panback   pandem   pandem_dis
                                 (1)       (2)       (3)    
------------------------------------------------------------
polyarchy                      0.145**  -0.212*** -0.194*** 
                               (0.059)   (0.067)   (0.063)  
                                                            
education                       -0.008   -0.001     -0.003  
                               (0.007)   (0.008)   (0.007)  
                                                            
ethnic_frac                     0.066     0.004     0.017   
                               (0.058)   (0.066)   (0.062)  
                                                            
ling_frac                       0.015     0.022     -0.007  
                               (0.053)   (0.060)   (0.056)  
                       

# 4) OLS without categorical variables

In [60]:
# Specification 4, outcome panback: the 13 non-categorical covariates only
# (no region_geo_* or income-group dummies).
model_panback_4 <- lm(
  formula = panback ~
    rule + polyarchy + education +
    ethnic_frac + ling_frac + relig_frac +
    gdp_pc_log + gini_log + inflation + trade_gdp_log +
    oil + mineral + density_log,
  data = data
)

# Print the diagnostic report from the project's helper for this fit.
diagnostic_tests(model_panback_4)

VIF for collinearity of variables:

       Variables Tolerance      VIF
1           rule 0.2265664 4.413717
2      polyarchy 0.2840071 3.521039
3      education 0.2876398 3.476571
4    ethnic_frac 0.3706685 2.697829
5      ling_frac 0.4016352 2.489821
6     relig_frac 0.6977128 1.433254
7     gdp_pc_log 0.2118401 4.720542
8       gini_log 0.7011431 1.426242
9      inflation 0.8506435 1.175581
10 trade_gdp_log 0.8387568 1.192241
11           oil 0.6008764 1.664236
12       mineral 0.7317690 1.366552
13   density_log 0.6890623 1.451248



Kolmogorov-Smirnov for normality:

	One-sample Kolmogorov-Smirnov test

data:  y
D = 0.10703, p-value = 0.07052
alternative hypothesis: two-sided




RESET test for correct specification:

	RESET test

data:  model
RESET = 1.1129, df1 = 2, df2 = 130, p-value = 0.3317




Breusch-Pagan test for homogeneity:

	studentized Breusch-Pagan test

data:  model
BP = 13.165, df = 13, p-value = 0.4352




Durbin-Watson test for autocorrelation:

	Durbin-Watson tes

In [61]:
# Specification 4, outcome pandem: the 13 non-categorical covariates only
# (no region_geo_* or income-group dummies).
model_pandem_4 <- lm(
  formula = pandem ~
    rule + polyarchy + education +
    ethnic_frac + ling_frac + relig_frac +
    gdp_pc_log + gini_log + inflation + trade_gdp_log +
    oil + mineral + density_log,
  data = data
)

# Print the diagnostic report from the project's helper for this fit.
diagnostic_tests(model_pandem_4)

VIF for collinearity of variables:

       Variables Tolerance      VIF
1           rule 0.2265664 4.413717
2      polyarchy 0.2840071 3.521039
3      education 0.2876398 3.476571
4    ethnic_frac 0.3706685 2.697829
5      ling_frac 0.4016352 2.489821
6     relig_frac 0.6977128 1.433254
7     gdp_pc_log 0.2118401 4.720542
8       gini_log 0.7011431 1.426242
9      inflation 0.8506435 1.175581
10 trade_gdp_log 0.8387568 1.192241
11           oil 0.6008764 1.664236
12       mineral 0.7317690 1.366552
13   density_log 0.6890623 1.451248



Kolmogorov-Smirnov for normality:

	One-sample Kolmogorov-Smirnov test

data:  y
D = 0.094282, p-value = 0.1491
alternative hypothesis: two-sided




RESET test for correct specification:

	RESET test

data:  model
RESET = 0.71259, df1 = 2, df2 = 130, p-value = 0.4923




Breusch-Pagan test for homogeneity:

	studentized Breusch-Pagan test

data:  model
BP = 15.551, df = 13, p-value = 0.2743




Durbin-Watson test for autocorrelation:

	Durbin-Watson te

In [62]:
# Specification 4, outcome pandem_dis: the 13 non-categorical covariates only
# (no region_geo_* or income-group dummies).
model_pandem_dis_4 <- lm(
  formula = pandem_dis ~
    rule + polyarchy + education +
    ethnic_frac + ling_frac + relig_frac +
    gdp_pc_log + gini_log + inflation + trade_gdp_log +
    oil + mineral + density_log,
  data = data
)

# Print the diagnostic report from the project's helper for this fit.
diagnostic_tests(model_pandem_dis_4)

VIF for collinearity of variables:

       Variables Tolerance      VIF
1           rule 0.2265664 4.413717
2      polyarchy 0.2840071 3.521039
3      education 0.2876398 3.476571
4    ethnic_frac 0.3706685 2.697829
5      ling_frac 0.4016352 2.489821
6     relig_frac 0.6977128 1.433254
7     gdp_pc_log 0.2118401 4.720542
8       gini_log 0.7011431 1.426242
9      inflation 0.8506435 1.175581
10 trade_gdp_log 0.8387568 1.192241
11           oil 0.6008764 1.664236
12       mineral 0.7317690 1.366552
13   density_log 0.6890623 1.451248



Kolmogorov-Smirnov for normality:

	One-sample Kolmogorov-Smirnov test

data:  y
D = 0.085622, p-value = 0.2348
alternative hypothesis: two-sided




RESET test for correct specification:

	RESET test

data:  model
RESET = 0.73609, df1 = 2, df2 = 130, p-value = 0.481




Breusch-Pagan test for homogeneity:

	studentized Breusch-Pagan test

data:  model
BP = 10.157, df = 13, p-value = 0.6811




Durbin-Watson test for autocorrelation:

	Durbin-Watson tes

In [63]:
# Plain-text comparison table for the three specification-4 models (no
# categorical dummies); columns follow the argument order.
stargazer(
  model_panback_4,
  model_pandem_4,
  model_pandem_dis_4,
  type = "text"
)


                                    Dependent variable:     
                               -----------------------------
                               panback   pandem   pandem_dis
                                 (1)       (2)       (3)    
------------------------------------------------------------
rule                            0.019     0.020     -0.008  
                               (0.061)   (0.065)   (0.061)  
                                                            
polyarchy                       0.055   -0.272*** -0.233*** 
                               (0.067)   (0.071)   (0.066)  
                                                            
education                       -0.003    0.001     0.0005  
                               (0.006)   (0.006)   (0.006)  
                                                            
ethnic_frac                     0.029    -0.013     0.009   
                               (0.057)   (0.060)   (0.056)  
                       

# 5) OLS without categorical variables excluding polyarchy

In [64]:
# Specification 5, outcome panback: non-categorical covariates only, with
# polyarchy excluded.
model_panback_5 <- lm(
  formula = panback ~
    rule + education +
    ethnic_frac + ling_frac + relig_frac +
    gdp_pc_log + gini_log + inflation + trade_gdp_log +
    oil + mineral + density_log,
  data = data
)

# Print the diagnostic report from the project's helper for this fit.
diagnostic_tests(model_panback_5)

VIF for collinearity of variables:

       Variables Tolerance      VIF
1           rule 0.4183321 2.390445
2      education 0.2878413 3.474136
3    ethnic_frac 0.3708833 2.696266
4      ling_frac 0.4024762 2.484619
5     relig_frac 0.7031366 1.422199
6     gdp_pc_log 0.2124491 4.707011
7       gini_log 0.7024337 1.423622
8      inflation 0.8585433 1.164764
9  trade_gdp_log 0.8390181 1.191869
10           oil 0.6400795 1.562306
11       mineral 0.7319796 1.366158
12   density_log 0.7035935 1.421275



Kolmogorov-Smirnov for normality:

	One-sample Kolmogorov-Smirnov test

data:  y
D = 0.11106, p-value = 0.05456
alternative hypothesis: two-sided




RESET test for correct specification:

	RESET test

data:  model
RESET = 0.90688, df1 = 2, df2 = 131, p-value = 0.4063




Breusch-Pagan test for homogeneity:

	studentized Breusch-Pagan test

data:  model
BP = 14.254, df = 12, p-value = 0.2848




Durbin-Watson test for autocorrelation:

	Durbin-Watson test

data:  model
DW = 1.7889, p-valu

In [65]:
# Specification 5, outcome pandem: non-categorical covariates only, with
# polyarchy excluded.
model_pandem_5 <- lm(
  formula = pandem ~
    rule + education +
    ethnic_frac + ling_frac + relig_frac +
    gdp_pc_log + gini_log + inflation + trade_gdp_log +
    oil + mineral + density_log,
  data = data
)

# Print the diagnostic report from the project's helper for this fit.
diagnostic_tests(model_pandem_5)

VIF for collinearity of variables:

       Variables Tolerance      VIF
1           rule 0.4183321 2.390445
2      education 0.2878413 3.474136
3    ethnic_frac 0.3708833 2.696266
4      ling_frac 0.4024762 2.484619
5     relig_frac 0.7031366 1.422199
6     gdp_pc_log 0.2124491 4.707011
7       gini_log 0.7024337 1.423622
8      inflation 0.8585433 1.164764
9  trade_gdp_log 0.8390181 1.191869
10           oil 0.6400795 1.562306
11       mineral 0.7319796 1.366158
12   density_log 0.7035935 1.421275



Kolmogorov-Smirnov for normality:

	One-sample Kolmogorov-Smirnov test

data:  y
D = 0.07949, p-value = 0.3148
alternative hypothesis: two-sided




RESET test for correct specification:

	RESET test

data:  model
RESET = 0.46227, df1 = 2, df2 = 131, p-value = 0.6309




Breusch-Pagan test for homogeneity:

	studentized Breusch-Pagan test

data:  model
BP = 13.244, df = 12, p-value = 0.3515




Durbin-Watson test for autocorrelation:

	Durbin-Watson test

data:  model
DW = 1.753, p-value 

In [66]:
# Specification 5, outcome pandem_dis: non-categorical covariates only, with
# polyarchy excluded.
model_pandem_dis_5 <- lm(
  formula = pandem_dis ~
    rule + education +
    ethnic_frac + ling_frac + relig_frac +
    gdp_pc_log + gini_log + inflation + trade_gdp_log +
    oil + mineral + density_log,
  data = data
)

# Print the diagnostic report from the project's helper for this fit.
diagnostic_tests(model_pandem_dis_5)

VIF for collinearity of variables:

       Variables Tolerance      VIF
1           rule 0.4183321 2.390445
2      education 0.2878413 3.474136
3    ethnic_frac 0.3708833 2.696266
4      ling_frac 0.4024762 2.484619
5     relig_frac 0.7031366 1.422199
6     gdp_pc_log 0.2124491 4.707011
7       gini_log 0.7024337 1.423622
8      inflation 0.8585433 1.164764
9  trade_gdp_log 0.8390181 1.191869
10           oil 0.6400795 1.562306
11       mineral 0.7319796 1.366158
12   density_log 0.7035935 1.421275



Kolmogorov-Smirnov for normality:

	One-sample Kolmogorov-Smirnov test

data:  y
D = 0.085057, p-value = 0.2414
alternative hypothesis: two-sided




RESET test for correct specification:

	RESET test

data:  model
RESET = 0.74064, df1 = 2, df2 = 131, p-value = 0.4788




Breusch-Pagan test for homogeneity:

	studentized Breusch-Pagan test

data:  model
BP = 12.083, df = 12, p-value = 0.4391




Durbin-Watson test for autocorrelation:

	Durbin-Watson test

data:  model
DW = 1.8512, p-valu

In [67]:
# Plain-text comparison table for the three specification-5 models (no
# categorical dummies, polyarchy excluded); columns follow the argument order.
stargazer(
  model_panback_5,
  model_pandem_5,
  model_pandem_dis_5,
  type = "text"
)


                                    Dependent variable:     
                               -----------------------------
                               panback   pandem   pandem_dis
                                 (1)       (2)       (3)    
------------------------------------------------------------
rule                            0.053   -0.148*** -0.152*** 
                               (0.045)   (0.050)   (0.046)  
                                                            
education                       -0.003    0.002     0.001   
                               (0.006)   (0.006)   (0.006)  
                                                            
ethnic_frac                     0.028    -0.008     0.014   
                               (0.057)   (0.063)   (0.059)  
                                                            
ling_frac                       0.032     0.044     0.015   
                               (0.047)   (0.053)   (0.049)  
                       

# 6) OLS without categorical variables excluding rule

In [68]:
# Specification 6, outcome panback: non-categorical covariates only, with
# rule excluded.
model_panback_6 <- lm(
  formula = panback ~
    polyarchy + education +
    ethnic_frac + ling_frac + relig_frac +
    gdp_pc_log + gini_log + inflation + trade_gdp_log +
    oil + mineral + density_log,
  data = data
)

# Print the diagnostic report from the project's helper for this fit.
diagnostic_tests(model_panback_6)

VIF for collinearity of variables:

       Variables Tolerance      VIF
1      polyarchy 0.5243905 1.906976
2      education 0.2887749 3.462904
3    ethnic_frac 0.3715966 2.691090
4      ling_frac 0.4044777 2.472324
5     relig_frac 0.6977940 1.433088
6     gdp_pc_log 0.2489348 4.017115
7       gini_log 0.7011461 1.426236
8      inflation 0.8837758 1.131509
9  trade_gdp_log 0.8400464 1.190410
10           oil 0.6112847 1.635899
11       mineral 0.7331756 1.363930
12   density_log 0.6926202 1.443793



Kolmogorov-Smirnov for normality:

	One-sample Kolmogorov-Smirnov test

data:  y
D = 0.11421, p-value = 0.04434
alternative hypothesis: two-sided




RESET test for correct specification:

	RESET test

data:  model
RESET = 0.99562, df1 = 2, df2 = 131, p-value = 0.3723




Breusch-Pagan test for homogeneity:

	studentized Breusch-Pagan test

data:  model
BP = 12.627, df = 12, p-value = 0.3967




Durbin-Watson test for autocorrelation:

	Durbin-Watson test

data:  model
DW = 1.7617, p-valu

In [69]:
# Specification 6, outcome pandem: non-categorical covariates only, with
# rule excluded.
model_pandem_6 <- lm(
  formula = pandem ~
    polyarchy + education +
    ethnic_frac + ling_frac + relig_frac +
    gdp_pc_log + gini_log + inflation + trade_gdp_log +
    oil + mineral + density_log,
  data = data
)

# Print the diagnostic report from the project's helper for this fit.
diagnostic_tests(model_pandem_6)

VIF for collinearity of variables:

       Variables Tolerance      VIF
1      polyarchy 0.5243905 1.906976
2      education 0.2887749 3.462904
3    ethnic_frac 0.3715966 2.691090
4      ling_frac 0.4044777 2.472324
5     relig_frac 0.6977940 1.433088
6     gdp_pc_log 0.2489348 4.017115
7       gini_log 0.7011461 1.426236
8      inflation 0.8837758 1.131509
9  trade_gdp_log 0.8400464 1.190410
10           oil 0.6112847 1.635899
11       mineral 0.7331756 1.363930
12   density_log 0.6926202 1.443793



Kolmogorov-Smirnov for normality:

	One-sample Kolmogorov-Smirnov test

data:  y
D = 0.094868, p-value = 0.1444
alternative hypothesis: two-sided




RESET test for correct specification:

	RESET test

data:  model
RESET = 0.6997, df1 = 2, df2 = 131, p-value = 0.4986




Breusch-Pagan test for homogeneity:

	studentized Breusch-Pagan test

data:  model
BP = 14.533, df = 12, p-value = 0.2679




Durbin-Watson test for autocorrelation:

	Durbin-Watson test

data:  model
DW = 1.8134, p-value

In [70]:
# Specification 6, outcome pandem_dis: non-categorical covariates only, with
# rule excluded.
model_pandem_dis_6 <- lm(
  formula = pandem_dis ~
    polyarchy + education +
    ethnic_frac + ling_frac + relig_frac +
    gdp_pc_log + gini_log + inflation + trade_gdp_log +
    oil + mineral + density_log,
  data = data
)

# Print the diagnostic report from the project's helper for this fit.
diagnostic_tests(model_pandem_dis_6)

VIF for collinearity of variables:

       Variables Tolerance      VIF
1      polyarchy 0.5243905 1.906976
2      education 0.2887749 3.462904
3    ethnic_frac 0.3715966 2.691090
4      ling_frac 0.4044777 2.472324
5     relig_frac 0.6977940 1.433088
6     gdp_pc_log 0.2489348 4.017115
7       gini_log 0.7011461 1.426236
8      inflation 0.8837758 1.131509
9  trade_gdp_log 0.8400464 1.190410
10           oil 0.6112847 1.635899
11       mineral 0.7331756 1.363930
12   density_log 0.6926202 1.443793



Kolmogorov-Smirnov for normality:

	One-sample Kolmogorov-Smirnov test

data:  y
D = 0.085174, p-value = 0.24
alternative hypothesis: two-sided




RESET test for correct specification:

	RESET test

data:  model
RESET = 0.73607, df1 = 2, df2 = 131, p-value = 0.481




Breusch-Pagan test for homogeneity:

	studentized Breusch-Pagan test

data:  model
BP = 10.118, df = 12, p-value = 0.6056




Durbin-Watson test for autocorrelation:

	Durbin-Watson test

data:  model
DW = 1.9336, p-value =

In [71]:
# Plain-text comparison table for the three specification-6 models (no
# categorical dummies, rule excluded); columns follow the argument order.
stargazer(
  model_panback_6,
  model_pandem_6,
  model_pandem_dis_6,
  type = "text"
)


                                    Dependent variable:     
                               -----------------------------
                               panback   pandem   pandem_dis
                                 (1)       (2)       (3)    
------------------------------------------------------------
polyarchy                       0.069   -0.257*** -0.239*** 
                               (0.049)   (0.052)   (0.049)  
                                                            
education                       -0.003    0.001     0.001   
                               (0.006)   (0.006)   (0.006)  
                                                            
ethnic_frac                     0.030    -0.012     0.009   
                               (0.057)   (0.060)   (0.056)  
                                                            
ling_frac                       0.035     0.036     0.007   
                               (0.047)   (0.050)   (0.047)  
                       

In [74]:
# Export the nine models with categorical dummies (specifications 1-3) as a
# single HTML table. Columns (1)-(9) follow the argument order below.

# Writing `out` fails when the target directory is missing, so create it
# first. This does nothing if the directory already exists.
dir.create(path_write, recursive = TRUE, showWarnings = FALSE)

stargazer(
  model_panback_1, model_pandem_1, model_pandem_dis_1,
  model_panback_2, model_pandem_2, model_pandem_dis_2,
  model_panback_3, model_pandem_3, model_pandem_dis_3,
  type = "html",
  out = file.path(path_write, "Table_1_regression.html")
)


<table style="text-align:center"><tr><td colspan="10" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left"></td><td colspan="9"><em>Dependent variable:</em></td></tr>
<tr><td></td><td colspan="9" style="border-bottom: 1px solid black"></td></tr>
<tr><td style="text-align:left"></td><td>panback</td><td>pandem</td><td>pandem_dis</td><td>panback</td><td>pandem</td><td>pandem_dis</td><td>panback</td><td>pandem</td><td>pandem_dis</td></tr>
<tr><td style="text-align:left"></td><td>(1)</td><td>(2)</td><td>(3)</td><td>(4)</td><td>(5)</td><td>(6)</td><td>(7)</td><td>(8)</td><td>(9)</td></tr>
<tr><td colspan="10" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left">rule</td><td>0.067</td><td>0.033</td><td>0.027</td><td>0.119<sup>**</sup></td><td>-0.105<sup>*</sup></td><td>-0.097<sup>*</sup></td><td></td><td></td><td></td></tr>
<tr><td style="text-align:left"></td><td>(0.068)</td><td>(0.077)</td><td>(0.073)</td><td>(0.050)</td><td>(0.058)</

In [75]:
# Export the nine models without categorical dummies (specifications 4-6) as
# a single HTML table. Columns (1)-(9) follow the argument order below.

# Writing `out` fails when the target directory is missing, so create it
# first. This does nothing if the directory already exists.
dir.create(path_write, recursive = TRUE, showWarnings = FALSE)

stargazer(
  model_panback_4, model_pandem_4, model_pandem_dis_4,
  model_panback_5, model_pandem_5, model_pandem_dis_5,
  model_panback_6, model_pandem_6, model_pandem_dis_6,
  type = "html",
  out = file.path(path_write, "Table_2_regression.html")
)


<table style="text-align:center"><tr><td colspan="10" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left"></td><td colspan="9"><em>Dependent variable:</em></td></tr>
<tr><td></td><td colspan="9" style="border-bottom: 1px solid black"></td></tr>
<tr><td style="text-align:left"></td><td>panback</td><td>pandem</td><td>pandem_dis</td><td>panback</td><td>pandem</td><td>pandem_dis</td><td>panback</td><td>pandem</td><td>pandem_dis</td></tr>
<tr><td style="text-align:left"></td><td>(1)</td><td>(2)</td><td>(3)</td><td>(4)</td><td>(5)</td><td>(6)</td><td>(7)</td><td>(8)</td><td>(9)</td></tr>
<tr><td colspan="10" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left">rule</td><td>0.019</td><td>0.020</td><td>-0.008</td><td>0.053</td><td>-0.148<sup>***</sup></td><td>-0.152<sup>***</sup></td><td></td><td></td><td></td></tr>
<tr><td style="text-align:left"></td><td>(0.061)</td><td>(0.065)</td><td>(0.061)</td><td>(0.045)</td><td>(0.050)</td><td>(