In [7]:
# install stuff, set scientific variables to 999 to avoid scientific notation
if(!require("pacman")) install.packages("pacman")
pacman::p_load(WDI, tidyr, car, dplyr, knitr, broom)
options(scipen = 999)

Loading required package: pacman

Installing package into ‘/home/codespace/R/x86_64-pc-linux-gnu-library/3.6’
(as ‘lib’ is unspecified)

“dependency ‘pbkrtest’ is not available”
“installation of package ‘car’ had non-zero exit status”
“”
“there is no package called ‘car’”
“Failed to install/load:
car”


In [2]:
# get all of the WDI indicators if indicator file is not already downloaded
indicators <- if (file.exists("indicators.csv")) read.csv("indicators.csv") else WDI(indicator = c("SL.EMP.TOTL.SP.ZS", "SE.XPD.TOTL.GD.ZS", "NY.GDP.PCAP.CD", "SM.POP.NETM", "SE.TER.CUAT.BA.ZS"), country = "all") %>% write.csv("indicators.csv")
# rename the columns
wdi_indicators <- rename(indicators, c("Country Code" = "iso3c", "Total Employment" = "SL.EMP.TOTL.SP.ZS", "Education Expenditure % of GDP" = "SE.XPD.TOTL.GD.ZS", "GDP per capita" = "NY.GDP.PCAP.CD", "Net Migration" = "SM.POP.NETM", "PCT Tertiary Education" = "SE.TER.CUAT.BA.ZS"))

In [3]:
gii <- read.csv("./gii_analysis/gii_2013_2020.csv")
# filter out the rows where the indicator is "Global Innovation Index" and the subindicator type is "Score"
gii_score <- gii %>% filter(Indicator == "Global Innovation Index", Subindicator.Type == "Score (0-100)")
gii_score <- gii_score %>%
    gather(year, value, X2013:X2020) %>%
    select(-Indicator, -Indicator.Id, -Country.Name) %>%
    spread(Subindicator.Type, value)
gii_score <- rename(gii_score, c("Score" = "Score (0-100)"))

# filter out the rows where the indicator is "Global Innovation Index" and the subindicator type is "Rank"
gii_rank <- gii %>% filter(Indicator == "Global Innovation Index", Subindicator.Type == "Rank")
gii_rank <- gii_rank %>%
    gather(year, value, X2013:X2020) %>%
    select(-Indicator, -Country.Name, -Indicator.Id) %>%
    spread(Subindicator.Type, value)

# merge into one dataframe
gii_rank_score <- merge(gii_score, gii_rank, by = c("Country.ISO3", "year"))
head(gii_rank_score)
# remove the X from the year column and convert to integer
gii_rank_score$year <- as.integer(gsub("X", "", gii_rank_score$year))


Unnamed: 0_level_0,Country.ISO3,year,Score,Rank
Unnamed: 0_level_1,<fct>,<chr>,<dbl>,<dbl>
1,AGO,X2013,23.5,135.0
2,AGO,X2014,23.8,135.0
3,AGO,X2015,26.2,120.0
4,AGO,X2016,,
5,AGO,X2017,,
6,AGO,X2018,,


In [5]:
# merge gii_rank_score with wdi_indicators
gii_wdi <- gii_rank_score %>% right_join(wdi_indicators, by = c("Country.ISO3" = "Country Code", "year" = "year"))
# rename the columns
gii_wdi <- gii_wdi %>%
    rename(c("CountryCode" = "Country.ISO3", "Year" = "year", "Edu" = "PCT Tertiary Education", "Mig" = "Net Migration", "EduExp" = "Education Expenditure % of GDP", "TotEmp" = "Total Employment")) %>%
    select("Year", "CountryCode", "Score", "Rank", "Edu", "Mig", "EduExp", "TotEmp")

# write out gii_wdi to csv if it doesn't already exist
if (!file.exists("./gii_analysis/gii_wdi.csv")) write.csv(gii_wdi, "./gii_analysis/gii_wdi.csv")

# lag everything.
gii_wdi <- gii_wdi %>%
    group_by(CountryCode) %>%
    mutate(lag.Mig01 = lag(Mig, n = 1, default = NA)) %>%
    mutate(lag.Mig05 = lag(Mig, n = 5, default = NA)) %>%
    mutate(lag.Mig10 = lag(Mig, n = 10, default = NA)) %>%
    mutate(lag.Mig20 = lag(Mig, n = 20, default = NA)) %>%
    mutate(lag.Edu01 = lag(Edu, n = 1, default = NA)) %>%
    mutate(lag.Edu05 = lag(Edu, n = 5, default = NA)) %>%
    mutate(lag.Edu10 = lag(Edu, n = 10, default = NA)) %>%
    mutate(lag.Edu20 = lag(Edu, n = 20, default = NA)) %>%
    mutate(lag.EduExp01 = lag(EduExp, n = 1, default = NA)) %>%
    mutate(lag.EduExp05 = lag(EduExp, n = 5, default = NA)) %>%
    mutate(lag.EduExp10 = lag(EduExp, n = 10, default = NA)) %>%
    mutate(lag.EduExp20 = lag(EduExp, n = 20, default = NA)) %>%
    mutate(lag.TotEmp01 = lag(TotEmp, n = 1, default = NA)) %>%
    mutate(lag.TotEmp05 = lag(TotEmp, n = 5, default = NA)) %>%
    mutate(lag.TotEmp10 = lag(TotEmp, n = 10, default = NA)) %>%
    mutate(lag.TotEmp20 = lag(TotEmp, n = 20, default = NA))

In [6]:
lag1 <- lm(Score ~ lag.Mig01 + lag.Edu01 + lag.TotEmp01 + lag.EduExp01, data = gii_wdi)
nobs(lag1)
summary(lag1)


Call:
lm(formula = Score ~ lag.Mig01 + lag.Edu01 + lag.TotEmp01 + lag.EduExp01, 
    data = gii_wdi)

Residuals:
     Min       1Q   Median       3Q      Max 
-28.4685  -4.7542  -0.3441   4.8511  16.7390 

Coefficients:
                 Estimate   Std. Error t value             Pr(>|t|)    
(Intercept)  16.846049515  3.949533597   4.265      0.0000276039206 ***
lag.Mig01     0.000010150  0.000002526   4.018      0.0000761950429 ***
lag.Edu01     0.750243273  0.056476255  13.284 < 0.0000000000000002 ***
lag.TotEmp01 -0.023602891  0.057828929  -0.408                0.683    
lag.EduExp01  2.240464484  0.331121699   6.766      0.0000000000813 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 7.787 on 271 degrees of freedom
  (16216 observations deleted due to missingness)
Multiple R-squared:  0.5718,	Adjusted R-squared:  0.5655 
F-statistic: 90.46 on 4 and 271 DF,  p-value: < 0.00000000000000022


In [7]:
lag05 <- lm(Score ~ lag.Mig05 + lag.Edu05 + lag.TotEmp05 + lag.EduExp05, data = gii_wdi)
nobs(lag05)
summary(lag05)


Call:
lm(formula = Score ~ lag.Mig05 + lag.Edu05 + lag.TotEmp05 + lag.EduExp05, 
    data = gii_wdi)

Residuals:
     Min       1Q   Median       3Q      Max 
-28.1564  -4.9021  -0.7819   3.9213  16.1670 

Coefficients:
                 Estimate   Std. Error t value             Pr(>|t|)    
(Intercept)  17.845972056  5.495314976   3.247             0.001533 ** 
lag.Mig05     0.000010200  0.000004208   2.424             0.016933 *  
lag.Edu05     0.863009090  0.088769062   9.722 < 0.0000000000000002 ***
lag.TotEmp05 -0.061208720  0.084041610  -0.728             0.467928    
lag.EduExp05  1.959979844  0.506086007   3.873             0.000181 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 7.576 on 113 degrees of freedom
  (16374 observations deleted due to missingness)
Multiple R-squared:  0.6173,	Adjusted R-squared:  0.6037 
F-statistic: 45.57 on 4 and 113 DF,  p-value: < 0.00000000000000022


In [8]:
lag10 <- lm(Score ~ lag.Mig10 + lag.Edu10 + lag.TotEmp10 + lag.EduExp10, data = gii_wdi)
nobs(lag10)
summary(lag10)

ERROR: Error in lm.fit(x, y, offset = offset, singular.ok = singular.ok, ...): 0 (non-NA) cases


In [9]:
lag20 <- lm(Score ~ lag.Mig20 + lag.Edu20 + lag.TotEmp20 + lag.EduExp20, data = gii_wdi)
nobs(lag20)
summary(lag20)

ERROR: Error in lm.fit(x, y, offset = offset, singular.ok = singular.ok, ...): 0 (non-NA) cases


In [11]:
model4 <- lm(Score ~ Mig + Edu + Mig:Edu, data = gii_wdi)
nobs(model4)
summary(model4)


Call:
lm(formula = Score ~ Mig + Edu + Mig:Edu, data = gii_wdi)

Residuals:
     Min       1Q   Median       3Q      Max 
-23.4586  -5.3258  -0.8875   5.4187  19.0858 

Coefficients:
                 Estimate    Std. Error t value            Pr(>|t|)    
(Intercept) 25.4191850458  1.0844105193  23.441 <0.0000000000000002 ***
Mig          0.0000196087  0.0000076642   2.558               0.011 *  
Edu          0.7643891871  0.0516739137  14.793 <0.0000000000000002 ***
Mig:Edu     -0.0000002212  0.0000002634  -0.840               0.402    
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 8.318 on 322 degrees of freedom
  (16166 observations deleted due to missingness)
Multiple R-squared:  0.4893,	Adjusted R-squared:  0.4845 
F-statistic: 102.8 on 3 and 322 DF,  p-value: < 0.00000000000000022


In [13]:
model5 <- lm(Score ~ Mig + EduExp + Mig:EduExp, data = gii_wdi)
nobs(model5)
summary(model5)


Call:
lm(formula = Score ~ Mig + EduExp + Mig:EduExp, data = gii_wdi)

Residuals:
    Min      1Q  Median      3Q     Max 
-29.808  -8.308  -1.933   7.548  30.754 

Coefficients:
                Estimate   Std. Error t value             Pr(>|t|)    
(Intercept) 28.387074178  1.108628924  25.606 < 0.0000000000000002 ***
Mig         -0.000007655  0.000004502  -1.700               0.0894 .  
EduExp       1.743836346  0.227923417   7.651   0.0000000000000493 ***
Mig:EduExp   0.000005487  0.000001115   4.922   0.0000010103192624 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 10.86 on 941 degrees of freedom
  (15547 observations deleted due to missingness)
Multiple R-squared:  0.1536,	Adjusted R-squared:  0.1509 
F-statistic: 56.93 on 3 and 941 DF,  p-value: < 0.00000000000000022


In [6]:
car::vif(model5)

ERROR: Error in loadNamespace(name): there is no package called ‘car’
