In [1]:
# install stuff, set scientific variables to 999 to avoid scientific notation
if(!require("pacman")) install.packages("pacman")
pacman::p_load(WDI, tidyr, dplyr, knitr, broom)
options(scipen = 999)

Loading required package: pacman



In [2]:
# get all of the WDI indicators if indicator file is not already downloaded
indicators <- if (file.exists("indicators.csv")) read.csv("indicators.csv") else WDI(indicator = c("SL.EMP.TOTL.SP.ZS", "SE.XPD.TOTL.GD.ZS", "NY.GDP.PCAP.CD", "SM.POP.NETM", "SE.TER.CUAT.BA.ZS"), country = "all") %>% write.csv("indicators.csv")
# rename the columns
wdi_indicators <- rename(indicators, c("Country Code" = "iso3c", "Total Employment" = "SL.EMP.TOTL.SP.ZS", "Education Expenditure % of GDP" = "SE.XPD.TOTL.GD.ZS", "GDP per capita" = "NY.GDP.PCAP.CD", "Net Migration" = "SM.POP.NETM", "PCT Tertiary Education" = "SE.TER.CUAT.BA.ZS"))

In [3]:
gii <- read.csv("./gii_analysis/gii_2013_2020.csv")
# filter out the rows where the indicator is "Global Innovation Index" and the subindicator type is "Score"
gii_score <- gii %>% filter(Indicator == "Global Innovation Index", Subindicator.Type == "Score (0-100)")
gii_score <- gii_score %>%
    gather(year, value, X2013:X2020) %>%
    select(-Indicator, -Indicator.Id, -Country.Name) %>%
    spread(Subindicator.Type, value)
gii_score <- rename(gii_score, c("Score" = "Score (0-100)"))

# filter out the rows where the indicator is "Global Innovation Index" and the subindicator type is "Rank"
gii_rank <- gii %>% filter(Indicator == "Global Innovation Index", Subindicator.Type == "Rank")
gii_rank <- gii_rank %>%
    gather(year, value, X2013:X2020) %>%
    select(-Indicator, -Country.Name, -Indicator.Id) %>%
    spread(Subindicator.Type, value)

# merge into one dataframe
gii_rank_score <- merge(gii_score, gii_rank, by = c("Country.ISO3", "year"))
head(gii_rank_score)
# remove the X from the year column and convert to integer
gii_rank_score$year <- as.integer(gsub("X", "", gii_rank_score$year))


Unnamed: 0_level_0,Country.ISO3,year,Score,Rank
Unnamed: 0_level_1,<chr>,<chr>,<dbl>,<dbl>
1,AGO,X2013,23.5,135.0
2,AGO,X2014,23.8,135.0
3,AGO,X2015,26.2,120.0
4,AGO,X2016,,
5,AGO,X2017,,
6,AGO,X2018,,


In [14]:
# merge gii_rank_score with wdi_indicators
gii_wdi <- gii_rank_score %>% right_join(wdi_indicators, by = c("Country.ISO3" = "Country Code", "year" = "year"), all.y = TRUE)
# rename the columns
gii_wdi <- gii_wdi %>%
    rename(c("CountryCode" = "Country.ISO3", "Year" = "year", "Edu" = "PCT Tertiary Education", "Mig" = "Net Migration", "EduExp" = "Education Expenditure % of GDP", "TotEmp" = "Total Employment")) %>%
    select("Year", "CountryCode", "Score", "Rank", "Edu", "Mig", "EduExp", "TotEmp")

# write out gii_wdi to csv if it doesn't already exist
if (!file.exists("./gii_analysis/gii_wdi.csv")) write.csv(gii_wdi, "./gii_analysis/gii_wdi.csv")

# lag everything.
gii_wdi <- gii_wdi %>%
    group_by(CountryCode) %>%
    mutate(lag.Mig01 = lag(Mig, n = 1, default = NA)) %>%
    mutate(lag.Mig05 = lag(Mig, n = 5, default = NA)) %>%
    mutate(lag.Mig10 = lag(Mig, n = 10, default = NA)) %>%
    mutate(lag.Mig20 = lag(Mig, n = 20, default = NA)) %>%
    mutate(lag.Edu01 = lag(Edu, n = 1, default = NA)) %>%
    mutate(lag.Edu05 = lag(Edu, n = 5, default = NA)) %>%
    mutate(lag.Edu10 = lag(Edu, n = 10, default = NA)) %>%
    mutate(lag.Edu20 = lag(Edu, n = 20, default = NA)) %>%
    mutate(lag.EduExp01 = lag(EduExp, n = 1, default = NA)) %>%
    mutate(lag.EduExp05 = lag(EduExp, n = 5, default = NA)) %>%
    mutate(lag.EduExp10 = lag(EduExp, n = 10, default = NA)) %>%
    mutate(lag.EduExp20 = lag(EduExp, n = 20, default = NA)) %>%
    mutate(lag.TotEmp01 = lag(TotEmp, n = 1, default = NA)) %>%
    mutate(lag.TotEmp05 = lag(TotEmp, n = 5, default = NA)) %>%
    mutate(lag.TotEmp10 = lag(TotEmp, n = 10, default = NA)) %>%
    mutate(lag.TotEmp20 = lag(TotEmp, n = 20, default = NA))

In [10]:
lag1 <- lm(Score ~ lag.Mig01 + lag.Edu01 + lag.TotEmp01 + lag.EduExp01, data = gii_wdi)
nobs(lag1)
summary(lag1)


Call:
lm(formula = Score ~ lag.Mig01 + lag.Edu01 + lag.TotEmp01 + lag.EduExp01, 
    data = gii_wdi)

Residuals:
     Min       1Q   Median       3Q      Max 
-28.5547  -4.8131  -0.3217   4.8295  16.7270 

Coefficients:
                 Estimate   Std. Error t value             Pr(>|t|)    
(Intercept)  16.447633797  3.876665857   4.243      0.0000303437851 ***
lag.Mig01     0.000010125  0.000002526   4.008      0.0000792820792 ***
lag.Edu01     0.749106223  0.056630047  13.228 < 0.0000000000000002 ***
lag.TotEmp01 -0.016558744  0.054696231  -0.303                0.762    
lag.EduExp01  2.244549820  0.331449533   6.772      0.0000000000787 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 7.788 on 271 degrees of freedom
  (16216 observations deleted due to missingness)
Multiple R-squared:  0.5717,	Adjusted R-squared:  0.5653 
F-statistic: 90.42 on 4 and 271 DF,  p-value: < 0.00000000000000022


In [11]:
lag05 <- lm(Score ~ lag.Mig05 + lag.Edu05 + lag.TotEmp05 + lag.EduExp05, data = gii_wdi)
nobs(lag05)
summary(lag05)


Call:
lm(formula = Score ~ lag.Mig05 + lag.Edu05 + lag.TotEmp05 + lag.EduExp05, 
    data = gii_wdi)

Residuals:
     Min       1Q   Median       3Q      Max 
-28.3616  -4.6885  -0.8597   3.8622  15.8738 

Coefficients:
                 Estimate   Std. Error t value             Pr(>|t|)    
(Intercept)  16.376798851  5.413838107   3.025             0.003079 ** 
lag.Mig05     0.000010133  0.000004214   2.405             0.017800 *  
lag.Edu05     0.864284832  0.089511167   9.656 < 0.0000000000000002 ***
lag.TotEmp05 -0.034747696  0.080111199  -0.434             0.665302    
lag.EduExp05  1.951629228  0.506653499   3.852             0.000195 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 7.587 on 113 degrees of freedom
  (16374 observations deleted due to missingness)
Multiple R-squared:  0.6161,	Adjusted R-squared:  0.6025 
F-statistic: 45.34 on 4 and 113 DF,  p-value: < 0.00000000000000022


In [12]:
lag10 <- lm(Score ~ lag.Mig10 + lag.Edu10 + lag.TotEmp10 + lag.EduExp10, data = gii_wdi)
nobs(lag10)
summary(lag10)

ERROR: Error in lm.fit(x, y, offset = offset, singular.ok = singular.ok, ...): 0 (non-NA) cases


In [13]:
lag20 <- lm(Score ~ lag.Mig20 + lag.Edu20 + lag.TotEmp20 + lag.EduExp20, data = gii_wdi)
nobs(lag20)
summary(lag20)

ERROR: Error in lm.fit(x, y, offset = offset, singular.ok = singular.ok, ...): 0 (non-NA) cases
