# Survival Analysis in International Relations
## Replication Code for Figures in Chapter

We need the following packages to run this notebook:
```R
install.packages(c("haven", "stargazer", "survival","ggplot2", "ggrepel", "flexsurv", 'eha', "survminer"))
```

In [None]:
# Install the packages the introduction's install.packages() call does not
# cover: "coxed" and "easypackages" as before, plus "dplyr", "tidyr" and
# "muhaz", which later cells call but which were never installed here.
install.packages(c("coxed", "easypackages", "dplyr", "tidyr", "muhaz"))

In [None]:
# Attach every package the notebook relies on with a single call.
easypackages::libraries(
  "haven", "stargazer", "survival", "ggplot2", "eha",
  "coxed", "dplyr", "flexsurv", "survminer"
)

# A large `scipen` makes R prefer fixed over scientific notation when printing.
options(scipen = 999)

#### Figure 2: Duration of months until civil conflict ends, Findley and Young (2015)

In [None]:
# Read the main duration data and drop rows without a recorded `warmonths`.
duration <- haven::read_dta("replication-data/duration_main.dta")
duration <- as.data.frame(duration[!is.na(duration$warmonths), ])

# Density-scaled histogram of war duration with a kernel density overlay.
a <- ggplot(duration, aes(warmonths))
a +
  geom_histogram(
    aes(y = ..density..),
    binwidth = 15,
    colour = "black",
    fill = "white"
  ) +
  geom_density() +
  labs(
    title = "Duration of months until civil conflict ends, Findley and Young (2015)",
    x = "Months until civil war ends",
    y = ""
  ) +
  xlim(c(0, 800))

#### Figure 4 - Weibull Model: Civil War Hazard Expectations 

The plots are produced using two packages, `eha` and `flexsurv`, to better illustrate the differences and similarities between them.

In [None]:
# Read the estimation data set and keep only the rows flagged `_st == 1`.
# NOTE(review): `_st`, `_t0` and `_t` look like Stata `stset` variables
# (sample flag, entry time, exit time) -- confirm against the .dta file.
duration <- haven::read_dta("replication-data/duration_main_est.dta")
duration <- duration[duration[["_st"]] == 1, ]

# Copy the interval bounds to syntactic names so they can be used in formulas.
duration$start_date <- duration[["_t0"]]
duration$end_date <- duration[["_t"]]

duration <- as.data.frame(duration)

In [None]:
# Weibull model fitted with flexsurv on the (start_date, end_date] intervals,
# with `warend` as the event indicator.
flex_weibull <- flexsurv::flexsurvreg(
  Surv(start_date, end_date, warend) ~
    lagLogTotalWarRelated + logpop + elf + lngdp + uppsalaMaxed +
    logbattledeaths + mountains + guarantee,
  data = duration,
  dist = "weibull"
)

We create two new dataframes at different values of security guarantee and terrorism to be used in plotting the model

In [None]:
# Sample means of the control variables (one-row data frame).
duration_m <- duration %>%
  select(
    logpop,
    elf,
    uppsalaMaxed,
    lngdp,
    logbattledeaths,
    mountains
  ) %>%
  summarize_all(., mean, na.rm = TRUE)

# Prediction frames with exactly two rows each: the two terrorism values that
# the plot legends call "Mean Level of Terrorism" (0.8247647) and
# "One SD Increase" (1.9610567), with all controls held at their means.
# The terrorism column is bound to the two-row frame directly; stacking the
# frame a second time before binding (as was done previously) produced four
# rows, i.e. every profile twice.

# No security guarantee.
duration_g <- cbind(guarantee = c(0, 0), rbind(duration_m, duration_m))
duration_g <- cbind(
  lagLogTotalWarRelated = c(0.8247647, 1.9610567),
  duration_g
)

# Security guarantee.
duration_g1 <- cbind(guarantee = c(1, 1), rbind(duration_m, duration_m))
duration_g1 <- cbind(
  lagLogTotalWarRelated = c(0.8247647, 1.9610567),
  duration_g1
)

To save the figures as PDF files, we use the `pdf` function. For example:

```R
pdf("model_output/Figure4a_flexsurv_weibull.pdf")
```

In [None]:
# Empty canvas for the Weibull hazard curves (no security guarantee).
# The curves drawn below are hazards (type = "hazard"), so the y axis is
# labelled as a hazard rate rather than a survival rate.
plot(
  NULL,
  xlim = c(0, 600), ylim = c(0, 0.06),
  ylab = "Hazard Rate", xlab = "Analysis Time",
  main = "Weibull Expectation: No Security Guarantee",
  axes = FALSE
)
axis(side = 1)
axis(side = 2)

# One hazard curve per row of `duration_g`, without confidence bands.
lines(
  flex_weibull,
  newdata = duration_g, type = "hazard", ci = FALSE,
  col = c("blue", "red")
)
legend(
  "topright",
  legend = c("Mean Level of Terrorism", "One SD Increase"),
  col = c("blue", "red"), lty = c(1, 1)
)

In [None]:
# Empty canvas for the Weibull hazard curves (security guarantee in place).
# The curves drawn below are hazards (type = "hazard"), so the y axis is
# labelled as a hazard rate rather than a survival rate.
plot(
  NULL,
  xlim = c(0, 600), ylim = c(0, 0.06),
  ylab = "Hazard Rate", xlab = "Analysis Time",
  main = "Weibull Expectation: Security Guarantee",
  axes = FALSE
)
axis(side = 1)
axis(side = 2)

# One hazard curve per row of `duration_g1`, without confidence bands.
lines(
  flex_weibull,
  newdata = duration_g1, type = "hazard", ci = FALSE,
  col = c("blue", "red")
)
legend(
  "topright",
  legend = c("Mean Level of Terrorism", "One SD Increase"),
  col = c("blue", "red"), lty = c(1, 1)
)

In [None]:
# Fitted cumulative hazard from the flexsurv Weibull model, no interval bands.
plot(
  flex_weibull,
  type = "cumhaz",
  ci = FALSE,
  est = TRUE,
  main = "Weibull Cumulative Hazard"
)

# Fitted survival curve from the same model.
plot(
  flex_weibull,
  type = "survival",
  ci = FALSE,
  est = TRUE,
  main = "Weibull Survival Rate"
)

Using aftreg function from `eha` package

In [None]:
# Weibull accelerated-failure-time fit from `eha`, stratified on `guarantee`.
weibulAFT <- aftreg(
  Surv(start_date, end_date, warend) ~
    lagLogTotalWarRelated + logpop + elf + lngdp + uppsalaMaxed +
    logbattledeaths + mountains + strata(guarantee),
  data = duration,
  dist = "weibull"
)

In [None]:
# One-row data frame holding the sample mean of each covariate
# (missing values are dropped before averaging).
duration_m <- duration %>%
  select(
    lagLogTotalWarRelated, logpop, elf, uppsalaMaxed,
    lngdp, logbattledeaths, mountains
  ) %>%
  summarize_all(mean, na.rm = TRUE)

In [None]:
# Survival curves from the `eha` Weibull AFT fit, using the covariate means.
plot(
  weibulAFT,
  fn = "sur",
  new.data = duration_m,
  xlab = "Analysis Time",
  ylab = "Survival Rate",
  main = "Weibull Model: Civil War Hazard Expectations"
)

The author of the package recommends using `phreg` combined with `strata(variable)` for the purpose of plotting `weibull` at different values of a binary variable.

In [None]:
# Weibull proportional-hazards fit from `eha`, stratified on `guarantee`.
weibull_ph <- phreg(
  Surv(start_date, end_date, warend) ~
    lagLogTotalWarRelated + logpop + elf + lngdp + uppsalaMaxed +
    logbattledeaths + mountains + strata(guarantee),
  dist = "weibull",
  data = duration
)

# Plot the fit with two colours and a printed legend.
plot(
  weibull_ph,
  xlab = "Analysis Time",
  ylab = "Weibull Hazard",
  col = c("red", "blue"),
  main = "Civil War Hazard Expectations",
  printLegend = TRUE
)

#### Figure 5: Log Linear Model: Civil War Hazard Expectations 

In [None]:
# Log-normal model fitted with flexsurv, same specification as the Weibull fit.
flex_normal <- flexsurv::flexsurvreg(
  Surv(start_date, end_date, warend) ~
    lagLogTotalWarRelated + logpop + elf + lngdp + uppsalaMaxed +
    logbattledeaths + mountains + guarantee,
  data = duration,
  dist = "lognormal"
)

We create a new dataframe with terrorism set to two values, its mean and its mean plus one standard deviation, to be used for plotting the results.

In [None]:
# Sample means of the controls, this time including `guarantee`.
duration_m <- duration %>%
  select(
    logpop, elf, uppsalaMaxed, lngdp,
    logbattledeaths, mountains, guarantee
  ) %>%
  summarize_all(mean, na.rm = TRUE)

# Two prediction rows: the same mean profile at the two terrorism values
# that the plot legends call "Mean Level of Terrorism" and "One SD Increase".
duration.df <- cbind(
  lagLogTotalWarRelated = c(0.8247647, 1.9610567),
  rbind(duration_m, duration_m)
)
duration <- as.data.frame(duration)

In [None]:
# Empty canvas, then the log-normal survival curves for each row of
# `duration.df`, drawn without confidence bands.
plot(
  NULL,
  xlim = c(0, 600), ylim = c(0, 1),
  ylab = "Survival Rate", xlab = "Analysis Time",
  main = "Survival Rate",
  axes = FALSE
)
axis(side = 1)
axis(side = 2)
lines(
  flex_normal,
  newdata = duration.df, type = "survival", ci = FALSE,
  col = c("blue", "red")
)
legend(
  "topright",
  legend = c("Mean Level of Terrorism", "One SD Increase"),
  col = c("blue", "red"), lty = c(1, 1)
)

In [None]:
# Empty canvas, then the log-normal hazard curves for each row of
# `duration.df`, drawn without confidence bands.
plot(
  NULL,
  xlim = c(0, 600), ylim = c(0, 0.01),
  ylab = "Hazard Rate", xlab = "Analysis Time",
  main = "Hazard Rate",
  axes = FALSE
)
axis(side = 1)
axis(side = 2)
lines(
  flex_normal,
  newdata = duration.df, type = "hazard", ci = FALSE,
  col = c("blue", "red")
)
legend(
  "topright",
  legend = c("Mean Level of Terrorism", "One SD Increase"),
  col = c("blue", "red"), lty = c(1, 1)
)

In [None]:
# Fitted cumulative hazard from the flexsurv log-normal model.
plot(
  flex_normal,
  type = "cumhaz",
  ci = FALSE,
  est = TRUE,
  main = "Log-Normal Cumulative Hazard"
)

# Fitted survival curve from the same model.
plot(
  flex_normal,
  type = "survival",
  ci = FALSE,
  est = TRUE,
  main = "Log-Normal Survival Rate"
)

Now we fit and plot the model using `aftreg` function

In [None]:
# Log-normal accelerated-failure-time fit from `eha`, stratified on `guarantee`.
eha_normal <- aftreg(
  Surv(start_date, end_date, warend) ~
    lagLogTotalWarRelated + logpop + elf + lngdp + uppsalaMaxed +
    logbattledeaths + mountains + strata(guarantee),
  dist = "lognormal",
  data = duration
)

# Covariate means used as the plotting profile. This cell previously passed
# `duration.m`, an object that is never created anywhere in the notebook, and
# the nearest `duration_m` lacks the terrorism covariate. The means are
# therefore computed here, for exactly the model's covariates and in the
# order they appear in the formula.
eha_means <- duration %>%
  select(
    lagLogTotalWarRelated, logpop, elf, lngdp,
    uppsalaMaxed, logbattledeaths, mountains
  ) %>%
  summarize_all(mean, na.rm = TRUE)

plot(eha_normal, new.data = eha_means)
plot(
  eha_normal,
  fn = c("haz", "sur"),
  new.data = eha_means,
  main = "Log Linear Model: Civil War Hazard Expectations"
)

#### Figure 6: Cox Model: Civil War Hazard Expectations 



We create newdata at the mean of variables, and append a variable at two levels of terrorism to it for creating Hazard and Survival Rate plots. 

In [None]:
# Sample means of the controls (including `guarantee`) as a one-row frame.
duration.mean <- duration %>%
  select(
    logpop, elf, uppsalaMaxed, lngdp,
    logbattledeaths, mountains, guarantee
  ) %>%
  summarize_all(mean, na.rm = TRUE)

# Two prediction rows that differ only in the terrorism value.
duration.df <- cbind(
  lagLogTotalWarRelated = c(0.8247647, 1.9610567),
  rbind(duration.mean, duration.mean)
)

In [None]:
# Cox regression from `eha` with Breslow handling of ties.
cox_model <- coxreg(
  Surv(start_date, end_date, warend) ~
    lagLogTotalWarRelated + logpop + elf + lngdp + uppsalaMaxed +
    logbattledeaths + mountains + guarantee,
  data = duration,
  method = "breslow"
)

# Predicted curves for the two covariate profiles in `duration.df`.
fit <- survfit(cox_model, newdata = duration.df, data = duration)

# The two panels are stored so that the cumulative-hazard panel comes first
# (slot 1) and the survival panel second (slot 2).
splots <- list()

# Slot 2: survival curves.
splots[[2]] <- survminer::ggsurvplot(
  fit,
  conf.int = FALSE,
  xlab = "Analysis Time",
  ylab = "Survival Rate",
  linetype = "solid",
  palette = c("#E7B800", "#2E9FDF"),
  legend = "bottom",
  legend.title = "",
  legend.labs = c("Mean Level of Terrorism", "One SD Increase"),
  ggtheme = theme_classic()
)

# Slot 1: the same fit drawn with fun = "cumhaz".
splots[[1]] <- survminer::ggsurvplot(
  fit,
  conf.int = FALSE,
  fun = "cumhaz",
  xlab = "Analysis Time",
  ylab = "Hazard Rate",
  linetype = "solid",
  palette = c("#E7B800", "#2E9FDF"),
  legend = "bottom",
  legend.title = "",
  legend.labs = c("Mean Level of Terrorism", "One SD Increase"),
  ggtheme = theme_classic()
)

# Lay both panels out side by side under a common title.
cox_plots <- survminer::arrange_ggsurvplots(
  splots,
  title = "Cox Model: Civil War Hazard Expectations (Findley and Young 2015)",
  ncol = 2,
  nrow = 1,
  surv.plot.height = 2,
  ggsave = "cox_plots.pdf"
)

Here we plot hazard rate using `coxed` package 

In [None]:
# `coxmodel` is fitted here so the cell runs when the notebook is executed
# top to bottom; it was otherwise only created further down, in the
# proportional-hazards test section. The covariates, data and tie handling
# are the same as in that later fit.
coxmodel <- coxph(
  Surv(start_date, end_date, warend) ~
    lagLogTotalWarRelated + logpop + elf + lngdp + uppsalaMaxed +
    logbattledeaths + mountains + guarantee,
  data = duration,
  method = "breslow"
)

# NOTE(review): the response uses (start, stop] intervals; check the coxed
# documentation on whether its `id` argument is required for such data.
me <- coxed(coxmodel, method = "npsf")

# Reshape the baseline functions to long format. The result of `coxed()` was
# previously stored in `me` but read back from an undefined `ed1`.
baseline <- tidyr::gather(
  me$baseline.functions,
  cbh, survivor,
  key = "survivefunction",
  value = "value"
)

# One facet per baseline function, each with its own y scale.
ggplot(baseline, aes(x = time, y = value)) +
  geom_line() +
  xlab("Analysis Time") +
  ylab("Function") +
  facet_wrap(~survivefunction, scales = "free")

Forest plot for cox proportional hazards model using `survminer`

In [None]:
# Forest plot of the hazard ratios from `coxmodel`.
# `coxmodel` is the coxph fit assigned in the proportional-hazards test
# section of this notebook; it must exist before this cell is run.
survminer::ggforest(
  coxmodel,
  data = duration,
  main = "Hazard ratio",
  refLabel = "reference",
  noDigits = 2,
  fontsize = 0.7,
  cpositions = c(0.02, 0.22, 0.4)
)

#### Figure 7: Kaplan Meier survival curves and hazard estimates

For reproducing this plot, we utilize `ggsurvplot` function from `survminer` package.

In [None]:
# Kaplan-Meier estimate for the whole sample (intercept-only formula).
duration.km <- survfit(Surv(start_date, end_date, warend) ~ 1, data = duration)

# A stray extra comma in this call used to pass an empty positional argument
# to ggsurvplot(), which binds to its `data` parameter; it has been removed.
survminer::ggsurvplot(
  duration.km,
  legend = "right",
  legend.title = "",
  legend.labs = c("Kaplan-Meier"),
  ggtheme = theme_grey(),
  conf.int = FALSE,
  linetype = c(1),
  palette = "darkblue",
  censor = FALSE
) +
  ggtitle("Kaplan Meier survival estimates") +
  labs(x = "Analysis Time", y = "")

In [None]:
# Kernel-smoothed hazard estimate from `muhaz`, using `warmonths` as the
# time variable and `warend` as the event indicator.
mh <- muhaz::muhaz(
  duration$warmonths,
  duration$warend,
  bw.method = "g",
  kern = "epanechnikov"
)
plot(
  mh,
  xlab = "Analysis Time",
  main = "Smoothed Hazard Rates"
)

In [None]:
# Kaplan-Meier curves split by the security-guarantee indicator.
fit <- survival::survfit(
  survival::Surv(start_date, end_date, warend) ~ guarantee,
  data = duration
)
survminer::ggsurvplot(
  fit,
  xlab = "Analysis Time",
  ylab = "",
  ggtheme = theme_grey(),
  legend = "right",
  legend.title = "Legend",
  legend.labs = c("No Security Guarantee", "Security Guarantee"),
  linetype = c("solid", "solid"),
  palette = c("darkblue", "brown")
)

In [None]:
# Two-level factor: rows with `mountains` above 23 are "mountainous",
# all others "non-mountainous".
duration$mount <- as.factor(
  ifelse(duration$mountains > 23, "mountainous", "non-mountainous")
)

# Kaplan-Meier curves for the two terrain groups.
fit <- survival::survfit(
  survival::Surv(start_date, end_date, warend) ~ mount,
  data = duration
)
duration <- as.data.frame(duration)

survminer::ggsurvplot(
  fit,
  xlab = "Analysis Time",
  ylab = "",
  group_by = "mount",
  title = "Mountainous Terrain",
  ggtheme = theme_grey(),
  legend = "right",
  legend.title = "Legend",
  legend.labs = c("Mountainous", "Non-Mountainous"),
  linetype = c("solid", "solid"),
  palette = c("darkblue", "brown")
)

#### Figure 8: Proportional hazards Test

In [None]:
# Model formula for the proportional-hazards test.
# `cluster(warnumber)` used to be passed as the second argument of
# as.formula(). That argument is the formula's environment and is ignored
# when the first argument is already a formula, so clustering was never
# applied.
formula <- Surv(start_date, end_date, warend) ~
  lagLogTotalWarRelated + logpop + elf + lngdp + uppsalaMaxed +
  logbattledeaths + mountains + guarantee

# Clustering on `warnumber` is now requested through coxph()'s own `cluster`
# argument, which keeps the cluster variable out of the model terms.
coxmodel <- coxph(
  formula,
  data = duration,
  method = "breslow",
  cluster = warnumber
)

# Test of the proportional-hazards assumption; plotted in the next cell.
diag <- cox.zph(coxmodel)

In [None]:
# Scaled Schoenfeld residuals for the first model term, with a zero line.
plot(
  diag[1],
  main = "Terrorism",
  xlab = "War Months",
  ylab = "Scaled Schoenfeld Residuals"
)
abline(h = 0)

# Same plot for the seventh model term.
plot(
  diag[7],
  main = "Mountainous Terrain",
  xlab = "War Months",
  ylab = "Scaled Schoenfeld Residuals"
)
abline(h = 0)

#### GAM Difference

In [None]:
# Reload the estimation data and keep only the rows flagged `_st == 1`.
duration <- haven::read_dta("replication-data/duration_main_est.dta")
duration <- duration[duration$`_st` == 1, ]

duration$start_date <- duration$`_t0`
duration$end_date <- duration$`_t`

# Cox model on total war duration. The formula was previously wrapped in
# as.formula(..., cluster(warnumber)); that second argument is the formula's
# environment and is ignored for formula objects, so it never clustered
# anything. It is dropped here, which leaves the fitted model unchanged.
# NOTE(review): if clustering by `warnumber` is wanted for the bootstrap,
# check whether coxed()'s `id` argument is the right place to request it.
formula0 <- Surv(warmonths, warend) ~
  lagLogTotalWarRelated + logpop + elf + lngdp + uppsalaMaxed +
  logbattledeaths + mountains + guarantee
mv.cox <- coxph(formula0, data = duration, method = "breslow")

# Expected durations with `guarantee` set to 0 and to 1 for every row.
# mutate() has no `na.rm` option: the former `na.rm = TRUE` only added a
# constant column named `na.rm` to both data sets, so it is removed.
me <- coxed(
  mv.cox,
  method = "gam",
  bootstrap = TRUE,
  newdata = dplyr::mutate(duration, guarantee = 0),
  newdata2 = dplyr::mutate(duration, guarantee = 1)
)

# Stack the three summaries and label them for plotting.
meplot <- rbind(me$mean.diff, me$mean1, me$mean2)
meplot$description <- c(
  "Difference",
  "Security Guarantee = 0",
  "Security Guarantee = 1"
)

# Factor built from the row order so the labels keep the order given above.
meplot$order <- 1:3
meplot$model <- factor(meplot$order, labels = meplot$description)

# Point estimates with lb-ub interval segments, drawn horizontally.
ggplot(meplot) +
  geom_segment(aes(x = model, xend = model, y = lb, yend = ub)) +
  geom_hline(aes(yintercept = 0), colour = "grey") +
  geom_point(aes(x = model, y = mean)) +
  coord_flip() +
  ylab(expression(paste(Delta, "Expected Duration of War (Months)"))) +
  xlab("") +
  theme_classic()