# Integrated Exposure-Response function code

This notebook produces point estimates of premature deaths caused by ambient particulate matter.

As input, the notebook in its current state requires the files (documented elsewhere):

- pohjoismaat_0322.geojson
- pohjoismaat_0323.geojson
- estat_demo.pop.csv
- parameter_draws.csv

These files provide the population grid data, which is combined with disease and pollution data. The population file should be replaced each year.

The disease data, in particular, is taken directly from Eurostat's publications and is hard-coded in the notebook. It should most likely also be updated each year.

As output, the notebook in its current state produces the files:

- pohjoismaat_0322-DEATHS.geojson
- pohjoismaat_0323-DEATHS.geojson

This code is provided as-is and I do not take responsibility for its misuse. It should be scheduled for a major redesign before any product based on this method is ever used in practice.

In [130]:
library(sf)
library(dplyr)

In [156]:
## WARNING: EXTREMELY UNINTELLIGENT SOLUTION/HACK

# METHODOLOGIST'S NOTE 13.03.2025: 
# I am sorry for this, I have sworn to repent and better myself before 2027.
# Please do not think badly of me or my organization for this.
# At the time this code was written, two things were running: time, and me.
# Time was running out.
# I was running on fumes.

# global variables for shiny app
# horrible solution
param_draws <- read.csv("parameter_draws.csv")
RISK_FACTORS <- unique(param_draws$cause_code)
CAUSE_PARAMS <- list()

for (c in RISK_FACTORS) {

    data = param_draws[param_draws$cause_code == c, ]
    am <- mean(data$alpha)
    bm <- mean(data$beta)
    cm <- mean(data$gamma)
    CAUSE_PARAMS[[c]] = list(cause = c, a0 = am, b0 = bm, c0 = cm)
    
}


# mortality rates by cause, country, sex (per 100,000 persons)
# sources: 
# https://ec.europa.eu/eurostat/statistics-explained/index.php?title=Respiratory_diseases_statistics
# https://ec.europa.eu/eurostat/statistics-explained/index.php?title=Cardiovascular_diseases_statistics
# 
# note: lri = pneumonia + other alri
MORT_FI <- data.frame(
    risk_factor = c("cvd_ihd", "cvd_stroke", "resp_copd", "neo_lung", "lri"),
    male = c(216.7, 71.0, 30.4, 53.5, 30.5),
    female = c(89.4, 57.6, 13.5, 24.6, 13.0)
)

MORT_FI$tot <- (MORT_FI$male + MORT_FI$female)/2
#print(MORT_FI)

MORT_SWE <- data.frame(
    risk_factor = c("cvd_ihd", "cvd_stroke", "resp_copd", "neo_lung", "lri"),
    male = c(127.1, 53.2, 24.4, 33.1, 40.4),
    female = c(58.8, 42.7, 25.6, 32.8, 32.2)
)

MORT_SWE$tot <- (MORT_SWE$male + MORT_SWE$female)/2
#print(MORT_SWE)

# baseline avg. ambient pollution exposure ~5 micrograms / m^3
# this is the 
RR <- function(z, cause = "cvd_ihd", param_list = CAUSE_PARAMS, xcf = 5) {
  # x, xcf are in micrograms / m^3
  # ref: the USA EPA
  # https://www.epa.gov/sites/default/files/2020-07/documents/parametrizing_the_integrated_exposure_response_function.pdf
  
  alpha <- param_list[[cause]]$a0
  beta <- param_list[[cause]]$b0
  gamma <- param_list[[cause]]$c0
  
  #rr <- ifelse(x <= xcf, 1, 1 + alpha*(1 - exp(-beta*(x - xcf)^gamma)))
  if (z <= xcf) {
      rr <- 1
  }
  else {
     rr <- 1 + alpha*(1 - exp(-beta*(z - xcf)^gamma))
  }
  
  return(rr)
}

excess_deaths <- function(z, P, country="FI", type='tot', risk_factors=RISK_FACTORS) {
  # input x : avg micrograms / m^3
  # P : input population number
  # type : "tot", "male", or "female"
  #print(P[1])
  # Y = baseline mortality rate
   # print(type)
   # print(country)
  Y <- 0
  #B <- 17.7 # deprecated parameters
  #B_sd <- 3.7
    type <- case_when(
            type == "F:Female" ~ "female",
            type == "M:Male" ~ "male",
            TRUE ~ "tot"
        )

    tmp_df <- MORT_FI
    tmp_df[,2:4] <- (tmp_df[,2:4] + MORT_SWE[,2:4])/2
  risk_df <- case_when(
      country == "FI" ~ MORT_FI,
      country == "SE" ~ MORT_SWE,
      TRUE ~ tmp_df
  )
  
  for (f in risk_factors) {
    
    rr <- RR(z, f)
     # print(rr)
    Y0 <- risk_df[risk_df$risk_factor==f,type]
      
    Y <- c(Y, Y0*(rr-1))  # note 13.03: this seems suspicious. reread IRE papers.
  }
  #  print(Y)
  
  return(sum(Y)*P/100000) # this is not entirely true, because the disease risks actually correlate
}

In [168]:
# edit this to produce deaths for other months
data_map <- read_sf("./pohjoismaat_0323.geojson")

In [169]:
data_map2 <- st_transform(data_map, crs = '+proj=longlat +datum=WGS84')

In [170]:
#data_map2 <- data_map2 %>% st_drop_geometry(data_map2) # do NOT do this

In [171]:
pop <- read.csv("./estat_demo.pop.csv")
pop2 <- pop %>%
  filter(sex %in% c('M:Males','F:Females', 'T:Total'), age=='TOTAL:Total', !geo %in% c('FI:Finland', 'SE:Sweden') ) %>%
  #filter(sex %in% c( 'T:Total'), age=='TOTAL:Total', !geo %in% c('FI:Finland', 'SE:Sweden') ) %>%
  mutate(NUTS_ID = substring(geo, 1, 5))

In [172]:
tmp <- data_map2 %>%
  left_join(pop2, by="NUTS_ID")
#tmp$pm = tmp$so2 + tmp$dust + tmp$ectot + tmp$no + tmp$no2 + tmp$pm10 + tmp$sia # not all particles
# TODO: are these really the relevant PM2.5 molecules???
# there should be a correlation formula?
tmp$pm = tmp$ectot + tmp$so2 + tmp$no2 + tmp$sia

In [173]:
tmp$pm

In [174]:
tmp_res <- as.data.frame(tmp[,c("pm", "OBS_VALUE", "CNTR_CODE", "sex")])
tmp$deaths <- apply(tmp_res, 1, function(x) { return(excess_deaths(as.numeric(x["pm"]), as.numeric(x["OBS_VALUE"]), country=x["CNTR_CODE"], type=x["sex"])) })

In [175]:
head(tmp$deaths, 26)

In [176]:
# edit this to produce deaths for other months
st_write(tmp, dsn = "~/pohjoismaat_0323-DEATHS.geojson", layer = "pohjoismaat.geojson")

Writing layer `pohjoismaat.geojson' to data source 
  `/home/eouser/pohjoismaat_0323-DEATHS.geojson' using driver `GeoJSON'
Writing 120 features with 30 fields and geometry type Multi Polygon.
