generated from opensafely/sro-template
/
calculate_tpp_coverage.R
120 lines (95 loc) · 4.42 KB
/
calculate_tpp_coverage.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
################################################################################
# Description: Script to calculate percentage of TPP coverage in ONS population per NUTS1 Region
#
# input: /output/cohorts/input.csv.gz - pull NUTS1 region for all TPP-registered patients
# /output/data/age_ons_sex.csv.gz - total population ONS estimates per NUTS1 region
#
# output: /output/plots/tpp_coverage_map.svg
# /output/tables/tpp_pop_all.csv
#
# Author: Colm D Andrews
# Date: 26/11/2021
#
################################################################################
time_total <- Sys.time()
################################################################################
library(tidyverse)
library(lubridate)
library(ggplot2)
library(sf)
library(data.table)
library(dtplyr)
fs::dir_create(here::here("output", "plots"))
fs::dir_create(here::here("output", "tables"))
theme_set(theme_minimal())
options(datatable.old.fread.datetime.character = TRUE)
# ---------------------------------------------------------------------------- #
#----------------------#
# LOAD/CLEAN DATA #
#----------------------#
## TPP-registered patient records (from study definition)
## Include ALL patients with non-missing region in calculation of TPP populations
input <- read_csv(here::here("output", "cohorts","input.csv.gz")) %>%
# Remove individuals w missing region
filter(!is.na(region))
## National NUTS1 population estimates (ONS mid-2020):
nuts1_pop <- read_csv(here::here("data","age_ons_sex.csv.gz"),n_max=9) %>%
select(Region,Total) %>%
rename("region"="Region")
# ---------------------------------------------------------------------------- #
print("No. Regions in England:")
n_distinct(nuts1_pop$region)
print("No. TPP-registered patients with non-missing Regions:")
nrow(input)
print("No. unique Regions with patients registered in TPP:")
n_distinct(input$region)
# ---------------------------------------------------------------------------- #
#----------------------------------------------------#
# Aggregate by Region and merge with ONS population #
#----------------------------------------------------#
tpp_cov<-input %>%
# Count records per Nuts1
group_by(region) %>%
tally(name = "tpp_pop_all") %>%
ungroup() %>%
right_join(nuts1_pop,by="region") %>%
mutate(nuts118cd = case_when(region=="East"~"UKH",
region=="North West"~"UKD",
region=="North East"~"UKC",
region=="Yorkshire and The Humber"~"UKE",
region=="East Midlands"~"UKF",
region=="West Midlands"~"UKG",
region=="London"~"UKI",
region=="South East"~"UKJ",
region=="South West"~"UKK"),
region = as.factor(region),
tpp_pop_all=round(tpp_pop_all/5)*5,
Total=round(Total/5)*5,
tpp_cov_all = tpp_pop_all*100/Total)
summary(tpp_cov)
# ---------------------------------------------------------------------------- #
#------------------------------------------#
# Save #
#------------------------------------------#
write_csv(tpp_cov, here::here("output", "tables","tpp_pop_all.csv"))
################################################################################
#----------------------#
# FIGURES #
#----------------------#
## Load shapefiles
nuts_shp<-st_read("data/NUTS_Level_1_(January_2018)_Boundaries.shp")
saveRDS(nuts_shp,here::here("data", "nuts_shp.rds"))
nuts_shp<-readRDS(here::here("data", "nuts_shp.rds"))
coverage_plot<-nuts_shp %>%
filter(nuts118nm!="Wales" & nuts118nm!="Northern Ireland" & nuts118nm!="Scotland") %>%
left_join(tpp_cov,by="nuts118cd") %>%
ggplot(aes(geometry = geometry,fill=tpp_cov_all)) +
geom_sf(lwd = 0, colour='grey') +
scale_fill_gradient2(midpoint = median(tpp_cov$tpp_cov_all), high = "navyblue",
mid = "indianred", low = "ivory1",na.value = "white") +
theme(legend.position = c(0.2,0.5),legend.text.align = 1,
panel.background=element_rect(fill="lightblue")) +
ggtitle("TPP population coverage per NUTS 1 Region") +
guides(fill=guide_legend(title="TPP population\ncoverage (%)")) +
xlab("Longitude") + ylab("Latitude")
ggsave(filename=here::here("output", "plots","tpp_coverage_map.svg"),coverage_plot,width = 20,height = 20, units = "cm")