generated from opensafely/sro-template
/
calculate_tpp_coverage.R
124 lines (87 loc) · 3.65 KB
/
calculate_tpp_coverage.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
################################################################################
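# Description: Calculate TPP coverage per MSOA, i.e. the number of
#              TPP-registered patients as a percentage of the ONS mid-2020
#              population estimate for each MSOA, and map the result
#
# input:  ./output/cohorts/input.csv.gz   (TPP-registered patient records)
#         ./data/sape23dt4mid2020msoa.csv (ONS population estimates per MSOA)
#         ./data/msoa_shp.rds             (MSOA boundaries)
# output: output/tables/tpp_pop_all.csv.gz
#         output/plots/tpp_coverage_map.svg
#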
# Author: Colm D Andrews
# Date: 08/10/2021
#
################################################################################
# Record the script start time.
# NOTE(review): time_total is not read again in the visible script -- confirm
# whether an elapsed-time report was intended.
time_total <- Sys.time()

# Packages ---------------------------------------------------------------------
library(tidyverse)
library(lubridate)
library(ggplot2)
library(sf)
library(data.table)
library(dtplyr)

# Output folders ---------------------------------------------------------------
# Create each output sub-folder; warnings (e.g. folder already exists) are
# silenced and missing parent folders are created as needed.
for (subdir in c("plots", "tables", "cohorts")) {
  dir.create(
    here::here("output", subdir),
    showWarnings = FALSE,
    recursive = TRUE
  )
}

# Session defaults -------------------------------------------------------------
# data.table option affecting how fread() treats date-time columns
# (presumably keeps them as character -- confirm against data.table docs).
options(datatable.old.fread.datetime.character = TRUE)
# Use the minimal ggplot2 theme for every plot in this script.
theme_set(theme_minimal())
# ---------------------------------------------------------------------------- #
#----------------------#
# LOAD/CLEAN DATA #
#----------------------#
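# * input.csv.gz
#   - MSOA (and age) for all TPP-registered patients
# * sape23dt4mid2020msoa.csv (loaded below as msoa_pop)
#   - total population estimate per MSOA (the `All Ages` column)
#   - also holds population estimates by single year of age (not used here)
#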
# Input file paths, in order: (1) TPP patient extract, (2) ONS population
# estimates per MSOA, (3) MSOA boundary object.
args <- c(
  "./output/cohorts/input.csv.gz",
  "./data/sape23dt4mid2020msoa.csv",
  "./data/msoa_shp.rds"
)

## TPP-registered patient records (from study definition).
## Every patient with a usable MSOA counts towards the TPP populations.
input <- read_csv(args[1]) %>%
  # Drop individuals whose MSOA is missing or does not contain "E"
  # (i.e. non-England codes)
  filter(!is.na(msoa), grepl("E", msoa, fixed = TRUE)) %>%
  # Flag for ages 65 and over: 1 when age >= 65, otherwise NA
  mutate(`65+` = if_else(age >= 65, 1, NA_real_))
## ONS mid-2020 population estimates per MSOA, restricted to England.
msoa_pop <- fread(args[2], data.table = FALSE, na.strings = "") %>%
  mutate(
    # MSOA code stored as a factor
    msoa = as.factor(`MSOA Code`),
    # `All Ages` is parsed into a number
    msoa_pop = parse_number(`All Ages`)
  ) %>%
  # Only the code and the all-ages total are kept
  select(msoa, msoa_pop) %>%
  # England only: keep codes containing "E"
  filter(grepl("E", msoa, fixed = TRUE))
# ---------------------------------------------------------------------------- #
# Sanity-check counts for the log. Every value is printed explicitly so that it
# also appears when the script is run through source(), where bare top-level
# expressions are not auto-printed (only the labels would show up).
print("No. MSOAs in England:")
print(n_distinct(msoa_pop$msoa))
print("No. TPP-registered patients with non-missing MSOA:")
print(nrow(input))
print("No. unique MSOAs with patients registered in TPP:")
print(n_distinct(input$msoa))
# ---------------------------------------------------------------------------- #
#----------------------------------------------------#
# Aggregate by MSOA and merge with ONS population #
#----------------------------------------------------#
# TPP-registered population per MSOA, joined to the ONS population estimate.
tpp_cov <- input %>%
  # One row per MSOA holding its number of patient records
  count(msoa, name = "tpp_pop_all") %>%
  # Keep every MSOA present in the ONS table; an MSOA with no patient records
  # in `input` gets NA for tpp_pop_all, and therefore NA coverage
  right_join(msoa_pop, by = "msoa") %>%
  mutate(
    msoa = as.factor(msoa),
    # Coverage: TPP patients as a percentage of the ONS population estimate
    tpp_cov_all = 100 * tpp_pop_all / msoa_pop
  )

# Explicit print() so the summary is also shown when the script is run via
# source(), which does not auto-print top-level values by default.
print(summary(tpp_cov))
# ---------------------------------------------------------------------------- #
#------------------------------------------#
# Save #
#------------------------------------------#
# Write the per-MSOA coverage table to output/tables (path ends in .csv.gz).
tpp_cov_file <- here::here("output", "tables", "tpp_pop_all.csv.gz")
write_csv(tpp_cov, tpp_cov_file)
################################################################################
## Load the MSOA boundary object (stored as .rds)
msoa_shp <- readRDS(args[3])

# ---------------------------------------------------------------------------- #
#----------------------#
#        FIGURES       #
#----------------------#
# Map of TPP coverage (%) by MSOA.

# Boundaries restricted to codes containing "E" (England)
england_shp <- filter(msoa_shp, grepl("E", MSOA11CD))

# full_join keeps unmatched rows from both the boundaries and the coverage table
coverage_map_data <- full_join(
  england_shp,
  tpp_cov,
  by = c("MSOA11CD" = "msoa")
)

coverage_plot <- ggplot(
  coverage_map_data,
  aes(geometry = geometry, fill = tpp_cov_all)
) +
  geom_sf(lwd = 0, colour = "grey") +
  # Diverging fill scale centred on 100% coverage
  scale_fill_gradient2(
    low = "white",
    mid = "indianred",
    high = "black",
    midpoint = 100
  ) +
  # NOTE(review): a numeric legend.position may raise a deprecation warning on
  # recent ggplot2 releases -- confirm against the installed version.
  theme(
    legend.position = c(0.2, 0.9),
    panel.background = element_rect(fill = "lightblue")
  )

ggsave(
  filename = here::here("output", "plots", "tpp_coverage_map.svg"),
  plot = coverage_plot
)