generated from opensafely/research-template
/
calculate_tpp_coverage.R
127 lines (100 loc) · 3.85 KB
/
calculate_tpp_coverage.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
################################################################################
# Description: Script to calculate TPP coverage per MSOA according to household
# size of TPP-registered patients and ONS population estimates per MSOA
#
# input:
#
# Author: Emily S Nightingale
# Date: 13/10/2020
#
################################################################################
time_total <- Sys.time()
################################################################################
library(tidyverse)
library(data.table)
library(dtplyr)
# write("Calculating TPP coverage",file="coverage_log.txt")
sink("./coverage_log.txt", type = "output")
options(datatable.old.fread.datetime.character=TRUE)
# ---------------------------------------------------------------------------- #
#----------------------#
# LOAD/CLEAN DATA #
#----------------------#
# * input.csv
# - pull household ID, size and MSOA for all TPP-registered patients
# * msoa_pop.csv
# - total population estimates per MSOA
# - population estimates by single year age
#
# args <- c("./input.csv","./data/SAPE22DT15_mid_2019_msoa.csv")
args = commandArgs(trailingOnly=TRUE)
## National MSOA population estimates (ONS mid-2019):
msoa_pop <- fread(args[2], data.table = FALSE, na.strings = "") %>%
mutate(msoa = as.factor(`MSOA Code`),
msoa_pop = `All Ages`) %>%
# Filter to England
filter(grepl("E", msoa)) %>%
# Sum older populations
rowwise() %>%
mutate(`70+` = sum(`70-74`:`90+`)) %>%
dplyr::select(msoa, msoa_pop, `70+`) %>%
ungroup()
print("No. MSOAs in England:")
n_distinct(msoa_pop$msoa)
## TPP-registered patient records (from study definition)
## Include ALL patients with non-missing MSOA in calculation of TPP populations
input <- fread(args[1], data.table = FALSE, na.strings = "") %>%
# Remove individuals w missing/non-England MSOA
filter(grepl("E",msoa) & !is.na(msoa))
print("No. TPP-registered patients with non-missing MSOA:")
nrow(input)
print("No. unique MSOAs with patients registered in TPP:")
n_distinct(input$msoa)
print("No. TPP-registered patients with non-missing MSOA and non-missing HHID:")
nrow(input[input$household_id > 0,])
print("No. unique MSOAs with patients registered in TPP and non-missing HHID:")
n_distinct(input$msoa[input$household_id > 0])
print("Count probable cases with/without HHID:")
input %>%
group_by((household_id > 0)) %>%
summarise(n_probable = sum(!is.na(primary_care_case_probable)))
# ---------------------------------------------------------------------------- #
#------------------------------------------#
# Aggregate by MSOA and merge with pops #
#------------------------------------------#
input %>%
# Count records per MSOA
group_by(msoa) %>%
tally(name = "tpp_pop_all") %>%
ungroup() -> tpp_pop_all
input %>%
filter(household_id > 0) %>%
# Count records per MSOA
group_by(msoa) %>%
tally(name = "tpp_pop_wHHID") %>%
ungroup() -> tpp_pop_wHHID
tpp_pop_all %>%
full_join(tpp_pop_wHHID) %>%
# Merge MSOAs in OS with total population estimates
left_join(msoa_pop) %>%
mutate(msoa = as.factor(msoa),
tpp_cov_all = tpp_pop_all*100/msoa_pop,
tpp_cov_wHHID = tpp_pop_wHHID*100/msoa_pop,
cov_gt_100 = as.factor(ifelse(tpp_cov_all > 100, "Yes", "No"))) -> tpp_cov
summary(tpp_cov)
over100 <- filter(tpp_cov, cov_gt_100 == "Yes")
write.csv(over100, "./msoa_gt_100_cov.csv", row.names = FALSE)
png("./total_vs_tpp_pop.png", height = 800, width = 800)
tpp_cov %>%
pivot_longer(c("tpp_cov_all","tpp_cov_wHHID")) %>%
ggplot(aes(value)) +
geom_histogram(bins = 30, fill = "steelblue") +
facet_wrap(~name) +
labs(x = "TPP coverage per MSOA") +
theme_minimal()
dev.off()
saveRDS(tpp_cov, file = "./tpp_msoa_coverage.rds")
write.csv(tpp_cov, "./tpp_msoa_coverage.csv", row.names = FALSE)
tpp_msoas <- unique(input$msoa)
write.csv(tpp_msoas, "./msoas_in_tpp.csv", row.names = FALSE)
sink()