generated from opensafely/research-template
-
Notifications
You must be signed in to change notification settings - Fork 0
/
calculate_tpp_coverage.R
82 lines (64 loc) · 2.35 KB
/
calculate_tpp_coverage.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
################################################################################
# Description: Script to calculate TPP coverage per MSOA according to household
# size of TPP-registered patients and ONS population estimates per MSOA
#
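# input: two file paths passed as command-line arguments, in this order:
#   1. input_coverage.csv - household ID, size and MSOA for TPP-registered patients
#   2. msoa_pop.csv - ONS population estimates per MSOA
#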
# Author: Emily S Nightingale
# Date: 13/10/2020
#
################################################################################
# Record the script start time (assigned here; not read again in the visible
# part of this script).
time_total <- Sys.time()
################################################################################
# Attach packages. The order is significant: data.table is attached after
# tidyverse, so any function names the two share resolve to the data.table
# versions from here on.
library(tidyverse)
library(data.table)
library(dtplyr)
# write("Calculating TPP coverage",file="coverage_log.txt")
# Divert printed output (type = "output"; messages and warnings are not
# diverted) to a log file in the working directory. The diversion stays active
# until sink() is called again with no arguments.
sink("./coverage_log.txt", type = "output")
# shp <- sf::st_read(dsn = "./data/Middle_Layer_Super_Output_Areas__December_2011__Boundaries_EW_BGC",
# layer = "Middle_Layer_Super_Output_Areas__December_2011__Boundaries_EW_BGC")
# ---------------------------------------------------------------------------- #
#----------------------#
# LOAD DATA #
#----------------------#
# * input_coverage.csv
# - household ID, size and MSOA for all TPP-registered patients
# * msoa_pop.csv
# - total population estimates per MSOA
# - population estimates by single year age
# args <- c("./output/input_coverage.csv","./data/msoa_pop.csv")
# Read the input file paths from the command line:
#   args[1] - patient-level extract (input_coverage.csv)
#   args[2] - MSOA population estimates (msoa_pop.csv)
args <- commandArgs(trailingOnly = TRUE)

# Fail early with a clear message when a path is missing, rather than letting
# fread() fail later on an NA path.
if (length(args) < 2) {
  stop(
    "Expected two arguments: <input_coverage.csv> <msoa_pop.csv>",
    call. = FALSE
  )
}

# Load the patient-level extract as a plain data.frame, reading empty strings
# as NA. msoa is converted to a factor (summary() of a factor column reports
# per-level counts rather than just length/class).
input <- fread(args[1], data.table = FALSE, na.strings = "") %>%
  mutate(msoa = as.factor(msoa))
# Load the MSOA population estimates and keep, per MSOA:
#   msoa     - area code            (source column `Area Codes`)
#   msoa_pop - total population     (source column `All Ages`)
#   70+      - population aged 70+  (sum of the columns `70` through `90+`)
#
# The 70+ total is a row-wise sum over the selected age columns. The previous
# form, rowwise() + sum(`70`:`90+`), evaluated `70`:`90+` on the *values* in
# each row, i.e. it summed the integer sequence running from the age-70 count
# to the age-90+ count instead of adding up the age columns themselves.
# Assumes the columns lying between `70` and `90+` in the file are the
# intermediate single-year ages and are numeric - TODO confirm against the
# ONS file layout.
msoa_pop <- fread(args[2], data.table = FALSE, na.strings = "") %>%
  rename(msoa = `Area Codes`,
         msoa_pop = `All Ages`) %>%
  # `.` is the renamed data frame supplied by the pipe; NA in any age column
  # propagates to the total, as it did with sum().
  mutate(`70+` = rowSums(dplyr::select(., `70`:`90+`))) %>%
  dplyr::select(msoa, msoa_pop, `70+`)
# Count TPP-registered patient records per MSOA (one row of `input` is counted
# as one record) and express that count as a percentage of the ONS population
# estimate for the same MSOA.
# NOTE(review): the file header mentions household size, but this step counts
# rows and does not use the household size column - confirm that is intended.

# Explicit print() so the summary reaches the log however the script is run.
print(summary(input))

tpp_cov <- input %>%
  group_by(msoa) %>%
  count(name = "tpp_pop") %>%
  # Drop the grouping so the saved table is a plain (ungrouped) tibble.
  ungroup() %>%
  # msoa is a factor here but is read from the population file without
  # conversion; join on character codes instead of relying on implicit
  # coercion of the key.
  mutate(msoa = as.character(msoa)) %>%
  # Full join keeps MSOAs that appear in only one source; those rows carry NA
  # in the columns from the other source, so tpp_cov is NA for them too.
  full_join(msoa_pop, by = "msoa") %>%
  mutate(tpp_cov = tpp_pop * 100 / msoa_pop)

print(summary(tpp_cov))
# Histogram of the per-MSOA difference between the ONS population estimate and
# the TPP patient count, written to a PNG in the working directory.
png("./total_vs_tpp_pop.png", height = 800, width = 800)
# A ggplot object is only drawn when it is printed. Printing explicitly means
# the PNG gets its content even when the script is run via source(), where
# top-level values are not auto-printed.
print(
  tpp_cov %>%
    ggplot(aes(msoa_pop - tpp_pop)) +
    geom_histogram(bins = 30, fill = "steelblue") +
    theme_minimal()
)
dev.off()
# shp %>%
# full_join(shp, by = c("MSOA11CD" = "msoa")) %>%
# ggplot() +
# geom_sf(aes(geometry = geometry, fill = tpp_cov)) +
# theme_bw() -> map_cov
#
# pdf("./map_coverage_msoa.pdf", height = 10, width = 10)
# map_cov
# dev.off()
# Persist the coverage table in two formats in the working directory:
# an RDS file (keeps the R object as-is) and a CSV without row names.
saveRDS(object = tpp_cov, file = "./tpp_msoa_coverage.rds")
write.csv(x = tpp_cov, file = "./tpp_msoa_coverage.csv", row.names = FALSE)

# Remove the output diversion so printed output no longer goes to the log file.
sink()