generated from opensafely/research-template
/
data_check_figs.R
113 lines (92 loc) · 3.31 KB
/
data_check_figs.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
################################################################################
# Description: Summary figures to sense-check cleaned data
#
# input: Cleaned input data from data_clean.R
#
# Author: Emily S Nightingale
# Date: 01/10/2020
#
################################################################################
################################################################################
#----------------------#
# SETUP ENVIRONMENT #
#----------------------#
library(tidyverse)
library(lubridate)
library(ggplot2)
library(sf)
theme_set(theme_minimal())
# ---------------------------------------------------------------------------- #
#----------------------#
# LOAD DATA #
#----------------------#
# * input_clean.csv
# - individual health records for identification of covid events
# args <- c("input_clean.rds", "data/msoa_shp.rds")
args = commandArgs(trailingOnly = TRUE)
input <- readRDS(args[1]) %>%
rename(tpp_cov = tpp_cov_wHHID)
## Load shapefiles
msoa_shp <- readRDS(args[2])
# ---------------------------------------------------------------------------- #
#----------------------#
# FIGURES #
#----------------------#
# Distribution of household size,
png("./hh_size_dist.png", height = 800, width = 1400)
input %>%
filter(household_size < quantile(household_size, 0.999, na.rm = T)) %>%
group_by(care_home_type, household_id) %>%
summarise(household_size = unique(household_size)) %>%
ggplot(aes(household_size)) +
geom_histogram(fill = "white", col = "black") +
facet_wrap(~care_home_type, scales = "free")
dev.off()
# Age distribution in carehomes/community
png("./age_dist.png", height = 800, width = 1400)
input %>%
mutate(group = case_when(ch_ge65 == T ~ "Care home",
ch_ge65 == F ~ "Community")) %>%
ggplot(aes(age)) +
geom_histogram(bins = 30, fill = "white", col = "black") +
geom_vline(aes(xintercept = median(age, na.rm = T)), col = "red") +
facet_wrap(~group, scales = "free")
dev.off()
# TPP coverage by MSOA
png("./tpp_coverage_msoa.png", height = 500, width = 600)
input %>%
group_by(msoa) %>%
summarise(tpp_cov = unique(tpp_cov)) %>%
ungroup() %>%
ggplot(aes(tpp_cov)) +
geom_histogram(bins = 30, fill = "white", col = "black")
dev.off()
png("./tpp_coverage_carehomes.png", height = 500, width = 600)
input %>%
filter(care_home_type != "U") %>%
dplyr::select(household_id, percent_tpp) %>%
unique() %>%
ggplot(aes(percent_tpp)) +
geom_histogram(bins = 30, fill = "white", col = "black")
dev.off()
input %>%
group_by(msoa) %>%
summarise(tpp_cov = mean(tpp_cov, na.rm = T)) %>%
ungroup() -> by_msoa
pdf("./tpp_coverage_map.pdf", height = 10, width = 8)
msoa_shp %>%
full_join(by_msoa, by = c("MSOA11CD" = "msoa")) %>%
ggplot(aes(geometry = geometry, fill = tpp_cov)) +
geom_sf(lwd = 0) +
scale_fill_gradient2(midpoint = 100, high = "steelblue", low = "indianred", mid = "white") +
theme(legend.position = c(0.2,0.9))
dev.off()
png("./infection_death_delays.png", height = 500, width = 900)
input %>%
filter(care_home_type != "U") %>%
pivot_longer(prob_death_delay:test_death_delay) %>%
ggplot(aes(value)) +
geom_histogram(bins = 30, fill = "white", col = "black") +
facet_wrap(~name, scales = "free")
dev.off()
# ---------------------------------------------------------------------------- #