generated from opensafely/research-template
/
process.R
124 lines (79 loc) · 2.99 KB
/
process.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
# # # # # # # # # # # # # # # # # # # # #
# This script:
# defines the covid infection (case) group & the potential control group
#
#
# # # # # # # # # # # # # # # # # # # # #
## Import libraries---
library('tidyverse')
library("ggplot2")
library('dplyr')
library('lubridate')
#### COVID INFECTION
# Builds the covid infection cohort from two extracts (SGSS and primary care)
# and writes it out twice: once as the infection case file and once, with a
# calendar year-month column added, as the infection control file.

# import data
df1 <- read_csv(here::here("output", "input_covid_SGSS.csv"))
df2 <- read_csv(here::here("output", "input_covid_primarycare.csv"))

# has covid infection record
# filter() drops rows whose condition evaluates to NA, so records without a
# patient_index_date are removed here as well.
# NOTE(review): presumably patient_index_date is parsed as a Date by
# read_csv() -- confirm, since the comparison and format() below rely on it.
df1 <- df1 %>% filter(patient_index_date > 0) # SGSS case
df2 <- df2 %>% filter(patient_index_date > 0) # primary care case
df <- rbind(df1, df2)

# keep earliest covid infection date: sort by patient and date, then keep the
# first row per patient. No group_by() is needed -- arrange() ignores groups
# by default and distinct() already keys on patient_id -- so the result is a
# plain (ungrouped) tibble.
df <- df %>%
  arrange(patient_id, patient_index_date) %>%
  distinct(patient_id, .keep_all = TRUE)

# exclude cases that have previous covid related history
# (variables before patient_index_date)
df <- df %>%
  filter(
    is.na(covid_admission_date),
    # is.na(icu_date_admitted),
    is.na(died_date_cpns),
    is.na(died_date_ons_covid)
  )

# NOTE(review): the case file is written before cal_YM is added, so
# case_covid_infection.csv has no cal_YM column while the other case files in
# this script do -- confirm whether this is intended.
write_csv(df, here::here("output", "case_covid_infection.csv"))

# calendar year-month of the index date, e.g. "2020-03"
df$cal_YM <- format(df$patient_index_date, "%Y-%m")
write_csv(df, here::here("output", "control_covid_infection.csv"))

# free the objects created in this section only, instead of wiping the whole
# global environment
rm(df1, df2, df)
#### COVID admission
# Builds the covid hospital admission case file: patients with an admission
# index date and no earlier covid death record, tagged with the calendar
# year-month of the index date.

# import data
df <- read_csv(here::here("output", "input_covid_admission.csv"))

df <- df %>%
  # has covid admission record (rows with an NA index date are dropped too,
  # because filter() discards rows whose condition is NA)
  filter(patient_index_date > 0) %>% # hosp admission case
  # exclude cases that have previous covid related history
  # (variables before patient_index_date)
  filter(
    # is.na(icu_date_admitted),
    is.na(died_date_cpns),
    is.na(died_date_ons_covid)
  )

# calendar year-month of the index date, e.g. "2020-03"
df$cal_YM <- format(df$patient_index_date, "%Y-%m")
write_csv(df, here::here("output", "case_covid_admission.csv"))

# free the object created in this section only, instead of wiping the whole
# global environment
rm(df)
#### COVID severe outcome (icu or death)
# Builds the severe-outcome case file from the two death extracts (CPNS and
# ONS); the ICU extract is currently disabled. Keeps each patient's earliest
# record and tags it with the calendar year-month of the index date.

# import data
# df1 <- read_csv(here::here("output", "input_covid_icu.csv"))
df2 <- read_csv(here::here("output", "input_covid_death_cpns.csv"))
df3 <- read_csv(here::here("output", "input_covid_death_ons.csv"))

# has covid severe outcome record
# (rows with an NA index date are dropped too, because filter() discards rows
# whose condition is NA)
# df1 <- df1 %>% filter(patient_index_date > 0) # icu
df2 <- df2 %>% filter(patient_index_date > 0) # cpns
df3 <- df3 %>% filter(patient_index_date > 0) # ons_covid
df <- rbind(df2, df3)

# keep earliest covid severe outcome date: sort by patient and date, then keep
# the first row per patient. No group_by() is needed -- arrange() ignores
# groups by default and distinct() already keys on patient_id -- so the result
# is a plain (ungrouped) tibble.
df <- df %>%
  arrange(patient_id, patient_index_date) %>%
  distinct(patient_id, .keep_all = TRUE)

# calendar year-month of the index date, e.g. "2020-03"
df$cal_YM <- format(df$patient_index_date, "%Y-%m")
write_csv(df, here::here("output", "case_covid_icu_death.csv"))
# #### general population
# rm(list=ls())
# list=seq(as.Date("2020-02-01"), as.Date("2021-12-01"), "month")
# for (i in 1:length(list)){
# df=read_csv(here::here("output","measures", paste0("input_covid_general_population_",list[i],".csv.gz")))
# df$patient_index_date=as.Date(list[i])
# df=df%>%
# filter(is.na(covid_admission_date),
# is.na(icu_date_admitted),
# is.na(died_date_cpns),
# is.na(died_date_ons_covid))
# df$cal_YM=format(df$patient_index_date,"%Y-%m")
# write_csv(df, here::here("output", paste0("control_general_population_",list[i],".csv")))
# }