generated from opensafely/research-template
/
process_sepsis_2019.R
57 lines (48 loc) · 2.08 KB
/
process_sepsis_2019.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
# # # # # # # # # # # # # # # # # # # # #
# This script: #
# define case cohort #
# # # # # # # # # # # # # # # # # # # # #
## Import libraries---
library("tidyverse")
library('plyr')
library('dplyr')
library('lubridate')
library('stringr')
library("data.table")
library("ggpubr")
library("finalfit")
# Import data ----
# cols_only() restricts the read to the columns listed here; any other
# column present in the CSV is not loaded.
col_spec <- cols_only(
  # format = "" leaves date parsing to readr's default date format.
  patient_index_date = col_date(format = ""),
  age                = col_number(),
  sex                = col_character(),
  stp                = col_character(),
  region             = col_character(),
  imd                = col_integer(),
  has_outcome_1yr    = col_number(),
  uti_record         = col_number(),
  lrti_record        = col_number(),
  urti_record        = col_number(),
  sinusitis_record   = col_number(),
  ot_externa_record  = col_number(),
  ot_media_record    = col_number(),
  pneumonia_record   = col_number(),
  patient_id         = col_number()
)

# Input path is resolved relative to the project root via here::here().
df <- read_csv(
  file = here::here("output", "input_sepsis_2019.csv"),
  col_types = col_spec
)
# Filter cohort (default): keep only rows that have a patient_index_date.
df <- df %>%
  filter(!is.na(patient_index_date))

# Missing IMD becomes its own category, coded 0.
# The column is kept as integer until the factor conversion so that
# (a) no implicit integer -> character coercion is involved, and
# (b) factor levels are ordered numerically (0, 1, 2, ..., 10) rather than
#     as strings ("0", "1", "10", "2", ...).
df$imd[is.na(df$imd)] <- 0L
df$imd <- factor(df$imd)
# Check cohort_case: descriptive summary of every row kept in df ----
# Columns to tabulate; has_outcome_1yr is included in this table.
dttable <- df %>%
  select(
    age, sex, stp, region, imd, has_outcome_1yr,
    uti_record, lrti_record, urti_record, sinusitis_record,
    ot_externa_record, ot_media_record, pneumonia_record
  )

# Every selected column is passed as an explanatory variable; no dependent
# variable is supplied to summary_factorlist().
colsfortab <- names(dttable)
table_0 <- summary_factorlist(dttable, explanatory = colsfortab)

write_csv(table_0, here::here("output", "table_sepsis_2019_0.csv"))
# Descriptive summary restricted to rows where has_outcome_1yr is 0 ----
# has_outcome_1yr is read with col_number(), so it is compared against the
# number 0 rather than the string "0" (which only matched through implicit
# numeric -> character coercion). Rows with a missing value are dropped by
# filter() either way.
df_1 <- df %>%
  filter(has_outcome_1yr == 0)

# Same columns as the full-cohort table, minus has_outcome_1yr itself
# (it is constant within this subset).
dttable_1 <- df_1 %>%
  select(
    age, sex, stp, region, imd,
    uti_record, lrti_record, urti_record, sinusitis_record,
    ot_externa_record, ot_media_record, pneumonia_record
  )

# Every selected column is passed as an explanatory variable; no dependent
# variable is supplied to summary_factorlist().
colsfortab_1 <- colnames(dttable_1)
table_1 <- summary_factorlist(dttable_1, explanatory = colsfortab_1)

write_csv(table_1, here::here("output", "table_sepsis_2019_1.csv"))