generated from opensafely/research-template
/
ab_recorded_indication_3.R
116 lines (86 loc) · 3.45 KB
/
ab_recorded_indication_3.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
### This script reshapes the data from one row per patient to one row per antibiotic (AB) prescription time.
### Every patient has 10 AB extraction slots (times 1-10).
### Variables include:
### patient (id), age, sex, times (1-10), ab_date, prevalent (1/0), ab_count, infection type
## Import libraries---
library("tidyverse")
library("ggplot2")
library('plyr')
library('dplyr')#conflict with plyr; load after plyr
library('lubridate')
library('stringr')
library("data.table")
library("ggpubr")
# Remove every object currently visible in the workspace. Attached packages
# are not affected, so the library() calls above stay in force.
rm(list = ls())
# Switch the working directory to <project root>/output/measures so that
# relative paths (e.g. the list.files() call in the commented-out loader
# below) resolve there.
setwd(here::here("output", "measures"))
# # ### read data: 2020-01-01 to 2020-06-01
# # ### import patient-level data (study definition input.csv) to summarize antibiotics counts
# # ############ loop reading multiple CSV files ################
# # # read file list from input.csv
# # csvFiles = list.files(pattern="input_antibiotics_2_", full.names = TRUE)
# # csvFiles =csvFiles [1:6] # select half year
# # temp <- vector("list", length(csvFiles))
# # for (i in seq_along(csvFiles)){
# # filename <- csvFiles[i]
# # temp_df <- read_csv(filename)
# # filename <- basename(filename)
# # filename <-str_remove(filename, "input_antibiotics_2_")
# # filename <-str_remove(filename, ".csv.gz")
# # #add to per-month temp df
# # temp_df$date <- filename
# # mutate(temp_df, date = as.Date(date, "%Y-%m-%d"))
# # #add df to list
# # temp[[i]] <- temp_df
# # }
# # # combine list -> data.table/data.frame
# # df <-plyr::ldply(temp, data.frame)
# # rm(temp,csvFiles,i,temp_df,filename)# remove temporary list
# Read a single monthly extract (January 2020) from output/measures.
df <- read_csv(
  here::here("output", "measures", "input_antibiotics_2_2020-01-01.csv.gz")
)

# Keep antibiotic users only: rows whose antibacterial_brit is non-zero.
# filter() also drops rows where the condition evaluates to NA.
df <- dplyr::filter(df, antibacterial_brit != 0)

### Remove the rows carrying the latest date present in the data.
# NOTE(review): assumes the extract already has a `date` column -- the
# commented-out multi-file loader above derived it from the file name;
# confirm it exists for this single-file read.
# NOTE(review): if every row of a one-month file shares the same `date`,
# this step removes all rows -- confirm that is intended.
last.date <- max(df$date)
df <- dplyr::filter(df, date != last.date)

# Record the first and last remaining dates, then store `date` as Date.
first_mon <- min(df$date)
last_mon <- max(df$date)
df$date <- as.Date(df$date)
# Names of the ten per-prescription columns in the patient-level extract,
# one vector per column family, each ordered by prescription slot 1..10.
prevalent_check <- sprintf("prevalent_AB_date_%d", 1:10)
ab_count_10 <- sprintf("AB_date_%d_count", 1:10)
ab_category <- sprintf("AB_date_%d_indication", 1:10)
# Labels for the recorded indication categories.
indications <- c(
  "uti", "lrti", "urti", "sinusits", "otmedia", "ot_externa", "asthma",
  "cold", "cough", "copd", "pneumonia", "renal", "sepsis", "throat",
  "uncoded"
)
ab_date_10 <- sprintf("AB_date_%d", 1:10)
# #replace NA with "uncoded" in AB_indication_1-10 columns
# for (i in 1:10){
# df[,ab_category[i]]=ifelse(is.na(df[,ab_category[i]]),"uncoded", df[,ab_category[i]])}
#### patient/row --> prescription/row

# Stack one family of per-prescription columns into long format.
#
# data:       patient-level data frame containing patient_id, age, sex and
#             every column named in `cols`.
# cols:       character vector of column names, one per prescription slot,
#             in slot order.
# value_name: name for the column that receives the stacked values.
#
# Returns a data frame with columns patient_id, age, sex, times
# ("time1", "time2", ...) and `value_name`: one row per input row per slot.
stack_slot_columns <- function(data, cols, value_name) {
  slot_labels <- paste0("time", seq_along(cols))
  # all_of() makes select() treat `cols` strictly as a vector of column
  # names; a bare external vector is deprecated and would be shadowed by a
  # data column that happened to share the vector's name.
  wide <- dplyr::select(data, patient_id, age, sex, dplyr::all_of(cols))
  # The slot columns follow the three id columns; give them a shared set of
  # labels so the long tables can later be matched on `times`.
  colnames(wide)[3 + seq_along(cols)] <- slot_labels
  # `wide` holds only the id columns and the slot columns, so excluding the
  # ids gathers exactly time1..timeN in slot order.
  long <- tidyr::gather(
    wide,
    key = "times", value = "value",
    -patient_id, -age, -sex
  )
  colnames(long)[colnames(long) == "value"] <- value_name
  long
}

# ab_date_1-10
df1.1 <- stack_slot_columns(df, ab_date_10, "date")
# prevalent_AB_date_1-10
df2.1 <- stack_slot_columns(df, prevalent_check, "prevalent")
# "AB_date_count"1-10
df3.1 <- stack_slot_columns(df, ab_count_10, "count")
# ab_category 1-10
df4.1 <- stack_slot_columns(df, ab_category, "infection")
# merge
# Join the four long tables left to right on patient and prescription slot,
# so each row carries date, prevalent, count and infection together.
DF <- Reduce(
  function(left, right) {
    merge(left, right, by = c("patient_id", "age", "sex", "times"))
  },
  list(df1.1, df2.1, df3.1, df4.1)
)

# exclude observations without an AB prescription date
DF <- dplyr::filter(DF, !is.na(date))

# Store `date` as Date; `origin` is supplied for the case where the column
# holds numeric day counts.
DF$date <- as.Date(DF$date, origin = "1970-01-01")

# Save the prescription-level table to output/total_ab.rds.
write_rds(DF, here::here("output", "total_ab.rds"))