# regCovid_example.R
#regCOVID
#ClinicalTrials.gov Connection
# To run this script, first create a login at
# https://aact.ctti-clinicaltrials.org/connect
# The credentials are read from the AACT_USER and AACT_PASSWORD environment
# variables (set them in ~/.Renviron or with Sys.setenv()) so that they never
# have to be written into this file.
user <- Sys.getenv("AACT_USER")
psw <- Sys.getenv("AACT_PASSWORD")
# Stop early with a clear message rather than passing empty credentials on to
# the database connection below.
if (!nzchar(user) || !nzchar(psw)) {
  stop(
    "Set the AACT_USER and AACT_PASSWORD environment variables ",
    "to your AACT login before running this script.",
    call. = FALSE
  )
}
# Connect to AACT, the relational version of the ClinicalTrials.gov (CTG) database
# AACT data is delayed by about 4 days
library(RPostgreSQL)
library(tidyverse)
# Open a PostgreSQL connection to the AACT database with the `user` and `psw`
# credentials defined above.
drv <- dbDriver('PostgreSQL')
con <- dbConnect(
  drv,
  host = "aact-db.ctti-clinicaltrials.org",
  port = 5432,
  dbname = "aact",
  user = user,
  password = psw
)
# List the tables reachable through this connection. The surrounding
# parentheses make the assigned value visible, so it auto-prints exactly as a
# bare `tbls` line would.
(tbls <- RPostgreSQL::dbListTables(con))
library(dplyr)
library(tidyquant)
library(xts)
library(readxl)
library(reshape2)
library(htmltab)
library(data.table)
#-Book A: Search Method A (search for select keywords in the title of the study)
# The most complete method in our analysis
# Limited to studies submitted after the first reporting of COVID-19
# (the SQL filter is on study_first_submitted_date > '2019-12-27')
# Some studies started before this time were modified to include COVID-19
# These studies were excluded for consistency and to avoid the inclusion of non COVID-19 studies
# The title match is case-insensitive: official_title is lower-cased before
# being compared with each keyword pattern
co_studies_q <- "select * FROM studies
where study_first_submitted_date > '2019-12-27' and (lower(official_title) like '%covid%'
or lower(official_title) like '%sars-cov%'or lower(official_title) like '%2019-ncov%'
or lower(official_title) like '%cov2%'or lower(official_title) like '%cov-2%'
or lower(official_title) like '%coronavirus%'or lower(official_title) like '%corona virus%' )"
# Generates list of COVID-19 studies from search method A
co_studies <- dbGetQuery(con, co_studies_q)
# No later statement in this script uses `con`, so release the database
# connection now instead of leaving it open until the R session ends.
# invisible() keeps the returned status from being printed.
invisible(dbDisconnect(con))
# Count the COVID-19 studies in each overall_status category. count() with
# `name =` produces the Study_Count column directly and returns an ungrouped
# table, so no positional rename of the second column is needed.
Counts_by_status <- co_studies %>%
  count(overall_status, name = "Study_Count")
# This is the first file the script writes; write_csv() does not create
# missing directories, so make sure the output folder exists beforehand.
dir.create("finaloutput", showWarnings = FALSE, recursive = TRUE)
write_csv(Counts_by_status, 'finaloutput/regCovid_status_cnt.csv')
# We chose to only look at studies that have progressed naturally due to the
# activation of certain quality checks.
# Kept statuses: Recruiting, Completed, Enrolling by invitation and
# Active, not recruiting. Every other status (e.g. not yet recruiting,
# terminated, withdrawn) is dropped.
# Two studies are additionally removed by NCT ID.
# NOTE(review): the reason for excluding these two IDs is not recorded here.
covid9 <- filter(
  co_studies,
  overall_status %in% c('Recruiting', 'Completed', 'Enrolling by invitation', 'Active, not recruiting'),
  nct_id != 'NCT04331860',
  nct_id != 'NCT04372069'
)
# Final list of analyzed COVID-19 studies
write_csv(covid9, 'finaloutput/regCovid_all_studies-a.csv')
# Primary analysis: each study type is analyzed separately
#------Chapter 1: Covid studies by type
# Suffixes used in object and file names:
#   int = Interventional Trials
#   obs = Observational Studies
#   reg = Registry-based Studies
covid9_int <- filter(covid9, study_type == 'Interventional')
write_csv(covid9_int, 'finaloutput/regCovid_int_a.csv')

covid9_obs <- filter(covid9, study_type == 'Observational')
write_csv(covid9_obs, 'finaloutput/regCovid_obs_a.csv')

covid9_reg <- filter(covid9, study_type == 'Observational [Patient Registry]')
write_csv(covid9_reg, 'finaloutput/regCovid_registry_a.csv')

# One row per subset: the number of studies overall and in each study type
Study_counts <- data.frame(
  Type = c("Total_Studies", "interventional", "Observational", "Registry"),
  Study_count = vapply(
    list(covid9, covid9_int, covid9_obs, covid9_reg),
    nrow,
    integer(1)
  )
)
write_csv(Study_counts, "finaloutput/regCovid_study_cnts.csv")
library(RCurl)
# Read the published study lists from the project repository.
# Note: these assignments replace the covid9, covid9_int, covid9_obs and
# covid9_reg objects computed earlier in this script with the copies stored
# on GitHub.
covid9 <- read_csv(
  url('https://raw.githubusercontent.com/lhncbc/r-snippets-bmi/master/regCOVID/regCovid_all_studies-a.csv')
)
covid9_int <- read_csv(
  url('https://raw.githubusercontent.com/lhncbc/r-snippets-bmi/master/regCOVID/regCovid_int_a.csv')
)
covid9_obs <- read_csv(
  url('https://raw.githubusercontent.com/lhncbc/r-snippets-bmi/master/regCOVID/regCovid_obs_a.csv')
)
covid9_reg <- read_csv(
  url('https://raw.githubusercontent.com/lhncbc/r-snippets-bmi/master/regCOVID/regCovid_registry_a.csv')
)