Permalink
Cannot retrieve contributors at this time
Name already in use
A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
SeaWAS/SeaWAS_RR_allconditions_OHDSI.R
Go to fileThis commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
327 lines (255 sloc)
18.4 KB
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#Copyright: Mary Regina Boland | |
#Climate-Birth Month Disease Risk Study with OHDSI | |
#For details on method please see Boland et al. 2015 http://jamia.oxfordjournals.org/content/early/2015/06/01/jamia.ocv046.full | |
#Please direct any questions or comments to Mary Boland at mb3402@columbia.edu | |
########################################################### | |
# SeaWAS Proportions and RR Script # | |
########################################################### | |
# Install any needed libraries | |
pkgTest <- function(x) | |
{ | |
if (!require(x,character.only = TRUE)) | |
{ | |
install.packages(x,dep=TRUE) | |
if(!require(x,character.only = TRUE)) stop("Package not found") | |
} | |
} | |
pkgTest('devtools') | |
install_github("ohdsi/SqlRender") | |
install_github("ohdsi/DatabaseConnector") | |
# Load libraries | |
library(devtools) | |
library(SqlRender) | |
library(DatabaseConnector) | |
########################################################### | |
# Parameters: Please change these to the correct values: # | |
########################################################### | |
folder = "F:/Documents/OHDSI/StudyProtocols/Study4_SeaWAS_ClimateBirthMonth_OHDSI/R_scripts" # Folder containing the R files, which will also be used to store output | |
folder_for_storing_output = paste(folder, "/SeaWAS_proportions_output/", sep="") #output folder - subfolder of above for storing output of script | |
user <- NULL | |
pw <- NULL | |
port <- NULL | |
server <- "server_name" #server or host | |
dialect <- "sql server" #the target sql dialect | |
#make sure below is properly updated | |
connectionDetails <- createConnectionDetails(dbms = dialect, server = server) | |
cdmDatabaseSchema <- "omop4.dbo" | |
resultsDatabaseSchema <- "omop4_cohort_results.dbo" #This script doesn't require a results database | |
connection <- connect(connectionDetails) | |
########################################################### | |
# End of Parameters # | |
########################################################### | |
########################################################### | |
# List of Conditions with at least 1000 Patients # | |
########################################################### | |
SQL <- paste("SELECT DISTINCT a.condition_concept_id, c.concept_name, count(distinct a.person_id) AS num_patients_with_condition ", | |
"FROM @cdmDatabaseSchema.CONDITION_OCCURRENCE a ", | |
"JOIN @cdmDatabaseSchema.CONCEPT c ON a.condition_concept_id=c.concept_id ", | |
"JOIN @cdmDatabaseSchema.PERSON b ON a.person_id=b.person_id ", | |
"WHERE b.year_of_birth>=1900 AND b.year_of_birth<=2000 ", | |
"GROUP BY (a.condition_concept_id) ", | |
"ORDER BY (count(distinct a.person_id)) desc;", sep="") | |
sql <- renderSql(SQL, cdmDatabaseSchema=cdmDatabaseSchema)$sql | |
sql <- translateSql(sql, targetDialect = dialect)$sql | |
conditions_with_ptcounts <- querySql(connection, sql) | |
#trimming the list to only retain the conditions with at least 1000 patients | |
conditions_with_ptcounts = subset(conditions_with_ptcounts, conditions_with_ptcounts$num_patients_with_condition>=1000) | |
########################################################### | |
# End of List of Conditions with at least 1000 Patients # | |
########################################################### | |
########################################################### | |
# Getting All Patients with a Birth Month and a Diagnosis # | |
########################################################### | |
#getting patient data from server | |
SQL <- paste("SELECT distinct a.person_id, a.month_of_birth, a.day_of_birth ", | |
"FROM @cdmDatabaseSchema.PERSON a ", | |
"JOIN @cdmDatabaseSchema.CONDITION_OCCURRENCE b ON a.person_id=b.person_id ", | |
"WHERE a.year_of_birth>=1900 AND a.year_of_birth<=2000;", sep="") | |
sql <- renderSql(SQL, cdmDatabaseSchema=cdmDatabaseSchema)$sql | |
sql <- translateSql(sql, targetDialect = dialect)$sql | |
birthmonths_allpts <- querySql(connection, sql) | |
#rs = dbSendQuery(cumc, query) | |
#birthmonths_allpts = fetch(rs, n=-1) | |
daily_index_forweeks=seq(from=1,to=366, by=7) | |
matrix_pop_birthmonth = table(birthmonths_allpts$month_of_birth,birthmonths_allpts$day_of_birth) | |
########################################################### | |
# End Getting Data from Server # | |
########################################################### | |
########################################################### | |
# Smoothed and Raw Proportions # | |
########################################################### | |
i=1 | |
week_proportion_all=c() | |
month_proportion_all=c() | |
month_proportion_all_SD = c() | |
week_proportion_all_SD = c() | |
raw_month_proportion_all_STAN=c() | |
while (i<=dim(conditions_with_ptcounts)[1]){ | |
condition=conditions_with_ptcounts$condition_concept_id[i] | |
SQL <- paste("SELECT DISTINCT person_id ", | |
"FROM @cdmDatabaseSchema.CONDITION_OCCURRENCE ", | |
"WHERE condition_concept_id=", | |
conditions_with_ptcounts$condition_concept_id[i], | |
";", sep="") | |
sql <- renderSql(SQL, cdmDatabaseSchema=cdmDatabaseSchema)$sql | |
sql <- translateSql(sql, targetDialect = dialect)$sql | |
pts_w_condition <- querySql(connection, sql) | |
birthmonth_pts_w_condition = merge(birthmonths_allpts, pts_w_condition, by='person_id') | |
#getting and saving the raw proportions | |
raw_month_proportion_all_STAN[[i]] = table(birthmonth_pts_w_condition$month_of_birth) | |
matrix_birthmonth_wcondition = table(birthmonth_pts_w_condition$month_of_birth,birthmonth_pts_w_condition$day_of_birth) | |
proportion_condition_over_pop = matrix_birthmonth_wcondition/matrix_pop_birthmonth | |
proportion_condition_over_pop_daily=c() | |
proportion_condition_over_pop_daily =c(proportion_condition_over_pop[1, ], proportion_condition_over_pop[2 , ], proportion_condition_over_pop[3 , ], proportion_condition_over_pop[4 , ], proportion_condition_over_pop[5 , ], proportion_condition_over_pop[6, ], proportion_condition_over_pop[7, ], proportion_condition_over_pop[8, ], proportion_condition_over_pop[9, ], proportion_condition_over_pop[10, ], proportion_condition_over_pop[11, ], proportion_condition_over_pop[12 , ]) | |
proportion_condition_over_pop_daily = na.omit(proportion_condition_over_pop_daily) | |
daily_index_fordays = seq(from=1, to=366, by=1) | |
names(proportion_condition_over_pop_daily) = daily_index_fordays | |
#first calculate the smoothed per day proportion | |
smoothed_proportion_condition_over_pop_daily = c() | |
daily_index_formonths=seq(from=0,to=372, by=31) | |
m=1 | |
while (m < length(daily_index_formonths)){ | |
start_month = daily_index_formonths[m] | |
end_month = daily_index_formonths[m+1] | |
smoothed_proportion_condition_over_pop_daily[m] = mean(proportion_condition_over_pop_daily[start_month:end_month]) | |
m=m+1 | |
} | |
#smoothing daily proportion | |
smoothed_2mo_daily_proportion=c() | |
for (n in 1: length(proportion_condition_over_pop_daily)){ #processing over each day | |
if ((n-31)<0){ | |
k=n-31 | |
begin_2mo_window = 366 + k | |
}else{ | |
begin_2mo_window = n-31 | |
} | |
if((n+31)>366){ | |
d=n+31 | |
end_2mo_window = d-366 | |
}else{ | |
end_2mo_window = n+31 | |
} | |
if (((n-31)<0)||((n+31)>366)){ | |
smoothed_2mo_daily_proportion[n] = mean(c(proportion_condition_over_pop_daily[begin_2mo_window:366], proportion_condition_over_pop_daily[1:end_2mo_window])) | |
}else{ | |
smoothed_2mo_daily_proportion[n] = mean(proportion_condition_over_pop_daily[begin_2mo_window:end_2mo_window]) | |
} | |
} | |
r=1 | |
smoothed_2mo_daily_proportion_weeks=c() | |
smoothed_2mo_daily_proportion_weeks_sd = c() | |
while (r < length(daily_index_forweeks)){ | |
start_week = daily_index_forweeks[r] | |
end_week = daily_index_forweeks[r+1] | |
smoothed_2mo_daily_proportion_weeks[r] = mean(smoothed_2mo_daily_proportion[start_week:end_week]) | |
smoothed_2mo_daily_proportion_weeks_sd[r] = sd(smoothed_2mo_daily_proportion[start_week:end_week]) | |
r=r+1 | |
} | |
smoothed_2mo_daily_proportion_months = c() | |
smoothed_2mo_daily_proportion_months_sd = c() | |
k=1 | |
while (k < length(daily_index_formonths)){ | |
start_month = daily_index_formonths[k] | |
end_month = daily_index_formonths[k+1] | |
if(k==12){ | |
smoothed_2mo_daily_proportion_months[k]= mean(smoothed_2mo_daily_proportion[start_month:366]) | |
smoothed_2mo_daily_proportion_months_sd[k] = sd(smoothed_2mo_daily_proportion[start_month:366]) | |
}else{ | |
smoothed_2mo_daily_proportion_months[k]= mean(smoothed_2mo_daily_proportion[start_month:end_month]) | |
smoothed_2mo_daily_proportion_months_sd[k] = sd(smoothed_2mo_daily_proportion[start_month:end_month]) | |
} | |
k=k+1 | |
} | |
condition_name = conditions_with_ptcounts$concept_name[conditions_with_ptcounts$condition_concept_id==condition] | |
week_proportion_all[[i]] = smoothed_2mo_daily_proportion_weeks[1:52] | |
month_proportion_all[[i]] =smoothed_2mo_daily_proportion_months | |
month_proportion_all_SD[[i]] = smoothed_2mo_daily_proportion_months_sd | |
week_proportion_all_SD[[i]] = smoothed_2mo_daily_proportion_weeks_sd[1:52] | |
cat("iteration", i, "condition", condition, "\n") | |
i = i + 1 | |
} | |
#condition names | |
conditions_all=conditions_with_ptcounts$condition_concept_id | |
condition_names_all = list() | |
for (i in 1:length(conditions_all)){ | |
condition_names_all[i] = as.character(conditions_with_ptcounts$concept_name[conditions_with_ptcounts$condition_concept_id==conditions_all[i]] ) | |
} | |
head(conditions_with_ptcounts) | |
#########Relative Risk | |
#for all conditions | |
i=1 | |
total_number_people_in_db = length(birthmonths_allpts$person_id) | |
relative_risk_allconditions = list() | |
while (i <=length(month_proportion_all)){ | |
v = month_proportion_all[[i]] | |
total_number_disease = conditions_with_ptcounts$num_patients_with_condition[conditions_with_ptcounts$concept_name==condition_names_all[i]] | |
relative_risk_allconditions[[i]] = (v / (total_number_disease / total_number_people_in_db)) | |
i = i+1 | |
} | |
i=1 | |
SeaWAS_RR_all1688conditions = data.frame() | |
while (i <=length(relative_risk_allconditions)){ | |
rr_per_month = unlist(relative_risk_allconditions[[i]]) | |
seawas_data_per_month = data.frame(concept_id = conditions_all[i], concept_name = condition_names_all[i], Jan = rr_per_month[1], Feb = rr_per_month[2], Mar = rr_per_month[3], Apr = rr_per_month[4], May = rr_per_month[5], Jun = rr_per_month[6], Jul = rr_per_month[7], Aug = rr_per_month[8], Sept = rr_per_month[9], Oct = rr_per_month[10], Nov = rr_per_month[11], Dec = rr_per_month[12]) | |
colnames(seawas_data_per_month)[2] = c("concept_name") | |
SeaWAS_RR_all1688conditions = rbind(SeaWAS_RR_all1688conditions, seawas_data_per_month) | |
rm(seawas_data_per_month) | |
i = i + 1 | |
} | |
#for the monthly proportions | |
i=1 | |
SeaWAS_MonthProp_all1688conditions = data.frame() | |
while (i <=length(month_proportion_all)){ | |
prop_per_month = unlist(month_proportion_all[[i]]) | |
SD_prop_per_month = unlist(month_proportion_all_SD[[i]]) | |
seawas_data_per_month = data.frame(concept_id = conditions_all[i], concept_name = condition_names_all[i], Jan = prop_per_month[1], Jan_SD = SD_prop_per_month[1], Feb = prop_per_month[2], Feb_SD = SD_prop_per_month[2], Mar = prop_per_month[3], Mar_SD = SD_prop_per_month[3], Apr = prop_per_month[4], Apr_SD = SD_prop_per_month[4], May = prop_per_month[5], May_SD = SD_prop_per_month[5], Jun = prop_per_month[6], Jun_SD = SD_prop_per_month[6], Jul = prop_per_month[7], Jul_SD = SD_prop_per_month[7], Aug = prop_per_month[8], Aug_SD = SD_prop_per_month[8], Sept = prop_per_month[9], Sept_SD = SD_prop_per_month[9], Oct = prop_per_month[10], Oct_SD = SD_prop_per_month[10], Nov = prop_per_month[11], Nov_SD = SD_prop_per_month[11], Dec = prop_per_month[12], Dec_SD = SD_prop_per_month[12]) | |
colnames(seawas_data_per_month)[2] = c("concept_name") | |
SeaWAS_MonthProp_all1688conditions = rbind(SeaWAS_MonthProp_all1688conditions, seawas_data_per_month) | |
rm(seawas_data_per_month) | |
i = i + 1 | |
} | |
#for the weekly proportion | |
i=1 | |
SeaWAS_WeekProp_all1688conditions = data.frame() | |
while (i <=length(week_proportion_all)){ | |
prop_per_month = unlist(week_proportion_all[[i]]) | |
SD_prop_per_week = unlist(week_proportion_all_SD[[i]]) | |
seawas_data_per_week = data.frame(concept_id = conditions_all[i], concept_name = condition_names_all[i], Week1 = seawas_data_per_week[1], Week1_SD = SD_prop_per_week[1], Week2 = seawas_data_per_week[2], Week2_SD = SD_prop_per_week[2], Week3 = seawas_data_per_week[3], Week3_SD = SD_prop_per_week[3], Week4 = seawas_data_per_week[4], Week4_SD = SD_prop_per_week[4], Week5 = seawas_data_per_week[5], Week5_SD = SD_prop_per_week[5], Week6 = seawas_data_per_week[6], Week6_SD = SD_prop_per_week[6], Week7 = seawas_data_per_week[7], Week7_SD = SD_prop_per_week[7], Week8 = seawas_data_per_week[8], Week8_SD = SD_prop_per_week[8], Week9 = seawas_data_per_week[9], Week9_SD = SD_prop_per_week[9], Week10 = seawas_data_per_week[10], Week10_SD = SD_prop_per_week[10], Week11 = seawas_data_per_week[11], Week11_SD = SD_prop_per_week[11], Week12 = seawas_data_per_week[12], Week12_SD = SD_prop_per_week[12], Week13 = seawas_data_per_week[13], Week13_SD = SD_prop_per_week[13], Week14 = seawas_data_per_week[14], Week14_SD = SD_prop_per_week[14], Week15 = seawas_data_per_week[15], Week15_SD = SD_prop_per_week[15], Week16 = seawas_data_per_week[16], Week16_SD = SD_prop_per_week[16], Week17 = seawas_data_per_week[17], Week17_SD = SD_prop_per_week[17], Week18 = seawas_data_per_week[18], Week18_SD = SD_prop_per_week[18], Week19 = seawas_data_per_week[19], Week19_SD = SD_prop_per_week[19], Week20 = seawas_data_per_week[20], Week20_SD = SD_prop_per_week[20], Week21 = seawas_data_per_week[21], Week21_SD = SD_prop_per_week[21], Week22 = seawas_data_per_week[22], Week22_SD = SD_prop_per_week[22], Week23 = seawas_data_per_week[23], Week23_SD = SD_prop_per_week[23], Week24 = seawas_data_per_week[24], Week24_SD = SD_prop_per_week[24], Week25 = seawas_data_per_week[25], Week25_SD = SD_prop_per_week[25], Week26 = seawas_data_per_week[26], Week26_SD = SD_prop_per_week[26], Week27 = seawas_data_per_week[27], Week27_SD = SD_prop_per_week[27], Week28 = seawas_data_per_week[28], Week28_SD = SD_prop_per_week[28], Week29 = seawas_data_per_week[29], Week29_SD = SD_prop_per_week[29], Week30 = seawas_data_per_week[30], Week30_SD = SD_prop_per_week[30], Week31 = seawas_data_per_week[31], Week31_SD = SD_prop_per_week[31], Week32 = seawas_data_per_week[32], Week32_SD = SD_prop_per_week[32], Week33 = seawas_data_per_week[33], Week33_SD = SD_prop_per_week[33], Week34 = seawas_data_per_week[34], Week34_SD = SD_prop_per_week[34], Week35 = seawas_data_per_week[35], Week35_SD = SD_prop_per_week[35], Week36 = seawas_data_per_week[36], Week36_SD = SD_prop_per_week[36], Week37 = seawas_data_per_week[37], Week37_SD = SD_prop_per_week[37], Week38 = seawas_data_per_week[38], Week38_SD = SD_prop_per_week[38], Week39 = seawas_data_per_week[39], Week39_SD = SD_prop_per_week[39], Week40 = seawas_data_per_week[40], Week40_SD = SD_prop_per_week[40], Week41 = seawas_data_per_week[41], Week41_SD = SD_prop_per_week[41], Week42 = seawas_data_per_week[42], Week42_SD = SD_prop_per_week[42], Week43 = seawas_data_per_week[43], Week43_SD = SD_prop_per_week[43], Week44 = seawas_data_per_week[44], Week44_SD = SD_prop_per_week[44], Week45 = seawas_data_per_week[45], Week45_SD = SD_prop_per_week[45], Week46 = seawas_data_per_week[46], Week46_SD = SD_prop_per_week[46], Week47 = seawas_data_per_week[47], Week47_SD = SD_prop_per_week[47], Week48 = seawas_data_per_week[48], Week48_SD = SD_prop_per_week[48], Week49 = seawas_data_per_week[49], Week49_SD = SD_prop_per_week[49], Week50 = seawas_data_per_week[50], Week50_SD = SD_prop_per_week[50], Week51 = seawas_data_per_week[51], Week51_SD = SD_prop_per_week[51], Week52 = seawas_data_per_week[52], Week52_SD = SD_prop_per_week[52]) | |
colnames(seawas_data_per_month)[2] = c("concept_name") | |
SeaWAS_WeekProp_all1688conditions = rbind(SeaWAS_WeekProp_all1688conditions, seawas_data_per_week) | |
rm(seawas_data_per_week) | |
i = i + 1 | |
} | |
###For Stan algorithm | |
i=1 | |
SeaWAS_monthprop_all1688conditions_raw_STAN = data.frame() | |
while (i <=length(raw_month_proportion_all_STAN)){ | |
month_prop_per_month_STAN = unlist(raw_month_proportion_all_STAN[[i]]) | |
seawas_data_per_month = data.frame(concept_id = conditions_all[i], concept_name = condition_names_all[i], Jan = month_prop_per_month_STAN[1], Feb = month_prop_per_month_STAN[2], Mar = month_prop_per_month_STAN[3], Apr = month_prop_per_month_STAN[4], May = month_prop_per_month_STAN[5], Jun = month_prop_per_month_STAN[6], Jul = month_prop_per_month_STAN[7], Aug = month_prop_per_month_STAN[8], Sept = month_prop_per_month_STAN[9], Oct = month_prop_per_month_STAN[10], Nov = month_prop_per_month_STAN[11], Dec = month_prop_per_month_STAN[12]) | |
colnames(seawas_data_per_month)[2] = c("concept_name") | |
SeaWAS_monthprop_all1688conditions_raw_STAN = rbind(SeaWAS_monthprop_all1688conditions_raw_STAN, seawas_data_per_month) | |
rm(seawas_data_per_month) | |
i = i + 1 | |
} | |
#Overall Month Distribution for Stan | |
overall_month_dist = table(birthmonths_allpts$month_of_birth) #this is the overall month distribution-RAW | |
seawas_data_per_month = data.frame(concept_id = "00000", concept_name = "Overall Month Dist", Jan = overall_month_dist[1], Feb = overall_month_dist[2], Mar = overall_month_dist[3], Apr = overall_month_dist[4], May = overall_month_dist[5], Jun = overall_month_dist[6], Jul = overall_month_dist[7], Aug = overall_month_dist[8], Sept = overall_month_dist[9], Oct = overall_month_dist[10], Nov = overall_month_dist[11], Dec = overall_month_dist[12]) | |
colnames(seawas_data_per_month)[2] = c("concept_name") | |
#Binding to same DataFrame | |
SeaWAS_monthprop_all1688conditions_raw_STAN = rbind(SeaWAS_monthprop_all1688conditions_raw_STAN, overall_month_dist) | |
#Writing Output to Disk | |
#RR | |
write.csv(SeaWAS_RR_all1688conditions, paste(folder_for_storing_output, "SeaWAS_RR_all_1688conditions.csv", sep="")) | |
#Month Proportions - Smoothed | |
write.csv(SeaWAS_MonthProp_all1688conditions, paste(folder_for_storing_output, "SeaWAS_MonthProp_all1688conditions.csv")) | |
#Week Proportions - Smoothed | |
write.csv(SeaWAS_WeekProp_all1688conditions, paste(folder_for_storing_output, "SeaWAS_WeekProp_all1688conditions.csv")) | |
#Raw Month Proportions for Stan Model | |
write.csv(SeaWAS_monthprop_all1688conditions_raw_STAN, paste(folder_for_storing_output, "SeaWAS_MonthProp_all1688conditions_raw_STAN.csv")) | |
########################################################### | |
# Please Remember to Email (to mb3402@columbia.edu): # | |
# 1. SeaWAS_RR_all_1688conditions.csv # | |
# 2. SeaWAS_MonthProp_all1688conditions.csv # | |
# 3. SeaWAS_WeekProp_all1688conditions.csv # | |
# 4. SeaWAS_MonthProp_all1688conditions_raw_STAN.csv # | |
########################################################### |