# project.yaml

# Pipeline format version; quoted so YAML keeps the string "3.0" instead of
# parsing it as a float.
version: "3.0"

# Settings for generated expectations data.
# NOTE(review): population_size is presumably the number of dummy patients —
# confirm against the OpenSAFELY pipeline documentation.
expectations:
  population_size: 1000

actions:
  
# study cohort

  # Runs cohortextractor generate_cohort against
  # study_definition_covid_admission; the CSV below is its only declared output.
  generate_study_population_covid_admission:
    run: >-
      cohortextractor:latest generate_cohort
      --study-definition study_definition_covid_admission
    outputs:
      highly_sensitive:
        cohort: output/input_covid_admission.csv

  # Runs analysis/process_1.R once the admission cohort extraction has
  # finished; declares one case CSV and one control CSV.
  process_1:
    run: r:latest analysis/process_1.R
    needs:
      - generate_study_population_covid_admission
    outputs:
      highly_sensitive:
        case: output/case_covid_icu_death.csv
        control: output/control_covid_hosp.csv
        # Disabled outputs (the *_2 files are declared by process_2 instead):
        # case2: output/case_covid_icu_death_2.csv
        # control2: output/control_covid_hosp_2.csv

  # Renders analysis/check_process_1.Rmd to an HTML report.
  # output_dir is the absolute /workspace/output path, as in every other
  # rmarkdown::render action in this file, so the report location does not
  # depend on the working directory R starts in.
  # NOTE(review): despite its name this action depends only on the cohort
  # extraction, not on process_1 — confirm the Rmd does not read process_1's
  # CSV outputs.
  check_process_1:
    run: r:latest -e 'rmarkdown::render("analysis/check_process_1.Rmd", knit_root_dir = "/workspace", output_dir="/workspace/output")'
    needs: [generate_study_population_covid_admission]
    outputs:
      moderately_sensitive:
        html: output/check_process_1.html

# matching
       
  # Matching step (original notes: "matching with replacement", "died covid").
  # Renders analysis/matching.Rmd; declares the HTML report plus the matched
  # and unmatched patient files.
  matching:
    run: >-
      r:latest -e
      'rmarkdown::render("analysis/matching.Rmd",
      knit_root_dir = "/workspace",
      output_dir="/workspace/output")'
    needs:
      - process_1
    outputs:
      highly_sensitive:
        rds1: output/matched_patients.rds
        rds2: output/unmatched_patients.rds
        csv: output/matched_patients_id.csv
      moderately_sensitive:
        html: output/matching.html

  # Renders analysis/check_unmatched.Rmd to HTML after the matching action.
  check_unmatched:
    run: >-
      r:latest -e
      'rmarkdown::render("analysis/check_unmatched.Rmd",
      knit_root_dir = "/workspace",
      output_dir="/workspace/output")'
    needs:
      - matching
    outputs:
      moderately_sensitive:
        html: output/check_unmatched.html

  # Runs cohortextractor generate_cohort (with --with-end-date-fix) against
  # study_definition_outcome; scheduled after matching.
  extract_variables:
    run: >-
      cohortextractor:latest generate_cohort
      --with-end-date-fix
      --study-definition study_definition_outcome
    needs:
      - matching
    outputs:
      highly_sensitive:
        cohort: output/input_outcome.csv

  # "Add variables" step: runs analysis/process_Rmatching.R once both the
  # outcome extraction and the matching have completed.
  process_Rmatching:
    run: r:latest analysis/process_Rmatching.R
    needs:
      - extract_variables
      - matching
    outputs:
      highly_sensitive:
        cohort1: output/matched_outcome.rds
        rds1: output/abtype79.rds
        rds2: output/comor17.rds

# main analysis

  # Table 1 (matching variables): runs analysis/table1.R and declares the
  # unmatched, matched and random table CSVs.
  table1_round:
    run: r:latest analysis/table1.R
    needs:
      - process_1
      - process_Rmatching
    outputs:
      moderately_sensitive:
        csv1: output/table1_unmatched.csv
        csv2: output/table1_matched.csv
        csv3: output/table1_random.csv
        
  # Table 2 (confounders): runs analysis/table2.R.
  # Output keys are csv1 and csv3; there is no csv2 entry for this table.
  table2_round:
    run: r:latest analysis/table2.R
    needs:
      - process_Rmatching
    outputs:
      moderately_sensitive:
        csv1: output/table2_matched.csv
        csv3: output/table2_random.csv

  # Table 3 (antibiotics): runs analysis/table3.R.
  # Output keys are csv1 and csv3; there is no csv2 entry for this table.
  table3_round:
    run: r:latest analysis/table3.R
    needs:
      - process_Rmatching
    outputs:
      moderately_sensitive:
        csv1: output/table3.csv
        csv3: output/table3_ab.csv

  # Runs analysis/model/model.R; declares the crude and adjusted model_1 CSVs.
  model:
    run: r:latest analysis/model/model.R
    needs:
      - process_Rmatching
    outputs:
      moderately_sensitive:
        csv1: output/model_1_crude.csv
        csv2: output/model_1_adjusted.csv
  
  # "abtype modeling" (original note): renders analysis/model_2_3grp.Rmd to
  # an HTML report.
  model_2_3grp:
    run: >-
      r:latest -e
      'rmarkdown::render("analysis/model_2_3grp.Rmd",
      knit_root_dir = "/workspace",
      output_dir="/workspace/output")'
    needs:
      - process_Rmatching
    outputs:
      moderately_sensitive:
        html: output/model_2_3grp.html

  # Renders analysis/model_2.Rmd to an HTML report.
  model_2:
    run: >-
      r:latest -e
      'rmarkdown::render("analysis/model_2.Rmd",
      knit_root_dir = "/workspace",
      output_dir="/workspace/output")'
    needs:
      - process_Rmatching
    outputs:
      moderately_sensitive:
        html: output/model_2.html

  # sensitivity analyses (subgroup, time, disease, complete-case)

  # Runs analysis/model/model_subgroup.R. Each subgroup (male, female,
  # age1-age4) declares three CSVs: crude, adjusted and ab.
  model_subgroup:
    run: r:latest analysis/model/model_subgroup.R
    needs:
      - process_Rmatching
    outputs:
      moderately_sensitive:
        # male
        csv1.1: output/model_3_male_crude.csv
        csv1.2: output/model_3_male_adjusted.csv
        csv1.3: output/model_3_male_ab.csv
        # female
        csv2.1: output/model_3_female_crude.csv
        csv2.2: output/model_3_female_adjusted.csv
        csv2.3: output/model_3_female_ab.csv
        # age1
        csv3.1: output/model_3_age1_crude.csv
        csv3.2: output/model_3_age1_adjusted.csv
        csv3.3: output/model_3_age1_ab.csv
        # age2
        csv4.1: output/model_3_age2_crude.csv
        csv4.2: output/model_3_age2_adjusted.csv
        csv4.3: output/model_3_age2_ab.csv
        # age3
        csv5.1: output/model_3_age3_crude.csv
        csv5.2: output/model_3_age3_adjusted.csv
        csv5.3: output/model_3_age3_ab.csv
        # age4
        csv6.1: output/model_3_age4_crude.csv
        csv6.2: output/model_3_age4_adjusted.csv
        csv6.3: output/model_3_age4_ab.csv

  # Renders analysis/model_age_plot.Rmd to an HTML report.
  # NOTE(review): depends only on process_Rmatching; if the Rmd reads the
  # model_3_age* CSVs declared by model_subgroup, that action belongs in
  # needs — confirm.
  model_age_plot:
    run: >-
      r:latest -e
      'rmarkdown::render("analysis/model_age_plot.Rmd",
      knit_root_dir = "/workspace",
      output_dir="/workspace/output")'
    needs:
      - process_Rmatching
    outputs:
      moderately_sensitive:
        html: output/model_age_plot.html
  
  # Renders analysis/model_gender_plot.Rmd to an HTML report.
  # NOTE(review): depends only on process_Rmatching; if the Rmd reads the
  # model_3_male/female CSVs declared by model_subgroup, that action belongs
  # in needs — confirm.
  model_gender_plot:
    run: >-
      r:latest -e
      'rmarkdown::render("analysis/model_gender_plot.Rmd",
      knit_root_dir = "/workspace",
      output_dir="/workspace/output")'
    needs:
      - process_Rmatching
    outputs:
      moderately_sensitive:
        html: output/model_gender_plot.html

  # Runs analysis/model/model_time.R; declares the adj1 and adj2 CSVs.
  model_time:
    run: r:latest analysis/model/model_time.R
    needs:
      - process_Rmatching
    outputs:
      moderately_sensitive:
        csv1.1: output/model_time_adj1.csv
        csv1.2: output/model_time_adj2.csv

  # "Adjust for last AB time" (original note): renders
  # analysis/model_time_plot.Rmd to an HTML report.
  model_time_plot:
    run: >-
      r:latest -e
      'rmarkdown::render("analysis/model_time_plot.Rmd",
      knit_root_dir = "/workspace",
      output_dir="/workspace/output")'
    needs:
      - process_Rmatching
    outputs:
      moderately_sensitive:
        html: output/model_time_plot.html
        
  # Runs analysis/model/model_disease.R; declares the adj1 and adj2 CSVs.
  model_disease:
    run: r:latest analysis/model/model_disease.R
    needs:
      - process_Rmatching
    outputs:
      moderately_sensitive:
        csv1.1: output/model_disease_adj1.csv
        csv1.2: output/model_disease_adj2.csv
     
  # "Adjust for individual" (original note): renders
  # analysis/model_disease_plot.Rmd to an HTML report.
  model_disease_plot:
    run: >-
      r:latest -e
      'rmarkdown::render("analysis/model_disease_plot.Rmd",
      knit_root_dir = "/workspace",
      output_dir="/workspace/output")'
    needs:
      - process_Rmatching
    outputs:
      moderately_sensitive:
        html: output/model_disease_plot.html
  
  # model_wave: # separate wave
  #   run: r:latest -e 'rmarkdown::render("analysis/model_wave.Rmd", knit_root_dir = "/workspace", output_dir="/workspace/output")'
  #   needs: [process_Rmatching]
  #   outputs:
  #     moderately_sensitive:
  #       html: output/model_wave.html
      
  # Runs analysis/model/model_cca.R; declares the crude and adjusted CSVs.
  model_cca:
    run: r:latest analysis/model/model_cca.R
    needs:
      - process_Rmatching
    outputs:
      moderately_sensitive:
        csv1.1: output/model_cca_crude.csv
        csv1.2: output/model_cca_adjusted.csv

  # Renders analysis/model_complete_case_plot.Rmd to an HTML report.
  model_complete_case_plot:
    run: >-
      r:latest -e
      'rmarkdown::render("analysis/model_complete_case_plot.Rmd",
      knit_root_dir = "/workspace",
      output_dir="/workspace/output")'
    needs:
      - process_Rmatching
    outputs:
      moderately_sensitive:
        html: output/model_complete_case_plot.html

  # adjust for 6-week exclusion

  # Runs cohortextractor generate_cohort against study_definition_outcome_6w;
  # scheduled after matching.
  extract_variables_6w:
    run: >-
      cohortextractor:latest generate_cohort
      --study-definition study_definition_outcome_6w
    needs:
      - matching
    outputs:
      highly_sensitive:
        cohort: output/input_outcome_6w.csv
  
  # "Add variables" step for the 6w branch: runs
  # analysis/process_Rmatching_6w.R after the 6w extraction, the matching and
  # the main process_Rmatching action.
  process_Rmatching_6w:
    run: r:latest analysis/process_Rmatching_6w.R
    needs:
      - extract_variables_6w
      - matching
      - process_Rmatching
    outputs:
      highly_sensitive:
        cohort1: output/matched_outcome_6w.rds

  # Runs analysis/model/model_6w.R. Declares binary / total / types CSVs for
  # each of the two adjustment sets (adj1, adj2).
  model_6w:
    run: r:latest analysis/model/model_6w.R
    needs:
      - process_Rmatching_6w
    outputs:
      moderately_sensitive:
        # adj1
        csv1.1: output/model_6w_binary_adj1.csv
        csv1.2: output/model_6w_total_adj1.csv
        csv1.3: output/model_6w_types_adj1.csv
        # adj2
        csv2.1: output/model_6w_binary_adj2.csv
        csv2.2: output/model_6w_total_adj2.csv
        csv2.3: output/model_6w_types_adj2.csv
    
  # Runs analysis/model/model_6w_time.R; declares the adj1 and adj2 CSVs.
  model_6w_time:
    run: r:latest analysis/model/model_6w_time.R
    needs:
      - process_Rmatching_6w
    outputs:
      moderately_sensitive:
        csv1.1: output/model_6w_time_adj1.csv
        csv1.2: output/model_6w_time_adj2.csv

  # Renders analysis/model_6w_plot.Rmd to an HTML report.
  # NOTE(review): the original inline note read "adjust for last AB time",
  # identical to model_time_plot — possibly copied; confirm it applies here.
  model_6w_plot:
    run: >-
      r:latest -e
      'rmarkdown::render("analysis/model_6w_plot.Rmd",
      knit_root_dir = "/workspace",
      output_dir="/workspace/output")'
    needs:
      - process_Rmatching_6w
    outputs:
      moderately_sensitive:
        html: output/model_6w_plot.html


  # ongoing project

  # Runs cohortextractor generate_cohort against study_definition_ab_yr1;
  # scheduled after matching.
  extract_variables_ab_yr1:
    run: >-
      cohortextractor:latest generate_cohort
      --study-definition study_definition_ab_yr1
    needs:
      - matching
    outputs:
      highly_sensitive:
        cohort: output/input_ab_yr1.csv


# sensitivity analysis - combine ICU+death outcome
  # Runs cohortextractor generate_cohort against
  # study_definition_covid_admission_2; the CSV below is its only declared
  # output.
  generate_study_population_covid_admission_2:
    run: >-
      cohortextractor:latest generate_cohort
      --study-definition study_definition_covid_admission_2
    outputs:
      highly_sensitive:
        cohort: output/input_covid_admission_2.csv

  # Runs analysis/process_2.R once the *_2 admission cohort extraction has
  # finished; declares two case CSVs and one control CSV.
  process_2:
    run: r:latest analysis/process_2.R
    needs:
      - generate_study_population_covid_admission_2
    outputs:
      highly_sensitive:
        case: output/case_covid_icu_death_2.csv
        case2: output/case_covid_icu_2.csv
        control: output/control_covid_hosp_2.csv

  # Matching step for the *_2 branch: renders analysis/matching_2.Rmd;
  # declares the HTML report plus the matched and unmatched patient files.
  matching_2:
    run: >-
      r:latest -e
      'rmarkdown::render("analysis/matching_2.Rmd",
      knit_root_dir = "/workspace",
      output_dir="/workspace/output")'
    needs:
      - process_2
    outputs:
      highly_sensitive:
        rds1: output/matched_patients_2.rds
        rds2: output/unmatched_patients_2.rds
        csv: output/matched_patients_id_2.csv
      moderately_sensitive:
        html: output/matching_2.html

  # Runs cohortextractor generate_cohort (with --with-end-date-fix) against
  # study_definition_outcome_2; scheduled after matching_2.
  extract_variables_2:
    run: >-
      cohortextractor:latest generate_cohort
      --with-end-date-fix
      --study-definition study_definition_outcome_2
    needs:
      - matching_2
    outputs:
      highly_sensitive:
        cohort: output/input_outcome_2.csv

  # "Add variables" step for the *_2 branch: runs
  # analysis/process_Rmatching_2.R once both extract_variables_2 and
  # matching_2 have completed.
  process_Rmatching_2:
    run: r:latest analysis/process_Rmatching_2.R
    needs:
      - extract_variables_2
      - matching_2
    outputs:
      highly_sensitive:
        cohort1: output/matched_outcome_2.rds
        rds1: output/abtype79_2.rds
        rds2: output/comor17_2.rds

  # Runs analysis/model/model_ICU_death.R; declares the crude and adjusted
  # model_1 CSVs for the ICU_death variant.
  model_ICU_death:
    run: r:latest analysis/model/model_ICU_death.R
    needs:
      - process_Rmatching_2
    outputs:
      moderately_sensitive:
        csv1: output/model_1_crude_ICU_death.csv
        csv2: output/model_1_adjusted_ICU_death.csv
  
  # Renders analysis/model_2_ICU_death.Rmd to an HTML report.
  model_2_ICU_death:
    run: >-
      r:latest -e
      'rmarkdown::render("analysis/model_2_ICU_death.Rmd",
      knit_root_dir = "/workspace",
      output_dir="/workspace/output")'
    needs:
      - process_Rmatching_2
    outputs:
      moderately_sensitive:
        html: output/model_2_ICU_death.html