# project.yaml

# Pipeline configuration version; quoted so it stays the string '3.0' rather than a float.
version: '3.0'

# Settings for expected (dummy) data.
# NOTE(review): presumably the number of dummy patients generated per extract — confirm against the pipeline docs.
expectations:
  population_size: 3000

actions:

  # Runs cohortextractor generate_cohort with `study_definition` for the monthly
  # index dates 2019-04-01 through 2019-12-01, written as gzip-compressed CSV.
  # Output files are expected to match output/input_2019*.csv.gz.
  generate_study_population:
    run: >
      cohortextractor:latest generate_cohort
        --study-definition study_definition
        --index-date-range "2019-04-01 to 2019-12-01 by month"
        --output-format='csv.gz'
    outputs:
      highly_sensitive:  # extract files are declared highly sensitive
        cohort: output/input_2019*.csv.gz

  # Runs cohortextractor generate_cohort with `study_definition` for the monthly
  # index dates 2020-01-01 through 2020-12-01, written as gzip-compressed CSV.
  # Output files are expected to match output/input_2020*.csv.gz.
  generate_study_population_range2:
    run: >
      cohortextractor:latest generate_cohort
        --study-definition study_definition
        --index-date-range "2020-01-01 to 2020-12-01 by month"
        --output-format='csv.gz'
    outputs:
      highly_sensitive:  # extract files are declared highly sensitive
        cohort: output/input_2020*.csv.gz

  # Runs cohortextractor generate_cohort with `study_definition` for the monthly
  # index dates 2021-01-01 through 2021-12-01, written as gzip-compressed CSV.
  # Output files are expected to match output/input_2021*.csv.gz.
  generate_study_population_range3:
    run: >
      cohortextractor:latest generate_cohort
        --study-definition study_definition
        --index-date-range "2021-01-01 to 2021-12-01 by month"
        --output-format='csv.gz'
    outputs:
      highly_sensitive:  # extract files are declared highly sensitive
        cohort: output/input_2021*.csv.gz

  # Runs cohortextractor generate_cohort with `study_definition` for the monthly
  # index dates 2022-01-01 through 2022-03-01, written as gzip-compressed CSV.
  # Output files are expected to match output/input_2022*.csv.gz.
  generate_study_population_range4:
    run: >
      cohortextractor:latest generate_cohort
        --study-definition study_definition
        --index-date-range "2022-01-01 to 2022-03-01 by month"
        --output-format='csv.gz'
    outputs:
      highly_sensitive:  # extract files are declared highly sensitive
        cohort: output/input_2022*.csv.gz

  # Runs cohortextractor generate_cohort with `study_definition_allmedrev` for the
  # monthly index dates 2019-04-01 through 2019-12-01, written as gzip-compressed CSV.
  # Output files are expected to match output/input_allmedrev_2019*.csv.gz.
  generate_study_population_allmedrev:
    run: >
      cohortextractor:latest generate_cohort
        --study-definition study_definition_allmedrev
        --index-date-range "2019-04-01 to 2019-12-01 by month"
        --output-format='csv.gz'
    outputs:
      highly_sensitive:  # extract files are declared highly sensitive
        cohort: output/input_allmedrev_2019*.csv.gz

  # Runs cohortextractor generate_cohort with `study_definition_allmedrev` for the
  # monthly index dates 2020-01-01 through 2020-12-01, written as gzip-compressed CSV.
  # Output files are expected to match output/input_allmedrev_2020*.csv.gz.
  generate_study_population_allmedrev_range2:
    run: >
      cohortextractor:latest generate_cohort
        --study-definition study_definition_allmedrev
        --index-date-range "2020-01-01 to 2020-12-01 by month"
        --output-format='csv.gz'
    outputs:
      highly_sensitive:  # extract files are declared highly sensitive
        cohort2: output/input_allmedrev_2020*.csv.gz

  # Runs cohortextractor generate_cohort with `study_definition_allmedrev` for the
  # monthly index dates 2021-01-01 through 2021-12-01, written as gzip-compressed CSV.
  # Output files are expected to match output/input_allmedrev_2021*.csv.gz.
  generate_study_population_allmedrev_range3:
    run: >
      cohortextractor:latest generate_cohort
        --study-definition study_definition_allmedrev
        --index-date-range "2021-01-01 to 2021-12-01 by month"
        --output-format='csv.gz'
    outputs:
      highly_sensitive:  # extract files are declared highly sensitive
        cohort3: output/input_allmedrev_2021*.csv.gz

  # Runs cohortextractor generate_cohort with `study_definition_allmedrev` for the
  # monthly index dates 2022-01-01 through 2022-03-01, written as gzip-compressed CSV.
  # Output files are expected to match output/input_allmedrev_2022*.csv.gz.
  generate_study_population_allmedrev_range4:
    run: >
      cohortextractor:latest generate_cohort
        --study-definition study_definition_allmedrev
        --index-date-range "2022-01-01 to 2022-03-01 by month"
        --output-format='csv.gz'
    outputs:
      highly_sensitive:  # extract files are declared highly sensitive
        # Label follows the range number, like cohort2 / cohort3 in the sibling
        # allmedrev actions (was a copy-pasted `cohort3`).
        cohort4: output/input_allmedrev_2022*.csv.gz

  # Runs cohortextractor generate_cohort with `study_definition_ethnicity`.
  # No index-date range is passed, and a single file output/input_ethnicity.csv.gz is declared.
  generate_ethnicity_cohort:
    run: >
      cohortextractor:latest generate_cohort
        --study-definition study_definition_ethnicity
        --output-format='csv.gz'
    outputs:
      highly_sensitive:  # extract file is declared highly sensitive
        cohort: output/input_ethnicity.csv.gz

  # Runs cohort-joiner v0.0.44 with the monthly output/input_20*.csv.gz extracts as
  # the left-hand side and the ethnicity extract as the right-hand side, writing
  # results into output/joined.
  # NOTE(review): presumably this adds the ethnicity columns to each monthly file — confirm against the cohort-joiner docs.
  join_cohorts:
    run: >
      cohort-joiner:v0.0.44
        --lhs output/input_20*.csv.gz
        --rhs output/input_ethnicity.csv.gz
        --output-dir output/joined
    # All four date-range extracts plus the ethnicity extract must exist first.
    needs:
      - generate_study_population
      - generate_study_population_range2
      - generate_study_population_range3
      - generate_study_population_range4
      - generate_ethnicity_cohort
    outputs:
      highly_sensitive:
        cohort: output/joined/input_20*.csv.gz

  # Runs cohort-joiner v0.0.44 with the output/input_allmedrev*.csv.gz extracts as
  # the left-hand side and the ethnicity extract as the right-hand side, writing
  # results into output/joined (the same directory used by join_cohorts).
  # NOTE(review): presumably this adds the ethnicity columns to each monthly file — confirm against the cohort-joiner docs.
  join_cohorts_allmedrev:
    run: >
      cohort-joiner:v0.0.44
        --lhs output/input_allmedrev*.csv.gz
        --rhs output/input_ethnicity.csv.gz
        --output-dir output/joined
    # All four allmedrev date-range extracts plus the ethnicity extract must exist first.
    needs:
      - generate_study_population_allmedrev
      - generate_study_population_allmedrev_range2
      - generate_study_population_allmedrev_range3
      - generate_study_population_allmedrev_range4
      - generate_ethnicity_cohort
    outputs:
      highly_sensitive:
        cohort: output/joined/input_allmedrev_*.csv.gz

  # Generate structured medication review measures and plots

  # Runs cohortextractor generate_measures for `study_definition`, using
  # output/joined as the output directory, after join_cohorts has run.
  # Declares four families of measure files: mr, mr12m, smr and smr12m.
  # Child keys use the same 4-space indent as the other actions, and the folded
  # command carries no trailing whitespace (it would become part of the string).
  generate_measures_mr_smr:
    run: >
      cohortextractor:latest generate_measures
      --study-definition study_definition
      --output-dir=output/joined
    needs: [join_cohorts]
    outputs:
      moderately_sensitive:
        mr_measure_csv: output/joined/measure_mr_*_rate.csv
        mr12m_measure_csv: output/joined/measure_mr12m_*_rate.csv
        smr_measure_csv: output/joined/measure_smr_*_rate.csv
        smr12m_measure_csv: output/joined/measure_smr12m_*_rate.csv

  # Runs cohortextractor generate_measures for `study_definition_allmedrev`, using
  # output/joined as the output directory, after join_cohorts_allmedrev has run.
  # Child keys use the same 4-space indent as the other actions, and the folded
  # command carries no trailing whitespace (it would become part of the string).
  # NOTE(review): the patterns below spell `allmedrv` (no "e") while action and file
  # names elsewhere use `allmedrev` — presumably this matches the measure ids in the
  # study definition; confirm.
  # NOTE(review): it is not visible here which step writes the
  # `*_agestandardgrouped.csv` files — confirm they are produced by this action.
  generate_measures_all_reviews:
    run: >
      cohortextractor:latest generate_measures
      --study-definition study_definition_allmedrev
      --output-dir=output/joined
    needs: [join_cohorts_allmedrev]
    outputs:
      moderately_sensitive:
        allmedrev_measure_csv: output/joined/measure_allmedrv_*_rate.csv
        allmedrev12m_measure_csv: output/joined/measure_allmedrv12m_*_rate.csv
        allmedrev_measure_asgrouped_csv: output/joined/measure_allmedrv_*_rate_agestandardgrouped.csv
        allmedrev12m_measure_asgrouped_csv: output/joined/measure_allmedrv12m_*_rate_agestandardgrouped.csv

  # Runs deciles-charts v0.0.33 over the practice-level measure files
  # (output/joined/measure_*_practice_rate.csv), writing into output/joined.
  # The config block turns on both table and chart output and turns off
  # `show_outer_percentiles`.
  generate_deciles_charts:
    run: >
      deciles-charts:v0.0.33
        --input-files output/joined/measure_*_practice_rate.csv
        --output-dir output/joined
    config:
      show_outer_percentiles: false
      tables:
        output: true
      charts:
        output: true
    # Both measure-generation actions must have run first.
    needs:
      - generate_measures_mr_smr
      - generate_measures_all_reviews
    outputs:
      moderately_sensitive:
        deciles_charts: output/joined/deciles_*_*.*

  # Runs analysis/redact_and_round.py once the measure files and code-use
  # summaries exist. Declares redacted measure CSVs and redacted total-code-use
  # CSVs under output/redacted as outputs.
  redact_and_round:
    run: python:latest python analysis/redact_and_round.py
    needs:
      - generate_measures_mr_smr
      - generate_codeuse_output
      - generate_allmedrev_codeuse_output
      - generate_measures_all_reviews
    outputs:
      moderately_sensitive:
        cohort: output/redacted/redacted_measure_*.csv
        cohort_codeuse: output/redacted/redacted_totalcodeuse*.csv

  # Runs analysis/plots.py after redaction and the deciles charts are done.
  # Declares percentage and per-thousand rate figures (JPEG) under output/figures.
  generate_plots:
    run: python:latest python analysis/plots.py
    needs:
      - redact_and_round
      - generate_deciles_charts
    outputs:
      moderately_sensitive:
        percent_cohort: output/figures/*_*_rate_percentage.jpeg
        perthousand_cohort: output/figures/*_*_rate_perthousand.jpeg

  # Runs analysis/table_1.py over the joined 20xx extracts with the listed
  # demographic columns and `had_smr` as the outcome.
  # The folded scalar (>-) joins the lines below with single spaces and drops the
  # final newline, so the command is one line.
  generate_table_1:
    run: >-
      python:latest python analysis/table_1.py
      --study_def_paths="output/joined/input_20*.csv.gz"
      --demographics="age_band,sex,region,imdQ5,ethnicity,learning_disability,care_home_type"
      --outcome "had_smr"
    needs:
      - join_cohorts
    outputs:
      moderately_sensitive:
        counts: output/table_1.csv
        had_outcome: output/table_1_had_outcome.csv

  # Runs analysis/code_use_summary.py over the joined 20xx extracts with the
  # medication-review codelist file, using "codeuse" as the output file stem.
  # The folded scalar (>-) joins the lines below with single spaces and drops the
  # final newline, so the command is one line.
  generate_codeuse_output:
    run: >-
      python:latest python analysis/code_use_summary.py
      --study_def_paths="output/joined/input_20*.csv.gz"
      --codelistfile="user-chriswood-medication-review.csv"
      --outputfile="codeuse"
    needs:
      - join_cohorts
    outputs:
      moderately_sensitive:
        code_counts: output/codeuse.csv
        total_code_counts: output/totalcodeuse.csv

  # Runs analysis/code_use_summary.py over the joined allmedrev extracts with the
  # all-medication-reviews codelist file, using "codeuse_allmedrev" as the output
  # file stem and passing explicit --totalstart / --totalend values.
  # NOTE(review): the total dates look like DD/MM/YYYY (Jan–Dec 2020) — confirm against the script.
  # The folded scalar (>-) joins the lines below with single spaces and drops the
  # final newline, so the command is one line.
  generate_allmedrev_codeuse_output:
    run: >-
      python:latest python analysis/code_use_summary.py
      --study_def_paths="output/joined/input_allmedrev_*.csv.gz"
      --codelistfile="user-chriswood-all-medication-reviews.csv"
      --outputfile="codeuse_allmedrev"
      --totalstart="01/01/2020"
      --totalend="01/12/2020"
    needs:
      - join_cohorts_allmedrev
    outputs:
      moderately_sensitive:
        code_counts: output/codeuse_allmedrev.csv
        total_code_counts: output/totalcodeuse_allmedrev.csv

  # Runs pytest in verbose mode and writes a JUnit XML report to output/pytest.xml.
  # Has no `needs`, so it does not depend on any other action.
  run_tests:
    run: >-
      python:latest python -m pytest
      --junit-xml=output/pytest.xml
      --verbose
    outputs:
      moderately_sensitive:
        log: output/pytest.xml