generated from opensafely/research-template
/
project.yaml
112 lines (102 loc) · 3.05 KB
/
project.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
# Pipeline definition: each entry under `actions` names a command to run,
# the actions it depends on (`needs`), and the files it declares as outputs.
#
# Pattern used below: a `query_*` action runs a SQL file with sqlrunner and
# writes row-level output declared `highly_sensitive`; a follow-up action
# (`round_*` / `copy_*`) reads that file and writes a results file declared
# `moderately_sensitive`. `make_html_reports` depends on all results files.
version: "3.0"

expectations:
  population_size: 1000

actions:
  # ---- distinct_values ----------------------------------------------------
  # Runs analysis/distinct_values/query.sql.
  query_distinct_values:
    run: >
      sqlrunner:latest
      --output output/distinct_values/rows.csv
      analysis/distinct_values/query.sql
    outputs:
      highly_sensitive:
        rows: output/distinct_values/rows.csv

  # Passes the query output through the project's analysis.actions.round
  # module for the two named columns.
  round_distinct_values:
    needs:
      - query_distinct_values
    run: >
      python:latest python -m analysis.actions.round
      --output output/distinct_values/results.csv
      output/distinct_values/rows.csv
      --column-names num_distinct_values num_values
    outputs:
      moderately_sensitive:
        results: output/distinct_values/results.csv

  # ---- date_range ---------------------------------------------------------
  # Runs analysis/date_range/query.sql.
  query_date_range:
    run: >
      sqlrunner:latest
      --output output/date_range/rows.csv
      analysis/date_range/query.sql
    outputs:
      highly_sensitive:
        rows: output/date_range/rows.csv

  # Uses analysis.actions.copy rather than analysis.actions.round; no
  # --column-names are passed for this output.
  copy_date_range:
    needs:
      - query_date_range
    run: >
      python:latest python -m analysis.actions.copy
      --output output/date_range/results.csv
      output/date_range/rows.csv
    outputs:
      moderately_sensitive:
        results: output/date_range/results.csv

  # ---- num_rows_by_month --------------------------------------------------
  # Runs analysis/num_rows_by_month/query.sql.
  query_num_rows_by_month:
    run: >
      sqlrunner:latest
      --output output/num_rows_by_month/rows.csv
      analysis/num_rows_by_month/query.sql
    outputs:
      highly_sensitive:
        rows: output/num_rows_by_month/rows.csv

  # Passes the query output through analysis.actions.round for `num_rows`.
  round_num_rows_by_month:
    needs:
      - query_num_rows_by_month
    run: >
      python:latest python -m analysis.actions.round
      --output output/num_rows_by_month/results.csv
      output/num_rows_by_month/rows.csv
      --column-names num_rows
    outputs:
      moderately_sensitive:
        results: output/num_rows_by_month/results.csv

  # ---- lead_time ----------------------------------------------------------
  # Runs analysis/lead_time/query.sql.
  query_lead_time:
    run: >
      sqlrunner:latest
      --output output/lead_time/rows.csv
      analysis/lead_time/query.sql
    outputs:
      highly_sensitive:
        rows: output/lead_time/rows.csv

  # Passes the query output through analysis.actions.round for `frequency`.
  round_lead_time:
    needs:
      - query_lead_time
    run: >
      python:latest python -m analysis.actions.round
      --output output/lead_time/results.csv
      output/lead_time/rows.csv
      --column-names frequency
    outputs:
      moderately_sensitive:
        results: output/lead_time/results.csv

  # ---- reports ------------------------------------------------------------
  # Converts every notebook matching analysis/reports/*.ipynb to HTML.
  # It depends on all four results-producing actions above.
  make_html_reports:
    needs:
      - round_distinct_values
      - copy_date_range
      - round_num_rows_by_month
      - round_lead_time
    # nbconvert flags used in the command below:
    #   --execute
    #       execute notebooks before converting them to HTML reports
    #   --no-input
    #       exclude input cells and output prompts from HTML reports
    #   --to=html
    #       convert notebooks to HTML reports (not e.g. to PDF reports)
    #   --template basic
    #       use the basic (unstyled) template for HTML reports
    #   --output-dir=output/reports
    #       write HTML reports to the `output/reports` directory
    run: >
      python:latest jupyter nbconvert
      --execute
      --no-input
      --to=html
      --template basic
      --output-dir=output/reports
      analysis/reports/*.ipynb
    outputs:
      moderately_sensitive:
        reports: output/reports/*.html