generated from opensafely/research-template
/
test_codelists.py
38 lines (29 loc) · 1.62 KB
/
test_codelists.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
import pandas as pd
from codelists import *
# Load the gzipped CSV extract; the summary functions below read their
# per-code columns from this frame.
df = pd.read_csv("output/input_tests.csv.gz")
def get_counts_codes(df, code_list, test, *, output_dir="output"):
    """Write a one-column CSV holding the column sum for each code of a test.

    For every ``code`` in ``code_list`` the column ``{test}_snomed_{code}`` of
    ``df`` is summed. The sums are written to
    ``{output_dir}/tests_{test}.csv`` with the column names as the row index
    and a single ``count`` column.

    Args:
        df: DataFrame containing one ``{test}_snomed_{code}`` column per code.
            Presumably these are per-row indicator/count columns -- confirm
            against the study definition that produces the extract.
        code_list: Iterable of codes; each is interpolated into the column
            name, so any value with a sensible ``str()`` form works.
        test: Short test name used as the column prefix and in the file name.
        output_dir: Directory the CSV is written to. Defaults to ``"output"``,
            which reproduces the previously hard-coded location.

    Raises:
        KeyError: If an expected ``{test}_snomed_{code}`` column is missing.
    """
    columns = [f"{test}_snomed_{code}" for code in code_list]
    counts = {column: df[column].sum() for column in columns}
    # orient="index" turns each dict key into a row label, giving one row per
    # code column and a single value column.
    pd.DataFrame.from_dict(counts, orient="index", columns=["count"]).to_csv(
        f"{output_dir}/tests_{test}.csv"
    )
def get_numeric_values(df, code_list, test, *, output_dir="output"):
    """Write a CSV of how many non-missing numeric values each code has.

    For every ``code`` in ``code_list`` the non-NA entries of the column
    ``{test}_snomed_{code}_numeric_value`` of ``df`` are counted. The counts
    are written to ``{output_dir}/tests_{test}_numeric_value.csv`` with the
    column names as the row index and a single ``count_with_numeric_value``
    column.

    Args:
        df: DataFrame containing one ``{test}_snomed_{code}_numeric_value``
            column per code.
        code_list: Iterable of codes; each is interpolated into the column
            name, so any value with a sensible ``str()`` form works.
        test: Short test name used as the column prefix and in the file name.
        output_dir: Directory the CSV is written to. Defaults to ``"output"``,
            which reproduces the previously hard-coded location.

    Raises:
        KeyError: If an expected numeric-value column is missing from ``df``.
    """
    columns = [f"{test}_snomed_{code}_numeric_value" for code in code_list]
    # Series.count() skips NA entries, so rows without a recorded value do not
    # contribute to the total.
    counts = {column: df[column].count() for column in columns}
    pd.DataFrame.from_dict(
        counts, orient="index", columns=["count_with_numeric_value"]
    ).to_csv(f"{output_dir}/tests_{test}_numeric_value.csv")
# Script driver: only the ACR per-code count table is produced at present.
# NOTE(review): acr_codelist is not defined in this file; presumably it comes
# from the star import of `codelists` -- confirm.
get_counts_codes(df, acr_codelist, "acr")
# The remaining per-code count summaries are disabled (commented out).
# get_counts_codes(df, albumin_codelist, "albumin")
# get_counts_codes(df, cystatin_c_codelist, "cystatin")
# get_counts_codes(df, eGFR_codelist, "egfr")
# get_counts_codes(df, eGFR_qualifier_codelist, "egfr_qualifier")
# get_counts_codes(df, eGFR_qualifier_rate_codelist, "egfr_qualifier_rate")
# get_counts_codes(df, creatinine_codelist, "creatinine")
# get_counts_codes(df, creatinine_unit_codelist, "creatinine_unit")
# get_counts_codes(df, creatinine_clearance_codelist, "creatinine_clearance")
# All numeric-value summaries are likewise disabled.
# get_numeric_values(df, acr_codelist, "acr")
# get_numeric_values(df, albumin_codelist, "albumin")
# get_numeric_values(df, cystatin_c_codelist, "cystatin")
# get_numeric_values(df, eGFR_codelist, "egfr")
# get_numeric_values(df, eGFR_qualifier_codelist, "egfr_qualifier")
# get_numeric_values(df, creatinine_codelist, "creatinine")
# get_numeric_values(df, creatinine_clearance_codelist, "creatinine_clearance")