generated from opensafely/research-template
-
Notifications
You must be signed in to change notification settings - Fork 0
/
redact_and_round.py
39 lines (30 loc) · 1.46 KB
/
redact_and_round.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
import pandas as pd
from redaction_utils import *
from pathlib import Path
from utilities import *
# Make sure the destination folder for redacted outputs exists.
# exist_ok avoids the check-then-create race of a separate exists() test,
# and parents=True lets this succeed even when OUTPUT_DIR itself is missing.
(OUTPUT_DIR / "redacted").mkdir(parents=True, exist_ok=True)
# Breakdown variables. Each name is interpolated into the measure file
# names that this script reads and writes.
breakdowns = [
    "age_band", "sex", "imdQ5", "region",
    "ethnicity", "nhome", "learning_disability", "care_home_type",
]

# Review type codes. Each code is used in the measure file names and in the
# "had_<code>" numerator column name.
# NOTE(review): presumably "smr" = structured medication review and
# "mr" = medication review -- confirm against the study definition.
med_review_type = [
    "smr",
    "mr",
]
# Redact small counts, then round counts (n=7 and rounding_base=5 are passed
# to redact_small_numbers, which owns the exact threshold/rounding rule).
def _redact_measure(med_review, group):
    """Redact and round one joined measure file and write the result.

    Reads ``joined/measure_<med_review>_<group>_rate.csv`` under OUTPUT_DIR,
    applies ``redact_small_numbers`` and writes
    ``redacted/redacted_measure_<med_review>_<group>_rate.csv``.

    Args:
        med_review: Review type code; selects the input file and the
            ``had_<med_review>`` numerator column.
        group: ``"population"`` for the overall rate file, or the name of a
            breakdown variable for a stratified rate file.
    """
    filename = f"measure_{med_review}_{group}_rate.csv"
    df = pd.read_csv(OUTPUT_DIR / "joined" / filename, parse_dates=["date"])

    if med_review == "smr":
        # SMR measures are only kept from January 2020 onwards (inclusive).
        df = df.loc[df["date"] >= "2020-01-01"]

    df = redact_small_numbers(
        df,
        n=7,
        rounding_base=5,
        numerator=f"had_{med_review}",
        denominator="population",
        rate_column="value",
        date_column="date",
    )
    df.to_csv(OUTPUT_DIR / "redacted" / f"redacted_{filename}", index=False)


for med_review in med_review_type:
    # Overall population file first, then one file per breakdown variable,
    # in the same order as the original script processed them.
    for group in ["population", *breakdowns]:
        _redact_measure(med_review, group)