# Measuring uptake of inhaled budesonide prescription guidance following COVID-19 infection

## Setup, data loading and preprocessing

In [None]:
import pandas as pd

In [None]:
## low count suppression function
def suppress(df_to_suppress,columns,n=10):
    """Return a copy of a dataframe with small counts in the given columns masked.

    Every value in each of ``columns`` is converted to a string. Values that are
    greater than 0 and less than ``n`` are replaced by the label ``'<n'``
    (``'<10'`` with the default threshold); all other values keep their string
    form. Values must be convertible with ``int()``.

    Parameters
    ----------
    df_to_suppress : pandas.DataFrame
        Table holding the count columns. It is not modified.
    columns : iterable of str
        Names of the columns to suppress.
    n : int, default 10
        Suppression threshold; counts strictly between 0 and ``n`` are masked.

    Returns
    -------
    pandas.DataFrame
        A new dataframe with the requested columns converted to strings and
        low counts replaced.

    Raises
    ------
    ValueError
        If any name in ``columns`` is not a column of ``df_to_suppress``.
    """
    for c in columns:
        if c not in df_to_suppress.columns:
            raise ValueError(f'column {c} not in dataframe')

    # Work on a copy so the caller's dataframe keeps its numeric counts and
    # re-running a notebook cell does not re-suppress already-suppressed text.
    df = df_to_suppress.copy()
    # The label follows the threshold instead of always reading '<10'.
    low_count_label = f'<{n}'
    for c in df.columns[df.columns.isin(columns)]:
        df[c] = df[c].astype(str).apply(
            lambda x: low_count_label if 0 < int(x) < n else x
        )
    return df

Import the output from cohort extractor, with the following exclusions already applied:

```
NOT has_died
AND registered
AND
(age_band = "65_plus" OR (age_band = "50_65" AND (primis_shield OR primis_nonshield))) 
AND
(sex = "M" OR sex = "F")
AND NOT covid_admission
AND NOT covid_emergency_admission
AND NOT has_previous_steroid_prescription
AND NOT primary_covid_hospital_admission
AND first_positive_test_type != "LFT_Only"
```

Expected row count is c.8000 (as of 2021-07-30)


In [None]:
# Read the extracted cohort CSV into the working dataframe used by every later cell.
df = pd.read_csv(filepath_or_buffer='../output/input.csv')

In [None]:
# Report how many rows were loaded (first element of the frame's shape).
print(f'Total input row count: {df.shape[0]}')

In [None]:
# Parse the first-positive-test dates and report the latest one present in the input.
print(f'Most recent positive test date: {pd.to_datetime(df["first_positive_test_date"]).max()}')

Apply labels for three cohorts based on age at time of positive COVID-19 test and PRIMIS shielding flags (estimated expected proportion of patients in brackets):
* over 65s (85%)
* over 50s (15%)
  - with shielding flag (5%)
  - with non-shielding flag (10%)

In [None]:
# Label each patient's cohort: '65_plus' is kept as-is; any other age band gets an
# 'H' suffix when primis_shield == 1 and an 'L' suffix otherwise.
# Built column-wise instead of with a row-wise df.apply(axis=1): same labels,
# without a Python function call per row.
is_65_plus = df['age_band'] == '65_plus'
shield_suffix = df['primis_shield'].eq(1).map({True: 'H', False: 'L'})
df['cohort'] = (df['age_band'].astype(str) + shield_suffix).where(~is_65_plus, '65_plus')

# Count non-null patient_id values per cohort, then apply low-count suppression.
df_cohort_counts = (
    df.groupby('cohort')['patient_id']
    .count()
    .reset_index()
    .rename(columns={'patient_id': 'Patient count', 'cohort': 'Cohort'})
)
suppressed_df_cohort_counts = suppress(df_cohort_counts, ['Patient count'])

# Styler.hide_index() is deprecated since pandas 1.4 and removed in 2.0; use
# Styler.hide(axis='index') when it exists and fall back on older versions.
cohort_counts_styler = suppressed_df_cohort_counts.style
cohort_counts_styler.hide(axis='index') if hasattr(cohort_counts_styler, 'hide') else cohort_counts_styler.hide_index()

## COVID-19 Test Types

Only patients with "PCR_Only" (expected ~82%) and "LFT_WithPCR" (expected ~18%) positive COVID-19 test results are included.

In [None]:
# Count non-null patient_id values for each type of first positive test,
# then apply low-count suppression to the counts.
df_test_counts = (
    df.groupby('first_positive_test_type')['patient_id']
    .count()
    .reset_index()
    .rename(columns={'patient_id': 'Patient count', 'first_positive_test_type': 'Type of first recorded positive test'})
)
supressed_test_type_counts = suppress(df_test_counts, ['Patient count'])

# Styler.hide_index() is deprecated since pandas 1.4 and removed in 2.0; use
# Styler.hide(axis='index') when it exists and fall back on older versions.
test_type_styler = supressed_test_type_counts.style
test_type_styler.hide(axis='index') if hasattr(test_type_styler, 'hide') else test_type_styler.hide_index()

## Budesonide prescriptions by cohort

In [None]:
# Display names shared by both prescription-count tables in this cell.
bd_count_labels = {"budesonide_prescription": "Count of budesonide prescriptions", "patient_id": "Count of patients"}

# Patients per number of budesonide prescriptions across the whole input,
# suppressed and tagged with cohort 'All'. Later cells reuse this table.
bd_counts_all = (
    df.groupby('budesonide_prescription')['patient_id']
    .count()
    .reset_index()
    .rename(columns=bd_count_labels)
)
supressed_bd_prescriptions_all = suppress(bd_counts_all, ["Count of patients"])
supressed_bd_prescriptions_all['cohort'] = 'All'

# The same breakdown within each cohort, suppressed and appended below the 'All' rows.
bd_counts_by_cohort = (
    df.groupby(['cohort', 'budesonide_prescription'])['patient_id']
    .count()
    .reset_index()
    .rename(columns=bd_count_labels)
)
supressed_bd_prescriptions_by_cohort = pd.concat(
    [supressed_bd_prescriptions_all, suppress(bd_counts_by_cohort, ['Count of patients'])],
    ignore_index=True,
)

# Styler.hide_index() is deprecated since pandas 1.4 and removed in 2.0; use
# Styler.hide(axis='index') when it exists and fall back on older versions.
bd_by_cohort_styler = supressed_bd_prescriptions_by_cohort.style
bd_by_cohort_styler.hide(axis='index') if hasattr(bd_by_cohort_styler, 'hide') else bd_by_cohort_styler.hide_index()

## Budesonide prescriptions by region

In [None]:
# Patients per number of budesonide prescriptions within each region, with
# low-count suppression applied to the counts.
bd_counts_by_region = (
    df.groupby(['region', 'budesonide_prescription'])['patient_id']
    .count()
    .reset_index()
    .rename(columns={"budesonide_prescription": "Count of budesonide prescriptions", "patient_id": "Count of patients"})
)

# Put the overall ('All') rows first; their 'cohort' column is relabelled as
# 'region' so the two tables line up when concatenated.
supressed_bd_prescriptions_by_region = pd.concat(
    [
        supressed_bd_prescriptions_all.rename(columns={'cohort': 'region'}),
        suppress(bd_counts_by_region, ['Count of patients']),
    ],
    ignore_index=True,
)

# Styler.hide_index() is deprecated since pandas 1.4 and removed in 2.0; use
# Styler.hide(axis='index') when it exists and fall back on older versions.
bd_by_region_styler = supressed_bd_prescriptions_by_region.style
bd_by_region_styler.hide(axis='index') if hasattr(bd_by_region_styler, 'hide') else bd_by_region_styler.hide_index()