In [None]:
import pandas as pd

In [None]:
## low count suppression function
def suppress(df_to_suppress,columns,n=10):
    """Return a copy of a dataframe with small counts masked for disclosure control.

    Every value v in the named columns with 0 < v < n is replaced by the
    string '<n' (e.g. '<10' for the default threshold). All other values in
    those columns are kept, but converted to strings. Zero is not masked.

    Parameters
    ----------
    df_to_suppress : pandas.DataFrame
        Frame holding the counts. It is not modified; a copy is returned.
    columns : iterable of column labels
        Columns to suppress (a list or a pandas Index both work).
    n : int, default 10
        Suppression threshold; counts strictly between 0 and n are masked.

    Returns
    -------
    pandas.DataFrame
        Copy of the input with the named columns suppressed and stringified.

    Raises
    ------
    ValueError
        If any requested column is not present in the dataframe.
    """
    for c in columns:
        if c not in df_to_suppress.columns:
            raise ValueError(f'column {c} not in dataframe')

    # Work on a copy so the caller's frame keeps its raw numeric counts.
    df = df_to_suppress.copy()
    label = f'<{n}'
    for c in df.columns[df.columns.isin(columns)]:
        # Compare numerically; values that are not numbers become NaN and are
        # therefore never masked (they are passed through as strings).
        counts = pd.to_numeric(df[c], errors='coerce')
        # Plain boolean array so masking is positional, not index-aligned.
        low = ((counts > 0) & (counts < n)).to_numpy()
        df[c] = df[c].astype(str).mask(low, label)
    return df

In [None]:
def suppress_scalar(val, n=10):
    """Mask a single small count for disclosure control.

    Parameters
    ----------
    val : int or int-like
        The count to check; it is compared after conversion with ``int``.
    n : int, default 10
        Suppression threshold.

    Returns
    -------
    str or the original value
        The string '<n' (e.g. '<10') when ``int(val) < n``, otherwise ``val``
        unchanged. Note that zero is masked here as well.
    """
    return f'<{n}' if int(val) < n else val

In [None]:
def print_remainder(df):
    """Print the number of rows left in ``df``, passed through ``suppress_scalar``."""
    remaining = len(df.index)
    print(f'Remaining people: {suppress_scalar(remaining)}')

In [None]:
# Load the extract that every later exclusion step filters down.
df = pd.read_csv(filepath_or_buffer='../output/input_exclusioncounts.csv')

## Preliminary exclusions
Remove people who have died or are no longer registered at the practice at the time of study

In [None]:
# Report how many rows are flagged as died, then keep only the unflagged rows.
died_count = df.loc[df.has_died == 1].shape[0]
print(f'People having been recorded as having died: {suppress_scalar(died_count)}')
df = df.loc[df.has_died == 0]
print_remainder(df)

In [None]:
# Report how many rows are flagged as not registered, then keep the registered rows.
unregistered_count = df.loc[df.registered == 0].shape[0]
print(f'People having been recorded as not being registered: {suppress_scalar(unregistered_count)}')
df = df.loc[df.registered == 1]
print_remainder(df)

## Sex
Include people recorded as having a sex of 'M' or 'F'

In [None]:
# Keep only rows whose recorded sex is exactly 'M' or 'F'.
df = df[df.sex.isin(['M', 'F'])]
print_remainder(df)

## Age
Three cohorts are included, based on age at the time of the positive COVID-19 test and PRIMIS shielding flags (estimated expected proportion):
* over 65s (85%)
* over 50s (15%)
  - with shielding flag (5%)
  - over 50s with non-shielding flag (10%)

In [None]:
# Age band. The middle band starts at 50 so that it agrees with its own label
# ('50_65'), with the "over 50s" cohort described in the markdown above and with
# the 'lt_50' fallback; a lower bound of 55 would label 50-54 year olds 'lt_50'
# and they would then be dropped with the under 50s.
age_band = pd.Series('lt_50', index=df.index)
age_band[df.age >= 50] = '50_65'
age_band[df.age >= 65] = '65_plus'
df['age_band'] = age_band

# Risk suffix for the 50-65 band: 'H' when primis_shield is set, otherwise 'L'
# when primis_nonshield is set, otherwise 'N'. 'H' is assigned last so it wins
# when both flags are set.
risk_suffix = pd.Series('N', index=df.index)
risk_suffix[df.primis_nonshield == 1] = 'L'
risk_suffix[df.primis_shield == 1] = 'H'

# Only the 50-65 band is split by risk flag; the other bands are their own cohort.
df['cohort'] = age_band.where(age_band != '50_65', age_band + risk_suffix)

# Patients per cohort, with small counts masked before display.
df_cohort_counts = (
    df.groupby('cohort')['patient_id']
    .count()
    .reset_index()
    .rename(columns={'patient_id':'Patient count',"cohort":"Cohort"})
)
suppressed_df_cohort_counts = suppress(df_cohort_counts,['Patient count'])
suppressed_df_cohort_counts.style.hide_index()

In [None]:
# Remove the 'lt_50' cohort and the '50_65N' cohort (50-65 band with neither risk flag).
df = df[~df.cohort.isin(['lt_50', '50_65N'])]
print_remainder(df)

## PCR Testing
Only patients identified as having COVID-19 by PCR testing to be included in this study

In [None]:
# Latest first-positive-test date among the rows still in df.
latest_positive_test = pd.to_datetime(df.first_positive_test_date).max()
print(f'Most recent positive test date: {latest_positive_test}')

In [None]:
# Count the rows whose first positive test type is "LFT_Only", then drop them.
excl_count = df.loc[df.first_positive_test_type == "LFT_Only"].shape[0]
print(f'Patients identified as COVID-positive only by lateral flow test {suppress_scalar(excl_count)}')
df = df.loc[df.first_positive_test_type != "LFT_Only"]
print_remainder(df)

# Hospital Admissions
Exclude persons identified as 
* having been admitted to hospital with a
  - diagnosis of COVID-19 or 
  - primary diagnosis of COVID-19
* having attended emergency care with a diagnosis of COVID-19

In [None]:
# Patient counts for every combination of the three admission flags:
# first across everyone ('All'), then broken down by cohort.
hosp_flags = ['primary_covid_hospital_admission','covid_admission','covid_emergency_admission']

hosp_overall = df.groupby(hosp_flags)['patient_id'].count().reset_index()
hosp_overall.insert(0,'cohort','All')
hosp_by_cohort = df.groupby(['cohort'] + hosp_flags)['patient_id'].count().reset_index()

df_hosp = pd.concat([hosp_overall, hosp_by_cohort], ignore_index=True).rename(
    columns={
        'patient_id':'Count of patients',
        'primary_covid_hospital_admission':'Admitted w/primary CV19 diagnosis',
        'covid_admission':'Admitted w/CV19 diagnosis',
        'covid_emergency_admission':'Emergency Admission w/CV19 diagnosis',
    }
)
# Mask small counts before the table is displayed.
df_hosp = suppress(df_hosp,['Count of patients'])
df_hosp.style.hide_index()

# Previous Steroid Prescription
Exclude patients recorded as having received a prescription for inhaled or oral steroids in the three months (90 days) leading up to the issuance of the guidance

In [None]:
# Patient counts by previous-steroid-prescription flag:
# first across everyone ('All'), then broken down by cohort.
ccs_overall = df.groupby('has_previous_steroid_prescription')['patient_id'].count().reset_index()
ccs_overall.insert(0,'cohort','All')
ccs_by_cohort = df.groupby(['cohort','has_previous_steroid_prescription'])['patient_id'].count().reset_index()

df_prev_ccs = pd.concat([ccs_overall, ccs_by_cohort], ignore_index=True).rename(
    columns={
        'patient_id':'Count of patients',
        'has_previous_steroid_prescription':'Having previous corticosteroid prescription',
    }
)
# Mask small counts before the table is displayed.
df_prev_ccs = suppress(df_prev_ccs,['Count of patients'])
df_prev_ccs.style.hide_index()

# Hospitalisation x Previous Steroid Prescription
Cross-tabulation of the previous two exclusion criteria to understand interactions

In [None]:
# Cross-tabulate the three admission flags (rows) against the previous-steroid
# flag (columns), counting patients, with zero-filled gaps and margin totals.
hospXccs = pd.pivot_table(
    df,
    values='patient_id',
    index=['primary_covid_hospital_admission','covid_admission','covid_emergency_admission'],
    columns='has_previous_steroid_prescription',
    aggfunc='count',
    fill_value=0,
    margins=True,
)
# Render the column labels as strings before suppression.
hospXccs = hospXccs.rename(columns=lambda label: f'{label}')
hospXccs = suppress(hospXccs,hospXccs.columns)
hospXccs

In [None]:
# Keep only rows with none of the three admission flags set and no previous steroid prescription.
no_covid_admission = (
    (df.primary_covid_hospital_admission == 0)
    & (df.covid_admission == 0)
    & (df.covid_emergency_admission == 0)
)
no_previous_steroids = df.has_previous_steroid_prescription == 0
df = df[no_covid_admission & no_previous_steroids]
print_remainder(df)

## GP Consultation
The presence of a GP consultation within 7 days prior to, or 7 days after, the first positive test result is included as an explanatory variable rather than an exclusion criterion, due to uncertainty about its recording.

In [None]:
# Count the rows with with_consultation == 0, report that (suppressed) count,
# then keep only the rows with with_consultation == 1.
# NOTE(review): the markdown above describes GP consultation as an explanatory
# variable rather than an exclusion, yet this cell filters on it. Confirm which is intended.
excl_count = len(df[df.with_consultation==0].index)
print(f'Patients without GP consultation +/- 7 days from positive test {suppress_scalar(excl_count)}')
df = df[df.with_consultation == 1]
print_remainder(df)