# Quick analysis

In [1]:
## Import libraries

%load_ext autoreload
%autoreload 2

import os
import pandas as pd
import numpy as np
from datetime import date, datetime
from IPython.display import display, Markdown


In [2]:
# import data

df = pd.read_feather("../output/input.feather")
#df = pd.read_csv("../output/input.csv")

maxdate = df["any_admission_date"].max()
display(maxdate)
display(df["outpatient_covid_therapeutic_name"].unique())

'2022-04-21'

array(['sotrovimab', nan, 'paxlovid', 'molnupiravir', 'remdesivir',
       'casirivimab and imdevimab'], dtype=object)

In [4]:

fields = {
'Outpatient':
    ['outpatient_covid_therapeutic_date',
     'elective_or_op',
     'elective_short_stay', 
     'daycase_admission_date',
     'elective_x892_date', 
     'elective_x292_date', 
     'hospital_attendance_date'],
'Inpatient':      
    ['inpatient_covid_therapeutic_date',
     'any_admission_date', 
     'any_admission_x892_date',
     'any_admission_x292_date']
}

for x in fields:
    f = fields[x][0]
    display(Markdown(f"## {x}"))

    # filter to treatment dates within available SUS data range
    df1 = df.copy().loc[(df[f] <= maxdate)]

    if x=="Inpatient": # don't count admissions if discharge date was after treatment date
        for c in fields[x][1:]: # for each admission type
            # compare discharge date with treatment date and remove admission date if not in window
            df1.loc[df1[f] > df1[c.replace("admission", "discharge")], c] = np.nan
            display(Markdown(f"Note: for inpatients, recent spells may not yet have completed so some data may be missing"))
    
    # create df for mabs and separate df for Antivirals
    name_field = f'{x.lower()}_covid_therapeutic_name'
    df1[name_field] = df1[name_field].astype(str).fillna("")
    df_mab = df1.loc[df1[name_field].str.contains("mab")]
    df_non_mab = df1.loc[~df1[name_field].str.contains("mab")]
    treatments = {"MABs":df_mab,
                  "Antivirals":df_non_mab}

    # Breakdown by treatment type (MABs/Avs)
    for t in treatments:
        df_t = treatments[t]
        summary1 = pd.DataFrame(df_t[fields[x]].count()).rename(columns={0:"count"})
        summary1["percent"] = (100*(summary1["count"]/summary1["count"][f])).fillna(0).round(2).astype(int)
        summary1.index = summary1.index.str.replace("_date","")
        display(Markdown(f"### {t}"), summary1)

    # Breakdown by region and age group (MABs only, most inclusive hospital record field only)
    for col in ["region_covid_therapeutics","age_group"]:
        summary2 = pd.DataFrame(df_mab.groupby(col)\
                [fields[x][0:2]].count()).rename(columns={0:"count"})
        summary2["percent"] = (100*summary2[fields[x][1]]/summary2[f]).fillna(0).round(2).astype(int)
        summary2.columns = summary2.columns.str.replace("_date","")
        display(Markdown("### Breakdown by region and age group (MABs only)"), summary2)

## Outpatient

### MABs

Unnamed: 0,count,percent
outpatient_covid_therapeutic,250,100
elective_short_stay,250,100
daycase_admission,18,7
elective_x892,5,2
elective_x292,11,4
hospital_attendance,13,5


### Antivirals

Unnamed: 0,count,percent
outpatient_covid_therapeutic,542,100
elective_short_stay,542,100
daycase_admission,19,3
elective_x892,34,6
elective_x292,33,6
hospital_attendance,27,4


### Breakdown by region and age group (MABs only)

Unnamed: 0_level_0,outpatient_covid_therapeutic,elective_short_stay,percent
region_covid_therapeutics,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
East,19,19,100
East Midlands,21,21,100
London,49,49,100
North East,30,30,100
North West,25,25,100
South East,26,26,100
South West,27,27,100
West Midlands,28,28,100
Yorkshire and The Humber,25,25,100


### Breakdown by region and age group (MABs only)

Unnamed: 0_level_0,outpatient_covid_therapeutic,elective_short_stay,percent
age_group,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
12-24,20,20,100
25-34,28,28,100
35-44,44,44,100
45-54,49,49,100
55-64,22,22,100
65-74,27,27,100
75+,24,24,100
missing,36,36,100


## Inpatient

### MABs

Unnamed: 0,count,percent
inpatient_covid_therapeutic,262,100
any_admission,102,38
any_admission_x892,76,29
any_admission_x292,78,29


### Antivirals

Unnamed: 0,count,percent
inpatient_covid_therapeutic,532,100
any_admission,198,37
any_admission_x892,133,25
any_admission_x292,132,24


### Breakdown by region and age group (MABs only)

Unnamed: 0_level_0,inpatient_covid_therapeutic,any_admission,percent
region_covid_therapeutics,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
East,23,5,21
East Midlands,25,6,24
London,47,12,25
North East,28,15,53
North West,27,14,51
South East,31,12,38
South West,21,9,42
West Midlands,28,12,42
Yorkshire and The Humber,32,17,53


### Breakdown by region and age group (MABs only)

Unnamed: 0_level_0,inpatient_covid_therapeutic,any_admission,percent
age_group,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
12-24,23,8,34
25-34,26,8,30
35-44,51,18,35
45-54,47,15,31
55-64,22,12,54
65-74,43,18,41
75+,22,10,45
missing,28,13,46
