# Comparing prevalence of PRIMIS codelists - individual codes

In [1]:
import pandas as pd
import numpy as np
import os
from IPython.display import display, Markdown

### Load data

In [2]:
# Load the released per-codelist prevalence tables.
# File names are expected to look like "<prefix>age-and-sex_<codelist>__<system>.csv",
# where <system> is one of the keys of `ddict` ("tpp" or "emis").
filepath = os.path.join("..","released-outputs", "by-codelist")
file_list = os.listdir(filepath)

# ddict[system][codelist] -> DataFrame indexed by (ageband, sex, <codelist>)
ddict = {}
ddict["tpp"] = {}
ddict["emis"] = {}

for item in file_list:
    # os.listdir returns every directory entry; skip anything that does not
    # follow the expected naming pattern (e.g. .DS_Store, .ipynb_checkpoints),
    # which would otherwise fail on the split below.
    if not item.endswith('.csv') or 'age-and-sex_' not in item:
        continue
    string_1 = item.split('age-and-sex_')[1].replace('.csv', "")
    codelist, system = string_1.split('__')
    # The code column is named after the codelist, so it is part of the index.
    df = pd.read_csv(os.path.join(filepath,item)).set_index(["ageband","sex",codelist])
    ddict[system][codelist] = df


In [3]:
# Codelists are displayed in this order, so related lists appear together.
output_order = [
    'carehome', 'carer', 'notcarer',
    'bmi_stage', 'sev_obesity',
    'sev_mental', 'smhres',
    'preg', 'pregdel',
]

# Display heading for a codelist; codelists without an entry get no heading.
headers = {
    'carehome': 'Care staff',
    'carer': 'Carer / household',
    'bmi_stage': 'BMI',
    'preg': 'Pregnancy/delivery',
    'sev_mental': 'MH',
}

# Read each codelist CSV into a DataFrame, keyed by the codelist's short name.
codelist_dir = os.path.join("..", "codelists")
codelists = {
    name: pd.read_csv(
        os.path.join(codelist_dir, "primis-covid19-vacc-uptake-" + name + ".csv")
    )
    for name in output_order
}


In [4]:
# Show every row of each table instead of pandas' truncated preview.
pd.set_option('display.max_rows', None)

for i in output_order:
    # Emit a section heading for the codelists that have one.
    if i in headers:
        display(Markdown(f"## \n ## {headers[i]}"))

    # Rename the shared rate column so both systems' rates can sit side by side.
    df_t = ddict["tpp"][i]
    df_t = df_t.rename(columns={"rate_per_1000":"rate_per_1000 TPP"})
    df_e = ddict["emis"][i]
    df_e = df_e.rename(columns={"rate_per_1000":"rate_per_1000 EMIS"})

    # Align the two tables on their (ageband, sex, code) index. concat's default
    # outer join keeps codes present in only one system, leaving NaN for the other.
    df = pd.concat([df_t, df_e], axis=1)

    # Attach the codelist's remaining columns (including "term") by matching the
    # code. This is an inner merge, so codes missing from the codelist file are dropped.
    df = df.reset_index()
    codelist = codelists[i]
    # `columns=` replaces the positional axis argument (`drop("code", 1)`),
    # which pandas 2.0 no longer accepts.
    df = df.merge(codelist, left_on=i, right_on="code").drop(columns="code")

    # Within each ageband/sex group, list the highest rates first (TPP, then EMIS).
    df = df.sort_values(by=["ageband", "sex", "rate_per_1000 TPP", "rate_per_1000 EMIS"],
                        ascending=[True, True, False, False])
    df = df.set_index(["ageband","sex",i,"term"])

    display(df)

## 
 ## Care staff

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,rate_per_1000 TPP,rate_per_1000 EMIS
ageband,sex,carehome,term,Unnamed: 4_level_1,Unnamed: 5_level_1
16-39,F,1092561000000107,Employed by care home,0.8,0.29
16-39,F,158942005,Residential child care worker,0.0,0.03
16-39,F,158943000,Residential youth care worker,0.0,0.02
16-39,M,1092561000000107,Employed by care home,0.1,0.05
16-39,M,158942005,Residential child care worker,0.0,0.01
16-39,M,158943000,Residential youth care worker,0.0,0.01
40-69,F,1092561000000107,Employed by care home,1.3,0.49
40-69,F,158942005,Residential child care worker,0.1,0.06
40-69,F,158943000,Residential youth care worker,0.0,0.04
40-69,F,158944006,Matron (old people's home),0.0,0.01


## 
 ## Carer / household

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,rate_per_1000 TPP,rate_per_1000 EMIS
ageband,sex,carer,term,Unnamed: 4_level_1,Unnamed: 5_level_1
16-39,F,229774002,Caregiver,8.7,0.5
16-39,F,224484003,Patient themselves providing care,4.6,19.1
16-39,F,407542009,Informal caregiver,1.1,
16-39,F,302767002,Cares for a relative,0.8,0.3
16-39,F,407543004,Primary caregiver,0.5,0.2
16-39,F,413761004,Caregiver of a person with learning disability,0.1,0.1
16-39,F,413760003,Caregiver of a person with chronic disease,0.1,0.0
16-39,F,413763001,Caregiver of a person with physical disability,0.1,0.0
16-39,F,276040005,Looks after someone,0.1,
16-39,F,413762006,Caregiver of a person with mental health problem,0.0,0.0


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,rate_per_1000 TPP,rate_per_1000 EMIS
ageband,sex,notcarer,term,Unnamed: 4_level_1,Unnamed: 5_level_1
16-39,F,506401000000109,Not a carer,14.0,45.6
16-39,F,199361000000101,Is no longer a carer,2.8,2.9
16-39,F,933581000000105,No longer carer of patient with dementia,,0.0
16-39,M,506401000000109,Not a carer,9.8,31.2
16-39,M,199361000000101,Is no longer a carer,1.8,1.2
16-39,M,933581000000105,No longer carer of patient with dementia,,0.0
40-69,F,506401000000109,Not a carer,11.0,39.2
40-69,F,199361000000101,Is no longer a carer,8.4,8.6
40-69,F,933581000000105,No longer carer of patient with dementia,0.0,0.0
40-69,M,506401000000109,Not a carer,10.3,36.1


## 
 ## BMI

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,rate_per_1000 TPP,rate_per_1000 EMIS
ageband,sex,bmi_stage,term,Unnamed: 4_level_1,Unnamed: 5_level_1
16-39,F,162864005,Body mass index 30+ - obesity,21.1,16.6
16-39,F,412768003,Body mass index 20-24 - normal,11.0,7.6
16-39,F,162863004,Body mass index 25-29 - overweight,10.0,7.2
16-39,F,35425004,Normal body mass index,7.3,6.3
16-39,F,310252000,Body mass index less than 20,5.8,4.9
16-39,F,408512008,Body mass index 40+ - severely obese,4.6,3.9
16-39,F,6497000,Decreased body mass index,1.6,1.6
16-39,F,914721000000105,Obese class I (body mass index 30.0 - 34.9),0.6,0.5
16-39,F,914731000000107,Obese class II (body mass index 35.0 - 39.9),0.5,0.3
16-39,F,914741000000103,Obese class III (body mass index equal to or greater than 40.0),0.5,0.3


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,rate_per_1000 TPP,rate_per_1000 EMIS
ageband,sex,sev_obesity,term,Unnamed: 4_level_1,Unnamed: 5_level_1
16-39,F,408512008,Body mass index 40+ - severely obese,5.2,4.3
16-39,F,914741000000103,Obese class III (body mass index equal to or greater than 40.0),0.6,0.3
16-39,M,408512008,Body mass index 40+ - severely obese,1.6,1.4
16-39,M,914741000000103,Obese class III (body mass index equal to or greater than 40.0),0.2,0.1
40-69,F,408512008,Body mass index 40+ - severely obese,13.2,11.3
40-69,F,914741000000103,Obese class III (body mass index equal to or greater than 40.0),1.3,0.7
40-69,M,408512008,Body mass index 40+ - severely obese,6.5,5.2
40-69,M,914741000000103,Obese class III (body mass index equal to or greater than 40.0),0.7,0.4
70+,F,408512008,Body mass index 40+ - severely obese,8.7,7.6
70+,F,914741000000103,Obese class III (body mass index equal to or greater than 40.0),0.7,0.4


## 
 ## MH

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,rate_per_1000 TPP,rate_per_1000 EMIS
ageband,sex,sev_mental,term,Unnamed: 4_level_1,Unnamed: 5_level_1
16-39,F,13746004,Bipolar disorder,2.2,2.6
16-39,F,69322001,Psychotic disorder,1.0,1.1
16-39,F,64905009,Paranoid schizophrenia,0.3,0.4
16-39,F,58214004,Schizophrenia,0.3,0.3
16-39,F,83225003,Bipolar II disorder,0.3,0.2
16-39,F,85248005,Bipolar disorder in remission,0.3,0.1
16-39,F,191525009,Non-organic psychosis,0.2,0.3
16-39,F,191667009,Paranoid disorder,0.2,0.3
16-39,F,68890003,Schizoaffective disorder,0.2,0.2
16-39,F,755311000000100,Non-organic psychosis in remission,0.2,


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,rate_per_1000 TPP,rate_per_1000 EMIS
ageband,sex,smhres,term,Unnamed: 4_level_1,Unnamed: 5_level_1
16-39,F,85248005,Bipolar disorder in remission,0.39,0.21
16-39,F,755311000000100,Non-organic psychosis in remission,0.2,0.11
16-39,F,755301000000102,Paranoid state in remission,0.14,0.05
16-39,F,755321000000106,"Single major depressive episode, severe, with psychosis, psychosis in remission",0.12,0.05
16-39,F,4926007,Schizophrenia in remission,0.08,0.07
16-39,F,191643001,"Mixed bipolar affective disorder, in full remission",0.08,
16-39,F,41836007,Bipolar disorder in full remission,0.07,0.05
16-39,F,755331000000108,"Recurrent major depressive episodes, severe, with psychosis, psychosis in remission",0.05,0.03
16-39,F,191625000,"Bipolar affective disorder, currently manic, in full remission",0.05,
16-39,F,191588009,Single manic episode in full remission,0.04,0.03


## 
 ## Pregnancy/delivery

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,rate_per_1000 TPP,rate_per_1000 EMIS
ageband,sex,preg,term,Unnamed: 4_level_1,Unnamed: 5_level_1
16-39,F,77386006,Pregnant,42.7,43.1
16-39,F,48782003,Delivery normal,7.4,9.7
16-39,F,11687002,Gestational diabetes mellitus,3.5,3.3
16-39,F,268445003,Ultrasound scan - obstetric,2.7,1.9
16-39,F,289256000,Mother delivered,2.1,
16-39,F,14094001,Excessive vomiting in pregnancy,1.7,2.0
16-39,F,424525001,Antenatal care,1.1,1.3
16-39,F,169711001,Antenatal booking examination,0.7,
16-39,F,25825004,Hemorrhage in early pregnancy,0.6,1.2
16-39,F,47200007,High risk pregnancy,0.5,


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,rate_per_1000 TPP,rate_per_1000 EMIS
ageband,sex,pregdel,term,Unnamed: 4_level_1,Unnamed: 5_level_1
16-39,F,77386006,Pregnant,26.4,25.9
16-39,F,309469004,Spontaneous vertex delivery,7.8,4.6
16-39,F,169826009,Single live birth,7.6,5.1
16-39,F,48782003,Delivery normal,4.6,6.0
16-39,F,17369002,Miscarriage,2.6,4.4
16-39,F,274130007,Emergency cesarean section,2.1,3.1
16-39,F,11687002,Gestational diabetes mellitus,2.1,
16-39,F,177141003,Elective cesarean section,2.0,2.6
16-39,F,11466000,Cesarean section,2.0,
16-39,F,156073000,Complete miscarriage,2.0,
