In [4]:
import pandas as pd
from utilities import *


sentinel_measures = ["qrisk2", "asthma", "copd", "sodium", "cholesterol", "alt", "tsh", "alt", "rbc", 'hba1c', 'systolic_bp', 'medication_review']

demographics = ['region', 'age_band', 'imd', 'sex', 'learning_disability', 'ethnicity']

values_dict = {}


dates = ['2019-04-01', '2020-04-01', '2021-04-01']

differences_list = []

for measure in sentinel_measures:
    
    total_df = load_and_drop(measure, practice=True)
    total_df['rate'] = (total_df[measure] / total_df['population'])*1000
    total_df = total_df.groupby(by=['date'])[['rate']].mean().reset_index()
    
    totals_dict = {}
    for date in dates:
        val = total_df[total_df['date'] == date]['rate']
        totals_dict[date] = val
    
    
        

    
    for d in demographics:
        df = pd.read_csv(f'../output/combined_measure_{measure}_{d}.csv', parse_dates=['date']).sort_values(['date'])
        
        
        if d == 'ethnicity':
            
            #drop missing ethnicity :('0')
            df = df[df['ethnicity'] != 0]
            
            # replace with strings
            ethnicity_codes = {1.0: "White", 2.0: "Mixed", 3.0: "Asian", 4.0: "Black", 5.0:"Other"}
            df = df.replace({"ethnicity": ethnicity_codes})
            
        elif d == 'age_band':
            df = df[df['age_band'] != 'missing']
            
        elif d == 'learning_disability':
            ld_dict = {0: 'No record of a learning disability', 1: 'Record of a learning disability'}
            df = df.replace({"learning_disability": ld_dict})
        
        
        if d != 'age_band':
            df['rate'] = df[measure]/(df['population']/1000)
        
        
      
        for unique_category in df[d].unique():
            df_subset = df[df[d] == unique_category]
            
            
            date_values = {}
            date_changes = {}
            
            for date in dates:
                val = df_subset[df_subset['date']==date]['rate'].values[0]
                total_val = totals_dict[date].values[0]
             

                difference = round(((val - total_val) / total_val)*100, 2)
             
                date_values[date]=val
                date_changes[date] = difference

         
            row = [measure, d, unique_category, f'{date_values["2019-04-01"]} ({date_changes["2019-04-01"]})', f'{date_values["2020-04-01"]} ({date_changes["2020-04-01"]})', f'{date_values["2021-04-01"]} ({date_changes["2021-04-01"]})']
            differences_list.append(row)
        
 
            
   
    
differences_df =pd.DataFrame(differences_list, columns=['measure', 'demographic', 'demographic_subset', '2019-04-01', '2020-04-01', '2021-04-01'])
differences_df.to_csv('../output/demographics_differences.csv') 

Unnamed: 0,measure,demographic,demographic_subset,2019-04-01,2020-04-01,2021-04-01
0,qrisk2,region,North East,750.0 (54.76),nan (nan),687.5 (29.77)
1,qrisk2,region,Yorkshire and the Humber,nan (nan),nan (nan),833.3333333333334 (57.29)
2,qrisk2,region,West Midlands,nan (nan),375.0 (-25.51),500.0 (-5.62)
3,qrisk2,region,South East,562.5 (16.07),523.8095238095237 (4.05),666.6666666666666 (25.83)
4,qrisk2,region,North West,600.0 (23.81),nan (nan),nan (nan)
...,...,...,...,...,...,...
355,medication_review,ethnicity,Mixed,nan (nan),nan (nan),nan (nan)
356,medication_review,ethnicity,White,nan (nan),nan (nan),nan (nan)
357,medication_review,ethnicity,Asian,nan (nan),nan (nan),nan (nan)
358,medication_review,ethnicity,Black,nan (nan),nan (nan),nan (nan)
