In [1]:
import pandas as pd
from utilities import *


sentinel_measures = ["qrisk2", "asthma", "copd", "sodium", "cholesterol", "alt", "tsh", "alt", "rbc", 'hba1c', 'systolic_bp', 'medication_review']

demographics = ['region', 'age_band', 'imd', 'sex', 'learning_disability', 'ethnicity']

values_dict = {}


dates = ['2019-04-01', '2020-04-01', '2021-04-01']

differences_list = []


def classify_changes(changes):
    """Classifies list of % changes

    Args:
        changes: list of percentage changes
    """
    
    if (-15 <= changes[0] < 15) and (-15 <= changes[1] < 15):
        classification = 'no change'
        
    elif (changes[0] > 15) or (changes[1] > 15):
        classification = 'increase'
    
    elif (changes[0] <= -15) and not (-15 <= changes[1] < 15) :
        classification = 'sustained drop'
    
    elif (changes[0] <= -15) and (-15 <= changes[1] < 15) :
        classification = 'recovery'
    
    else:
        classification = 'none'
    
    return classification

for measure in sentinel_measures:
    
    total_df = load_and_drop(measure, practice=True)
    total_df['rate'] = (total_df[measure] / total_df['population'])*1000
    total_df = total_df.groupby(by=['date'])[['rate']].mean().reset_index()
    
    totals_dict = {}
    for date in dates:
        val = total_df[total_df['date'] == date]['rate']
        totals_dict[date] = val
    
    
        

    
    for d in demographics:
        df = pd.read_csv(f'../output/combined_measure_{measure}_{d}.csv', parse_dates=['date']).sort_values(['date'])
        
        
        if d == 'ethnicity':
            
            #drop missing ethnicity :('0')
            df = df[df['ethnicity'] != 0]
            
            # replace with strings
            ethnicity_codes = {1.0: "White", 2.0: "Mixed", 3.0: "Asian", 4.0: "Black", 5.0:"Other"}
            df = df.replace({"ethnicity": ethnicity_codes})
            
        elif d == 'age_band':
            df = df[df['age_band'] != 'missing']
            
        elif d == 'learning_disability':
            ld_dict = {0: 'No record of a learning disability', 1: 'Record of a learning disability'}
            df = df.replace({"learning_disability": ld_dict})
        
        
        if d != 'age_band':
            df['rate'] = df[measure]/(df['population']/1000)
        
        
      
        for unique_category in df[d].unique():
            df_subset = df[df[d] == unique_category]
            
            
            date_values = {}
            date_changes = {}
            
            for date in dates:
                val = df_subset[df_subset['date']==date]['rate'].values[0]
                total_val = totals_dict[date].values[0]
             

                difference = round(((val - total_val) / total_val)*100, 2)
             
                date_values[date]=val
                date_changes[date] = difference

            classification = classify_changes([date_changes["2020-04-01"], date_changes["2021-04-01"]])
            row = [measure, d, unique_category, date_values["2019-04-01"], date_changes["2019-04-01"], date_values["2020-04-01"], date_changes["2020-04-01"], date_values["2021-04-01"], date_changes["2021-04-01"], classification]
            differences_list.append(row)
        
 
            
   
    
differences_df =pd.DataFrame(differences_list, columns=['measure', 'demographic', 'demographic_subset', '2019_val', '2019_change', '2020_val', '2020_change', '2021_val', '2021_change', 'classification'])
differences_df.to_csv('../output/demographics_differences.csv')

differences_df_sorted = differences_df.reindex(differences_df['2021_change'].abs().sort_values(ascending=False).index)
differences_df_sorted.to_csv('../output/demographics_differences_sorted.csv')

Unnamed: 0,measure,demographic,demographic_subset,2019_val,2019_change,2020_val,2020_change,2021_val,2021_change,classification
1,qrisk2,region,Yorkshire and the Humber,,,,,833.333333,57.29,increase
126,cholesterol,region,Yorkshire and the Humber,777.777778,40.89,600.000000,27.15,750.000000,47.98,increase
208,tsh,ethnicity,Black,,,,,727.272727,47.70,increase
299,hba1c,ethnicity,Black,583.333333,7.00,636.363636,28.96,818.181818,47.67,increase
65,copd,region,West Midlands,,,562.500000,31.54,750.000000,45.81,increase
...,...,...,...,...,...,...,...,...,...,...
355,medication_review,ethnicity,Mixed,,,,,,,none
356,medication_review,ethnicity,White,,,,,,,none
357,medication_review,ethnicity,Asian,,,,,,,none
358,medication_review,ethnicity,Black,,,,,,,none
