# Service Restoration Observatory

# Changes occuring in the provision of {measure}: {Start date} - {end date}

Below are various time-series graphs showing changes in total nationwide {measure} provision as well as a breakdown by region and other demographic variables.  The following graphs are available:

* [Total {measure} number](#total)
    * [Total {measure} number by child codes](#child-codes)
* [Total {measure} number by practice](#practice)
* [Breakdown by region](#region)
* [Breakdown by age band](#age)
* [Breakdown by sex](#sex)


#### Methods
Using OpenSAFELY-TPP, covering 40% of England's population, we have assessed coding activity related to {measure} in general practice from the beginning of {start date} until the end of {end date}. The codelist used can be found here at [OpenSAFELY Codelists](link to codelist).  For each month within the study period, we have calculated the rate at which the code was recorded per 1000 registered patients.

All analytical code and output is available for inspection at the [OpenSAFELY GitHub repository](link to repo).

In [31]:
import sys
import pandas as pd
import numpy as np
import plotly
import matplotlib.pyplot as plt
from pandas.plotting import register_matplotlib_converters
register_matplotlib_converters()
from IPython.display import HTML
from IPython.core.display import HTML as Center
from utilities import *

%matplotlib inline

Center(""" <style>
.output_png {
    display: table-cell;
    text-align: center;
    vertical-align: middle;
}
</style> """)

In [56]:
# Load measures df
measures_df_total = pd.read_csv('../output/measure_1_total.csv')
measures_df_event_code = pd.read_csv('../output/measure_1_event_code.csv')
measures_df_practice = pd.read_csv('../output/measure_1_practice_only.csv')
measures_df_by_region = pd.read_csv('../output/measure_1_by_region.csv')
measures_df_by_age = pd.read_csv('../output/measure_1_by_age_band.csv')
measures_df_by_sex = pd.read_csv('../output/measure_1_by_sex.csv')
measures_df_by_imd = pd.read_csv('../output/measure_1_by_imd.csv')
measures_df_by_ethnicity = pd.read_csv('../output/measure_1_by_ethnicity.csv')

codelist = pd.read_csv('../codelists/martinaf-online-consultations-snomed-v01-28bba9bc.csv')
codelist.round(16)
measures_df_event_code.round(16)

# temporary fix for population not working in Measures
measures_df_total = measures_df_total.groupby(
    ['date'])['event_x', 'population'].sum().reset_index()
measures_df_total['value'] = measures_df_total['event_x'] /measures_df_total['population']



# Get measures

class Measure:
  def __init__(self, id, numerator, denominator, group_by):
    self.id = id
    self.numerator = numerator
    self.denominator = denominator
    self.group_by = group_by
    
    
measures = [
    Measure(
        id="1_total",
        numerator="event_x",
        denominator="population",
        group_by=None
    ),

    Measure(
        id="1_event_code",
        numerator="event_x",
        denominator="population",
        group_by=["practice", "event_x_event_code"]
    ),

    Measure(
        id="1_practice_only",
        numerator="event_x",
        denominator="population",
        group_by=["practice"]
    ),

    
]


# Convert date column to datetime and sort by date
to_datetime_sort(measures_df_total)
to_datetime_sort(measures_df_event_code)
to_datetime_sort(measures_df_practice)
to_datetime_sort(measures_df_by_region)
to_datetime_sort(measures_df_by_age)
to_datetime_sort(measures_df_by_sex)
to_datetime_sort(measures_df_by_imd)
to_datetime_sort(measures_df_by_ethnicity)

# Redact small numbers from measures df
# redact_small_numbers(measures_df_total, 5, measures[0])

# Calculate rates

calculate_rate(measures_df_total, value_col='event_x', population_col='population', rate_per=1000)
calculate_rate(measures_df_event_code, value_col='event_x', population_col='population', rate_per=1000)
calculate_rate(measures_df_practice, value_col='event_x', population_col='population', rate_per=1000)
calculate_rate(measures_df_by_region, value_col='event_x', population_col='population', rate_per=1000)
calculate_rate(measures_df_by_age, value_col='event_x', population_col='population', rate_per=1000)
calculate_rate(measures_df_by_sex, value_col='event_x', population_col='population', rate_per=1000)
calculate_rate(measures_df_by_imd, value_col='event_x', population_col='population', rate_per=1000)
calculate_rate(measures_df_by_ethnicity, value_col='event_x', population_col='population', rate_per=1000)

# Ethnicity with meaningful label
di = {1.0:"White",2.0:"Mixed",3.0:"Asian",4.0:"Black",5.0:"Other"}
measures_df_by_ethnicity.replace({"ethnicity":di},inplace=True)


Indexing with multiple keys (implicitly converted to a tuple of keys) will be deprecated, use a list instead.



<a id='total'></a>
## Total {measure} Number

In [33]:
plot_measures(measures_df_total, title='Total {Measure} across whole population', measure_id='measure_1', column_to_plot='rate', category=False, y_label='Rate per 1000')

<a id='child-codes'></a>
### Sub totals by child codes

Events for the top x subcodes across the study period

In [34]:
child_table = create_child_table(df=measures_df_event_code, code_df=codelist, code_column='code', term_column='term', measure='event_x')
child_table

IndexError: index 0 is out of bounds for axis 0 with size 0

In [35]:
measures_df_event_code.round(16)

Unnamed: 0,event_x_event_code,event_x,population,value,date,rate
0,1068881000000101,488.0,1021,0.477963,2019-01-01,477.962782
1,1090371000000106,497.0,1025,0.484878,2019-01-01,484.878049
2,325901000000103,498.0,952,0.523109,2019-01-01,523.109244
3,325911000000101,508.0,1007,0.504469,2019-01-01,504.468719
4,325951000000102,477.0,1007,0.473684,2019-01-01,473.684211
...,...,...,...,...,...,...
235,325981000000108,480.0,984,0.487805,2020-12-01,487.804878
236,325991000000105,510.0,997,0.511535,2020-12-01,511.534604
237,384131000000101,505.0,990,0.510101,2020-12-01,510.101010
238,854891000000104,464.0,956,0.485356,2020-12-01,485.355649


In [36]:
plot_measures(measures_df_event_code, title='Total {Measure} across whole populatio for top 5 child codes', measure_id='measure_1', column_to_plot='rate', category='event_x_event_code', y_label='Rate per 1000')

ValueError: 
    Invalid value of type 'numpy.int64' received for the 'name' property of scatter
        Received value: 1068881000000101

    The 'name' property is a string and must be specified as:
      - A string
      - A number that will be converted to a string

<a id='practice'></a>
## Total number by practice

In [38]:
practice_df = pd.read_csv('../output/input_practice_count.csv')
practices_dict =calculate_statistics_practices(measures_df_practice, practice_df,"2020-02-01")
print(f'Practices included entire period: {practices_dict["total"]["number"]} ({practices_dict["total"]["percent"]}%)')
print(f'Practices included within last year: {practices_dict["year"]["number"]} ({practices_dict["year"]["percent"]}%)')
print(f'Practices included within last 3 months: {practices_dict["months_3"]["number"]} ({practices_dict["months_3"]["percent"]}%)')



Practices included entire period: 169 (444.74%)
Practices included within last year: 166 (436.84%)
Practices included within last 3 months: 164 (431.58%)


In [39]:
interactive_deciles_chart(measures_df_practice, period_column='date', column='event_x', title='Decile chart',ylabel='rate per 1000')

<a id="region"></a>
## Breakdown by Region

Number of events within each group.

In [40]:
measures_df_by_region['region'] = measures_df_by_region['region'].replace(np.nan, 'NA')
counts_df = calculate_statistics_demographics(df=measures_df_by_region, demographic_var='region', end_date="2021-02-01", event_column='event_x')
counts_df

Unnamed: 0,Total Study Period,Within Last Year,Within Last 3 Months
East Midlands,11972.0,50000.0,5000.0
East of England,11980.0,50000.0,5000.0
London,23941.0,50000.0,5000.0
North East,11880.0,50000.0,5000.0
North West,11985.0,50000.0,5000.0
South East,24166.0,50000.0,5000.0
West Midlands,11953.0,50000.0,5000.0
Yorkshire and the Humber,12123.0,50000.0,5000.0


In [41]:
plot_measures(measures_df_by_region, title='Breakdown by region',measure_id='measure_1', column_to_plot='rate', category='region', y_label='Rate per 1000')

<a id="age"></a>
## Breakdown by Age

Number of events within each group.

In [42]:
counts_df = calculate_statistics_demographics(df=measures_df_by_age, demographic_var='age_band', end_date="2021-02-01", event_column='event_x')
counts_df

Unnamed: 0,Total Study Period,Within Last Year,Within Last 3 Months
0,117.0,50000.0,5000.0
0-19,15037.0,50000.0,5000.0
20-29,14846.0,50000.0,5000.0
30-39,14937.0,50000.0,5000.0
40-49,15110.0,50000.0,5000.0
50-59,15009.0,50000.0,5000.0
60-69,14967.0,50000.0,5000.0
70-79,14904.0,50000.0,5000.0
80+,15073.0,50000.0,5000.0


In [43]:
plot_measures(measures_df_by_age, title='Breakdown by age',measure_id='measure_1', column_to_plot='rate', category='age_band', y_label='Rate per 1000')

<a id="sex"></a>
## Breakdown by Sex

Number of events within each group.

In [44]:
counts_df = calculate_statistics_demographics(df=measures_df_by_sex, demographic_var='sex', end_date="2021-02-01", event_column='event_x')
counts_df

Unnamed: 0,Total Study Period,Within Last Year,Within Last 3 Months
F,59745.0,50000.0,5000.0
M,59051.0,50000.0,5000.0
U,1204.0,50000.0,5000.0


In [45]:
plot_measures(measures_df_by_sex, title='Breakdown by sex',measure_id='measure_1', column_to_plot='rate', category='sex', y_label='Rate per 1000')

In [46]:
counts_df = calculate_statistics_demographics(df=measures_df_by_imd, demographic_var='imd', end_date="2021-02-01", event_column='event_x')
counts_df

Unnamed: 0,Total Study Period,Within Last Year,Within Last 3 Months
Q1,23047.0,50000.0,5000.0
Q2,22596.0,50000.0,5000.0
Q3,22998.0,50000.0,5000.0
Q4,22792.0,50000.0,5000.0
Q5,22643.0,50000.0,5000.0
U,5924.0,50000.0,5000.0


In [47]:
plot_measures(measures_df_by_imd, title='Breakdown by deprivation quintile',measure_id='measure_1', column_to_plot='rate', category='imd', y_label='Rate per 1000')

In [57]:
counts_df = calculate_statistics_demographics(df=measures_df_by_ethnicity, demographic_var='ethnicity', end_date="2021-02-01", event_column='event_x')
counts_df

Unnamed: 0,Total Study Period,Within Last Year,Within Last 3 Months
,0.0,50000.0,5000.0
White,17930.0,50000.0,5000.0
Mixed,18053.0,50000.0,5000.0
Asian,17896.0,50000.0,5000.0
Black,17837.0,50000.0,5000.0
Other,18152.0,50000.0,5000.0


In [58]:
plot_measures(measures_df_by_ethnicity, title='Breakdown by ethnicity',measure_id='measure_1', column_to_plot='rate', category='ethnicity', y_label='Rate per 1000')