In [23]:
import sys
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from pandas.plotting import register_matplotlib_converters
register_matplotlib_converters()
from IPython.display import HTML
from IPython.display import Markdown as md
from IPython.core.display import HTML as Center
from utilities import *
from config import marker, start_date, end_date, demographics, codelist_code_column, codelist_term_column


%matplotlib inline


Center('''<style>
.output_png {
    display: table-cell;
    text-align: center;
    vertical-align: middle;
}
</style>''')

class Measure:
  def __init__(self, id, numerator, denominator, group_by):
    self.id = id
    self.numerator = numerator
    self.denominator = denominator
    self.group_by = group_by
    
measures = [

    Measure(
        id="total",
        numerator="event",
        denominator="population",
        group_by=["age_band"]
    ),

    Measure(
        id="event_code",
        numerator="event",
        denominator="population",
        group_by=["age_band","event_code"]
    ),

    Measure(
        id="practice",
        numerator="event",
        denominator="population",
        group_by=["age_band","practice"]
    ),


]

for d in demographics:
    if d=='age_band':
        m = Measure(
        id=d,
        numerator="event",
        denominator="population",
        group_by=["age_band"]
        )
    else:
        m = Measure(
            id=d,
            numerator="event",
            denominator="population",
            group_by=["age_band", d]
        )
    measures.append(m)

default_measures = ['total', 'event_code', 'practice']
measures_ids = default_measures+ demographics
measures_dict = {}

for m in measures:
    measures_dict[m.id] = m

#if ethnicity in demographics create measures csv

if 'ethnicity' in demographics:
    ethnicity_data = []
    for file in os.listdir('../output'):
    
        if file.startswith('input'):
            #exclude ethnicity and practice
            if file.split('_')[1] not in ['ethnicity.csv', 'practice']:
                
                file_path = os.path.join('../output', file)
                date = file.split('_')[1][:-4]
                df = pd.read_csv(file_path)
                df['date'] = date
             
                ethnicity_data.append(df)
        
                
    ethnicity_df = pd.concat(ethnicity_data)

    population = ethnicity_df.groupby(by=['age_band', 'ethnicity', 'date']).size().reset_index(name='population')

    event = ethnicity_df.groupby(by=['age_band', 'ethnicity', 'date'])[['event', 'date']].sum().reset_index()

    measures_df = population.merge(event, on=['age_band', 'ethnicity', 'date'])
  
    measures_df.to_csv('../output/measure_ethnicity.csv')




In [24]:
display(
md("# Service Restoration Observatory"),
md(f"## Changes in {marker} between {start_date} and {end_date}"),
md(f"Below are various time-series graphs showing changes in {marker} code use."),
)


# Service Restoration Observatory

## Changes in SMR between 2020-12-01 and 2021-12-01

Below are various time-series graphs showing changes in SMR code use.

In [25]:
display(
md("### Methods"),
md(f"Using OpenSAFELY-TPP, covering 40% of England's population, we have assessed coding activity related to {marker} between {start_date} and {end_date}. The codelist used can be found here at [OpenSAFELY Codelists](https://codelists.opensafely.org/).  For each month within the study period, we have calculated the rate at which the code was recorded per 1000 registered patients."),
md(f"All analytical code and output is available for inspection at the [OpenSAFELY GitHub repository](https://github.com/opensafely")
)


### Methods

Using OpenSAFELY-TPP, covering 40% of England's population, we have assessed coding activity related to SMR between 2020-12-01 and 2021-12-01. The codelist used can be found here at [OpenSAFELY Codelists](https://codelists.opensafely.org/).  For each month within the study period, we have calculated the rate at which the code was recorded per 1000 registered patients.

All analytical code and output is available for inspection at the [OpenSAFELY GitHub repository](https://github.com/opensafely

In [26]:
default_measures = ['total', 'event_code', 'practice']
measures = default_measures+ demographics

data_dict = {}

for key, value in measures_dict.items():
    
    df = pd.read_csv(f'../output/measure_{value.id}.csv')
    if key == "event_code":
        df.round(16)
    
    to_datetime_sort(df)
    
    if value.id=='age_band':
        data_dict[value.id] = calculate_rate(df, m=value, rate_per=1000, return_age=True)

    else:
        data_dict[value.id] = calculate_rate(df, m=value, rate_per=1000)




codelist = pd.read_csv('../codelists/opensafely-structured-medication-review-nhs-england.csv')
codelist = codelist.round(16)


In [27]:
display(
md(f"## Total {marker} Number")
)


## Total SMR Number

In [28]:
plot_measures(data_dict['total'], title=f"Total {marker} across whole population", column_to_plot='rate', category=False, y_label='Rate per 1000')


In [29]:
display(
md("### Sub totals by sub codes"),
md("Events for the top 5 subcodes across the study period"))
child_table = create_child_table(df=data_dict['event_code'], code_df=codelist, code_column=codelist_code_column, term_column=codelist_term_column)
child_table
    

### Sub totals by sub codes

Events for the top 5 subcodes across the study period

Unnamed: 0,code,Events,Events (thousands),Description
1239511000000000.0,1239511000000000.0,2455.0,2.455,Structured medication review


In [30]:
display(
md("## Total Number by GP Practice")
)


## Total Number by GP Practice

In [31]:
practice_df = pd.read_csv('../output/input_practice_count.csv')
practices_dict =calculate_statistics_practices(data_dict['practice'], practice_df,end_date)
print(f'Practices included entire period: {practices_dict["total"]["number"]} ({practices_dict["total"]["percent"]}%)')
print(f'Practices included within last year: {practices_dict["year"]["number"]} ({practices_dict["year"]["percent"]}%)')
print(f'Practices included within last 3 months: {practices_dict["months_3"]["number"]} ({practices_dict["months_3"]["percent"]}%)')
interactive_deciles_chart(data_dict['practice'], period_column='date', column='event', title='Decile chart',ylabel='rate per 1000')


Practices included entire period: 44 (125.71%)
Practices included within last year: 38 (108.57%)
Practices included within last 3 months: 0 (0.0%)


In [32]:
i=0


In [33]:
    display(
    md(f"## Breakdown by {demographics[i]}")
    )
    counts_df = calculate_statistics_demographics(df=data_dict[demographics[i]], demographic_var=demographics[i], end_date=end_date, event_column='event')
    counts_df
    

## Breakdown by sex

Unnamed: 0,Total Study Period,Within Last Year,Within Last 3 Months
F,12553.0,5000.0,0.0
M,12208.0,5000.0,0.0
U,239.0,5000.0,0.0


In [34]:
    plot_measures(data_dict[demographics[i]], title=f'Breakdown by {demographics[i]}', column_to_plot='rate_standardised', category=demographics[i], y_label='Standardised Rate per 1000')
    i+=1
    

In [35]:
    display(
    md(f"## Breakdown by {demographics[i]}")
    )
    counts_df = calculate_statistics_demographics(df=data_dict[demographics[i]], demographic_var=demographics[i], end_date=end_date, event_column='event')
    counts_df
    

## Breakdown by age_band

Unnamed: 0,Total Study Period,Within Last Year,Within Last 3 Months
0-4,1289.0,5000.0,0.0
10-14,1218.0,5000.0,0.0
15-19,1258.0,5000.0,0.0
20-24,1198.0,5000.0,0.0
25-29,1239.0,5000.0,0.0
30-34,1300.0,5000.0,0.0
35-39,1212.0,5000.0,0.0
40-44,1265.0,5000.0,0.0
45-49,2478.0,5000.0,0.0
5-9,1267.0,5000.0,0.0


In [36]:
    plot_measures(data_dict[demographics[i]], title=f'Breakdown by {demographics[i]}', column_to_plot='rate_standardised', category=demographics[i], y_label='Standardised Rate per 1000')
    i+=1
    

In [37]:
    display(
    md(f"## Breakdown by {demographics[i]}")
    )
    counts_df = calculate_statistics_demographics(df=data_dict[demographics[i]], demographic_var=demographics[i], end_date=end_date, event_column='event')
    counts_df
    

## Breakdown by region

Unnamed: 0,Total Study Period,Within Last Year,Within Last 3 Months
East Midlands,272.0,511.0,0.0
East of England,267.0,511.0,0.0
London,494.0,511.0,0.0
North East,261.0,511.0,0.0
North West,282.0,511.0,0.0
South East,496.0,511.0,0.0
West Midlands,253.0,511.0,0.0
Yorkshire and the Humber,239.0,511.0,0.0


In [38]:
    plot_measures(data_dict[demographics[i]], title=f'Breakdown by {demographics[i]}', column_to_plot='rate_standardised', category=demographics[i], y_label='Standardised Rate per 1000')
    i+=1
    

In [39]:
    display(
    md(f"## Breakdown by {demographics[i]}")
    )
    counts_df = calculate_statistics_demographics(df=data_dict[demographics[i]], demographic_var=demographics[i], end_date=end_date, event_column='event')
    counts_df
    

## Breakdown by imd

Unnamed: 0,Total Study Period,Within Last Year,Within Last 3 Months
100,9952.0,10000.0,0.0
200,10088.0,10000.0,0.0
300,9872.0,10000.0,0.0
400,10048.0,10000.0,0.0
500,10040.0,10000.0,0.0


In [40]:
    plot_measures(data_dict[demographics[i]], title=f'Breakdown by {demographics[i]}', column_to_plot='rate_standardised', category=demographics[i], y_label='Standardised Rate per 1000')
    i+=1
    

ValueError: 
    Invalid value of type 'numpy.int64' received for the 'name' property of scatter
        Received value: 100

    The 'name' property is a string and must be specified as:
      - A string
      - A number that will be converted to a string

In [None]:
    display(
    md(f"## Breakdown by {demographics[i]}")
    )
    counts_df = calculate_statistics_demographics(df=data_dict[demographics[i]], demographic_var=demographics[i], end_date=end_date, event_column='event')
    counts_df
    

In [None]:
    plot_measures(data_dict[demographics[i]], title=f'Breakdown by {demographics[i]}', column_to_plot='rate_standardised', category=demographics[i], y_label='Standardised Rate per 1000')
    i+=1
    