# Querying Large Parquet Files with Pandas

## Install the Required Packages

In [1]:
pip install opendatablend

Note: you may need to restart the kernel to use updated packages.


In [2]:
pip install pyarrow

Note: you may need to restart the kernel to use updated packages.


In [3]:
pip install pandas

Note: you may need to restart the kernel to use updated packages.


## Import the Required Packages

In [4]:
import opendatablend as odb
import pandas as pd

## Setup Variables for use with Open Data Blend for Python

In [5]:
# Pin the analysis to a specific version of the dataset to enable reproducible analysis. If we wanted it to always use the latest version of the data, we could use https://packages.opendatablend.io/v1/open-data-blend-prescribing/datapackage.json
dataset_path = 'https://packages.opendatablend.io/v1/open-data-blend-prescribing/20210818T044451Z/datapackage.json'

# Set the access key. If the access key is set to an empty string (i.e. '') then unauthenticated calls to the Open Data Blend Dataset API will be made. Unauthenticated calls have a monthly limit: https://docs.opendatablend.io/open-data-blend-datasets/dataset-api#usage-limits 
access_key=''

## Acquiring the Prescription Data

In [6]:
# Get the Date data file. This file is less than 10 MB.
output_date = odb.get_data(dataset_path, 'date-parquet', access_key=access_key)

In [7]:
# Get the Prescribing Chemical data file. The file is less than 10 MB.
output_prescribing_chemical = odb.get_data(dataset_path, 'prescribing-chemical-parquet', access_key=access_key)

In [8]:
# Get the Prescribing Practice data file. The file is less than 10 MB in size.
output_prescribing_practice = odb.get_data(dataset_path, 'prescribing-practice-parquet', access_key=access_key)

In [9]:
# Get the Primary Care Organisation data file. The file is less than 10 MB in size.
output_primary_care_organisation = odb.get_data(dataset_path, 'primary-care-organisation-parquet', access_key=access_key)

In [10]:
# Get the English Prescribing 2019 data file. The file is larger than 3 GB.
output_english_prescriptions_2019 = odb.get_data(dataset_path, 'english-prescribing-2019-parquet', access_key=access_key)

In [11]:
# Get the English Prescribing 2020 data file. The file is larger than 3 GB. 
output_english_prescriptions_2020 = odb.get_data(dataset_path, 'english-prescribing-2020-parquet', access_key=access_key)

In [12]:
# Get the English Prescribing 2021 data file. The file is larger than 1.5 GB. 
output_english_prescriptions_2021 = odb.get_data(dataset_path, 'english-prescribing-2021-parquet', access_key=access_key)

## Efficiently Querying the Prescription Data

### Step 1 - Selectively load the dimension data

#### Create a Date DataFrame:

In [13]:
# Specify the required columns
date_columns = [
    'drv_date_key',
    'drv_year'
]

In [14]:
# Specify the filter predicates to be applied
date_filters = [
    ('drv_date_key', '>=', 20190101), 
    ('drv_date_key', '<=', 20210630)
]

In [15]:
# Capture the execution start time
execution_start_time = pd.Timestamp.now()

# Load the data after the column selection and filter predicates have been applied
date = pd.read_parquet(output_date.data_file_name, columns=date_columns, filters=date_filters)

# Return the execution duration
pd.Timestamp.now() - execution_start_time

Timedelta('0 days 00:00:01.327084')

In [16]:
# Preview a sample of the data
date.sample(n=5, random_state=1)

Unnamed: 0,drv_date_key,drv_year
191,20190711,2019
832,20210412,2021
111,20190422,2019
262,20190920,2019
670,20201101,2020


In [17]:
# Check the size in memory
date.info(memory_usage='deep', null_counts=True, verbose=True)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 912 entries, 0 to 911
Data columns (total 2 columns):
 #   Column        Non-Null Count  Dtype
---  ------        --------------  -----
 0   drv_date_key  912 non-null    int32
 1   drv_year      912 non-null    int32
dtypes: int32(2)
memory usage: 7.2 KB


#### Create a Prescribing Chemical DataFrame:

In [18]:
# Specify the required columns
prescribing_chemical_columns = [
    'drv_prescribing_chemical_key',
    'srk_british_national_formulary_code',
    'src_british_national_formulary_description'
]

In [19]:
# Specify the filter predicates to be applied - We are only interested Glucose blood testing reagents (0601060D0) and Glucose and ketone blood testing reagents (0601060X0) 

prescribing_chemical_filters = [
    ('drk_source', '=', 'English Prescribing'),
    ('src_british_national_formulary_chemical_substance', 'in', ['0601060D0', '0601060X0'])
]

In [20]:
# Capture the execution start time
execution_start_time = pd.Timestamp.now()

# Load the data after the column selection and filter predicates have been applied
prescribing_chemical = pd.read_parquet(output_prescribing_chemical.data_file_name, columns=prescribing_chemical_columns, filters=prescribing_chemical_filters)

# Return the execution duration
pd.Timestamp.now() - execution_start_time

Timedelta('0 days 00:00:00.107007')

In [21]:
# Preview a sample of the data
prescribing_chemical.sample(n=5, random_state=1)

Unnamed: 0,drv_prescribing_chemical_key,srk_british_national_formulary_code,src_british_national_formulary_description
40,49436,0601060D0BWAAA0,FreeStyle testing strips
31,36388,0601060D0BJACA0,MediSense SoftSense testing strips
45,49441,0601060D0DHAAA0,GlucoDock testing strips
86,89867,0601060D0ERAAA0,Microdot Max testing strips
83,87623,0601060D0ENAAA0,Contour Plus testing strips


In [22]:
# Check the size in memory
prescribing_chemical.info(memory_usage='deep', null_counts=True, verbose=True)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 94 entries, 0 to 93
Data columns (total 3 columns):
 #   Column                                      Non-Null Count  Dtype 
---  ------                                      --------------  ----- 
 0   drv_prescribing_chemical_key                94 non-null     int32 
 1   srk_british_national_formulary_code         94 non-null     object
 2   src_british_national_formulary_description  94 non-null     object
dtypes: int32(1), object(2)
memory usage: 14.8 KB


#### Create a Prescribing Practice DataFrame:

In [23]:
# Specify the required columns
prescribing_prescribing_columns = [
    'drv_prescribing_practice_key',
    'srk_prescribing_practice_code',
    'src_prescribing_practice'
]

In [24]:
# Specify the filter predicates to be applied

prescribing_prescribing_filters = [
    ('drk_source', '=', 'English Prescribing'),
]

In [25]:
# Capture the execution start time
execution_start_time = pd.Timestamp.now()

# Load the data after the column selection and filter predicates have been applied
prescribing_practice = pd.read_parquet(output_prescribing_practice.data_file_name, columns=prescribing_prescribing_columns, filters=prescribing_prescribing_filters)

# Return the execution duration
pd.Timestamp.now() - execution_start_time

Timedelta('0 days 00:00:00.063000')

In [26]:
# Preview a sample of the data
prescribing_practice.sample(n=5, random_state=1)

Unnamed: 0,drv_prescribing_practice_key,srk_prescribing_practice_code,src_prescribing_practice
986,17095,B85024,THE WATERLOO PRACTICE
16021,41707,Y03979,WWL COMMUNITY NURSES
2539,18648,E84635,THE SURGERY
4286,20395,H81080,LONGCROFT CLINIC
1633,17742,C84105,FAIRFIELDS PRACTICE


In [27]:
# Check the size in memory
prescribing_practice.info(memory_usage='deep', null_counts=True, verbose=True)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 17821 entries, 0 to 17820
Data columns (total 3 columns):
 #   Column                         Non-Null Count  Dtype 
---  ------                         --------------  ----- 
 0   drv_prescribing_practice_key   17821 non-null  int32 
 1   srk_prescribing_practice_code  17821 non-null  object
 2   src_prescribing_practice       17821 non-null  object
dtypes: int32(1), object(2)
memory usage: 2.5 MB


#### Create a Primary Care Organisation DataFrame:

In [28]:
# Specify the required columns
primary_care_organisation_columns = [
    'drv_primary_care_organisation_key',
    'srk_primary_care_organisation_code',
    'src_primary_care_organisation'
]

In [29]:
# Specify the filter predicates to be applied
primary_care_organisation_filters = [
    ('drk_source', '=', 'English Prescribing'),
]

In [30]:
# Capture the execution start time
execution_start_time = pd.Timestamp.now()

# Load the data after the column selection and filter predicates have been applied
primary_care_organisation = pd.read_parquet(output_primary_care_organisation.data_file_name, columns=primary_care_organisation_columns, filters=primary_care_organisation_filters)

# Return the execution duration
pd.Timestamp.now() - execution_start_time

Timedelta('0 days 00:00:00.031000')

In [31]:
# Preview a sample of the data
primary_care_organisation.sample(n=5, random_state=1)

Unnamed: 0,drv_primary_care_organisation_key,srk_primary_care_organisation_code,src_primary_care_organisation
215,709,13L00,SURREY AND SUSSEX CB HUB
663,1342,07G00,THURROCK CCG
773,1452,99A00,LIVERPOOL CCG
798,1477,AND00,BADDOW HOSPITAL LTD
629,1308,04F00,MILTON KEYNES CCG


In [32]:
# Check the size in memory
primary_care_organisation.info(memory_usage='deep', null_counts=True, verbose=True)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1058 entries, 0 to 1057
Data columns (total 3 columns):
 #   Column                              Non-Null Count  Dtype 
---  ------                              --------------  ----- 
 0   drv_primary_care_organisation_key   1058 non-null   int32 
 1   srk_primary_care_organisation_code  1058 non-null   object
 2   src_primary_care_organisation       1058 non-null   object
dtypes: int32(1), object(2)
memory usage: 152.1 KB


### Step 2 - Selectively load the fact data

#### Specify the required columns and filter predicates to be used for all the English Prescribing fact data:

In [33]:
# Specify the required columns
english_prescribing_columns = [
    'drv_processing_period_start_date_key',
    'drv_primary_care_organisation_key',
    'drv_prescribing_practice_key',
    'drv_prescribing_chemical_key',
    'src_english_prescribing_items'
]

In [34]:
# Specify the filter predicates to be applied - We are only interested in the fact rows that correspond to the subset of the Prescribing Chemical Keys in our Prescribing Chemical DataFrame
english_prescribing_key_filter = [
    ('drv_prescribing_chemical_key', 'in', prescribing_chemical['drv_prescribing_chemical_key'])
]

#### Create an English Prescribing 2019 DataFrame

In [35]:
# Capture the execution start time
execution_start_time = pd.Timestamp.now()

# Load the data after the column selection and filter predicates have been applied
english_prescribing_2019 = pd.read_parquet(output_english_prescriptions_2019.data_file_name, columns=english_prescribing_columns, filters=english_prescribing_key_filter)

# Return the execution duration
pd.Timestamp.now() - execution_start_time

Timedelta('0 days 00:00:07.883614')

In [36]:
# Preview a sample of the data
english_prescribing_2019.sample(n=5, random_state=1)

Unnamed: 0,drv_processing_period_start_date_key,drv_primary_care_organisation_key,drv_prescribing_practice_key,drv_prescribing_chemical_key,src_english_prescribing_items
646002,20190501,496,16496,62476,3
1452546,20191001,519,16406,36988,2
1651468,20191101,588,41306,58014,9
1550103,20191001,659,20460,35335,1
1649095,20191101,586,22353,34541,1


In [37]:
# Check the size in memory
english_prescribing_2019.info(memory_usage='deep', null_counts=True, verbose=True)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1905926 entries, 0 to 1905925
Data columns (total 5 columns):
 #   Column                                Non-Null Count    Dtype
---  ------                                --------------    -----
 0   drv_processing_period_start_date_key  1905926 non-null  int32
 1   drv_primary_care_organisation_key     1905926 non-null  int32
 2   drv_prescribing_practice_key          1905926 non-null  int32
 3   drv_prescribing_chemical_key          1905926 non-null  int32
 4   src_english_prescribing_items         1905926 non-null  int32
dtypes: int32(5)
memory usage: 36.4 MB


#### Create an English Prescribing 2020 DataFrame

In [38]:
# Capture the execution start time
execution_start_time = pd.Timestamp.now()

# Load the data after the column selection and filter predicates have been applied
english_prescribing_2020 = pd.read_parquet(output_english_prescriptions_2020.data_file_name, columns=english_prescribing_columns, filters=english_prescribing_key_filter)

# Return the execution duration
pd.Timestamp.now() - execution_start_time

Timedelta('0 days 00:00:06.639458')

In [39]:
# Preview a sample of the data
english_prescribing_2020.sample(n=5, random_state=1)

Unnamed: 0,drv_processing_period_start_date_key,drv_primary_care_organisation_key,drv_prescribing_practice_key,drv_prescribing_chemical_key,src_english_prescribing_items
497764,20200401,1306,24070,34244,1
1617845,20201101,1419,21747,34246,2
1461134,20201001,1416,32508,34541,3
902672,20200601,1447,42410,34244,3
207491,20200201,588,28449,34245,1


In [40]:
# Check the size in memory
english_prescribing_2020.info(memory_usage='deep', null_counts=True, verbose=True)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1823291 entries, 0 to 1823290
Data columns (total 5 columns):
 #   Column                                Non-Null Count    Dtype
---  ------                                --------------    -----
 0   drv_processing_period_start_date_key  1823291 non-null  int32
 1   drv_primary_care_organisation_key     1823291 non-null  int32
 2   drv_prescribing_practice_key          1823291 non-null  int32
 3   drv_prescribing_chemical_key          1823291 non-null  int32
 4   src_english_prescribing_items         1823291 non-null  int32
dtypes: int32(5)
memory usage: 34.8 MB


#### Create an English Prescribing 2021 DataFrame

In [41]:
# Capture the execution start time
execution_start_time = pd.Timestamp.now()

# Load the data after the column selection and filter predicates have been applied
english_prescribing_2021 = pd.read_parquet(output_english_prescriptions_2021.data_file_name, columns=english_prescribing_columns, filters=english_prescribing_key_filter)

# Return the execution duration
pd.Timestamp.now() - execution_start_time

Timedelta('0 days 00:00:03.789186')

In [42]:
# Preview a sample of the data
english_prescribing_2021.sample(n=5, random_state=1)

Unnamed: 0,drv_processing_period_start_date_key,drv_primary_care_organisation_key,drv_prescribing_practice_key,drv_prescribing_chemical_key,src_english_prescribing_items
633862,20210501,1404,20826,87623,1
642740,20210501,1411,33847,34022,2
423697,20210301,1447,19901,36985,1
506891,20210401,1418,17308,58014,1
325311,20210301,1320,22184,58984,1


In [43]:
# Check the size in memory
english_prescribing_2021.info(memory_usage='deep', null_counts=True, verbose=True)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 882170 entries, 0 to 882169
Data columns (total 5 columns):
 #   Column                                Non-Null Count   Dtype
---  ------                                --------------   -----
 0   drv_processing_period_start_date_key  882170 non-null  int32
 1   drv_primary_care_organisation_key     882170 non-null  int32
 2   drv_prescribing_practice_key          882170 non-null  int32
 3   drv_prescribing_chemical_key          882170 non-null  int32
 4   src_english_prescribing_items         882170 non-null  int32
dtypes: int32(5)
memory usage: 16.8 MB


### Step 3 - Combine the fact data

In [44]:
# Capture the execution start time
execution_start_time = pd.Timestamp.now()

# Combine the three fact DataFrames
english_prescribing = pd.concat([english_prescribing_2019, english_prescribing_2020, english_prescribing_2021])

# Return the execution duration
pd.Timestamp.now() - execution_start_time

Timedelta('0 days 00:00:00.087002')

In [45]:
# Check the size in memory
english_prescribing.info(memory_usage='deep', null_counts=True, verbose=True)

<class 'pandas.core.frame.DataFrame'>
Int64Index: 4611387 entries, 0 to 882169
Data columns (total 5 columns):
 #   Column                                Non-Null Count    Dtype
---  ------                                --------------    -----
 0   drv_processing_period_start_date_key  4611387 non-null  int32
 1   drv_primary_care_organisation_key     4611387 non-null  int32
 2   drv_prescribing_practice_key          4611387 non-null  int32
 3   drv_prescribing_chemical_key          4611387 non-null  int32
 4   src_english_prescribing_items         4611387 non-null  int32
dtypes: int32(5)
memory usage: 123.1 MB


In [46]:
# Free up some memory by dropping the redundant DataFrames
del english_prescribing_2019, english_prescribing_2020, english_prescribing_2021

### Step 4 - Denormalise the dimension data into the fact data

In [47]:
# Capture the execution start time
execution_start_time = pd.Timestamp.now()

# Merge the dimension DataFrames into the fact DataFrame and drop columns that are not longer needed
english_prescribing_wide = (
    english_prescribing
    .merge(date, left_on='drv_processing_period_start_date_key', right_on='drv_date_key')
    .merge(prescribing_chemical, left_on='drv_prescribing_chemical_key', right_on='drv_prescribing_chemical_key')
    .merge(prescribing_practice,  left_on='drv_prescribing_practice_key', right_on='drv_prescribing_practice_key')
    .merge(primary_care_organisation,  left_on='drv_primary_care_organisation_key', right_on='drv_primary_care_organisation_key')
).drop(columns=[
    'drv_processing_period_start_date_key',
    'drv_date_key', 
    'drv_prescribing_chemical_key', 
    'drv_prescribing_practice_key', 
    'drv_primary_care_organisation_key'
]) 

# Return the execution duration
pd.Timestamp.now() - execution_start_time

Timedelta('0 days 00:00:04.633174')

In [48]:
# Check the size in memory
english_prescribing_wide.info(memory_usage='deep', null_counts=True, verbose=True)

<class 'pandas.core.frame.DataFrame'>
Int64Index: 4611387 entries, 0 to 4611386
Data columns (total 8 columns):
 #   Column                                      Non-Null Count    Dtype 
---  ------                                      --------------    ----- 
 0   src_english_prescribing_items               4611387 non-null  int32 
 1   drv_year                                    4611387 non-null  int32 
 2   srk_british_national_formulary_code         4611387 non-null  object
 3   src_british_national_formulary_description  4611387 non-null  object
 4   srk_prescribing_practice_code               4611387 non-null  object
 5   src_prescribing_practice                    4611387 non-null  object
 6   srk_primary_care_organisation_code          4611387 non-null  object
 7   src_primary_care_organisation               4611387 non-null  object
dtypes: int32(2), object(6)
memory usage: 1.9 GB


In [49]:
# Free up some memory by dropping the redundant DataFrame
del english_prescribing

### Step 5 – Analyse the data

#### What are the top 10 blood glucose test strip items?

In [50]:
#In 2019
english_prescribing_wide[english_prescribing_wide['drv_year'] == 2019][['srk_british_national_formulary_code', 'src_british_national_formulary_description', 'src_english_prescribing_items']].groupby(by=['srk_british_national_formulary_code', 'src_british_national_formulary_description']).sum().sort_values(by='src_english_prescribing_items', ascending=False).head(10)

Unnamed: 0_level_0,Unnamed: 1_level_0,src_english_prescribing_items
srk_british_national_formulary_code,src_british_national_formulary_description,Unnamed: 2_level_1
0601060D0CUABA0,GlucoRx Nexus testing strips,1212243
0601060D0CIAAA0,WaveSense JAZZ testing strips,565761
0601060D0CCAAA0,Aviva testing strips,558627
0601060D0CSAAA0,Mobile cassette,495687
0601060D0DWAAA0,Performa testing strips,445228
0601060D0BQAEA0,GlucoMen areo Sensor testing strips,417173
0601060D0DQAAA0,TEE2 testing strips,394264
0601060D0DGAAA0,Contour Next testing strips,384447
0601060D0BJADA0,FreeStyle Optium testing strips,252956
0601060D0DKAAA0,TRUEyou testing strips,218911


In [51]:
# In 2020
english_prescribing_wide[english_prescribing_wide['drv_year'] == 2020][['srk_british_national_formulary_code', 'src_british_national_formulary_description', 'src_english_prescribing_items']].groupby(by=['srk_british_national_formulary_code', 'src_british_national_formulary_description']).sum().sort_values(by='src_english_prescribing_items', ascending=False).head(10)

Unnamed: 0_level_0,Unnamed: 1_level_0,src_english_prescribing_items
srk_british_national_formulary_code,src_british_national_formulary_description,Unnamed: 2_level_1
0601060D0CUABA0,GlucoRx Nexus testing strips,1179977
0601060D0CIAAA0,WaveSense JAZZ testing strips,640254
0601060D0DQAAA0,TEE2 testing strips,511568
0601060D0DWAAA0,Performa testing strips,467861
0601060D0CSAAA0,Mobile cassette,465622
0601060D0CCAAA0,Aviva testing strips,450995
0601060D0BQAEA0,GlucoMen areo Sensor testing strips,422445
0601060D0DGAAA0,Contour Next testing strips,332670
0601060D0EDAAA0,Finetest Lite testing strips,301203
0601060D0BJADA0,FreeStyle Optium testing strips,227228


In [52]:
# In 2021 up until June 2021
english_prescribing_wide[english_prescribing_wide['drv_year'] == 2021][['srk_british_national_formulary_code', 'src_british_national_formulary_description', 'src_english_prescribing_items']].groupby(by=['srk_british_national_formulary_code', 'src_british_national_formulary_description']).sum().sort_values(by='src_english_prescribing_items', ascending=False).head(10)

Unnamed: 0_level_0,Unnamed: 1_level_0,src_english_prescribing_items
srk_british_national_formulary_code,src_british_national_formulary_description,Unnamed: 2_level_1
0601060D0CUABA0,GlucoRx Nexus testing strips,564884
0601060D0CIAAA0,WaveSense JAZZ testing strips,337503
0601060D0DQAAA0,TEE2 testing strips,264154
0601060D0DWAAA0,Performa testing strips,234710
0601060D0CSAAA0,Mobile cassette,208834
0601060D0BQAEA0,GlucoMen areo Sensor testing strips,208517
0601060D0CCAAA0,Aviva testing strips,187763
0601060D0EDAAA0,Finetest Lite testing strips,181144
0601060D0DGAAA0,Contour Next testing strips,148304
0601060D0BJADA0,FreeStyle Optium testing strips,103265


#### Which CCGs have prescribed the most blood glucose test strip items?

In [53]:
# In 2019
english_prescribing_wide[english_prescribing_wide['drv_year'] == 2019][['srk_primary_care_organisation_code', 'src_primary_care_organisation','src_english_prescribing_items']].groupby(by=['srk_primary_care_organisation_code', 'src_primary_care_organisation']).sum().sort_values(by='src_english_prescribing_items', ascending=False).head(10)

Unnamed: 0_level_0,Unnamed: 1_level_0,src_english_prescribing_items
srk_primary_care_organisation_code,src_primary_care_organisation,Unnamed: 2_level_1
15E00,BIRMINGHAM AND SOLIHULL CCG,165837
06H00,CAMBRIDGESHIRE AND PETERBOROUGH CCG,112515
15N00,DEVON CCG,104458
15C00,"BRISTOL, NORTH SOMERSET & S GLOS CCG",96744
11J00,DORSET CCG,93350
04G00,NENE CCG,92260
15F00,LEEDS CCG,90188
15M00,DERBY & DERBYSHIRE CCG,89705
05L00,SANDWELL AND WEST BIRMINGHAM CCG,85760
14L00,MANCHESTER CCG,83810


In [54]:
# In 2020
english_prescribing_wide[english_prescribing_wide['drv_year'] == 2020][['srk_primary_care_organisation_code', 'src_primary_care_organisation','src_english_prescribing_items']].groupby(by=['srk_primary_care_organisation_code', 'src_primary_care_organisation']).sum().sort_values(by='src_english_prescribing_items', ascending=False).head(10)

Unnamed: 0_level_0,Unnamed: 1_level_0,src_english_prescribing_items
srk_primary_care_organisation_code,src_primary_care_organisation,Unnamed: 2_level_1
15E00,BIRMINGHAM AND SOLIHULL CCG,162015
91Q00,KENT AND MEDWAY CCG,154217
15N00,DEVON CCG,137184
72Q00,SOUTH EAST LONDON CCG,131107
15M00,DERBY & DERBYSHIRE CCG,115930
06H00,CAMBRIDGESHIRE AND PETERBOROUGH CCG,110854
26A00,NORFOLK AND WAVENEY CCG,106603
52R00,NOTTINGHAM AND NOTTINGHAMSHIRE CCG,102896
36L00,SOUTH WEST LONDON CCG,102198
93C00,NORTH CENTRAL LONDON CCG,101324


In [55]:
# In 2021 up until June 2021
english_prescribing_wide[english_prescribing_wide['drv_year'] == 2021][['srk_primary_care_organisation_code', 'src_primary_care_organisation','src_english_prescribing_items']].groupby(by=['srk_primary_care_organisation_code', 'src_primary_care_organisation']).sum().sort_values(by='src_english_prescribing_items', ascending=False).head(10)

Unnamed: 0_level_0,Unnamed: 1_level_0,src_english_prescribing_items
srk_primary_care_organisation_code,src_primary_care_organisation,Unnamed: 2_level_1
91Q00,KENT AND MEDWAY CCG,98394
72Q00,SOUTH EAST LONDON CCG,88408
15E00,BIRMINGHAM AND SOLIHULL CCG,77638
26A00,NORFOLK AND WAVENEY CCG,68221
36L00,SOUTH WEST LONDON CCG,68113
52R00,NOTTINGHAM AND NOTTINGHAMSHIRE CCG,67173
93C00,NORTH CENTRAL LONDON CCG,66544
15N00,DEVON CCG,66312
A3A8R,NORTH EAST LONDON CCG,58423
W2U3Z,NORTH WEST LONDON CCG,58125


#### Which practices have prescribed the most blood glucose test strip items?

In [56]:
# In 2019
english_prescribing_wide[english_prescribing_wide['drv_year'] == 2019][['srk_prescribing_practice_code', 'src_prescribing_practice','src_english_prescribing_items']].groupby(by=['srk_prescribing_practice_code', 'src_prescribing_practice']).sum().sort_values(by='src_english_prescribing_items', ascending=False).head(10)

Unnamed: 0_level_0,Unnamed: 1_level_0,src_english_prescribing_items
srk_prescribing_practice_code,src_prescribing_practice,Unnamed: 2_level_1
D81022,OCTAGON MEDICAL PRACTICE,10548
M85063,MIDLANDS MEDICAL PARTNERSHIP,9418
Y01008,BAY MEDICAL GROUP,8595
F85002,MEDICUS HEALTH PARTNERS-FOREST RD GROUP,8122
M88004,REGIS MEDICAL CENTRE,7378
K83002,LAKESIDE HEALTHCARE,6951
D82044,VIDA HEALTHCARE,6283
G85034,NEXUS HEALTH GROUP,5879
C83019,BEACON MEDICAL PRACTICE,5860
M81026,HEREFORD MEDICAL GROUP,5720


In [57]:
# In 2020
english_prescribing_wide[english_prescribing_wide['drv_year'] == 2020][['srk_prescribing_practice_code', 'src_prescribing_practice','src_english_prescribing_items']].groupby(by=['srk_prescribing_practice_code', 'src_prescribing_practice']).sum().sort_values(by='src_english_prescribing_items', ascending=False).head(10)

Unnamed: 0_level_0,Unnamed: 1_level_0,src_english_prescribing_items
srk_prescribing_practice_code,src_prescribing_practice,Unnamed: 2_level_1
D81022,OCTAGON MEDICAL PRACTICE,10819
M85063,MIDLANDS MEDICAL PARTNERSHIP,8728
Y01008,BAY MEDICAL GROUP,7702
F85002,MEDICUS HEALTH PARTNERS-FOREST RD GROUP,7543
M88004,REGIS MEDICAL CENTRE,7077
K83002,LAKESIDE HEALTHCARE,6676
M92612,HEALTH AND BEYOND,6588
J82155,PORTSDOWN GROUP PRACTICE,6290
G85034,NEXUS HEALTH GROUP,6044
B81025,HOLDERNESS HEALTH,5896


In [58]:
# In 2021 up until June 2021
english_prescribing_wide[english_prescribing_wide['drv_year'] == 2021][['srk_prescribing_practice_code', 'src_prescribing_practice','src_english_prescribing_items']].groupby(by=['srk_prescribing_practice_code', 'src_prescribing_practice']).sum().sort_values(by='src_english_prescribing_items', ascending=False).head(10)

Unnamed: 0_level_0,Unnamed: 1_level_0,src_english_prescribing_items
srk_prescribing_practice_code,src_prescribing_practice,Unnamed: 2_level_1
D81022,OCTAGON MEDICAL PRACTICE,5420
F85002,MEDICUS HEALTH PARTNERS,5277
M85063,MIDLANDS MEDICAL PARTNERSHIP,4038
Y01008,BAY MEDICAL GROUP,3910
A84036,VALENS MEDICAL PARTNERSHIP,3777
M92612,HEALTH AND BEYOND,3449
J82155,PORTSDOWN GROUP PRACTICE,3313
K83002,LAKESIDE HEALTHCARE,3211
M88004,REGIS MEDICAL CENTRE,3169
G85034,NEXUS HEALTH GROUP,2984
