In [1]:
import html
import urllib.request
import zipfile

import folium
import pandas as pd
from geopy.extra.rate_limiter import RateLimiter
from geopy.geocoders import Nominatim

In [2]:
# GP Practice data ingestion
#
# Downloads the NHS Digital "epraccur" archive, extracts its first member and
# stores the raw bytes as data/epraccur_data.csv. The file is later read with
# `names=header_list`, so the column names are supplied by this cell.

url = 'https://files.digital.nhs.uk/assets/ods/current/epraccur.zip'
filehandle, _ = urllib.request.urlretrieve(url)

# Context managers guarantee the archive, the member stream and the output
# file are closed even if reading or writing raises part-way through.
with zipfile.ZipFile(filehandle, 'r') as zip_file_object:
    first_file = zip_file_object.namelist()[0]
    with zip_file_object.open(first_file) as file:
        content = file.read()

with open('data/epraccur_data.csv', 'wb') as csv_file:
    csv_file.write(content)

# Column names applied, in order, when the extracted CSV is loaded.
header_list = [
    "Organisation Code",
    "Name",
    "National Grouping",
    "High Level Health Geography",
    "Address line 1",
    "Address line 2",
    "Address line 3",
    "Address line 4",
    "Address line 5",
    "Postcode",
    "Open Date",
    "Close Date",
    "Status Code",
    "Organisation Sub-Type Code",
    "Commissioner",
    "Join Provider/Purchaser Date",
    "Left Provider/Purchaser Date",
    "Contact Telephone Number",
    "Null 1",
    "Null 2",
    "Null 3",
    "Amended Record Indicator",
    "Null 4",
    "Provider/Purchaser",
    "Null 5",
    "Prescribing Setting",
    "Null 6",
]

In [3]:
# Load the extracted practice list and narrow it to the rows shown on the map:
# status code "A", prescribing setting 4, and a postcode containing one of the
# CB1-CB5 outward codes.
gp_practice_df = pd.read_csv('data/epraccur_data.csv', names=header_list)

# na=False makes rows with a missing postcode evaluate to False instead of
# injecting NaN into the boolean mask.
is_cambridge_postcode = gp_practice_df["Postcode"].str.contains(
    'CB1 |CB2 |CB3 |CB4 |CB5 ', na=False
)
gp_practice_df = gp_practice_df[
    (gp_practice_df["Status Code"] == "A")
    & (gp_practice_df["Prescribing Setting"] == 4)
    & is_cambridge_postcode
].reset_index(drop = True)

# Build "Line 2, Line 3, Line 4, POSTCODE" from the address lines that are
# actually present. Skipping missing values up front avoids stringifying them
# to "nan" and then deleting the text "Nan", which would also corrupt genuine
# address text containing those letters and leave runs of spaces behind.
address_columns = ['Address line 2', 'Address line 3', 'Address line 4']
gp_practice_df['Full Address'] = [
    ', '.join([str(part).title() for part in parts if pd.notna(part)] + [postcode])
    for *parts, postcode in gp_practice_df[address_columns + ['Postcode']].itertuples(
        index=False, name=None
    )
]
gp_practice_df['Name'] = gp_practice_df['Name'].str.title()

# .copy() yields an independent frame so that adding columns to it later does
# not raise SettingWithCopyWarning against gp_practice_df.
gp_practice_df_1 = gp_practice_df[
    ['Organisation Code', 'Name', 'Full Address', 'Postcode', 'Contact Telephone Number']
].copy()

In [4]:
# Geocode the practices only when an organisation code appears that is not in
# the cached CSV; otherwise reuse the cached coordinates.
# NOTE(review): the set difference below only detects additions. A practice
# that disappears from the source data stays in the cache - confirm whether
# that is intended.
output_columns = ["Organisation Code","Name", "Full Address", "Postcode", "Contact Telephone Number", "Latitude", "Longitude"]

try:
    gp_practice_df_saved = pd.read_csv('data/cambridge_gp_practices.csv', index_col = 0)
except FileNotFoundError:
    # No cache yet (first run): treat every practice as new.
    gp_practice_df_saved = pd.DataFrame(columns=output_columns)

new_list = list(gp_practice_df_1['Organisation Code'])
saved_list = list(gp_practice_df_saved['Organisation Code'])
new_orgs = list(set(new_list) - (set(saved_list)))
if new_orgs:
    geolocator = Nominatim(user_agent="open_access_nhs")
    geocode = RateLimiter(geolocator.geocode, min_delay_seconds=1)

    # Work on an independent copy so the new columns are not written onto a
    # slice of another frame.
    gp_practice_df_1 = gp_practice_df_1.copy()

    # Go through the rate-limited wrapper rather than geolocator.geocode, so
    # the one-second delay configured above is actually applied.
    gp_practice_df_1["location"] = gp_practice_df_1["Postcode"].apply(geocode)
    gp_practice_df_1["Point"] = gp_practice_df_1["location"].apply(lambda loc: tuple(loc.point) if loc else None)

    # A failed lookup leaves None in "Point"; pad those rows so the three
    # coordinate columns can still be built (they end up as missing values).
    coordinates = pd.DataFrame(
        [point if isinstance(point, tuple) else (None, None, None)
         for point in gp_practice_df_1["Point"]],
        columns=['Latitude', 'Longitude', 'Altitude'],
        index=gp_practice_df_1.index,
    )
    for column in coordinates.columns:
        gp_practice_df_1[column] = coordinates[column]

    gp_practice_df_2 = gp_practice_df_1[output_columns]
    gp_practice_df_2.to_csv('data/cambridge_gp_practices.csv')
else:
    gp_practice_df_2 = gp_practice_df_saved.copy()
    print('No new cambridge GP practices')

No new cambridge GP practices


In [5]:
# Draw a 500x500 map centred on [52.2, 0.12] and add one marker per practice,
# each with a popup listing its name, code, address and telephone number.
frame = folium.Figure(width=500, height=500)
fig = folium.Map(
    location=[52.2, 0.12],
    tiles="cartodbpositron",
    zoom_start=12).add_to(frame)

marker_columns = [
    'Organisation Code',
    'Name',
    'Full Address',
    'Contact Telephone Number',
    'Latitude',
    'Longitude',
]
for code, name, address, number, lat, lon in gp_practice_df_2[marker_columns].itertuples(
        index=False, name=None):
    # A practice without coordinates cannot be placed on the map; skip it
    # rather than letting one bad row abort the whole rendering.
    if pd.isna(lat) or pd.isna(lon):
        continue

    # The values come from an external data file, so escape them before
    # embedding them in the popup markup (e.g. "&" or "<" in a practice name).
    popup_html = (
        '<b>' + 'Name: ' + '</b>' + html.escape(str(name)) + '<br>'
        '<b>' + 'GP Practice Code: ' + '</b>' + html.escape(str(code)) + '<br>'
        '<b>' + 'Address: ' + '</b>' + html.escape(str(address)) + '<br>'
        '<b>' + 'Telephone Number: ' + '</b>' + html.escape(str(number)) + '<br>'
    )
    folium.Marker(
        [lat, lon],
        # str() keeps the width calculation working for a non-string address.
        popup=folium.Popup(popup_html, max_width=len(str(address))*20),
        icon=folium.Icon(color="darkblue", prefix='fa', icon="fa-medkit"),
        ).add_to(fig)
fig