# NHS and GP Administrative Data

Notebook showing how to import administrative data for GP practices and additionally place it into a SQLite database. *(SQLite is a simple file based SQL database that "just works".)*

Data file download URLs identified via: https://digital.nhs.uk/organisation-data-service/data-downloads

In [1]:
#!pip install pandas
#pandas is a python library for working with tabular datasets
#It can be used to import data from CSV files and Excel spreadsheets
import pandas as pd
import numpy as np

In [2]:
#SQLite is a file based SQL database included in the Python distribution
import sqlite3
import time
#To build the database from scratch, any existing copy is first moved out of the way.
#The shell (!) command below does not delete nhsadmin.sqlite: it renames it to a backup
#file stamped with today's date (nhsadmin_pre_YYYY-MM-DD.sqlite).
!mv nhsadmin.sqlite nhsadmin_pre_{time.strftime("%Y-%m-%d")}.sqlite 

In [3]:
### PATCHES
#via http://stackoverflow.com/a/28173933/454773


import lxml.html
import requests

def getDownloadURLs():
    ''' Scrape the NHS Digital ODS download pages and index the data file links.

    Returns a dict mapping a short file identifier (for example 'epraccur') to
    its download URL. The identifier is the last path segment of each link; the
    third-from-last segment is added as an extra key when it is not already
    present in the dict.
    '''
    lookupURLs=["https://digital.nhs.uk/organisation-data-service/data-downloads/gp-data",
                 "https://digital.nhs.uk/organisation-data-service/data-downloads/other-nhs",
                 "https://digital.nhs.uk/organisation-data-service/data-downloads/health-authorities",
                 "https://digital.nhs.uk/organisation-data-service/data-downloads/non-nhs",
                 "https://digital.nhs.uk/organisation-data-service/data-downloads/miscellaneous"]

    downloadURLs=[]
    for url in lookupURLs:
        #A timeout stops a stalled connection from hanging the notebook indefinitely
        txt=requests.get(url, timeout=30).text
        page = lxml.html.fromstring(txt)
        #The data file links are the anchors found inside table cells
        downloadURLs.extend(page.xpath('//tr/td/a/@href'))

    #Primary key: last path segment (later links overwrite earlier ones with the same key)
    downloadURLs_dict={k.split('/')[-1]:k for k in downloadURLs }
    for u in downloadURLs:
        parts=u.split('/')
        #Short or relative hrefs have no third-from-last segment; skip them rather than raise IndexError
        if len(parts)<3:
            continue
        if parts[-3] not in downloadURLs_dict:
            downloadURLs_dict[parts[-3]]=u
    return downloadURLs_dict
    
downloadURLs_dict=getDownloadURLs()
downloadURLs_dict

{'Lauth': 'https://digital.nhs.uk/media/403/Lauth/zip/Lauth',
 'Lauthsite': 'https://digital.nhs.uk/media/402/Lauthsite/zip/Lauthsite',
 'default': 'https://digital.nhs.uk/media/447/default/zip/default',
 'eabeydispgp': 'https://digital.nhs.uk/media/385/eabeydispgp/zip/eabeydispgp',
 'earchive': 'https://digital.nhs.uk/media/457/earchive/zip/earchive',
 'eauth': 'https://digital.nhs.uk/media/332/eauth/zip/eauth',
 'ebranchs': 'https://digital.nhs.uk/media/393/ebranchs/zip/ebranchs',
 'ecare': 'https://digital.nhs.uk/media/347/ecare/zip/ecare',
 'ecarehomehq': 'https://digital.nhs.uk/media/409/ecarehomehq/zip/ecarehomehq',
 'ecarehomesite': 'https://digital.nhs.uk/media/408/ecarehomesite/zip/ecarehomesite',
 'ecarehomesucc': 'https://digital.nhs.uk/media/407/ecarehomesucc/zip/ecarehomesucc',
 'eccg': 'https://digital.nhs.uk/media/354/eccg/zip/eccg',
 'eccgsite': 'https://digital.nhs.uk/media/353/eccgsite/zip/eccgsite',
 'econcur': 'https://digital.nhs.uk/media/450/econcur/zip/econcur',


In [4]:
#Create a connection to the database
con = sqlite3.connect("nhsadmin.sqlite")

In [5]:
#This function helps download and unpack data files

def downloader(typ,url=None):
    ''' Download and unzip data file '''
    !mkdir -p downloads/
    !rm downloads/{typ}.zip
    if url is None:
        #https://digital.nhs.uk/media/372/epraccur/zip/epraccur
        url=downloadURLs_dict[typ]
            
    #Fallback to old style?
    if url is None:
        print('What URL?')
        return
    #    url='http://systems.digital.nhs.uk/data/ods/datadownloads/data-files/{typ}.zip'.format(typ=typ)
    #Download the data from the HSCIC website
    print('Trying {}'.format(url))
    #!wget -P downloads/ {url}
    !curl -L -o downloads/{typ}.zip {url}
    !rm -r data/{typ}/
    #Unzip the downloaded files into a subdirectory of the data folder, making sure the data dir exists first
    !mkdir -p data
    #The -o flag is overkill - if we hadn't deleted the original folder it would overwrite any similar files
    !unzip -o -d data/{typ} downloads/{typ}

In [9]:
def getData(typ, names=None, dates=False, encoding=None):
    ''' Read CSV file in from downloaded and unzipped file

    typ: short file identifier; the file is downloaded via downloader(typ) first
    names: optional list of column names for the header-less CSV. Repeated names
           (the column specs reuse 'Null' for spare columns) are made unique by
           suffixing .1, .2, ... to the repeats
    dates: passed to pandas as parse_dates (False, or a list of column positions)
    encoding: passed through to pandas.read_csv

    Columns whose name contains "phone", " code" or "type" (case-insensitive) are
    read as strings. Returns the resulting DataFrame.
    '''
    downloader(typ)
    typPath = downloadURLs_dict[typ].split('/')[-1]

    unique_names=None
    dtypes={}
    if names is not None:
        #read_csv does not allow duplicate entries in names=, so make repeats unique here.
        #The '.N' suffix keeps every spare column starting with its original label.
        unique_names=[]
        used=set()
        repeats={}
        for c in names:
            candidate=c
            while candidate in used:
                repeats[c]=repeats.get(c,0)+1
                candidate='{}.{}'.format(c,repeats[c])
            used.add(candidate)
            unique_names.append(candidate)

        #Force identifier-like columns to text so that leading zeros are preserved
        for c in unique_names:
            lc=c.lower()
            if "phone" in lc or " code" in lc or "type" in lc: dtypes[c]=str

    df = pd.read_csv('data/{typPath}/{typ}.csv'.format(typPath=typPath,typ=typ),dtype=dtypes,
                     header=None,names=unique_names, parse_dates=dates,encoding=encoding)
    return df

In [10]:
def normaliser(typ,cols,dates=False,index=None,codes=None,encoding=None,db_con=None):
    ''' Download, read and process data file, adding it to a SQLite database

    typ: short file identifier; also used as the database table name
    cols: list of column names for the header-less CSV
    dates: False, a list of column positions to parse as dates, or 'auto' to
           pick every column whose name contains "date" (case-insensitive)
    index: None, a column name / list of names to index on, or 'auto' for the first column
    codes: optional dict of {column name: {code: description}}; for each entry a
           "<column name> Value" column holding the mapped description is added
    encoding: passed through to getData
    db_con: optional database connection; when given the frame is written to
            table typ, replacing any existing table of that name

    Columns whose name starts with 'Null' are dropped. Returns the processed DataFrame.
    '''
    #Only compare against 'auto' when a string was passed, so list arguments are never compared with a string
    if isinstance(dates,str) and dates=='auto':
        #enumerate gives each column's own position, even when a date column name is repeated
        dates=[i for i,c in enumerate(cols) if 'date' in c.lower()]
    if isinstance(index,str) and index=='auto':
        index=[cols[0]]

    df=getData(typ, names=cols, dates=dates,encoding=encoding)

    #Drop the spare ("Null...") columns in one step rather than mutating the frame while iterating over it
    keep=[not str(c).startswith('Null') for c in df.columns]
    df=df.loc[:,keep].copy()

    if codes is not None:
        for col in codes:
            #Codes are compared as strings because the lookup tables are keyed by string
            df[col + ' Value']=df[col].astype(str).map(codes[col])
    if index is not None: df=df.set_index(index)
    if db_con is not None: df.to_sql(con=db_con, name=typ,if_exists='replace')
    return df

## epraccur - Current Medical Practices and Prescribing Cost Centres

In [11]:
#via http://systems.digital.nhs.uk/data/ods/datadownloads/gppractice
#epraccur is administrative info about GP practices - practice codes, address, etc etc

#No column names are passed here, so the frame comes back with integer column labels.
#dates lists column positions to parse as dates: 10 and 11 are the Open/Close dates,
#15 and 16 the Join/Left Provider/Purchaser dates (see the column names assigned below).
EPRACCUR='epraccur'
epraccur= getData(EPRACCUR,dates=[10,11,15,16])

Trying https://digital.nhs.uk/media/372/epraccur/zip/epraccur
  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100  609k  100  609k    0     0   330k      0  0:00:01  0:00:01 --:--:--  330k
Archive:  downloads/epraccur.zip
  inflating: data/epraccur/epraccur.csv  
  inflating: data/epraccur/epraccur.pdf  


In [12]:
epraccur.columns

Int64Index([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
            17, 18, 19, 20, 21, 22, 23, 24, 25, 26],
           dtype='int64')

In [13]:
#Update the column names
#Really, we should do this by loading in the Excel version of the file
#and then extracting the metadata from the spreadsheet to identify the column names
#The following information is extracted from the metadata PDF
cols=['Organisation Code','Name','National Grouping','High Level Health Geography',
      'Address Line 1','Address Line 2','Address Line 3','Address Line 4','Address Line 5','Postcode',
      'Open Date','Close Date','Status Code','Organisation Sub-Type code',
      'Commissioner','Join Provider/Purchaser Date','Left Provider/Purchaser Date','Contact Telephone Number',
      'Null','Null','Null',
      'Amended Record Indicator','Null',
      'Provider/Purchaser','Null','Prescribing Setting','Null']
#Set the column names
epraccur.columns=cols

In [14]:
codes={}

codes['Status Code']={"A": "Active","C": "Closed", "D": "Dormant", "P": "Proposed"}

codes['Organisation Sub-Type code']={"B": "Allocated to a Provider/Purchaser Organisation",
                                     "Z": "Not allocated to a Provider/Purchaser Organisation"}
codes['Prescribing Setting']={"0":"Other", "1":"WIC Practice", "2":"OOH Practice", 
                              "3":"WIC + OOH Practice", "4":"GP Practice", 
                              "8":"Public Health Service", "9":"Community Health Service", 
                              "10":"Hospital Service", "11":"Optometry Service", 
                              "12":"Urgent & Emergency Care", "13":"Hospice", 
                              "14": "Care Home / Nursing Home", "15":"Border Force",
                              "16":"Young Offender Institution", "17":"Secure Training Centre",
                              "18":"Secure Children's Home", "19": "Immigration Removal Centre",
                              "20":"Court", "21":"Police Custody",
                              "22":"Sexual Assault Referral Centre (SARC)", "24":"Other – Justice Estate",
                              "25":"Prison",}

for col in codes:
    epraccur[col+" Value"]=epraccur[col].astype(str).map(codes[col])

In [15]:
#Drop the "Available for future use" columns (all of them share the label 'Null')
#A fresh frame with errors='ignore' keeps this cell re-runnable: a second run finds
#no 'Null' columns and simply leaves the data unchanged instead of raising KeyError
epraccur=epraccur.drop(columns='Null', errors='ignore')
#preview the data
epraccur.head(3)

Unnamed: 0,Organisation Code,Name,National Grouping,High Level Health Geography,Address Line 1,Address Line 2,Address Line 3,Address Line 4,Address Line 5,Postcode,...,Commissioner,Join Provider/Purchaser Date,Left Provider/Purchaser Date,Contact Telephone Number,Amended Record Indicator,Provider/Purchaser,Prescribing Setting,Status Code Value,Organisation Sub-Type code Value,Prescribing Setting Value
0,A81001,THE DENSHAM SURGERY,Y54,Q74,THE HEALTH CENTRE,LAWSON STREET,STOCKTON-ON-TEES,CLEVELAND,,TS18 1HU,...,00K,2013-04-01,NaT,01642 672351,0,00K,4,Active,Allocated to a Provider/Purchaser Organisation,GP Practice
1,A81002,QUEENS PARK MEDICAL CENTRE,Y54,Q74,QUEENS PARK MEDICAL CTR,FARRER STREET,STOCKTON ON TEES,CLEVELAND,,TS18 2AW,...,00K,2013-04-01,NaT,01642 679681,0,00K,4,Active,Allocated to a Provider/Purchaser Organisation,GP Practice
2,A81003,VICTORIA MEDICAL PRACTICE,Y54,Q74,THE HEALTH CENTRE,VICTORIA ROAD,HARTLEPOOL,CLEVELAND,,TS26 8DB,...,00K,2013-04-01,NaT,01429 272945,0,00K,4,Dormant,Allocated to a Provider/Purchaser Organisation,GP Practice


In [16]:
#Example showing how to filter on the Commissioner code
#(10L is the organisation code of NHS Isle of Wight CCG in the eccg data)
epraccur[epraccur['Commissioner']=='10L'].head(3)

Unnamed: 0,Organisation Code,Name,National Grouping,High Level Health Geography,Address Line 1,Address Line 2,Address Line 3,Address Line 4,Address Line 5,Postcode,...,Commissioner,Join Provider/Purchaser Date,Left Provider/Purchaser Date,Contact Telephone Number,Amended Record Indicator,Provider/Purchaser,Prescribing Setting,Status Code Value,Organisation Sub-Type code Value,Prescribing Setting Value
5257,J84003,VENTNOR MEDICAL CENTRE,Y57,Q70,VENTNOR MEDICAL CENTRE,3 ALBERT STREET,VENTNOR,ISLE OF WIGHT,,PO38 1EZ,...,10L,2013-04-01,NaT,01983 857288,0,10L,4,Active,Allocated to a Provider/Purchaser Organisation,GP Practice
5258,J84004,EAST COWES MEDICAL CENTRE,Y57,Q70,EAST COWES MEDICAL CENTRE,CHURCH PATH,EAST COWES,ISLE OF WIGHT,,PO32 6RP,...,10L,2013-04-01,NaT,01983 284333,0,10L,4,Active,Allocated to a Provider/Purchaser Organisation,GP Practice
5259,J84005,ESPLANADE SURGERY,Y57,Q70,THE ESPLANADE SURGERY,19 THE ESPLANADE,RYDE,ISLE OF WIGHT,,PO33 2EH,...,10L,2013-04-01,NaT,01983 618388,0,10L,4,Active,Allocated to a Provider/Purchaser Organisation,GP Practice


### Storing the Data in a SQLite3 Database
If we store several administrative files in the same database, we can run linked queries over them using SQL.

In [17]:
tmp=epraccur.set_index(['Organisation Code'])
#If the table exists, replace it, under the assumption we are using a more recent version of the data
tmp.to_sql(con=con, name=EPRACCUR,if_exists='replace')

  chunksize=chunksize, dtype=dtype)


In [18]:
#We can now run a SQL query over the data
orgcode='J84007'
#The value is bound with a ? placeholder rather than spliced into the SQL text:
#SQLite treats "double-quoted" text as an identifier first, and binding also copes
#with values that contain quote characters
pd.read_sql_query('SELECT * FROM {typ} WHERE "Organisation Code"=?'.format(typ=EPRACCUR), con, params=(orgcode,))

Unnamed: 0,Organisation Code,Name,National Grouping,High Level Health Geography,Address Line 1,Address Line 2,Address Line 3,Address Line 4,Address Line 5,Postcode,...,Commissioner,Join Provider/Purchaser Date,Left Provider/Purchaser Date,Contact Telephone Number,Amended Record Indicator,Provider/Purchaser,Prescribing Setting,Status Code Value,Organisation Sub-Type code Value,Prescribing Setting Value
0,J84007,ST.HELENS MEDICAL CENTRE,Y57,Q70,ST.HELENS MEDICAL CENTRE,UPPER GREEN ROAD,ST.HELENS,ISLE OF WIGHT,,PO33 1UG,...,10L,2013-04-01 00:00:00,,01983 871828,0,10L,4,Active,Allocated to a Provider/Purchaser Organisation,GP Practice


## etrust - NHS Trusts and Trust Sites

In [19]:
cols=['Organisation Code','Name','National Grouping','High Level Health Geography',
      'Address Line 1','Address Line 2','Address Line 3','Address Line 4','Address Line 5','Postcode',
      'Open Date','Close Date','Null','Null',
      'Null','Null','Null','Contact Telephone Number',
      'Null','Null','Null',
      'Amended Record Indicator','Null',
      'GOR Code','Null','Null','Null']

codes={}

In [20]:
typ='etrust'

tmp=normaliser(typ,cols,dates='auto',index=['Organisation Code'], codes=codes, encoding='Latin-1',db_con=con)
tmp.head(3)

Trying https://digital.nhs.uk/media/350/etrust/zip/etrust
  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100  727k  100  727k    0     0  2094k      0 --:--:-- --:--:-- --:--:-- 2089k
Archive:  downloads/etrust.zip
  inflating: data/etrust/etrust.csv  
  inflating: data/etrust/etrust.pdf  


  chunksize=chunksize, dtype=dtype)


Unnamed: 0_level_0,Name,National Grouping,High Level Health Geography,Address Line 1,Address Line 2,Address Line 3,Address Line 4,Address Line 5,Postcode,Open Date,Close Date,Contact Telephone Number,Amended Record Indicator,GOR Code
Organisation Code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
R1A,WORCESTERSHIRE HEALTH AND CARE NHS TRUST,Y55,Q77,ISAAC MADDOX HOUSE,SHRUB HILL INDUSTRIAL ESTATE,,WORCESTER,WORCESTERSHIRE,WR4 9RW,2011-07-01,NaT,,0,F
R1A01,PATHWAYS SUPPORT SERVICES,Y55,Q77,30A TENBY STREET,,,BIRMINGHAM,WEST MIDLANDS,B1 3EE,2011-07-01,NaT,,0,F
R1A02,QUEEN ELIZABETH HOSPITAL,Y55,Q77,EDGBASTON,,,BIRMINGHAM,WEST MIDLANDS,B15 2TH,2011-07-01,NaT,,0,F


In [21]:
tmp[tmp['Name'].str.lower().str.contains('Wight'.lower())].head(2)

Unnamed: 0_level_0,Name,National Grouping,High Level Health Geography,Address Line 1,Address Line 2,Address Line 3,Address Line 4,Address Line 5,Postcode,Open Date,Close Date,Contact Telephone Number,Amended Record Indicator,GOR Code
Organisation Code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
R1F,ISLE OF WIGHT NHS TRUST,Y57,Q70,ST MARY'S HOSPITAL,PARKHURST ROAD,,NEWPORT,ISLE OF WIGHT,PO30 5TG,2012-04-01,NaT,,0,J
R1FHQ,ISLE OF WIGHT NHS - HQ,Y57,Q70,ST MARY'S HOSPITAL,PARKHURST ROAD,,NEWPORT,ISLE OF WIGHT,PO30 5TG,2012-04-01,NaT,,0,J


In [22]:
orgcode='R1F'
#Bind the value with a ? placeholder: SQLite treats "double-quoted" text as an
#identifier first, so a spliced-in value is only read as a string by fallback
pd.read_sql_query('SELECT * FROM {typ} WHERE "Organisation Code"=?'.format(typ=typ), con, params=(orgcode,))

Unnamed: 0,Organisation Code,Name,National Grouping,High Level Health Geography,Address Line 1,Address Line 2,Address Line 3,Address Line 4,Address Line 5,Postcode,Open Date,Close Date,Contact Telephone Number,Amended Record Indicator,GOR Code
0,R1F,ISLE OF WIGHT NHS TRUST,Y57,Q70,ST MARY'S HOSPITAL,PARKHURST ROAD,,NEWPORT,ISLE OF WIGHT,PO30 5TG,2012-04-01 00:00:00,,,0,J


## eccg - Clinical Commissioning Groups

In [23]:
cols=['Organisation Code','Name','National Grouping','High Level Health Geography',
      'Address Line 1','Address Line 2','Address Line 3','Address Line 4','Address Line 5','Postcode',
      'Open Date','Close Date','Null','Organisation Sub-Type Code',
      'Null','Null','Null','Null',
      'Null','Null','Null',
      'Amended Record Indicator','Null',
      'Null','Null','Null','Null']

#Lookup from Organisation Sub-Type Code to a readable description
#NOTE(review): the "H CB" key looks as if it may be a garbled copy from the metadata PDF -
#confirm the actual sub-type code used for commissioning hubs
codes={'Organisation Sub-Type Code':{"H CB": "Commissioning hub", "C": "Standard CCG"}}

In [24]:
typ='eccg'

tmp=normaliser(typ,cols,dates='auto',index=['Organisation Code'], codes=codes, encoding='Latin-1',db_con=con)
tmp.head(3)

Trying https://digital.nhs.uk/media/354/eccg/zip/eccg
  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100 19896  100 19896    0     0   103k      0 --:--:-- --:--:-- --:--:--  103k
Archive:  downloads/eccg
  End-of-central-directory signature not found.  Either this file is not
  a zipfile, or it constitutes one disk of a multi-part archive.  In the
  latter case the central directory and zipfile comment will be found on
  the last disk(s) of this archive.
Archive:  downloads/eccg.zip
  inflating: data/eccg/eccg.csv      
  inflating: data/eccg/eccg.pdf      


  chunksize=chunksize, dtype=dtype)


Unnamed: 0_level_0,Name,National Grouping,High Level Health Geography,Address Line 1,Address Line 2,Address Line 3,Address Line 4,Address Line 5,Postcode,Open Date,Close Date,Organisation Sub-Type Code,Amended Record Indicator,Organisation Sub-Type Code Value
Organisation Code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
00C,NHS DARLINGTON CCG,Y54,Q74,DR PIPER HOUSE,KING STREET,,DARLINGTON,COUNTY DURHAM,DL3 6JL,2013-04-01,NaT,C,0,Standard CCG
00D,"NHS DURHAM DALES, EASINGTON AND SEDGEFIELD CCG",Y54,Q74,SEDGEFIELD COMMUNITY HOSPITAL,SALTERS LANE,SEDGEFIELD,STOCKTON-ON-TEES,CLEVELAND,TS21 3EE,2013-04-01,NaT,C,0,Standard CCG
00J,NHS NORTH DURHAM CCG,Y54,Q74,RIVERGREEN CENTRE,AYKLEY HEADS,,DURHAM,COUNTY DURHAM,DH1 5TS,2013-04-01,NaT,C,0,Standard CCG


In [25]:
tmp[tmp['Name'].str.lower().str.contains('Wight'.lower())]

Unnamed: 0_level_0,Name,National Grouping,High Level Health Geography,Address Line 1,Address Line 2,Address Line 3,Address Line 4,Address Line 5,Postcode,Open Date,Close Date,Organisation Sub-Type Code,Amended Record Indicator,Organisation Sub-Type Code Value
Organisation Code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
10L,NHS ISLE OF WIGHT CCG,Y57,Q70,SOUTH BLOCK,ST MARY'S HOSPITAL,PARKHURST ROAD,NEWPORT,ISLE OF WIGHT,PO30 5TG,2013-04-01,NaT,C,0,Standard CCG


In [26]:
orgcode='10L'
#Bind the value with a ? placeholder: SQLite treats "double-quoted" text as an
#identifier first, so a spliced-in value is only read as a string by fallback
pd.read_sql_query('SELECT * FROM {typ} WHERE "Organisation Code"=?'.format(typ=typ), con, params=(orgcode,))

Unnamed: 0,Organisation Code,Name,National Grouping,High Level Health Geography,Address Line 1,Address Line 2,Address Line 3,Address Line 4,Address Line 5,Postcode,Open Date,Close Date,Organisation Sub-Type Code,Amended Record Indicator,Organisation Sub-Type Code Value
0,10L,NHS ISLE OF WIGHT CCG,Y57,Q70,SOUTH BLOCK,ST MARY'S HOSPITAL,PARKHURST ROAD,NEWPORT,ISLE OF WIGHT,PO30 5TG,2013-04-01 00:00:00,,C,0,Standard CCG


In [27]:
#We can now see the benefit of having data from multiple source data files in the same database
#For example, we can run queries across joined tables such as finding GP Practices by CCG
ccg='NHS ISLE OF WIGHT CCG'
#The CCG name is bound to the ? placeholder rather than formatted into the SQL text:
#SQLite treats "double-quoted" text as an identifier first, and binding also copes
#with names that contain quote characters
q='''
SELECT epraccur."Organisation Code" AS code, epraccur.Name AS Name 
FROM eccg, epraccur 
WHERE eccg.Name=? AND eccg."Organisation Code"=epraccur.Commissioner'''

pd.read_sql_query(q, con, params=(ccg,))

Unnamed: 0,code,Name
0,J84003,VENTNOR MEDICAL CENTRE
1,J84004,EAST COWES MEDICAL CENTRE
2,J84005,ESPLANADE SURGERY
3,J84007,ST.HELENS MEDICAL CENTRE
4,J84008,ARGYLL HOUSE
5,J84010,SHANKLIN MEDICAL CENTRE
6,J84011,CARISBROOKE HEALTH CENTRE
7,J84012,TOWER HOUSE SURGERY
8,J84013,SANDOWN HEALTH CENTRE
9,J84014,THE DOWER HOUSE


## eccgsite - CCG Sites

In [28]:
cols=['Organisation Code','Name','National Grouping','High Level Health Geography',
      'Address Line 1','Address Line 2','Address Line 3','Address Line 4','Address Line 5','Postcode',
      'Open Date','Close Date','Null','Null',
      'Parent Organisation Code','Join Parent Date','Left Parent Date','Null',
      'Null','Null','Null',
      'Amended Record Indicator','Null',
      'Null','Null','Null','Null']

codes={}

In [29]:
typ='eccgsite'

tmp=normaliser(typ,cols,dates='auto',index=['Organisation Code'], codes=codes, encoding='Latin-1',db_con=con)
tmp.head(3)

Trying https://digital.nhs.uk/media/353/eccgsite/zip/eccgsite
  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100 68140  100 68140    0     0   295k      0 --:--:-- --:--:-- --:--:--  295k
Archive:  downloads/eccgsite.zip
  inflating: data/eccgsite/eccgsite.csv  
  inflating: data/eccgsite/eccgsite.pdf  


  chunksize=chunksize, dtype=dtype)


Unnamed: 0_level_0,Name,National Grouping,High Level Health Geography,Address Line 1,Address Line 2,Address Line 3,Address Line 4,Address Line 5,Postcode,Open Date,Close Date,Parent Organisation Code,Join Parent Date,Left Parent Date,Amended Record Indicator
Organisation Code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
00CAA,NHS DARLINGTON CCG HQ,Y54,Q74,DR PIPER HOUSE,KING STREET,,DARLINGTON,COUNTY DURHAM,DL3 6JL,2013-04-01,NaT,00C,2013-04-01,NaT,0
00DAA,"NHS DURHAM DALES, EASINGTON AND SEDGEFIELD HQ",Y54,Q74,SEDGEFIELD COMMUNITY HOSPITAL,SALTERS LANE,SEDGEFIELD,STOCKTON-ON-TEES,CLEVELAND,TS21 3EE,2013-04-01,NaT,00D,2013-04-01,NaT,0
00JAA,NHS NORTH DURHAM CCG HQ,Y54,Q74,THE RIVERGREEN CENTRE,AYKLEY HEADS,,DURHAM,COUNTY DURHAM,DH1 5TS,2013-04-01,NaT,00J,2013-04-01,NaT,0


## epcmem - Current and historical records of membership of CCGs, Primary Care Trusts, Primary Care Groups by General Medical Practice

In [30]:
cols=['Organisation Code','Parent Organisation Code',
'Parent Organisation Type','Join Parent Date','Left Parent Date','Amended Record Indicator']

codes={"Parent Organisation Type":{"W": "Primary Care Organisation"}}

In [31]:
typ='epcmem'
tmp=normaliser(typ,cols,dates=[3,4],index=['Organisation Code'], codes=codes, encoding='Latin-1',db_con=con)
tmp.head(3)

Trying https://digital.nhs.uk/media/378/epcmem/zip/epcmem
  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100  204k  100  204k    0     0   817k      0 --:--:-- --:--:-- --:--:--  819k
Archive:  downloads/epcmem.zip
  inflating: data/epcmem/epcmem.csv  
  inflating: data/epcmem/epcmem.pdf  


  chunksize=chunksize, dtype=dtype)


Unnamed: 0_level_0,Parent Organisation Code,Parent Organisation Type,Join Parent Date,Left Parent Date,Amended Record Indicator,Parent Organisation Type Value
Organisation Code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
A81001,4QP36,W,1999-04-01,2001-03-31,0,Primary Care Organisation
A81001,5E1,W,2001-04-01,2013-03-31,0,Primary Care Organisation
A81001,00K,W,2013-04-01,NaT,0,Primary Care Organisation


In [32]:
orgcode='A81001'
#Bind the value with a ? placeholder: SQLite treats "double-quoted" text as an
#identifier first, so a spliced-in value is only read as a string by fallback
pd.read_sql_query('SELECT * from {typ} where "Organisation Code"=?'.format(typ=typ), con, params=(orgcode,))

Unnamed: 0,Organisation Code,Parent Organisation Code,Parent Organisation Type,Join Parent Date,Left Parent Date,Amended Record Indicator,Parent Organisation Type Value
0,A81001,4QP36,W,1999-04-01 00:00:00,2001-03-31 00:00:00,0,Primary Care Organisation
1,A81001,5E1,W,2001-04-01 00:00:00,2013-03-31 00:00:00,0,Primary Care Organisation
2,A81001,00K,W,2013-04-01 00:00:00,,0,Primary Care Organisation


In [33]:
#Example:
#Look up the history of parent organisations for a particular practice
gp='VENTNOR MEDICAL CENTRE'

#The practice name is bound to the ? placeholder rather than formatted into the SQL text:
#SQLite treats "double-quoted" text as an identifier first, and binding also copes
#with names that contain quote characters
q='''
SELECT epraccur."Organisation Code" AS code, epraccur.Name AS Name, epcmem."Parent Organisation Code"
FROM epcmem, epraccur 
WHERE epraccur.Name=? AND epcmem."Organisation Code"=epraccur."Organisation Code"'''

pd.read_sql_query(q, con, params=(gp,))

#More work needs to be done here
# eg checking the Parent Organisation Type and then using this to look up the appropriate Parent Organisation Code

Unnamed: 0,code,Name,Parent Organisation Code
0,J84003,VENTNOR MEDICAL CENTRE,4NG74
1,J84003,VENTNOR MEDICAL CENTRE,5DG
2,J84003,VENTNOR MEDICAL CENTRE,5QT
3,J84003,VENTNOR MEDICAL CENTRE,10L


## epracmem - current and historical records of membership of practices by GPs

In [34]:
cols=['Practitioner Code','Parent Organisation Code','Parent Organisation Type','Join Parent Date',
      'Left Parent Date','Amended Record Indicator']

codes={}

In [35]:
typ='epracmem'
tmp=normaliser(typ,cols,dates="auto",index="auto", codes=codes, encoding='Latin-1',db_con=con)
tmp.head(3)

Trying https://digital.nhs.uk/media/379/epracmem/zip/epracmem
  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100 1221k  100 1221k    0     0  2941k      0 --:--:-- --:--:-- --:--:-- 2942k
Archive:  downloads/epracmem.zip
  inflating: data/epracmem/epracmem.csv  
  inflating: data/epracmem/epracmem.pdf  


  chunksize=chunksize, dtype=dtype)


Unnamed: 0_level_0,Parent Organisation Code,Parent Organisation Type,Join Parent Date,Left Parent Date,Amended Record Indicator
Practitioner Code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
G0102005,H81600,P,1974-04-01,1991-04-01,0
G0102926,D81001,P,1974-04-01,1991-12-31,0
G0105912,E83600,P,1974-04-01,1992-07-31,0


In [36]:
tmp[tmp['Parent Organisation Code']=='J84020']

Unnamed: 0_level_0,Parent Organisation Code,Parent Organisation Type,Join Parent Date,Left Parent Date,Amended Record Indicator
Practitioner Code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
G3335046,J84020,P,1974-04-01,2006-05-17,0
G3370324,J84020,P,1974-04-01,2006-04-01,0
G8337043,J84020,P,2003-07-07,2008-09-30,0
G8549718,J84020,P,2006-03-20,NaT,0
G8637358,J84020,P,2006-05-02,NaT,0
G9508552,J84020,P,1995-08-14,NaT,0
G9710832,J84020,P,1997-11-17,2005-04-30,0


In [37]:
#Example - current GP codes by practice
gp='VENTNOR MEDICAL CENTRE'

# The practice name is bound as a query parameter (sqlite3 "?" placeholder) instead of being
# pasted into the SQL inside double quotes: SQLite parses "..." as an identifier first and only
# falls back to a string literal as a legacy quirk, and a name containing a quote would break the query.
# Assumes `con` is the sqlite3 connection opened earlier in the notebook.
q='''
SELECT epraccur."Organisation Code" AS code, epraccur.Name AS Name, epracmem."Practitioner Code",
        epracmem."Join Parent Date",epracmem."Left Parent Date"
FROM epracmem, epraccur
WHERE epraccur.Name=? AND epracmem."Parent Organisation Code"=epraccur."Organisation Code"
      AND epracmem."Left Parent Date" IS NULL '''

pd.read_sql_query(q, con, params=(gp,))


Unnamed: 0,code,Name,Practitioner Code,Join Parent Date,Left Parent Date
0,J84003,VENTNOR MEDICAL CENTRE,G7105823,2010-04-05 00:00:00,
1,J84003,VENTNOR MEDICAL CENTRE,G8613161,1986-10-05 00:00:00,
2,J84003,VENTNOR MEDICAL CENTRE,G9142387,2014-04-01 00:00:00,
3,J84003,VENTNOR MEDICAL CENTRE,G9500499,1995-01-03 00:00:00,
4,J84003,VENTNOR MEDICAL CENTRE,G9544343,2015-10-05 00:00:00,


## egdpprac - Dental Surgeries

In [38]:
# Column names for the egdpprac data, in file order; unused positions are labelled 'Null'.
cols = (
    ['Organisation Code', 'Name', 'National Grouping', 'High Level Health Geography']
    + ['Address Line {}'.format(n) for n in range(1, 6)]
    + ['Postcode', 'Open Date', 'Close Date']
    + ['Status Code', 'Organisation Sub-Type Code',
       'Parent Organisation Code', 'Join Parent Date', 'Left Parent Date', 'Null']
    + ['Null'] * 3
    + ['Amended Record Indicator', 'Null']
    + ['Null'] * 4
)

# Code-to-label lookups, keyed by the column they decode.
codes = {
    "Status Code": {"A": "Active", "C": "Closed", "D": "Dormant", "P": "Proposed"},
    'Organisation Sub-Type Code': {"D": "NHS and Private Dental Practice",
                                   "P": "Private Only Dental Practice"},
}

In [39]:
# Run normaliser() (defined in an earlier cell) for the dental surgeries data and preview three rows.
typ = 'egdpprac'
tmp = normaliser(
    typ,
    cols,
    dates="auto",
    index="auto",
    codes=codes,
    encoding='Latin-1',
    db_con=con,
)
tmp.head(3)

Trying https://digital.nhs.uk/media/466/egdpprac/zip/egdpprac
  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100  367k  100  367k    0     0  1276k      0 --:--:-- --:--:-- --:--:-- 1275k
Archive:  downloads/egdpprac.zip
  inflating: data/egdpprac/egdpprac.csv  
  inflating: data/egdpprac/egdpprac.pdf  


  chunksize=chunksize, dtype=dtype)


Unnamed: 0_level_0,Name,National Grouping,High Level Health Geography,Address Line 1,Address Line 2,Address Line 3,Address Line 4,Address Line 5,Postcode,Open Date,Close Date,Status Code,Organisation Sub-Type Code,Parent Organisation Code,Join Parent Date,Left Parent Date,Amended Record Indicator,Status Code Value,Organisation Sub-Type Code Value
Organisation Code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
V00002,DENTAL SURGERY,Y52,Q37,DENTAL SURGERY,22 MARTYRS AVENUE,CRAWLEY,WEST SUSSEX,,RH11 7RZ,2008-04-01,2009-03-31,C,D,5P6,2008-04-01,2009-03-31,0,Closed,NHS and Private Dental Practice
V00003,CRABTREE ROAD DENTAL PRACTICE,Y57,Q81,CRABTREE ROAD DENTAL PRACTICE,25 CRABTREE ROAD,CRAWLEY,WEST SUSSEX,,RH11 7HL,2006-04-01,NaT,A,D,14G,2015-04-01,NaT,0,Active,NHS and Private Dental Practice
V00004,DENTAL SURGERY,Y57,Q81,DENTAL SURGERY,WOODCROFT,BEPTON ROAD,MIDHURST,WEST SUSSEX,GU29 9HH,2006-04-01,NaT,A,D,14G,2015-04-01,NaT,0,Active,NHS and Private Dental Practice


In [40]:
area='VENTNOR'
# Bind the town as a "?" parameter rather than embedding it in double quotes, which SQLite
# treats as an identifier first. Only the table name (an identifier, which cannot be bound)
# is still formatted into the SQL text; `typ` comes from the loading cell above.
pd.read_sql_query('SELECT * FROM {typ} WHERE "Address Line 3"=?'.format(typ=typ), con, params=(area,))

Unnamed: 0,Organisation Code,Name,National Grouping,High Level Health Geography,Address Line 1,Address Line 2,Address Line 3,Address Line 4,Address Line 5,Postcode,Open Date,Close Date,Status Code,Organisation Sub-Type Code,Parent Organisation Code,Join Parent Date,Left Parent Date,Amended Record Indicator,Status Code Value,Organisation Sub-Type Code Value
0,V06499,DENTAL SURGERY,Y57,Q70,DENTAL SURGERY,4 CHURCH STREET,VENTNOR,ISLE OF WIGHT,,PO38 1SW,2006-04-01 00:00:00,,A,D,13N,2013-04-01 00:00:00,,0,Active,NHS and Private Dental Practice
1,V06685,DENTAL SURGERY,Y57,Q70,DENTAL SURGERY,42 HIGH STREET,VENTNOR,ISLE OF WIGHT,,PO38 1RZ,2006-04-01 00:00:00,,A,D,13N,2013-04-01 00:00:00,,0,Active,NHS and Private Dental Practice


## egpcur - Current General Medical Practitioners (GPs) 

In [None]:
# Column names for the egpcur data, in file order; unused positions are labelled 'Null'.
cols = (
    ['Organisation Code', 'Name', 'National Grouping', 'High Level Health Geography']
    + ['Address Line {}'.format(n) for n in range(1, 6)]
    + ['Postcode', 'Open Date', 'Close Date']
    + ['Status Code', 'Organisation Sub-Type Code',
       'Parent Organisation Code', 'Join Parent Date', 'Left Parent Date', 'Contact Telephone Number']
    + ['Null'] * 3
    + ['Amended Record Indicator', 'Null']
    + ['Current Care Organisation', 'Null', 'Null', 'Null']
)

# Code-to-label lookups, keyed by the column they decode.
codes = {
    'Status Code': {"A": "Active", "B": "Retired", "C": "Closed", "P": "Proposed"},
    'Organisation Sub-Type Code': {
        "P": "Principal/Senior GP at practice",
        "O": "Other GP in practice (not Principal/Senior GP)",
    },
}


In [None]:
# Run normaliser() (defined in an earlier cell) for the current GPs data and preview three rows.
typ = 'egpcur'
tmp = normaliser(
    typ,
    cols,
    dates="auto",
    index="auto",
    codes=codes,
    encoding='Latin-1',
    db_con=con,
)
tmp.head(3)

Trying https://digital.nhs.uk/media/370/egpcur/zip/egpcur
  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
 46 4823k   46 2224k    0     0  2217k      0  0:00:02  0:00:01  0:00:01 2219k

In [None]:
gp='VENTNOR MEDICAL CENTRE'

# GPs whose parent organisation is the named practice. The name is bound with a "?" placeholder
# instead of being formatted into the SQL in double quotes (SQLite reads "..." as an identifier
# first, and an embedded quote in the name would break the statement).
q='''
SELECT epraccur."Organisation Code" AS code, epraccur.Name AS Name, egpcur."Organisation Code",
        egpcur."Name",egpcur."Join Parent Date",egpcur."Left Parent Date"
FROM egpcur, epraccur
WHERE epraccur.Name=? AND egpcur."Parent Organisation Code"=epraccur."Organisation Code" '''

pd.read_sql_query(q, con, params=(gp,))

## egparc - Archived GPs

In [None]:
# Column names for the egparc data, in file order; unused positions are labelled 'Null'.
cols=['Organisation Code','Name','National Grouping','High Level Health Geography',
      'Address Line 1','Address Line 2','Address Line 3','Address Line 4','Address Line 5','Postcode',
      'Open Date','Close Date','Status Code','Organisation Sub-Type Code',
      'Parent Organisation Code','Join Parent Date','Left Parent Date','Contact Telephone Number',
      'Null','Null','Null',
      'Amended Record Indicator','Null',
      'Null','Null','Null','Null']

# Start from an empty mapping so this cell neither depends on nor mutates whatever `codes`
# dict an earlier cell left in the kernel (previously it failed with NameError on its own).
codes={}

codes['Status Code']={"A": "Active", "C": "Closed", "P": "Proposed"}

codes['Organisation Sub-Type Code']={"P": "Principal/Senior GP at practice",
                                     "O": "Other GP in practice (not Principal/Senior GP)"}

In [None]:
# Run normaliser() (defined in an earlier cell) for the archived GPs data and preview three rows.
typ = 'egparc'
tmp = normaliser(
    typ,
    cols,
    dates="auto",
    index="auto",
    codes=codes,
    encoding='Latin-1',
    db_con=con,
)
tmp.head(3)

## epracarc - Archived GP Practices

In [None]:
# NOTE(review): the result of this call (`epracarc`) is not used by any later cell visible in this
# notebook; the cells that follow load the same 'epracarc' dataset through normaliser() instead.
# getData is presumably defined in an earlier cell - confirm it still exists, otherwise this cell
# raises NameError on a fresh Restart & Run All.
# The dates argument presumably gives the positions of the date columns (10, 11, 15 and 16 line up
# with Open/Close/Join Parent/Left Parent Date in the epracarc column list) - TODO confirm.
EPRACARC='epracarc'
epracarc=getData(EPRACARC,dates=[10,11,15,16])

In [None]:
# Column names for the epracarc data, in file order; unused positions are labelled 'Null'.
cols = (
    ['Organisation Code', 'Name', 'National Grouping', 'High Level Health Geography']
    + ['Address Line {}'.format(n) for n in range(1, 6)]
    + ['Postcode', 'Open Date', 'Close Date']
    + ['Status Code', 'Organisation Sub-Type Code',
       'Parent Organisation Code', 'Join Parent Date', 'Left Parent Date', 'Contact Telephone Number']
    + ['Null'] * 3
    + ['Amended Record Indicator', 'Null']
    + ['Null', 'Null', 'Practice Type', 'Null']
)

# Code-to-label lookups, keyed by the column they decode.
codes = {
    'Status Code': {"A": "Active", "C": "Closed", "D": "Dormant", "P": "Proposed"},
    'Organisation Sub-Type Code': {
        "B": "Allocated to a PCT or Care Trust",
        "Z": "Not allocated to a PCT or Care Trust",
    },
}

In [None]:
# Run normaliser() (defined in an earlier cell) for the archived practices data and preview three rows.
typ = 'epracarc'
tmp = normaliser(
    typ,
    cols,
    dates="auto",
    index="auto",
    codes=codes,
    encoding='Latin-1',
    db_con=con,
)
tmp.head(3)

## ehospice - Hospices

In [None]:
# Column names for the ehospice data, in file order; unused positions are labelled 'Null'.
cols = (
    ['Organisation Code', 'Name', 'National Grouping', 'High Level Health Geography']
    + ['Address Line {}'.format(n) for n in range(1, 6)]
    + ['Postcode', 'Open Date', 'Close Date']
    + ['Null', 'Organisation Sub-Type Code', 'Null', 'Null', 'Null', 'Contact Telephone Number']
    + ['Null'] * 3
    + ['Amended Record Indicator', 'Null']
    + ['Null'] * 4
)

# Single code-to-label lookup for the sub-type column.
codes = {}
codes['Organisation Sub-Type Code'] = {"H": "Hospice"}

In [None]:
# Run normaliser() (defined in an earlier cell) for the hospices data and preview three rows.
typ = 'ehospice'
tmp = normaliser(
    typ,
    cols,
    dates="auto",
    index="auto",
    codes=codes,
    encoding='Latin-1',
    db_con=con,
)
tmp.head(3)

In [None]:
area='HUDDERSFIELD'
# Bind the area as a "?" parameter rather than embedding it in double quotes, which SQLite
# treats as an identifier first. The table name cannot be bound, so it is still formatted in;
# `typ` comes from the loading cell above.
pd.read_sql_query('SELECT * FROM {typ} WHERE "Address Line 4"=?'.format(typ=typ), con, params=(area,))

## epharmacyhq - Pharmacy Headquarters

In [None]:
# Column names for the epharmacyhq data, in file order; unused positions are labelled 'Null'.
cols = (
    ['Organisation Code', 'Name', 'National Grouping', 'High Level Health Geography']
    + ['Address Line {}'.format(n) for n in range(1, 6)]
    + ['Postcode', 'Open Date', 'Close Date']
    + ['Null'] * 5
    + ['Contact Telephone Number']
    + ['Null'] * 3
    + ['Amended Record Indicator', 'Null']
    + ['Null'] * 4
)

# No code-to-label lookups are declared for this dataset.
codes = dict()

In [None]:
# Run normaliser() (defined in an earlier cell) for the pharmacy headquarters data and preview three rows.
typ = 'epharmacyhq'
tmp = normaliser(
    typ,
    cols,
    dates="auto",
    index="auto",
    codes=codes,
    encoding='Latin-1',
    db_con=con,
)
tmp.head(3)

In [None]:
name='BOOTS'
# Substring match on the organisation name. The LIKE pattern is built in Python and bound as a
# "?" parameter instead of being embedded in double quotes, which SQLite treats as an identifier
# first. The table name cannot be bound, so it is still formatted in; `typ` comes from the cell above.
pd.read_sql_query('SELECT * FROM {typ} WHERE "Name" LIKE ?'.format(typ=typ), con,
                  params=('%{}%'.format(name),))

## edispensary - Dispensaries

In [None]:
# Column names for the edispensary data, in file order; unused positions are labelled 'Null'.
cols = (
    ['Organisation Code', 'Name', 'National Grouping', 'High Level Health Geography']
    + ['Address Line {}'.format(n) for n in range(1, 6)]
    + ['Postcode', 'Open Date', 'Close Date']
    + ['Status Code', 'Organisation Sub-Type Code',
       'Parent Organisation Code', 'Join Parent Date', 'Left Parent Date', 'Contact Telephone Number']
    + ['Null'] * 3
    + ['Amended Record Indicator', 'Null']
    + ['Current Care Organisation', 'Null', 'Null', 'Null']
)

# Code-to-label lookups, keyed by the column they decode.
codes = {
    'Status Code': {"A": "Active", "C": "Closed", "P": "Proposed"},
    "Organisation Sub-Type Code": {
        "1": "Pharmacy",
        "2": "Appliance Contractor",
        "3": "Oxygen concentrator supplier",
    },
}

In [None]:
# Run normaliser() (defined in an earlier cell) for the dispensaries data and preview three rows.
typ = 'edispensary'
tmp = normaliser(
    typ,
    cols,
    dates="auto",
    index="auto",
    codes=codes,
    encoding='Latin-1',
    db_con=con,
)
tmp.head(3)

In [None]:
area='ISLE OF WIGHT'
# Dispensaries in the given area joined to their parent pharmacy headquarters (first five rows).
# The area is bound through the named sqlite3 parameter :area instead of being formatted into the
# SQL in double quotes, which SQLite would try to read as an identifier first.
q='''
SELECT  edispensary.Name AS dispensaryName, edispensary."Address Line 3", epharmacyhq.Name AS parentName
FROM edispensary,epharmacyhq
WHERE edispensary."Address Line 4"=:area
AND edispensary."Parent Organisation Code" = epharmacyhq."Organisation Code" LIMIT 5
'''
pd.read_sql_query(q, con, params={'area': area})

## enurse - Nurse Prescribers

In [None]:
# Column names for the enurse data, in file order.
cols = (
    ['Nurse Type', 'Parent Organisation Code', 'Nurse PIN']
    + ['Open Date', 'Close Date', 'Title', 'Initials', 'Surname']
    + ['Address{}'.format(n) for n in range(1, 6)]
    + ['Postcode', 'Telephone Number', 'Senior Partner Name']
    + ['Current Care Organisation Code', 'Name', 'Name manipulation indicator']
    + ['Qualification indicator']
)

# Code-to-label lookups, keyed by the column they decode.
codes = {
    'Nurse Type': {
        "PN": "nurse employed by a practice",
        "CN": "nurse employed by a Community NHS Trust",
    },
    'Qualification indicator': {
        "1": "District Nurse/Health Visitor",
        "2": "Extended Formulary nurse prescriber / Nurse supplementary prescriber",
    },
}

In [None]:
# Run normaliser() (defined in an earlier cell) for the nurse prescribers data,
# indexing on 'Nurse PIN' rather than the "auto" choice, and preview three rows.
typ = 'enurse'
tmp = normaliser(
    typ,
    cols,
    dates="auto",
    index=['Nurse PIN'],
    codes=codes,
    encoding='Latin-1',
    db_con=con,
)
tmp.head(3)

In [None]:
gp='VENTNOR MEDICAL CENTRE'

# Nurse prescribers whose parent organisation is the named practice. The name is bound with a "?"
# placeholder instead of being formatted into the SQL in double quotes (SQLite reads "..." as an
# identifier first, and an embedded quote in the name would break the statement).
q='''
SELECT epraccur."Organisation Code" AS code, epraccur.Name AS Name, enurse."Surname",
        enurse."Name",enurse."Open Date",enurse."Close Date"
FROM enurse, epraccur
WHERE epraccur.Name=? AND enurse."Parent Organisation Code"=epraccur."Organisation Code" '''

pd.read_sql_query(q, con, params=(gp,))

## epcdp - Private Controlled Drug Prescribers

In [None]:
# Column names for the epcdp data, in file order; unused positions are labelled 'Null'.
cols=['Organisation Code','Name','National Grouping','High Level Health Geography',
      'Address Line 1','Address Line 2','Address Line 3','Address Line 4','Address Line 5','Postcode',
      'Open Date','Close Date','Null','Organisation Sub-Type Code',
      'Parent Organisation Code','Join Parent Date','Left Parent Date','Contact Telephone Number',
      'Null','Null','Null',
      'Amended Record Indicator','Null',
      'Null','Null','Null','Null']

codes={}

# Sub-type labels; "6" previously read "Private Radiographe" (truncated), now spelled in full.
codes['Organisation Sub-Type Code']={"1": "Private Doctor","2": "Private Nurse", "3": "Private Pharmacist",
                                     "4": "Private Optometrist", "5": "Private Physiotherapist",
                                     "6": "Private Radiographer", "7": "Private Podiatrist"}

In [None]:
# Run normaliser() (defined in an earlier cell) for the private controlled drug prescribers data
# and preview three rows.
typ = 'epcdp'
tmp = normaliser(
    typ,
    cols,
    dates="auto",
    index="auto",
    codes=codes,
    encoding='Latin-1',
    db_con=con,
)
tmp.head(3)

In [None]:
area='RYDE'
# Bind the town as a "?" parameter rather than embedding it in double quotes, which SQLite
# treats as an identifier first. The table name cannot be bound, so it is still formatted in;
# `typ` comes from the loading cell above.
pd.read_sql_query('SELECT * FROM {typ} WHERE "Address Line 3"=?'.format(typ=typ), con, params=(area,))

## eabeydispgp - Abeyance and Dispersal GP

In [None]:
# Column names for the eabeydispgp data, in file order; unused positions are labelled 'Null'.
cols = (
    ['Organisation Code', 'Name', 'Null', 'Null']
    + ['Address Line {}'.format(n) for n in range(1, 6)]
    + ['Postcode', 'Open Date', 'Close Date']
    + ['Null', 'Organisation Sub-Type Code',
       'Parent Organisation Code', 'Join Parent Date', 'Left Parent Date', 'Contact Telephone Number']
    + ['Null'] * 3
    + ['Amended Record Indicator', 'Null']
    + ['Current Care Organisation', 'Null', 'Null', 'Null']
)

# Single code-to-label lookup for the sub-type column.
codes = {}
codes['Organisation Sub-Type Code'] = {"A": "Abeyance", "D": "Dispersal"}

In [None]:
# Run normaliser() (defined in an earlier cell) for the abeyance and dispersal GP data
# and preview three rows.
typ = 'eabeydispgp'
tmp = normaliser(
    typ,
    cols,
    dates="auto",
    index="auto",
    codes=codes,
    encoding='Latin-1',
    db_con=con,
)
tmp.head(3)

In [None]:
gp='VENTNOR MEDICAL CENTRE'

# Abeyance/dispersal records whose parent organisation is the named practice. The name is bound
# with a "?" placeholder instead of being formatted into the SQL in double quotes (SQLite reads
# "..." as an identifier first, and an embedded quote in the name would break the statement).
q='''
SELECT epraccur."Organisation Code" AS code, epraccur.Name AS Name,
        eabeydispgp."Name",eabeydispgp."Open Date",eabeydispgp."Close Date"
FROM eabeydispgp, epraccur
WHERE epraccur.Name=? AND eabeydispgp."Parent Organisation Code"=epraccur."Organisation Code" '''

pd.read_sql_query(q, con, params=(gp,))

## ecarehomehq - Care Home Headquarters 



In [None]:
# Column names for the ecarehomehq data, in file order; unused positions are labelled 'Null'.
cols = (
    ['Organisation Code', 'Name', 'Null', 'Null']
    + ['Address Line {}'.format(n) for n in range(1, 6)]
    + ['Postcode', 'Open Date', 'Close Date']
    + ['Null'] * 6
    + ['Null'] * 3
    + ['Amended Record Indicator', 'Null']
    + ['Null', 'Null', 'Null', 'Country']
)

# Single code-to-label lookup for the country column.
codes = {}
codes['Country'] = {
    "1": "England",
    "2": "Scotland",
    "3": "Wales",
    "4": "Northern Ireland",
    "5": "Isle of Man",
}

In [None]:
# Run normaliser() (defined in an earlier cell) for the care home headquarters data
# and preview three rows.
typ = 'ecarehomehq'
tmp = normaliser(
    typ,
    cols,
    dates="auto",
    index="auto",
    codes=codes,
    encoding='Latin-1',
    db_con=con,
)
tmp.head(3)

In [None]:
# Read three rows back from the ecarehomehq table through the database connection.
pd.read_sql_query(
    sql='SELECT * FROM {typ} LIMIT 3'.format(typ='ecarehomehq'),
    con=con,
)

## ecarehomesite - Care Home Sites

In [None]:
#via http://systems.digital.nhs.uk/data/ods/datadownloads/nonnhs
# Column names for the ecarehomesite data, in file order; unused positions are labelled 'Null'.
cols = (
    ['Organisation Code', 'Name', 'National Grouping', 'High Level Health Geography']
    + ['Address Line {}'.format(n) for n in range(1, 6)]
    + ['Postcode', 'Open Date', 'Close Date']
    + ['Null', 'Null',
       'Parent Organisation Code', 'Join Parent Date', 'Left Parent Date', 'Contact Telephone Number']
    + ['Null'] * 3
    + ['Amended Record Indicator', 'Null']
    + ['Current Care Organisation', 'Null', 'Null', 'Country']
)

# Single code-to-label lookup for the country column.
codes = {}
codes['Country'] = {
    "1": "England",
    "2": "Scotland",
    "3": "Wales",
    "4": "Northern Ireland",
    "5": "Isle of Man",
    "10": "Channel Islands",
}

In [None]:
# Run normaliser() (defined in an earlier cell) for the care home sites data and preview three rows.
typ = 'ecarehomesite'
tmp = normaliser(
    typ,
    cols,
    dates="auto",
    index="auto",
    codes=codes,
    encoding='Latin-1',
    db_con=con,
)
tmp.head(3)

In [None]:
# Read three rows back from the ecarehomesite table through the database connection.
pd.read_sql_query(
    sql='SELECT * FROM {typ} LIMIT 3'.format(typ='ecarehomesite'),
    con=con,
)

## ecarehomesucc - Care Home Successors

In [None]:
#via http://systems.digital.nhs.uk/data/ods/datadownloads/nonnhs
# Column names for the ecarehomesucc data, in file order.
cols=['Organisation Code','Successor Organisation Code',
      'Successor Reason Code','Succession Effective Date','Succession Indicator']

codes={}

codes['Successor Reason Code']={"F": "FMR code change", "O": "Org. type change", "R": "Reconfiguration"}
# A missing Succession Indicator is keyed on NaN. Use the canonical lowercase np.nan:
# the np.NaN alias was removed in NumPy 2.0 and raises AttributeError there.
codes['Succession Indicator']={np.nan: "No further succession", "F": "Further succession", "X": "Closed succession"}

In [None]:
# Run normaliser() (defined in an earlier cell) for the care home successors data, indexing on
# the (organisation, successor) code pair, and preview three rows.
typ = 'ecarehomesucc'
tmp = normaliser(
    typ,
    cols,
    dates="auto",
    index=['Organisation Code', 'Successor Organisation Code'],
    codes=codes,
    encoding='Latin-1',
    db_con=con,
)

tmp.head(3)

In [None]:
# Read three rows back from the ecarehomesucc table through the database connection.
pd.read_sql_query(
    sql='SELECT * FROM {typ} LIMIT 3'.format(typ='ecarehomesucc'),
    con=con,
)

## ephp - Independent Sector Healthcare Providers 

In [None]:
#via http://systems.digital.nhs.uk/data/ods/datadownloads/nonnhs
# Column names for the ephp data, in file order; unused positions are labelled 'Null'.
cols = (
    ['Organisation Code', 'Name', 'National Grouping', 'High Level Health Geography']
    + ['Address Line {}'.format(n) for n in range(1, 6)]
    + ['Postcode', 'Open Date', 'Close Date']
    + ['Null'] * 6
    + ['Null'] * 3
    + ['Amended Record Indicator', 'Null']
    + ['Null'] * 4
)

# No code-to-label lookups are declared for this dataset.
codes = dict()

In [None]:
# Run normaliser() (defined in an earlier cell) for the independent sector healthcare providers
# data and preview three rows.
typ = 'ephp'
tmp = normaliser(
    typ,
    cols,
    dates="auto",
    index="auto",
    codes=codes,
    encoding='Latin-1',
    db_con=con,
)
tmp.head(3)

In [None]:
# Providers whose name contains "Virgin". The pattern is written as a single-quoted SQL string
# literal: double quotes denote identifiers in SQL and are only accepted as strings by SQLite
# as a legacy fallback.
pd.read_sql_query("SELECT * FROM {typ} WHERE Name LIKE '%Virgin%'".format(typ='ephp'), con)

## ephpsite - Independent Sector Healthcare Provider Sites

In [None]:
#via http://systems.digital.nhs.uk/data/ods/datadownloads/nonnhs
# Column names for the ephpsite data, in file order; unused positions are labelled 'Null'.
cols = (
    ['Organisation Code', 'Name', 'National Grouping', 'High Level Health Geography']
    + ['Address Line {}'.format(n) for n in range(1, 6)]
    + ['Postcode', 'Open Date', 'Close Date']
    + ['Null', 'Organisation Sub-Type Code', 'Parent Organisation Code', 'Null', 'Null', 'Null']
    + ['Null'] * 3
    + ['Amended Record Indicator', 'Null']
    + ['Null'] * 4
)

# Single code-to-label lookup for the sub-type column.
codes = {}
codes['Organisation Sub-Type Code'] = {
    "A": "Social Enterprise",
    "M": "Treatment Centre",
    "E": "Independent Sector Hospital",
    "R": "Registered under Part 2 of the Care Standards Act 2000",
}

In [None]:
# Run normaliser() (defined in an earlier cell) for the independent sector provider sites data
# and preview three rows.
typ = 'ephpsite'
tmp = normaliser(
    typ,
    cols,
    dates="auto",
    index="auto",
    codes=codes,
    encoding='Latin-1',
    db_con=con,
)
tmp.head(3)

In [None]:
# First three sites whose parent organisation is an ephp provider with "Virgin" in its name.
# The LIKE pattern is a single-quoted SQL string literal: double quotes denote identifiers in SQL
# and are only accepted as strings by SQLite as a legacy fallback.
pd.read_sql_query('''
SELECT * FROM {typ}
WHERE "Parent Organisation Code" IN (SELECT "Organisation Code" FROM ephp WHERE Name LIKE '%Virgin%')
LIMIT 3'''.format(typ='ephpsite'), con)

In [None]:
# Sites whose fifth address line mentions WIGHT. The pattern is a single-quoted SQL string literal:
# double quotes denote identifiers in SQL and are only accepted as strings by SQLite as a legacy fallback.
pd.read_sql_query('''SELECT * FROM {typ} WHERE "Address Line 5" LIKE '%WIGHT%' '''.format(typ='ephpsite'), con)
#Alternatively do it by postcode?
#How do the Organisation Codes reconcile with other flavours of Organisation Code for the same establishment?

## enonnhs - Non-NHS Organisations

In [None]:
#via http://systems.digital.nhs.uk/data/ods/datadownloads/nonnhs
# Column names for the enonnhs data, in file order; unused positions are labelled 'Null'.
cols = (
    ['Organisation Code', 'Name', 'National Grouping', 'High Level Health Geography']
    + ['Address Line {}'.format(n) for n in range(1, 6)]
    + ['Postcode', 'Open Date', 'Close Date']
    + ['Null', 'Organisation Sub-Type Code', 'Null', 'Null', 'Null', 'Contact Telephone Number']
    + ['Null'] * 3
    + ['Amended Record Indicator', 'Null']
    + ['Null'] * 4
)

# Single code-to-label lookup for the sub-type column.
codes = {}
codes['Organisation Sub-Type Code'] = {
    "R": "Registered Non-NHS Provider",
    "N": "Non-Registered Non-NHS Provider",
}

In [None]:
# Run normaliser() (defined in an earlier cell) for the non-NHS organisations data
# and preview three rows.
typ = 'enonnhs'
tmp = normaliser(
    typ,
    cols,
    dates="auto",
    index="auto",
    codes=codes,
    encoding='Latin-1',
    db_con=con,
)
tmp.head(3)

## eprison - Prisons in England and Wales

In [None]:
# Column names for the eprison data, in file order; unused positions are labelled 'Null'.
cols = (
    ['Organisation Code', 'Name', 'Null', 'Null']
    + ['Address Line {}'.format(n) for n in range(1, 6)]
    + ['Postcode', 'Open Date', 'Close Date']
    + ['Null'] * 5
    + ['Contact Telephone Number']
    + ['Null'] * 3
    + ['Amended Record Indicator', 'Null']
    + ['Null'] * 4
)

# No code-to-label lookups are declared for this dataset.
codes = dict()

# Run normaliser() (defined in an earlier cell) for the prisons data and preview three rows.
typ = 'eprison'
tmp = normaliser(
    typ,
    cols,
    dates="auto",
    index="auto",
    codes=codes,
    encoding='Latin-1',
    db_con=con,
)
tmp.head(3)

## eschools - Schools in England

In [None]:
# Column names for the eschools data, in file order; unused positions are labelled 'Null'.
# This layout carries 'Local Authority' and 'Type of Establishment' columns.
cols=['Organisation Code','Name','National Grouping','High Level Health Geography',
      'Address Line 1','Address Line 2','Address Line 3','Address Line 4','Address Line 5','Postcode',
      'Open Date','Close Date','Null','Null',
      'Local Authority','Null','Null','Contact Telephone Number',
      'Null','Null','Null',
      'Amended Record Indicator','Null',
      'Current Care Organisation','Type of Establishment','Null','Null']

# Lookup from establishment-type code to label.
# Codes 1-8 are listed both zero-padded ("01") and unpadded ("1"), each pair with the same label.
# NOTE(review): presumably the source data contains both spellings - confirm against the raw file.
codes={'Type of Establishment': {"01": "COMMUNITY SCHOOL", "02": "VOLUNTARY AIDED SCHOOL", 
                                 "03": "VOLUNTARY CONTROLLED SCHOOL", "05": "FOUNDATION SCHOOL", 
                                 "06": "CITY TECHNOLOGY COLLEGE", "07": "COMMUNITY SPECIAL SCHOOL", 
                                 "08": "NON-MAINTAINED SPECIAL SCHOOL",
                                 "1": "COMMUNITY SCHOOL", "2": "VOLUNTARY AIDED SCHOOL", 
                                 "3": "VOLUNTARY CONTROLLED SCHOOL", "5": "FOUNDATION SCHOOL", 
                                 "6": "CITY TECHNOLOGY COLLEGE", "7": "COMMUNITY SPECIAL SCHOOL", 
                                 "8": "NON-MAINTAINED SPECIAL SCHOOL","10": "OTHER INDEPENDENT SPECIAL SCHOOL",
                                 "11": "OTHER INDEPENDENT SCHOOL", "12": "FOUNDATION SPECIAL SCHOOL", 
                                 "14": "PUPIL REFERRAL UNIT", "15": "LA NURSERY SCHOOL", 
                                 "18": "FURTHER EDUCATION", "24": "SECURE UNITS", "25": "OFFSHORE SCHOOLS",
                                 "26": "SERVICE CHILDRENS EDUCATION", "27": "MISCELLANEOUS", 
                                 "28": "ACADEMY SPONSOR LED", "29": "HIGHER EDUCATION INSTITUTIONS", 
                                 "30": "WELSH ESTABLISHMENT", "31": "SIXTH FORM CENTRES", 
                                 "32": "SPECIAL POST 16 INSTITUTION", "33": "ACADEMY SPECIAL SPONSOR LED",
                                 "34": "ACADEMY CONVERTER", "35": "FREE SCHOOLS", 
                                 "36": "FREE SCHOOLS SPECIAL", "37": "BRITISH SCHOOLS OVERSEAS", 
                                 "38": "FREE SCHOOLS - ALTERNATIVE PROVISION", "39": "FREE SCHOOLS - 16-19",
                                 "40": "UNIVERSITY TECHNICAL COLLEGE", "41": "STUDIO SCHOOLS", 
                                 "42": "ACADEMY ALTERNATIVE PROVISION CONVERTER",
                                 "43": "ACADEMY ALTERNATIVE PROVISION SPONSOR LED",
                                 "44": "ACADEMY SPECIAL CONVERTER", "45": "ACADEMY 16-19 CONVERTER", 
                                 "46": "ACADEMY 16-19 SPONSOR LED", "47": "CHILDREN'S CENTRE", 
                                 "48": "CHILDREN'S CENTRE LINKED SITE", 
                                 "56": "INSTITUTION FUNDED BY OTHER GOVERNMENT DEPARTMENT"}}

# Run normaliser() (defined in an earlier cell) for the schools data and preview three rows.
typ = 'eschools'
tmp = normaliser(
    typ,
    cols,
    dates="auto",
    index="auto",
    codes=codes,
    encoding='Latin-1',
    db_con=con,
)
tmp.head(3)

## ejustice - Health and Justice Organisations

In [None]:
typ = 'ejustice'

# Column names for the ejustice data, in file order; unused positions are labelled 'Null'.
cols = (
    ['Organisation Code', 'Justice Estate Name', 'National Grouping', 'High Level Health Geography']
    + ['Address Line {}'.format(n) for n in range(1, 6)]
    + ['Postcode', 'Open Date', 'Close Date']
    + ['Null', 'Residency Indicator', 'Parent Organisation Code', 'Null', 'Null', 'Null']
    + ['Null'] * 3
    + ['Amended Record Indicator', 'Null']
    + ['Null', 'Null', 'Primary Role', 'Null']
)

# Code-to-label lookups, keyed by the column they decode.
codes = {
    'Residency Indicator': {
        'RI': 'Residential Institution',
        'RJ': 'Non-Residential Institution',
    },
    'Primary Role': {
        'PN': 'Prison',
        'JA': 'Young Offender Institution',
        'JB': 'Secure Training Centre',
        'JC': "Secure Children's Home",
        'JD': 'Immigration Removal Centre',
        'JE': 'Constabulary',
        'JF': 'Police Custody Suite',
        'JG': 'Courts',
        'JH': 'Sexual Assault Referral Centre',
    },
}


In [None]:
# Run normaliser() (defined in an earlier cell) with the typ/cols/codes set in the previous cell
# and preview three rows.
tmp = normaliser(
    typ,
    cols,
    dates="auto",
    index="auto",
    codes=codes,
    encoding='Latin-1',
    db_con=con,
)
tmp.head(3)

## ecare - Care Trusts and Sites

In [None]:
# Column names for the ecare data, in file order; unused positions are labelled 'Null'.
cols = (
    ['Organisation Code', 'Name', 'National Grouping', 'High Level Health Geography']
    + ['Address Line {}'.format(n) for n in range(1, 6)]
    + ['Postcode', 'Open Date', 'Close Date']
    + ['Null', 'Organisation Sub-Type Code', 'Null', 'Null', 'Null', 'Contact Telephone Number']
    + ['Null'] * 3
    + ['Amended Record Indicator', 'Null']
    + ['GOR Code', 'Null', 'Null', 'Null']
)

# Single code-to-label lookup for the sub-type column.
codes = {
    'Organisation Sub-Type Code': {'P': 'PCT Derived', 'T': 'NHS Trust Derived'},
}


# Run normaliser() (defined in an earlier cell) for the care trusts and sites data
# and preview three rows.
typ = 'ecare'
tmp = normaliser(
    typ,
    cols,
    dates="auto",
    index="auto",
    codes=codes,
    encoding='Latin-1',
    db_con=con,
)
tmp.head(3)

## SHOW DATABASE TABLES

In [None]:
# List the name of every table recorded in the database's sqlite_master catalogue.
q = "SELECT name FROM sqlite_master WHERE type='table'"
pd.read_sql_query(sql=q, con=con)