## Get a dict of service names from the GDS Register


In [1]:
import requests
from pandas import read_csv
from io import StringIO
import os
import json

In [None]:
# Directory where generated lookup files are written; None when the
# DATA_DIR environment variable is not set.
DATA_DIR = os.environ.get("DATA_DIR")

We presume a dictionary is the preferred format for the lookup, with a key that works across registers.

In [22]:
def get_services_list(url='https://government-service.register.gov.uk/records.csv?page-size=5000', timeout=30):
    """Get a list of str of Government service hostnames from the government-service register.

    Parameters
    ----------
    url : str
        URL of the register's records in CSV form. The CSV must contain a
        ``hostname`` column.
    timeout : float
        Seconds to wait for the server before giving up, so a stalled
        connection cannot hang the notebook indefinitely.

    Returns
    -------
    list
        The values of the ``hostname`` column, in the order they appear in the CSV.

    Raises
    ------
    requests.HTTPError
        If the server answers with a 4xx/5xx status.
    """
    response = requests.get(url, timeout=timeout)
    # Fail loudly on an error status instead of parsing an error page as CSV.
    response.raise_for_status()

    df = read_csv(StringIO(response.text))

    return df['hostname'].tolist()

In [23]:
# Sanity check: count how many hostnames get_services_list() returns.
len(get_services_list())

152

In [24]:
# No notebook-level `df` exists at this point (the frames built inside the
# functions are local to them), so fetch the register records explicitly to
# keep this cell runnable on a fresh kernel.
register_response = requests.get(
    'https://government-service.register.gov.uk/records.csv?page-size=5000',
    timeout=30,
)
register_response.raise_for_status()
services_raw = read_csv(StringIO(register_response.text))
services_raw.head()

Unnamed: 0,index-entry-number,entry-number,entry-timestamp,key,government-service,hostname,government-organisation,start-date,end-date
0,166,166,2018-12-03T14:59:59Z,1116,1116,recruit-apprentice,EO1216,,
1,165,165,2018-12-03T14:59:21Z,1054,1054,findapprenticeship,EO1216,,
2,164,164,2018-11-09T11:27:01Z,1148,1148,universal-credit,D10,,
3,163,163,2018-07-13T13:55:50Z,1198,1198,apply-divorce,EA73,2018-04-16,
4,162,162,2018-07-13T13:55:05Z,1197,1197,reply-jury-summons,EA73,2018-04-24,


In [25]:
def get_service_dict(url='https://government-service.register.gov.uk/records.csv?page-size=5000', timeout=30):
    """Get a dictionary mapping each Government Service hostname to its government-organisation.

    This lets us look up the responsible organisation's id based on the
    hostname of the service.

    Parameters
    ----------
    url : str
        URL of the register's records in CSV form. The CSV must contain
        ``hostname`` and ``government-organisation`` columns.
    timeout : float
        Seconds to wait for the server before giving up, so a stalled
        connection cannot hang the notebook indefinitely.

    Returns
    -------
    dict
        ``{hostname: government-organisation}``. If a hostname appears more
        than once, the last row in the CSV wins.

    Raises
    ------
    requests.HTTPError
        If the server answers with a 4xx/5xx status.
    """
    response = requests.get(url, timeout=timeout)
    # Fail loudly on an error status instead of parsing an error page as CSV.
    response.raise_for_status()

    df = read_csv(StringIO(response.text))

    # Service hostname is the key; the government-organisation value is the value.
    hostnames = df['hostname'].tolist()
    organisations = df['government-organisation'].tolist()

    return dict(zip(hostnames, organisations))

In [27]:
# NOTE: despite the name, `df` holds a plain dict (hostname -> government-organisation),
# not a DataFrame.
df = get_service_dict()

In [28]:
# Preview the lookup serialised as a JSON string (the same text that is written to disk below).
json.dumps(df)



In [32]:
# Persist the hostname -> organisation lookup as JSON inside DATA_DIR.
with open(
    os.path.join(DATA_DIR, "services_from_registers_lookup.json"), 'w'
) as lookup_file:
    lookup_file.write(json.dumps(df))