# Carole's playground

In [369]:
import sys
import requests
import json
import pandas as pd
from IPython.display import display, HTML

## Helpers

In [370]:
def prettyPrint(rawjson):
    print(json.dumps(rawjson, indent=2))
    
def prettyDfSize(df):
    print('Rows: {}, Columns: {}'.format(len(df), len(df.columns)))
    
def get(url):
    result = {
        'successful' : False,
        'raw' : None,
        'json' : None
    }
    r = requests.get(url)
    result['raw'] = r
    if(r.status_code != 200):
        print('Error returned for {}: {}'.format(r.url, r.status_code))
        print(r.text)
    else:
        result['successful'] = True
        result['json'] = json.loads(r.text)
    return result

In [371]:
apikey = 'BuiXohTN1i'
baseUrl = 'http://data.ons.gov.uk/ons/api/data'
apikKeyParam = 'apikey={}'.format(apikey);

url = 'http://data.ons.gov.uk/ons/api/data/.json?apikey=' + apikey

## Available contexts

In [372]:
url = '{}/contexts.json?{}'.format(baseUrl, apikKeyParam)
print(url)
response = get(url)
if response['successful'] == False:
    print('Error')
else:
    contexts = response['json']
    for context in contexts['ons']['contextList']['statisticalContext']:
        print(context)

http://data.ons.gov.uk/ons/api/data/contexts.json?apikey=BuiXohTN1i
{'contextName': 'Census', 'contextId': 1}
{'contextName': 'Socio-Economic', 'contextId': 2}
{'contextName': 'Economic', 'contextId': 3}
{'contextName': 'Social', 'contextId': 4}


## Concepts
The context must be set to Census

In [373]:
context = 'Census'
url = '{}/concepts.json?{}&context={}'.format(baseUrl, apikKeyParam, context)
url

'http://data.ons.gov.uk/ons/api/data/concepts.json?apikey=BuiXohTN1i&context=Census'

In [374]:
r = get(url)
if r['successful'] == 'False':
    print('Error')
else:
    concepts = [concept for concept in r['json']['ons']['conceptList']['concept']]
    temp = {'name':[], 'id':[], 'usage_number':[]}
    #prettyPrint(concepts)
    for concept in concepts:
        temp['id'].append(concept['id'])
        temp['usage_number'].append(concept['collectionCount'])
        temp['name'].append([name for name in concept['names']['name'] if name['@xml.lang'] == 'en'][0]['$'])
    conceptsdf = pd.DataFrame(temp)
    conceptsdf['id'] = conceptsdf['id'].astype(int)

#### List of concepts

In [375]:
print(url)
prettyDfSize(conceptsdf)
conceptsdf.sort_values(['name'])

http://data.ons.gov.uk/ons/api/data/concepts.json?apikey=BuiXohTN1i&context=Census
Rows: 58, Columns: 3


Unnamed: 0,id,name,usage_number
35,45,Accommodation type,19
11,1,Age,186
47,72,Age of arrival in the UK,9
34,46,Cars or vans,30
33,47,Central heating,11
32,48,Communal establishments,23
19,32,Country of birth,42
57,21,Demography,0
10,22,Dependent children,26
31,49,Dwellings,6


# Classifications

In [376]:
context = 'Census'
url = '{}/classifications.json?{}&context={}'.format(baseUrl, apikKeyParam, context)
url

'http://data.ons.gov.uk/ons/api/data/classifications.json?apikey=BuiXohTN1i&context=Census'

In [377]:
r = get(url)
if r['successful'] == False:
    print('Error')
else:
    classifications = [classification for classification in r['json']['ons']['classificationList']['classification']]
    temp = {'name':[], 'id':[], 'url':[]}
    temp['id'] = [classification['id'] for classification in classifications]
    temp['name'] = [[name['$'] for name in classification['names']['name'] if name['@xml.lang'] == 'en'][0] for classification in classifications]
    temp['url'] = [[url['href'] for url in classification['urls']['url'] if url['@representation'] == 'json'][0] for classification in classifications]
    classificationsdf = pd.DataFrame(temp)

#### List of classifications

In [378]:
prettyDfSize(classificationsdf)
classificationsdf.sort_values(['name'])

Rows: 546, Columns: 3


Unnamed: 0,id,name,url
521,CL_0000100,Ability to read Welsh language (T001A),classification/CL_0000100.json?apikey=BuiXohTN...
508,CL_0001361,Ability to speak Welsh (T003D),classification/CL_0001361.json?apikey=BuiXohTN...
147,CL_0000111,Ability to speak Welsh and age of dependent ch...,classification/CL_0000111.json?apikey=BuiXohTN...
408,CL_0000101,Ability to speak Welsh language (T001A),classification/CL_0000101.json?apikey=BuiXohTN...
223,CL_0000024,Ability to speak Welsh language (T003A),classification/CL_0000024.json?apikey=BuiXohTN...
68,CL_0000102,Ability to understand spoken Welsh language (T...,classification/CL_0000102.json?apikey=BuiXohTN...
323,CL_0000103,Ability to write Welsh language (T001A),classification/CL_0000103.json?apikey=BuiXohTN...
107,CL_0000969,Accommodation type,classification/CL_0000969.json?apikey=BuiXohTN...
502,CL_0000551,Accommodation type (T003A),classification/CL_0000551.json?apikey=BuiXohTN...
82,CL_0000532,Accommodation type (T004A),classification/CL_0000532.json?apikey=BuiXohTN...


### Classification Details
#### Religion

In [379]:
def classificationDetails(classificationName):
    print(classificationName)
    filteredClassifications = classificationsdf[classificationsdf.name == classificationName]
    prettyDfSize(filteredClassifications)
    
    for index, classification in filteredClassifications.iterrows():
        urlarg = classification['url']
        url = '{}/{}'.format(baseUrl, urlarg)
        print(url)

        r = requests.get(url)
        if(r.status_code != 200):
            print('Error returned for {}: {}'.format(r.url, r.status_code))
            print(r.text)
        else:
            try:
                raw = json.loads(r.text)
                codes = [code for code in raw['Structure']['CodeLists']['CodeList']['Code']]
                temp = {'description':[], 'value':[]}
                temp['value'] = [code['@value'] for code in codes]
                temp['description'] = [[desc['$'] for desc in code['Description'] if desc['@xml.lang'] == 'en'][0] for code in codes]
                codesdf = pd.DataFrame(temp)
                display(codesdf.sort_values(['description']))
            except TypeError:
                # saw that happening where Description is not an array for some reasons...
                print("TypeError:", sys.exc_info()[0])

In [380]:
df = classificationDetails('Religion')
df

Religion
Rows: 1, Columns: 3
http://data.ons.gov.uk/ons/api/data/classification/CL_0001053.json?apikey=BuiXohTN1i&context=Census


Unnamed: 0,description,value
6,All categories: Religion,CI_0018467
4,Buddhist,CI_0018478
3,Christian,CI_0018474
2,Hindu,CI_0018482
1,Jewish,CI_0018485
7,Muslim,CI_0018489
0,No religion,CI_0018500
5,Other religion,CI_0018496
8,Religion not stated,CI_0018504
9,Sikh,CI_0018493


In [381]:
df = classificationDetails('Age')
df

Age
Rows: 11, Columns: 3
http://data.ons.gov.uk/ons/api/data/classification/CL_0001091.json?apikey=BuiXohTN1i&context=Census


Unnamed: 0,description,value
5,Age 16 to 24,CI_0018621
1,Age 25 to 34,CI_0018622
4,Age 35 to 49,CI_0018623
2,Age 50 to 64,CI_0018624
0,Age 65 and over,CI_0018625
3,All categories: Age,CI_0018601


http://data.ons.gov.uk/ons/api/data/classification/CL_0001060.json?apikey=BuiXohTN1i&context=Census


Unnamed: 0,description,value
0,Age 0 to 15,CI_0018533
3,Age 16 to 24,CI_0018534
1,Age 25 to 49,CI_0018535
4,Age 50 to 64,CI_0018536
2,Age 65 and over,CI_0018537
5,All categories: Age,CI_0018528


http://data.ons.gov.uk/ons/api/data/classification/CL_0000954.json?apikey=BuiXohTN1i&context=Census


Unnamed: 0,description,value
11,Age 0 to 4,CI_0017440
3,Age 10 to 14,CI_0017443
14,Age 15,CI_0017444
16,Age 16 to 17,CI_0017445
10,Age 18 to 19,CI_0017446
9,Age 20 to 24,CI_0017447
13,Age 25 to 29,CI_0017448
1,Age 30 to 44,CI_0017449
0,Age 45 to 59,CI_0017450
4,Age 5 to 7,CI_0017441


http://data.ons.gov.uk/ons/api/data/classification/CL_0001052.json?apikey=BuiXohTN1i&context=Census


Unnamed: 0,description,value
13,Age 1,CI_0018458
12,Age 10,CI_0018468
35,Age 11,CI_0018469
9,Age 12,CI_0018470
36,Age 13,CI_0018471
8,Age 14,CI_0018472
37,Age 15,CI_0018473
30,Age 16,CI_0018475
14,Age 17,CI_0018476
17,Age 18,CI_0018477


http://data.ons.gov.uk/ons/api/data/classification/CL_0001063.json?apikey=BuiXohTN1i&context=Census


Unnamed: 0,description,value
2,Age 0 to 15,CI_0018551
0,Age 16 to 49,CI_0018552
3,Age 50 to 64,CI_0018553
1,Age 65 and over,CI_0018554
4,All categories: Age,CI_0018550


http://data.ons.gov.uk/ons/api/data/classification/CL_0001036.json?apikey=BuiXohTN1i&context=Census


Unnamed: 0,description,value
1,Age 0 to 4,CI_0018326
6,Age 10 to 15,CI_0018328
14,Age 16 to 19,CI_0018329
13,Age 20 to 24,CI_0018330
17,Age 25 to 29,CI_0018331
18,Age 30 to 34,CI_0018332
16,Age 35 to 39,CI_0018333
12,Age 40 to 44,CI_0018334
2,Age 45 to 49,CI_0018335
8,Age 5 to 9,CI_0018327


http://data.ons.gov.uk/ons/api/data/classification/CL_0001057.json?apikey=BuiXohTN1i&context=Census


Unnamed: 0,description,value
0,Age 0 to 15,CI_0018519
3,Age 16 to 49,CI_0018520
1,Age 50 to 64,CI_0018521
2,Age 65 and over,CI_0018522
4,All categories: Age,CI_0018514


http://data.ons.gov.uk/ons/api/data/classification/CL_0001110.json?apikey=BuiXohTN1i&context=Census


Unnamed: 0,description,value
0,Age 16 to 24,CI_0018670
2,Age 25 to 34,CI_0018674
4,Age 35 to 49,CI_0018675
3,Age 50 and over,CI_0018676
5,Age 50 and over,CI_0018701
1,All categories: Age 16 and over,CI_0018677


http://data.ons.gov.uk/ons/api/data/classification/CL_0001274.json?apikey=BuiXohTN1i&context=Census
TypeError: <class 'TypeError'>
http://data.ons.gov.uk/ons/api/data/classification/CL_0001171.json?apikey=BuiXohTN1i&context=Census


Unnamed: 0,description,value
0,Age 16 to 19,CI_0018812
5,Age 20 to 24,CI_0018813
10,Age 25 to 29,CI_0018814
9,Age 30 to 34,CI_0018815
4,Age 35 to 39,CI_0018816
6,Age 40 to 44,CI_0018817
11,Age 45 to 49,CI_0018818
7,Age 50 to 54,CI_0018819
1,Age 55 to 59,CI_0018820
2,Age 60 to 64,CI_0018821


http://data.ons.gov.uk/ons/api/data/classification/CL_0001037.json?apikey=BuiXohTN1i&context=Census


Unnamed: 0,description,value
50,Age 16,CI_0018345
13,Age 17,CI_0018346
32,Age 18,CI_0018347
2,Age 19,CI_0018348
28,Age 20,CI_0018349
41,Age 21,CI_0018350
38,Age 22,CI_0018351
12,Age 23,CI_0018352
18,Age 24,CI_0018353
59,Age 25,CI_0018354


# Collections

In [382]:
context = 'Census'
datafilter = 'Religion'
url = '{}/collections.json?{}&context={}&find={}'.format(baseUrl, apikKeyParam, context, datafilter)
url

'http://data.ons.gov.uk/ons/api/data/collections.json?apikey=BuiXohTN1i&context=Census&find=Religion'

In [384]:
response = get(url)
if response['successful'] == False:
    print('Error')
else:
    print('Dataset for ', datafilter)
    content = response['json']
    temp = {'name' : [], 'description' : [], 'id' : [], 'url' : [], 'geohierarchy' : []}
    for collection in content['ons']['collectionList']['collection']:
        temp['name'].append([n for n in collection['names']['name'] if n['@xml.lang'] == 'en'][0]['$'])
        temp['description'].append(collection['description'])
        temp['id'].append(collection['id'])
        temp['url'].append([u for u in collection['urls']['url'] if u['@representation'] == 'json'][0]['href'])
        temp['geohierarchy'].append(collection['geographicalHierarchies'])
    df = pd.DataFrame(temp)
display(df.sort_values(['description']))    

Dataset for  Religion


Unnamed: 0,description,geohierarchy,id,name,url
0,collection DC1202EW,{'geographicalHierarchy': [{'differentiator': ...,DC1202EW,Household composition by religion of Household...,collectiondetails/DC1202EW.json?context=Census...
1,collection DC2107EW,{'geographicalHierarchy': [{'differentiator': ...,DC2107EW,Religion by sex by age,collectiondetails/DC2107EW.json?context=Census...
2,collection DC2201EW,{'geographicalHierarchy': [{'differentiator': ...,DC2201EW,Ethnic group by religion,collectiondetails/DC2201EW.json?context=Census...
3,collection DC2204EW,{'geographicalHierarchy': [{'differentiator': ...,DC2204EW,National identity by religion,collectiondetails/DC2204EW.json?context=Census...
4,collection DC2207EW,{'geographicalHierarchy': [{'differentiator': ...,DC2207EW,Country of birth by religion by sex,collectiondetails/DC2207EW.json?context=Census...
5,collection DC4204EW,{'geographicalHierarchy': [{'differentiator': ...,DC4204EW,Tenure by car or van availability by religion ...,collectiondetails/DC4204EW.json?context=Census...
6,collection DC4207EW,{'geographicalHierarchy': [{'differentiator': ...,DC4207EW,Dwelling type by type of central heating in ho...,collectiondetails/DC4207EW.json?context=Census...
7,collection DC4208EW,{'geographicalHierarchy': [{'differentiator': ...,DC4208EW,Dwelling type by type of central heating in ho...,collectiondetails/DC4208EW.json?context=Census...
8,collection DC4409EWla,{'geographicalHierarchy': {'differentiator': '...,DC4409EWla,Communal establishment management and type by ...,collectiondetails/DC4409EWla.json?context=Cens...
9,collection DC5204EW,{'geographicalHierarchy': [{'differentiator': ...,DC5204EW,Highest level of qualification by religion by age,collectiondetails/DC5204EW.json?context=Census...


# Datasets

In [385]:
context = 'Census'
url = '{}/datasets.json?{}&context={}&class=CL_0000083'.format(baseUrl, apikKeyParam, context)
url

'http://data.ons.gov.uk/ons/api/data/datasets.json?apikey=BuiXohTN1i&context=Census&class=CL_0000083'

In [386]:
response = get(url)
if response['successful'] == False:
    print('Error')
else:
    content = response['json']
    prettyPrint(content)
    

{
  "ons": {
    "linkedNodes": {
      "linkedNode": {
        "urls": {
          "url": [
            {
              "href": ".xml?apikey=BuiXohTN1i&context=Census",
              "@representation": "xml"
            },
            {
              "href": ".json?apikey=BuiXohTN1i&context=Census",
              "@representation": "json"
            },
            {
              "href": ".html?apikey=BuiXohTN1i&context=Census",
              "@representation": "html"
            }
          ]
        },
        "name": "Root",
        "relation": "parent"
      }
    },
    "datasetList": {
      "contexts": {
        "context": {
          "contextName": "Census",
          "datasets": {
            "dataset": [
              {
                "names": {
                  "name": [
                    {
                      "@xml.lang": "en",
                      "$": "Economic activity"
                    },
                    {
                      "@xml.lang": "cy",
       