# NomisWeb

Tracking down these geography type codes was a real pain, so they are recorded here for reference:

- TYPE150: 2021 output areas within England and Wales
- TYPE151: 2021 super output areas - lower layer within England and Wales
- TYPE152: 2021 super output areas - middle layer within England and Wales
- TYPE153: 2022 wards within England and Wales
- TYPE154: 2022 local authorities: districts within England and Wales
- TYPE155: 2022 local authorities: counties within England and Wales
- TYPE168: 2021 national parks within England and Wales
- TYPE423: local authorities: county / unitary (as of April 2023) within England and Wales
- TYPE424: local authorities: district / unitary (as of April 2023) within England and Wales
- TYPE459: local enterprise partnerships (as of April 2021) within England and Wales
- TYPE480: regions within England and Wales


In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import os
from pathlib import Path

import numpy as np
import pandas as pd
from dotenv import load_dotenv

from nomisweb import Geographies, TableMetadata, build_geog_query, fetch, fetch_table
from utils import extract_crime_data

In [3]:
# Load variables from a local .env file into the process environment
load_dotenv()
# Raises KeyError straight away if NOMIS_API_KEY has not been configured
api_key = os.environ["NOMIS_API_KEY"]

table_name = "NM_2041_1"  # a random 2021 census table - age, ethnicity, sex

In [4]:
# Top-level geographies that the table is published for.
# NOTE(review): the key is passed as `id=` here but as "uid" in the fetch_table params
# further down - confirm which name the `fetch` helper / Nomis API actually expects.
top_level_response = fetch(f"dataset/{table_name}/geography.def.sdmx.json", id=api_key)
top_level_geogs = Geographies(**top_level_response)
top_level_geogs.to_dataframe()

Unnamed: 0_level_0,NomisCode,TypeName,TypeCode
GeogCode,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
K04000001,2092957703,countries,499
E92000001,2092957699,countries,499
W92000004,2092957700,countries,499


In [5]:
# List the geography types available beneath 2092957703, the NomisCode shown for
# K04000001 in the top-level listing.
ew_types_endpoint = f"dataset/{table_name}/geography/2092957703.def.sdmx.json"
ew_geog_types = Geographies(**fetch(ew_types_endpoint, id=api_key))
ew_geog_types.to_dataframe()

Unnamed: 0_level_0,NomisCode,TypeName,TypeCode,IsAbstractCode,ParentCode,ChildCount
GeogCode,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
K04000001,2092957703,countries,499,,,
,2092957703TYPE150,2021 output areas,150,True,2092958000.0,188880.0
,2092957703TYPE151,2021 super output areas - lower layer,151,True,2092958000.0,35672.0
,2092957703TYPE152,2021 super output areas - middle layer,152,True,2092958000.0,7264.0
,2092957703TYPE153,2022 wards,153,True,2092958000.0,7638.0
,2092957703TYPE154,2022 local authorities: districts,154,True,2092958000.0,331.0
,2092957703TYPE155,2022 local authorities: counties,155,True,2092958000.0,174.0
,2092957703TYPE168,2021 national parks,168,True,2092958000.0,13.0
,2092957703TYPE423,local authorities: county / unitary (as of Apr...,423,True,2092958000.0,175.0
,2092957703TYPE424,local authorities: district / unitary (as of A...,424,True,2092958000.0,318.0


In [6]:
# List the individual geographies of a single type - here TYPE154,
# which the type listing names "2022 local authorities: districts".
lad_endpoint = f"dataset/{table_name}/geography/2092957703TYPE154.def.sdmx.json"
ew_lads = Geographies(**fetch(lad_endpoint))
ew_lads.to_dataframe()

Unnamed: 0_level_0,NomisCode,TypeName,TypeCode
GeogCode,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
E06000001,645922819,2022 local authorities: districts,154
E06000002,645922820,2022 local authorities: districts,154
E06000003,645922822,2022 local authorities: districts,154
E06000004,645922823,2022 local authorities: districts,154
E06000005,645922817,2022 local authorities: districts,154
...,...,...,...
W06000020,645923145,2022 local authorities: districts,154
W06000021,645923146,2022 local authorities: districts,154
W06000022,645923147,2022 local authorities: districts,154
W06000023,645923132,2022 local authorities: districts,154


In [7]:
# The codelist endpoint gives every supported geography for a given table. The response
# is large, so it is cached locally as parquet and only fetched when the cache is missing.
geog_df = Path("./data/census2021geographies.parquet")
if geog_df.exists():
    all_geogs = pd.read_parquet(geog_df)
else:
    # seems like codelist endpoints don't like api keys, so none is passed
    all_geogs = Geographies(**fetch("codelist/CL_2041_1_GEOGRAPHY.def.sdmx.json")).to_dataframe()
    # create the cache directory if needed so the write cannot fail on a fresh checkout
    geog_df.parent.mkdir(parents=True, exist_ok=True)
    all_geogs.to_parquet(geog_df)

all_geogs  # .TypeName.value_counts()

Unnamed: 0_level_0,NomisCode,TypeName,TypeCode
GeogCode,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
E00060274,629202434,2021 output areas,150
E00060275,629202435,2021 output areas,150
E00060276,629202436,2021 output areas,150
E00060277,629202437,2021 output areas,150
E00060279,629202439,2021 output areas,150
...,...,...,...
E12000009,2013265929,regions,480
W92000004,2013265930,regions,480
K04000001,2092957703,countries,499
E92000001,2092957699,countries,499


## Get some crime data and the LSOAs the crimes occur in

In [8]:
# Load the crime records from the zipped extract; the "LSOA code" column is used below
crime_data = extract_crime_data("./data/wy202204-202503.zip")

In [9]:
# NomisCode for every TYPE151 geography (2021 lower-layer super output areas) known to the table
lsoa_listing = Geographies(**fetch(f"dataset/{table_name}/geography/2092957703TYPE151.def.sdmx.json"))
available_lsoas = lsoa_listing.to_dataframe().NomisCode

# The crime data still contains some 2011 LSOAs, so keep only the codes
# that also appear in the 2021 listing.
crime_lsoas = crime_data["LSOA code"].unique()
lsoas = np.intersect1d(crime_lsoas, available_lsoas.index)
lsoas

array(['E01005410', 'E01005414', 'E01005448', ..., 'E01035052',
       'E01035053', 'E01035054'], shape=(1433,), dtype=object)

In [15]:
# From here on a different table is queried
table_name = "NM_2132_1"

# Look up the Nomis code for each LSOA found in the crime data and turn the
# list into a geography query value.
nomis_area_codes = all_geogs.loc[lsoas, "NomisCode"].to_list()
geography_query = build_geog_query(nomis_area_codes)

# Query parameters: limit the geography to the LSOAs above, pick the category
# codes of interest and select only the columns used in the analysis.
params = {
    "date": "latest",
    "geography": geography_query,
    "c2021_eth_20": "1001...1005",
    "c2021_age_6": "1...5",
    "c_sex": "1,2",
    "select": "geography_name,geography_code,c2021_eth_20_name,c2021_age_6_name,c_sex_name,obs_value",
    "uid": api_key,
}

data = fetch_table(table_name, **params)

In [16]:
# Check which ethnic group labels came back from the query
data.C2021_ETH_20_NAME.unique()

array(['Asian, Asian British or Asian Welsh',
       'Black, Black British, Black Welsh, Caribbean or African',
       'Mixed or Multiple ethnic groups', 'White', 'Other ethnic group'],
      dtype=object)

In [17]:
# e.g. compare proportion of Black people in community to stop-and-search incidences

# Flag the rows whose ethnic group label contains "Black".
# NOTE: "Mixed or Multiple ethnic groups" does not contain that word, so it is not flagged.
data["is_black"] = data.C2021_ETH_20_NAME.str.contains("Black")

# Sum OBS_VALUE per LSOA, with one column per flag value (False / True)
lsoa_totals = data.groupby(["GEOGRAPHY_CODE", "GEOGRAPHY_NAME", "is_black"]).OBS_VALUE.sum().unstack(level="is_black")

# Flagged share of each LSOA's total. Column arithmetic gives the same result as a
# row-by-row apply without calling a Python function once per LSOA.
proportion = lsoa_totals[True] / lsoa_totals.sum(axis=1)
proportion

GEOGRAPHY_CODE  GEOGRAPHY_NAME 
E01005410       Oldham 006A        0.000668
E01005414       Oldham 006C        0.001498
E01005448       Oldham 012A        0.057352
E01005561       Rochdale 014D      0.004954
E01006881       St. Helens 012E    0.010497
                                     ...   
E01035050       Leeds 105G         0.090965
E01035051       Leeds 105H         0.045877
E01035052       Leeds 105I         0.037216
E01035053       Leeds 105J         0.016110
E01035054       Leeds 112F         0.039875
Length: 1433, dtype: float64