In [1]:
import requests
import zipfile
import tempfile
import os
import pandas as pd
import io
import json

### Data
Collected from Nomis website https://www.nomisweb.co.uk/sources/census_2021_bulk

In [2]:
data = [
    {"filename": "TS001", "description": "Number of usual residents in households and communal establishments", "year": 2021},
    {"filename": "TS002", "description": "Legal partnership status", "year": 2021},
    {"filename": "TS003", "description": "Household composition", "year": 2021},
    {"filename": "TS004", "description": "Country of birth", "year": 2021},
    {"filename": "TS004A", "description": "Country of Birth", "year": 2021},
    {"filename": "TS005", "description": "Passports held", "year": 2021},
    {"filename": "TS006", "description": "Population density", "year": 2021},
    {"filename": "TS007", "description": "Age by single year", "year": 2021},
    {"filename": "TS007A", "description": "Age by five-year age bands", "year": 2021},
    {"filename": "TS007B", "description": "Age by broad age bands", "year": 2021},
    {"filename": "TS008", "description": "Sex", "year": 2021},
    {"filename": "TS009", "description": "Sex by single year of age", "year": 2021},
    {"filename": "TS010", "description": "Living arrangements", "year": 2021},
    {"filename": "TS011", "description": "Households by deprivation dimensions", "year": 2021},
    {"filename": "TS012", "description": "Country of birth (detailed)", "year": 2021},
    {"filename": "TS013", "description": "Passports held (detailed)", "year": 2021},
    {"filename": "TS015", "description": "Year of arrival in UK", "year": 2021},
    {"filename": "TS016", "description": "Length of residence", "year": 2021},
    {"filename": "TS017", "description": "Household size", "year": 2021},
    {"filename": "TS018", "description": "Age of arrival in the UK", "year": 2021},
    {"filename": "TS019", "description": "Migrant Indicator", "year": 2021},
    {"filename": "TS020", "description": "Number of non-UK short-term residents by sex", "year": 2021},
    {"filename": "TS021", "description": "Ethnic group", "year": 2021},
    {"filename": "TS022", "description": "Ethnic group (detailed)", "year": 2021},
    {"filename": "TS023", "description": "Multiple ethnic group", "year": 2021},
    {"filename": "TS024", "description": "Main language (detailed)", "year": 2021},
    {"filename": "TS025", "description": "Household language", "year": 2021},
    {"filename": "TS026", "description": "Multiple main languages in household", "year": 2021},
    {"filename": "TS027", "description": "National identity - UK", "year": 2021},
    {"filename": "TS028", "description": "National identity (detailed)", "year": 2021},
    {"filename": "TS029", "description": "Proficiency in English", "year": 2021},
    {"filename": "TS030", "description": "Religion", "year": 2021},
    {"filename": "TS031", "description": "Religion (detailed)", "year": 2021},
    {"filename": "TS032", "description": "Welsh language skills (detailed)", "year": 2021},
    {"filename": "TS033", "description": "Welsh language skills (speaking)", "year": 2021},
    {"filename": "TS034", "description": "Welsh language skills (writing)", "year": 2021},
    {"filename": "TS035", "description": "Welsh language skills (reading)", "year": 2021},
    {"filename": "TS036", "description": "Welsh language skills (understanding)", "year": 2021},
    {"filename": "TS037", "description": "General health", "year": 2021},
    {"filename": "TS037ASP", "description": "General health - Age-standardised proportions", "year": 2021},
    {"filename": "TS038", "description": "Disability", "year": 2021},
    {"filename": "TS038ASP", "description": "Disability - Age-standardised proportions", "year": 2021},
    {"filename": "TS039", "description": "Provision of unpaid care", "year": 2021},
    {"filename": "TS039ASP", "description": "Provision of unpaid care - Age-standardised proportions", "year": 2021},
    {"filename": "TS040", "description": "Number of disabled people in the household", "year": 2021},
    {"filename": "TS041", "description": "Number of Households", "year": 2021},
    {"filename": "TS044", "description": "Accommodation type", "year": 2021},
    {"filename": "TS045", "description": "Car or van availability", "year": 2021},
    {"filename": "TS045A", "description": "Number of cars or vans in the area", "year": 2021},
    {"filename": "TS046", "description": "Central heating", "year": 2021},
    {"filename": "TS047", "description": "Communal establishment residents by age and sex", "year": 2021},
    {"filename": "TS048", "description": "Communal establishment management and type", "year": 2021},
    {"filename": "TS048NP", "description": "Communal establishment management and type - National Parks", "year": 2021},
    {"filename": "TS050", "description": "Number of bedrooms", "year": 2021},
    {"filename": "TS051", "description": "Number of rooms", "year": 2021},
    {"filename": "TS052", "description": "Occupancy rating for bedrooms", "year": 2021},
    {"filename": "TS053", "description": "Occupancy rating for rooms", "year": 2021},
    {"filename": "TS054", "description": "Tenure", "year": 2021},
    {"filename": "TS055", "description": "Purpose of second address", "year": 2021},
    {"filename": "TS056", "description": "Second address indicator", "year": 2021},
    {"filename": "TS058", "description": "Distance travelled to work", "year": 2021},
    {"filename": "TS059", "description": "Hours worked", "year": 2021},
    {"filename": "TS060", "description": "Industry", "year": 2021},
    {"filename": "TS060A", "description": "Industry", "year": 2021},
    {"filename": "TS060NP", "description": "Industry - National Parks", "year": 2021},
    {"filename": "TS061", "description": "Method used to travel to work", "year": 2021},
    {"filename": "TS062", "description": "NS-SeC", "year": 2021},
    {"filename": "TS063", "description": "Occupation", "year": 2021},
    {"filename": "TS064", "description": "Occupation - minor groups", "year": 2021},
    {"filename": "TS065", "description": "Employment history", "year": 2021},
    {"filename": "TS066", "description": "Economic activity status", "year": 2021},
    {"filename": "TS067", "description": "Highest level of qualification", "year": 2021},
    {"filename": "TS068", "description": "Schoolchildren and full-time students", "year": 2021},
    {"filename": "TS070", "description": "Gender identity (detailed)", "year": 2021},
    {"filename": "TS071", "description": "Previously served in the UK armed forces", "year": 2021},
    {"filename": "TS072", "description": "Number of people in household who have previously served in UK armed forces", "year": 2021},
    {"filename": "TS073", "description": "Population who have previously served in UK armed forces in communal establishments and in households", "year": 2021},
    {"filename": "TS074", "description": "Household Reference Person indicator of previous service in UK armed forces", "year": 2021},
    {"filename": "TS075", "description": "Multi religion households", "year": 2021},
    {"filename": "TS076", "description": "Welsh language skills (speaking) by single year of age", "year": 2021},
    {"filename": "TS077", "description": "Sexual orientation", "year": 2021},
    {"filename": "TS078", "description": "Gender identity", "year": 2021},
    {"filename": "TS079", "description": "Sexual orientation (detailed)", "year": 2021}
]

In [3]:
for d in data:
    d['file'] = f"https://www.nomisweb.co.uk/output/census/2021/census2021-{d['filename'].lower()}.zip"


In [4]:


def check_file_exists(url):
    try:
        response = requests.head(url)
        if response.status_code == 200:
            return True
        else:
            print(f"The file at {url} does not exist. Status code: {response.status_code}")
    except requests.RequestException as e:
        print(f"An error occurred: {e}")

In [5]:
def download_and_list_zip(url):
    # Create a temporary file
    file_list=[]
    with tempfile.NamedTemporaryFile(delete=False) as tmp_file:
        tmp_file_path = tmp_file.name

    try:
        # Download the ZIP file
        response = requests.get(url)
        response.raise_for_status()  # Raise an exception for HTTP errors

        # Write the content to the temporary file
        with open(tmp_file_path, 'wb') as f:
            f.write(response.content)

        # Check if the file is a valid ZIP file
        if zipfile.is_zipfile(tmp_file_path):
            # Open the ZIP file and list its contents
            with zipfile.ZipFile(tmp_file_path, 'r') as zip_ref:
                file_list = zip_ref.namelist()



    finally:
        # Delete the temporary file
        if os.path.exists(tmp_file_path):
            os.remove(tmp_file_path)
    return file_list




In [6]:
for d in data:
    if check_file_exists(d['file']):
        d['files'] = download_and_list_zip(d['file'])

In [7]:
def load_csv_from_zip(url, filename_in_zip):
    """
    Downloads a ZIP file and loads a specific CSV file into a pandas DataFrame

    Args:
        url: URL of the ZIP file
        filename_in_zip: Name of the CSV file inside the ZIP archive

    Returns:
        pandas DataFrame
    """
    # Download the ZIP file
    response = requests.get(url)
    response.raise_for_status()

    # Create a file-like object from the downloaded bytes
    zip_bytes = io.BytesIO(response.content)

    # Open the ZIP file and extract the specific file
    with zipfile.ZipFile(zip_bytes) as z:
        with z.open(filename_in_zip) as f:
            # Read the CSV directly into pandas
            return pd.read_csv(f)




In [8]:
for d in data:
    if len(d['files'])>0:
        df = load_csv_from_zip(d['file'],d['files'][0])
        column_names = df.columns.tolist()
        data_types = df.dtypes.tolist()
        d['data_types'] = []
        for i in range(len(column_names)):
            d['data_types'] .append({'name': column_names[i], 'type': str(data_types[i])})


In [9]:

# Specify the file path
file_path = 'testdata/nomis2021.json'

# Write the list of dictionaries to a JSON file
with open(file_path, 'w') as json_file:
    json.dump(data, json_file, indent=4)

In [10]:
print("done")

done


In [11]:
import ipywidgets as widgets
from IPython.display import display
from pprint import pprint


In [None]:
# Extract filenames
filenames = [item['filename'] for item in data]
descriptions = [item['description'] for item in data]
descriptions.sort()
# Create a dropdown widget
dropdown = widgets.Dropdown(
    options=descriptions,
    value=descriptions[0],
    description='Select File:',
    disabled=False,
)

# Create an output widget to display the selected dictionary
output = widgets.Output()

# Define a function to handle the dropdown selection
def on_selection_change(change):
    selected_filename = change['new']
    selected_dict = next(item for item in data if item['description'] == selected_filename)
    with output:
        output.clear_output()
        pprint(selected_dict)

# Attach the function to the dropdown's value change event
dropdown.observe(on_selection_change, names='value')

# Display the dropdown and output widgets
display(dropdown, output)


Dropdown(description='Select File:', options=('Accommodation type', 'Age by broad age bands', 'Age by five-yea…

Output()

In [None]:
constraints = [
    {"Constraint ": "Household Size",  "ID": "TS017"},
    {"Constraint ": "Household Composition",  "ID ": "TS003"},
    {"Constraint ": "Household Deprivation",  "ID": "TS011"},
    {"Constraint ": "Household Tenure", "ID": "TS054"},
    {"Constraint ": "Household Heating", "ID": "TS046"},
    {"Constraint ": "Household Number of Cars", "ID": "TS045"}
]

In [13]:
def load_csv_from_zip(url, filename_in_zip):
    """
    Downloads a ZIP file and loads a specific CSV file into a pandas DataFrame
    
    Args:
        url: URL of the ZIP file
        filename_in_zip: Name of the CSV file inside the ZIP archive
        
    Returns:
        pandas DataFrame
    """
    # Download the ZIP file
    response = requests.get(url)
    response.raise_for_status()
    
    # Create a file-like object from the downloaded bytes
    zip_bytes = io.BytesIO(response.content)
    
    # Open the ZIP file and extract the specific file
    with zipfile.ZipFile(zip_bytes) as z:
        with z.open(filename_in_zip) as f:
            # Read the CSV directly into pandas
            return pd.read_csv(f)


In [22]:
df = load_csv_from_zip('https://www.nomisweb.co.uk/output/census/2021/census2021-ts003.zip','census2021-ts003-lsoa.csv')

In [23]:
df.head()

Unnamed: 0,date,geography,geography code,Household composition: Total; measures: Value,Household composition: One person household; measures: Value,Household composition: One person household: Aged 66 years and over; measures: Value,Household composition: One person household: Other; measures: Value,Household composition: Single family household; measures: Value,Household composition: Single family household: All aged 66 years and over; measures: Value,Household composition: Single family household: Married or civil partnership couple; measures: Value,...,Household composition: Single family household: Cohabiting couple family: With dependent children; measures: Value,Household composition: Single family household: Cohabiting couple family: All children non-dependent; measures: Value,Household composition: Single family household: Lone parent family; measures: Value,Household composition: Single family household: Lone parent family: With dependent children; measures: Value,Household composition: Single family household: Lone parent family: All children non-dependent; measures: Value,Household composition: Single family household: Other single family household; measures: Value,Household composition: Single family household: Other single family household: Other family composition; measures: Value,Household composition: Other household types; measures: Value,Household composition: Other household types: With dependent children; measures: Value,"Household composition: Other household types: Other, including all full-time students and all aged 66 years and over; measures: Value"
0,2021,Hartlepool 001A,E01011954,965,266,103,163,671,69,260,...,89,13,188,135,53,2,2,28,13,15
1,2021,Hartlepool 001B,E01011969,599,157,92,65,431,116,189,...,36,7,56,33,23,0,0,11,7,4
2,2021,Hartlepool 001C,E01011970,488,122,65,57,349,83,174,...,23,3,39,25,14,3,3,17,9,8
3,2021,Hartlepool 001D,E01011971,521,104,32,72,406,42,235,...,30,5,61,37,24,0,0,11,6,5
4,2021,Hartlepool 001F,E01033465,741,151,42,109,566,46,317,...,63,4,73,59,14,4,4,24,9,15
