In [38]:
import sys, os
# Put the parent directory on the import path (presumably where the `utils`
# package imported below lives). It is appended only when not already present,
# so re-running the cell does not add duplicates.
_parent_dir = os.path.abspath('..')
_parent_dir in sys.path or sys.path.append(_parent_dir)

True

In [39]:
import pandas as pd
import petl as etl
from utils.paths import OPEN_DATA, SITE

In [40]:
# Directory that receives the prepared demographics CSV files (each dataset
# below is written into its own sub-folder of it).
BRADFORD_DG = SITE / 'observatory/demographics/_data/'
# Create it, along with any missing parents; no error if it already exists.
BRADFORD_DG.mkdir(exist_ok=True, parents=True)

In [41]:
# Read the census parquet file with pandas, then expose the frame as a petl table.
census_frame = pd.read_parquet(OPEN_DATA / 'census.parquet')
census = etl.fromdataframe(census_frame)

In [46]:
# Lookup of census variable -> list of its category labels. The source rows are
# sorted by 'sortorder' first, so each list follows that ordering.
# NOTE(review): convertnumbers() is applied to every column, not only
# 'sortorder' — confirm no category label is meant to stay a numeric-looking string.
_category_table = etl.sort(
    etl.convertnumbers(etl.fromcsv(OPEN_DATA / 'census_categories.csv')),
    'sortorder',
)
categories = etl.lookup(_category_table, key='variable', value='category')

In [56]:
def prepare_data(variable, folder, ward_sorter=None):
    """Write the percentage tables for one census variable as CSV files.

    Rows of the module-level ``census`` table are filtered to the given
    ``variable`` and to the ``'Percent'`` measure, those two columns are
    dropped, and the ``category`` values are recast into one column each.
    The result keeps ``geography_code``, ``geography_name`` and the category
    columns, in the order listed for ``variable`` in the module-level
    ``categories`` lookup.

    Two files are written into ``folder``:

    * ``by_geography.csv`` - every geography in the recast table.
    * ``by_ward.csv`` - only rows whose ``geography_code`` starts with
      ``'E05'``, with an extra ``order`` column computed by ``ward_sorter``
      and sorted ascending on it.

    Args:
        variable: Value of the census ``variable`` column to extract; must be
            a key of ``categories``.
        folder: Output directory (path-like object supporting ``mkdir`` and
            ``/``). Created, with parents, if missing.
        ward_sorter: Value handed to petl's ``addfield`` for the ``order``
            column - normally a callable taking a row and returning its sort
            key. With the default ``None`` the column is presumably left
            empty and the row order unchanged - TODO confirm.

    Raises:
        KeyError: If ``variable`` has no entry in ``categories``. Raised
            before anything is created on disk.
    """
    # Resolve the output columns first: an unknown variable would otherwise
    # surface as an opaque "argument after * must be an iterable" TypeError,
    # and only after the output folder had already been created.
    category_fields = categories.get(variable)
    if category_fields is None:
        raise KeyError(
            'No categories found for census variable {!r}'.format(variable)
        )

    folder.mkdir(exist_ok=True, parents=True)
    data = (
        census
        .selecteq('variable', variable)
        .selecteq('measure', 'Percent')
        .cutout('variable', 'measure')
        .recast(variablefield='category')
        .cut('geography_code', 'geography_name', *category_fields)
    )
    data.tocsv(folder / 'by_geography.csv')
    (
        data
        .select(lambda r: r.geography_code.startswith('E05'))
        .addfield('order', ward_sorter)
        .sort('order')
        .tocsv(folder / 'by_ward.csv')
    )

# One entry per dataset: census variable, output sub-folder of BRADFORD_DG, and
# the key used to order wards in by_ward.csv. Keys are sorted ascending, so a
# negated key puts the largest values first.
ward_outputs = [
    # Sum of the first five Age categories, largest first.
    ('Age', 'age',
     lambda row: -sum(row[name] for name in categories.get('Age')[:5])),
    # Female percentage, smallest first.
    ('Sex', 'sex',
     lambda row: row.Female),
    # One named ethnic group category, largest first.
    ('Ethnic group', 'ethnic_group',
     lambda row: -row['Asian, Asian British or Asian Welsh']),
    # Sum of the last two General health categories, largest first.
    ('General health', 'general_health',
     lambda row: -sum(row[name] for name in categories.get('General health')[-2:])),
]
for census_variable, subfolder, sort_key in ward_outputs:
    prepare_data(census_variable, BRADFORD_DG / subfolder, ward_sorter=sort_key)