In [243]:
from pathlib import Path
import petl as etl

Set up the output directory for the generated data files

In [245]:
# Root folder that receives every CSV written by this notebook.
OUTDIR = Path('..') / 'src' / '_data' / 'arts_council_england'
# Create the folder (and any missing ancestors); a no-op when it already exists.
OUTDIR.mkdir(parents=True, exist_ok=True)

## Project Grants data

In [246]:
# Sub-folder of OUTDIR that holds the Project Grants outputs.
PGDATA = OUTDIR.joinpath('project_grants')
# Create it up front so the later tocsv() calls have somewhere to write.
PGDATA.mkdir(parents=True, exist_ok=True)

In [247]:
# Parser for date strings carrying a literal midnight time, e.g. '2024-01-31 00:00:00'.
# strict=True asks petl to raise on a non-matching value instead of passing it through.
date_parser = etl.dateparser(fmt='%Y-%m-%d 00:00:00', strict=True)

In [248]:
# Local authorities included in the analysis, as one dict per authority holding
# its code ('LAD24CD') and its name ('LAD24NM').
local_authorities = [
    {'LAD24CD': code, 'LAD24NM': name}
    for code, name in (
        ('E06000047', 'County Durham'),
        ('E06000057', 'Northumberland'),
        ('E08000021', 'Newcastle upon Tyne'),
        ('E08000022', 'North Tyneside'),
        ('E08000023', 'South Tyneside'),
        ('E08000024', 'Sunderland'),
        ('E08000037', 'Gateshead'),
    )
]

In [249]:
# Mapping from local authority name to its code, built from the list above.
code_lookup = etl.fromdicts(local_authorities).lookupone(key='LAD24NM', value='LAD24CD')

In [271]:
# Project Grants rows for the selected local authorities, with typed columns.
grants_data = (
    etl.fromcsv('../raw/arts-council-project-grants.csv')
    # Keep only rows whose 'Local authority' is one of the listed names.
    .selectin(
        'Local authority',
        [authority['LAD24NM'] for authority in local_authorities],
    )
    # Parse the award date string and make the amount numeric.
    .convert('Award date', date_parser)
    .convert('Award amount', float)
    # Attach the authority code matching each row's authority name.
    .addfield('LAD24CD', lambda row: code_lookup[row['Local authority']])
)

Grants by Recipient / Local authority, filtered to Newcastle upon Tyne (E08000021)

In [272]:
# Count grants per (Recipient, LAD24CD) pair into a 'Grants' column, keep the
# rows for authority code E08000021, and write them to CSV.
(
    grants_data
    .aggregate(('Recipient', 'LAD24CD'), len, field='Grants')
    .selecteq('LAD24CD', 'E08000021')
    .tocsv(PGDATA / 'grants_by_recipient.csv')
)

In [282]:
# Total award amount per local authority (rows) and award year (columns).
(
    grants_data
    .cut('Award date', 'Decision quarter', 'LAD24CD', 'Award amount', 'Main discipline')
    # Derive the calendar year from the parsed award date.
    .addfield('Year', lambda row: row['Award date'].year)
    # NOTE(review): aggregate() sorts by its key again, so this sort only fixes
    # the order in which amounts are summed within each group.
    .sort('Award date')
    # Sum 'Award amount' within each (Year, LAD24CD) group; the result lands in 'value'.
    .aggregate(key=('Year', 'LAD24CD'), aggregation=sum, value='Award amount')
    # Spread the years across columns, one row per authority code.
    .pivot('LAD24CD', 'Year', 'value', aggfun=sum)
    .tocsv(PGDATA / 'grants_value_by_lad_by_year.csv')
)

In [283]:
# Number of grants per local authority (rows) and award year (columns).
#
# A row count needs only the award date (for the year) and the authority code,
# so the other columns are not carried along. No explicit sort is needed either:
# aggregate() and pivot() sort by their own keys, and a count does not depend on
# the order of rows within a group.
(
    grants_data
    .cut('Award date', 'LAD24CD')
    # Derive the calendar year from the parsed award date.
    .addfield('Year', lambda row: row['Award date'].year)
    # Count rows within each (Year, LAD24CD) group; the result lands in 'value'.
    .aggregate(('Year', 'LAD24CD'), len)
    # Spread the years across columns, one row per authority code.
    .pivot('LAD24CD', 'Year', 'value', sum)
    .tocsv(PGDATA / 'grants_count_by_lad_by_year.csv')
)