In [1]:
import petl as etl
from pipeline_utils.datestamp import save_datestamp
from pipeline_utils.filesystem.paths import DATA, RAW_DATA, REF_DATA, SITE
from pipeline_utils.reference.onspd.functions import normalise_postcode

Set up the output directory

In [2]:
# Directory that receives the CSV outputs and datestamp written by later cells.
OUT = SITE / 'data/360-giving/_data/'

# Create it together with any missing parents; a no-op when it already exists.
OUT.mkdir(parents=True, exist_ok=True)

## Load reference tables

ONS Postcode Directory (ONSPD) extract

In [3]:
# ONSPD extract; joined to the grants further down on its `pcds` postcode column.
onspd = etl.fromcsv(DATA / 'reference/onspd_extract.csv')

Load the list of funders

In [4]:
def _parse_flag(value):
    """Interpret a CSV cell as a boolean flag.

    ``bool`` cannot be applied directly to CSV text: every non-empty string is
    truthy, so a cell containing ``False``, ``0`` or ``no`` would be read as
    ``True``. Here blank cells, missing values and the usual "false" spellings
    (case-insensitive) give ``False``; any other content gives ``True``.
    """
    if value is None:
        return False
    return str(value).strip().lower() not in {'', 'false', 'f', 'no', 'n', '0'}


# Funder / grant programme list, with the two flag columns as real booleans so
# that the later `selecttrue('Culture Programme')` and `x is False` test on
# 'COVID' behave as intended.
funders = (
    etl
    .fromcsv(SITE / 'data/360-giving/funders.csv')
    .convert(['Culture Programme', 'COVID'], _parse_flag)
    .cache()
)

In [5]:
# Rows with a blank `Canonical` cell carry no renaming, so blanks are turned
# into None and those rows dropped before the lookup is built.
canonical_names = (
    etl
    .fromcsv(DATA / 'reference/360-giving/recipients.csv')
    .replace('Canonical', '', None)
    .selectnotnone('Canonical')
)

# Mapping of recipient name as published -> canonical name.
recipients = etl.lookupone(
    canonical_names,
    key='Recipient Org:Name',
    value='Canonical',
)

## Load data

Load the raw data downloaded from 360 Giving via the `get-data.py` script.

In [6]:
# Columns kept from the raw export, grouped by what they describe.
grant_columns = (
    # The grant itself
    'Title',
    'Amount Awarded',
    'Currency',
    'Award Date',
    # Who received it
    'Recipient Org:Identifier',
    'Recipient Org:Name',
    'Recipient Org:Charity Number',
    'Recipient Org:Company Number',
    'Recipient Org:Postal Code',
    # Who funded it
    'Funding Org:Identifier',
    'Funding Org:Name',
    'Funding Org:Postal Code',
    # Under which programme
    'Grant Programme:Code',
    'Grant Programme:Title',
    # Publication licence
    'License',
)

data = (
    etl
    .fromcsv(RAW_DATA / '360-giving.csv')
    .cut(*grant_columns)
    # Award dates are parsed with the YYYY-MM-DD format.
    .convert('Award Date', etl.dateparser('%Y-%m-%d'))
    # Normalise postcodes before they are used as the join key below.
    .convert('Recipient Org:Postal Code', normalise_postcode)
    # Swap known aliases for their canonical name; other names pass through.
    .convert(
        'Recipient Org:Name',
        lambda org_name: recipients.get(org_name, org_name),
    )
    # Applies to every column, not only 'Amount Awarded'.
    # NOTE(review): numeric-looking identifiers (charity / company numbers)
    # are converted too - confirm losing leading zeros there does not matter.
    .convertnumbers()
    # Attach the ONSPD columns by postcode, keeping grants with no match.
    .leftjoin(onspd, lkey='Recipient Org:Postal Code', rkey='pcds')
    # Keep grants with no local authority code, or in authority E08000021.
    .select('oslaua', lambda la_code: la_code in (None, 'E08000021'))
    .cache()
)

## Calculate Grants per recipient

In [7]:
# Source column -> short name used in the link tables (order is preserved).
link_fields = {
    'Funding Org:Name': 'funder',
    'Grant Programme:Title': 'grant_programme',
    'Recipient Org:Name': 'recipient',
}

# One row per grant, with every value upper-cased so that the groupings in
# the following cells ignore differences in letter case.
links = (
    data
    .cut(*link_fields)
    .rename(link_fields)
    .convertall('upper')
)

In [8]:
# Count the grants received by each recipient ...
recipient_counts = links.aggregate(
    key='recipient',
    aggregation={'total_grants': len},
)

# ... and write them out, largest count first.
recipient_counts.sort('total_grants', reverse=True).tocsv(
    DATA / 'grants_by_recipient.csv'
)

In [9]:
# Count the grants made under each (funder, programme) pair and write them
# out ordered by funder, then programme.
(
    links
    .aggregate(
        key=('funder', 'grant_programme'),
        aggregation={'total_grants': len},
    )
    .sort(('funder', 'grant_programme'))
    .tocsv(DATA / 'grants_by_funder_and_grant_programme.csv')
)

## Calculate fund values

Check if there are any new funds

In [10]:
# Every distinct (funder, programme) pair that appears in the grant data.
programmes_in_data = (
    data
    .cut('Funding Org:Name', 'Grant Programme:Title')
    .distinct()
)

# Pairs with no matching row in the funders list, i.e. not yet classified.
new_funds = (
    programmes_in_data
    .antijoin(funders)
    .sort(['Funding Org:Name', 'Grant Programme:Title'])
)

# Show them here and save them for review.
new_funds.displayall()
new_funds.tocsv(REF_DATA / '360-giving/new-funders.csv')

Funding Org:Name,Grant Programme:Title


Check if there are any funds in the funders list that no longer appear in the data

In [11]:
# Rows of the funders list with no matching grant in the data. No key is
# given, so the match is made on the column names the two tables share.
(
    funders
    .antijoin(data)
    .displayall()
)

Funding Org:Name,Grant Programme:Title,Culture Programme,COVID


Get awards and COVID awards as two tables

In [12]:
# Grants whose (funder, programme) pair is flagged as a culture programme.
culture_awards = (
    data
    .join(funders, key=['Funding Org:Name', 'Grant Programme:Title'])
    .selecttrue('Culture Programme')
)

# biselect returns (rows passing the test, rows failing it): the first table
# holds rows whose COVID flag is exactly False, the second all other rows.
awards, covid_awards = culture_awards.biselect(
    'COVID',
    lambda covid_flag: covid_flag is False,
)

Aggregate by Funding Organisation and Grant Programme

In [13]:
# Output column -> how it is computed for each (funder, programme) group.
programme_summary = {
    'Grants': len,
    'Funding': ('Amount Awarded', sum),
    'Earliest Award Date': ('Award Date', min),
    'Latest Award Date': ('Award Date', max),
}

(
    awards
    .cut('Amount Awarded', 'Funding Org:Name', 'Grant Programme:Title', 'Award Date')
    .selectnotnone('Funding Org:Name')
    .aggregate(
        ['Funding Org:Name', 'Grant Programme:Title'],
        programme_summary,
    )
    # Mean grant size; every group has at least one grant, so Grants >= 1.
    .addfield('Average Grant', lambda row: row.Funding / row.Grants)
    # Report whole currency units.
    .convert(('Funding', 'Average Grant'), round)
    # Programmes with the most grants first.
    .sort('Grants', reverse=True)
    .tocsv(OUT / 'by_programme.csv')
)

Aggregate by award date

In [14]:
# Snap every award date to the first day of its month so that grouping on
# 'Award Date' yields one row per calendar month.
awards_by_month = (
    awards
    .cut('Award Date', 'Amount Awarded')
    .convertnumbers()
    .convert('Award Date', lambda awarded: awarded.replace(day=1))
)

# Number of grants and total value per month, rounded to whole units.
(
    awards_by_month
    .aggregate(
        'Award Date',
        {
            'Number': len,
            'Amount Awarded': ('Amount Awarded', sum),
        },
    )
    .convert('Amount Awarded', round)
    .tocsv(OUT / 'summary_by_month.csv')
)

Aggregate by Recipients

In [15]:
# Number of grants and total amount awarded to each recipient.
recipient_totals = {
    'Number': len,
    'Amount Awarded': ('Amount Awarded', sum),
}

(
    awards
    .cut('Recipient Org:Name', 'Funding Org:Name', 'Grant Programme:Title', 'Amount Awarded')
    .aggregate('Recipient Org:Name', recipient_totals)
    .tocsv(OUT / 'by_recipient.csv')
)

Save datestamp for processing

In [16]:
# Datestamp file kept next to the outputs.
# NOTE(review): presumably records when this notebook last ran - confirm
# against pipeline_utils.datestamp.
processed_stamp = OUT / 'metadata/processed.yml'
save_datestamp(processed_stamp)