In [1]:
import json

import petl as etl
from pipeline_utils.filesystem.paths import DATA, RAW_DATA, REF_DATA, SITE

In [2]:
# Directory under SITE that receives the files written by the cells below.
OUT = SITE.joinpath('data/360-giving/_data/')
# Create it together with any missing parent directories; an existing
# directory is not an error.
OUT.mkdir(parents=True, exist_ok=True)

In [3]:
# Base table for the rest of the notebook: the raw 360-giving CSV reduced to
# the columns listed below, with 'Award Date' run through a '%Y-%m-%d' date
# parser, wrapped in petl's cache view since several later cells read from it.
data = etl.cache(
    etl.convert(
        etl.cut(
            etl.fromcsv(RAW_DATA / '360-giving.csv'),
            # Grant fields
            'Title',
            'Amount Awarded',
            'Currency',
            'Award Date',
            # Recipient organisation fields
            'Recipient Org:Identifier',
            'Recipient Org:Name',
            'Recipient Org:Charity Number',
            'Recipient Org:Company Number',
            'Recipient Org:Postal Code',
            # Funding organisation fields
            'Funding Org:Identifier',
            'Funding Org:Name',
            'Funding Org:Postal Code',
            # Grant programme fields
            'Grant Programme:Code',
            'Grant Programme:Title',
            # Licensing
            'License (see note)',
        ),
        'Award Date',
        etl.dateparser('%Y-%m-%d'),
    )
)

In [4]:
# Funder / programme / recipient names for every grant, under short column
# names and with every value upper-cased.
links = (
    data
    .cut('Funding Org:Name', 'Grant Programme:Title', 'Recipient Org:Name')
    .rename({
        'Funding Org:Name': 'funder',
        'Grant Programme:Title': 'grant_programme',
        'Recipient Org:Name': 'recipient',
    })
    .convertall('upper')
)

In [5]:
# Number of grant rows per recipient, largest count first, written to DATA.
(
    links
    .aggregate('recipient', {'total_grants': len})
    .sort('total_grants', reverse=True)
    .tocsv(DATA / 'grants_by_recipient.csv')
)

In [6]:
# Number of grant rows per (funder, grant programme) pair, ordered by funder
# and then programme, written to DATA.
(
    links
    .aggregate(('funder', 'grant_programme'), {'total_grants': len})
    .sort(('funder', 'grant_programme'))
    .tocsv(DATA / 'grants_by_funder_and_grant_programme.csv')
)

In [7]:
def _read_reference_column(relative_path, field):
    """Load a text file under REF_DATA with `etl.fromtext` as a one-column
    table named `field`, and return that column's values as a list."""
    table = etl.fromtext(REF_DATA / relative_path, header=(field,))
    return list(table.values(field))


# Reference lists consumed by `limit_to_included` and dumped to inclusions.json.
included_funders = _read_reference_column('360-giving/funders.txt', 'funder')
included_programmes = _read_reference_column('360-giving/programmes.txt', 'programme')
excluded_programmes = _read_reference_column('360-giving/programmes-exclude.txt', 'programme')

In [8]:
def limit_to_included(row):
    """Row predicate: decide whether a grant row is kept.

    A row is kept when its upper-cased 'Funding Org:Name' is in
    `included_funders` or its upper-cased 'Grant Programme:Title' is in
    `included_programmes`, unless that upper-cased programme title is in
    `excluded_programmes`, which always rejects the row.

    Returns True to keep the row, False to drop it.
    """
    # Read the funder first, then the programme, matching the order in which
    # the fields are accessed when the condition is evaluated as one expression.
    funder = row['Funding Org:Name'].upper()
    programme = row['Grant Programme:Title'].upper()

    # The exclusion list overrides both inclusion lists.
    if programme in excluded_programmes:
        return False
    return funder in included_funders or programme in included_programmes

In [9]:
# Per-programme summary of the included grants, written to OUT/by_programme.csv.
# For each (funder, programme) pair: number of grants, total and average amount
# awarded (both rounded), and the earliest and latest award dates. Rows are
# ordered by number of grants, largest first.
(
    data
    .select(limit_to_included)
    .cut('Amount Awarded', 'Funding Org:Name', 'Grant Programme:Title', 'Award Date')
    # Parse numbers in the amount column only. A table-wide convertnumbers()
    # would also rewrite numeric-looking funder names or programme titles
    # (e.g. "2020" or "007") and so alter the grouping keys and the output.
    .convert('Amount Awarded', etl.numparser())
    .aggregate(
        ('Funding Org:Name', 'Grant Programme:Title'),
        {
            'Grants': len,
            'Funding': ('Amount Awarded', sum),
            'Earliest Award Date': ('Award Date', min),
            'Latest Award Date': ('Award Date', max),
        },
    )
    # Every group holds at least one row, so Grants is never zero here.
    .addfield('Average Grant', lambda r: r.Funding / r.Grants)
    .convert(('Funding', 'Average Grant'), round)
    .sort('Grants', reverse=True)
    .tocsv(OUT / 'by_programme.csv')
)

In [10]:
# Monthly summary of the included grants, written to OUT/summary_by_month.csv:
# number of grants and total amount awarded (rounded) per calendar month.
(
    data
    .select(limit_to_included)
    .cut('Award Date', 'Amount Awarded')
    .convertnumbers()
    # Move every award date to the first of its month so rows group by month.
    .convert('Award Date', lambda award_date: award_date.replace(day=1))
    .aggregate(
        'Award Date',
        {
            'Number': len,
            'Amount Awarded': ('Amount Awarded', sum),
        },
    )
    .convert('Amount Awarded', round)
    .tocsv(OUT / 'summary_by_month.csv')
)

In [11]:
# Save the three filter lists next to the generated CSVs, as JSON with a
# two-space indent.
with open(OUT / 'inclusions.json', mode='w') as f:
    f.write(
        json.dumps(
            {
                'funders': included_funders,
                'programmes': included_programmes,
                'excluded_programmes': excluded_programmes,
            },
            indent=2,
        )
    )