# Programme data

This notebook prepares data for the Programme theme page.

In [1]:
import json
from datetime import date

from utils.themes.programme import Programme
from utils.date import month_chunker
import petl as etl

In [2]:
from utils.paths import SITE

# Output directory for the event data files; every CSV/JSON file written by
# this notebook is placed under it.
EVENTS = SITE / 'themes/programme/_data/events'
# Create the directory up front (including missing parents) so the later
# writes do not fail; nothing happens if it already exists.
EVENTS.mkdir(exist_ok=True, parents=True)

Aggregation definition

In [3]:
# Aggregation spec used for the per-project roll-up: each output field maps to
# a (source field, reducer) pair. 'Records' gathers the distinct source row
# numbers into a set (turned into a count after aggregation); the other
# entries total the event counts and take the overall first/last dates.
event_aggregation = dict((
    ('Records', ('row', set)),
    ('Events', ('Event Count', sum)),
    ('Date From', ('Start Date', min)),
    ('Date To', ('End Date', max)),
))

Read the events and split each one into chunks at month boundaries. This handles multi-day events that span more than one calendar month.

As an example:

| Start -> End | Chunks |
|--------------|--------|
| 15 Jan -> 18 Jan | 1 chunk (15 Jan -> 18 Jan) |
| 15 Jan -> 18 Feb | 2 chunks (15 Jan -> 31 Jan, 1 Feb -> 18 Feb) |
| 15 Jan -> 18 Mar | 3 chunks (15 Jan -> 31 Jan, 1 Feb -> 28 Feb, 1 Mar -> 18 Mar) |

Each chunk is then rendered as a separate row in the table with the `rowmapmany` method.

In [4]:
def row_mapper(r):
    """Expand one event row into one output row per chunk.

    `r` must expose its chunks through the attribute `Chunk`. Each yielded
    tuple is the complete input row followed by a single chunk, so a row
    whose `Chunk` is empty yields nothing.
    """
    yield from ((*r, piece) for piece in r.Chunk)

# Single reference date for the whole pipeline. petl tables are evaluated
# lazily and `chunked_events` is iterated again by every later cell, so calling
# date.today() inside the lambdas could give a different answer from the one
# passed (eagerly) to selectle if the run crosses midnight. Capturing it once
# keeps the cut-off and the end-date cap consistent across all outputs.
today = date.today()

# One row per (event, month) chunk, limited to what has happened up to `today`.
chunked_events = (
    Programme.events
    # 'row' identifies the source event so chunks can be traced back to it.
    .addrownumbers()
    # month_chunker presumably yields (start, end) pairs, one per calendar
    # month touched by the event - the pair is unpacked into two fields below.
    .addfield('Chunk', lambda r: list(month_chunker(r['Start Date'], r['End Date'])))
    # Emit one row per chunk; the new trailing 'chunk' field holds that chunk.
    .rowmapmany(row_mapper, header=('row', *Programme.events.fieldnames(), 'Chunk', 'chunk'))
    # Replace the event-level dates with the chunk-level dates.
    .cutout('Start Date', 'End Date', 'Chunk')
    .unpack('chunk', newfields=['Start Date', 'End Date'])
    # Drop chunks that have not started yet ...
    .selectle('Start Date', today)
    # ... and truncate chunks that are still in progress.
    .convert('End Date', lambda f: today, where=lambda r: r['End Date'] > today)
    # Number of days covered by the chunk, counting both end points.
    .convert('Event Count', lambda _, r: (r['End Date'] - r['Start Date']).days + 1, pass_row=True)
    # First day of the month in which the chunk starts.
    .convert('Month', lambda _, r: r['Start Date'].replace(day=1), pass_row=True)
)

Create an aggregate by month of the chunked events

In [5]:
# Monthly totals: summed event count and number of chunk rows for each month,
# written with the month rendered as an ISO date string.
etl.tocsv(
    chunked_events
    .aggregate(
        'Month',
        {
            'Events': ('Event Count', sum),
            'Records': len,
        },
    )
    .convert('Month', lambda month: month.isoformat()),
    EVENTS / 'total_by_month.csv',
)

Aggregate by Project and by Month, and convert months to columns

In [6]:
# One row per project, one column per month; project/month combinations with
# no events are filled with 0.
etl.tocsv(
    chunked_events
    .aggregate(key=['Project Name', 'Month'], aggregation=sum, value='Event Count')
    .recast(key='Project Name', variablefield='Month', valuefield='value', missing=0),
    EVENTS / 'monthly_by_project.csv',
)

Aggregate by Project and by Month, and convert projects to columns

In [7]:
# One row per month, one column per project; month/project combinations with
# no events are filled with 0.
etl.tocsv(
    chunked_events
    .aggregate(key=['Project Name', 'Month'], aggregation=sum, value='Event Count')
    .recast(key='Month', variablefield='Project Name', valuefield='value', missing=0),
    EVENTS / 'monthly_breakdown.csv',
)

Extract event reports and aggregate

In [8]:
# Per-project summary of event reports: the number of reports and the summed
# audience, packed into a single dict-valued 'event_reports' field.
event_reports = (
    Programme.event_reports
    .aggregate('project_id', {'reports': len, 'audience': ('audience', sum)})
    .addfield(
        'event_reports',
        lambda r: dict(reports=r.reports, audience=r.audience),
    )
    # The two scalar fields now live inside the dict, so drop them.
    .cutout('reports', 'audience')
)

Create a project breakdown

In [9]:
# Projects that have events: roll the chunks up per project and attach the
# event-report summary where one exists.
matched_event_reports = (
    chunked_events
    .aggregate(
        ['project_id', 'Project Name', 'Programme Category', 'Evaluation Category'],
        event_aggregation,
    )
    # 'Records' is aggregated as a set of source row numbers; keep its size.
    .convert('Records', len)
    .leftjoin(event_reports)
)

# Event-report summaries with no matching rows in the chunked events,
# enriched with the project details looked up by project id.
orphan_event_reports = (
    event_reports
    .antijoin(chunked_events)
    .leftjoin(
        Programme.projects.cut(
            'id',
            'Project Name',
            'Greenlight Status',
            'Project Phase',
            'Programme Category',
            'Evaluation Category',
        ),
        lkey='project_id',
        rkey='id',
    )
)

# Stack both sets of rows and order them by project name.
all_project_events_and_reports = etl.sort(
    etl.cat(matched_event_reports, orphan_event_reports),
    'Project Name',
)


In [10]:
# Build and serialise the payload BEFORE opening the output file. The petl
# table is evaluated lazily, so doing this inside the `with` block would
# truncate by_project.json first and leave it empty/partial if evaluation or
# JSON encoding failed.
by_project = dict(
    all_project_events_and_reports
    .addfield('Details', lambda r: {
        'records': r.Records,
        'events': r.Events,
        'eventReports': r.event_reports,
        'evaluationCategory': r['Evaluation Category'],
        'programmeCategory': r['Programme Category'],
        # Rows that come only from event reports have no dates.
        'earliestDate': r['Date From'].isoformat() if r['Date From'] else None,
        'latestDate': r['Date To'].isoformat() if r['Date To'] else None,
    })
    # Two-field records act as (key, value) pairs for dict().
    .cut('Project Name', 'Details')
    .records()
)
by_project_json = json.dumps(by_project, indent=2)

with open(EVENTS / 'by_project.json', 'w') as f:
    f.write(by_project_json)

Create a summary file

In [11]:
# Build and serialise the summary BEFORE opening the output file. min()/max()
# raise ValueError when there are no chunked events; computing first means
# such a failure leaves any existing summary.json untouched instead of
# truncating it to an empty file.
summary = {
    # Total of the per-chunk event counts.
    'total': sum(chunked_events.values('Event Count')),
    # Number of excluded events for each 'Validation' value.
    'excluded': dict(Programme.excluded_events.aggregate('Validation', len).records()),
    'date': {
        'earliest': min(chunked_events.values('Start Date')).isoformat(),
        'latest': max(chunked_events.values('End Date')).isoformat(),
    },
}
summary_json = json.dumps(summary, indent=2)

with open(EVENTS / 'summary.json', 'w') as f:
    f.write(summary_json)