# Dashboard

This pipeline processes data into a form to drive the dashboard.

The parameters are injected using papermill.

In [1]:
target: str = 'q1'  # Name of the dashboard period; used as a directory segment under insights/dashboard
start: str = '2025-01-01'  # First day of the reporting range, as an ISO date (YYYY-MM-DD)
end: str = '2025-03-31'  # Last day of the reporting range, as an ISO date (YYYY-MM-DD)
fixed_date: str = 'true'  # 'false' caps the end of the range at today; 'true' uses `end` as given

## Setup

Import some libraries

In [2]:
import json
from datetime import date
from ast import literal_eval

import petl as etl
from utils.paths import PUBLISHED, SITE
from utils.themes.programme import Programme

Set the target directory, and ensure it exists

In [3]:
# Directory that receives every JSON file written by this notebook.
# One `_data` folder per dashboard `target`; missing parents are created and
# an already-existing directory is not an error.
TARGET = SITE / 'insights/dashboard' / target / '_data'
TARGET.mkdir(parents=True, exist_ok=True)

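Calculate the date range as a `(start, end, updated)` tuple. If the `fixed_date` parameter is `false`, the end date is capped at today so the range never extends into the future; otherwise the configured end date is used as given. The `updated` element is always today's date.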

In [4]:
# Reporting window as a (start, end, updated) tuple of `datetime.date` objects.
#
# `fixed_date` arrives from papermill and may be the string 'false'/'False' or a
# real boolean, depending on how it was passed. Normalise it before comparing so
# every spelling of "false" selects the live (capped-at-today) range instead of
# silently falling through to the fixed range.
_today = date.today()  # read once so `end` and `updated` can never straddle midnight
_range_start = date.fromisoformat(start)
_range_end = date.fromisoformat(end)

if str(fixed_date).strip().lower() == 'false':
    # Live range: never report past today, but never past the configured end either.
    _range_end = min(_today, _range_end)

date_range = (_range_start, _range_end, _today)

Write the timestamp file

In [5]:
# Serialise the three range dates as strings under explicit keys.
timestamp_payload = {
    'start': str(date_range[0]),
    'end': str(date_range[1]),
    'updated': str(date_range[2]),
}

with open(TARGET / 'timestamp.json', 'w') as f:
    json.dump(timestamp_payload, f)

## Events & projects data

In [6]:
def _clip_to_range_start(d):
    """Return `d`, or the start of the reporting range if `d` is earlier."""
    return max(d, date_range[0])


def _clip_to_range_end(d):
    """Return `d`, or the end of the reporting range if `d` is later."""
    return min(d, date_range[1])


def _days_in_run(row):
    """Count the inclusive number of days between the row's dates (at least 1)."""
    return max(1, (row['End Date'] - row['Start Date']).days + 1)


# Public-facing Airtable items whose run overlaps the reporting range.
# Filters currently disabled (kept for reference):
#   .selectnotnone('Project Name')
#   .selectcontains('Producing model', 'Artist Led Awards', complement=True)
_airtable_in_range = (
    Programme.events
    .selectge('End Date', date_range[0])
    .selectle('Start Date', date_range[1])
    .selectin('Item Type', ['Event (any public-facing activity)'])
)

# Clip each run to the reporting range, then store the clipped day count as 'Events'.
raw_airtable_events = (
    _airtable_in_range
    .convert('Start Date', _clip_to_range_start)
    .convert('End Date', _clip_to_range_end)
    .addfield('Events', _days_in_run)
)

# Total 'Events' per project / project id / evaluation category.
# The summed column takes petl's default output name, 'value'.
airtable_events = raw_airtable_events.aggregate(
    ['Project Name', 'project_id', 'Evaluation Category'],
    sum,
    'Events',
)

In [7]:
def _strip_name(x):
    """Remove leading and trailing whitespace from a project name."""
    return x.strip()


def _events_is_blank(row):
    """True when the row's 'Events' cell is empty or otherwise falsy."""
    return not row.Events


# Manually recorded events, with columns renamed to match the Airtable table.
_manual_event_rows = (
    etl
    .fromcsv(PUBLISHED / 'manual/manual-events.csv')
    .rename({
        'Project': 'Project Name',
        'Airtable project ID': 'project_id',
        'Evaluation category': 'Evaluation Category',
    })
)

# Drop rows flagged for exclusion and rows dated outside the reporting range.
_manual_events_in_range = (
    _manual_event_rows
    .selectne('Exclude from events count', 'True')
    .convert('Date', date.fromisoformat)
    .selectge('Date', date_range[0])
    .selectle('Date', date_range[1])
)

manual_events = (
    _manual_events_in_range

    # Tidy the project name, then map it to its canonical form.
    .convert('Project Name', _strip_name)
    .convert('Project Name', Programme.canonical_project_name)

    # A blank 'Events' cell counts as a single event.
    .update('Events', 1, where=_events_is_blank)
    .convert('Events', int)

    # Summed column takes petl's default output name, 'value'.
    .aggregate(['Project Name', 'project_id', 'Evaluation Category'], sum, 'Events')
)

In [8]:
# Project ids that appear in the manual events table (blank ids ignored).
_manual_project_ids = [
    project_id
    for project_id in manual_events.values('project_id')
    if project_id != ''
]

# Airtable rows are dropped for any project that has manual entries, so the
# manual figures replace the Airtable ones rather than adding to them.
_airtable_without_manual = airtable_events.selectnotin('project_id', _manual_project_ids)

events = (
    etl.cat(_airtable_without_manual, manual_events)
    .cutout('project_id')
    # Both inputs carry their totals in 'value'; re-total and name the result 'Events'.
    .aggregate(['Project Name', 'Evaluation Category'], sum, 'value', field='Events')
)

In [9]:
# Distinct project names present in the combined events table.
_project_names = events.cut('Project Name').distinct().values('Project Name')

# Number of rows in `events` for each evaluation category.
_rows_per_category = events.aggregate('Evaluation Category', len)

project_summary = dict(
    total=events.distinct('Project Name').nrows(),
    by_category=dict(_rows_per_category.records()),
    names=list(_project_names),
)

In [10]:
# Event totals per evaluation category.
_events_per_category = events.aggregate('Evaluation Category', sum, 'Events')

# `events` rows are (Project Name, Evaluation Category, Events).
# Keyed by project name, so a later row for the same name replaces an earlier one.
_events_per_project = {
    project_name: {'category': category, 'count': count}
    for project_name, category, count in events.records()
}

events_summary = dict(
    total=sum(events.values('Events')),
    by_category=dict(_events_per_category.records()),
    by_project=_events_per_project,
)

In [11]:
# Combine the project and event summaries into a single document.
events_payload = {
    'projects': project_summary,
    'events': events_summary,
}

with open(TARGET / 'events.json', 'w') as f:
    json.dump(events_payload, f, indent=2)

## Audiences

In [12]:
# Parser for the 'start' column, which is stored as 'YYYY-MM-DD HH:MM:SS' text.
_parse_instance_start = etl.dateparser('%Y-%m-%d %H:%M:%S')

# Ticketed event instances within the reporting range whose eventType
# contains 'MainProgrammeEvent'.
_event_instances = etl.fromcsv(PUBLISHED / 'ticketing/event-instances.csv')
ticketed_events = (
    _event_instances
    .convert('start', _parse_instance_start)
    .selectrangeopen('start', date_range[0], date_range[1])
    .selectcontains('eventType', 'MainProgrammeEvent')
)

In [13]:
# Identify the in-range event instances to attach ticket rows to.
_instance_lookup = ticketed_events.cut('instance_id', 'event_name')

# Ticket counts recorded at 'oslaua' geography level.
_tickets_by_area = (
    etl
    .fromcsv(PUBLISHED / 'ticketing/tickets.csv')
    .selecteq('geography_type', 'oslaua')
)

tickets_sold = (
    _tickets_by_area
    # Right join: every in-range instance is kept, with or without ticket rows.
    .rightjoin(_instance_lookup)
    .cut('event_name', 'geography_code', 'count_of_tickets')
    .convertnumbers()
    # Discard rows that ended up with no ticket count.
    .selectnotnone('count_of_tickets')
)

In [14]:
# Sales-and-scans rows whose event date falls inside the reporting range.
_scan_rows = (
    etl
    .fromcsv(PUBLISHED / 'manual/spektrix-sales-and-scans.csv')
    .convert('Event Date', etl.dateparser('%Y-%m-%d'))
    .selectrangeopen('Event Date', date_range[0], date_range[1])
    .convertnumbers()
)

# Collapse to a single row holding the overall 'Sold' and 'Scanned' totals.
_scan_totals = _scan_rows.aggregate(None, {
    'sold': ('Sold', sum),
    'scanned': ('Scanned', sum)
})

# Melt the one-row table into (name, total) pairs and collect them in a dict.
tickets_scanned = dict(
    _scan_totals
    .melt(variables=['sold', 'scanned'])
    .records()
)

In [15]:
# All tickets sold, and the subset sold against geography code E08000032
# (reported under the 'bradford' key).
_sold_total = sum(tickets_sold.values('count_of_tickets'))
_sold_bradford = sum(
    tickets_sold
    .selecteq('geography_code', 'E08000032')
    .values('count_of_tickets')
)

# NOTE(review): `tickets_scanned` is a dict with 'sold' and 'scanned' entries,
# so 'scanned' -> 'total' holds a mapping rather than a single number.
# Confirm this is the shape the dashboard expects.
ticketed_audience = dict(
    sold=dict(total=_sold_total, bradford=_sold_bradford),
    scanned=dict(total=tickets_scanned),
)

In [16]:
# Manual events file again, this time for its audience figures.
_manual_audience_rows = (
    etl
    .fromcsv(PUBLISHED / 'manual/manual-events.csv')
    .rename({
        'Project': 'Project Name',
        'Airtable project ID': 'project_id',
        'Evaluation category': 'Evaluation Category',
    })
    .convert('Audience', int)
    .convert('Date', date.fromisoformat)
)

# Keep rows that have an audience figure and fall inside the reporting range.
_manual_audience_in_range = (
    _manual_audience_rows
    .selectnotnone('Audience')
    .selectge('Date', date_range[0])
    .selectle('Date', date_range[1])
)

# Audience total per evaluation category, in a column named 'Audience'.
manual_audience = _manual_audience_in_range.aggregate(
    'Evaluation Category', sum, 'Audience', field='Audience'
)

In [17]:
# Audience from manually recorded events: overall and per evaluation category.
unticketed_audience = dict(
    total=sum(manual_audience.values('Audience')),
    by_category=dict(manual_audience.records()),
)

In [18]:
# Headline audience = tickets sold + manually recorded audience.
audience_payload = {
    'total': ticketed_audience['sold']['total'] + unticketed_audience['total'],
    'ticketed': ticketed_audience,
    'unticketed': unticketed_audience,
}

with open(TARGET / 'audience.json', 'w') as f:
    json.dump(audience_payload, f, indent=2)

## Participants

In [19]:
# Manual participant rows dated inside the reporting range.
_participant_rows_in_range = (
    etl
    .fromcsv(PUBLISHED / 'manual/manual-participants.csv')
    .convert('Date', date.fromisoformat)
    .selectge('Date', date_range[0])
    .selectle('Date', date_range[1])
)

# Everything except the 'Schools engaged' variable, with numeric text parsed.
manual_participants = (
    _participant_rows_in_range
    .selectnotin('variable', ['Schools engaged'])
    .convertnumbers()
)

In [20]:
# Only the 'Schools engaged' rows of the manual participants file, within range.
_schools_rows = (
    etl
    .fromcsv(PUBLISHED / 'manual/manual-participants.csv')
    .convert('Date', date.fromisoformat)
    .selectge('Date', date_range[0])
    .selectle('Date', date_range[1])
    .selectin('variable', ['Schools engaged'])
    .convertnumbers()
)

# Sum of the 'value' column over those rows.
schools_engaged = sum(_schools_rows.values('value'))

In [21]:
# Participant totals per variable, and per variable/project pair.
_by_type = manual_participants.aggregate('variable', sum, 'value')
_by_type_and_project = manual_participants.aggregate(['variable', 'Project'], sum, 'value')

participant_summary = {
    'total': sum(manual_participants.values('value')),
    'by_partipant_type': dict(_by_type.records()),
    # TODO make this nested by variable
    'by_partipant_type_and_project': list(_by_type_and_project.dicts()),
    'creative_learning': {
        'schools_engaged': schools_engaged
    }
}

## Volunteers

In [22]:
# Volunteer checkpoint counts up to the end of the reporting range.
# Only an upper date bound is applied, so earlier checkpoints are included too.
_checkpoint_rows = etl.fromcsv(PUBLISHED / 'volunteers/checkpoints.csv')
signups = (
    _checkpoint_rows
    .convert('date', etl.dateparser('%Y-%m-%d'))
    .selectle('date', date_range[1])
    .convertnumbers(strict=False)
)

In [23]:
# Volunteer shifts of type 'BD25 Event' dated inside the reporting range.
_shift_rows = etl.fromcsv(PUBLISHED / 'volunteers/shifts.csv')
shifts = (
    _shift_rows
    .convert('date', etl.dateparser('%Y-%m-%d'))
    .selecteq('type', 'BD25 Event')
    .selectrangeopen('date', date_range[0], date_range[1])
    .convertnumbers(strict=False)
)

In [24]:
# Distinct event names that had volunteer shifts.
_volunteer_event_names = shifts.cut('rosterfy_event_name').distinct()

# Sum of 'count' for each signup checkpoint.
_checkpoint_totals = signups.aggregate('checkpoint', sum, 'count')

volunteer_summary = dict(
    events=dict(
        count=_volunteer_event_names.nrows(),
        names=list(_volunteer_event_names.values('rosterfy_event_name')),
    ),
    shifts=dict(
        count=shifts.nrows(),
        attended=sum(shifts.values('attended')),
        hours=sum(shifts.values('hours')),
    ),
    people=dict(
        checkpoints=dict(_checkpoint_totals.records()),
    ),
)

In [25]:
# Headline total = manual participants + the '1. Monitoring & Evaluation'
# volunteer checkpoint count (a KeyError here means that checkpoint is absent).
_volunteer_checkpoints = volunteer_summary['people']['checkpoints']
participants_payload = {
    'total': participant_summary['total'] + _volunteer_checkpoints['1. Monitoring & Evaluation'],
    'participants': participant_summary,
    'volunteers': volunteer_summary,
}

with open(TARGET / 'participants.json', 'w') as f:
    json.dump(participants_payload, f, indent=2)