# Dashboard

This pipeline processes data into a form to drive the dashboard.

The parameters are injected using papermill.

In [1]:
# Papermill parameters: the values below are defaults that papermill may override.

# Path segment inserted between 'insights/dashboard' and '_data' when building
# the output directory; the empty default adds no extra level.
target: str = ''
# First day of the reporting window, as an ISO-format date string.
start: str = '2025-01-01'
# Last day of the reporting window, as an ISO-format date string.
end: str = '2025-12-31'
# Set to 'false' to cap the end of the window at today instead of using `end` as given.
fixed_date: str = 'false'

## Setup

Import some libraries

In [2]:
import json
from datetime import date
from ast import literal_eval

import petl as etl
from utils.paths import PUBLISHED, SITE

Set the target directory, and ensure it exists

In [3]:
# Output directory for this run: <SITE>/insights/dashboard/<target>/_data.
# Missing parent directories are created; an existing directory is left alone.
TARGET = SITE.joinpath('insights/dashboard', target, '_data')
TARGET.mkdir(parents=True, exist_ok=True)

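Calculate the date range as `(start, end, updated)`. If the `fixed_date` parameter is `false`, the end date is capped at today (it is never later than `end`); otherwise `end` is used as given. The last element records today's date as the update date.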

In [None]:
# Resolve the reporting window as a (start, end, updated) tuple of `date` objects.
#
# `date.today()` is read exactly once so that the capped end date and the
# `updated` stamp always agree, even if the cell happens to run across midnight.
_today = date.today()
_requested_end = date.fromisoformat(end)

# The flag is documented as the string 'false', but papermill can hand over a
# real boolean (or 'False') depending on how the parameter is passed. Normalise
# it so that every spelling of "false" selects the rolling (capped) window.
_rolling_window = str(fixed_date).strip().lower() == 'false'

date_range = (
    date.fromisoformat(start),
    # Rolling window: never report beyond today. Fixed window: use `end` as given.
    min(_today, _requested_end) if _rolling_window else _requested_end,
    _today,
)

Write the timestamp file

In [None]:
# Store the three dates of `date_range` as strings under the keys
# start / end / updated.
with open(TARGET / 'timestamp.json', 'w') as f:
    labels = ['start', 'end', 'updated']
    json.dump({label: str(day) for label, day in zip(labels, date_range)}, f)

## Projects data

In [6]:
# Load the published projects, parse both date columns and trim the names.
projects_parsed = (
    etl.fromjson(PUBLISHED / 'programme/projects.json')
    .convert(['Date From', 'Date To'], etl.dateparser('%Y-%m-%d'))
    .convert('Project Name', lambda name: name.strip())
)

# Keep the projects that overlap the reporting window: 'Date To' must not be
# before the window start and 'Date From' must not be after the window end.
projects = (
    projects_parsed
    .selectge('Date To', date_range[0])
    .selectle('Date From', date_range[1])
)

In [7]:
def category_exploder(row):
    """Yield a one-element row for each entry in the row's 'Producing model' value.

    Note: a later cell in this notebook defines another function under the
    same name, for programme categories.
    """
    yield from ([model] for model in row['Producing model'])

# Number of projects per producing model; a project that lists several models
# contributes one row to each of them.
known_model_counts = (
    projects
    .rowmapmany(category_exploder, header=['Producing model'])
    .aggregate('Producing model', len)
)

# Projects with an empty model list are counted as a whole and labelled 'UNKNOWN'.
unknown_model_count = (
    projects
    .selecteq('Producing model', [])
    .aggregate(None, len)
    .addfield('Producing model', 'UNKNOWN')
)

producing_model = etl.cat(known_model_counts, unknown_model_count)

In [8]:
# Headline project figures: row count, counts per producing model and the
# distinct project names.
distinct_project_names = projects.cut('Project Name').distinct().values('Project Name')

project_summary = dict(
    count=projects.nrows(),
    producing_model=dict(producing_model.records()),
    names=list(distinct_project_names),
)

In [9]:
# Load the published events and normalise the columns used below.
events_parsed = (
    etl.fromcsv(PUBLISHED / 'programme/events.csv')
    .convert(['Start Date', 'End Date'], etl.dateparser('%Y-%m-%d'))
    # The category column holds a Python literal; turn it into a set.
    .convert('Programme Category', lambda raw: set(literal_eval(raw)))
    .convert('Project Name', lambda name: name.strip())
    # Empty strings become None so that selectnotnone can drop them later.
    .replaceall('', None)
)

# Keep public-facing events with a project name that overlap the reporting
# window, reduced to the four columns needed and ordered by end then start date.
events = (
    events_parsed
    .selectge('End Date', date_range[0])
    .selectle('Start Date', date_range[1])
    .selectin('Item Type', ['Event (any public-facing activity)'])
    .selectnotnone('Project Name')
    .cut('Start Date', 'End Date', 'Project Name', 'Programme Category')
    .sort(['End Date', 'Start Date'])
)

In [10]:
# Display the filtered events table as a sanity check.
# NOTE(review): the saved output shows five rows, presumably petl's preview limit — confirm.
events

Start Date,End Date,Project Name,Programme Category
2025-01-10,2025-01-10,Nationhood: Memory and Hope (Aida Muluneh Photography exhibition),"{'Visual Arts and Sculpture', 'Performance - other'}"
2025-01-10,2025-01-10,Channels (Bodies of Water) - Turbynes,"{'Visual Arts and Sculpture', 'Music', 'Heritage', 'Digital'}"
2025-01-10,2025-01-11,Rise (AKA - Opening Event),"{'Outdoor arts', 'Festival', 'Theatre', 'Dance / Movement / Circus', 'Exhibition', 'International', 'Broadcast'}"
2025-01-11,2025-01-11,Nationhood: Memory and Hope (Aida Muluneh Photography exhibition),"{'Visual Arts and Sculpture', 'Performance - other'}"
2025-01-11,2025-01-11,Rise (AKA - Opening Event),"{'Outdoor arts', 'Festival', 'Theatre', 'Dance / Movement / Circus', 'Exhibition', 'International', 'Broadcast'}"


In [11]:
def category_exploder(row):
    """Yield a ``(project name, category)`` pair for each of the row's programme categories."""
    yield from (
        (row['Project Name'], category)
        for category in row['Programme Category']
    )

# Number of event rows per project.
events_by_project = events.aggregate('Project Name', len)

# Number of event rows per programme category; an event tagged with several
# categories is counted once under each of them.
events_by_category = (
    events
    .cut('Project Name', 'Programme Category')
    .rowmapmany(category_exploder, header=['project', 'category'])
    .aggregate(['category'], len)
)

events_summary = {
    'count': events.nrows(),
    'projects': dict(events_by_project.records()),
    'categories': dict(events_by_category.records()),
}

In [12]:
# Write the project and event summaries together into a single JSON file.
with open(TARGET / 'events.json', 'w') as f:
    programme_summary = {'projects': project_summary, 'events': events_summary}
    json.dump(programme_summary, f, indent=2)

## Tickets

In [13]:
# Load the ticketed event instances and parse their start timestamps.
event_instances = (
    etl.fromcsv(PUBLISHED / 'ticketing/event-instances.csv')
    .convert('start', etl.dateparser('%Y-%m-%d %H:%M:%S'))
)

# Keep main-programme instances that start within the reporting window.
ticketed_events = (
    event_instances
    .selectrangeopen('start', date_range[0], date_range[1])
    .selectcontains('eventType', 'MainProgrammeEvent')
)

In [14]:
# The two columns of the selected event instances that the join brings in.
selected_instances = ticketed_events.cut('instance_id', 'event_name')

# Ticket counts at 'oslaua' geography level, restricted to the selected instances.
# No join key is given, so petl matches on the fields the two tables share
# (presumably `instance_id` — verify against the CSV headers).
tickets_sold = (
    etl.fromcsv(PUBLISHED / 'ticketing/tickets.csv')
    .selecteq('geography_type', 'oslaua')
    .rightjoin(selected_instances)
    .cut('event_name', 'geography_code', 'count_of_tickets')
    .convertnumbers()
    # Drop rows that have no ticket count.
    .selectnotnone('count_of_tickets')
)

In [15]:
# Distinct names of the ticketed events in the window.
ticketed_event_names = ticketed_events.cut('event_name').distinct().values('event_name')

# Ticket rows for geography code E08000032, reported under the 'bradford' key.
bradford_tickets = tickets_sold.selecteq('geography_code', 'E08000032')

ticket_summary = {
    'events': {
        'names': list(ticketed_event_names),
        'count': ticketed_events.nrows(),
    },
    'sold': {
        'total': sum(tickets_sold.values('count_of_tickets')),
        'bradford': sum(bradford_tickets.values('count_of_tickets')),
    },
}

In [16]:
# Write the ticket summary to its own JSON file.
with open(TARGET / 'tickets.json', 'w') as f:
    json.dump(ticket_summary, f, indent=2)

## Volunteers

In [17]:
# Volunteer checkpoint rows with parsed dates.
checkpoints_parsed = (
    etl.fromcsv(PUBLISHED / 'volunteers/checkpoints.csv')
    .convert('date', etl.dateparser('%Y-%m-%d'))
)

# Only an upper date bound is applied here (no filter on the window start);
# numeric-looking values are then converted to numbers.
signups = (
    checkpoints_parsed
    .selectle('date', date_range[1])
    .convertnumbers(strict=False)
)

In [18]:
# Volunteer shift rows with parsed dates.
shifts_parsed = (
    etl.fromcsv(PUBLISHED / 'volunteers/shifts.csv')
    .convert('date', etl.dateparser('%Y-%m-%d'))
)

# Keep 'BD25 Event' shifts dated within the reporting window, then convert
# numeric-looking values to numbers.
shifts = (
    shifts_parsed
    .selecteq('type', 'BD25 Event')
    .selectrangeopen('date', date_range[0], date_range[1])
    .convertnumbers(strict=False)
)

In [19]:
# Distinct event names that appear on the selected shifts.
shift_events = shifts.cut('rosterfy_event_name').distinct()

# Sum of the `count` column for each checkpoint.
checkpoint_totals = signups.aggregate('checkpoint', sum, 'count')

volunteer_summary = {
    'events': {
        'count': shift_events.nrows(),
        'names': list(shift_events.values('rosterfy_event_name')),
    },
    'shifts': {
        'count': shifts.nrows(),
        'attended': sum(shifts.values('attended')),
        'hours': sum(shifts.values('hours')),
    },
    'people': {
        'checkpoints': dict(checkpoint_totals.records()),
    },
}

In [20]:
# Write the volunteer summary to its own JSON file.
with open(TARGET / 'volunteers.json', 'w') as f:
    json.dump(volunteer_summary, f, indent=2)