# Dashboard

This pipeline processes data into a form to drive the dashboard.

The parameters are injected using papermill.

In [1]:
# Local-run fallback: DEBUG names the entry under `dashboards` in params.yaml
# whose values stand in for the notebook parameters. A falsy DEBUG skips the
# whole block.
DEBUG = 'live'
if DEBUG:
    import yaml

    with open('./params.yaml') as f:
        config = yaml.safe_load(f)['dashboards'][DEBUG]

    target, start, end, fixed_date = (
        config[name] for name in ('target', 'start', 'end', 'fixed_date')
    )

In [2]:
# Notebook parameters. These are annotation-only declarations: they record the
# expected names and types without binding any value.
target: str  # dashboard key; names the output folder and filters the manual figures
start: str  # first day of the reporting window, parsed with date.fromisoformat
end: str  # last day of the reporting window, parsed with date.fromisoformat
fixed_date: str  # 'false' means the end of the window is capped at today

## Setup

Import some libraries

In [3]:
import json
from datetime import date
from ast import literal_eval

import petl as etl
from utils.paths import PUBLISHED, SITE
from utils.themes.programme_slice import ProgrammeSlice

Set the target directory, and ensure it exists

In [4]:
# Folder that receives this dashboard's JSON data files.
TARGET = SITE.joinpath('insights/dashboard', target, '_data')
TARGET.mkdir(parents=True, exist_ok=True)

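Calculate the date range as `(start, end, updated)`. If the `fixed_date` parameter is `false`, the end of the range is capped at today (the earlier of today and `end`); otherwise `end` is used as given. The third value is today's date, recorded as the update date.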

In [5]:
# Build the reporting window as a (start, end, updated) tuple of dates.
# Read today's date once so the capped end and the "updated" stamp always agree.
today = date.today()

# str() leaves ISO strings untouched and also accepts values that YAML has
# already parsed into date objects.
range_start = date.fromisoformat(str(start))
range_end = date.fromisoformat(str(end))

# `fixed_date` may arrive as the string 'false' or as a real boolean (YAML
# parses an unquoted `false` to False), so normalise before comparing.
end_is_fixed = str(fixed_date).strip().lower() != 'false'

date_range = (
    range_start,
    # A rolling window never extends past today.
    range_end if end_is_fixed else min(today, range_end),
    today,
)

Write the timestamp file

In [6]:
# Record the window and the update date, each rendered as a string.
with open(TARGET / 'timestamp.json', 'w') as f:
    json.dump(
        {label: str(value) for label, value in zip(('start', 'end', 'updated'), date_range)},
        f,
    )

Load programme data

In [7]:
# Only the (start, end) pair is passed on; the "updated" date is not part of the range.
programme_data = ProgrammeSlice(range=date_range[:2])

## Events & projects data

In [8]:
# Total events for each (project, evaluation category) pair.
events = programme_data.project_breakdown.aggregate(
    ['project_name', 'evaluation_category'], sum, 'events', field='events'
)

In [9]:
# Project headline figures: distinct project count, rows per category, and the names.
project_summary = dict(
    total=events.distinct('project_name').nrows(),
    by_category=dict(events.aggregate('evaluation_category', len).records()),
    names=list(events.cut('project_name').distinct().values('project_name')),
)

In [10]:
# Event counts overall, per evaluation category, and per project.
events_summary = {
    'total': sum(events.values('events')),
    'by_category': dict(
        events
        .aggregate('evaluation_category', sum, 'events')
        .records()
    ),
    # Each row of `events` is (project_name, evaluation_category, events).
    'by_project': {
        project: {'category': category, 'count': count}
        for project, category, count in events.records()
    },
}

In [11]:
# Show the events summary as the cell output.
events_summary

{'total': 3873,
 'by_category': {'': 139,
  'Community-led arts event': 53,
  'Digital': 1,
  'Exhibition': 759,
  'Festival': 50,
  'Film': 711,
  'In-person': 1746,
  'Training and skills development': 414},
 'by_project': {'A Good Yarn: Luke Jerram': {'category': 'In-person',
   'count': 34},
  'A Portrait of Us - Outdoor Exhibition': {'category': 'In-person',
   'count': 2},
  'A Screen Near You - Cinema for All (grassroots development and screening)': {'category': 'In-person',
   'count': 8},
  'A Screen Near You - The Incredible Moving Cinema': {'category': 'In-person',
   'count': 9},
  'ARCHIVE: How Soon Is Now (grant dependent)': {'category': 'In-person',
   'count': 2},
  'Akram Khan and Dance United - Memories of the Future': {'category': 'In-person',
   'count': 7},
  'An Evening with Resi/Dance @ The Beacon (Dance United)': {'category': 'In-person',
   'count': 1},
  'An Evening with: Jacqui Dankworth': {'category': 'In-person', 'count': 1},
  'Aswad Arts Presents StreetSo

In [12]:
# Both summaries are written the same way, so loop over (file name, payload) pairs.
for filename, payload in (
    ('events.json', events_summary),
    ('projects.json', project_summary),
):
    with open(TARGET / filename, 'w') as f:
        json.dump(payload, f, indent=2)

## Audiences

In [13]:
# Audience total for each (project, evaluation category) pair. Zero totals are
# turned into None; the later ticket join either fills or drops None audiences.
audience_data = programme_data.project_breakdown.aggregate(
    ['project_name', 'evaluation_category'], sum, 'audience', field='audience'
).replace('audience', 0, None)

In [14]:
# Lookup table read from CSV. The ticket pipeline joins it on event_id and
# takes project_name from it.
project_to_spektrix_lookup = etl.fromcsv(PUBLISHED / 'lookups/project_to_spektrix_event.csv')

In [15]:
# Main-programme event instances that start inside the reporting window.
main_programme_instances = (
    etl
    .fromcsv(PUBLISHED / 'ticketing/event-instances.csv')
    .convert('start', etl.dateparser('%Y-%m-%d %H:%M:%S'))
    .selectrangeopen('start', date_range[0], date_range[1])
    .selectcontains('eventType', 'MainProgrammeEvent')
    .cut('instance_id', 'event_name', 'event_id')
)

# Tickets sold per event and local-authority geography, labelled with the
# programme project name wherever the lookup provides one.
tickets_sold = (
    etl
    .fromcsv(PUBLISHED / 'ticketing/tickets.csv')
    .selecteq('geography_type', 'oslaua')
    # Right join: every selected instance is kept, with or without ticket rows.
    .rightjoin(main_programme_instances)
    .cut('event_name', 'event_id', 'geography_code', 'count_of_tickets')
    .convertnumbers()
    # Instances that had no ticket rows carry no count; drop them before summing.
    .selectnotnone('count_of_tickets')
    .aggregate(
        ['event_name', 'event_id', 'geography_code'],
        sum,
        'count_of_tickets',
        field='count_of_tickets',
    )
    .leftjoin(
        project_to_spektrix_lookup.cutout('event_name'),
        lkey='event_id',
        rkey='event_id',
    )
    # Prefer the project name over the Spektrix event name when a match exists.
    .convert(
        'event_name',
        lambda _, r: r.project_name,
        where=lambda r: r.project_name is not None,
        pass_row=True,
    )
)

In [16]:
# Sold and scanned ticket totals from the `manual/` Spektrix sales-and-scans
# file, limited to events dated inside the reporting window.
tickets_scanned = dict(
    etl
    .fromcsv(PUBLISHED / 'manual/spektrix-sales-and-scans.csv')
    .convert('Event Date', etl.dateparser('%Y-%m-%d'))
    .selectrangeopen('Event Date', date_range[0], date_range[1])
    .convertnumbers()
    # A key of None aggregates the whole table into a single row.
    .aggregate(None, {
        'sold': ('Sold', sum),
        'scanned': ('Scanned', sum)
    })
    # Turn that single row into (variable, value) pairs so that dict() yields
    # {'sold': ..., 'scanned': ...}.
    .melt(variables=['sold', 'scanned'])
    .records()
)

In [17]:
# Ticket sales overall, for the geography code E08000032 (stored under the
# 'bradford' key), per project and per geography.
ticketed_audience = {
    'sold': {
        'total': sum(tickets_sold.values('count_of_tickets')),
        'bradford': sum(
            tickets_sold
            .selecteq('geography_code', 'E08000032')
            .values('count_of_tickets')
        ),
        'by_project': dict(
            tickets_sold
            .aggregate('event_name', sum, 'count_of_tickets', field='count_of_tickets')
            .records()
        ),
        'by_geo': dict(
            tickets_sold
            .aggregate('geography_code', sum, 'count_of_tickets', field='count_of_tickets')
            .records()
        ),
    },
    'scanned': {
        # NOTE(review): tickets_scanned is a dict with 'sold' and 'scanned'
        # keys, so 'total' holds both figures rather than one number — confirm
        # this is the shape the dashboard expects.
        'total': tickets_scanned
    }
}

Derive audience figures from ticket sales for events that have no matching project.

In [18]:
# Ticket totals for events with no project match, reshaped to line up with the
# programme audience table (project_name / audience / evaluation_category).
audience_from_spektrix = (
    tickets_sold
    .selectnone('project_name')
    .aggregate('event_name', sum, 'count_of_tickets')
    .addfield('evaluation_category', 'UNKNOWN')
    .rename({
        'event_name': 'project_name',
        'value': 'audience'
    })
)

In [19]:
# Ticket totals per project, used where no audience figure was recorded.
ticketed_by_project = tickets_sold.aggregate(
    ['project_name'], sum, 'count_of_tickets', field='ticketed_audience'
)

audience_plus_ticket_data = (
    audience_data
    .leftjoin(ticketed_by_project)
    # fillleft copies the value on the right into an empty cell, so an audience
    # of None takes the ticketed_audience figure when there is one.
    .fillleft()
    # Projects with neither figure are left out.
    .selectnotnone('audience')
)

In [20]:
# Stack the programme audience rows and the ticket-only rows into one table.
all_audience = etl.cat(audience_plus_ticket_data, audience_from_spektrix)

In [21]:
# Audience overall, per evaluation category, and per project.
audience = {
    'total': sum(all_audience.values('audience')),
    'by_category': dict(
        all_audience.aggregate(['evaluation_category'], sum, 'audience').records()
    ),
    # The aggregated rows are (project_name, evaluation_category, value).
    'by_project': {
        project: {'category': category, 'count': count}
        for project, category, count in (
            all_audience
            .aggregate(['project_name', 'evaluation_category'], sum, 'audience')
            .records()
        )
    },
}

In [22]:
# Write the audience summary together with the ticketed breakdown; the
# 'unticketed' key is written as null.
with open(TARGET / 'audience.json', 'w') as f:
    json.dump(
        {**audience, 'ticketed': ticketed_audience, 'unticketed': None},
        f,
        indent=2,
    )

## Participants

### Programme

In [23]:
# Community participant counts per project and category, combining the
# `schedule_` and `manual_` source columns.
programme_participants = (
    programme_data.project_breakdown
    # Stack the source columns into (variable, value) rows.
    .melt(variables=[
        'schedule_participants_community',
        'manual_participants_community',
        # 'manual_participants_schools'
    ])
    # Strip the source prefix so both columns share the name 'participants_community'.
    .convert('variable', lambda f: f.replace('manual_','').replace('schedule_',''))
    .selectnotnone('value')
    # Add the sources together for each project and category.
    .aggregate(['project_name', 'evaluation_category', 'variable'], sum, 'value')
    # Pivot the variable back into a column of its own.
    .recast(samplesize=1_000_000)
    # Keep only rows with a participant count above zero.
    .selectgt('participants_community', 0)
)

### Cultural learning

In [24]:
import petl as etl
from datetime import date
import ast


# Cultural learning engagements dated inside the reporting window.
cultural_learning_engagements = (
    etl
    .fromcsv(PUBLISHED / 'cultural-learning/engagements.csv' )
    .convert(['date', 'month'], date.fromisoformat)
    # Rows without a date cannot be placed in the window, so drop them first.
    .selectnotnone('date')
    .selectrangeopen('date', *date_range[:2])
    .convertnumbers()
    # Parse the stored Python literal. The aggregation that follows iterates
    # over each parsed value, so it is presumably a list of organisation ids
    # — TODO confirm.
    .convert('organisation_ids', ast.literal_eval)
)

# Venues flagged as cultural learning settings. The flag is compared with the
# string 'True' because fromcsv yields text values.
cultural_learning_settings = (
    etl
    .fromcsv(PUBLISHED / 'programme/venues.csv')
    .selecteq('cultural_learning', 'True')
)

def distinct(items, valid_ids=None):
    """Count the unique ids in ``items`` that are recognised settings.

    Args:
        items: iterable of id collections, one collection per aggregated row.
        valid_ids: optional iterable of ids that count. When omitted, the
            ``id`` column of ``cultural_learning_settings`` is used.

    Returns:
        The number of distinct ids that appear in ``items`` and in ``valid_ids``.
    """
    if valid_ids is None:
        valid_ids = cultural_learning_settings.values('id')
    # Materialise the ids once. Testing membership directly against the petl
    # values view iterates the table again for every id checked.
    allowed = set(valid_ids)
    return len({i for group in items for i in group if i in allowed})

# Collapse all engagements into one record: total participants and the number
# of distinct recognised settings engaged.
cultural_learning_summary = (
    cultural_learning_engagements
    .aggregate(None, dict(
        participants=('participants', sum),
        schools_engaged=('organisation_ids', distinct),
    ))
    .dicts()[0]
)

Report by org type

In [25]:
# Show how many cultural learning settings fall under each 'Org/Venue Type' value.
cultural_learning_settings.aggregate('Org/Venue Type', len)

Org/Venue Type,value
['Education Setting'],148
[],3


### Volunteers

In [26]:
# Volunteer checkpoint updates dated on or before the end of the reporting
# window. Only this upper bound is applied; the start of the window is not used.
volunteer_signups = (
    etl
    .fromcsv(PUBLISHED / 'volunteers/checkpoint-updates.csv')
    .convert('date', etl.dateparser('%Y-%m-%d'))
    .selectle('date', date_range[1])
    .convertnumbers(strict=False)
)

In [27]:
# NOTE(review): shift-level volunteer data is disabled, as are the matching
# 'events' and 'shifts' keys in volunteer_summary. Restore or delete this cell.
# volunteer_shifts = (
#     etl
#     .fromcsv(PUBLISHED / 'volunteers/shifts.csv')
#     .convert('date', etl.dateparser('%Y-%m-%d'))
#     .selecteq('type', 'BD25 Event')
#     .selectrangeopen('date', date_range[0], date_range[1])
#     .convertnumbers(strict=False)
# )

In [28]:
# Volunteer figures: a headline total over three named checkpoints, plus the
# count recorded against every checkpoint.
volunteer_summary = {
    'total': sum(
        volunteer_signups
        .selectin(
            'checkpoint',
            [
                "1. Monitoring & Evaluation",
                "2. Sign Up to Induction",
                "3. Fully Inducted Volunteers",
            ],
        )
        .values('count')
    ),
    'people': {
        'checkpoints': dict(
            volunteer_signups
            .aggregate('checkpoint', sum, 'count')
            .records()
        ),
    },
    # 'events': {
    #     'count': volunteer_shifts.cut('rosterfy_event_name').distinct().nrows(),
    #     'names': list(volunteer_shifts.cut('rosterfy_event_name').distinct().values('rosterfy_event_name')),
    # },
    # 'shifts': {
    #     'count': volunteer_shifts.nrows(),
    #     'attended': sum(volunteer_shifts.values('attended')),
    #     'hours': sum(volunteer_shifts.values('hours')),
    # },
}

### Other figures

In [29]:
# Manual figures that belong to this dashboard, with numeric text converted to numbers.
other_figures = (
    etl
    .fromcsv(PUBLISHED / 'manual/manual-other-figures.csv')
    .selecteq('dashboard key', target)
    .convertnumbers()
)

In [30]:
# Preview: training and skills development participants per source.
dict(
    other_figures
    .selecteq('variable', 'training_and_skills_development_participants')
    .cut('source', 'value')
    .rename('source', 'project_name')
    .aggregate('project_name', sum, 'value', field='count')
    .records()
)

{'Bradford Producing Hub': 833,
 'Creative Health': 278,
 'Digital Creatives scheme': 1632}

In [31]:
# Write one total per variable as a flat JSON object.
with open(TARGET / 'otherFigures.json', 'w') as f:
    json.dump(
        (
            other_figures
            .aggregate('variable', sum, 'value')
            # Flip the table so the variable names become the header of a
            # single data row, then drop the leftover label column.
            .transpose()
            .cutout('variable')
            .dicts()[0]
        ),
        f,
        indent=2,
    )

### Summary

In [32]:
def summarise_by_type(table):
    """Summarise a participant table that has ``project_name`` and ``count`` fields.

    Returns a dict holding the overall ``count`` (the sum over every row) and a
    ``by_project`` mapping from project name to its count and category. Only
    rows with a count above zero appear in ``by_project``. A row without an
    ``evaluation_category`` field is given the category ``'UNKNOWN'``.
    """
    overall = sum(table.values('count'))

    per_project = {}
    for record in table.selectgt('count', 0).records():
        per_project[record.project_name] = {
            'count': record['count'],
            'category': record.get('evaluation_category', 'UNKNOWN'),
        }

    return {'count': overall, 'by_project': per_project}

# Participant summaries per type. Each table is first shaped to have
# `project_name` and `count` fields, which is what summarise_by_type reads.
by_type = {
    'community_participants': summarise_by_type(
        programme_participants.rename('participants_community', 'count')
    ),
    'training_and_skills_development_participants': summarise_by_type(
        other_figures
        .selecteq('variable', 'training_and_skills_development_participants')
        .cut('source', 'value')
        .rename('source', 'project_name')
        .aggregate('project_name', sum, 'value', field='count')
    ),
    'cultural_learning_participants': summarise_by_type(
        cultural_learning_engagements
        .aggregate('project_name', sum, 'participants', field='count')
    ),
}

In [33]:
# Show the per-type participant summaries as the cell output.
by_type

{'community_participants': {'count': 28999,
  'by_project': {'A Screen Near You - The Incredible Moving Cinema': {'count': 32,
    'category': 'In-person'},
   'BBC Extraordinary Portraits': {'count': 2400, 'category': 'Exhibition'},
   'BBC Introducing @ The Beacon': {'count': 9, 'category': 'In-person'},
   'BD: Walls': {'count': 273, 'category': 'In-person'},
   'Back In The Pink - Castles In The Sky': {'count': 7,
    'category': 'In-person'},
   'Bassline Symphony': {'count': 245, 'category': 'In-person'},
   'Big Brass Blowout': {'count': 68, 'category': 'Festival'},
   'Bloom': {'count': 165, 'category': 'In-person'},
   'Bradford On Foot': {'count': 2162, 'category': 'Digital'},
   'CHANNELS': {'count': 786, 'category': 'Digital'},
   'Closing Event - Emily Lim/Dan Canham': {'count': 13,
    'category': 'In-person'},
   'Club Ekta Events': {'count': 22, 'category': 'In-person'},
   'Compagnie Off: Giraffes': {'count': 37, 'category': 'In-person'},
   'Connecting Communities': {

In [34]:
# Headline participant total plus the detailed breakdowns.
participant_summary = {
    # Programme community participants + cultural learning + volunteers + every
    # manual figure whose variable name contains '_participants'.
    'total': sum((
        sum(programme_participants.values('participants_community')),
        cultural_learning_summary['participants'],
        volunteer_summary['total'],
        sum(other_figures.selectcontains('variable', '_participants').values('value')),
    )),

    'by_type': by_type,

    'cultural_learning': cultural_learning_summary,
    'volunteers': volunteer_summary,
}

In [35]:
# Write the participant summary for the dashboard.
with open(TARGET / 'participants.json', 'w') as f:
    json.dump(participant_summary, f, indent=2)

## Other figures