In [1]:
import ast
import json

import pandas as pd
import petl as etl
from utils.paths import PUBLISHED, SITE
from utils.date import date_to_week

In [2]:
# Output directory for every data file this notebook writes; created
# (including any missing parents) if it does not exist yet.
TICKET_DATA = SITE / 'themes/ticketing/_data/operational'
TICKET_DATA.mkdir(exist_ok=True, parents=True)

In [3]:
# Event-instance table read from the published ticketing CSV.
instances = etl.fromcsv(source=PUBLISHED / 'ticketing/event-instances.csv')

In [4]:
# Ticket rows joined to the event type of their instance, tagged with a
# week_ending value, with 'TestEvent' rows removed. Numeric-looking fields
# are converted and the resulting table is cached for reuse by later cells.
tickets = (
    etl
    .fromcsv(PUBLISHED / 'ticketing/tickets.csv')
    # No key is passed to join(), so the join columns are left to petl's
    # default. NOTE(review): presumably this resolves to the shared
    # 'instance_id' column -- confirm, or pass key='instance_id' explicitly.
    .join(
        instances
        .cut('instance_id', 'eventType')
        # eventType is parsed as a Python literal and one element is popped
        # from it. NOTE(review): looks like a stringified collection --
        # confirm; an empty value would make .pop() raise, and what happens
        # then depends on convert()'s error-handling defaults.
        .convert('eventType', lambda f: ast.literal_eval(f).pop())
    )
    # week_ending is derived from each row's `date` via utils.date.date_to_week.
    .addfield('week_ending', lambda r: date_to_week(r.date))
    # Drop rows whose event type is exactly 'TestEvent'.
    .selectnotin('eventType', ['TestEvent'])
    .convertnumbers()
    .cache()
)

In [5]:
def make_date_index(df, column_name='date'):
    """Return a copy of ``df`` indexed by ``column_name`` parsed as dates.

    The input frame is left untouched, so the function is safe to use in a
    ``.pipe(...)`` chain and can be re-run on the same frame.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing a column of date-like values.
    column_name : str, default 'date'
        Name of the column to convert and move into the index.

    Returns
    -------
    pandas.DataFrame
        New frame whose index is a ``DatetimeIndex`` named ``column_name``;
        the column itself is no longer among the data columns.
    """
    # Work on a copy: assigning the column and setting the index in place
    # would silently rewrite the caller's frame.
    indexed = df.copy()
    indexed[column_name] = pd.DatetimeIndex(indexed[column_name])
    return indexed.set_index(column_name)

In [6]:
# Tickets summed per week_ending over the rows whose geography_type is
# 'oslaua', as a DataFrame indexed by week_ending.
by_week = make_date_index(
    tickets
    .selecteq('geography_type', 'oslaua')
    .aggregate(
        key='week_ending',
        aggregation=sum,
        value='count_of_tickets',
        field='count_of_tickets',
    )
    .todataframe(),
    column_name='week_ending',
)

In [7]:
# Two columns side by side on the week_ending index: the per-week count
# ('weekly') and its running sum ('total').
weekly_tickets = pd.concat(
    {
        'weekly': by_week['count_of_tickets'],
        'total': by_week['count_of_tickets'].cumsum(),
    },
    axis=1,
)

In [8]:
# Persist the weekly / cumulative ticket counts for the site.
weekly_tickets.to_csv(path_or_buf=TICKET_DATA / 'weekly_tickets.csv')

In [9]:
def geography_area(row):
    """Map a ticket row to a broad reporting area.

    Rows whose ``geography_type`` is not ``'oslaua'`` yield ``None``.
    Otherwise the result is chosen from ``geography_code``:
    ``'MISSING'`` -> ``'Unknown'``, ``'E08000032'`` -> ``'Bradford'``,
    ``E08000033``-``E08000036`` -> ``'Other West Yorkshire'``, and anything
    else -> ``'Other areas'``.
    """
    if row.geography_type != 'oslaua':
        return None

    code = row.geography_code
    west_yorkshire = 'E08000033 E08000034 E08000035 E08000036'.split()

    if code == 'MISSING':
        area = 'Unknown'
    elif code == 'E08000032':
        area = 'Bradford'
    elif code in west_yorkshire:
        area = 'Other West Yorkshire'
    else:
        area = 'Other areas'
    return area

In [10]:
# Ticket totals for every (geography_type, geography_code) pair; the summed
# column uses petl's default output field name.
geo = etl.aggregate(
    tickets,
    key=['geography_type', 'geography_code'],
    aggregation=sum,
    value='count_of_tickets',
)

In [11]:
# Write the per-geography totals out for the site.
etl.tocsv(geo, TICKET_DATA / 'geography.csv')

In [12]:
# Order rows from the published CSV, each tagged with the week_ending value
# derived from its date; numeric-looking fields are converted afterwards.
orders = (
    etl.fromcsv(PUBLISHED / 'ticketing/orders.csv')
    .addfield('week_ending', lambda row: date_to_week(row.date))
    .convertnumbers()
)

In [13]:
# Orders summed per week_ending, as a DataFrame indexed by week_ending.
weekly_orders = make_date_index(
    orders
    .aggregate(
        key='week_ending',
        aggregation=sum,
        value='count_of_orders',
        field='count_of_orders',
    )
    .todataframe(),
    column_name='week_ending',
)

In [14]:
# Inspect the geography codes (among 'oslaua' rows) with more than 300
# tickets, largest first.
(
    tickets
    .selecteq('geography_type', 'oslaua')
    .aggregate(key='geography_code', aggregation=sum, value='count_of_tickets')
    .sort('value', reverse=True)
    .selectgt('value', 300)
    .displayall()
)

geography_code,value
E08000032,32452
MISSING,15672
E08000035,7250
E08000033,3207
E06000065,2416
E08000034,2383
E06000014,601
E08000036,539
E08000019,531
E06000010,354


In [15]:
# Show the per-week order counts as a sanity check before exporting them.
weekly_orders

Unnamed: 0_level_0,count_of_orders
week_ending,Unnamed: 1_level_1
2024-06-30,8
2024-07-07,21
2024-07-14,6
2024-07-21,4
2024-07-28,5
2024-08-04,6
2024-08-11,183
2024-08-18,204
2024-08-25,84
2024-09-01,105


In [16]:
# Export the per-week order count ('weekly') alongside its running sum
# ('total'), keyed by week_ending.
pd.concat(
    {
        'weekly': weekly_orders['count_of_orders'],
        'total': weekly_orders['count_of_orders'].cumsum(),
    },
    axis=1,
).to_csv(TICKET_DATA / 'weekly_orders.csv')

In [17]:
# Headline figures for the site: total orders, total tickets and tickets
# broken down by reporting area (see geography_area).
summary = {
    'orders': sum(orders.values('count_of_orders')),
    # NOTE(review): this total filters on 'osward' while every other
    # aggregate in the notebook filters on 'oslaua' -- confirm the two
    # geography types always sum to the same number of tickets.
    'tickets': sum(
        tickets
        .selecteq('geography_type', 'osward')
        .values('count_of_tickets')
    ),
    'geo': {
        'by_area': {
            area: count
            for area, count in (
                tickets
                .selecteq('geography_type', 'oslaua')
                .addfield('geography_area', geography_area)
                .selectnotnone('geography_area')
                .aggregate(['geography_area'], sum, 'count_of_tickets')
                .records()
            )
        }
    }
}

In [18]:
# Serialise the summary figures as JSON next to the CSV outputs.
with open(TICKET_DATA / 'summary.json', mode='w') as summary_file:
    json.dump(summary, fp=summary_file)

In [19]:
# Tickets per ticket type among 'oslaua' rows, after folding the raw type
# labels into reporting groups; types not listed keep their original label.
etl.tocsv(
    tickets
    .selecteq('geography_type', 'oslaua')
    .convert('type', {
        raw_label: group
        for group, raw_labels in {
            'Full Price': ('Adult', 'Guest Ticket'),
            'Accessible Ticket': (
                'Audio Description',
                'BSL Interpreted',
                'Essential Companion',
                'Wheelchair User',
            ),
            'Community Ticket': ('Z Community Ticket',),
            'Child Ticket': ('Under 16', 'Child'),
            'Bradford 2025': (
                'RSVP',
                'Z Press Ticket',
                'Z Company Ticket',
                'Z Guest Artist',
            ),
        }.items()
        for raw_label in raw_labels
    })
    .aggregate(key='type', aggregation=sum, value='count_of_tickets', field='tickets')
    .sort('tickets', reverse=True)
    .rename({
        'type': 'Ticket type',
        'tickets': 'Number of tickets sold',
    }),
    TICKET_DATA / 'type.csv',
)

In [20]:
# Tickets per (week_ending, eventType) among 'oslaua' rows; rows with a
# falsy eventType are relabelled 'UNKNOWN' after aggregation.
event_type = (
    tickets
    .selecteq('geography_type', 'oslaua')
    .aggregate(
        key=['week_ending', 'eventType'],
        aggregation=sum,
        value='count_of_tickets',
        field='tickets',
    )
    .update('eventType', 'UNKNOWN', where=lambda row: not row.eventType)
)

# Wide form: one row per week, one column per event type, 0 where absent.
etl.tocsv(
    event_type.recast(variablefield='eventType', valuefield='tickets', missing=0),
    TICKET_DATA / 'event_type_by_week.csv',
)

# Overall totals per event type with display-friendly labels, largest first.
etl.tocsv(
    event_type
    .aggregate(key='eventType', aggregation=sum, value='tickets', field='tickets')
    .convert('eventType', {
        'MainProgrammeEvent': 'Main Programme',
        'VIPReceptionEvent': 'VIP Reception',
        'UNKNOWN': 'Unknown',
        'SectorEvent': 'Sector Event',
    })
    .sort('tickets', reverse=True)
    .rename({
        'eventType': 'Event type',
        'tickets': 'Number of tickets sold',
    }),
    TICKET_DATA / 'event_type.csv',
)