In [1]:
import ast
import json

import pandas as pd
import petl as etl
from utils.paths import PUBLISHED, SITE

In [2]:
# Directory (under SITE) that receives every CSV/JSON file this notebook
# writes; it is created up front, including any missing parents.
TICKET_DATA = SITE / 'themes/ticketing/_data/operational'
TICKET_DATA.mkdir(exist_ok=True, parents=True)

In [3]:
# Event-instance table, read from the published ticketing CSV.
instances = etl.fromcsv(
    source=PUBLISHED / 'ticketing/event-instances.csv',
)

In [4]:
# Ticket rows enriched with the event type of their instance.
# No join key is given, so petl joins on the field names the two tables share.
# Numeric-looking strings are converted to numbers after the join.
tickets = etl.convertnumbers(
    etl.join(
        etl.fromcsv(PUBLISHED / 'ticketing/tickets.csv'),
        instances
        .cut('instance_id', 'eventType')
        # The eventType cell is parsed as a Python literal and one element is
        # popped from it (the last one, if it is a list).
        # NOTE(review): assumes a non-empty list/set literal - confirm.
        .convert('eventType', lambda cell: ast.literal_eval(cell).pop()),
    )
)

In [5]:
def make_date_index(df):
    """Return a copy of ``df`` whose ``date`` column holds pandas datetimes.

    Despite the name, the frame's index is left alone: only the ``date``
    column is converted (via ``pd.DatetimeIndex``) so that it can be used
    with ``resample(..., on='date')``.

    The input frame is not modified. The previous implementation assigned
    through ``df.date = ...``, which rewrote the caller's frame in place.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing a ``date`` column of date-like values.

    Returns
    -------
    pandas.DataFrame
        New frame with the same columns, ``date`` converted to datetimes.

    Raises
    ------
    KeyError
        If ``df`` has no ``date`` column.
    """
    # assign() builds a new frame, so the caller's data stays untouched and
    # re-running a cell that pipes through this function is idempotent.
    return df.assign(date=pd.DatetimeIndex(df['date']))

In [6]:
def resample_to_week(df):
    """Sum ``df`` into weekly bins keyed on its ``date`` column.

    Bins use the ``'W-SUN'`` frequency (weekly, anchored on Sundays) and
    every remaining column is summed within each bin. The resulting index
    is named ``week_ending``.
    """
    weekly_totals = df.resample('W-SUN', on='date').sum()
    return weekly_totals.rename_axis('week_ending')

In [7]:
# Weekly ticket totals for rows whose geography_type is 'oslaua', leaving out
# the 'TestEvent' event type. Tickets are first summed per date in petl, then
# handed to pandas for the weekly roll-up.
by_week = resample_to_week(
    make_date_index(
        tickets
        .selecteq('geography_type', 'oslaua')
        .selectnotin('eventType', ['TestEvent'])
        # No output field name is given, so the summed column takes petl's
        # default name, 'value'.
        .aggregate('date', sum, 'count_of_tickets')
        .convert('date', etl.dateparser('%Y-%m-%d'))
        .todataframe()
    )
)

In [8]:
# Side-by-side table of the per-week count ('weekly') and its running sum
# ('total'); both are derived from by_week's 'value' column.
weekly_tickets = pd.concat(
    [
        frame.rename(columns={'value': label})
        for label, frame in (('weekly', by_week), ('total', by_week.cumsum()))
    ],
    axis=1,
)

In [9]:
# Publish the weekly/cumulative ticket table for the site.
weekly_tickets.to_csv(
    path_or_buf=TICKET_DATA / 'weekly_tickets.csv',
)

In [10]:
# Ticket totals for every (geography_type, geography_code) pair; no
# geography or event-type filter is applied here.
geo = etl.aggregate(
    tickets,
    key=['geography_type', 'geography_code'],
    aggregation=sum,
    value='count_of_tickets',
)

In [11]:
# Publish the per-geography ticket totals.
etl.tocsv(geo, TICKET_DATA / 'geography.csv')

In [12]:
# Orders table from the published CSV: numeric-looking strings become
# numbers, then the 'date' field is parsed from YYYY-MM-DD text.
orders = etl.convert(
    etl.convertnumbers(
        etl.fromcsv(PUBLISHED / 'ticketing/orders.csv')
    ),
    'date',
    etl.dateparser('%Y-%m-%d'),
)

In [13]:
# Orders summed into weekly bins, using the same two helpers as the ticket
# roll-up.
weekly_orders = resample_to_week(
    make_date_index(orders.todataframe())
)

In [14]:
# Publish weekly order counts ('weekly') next to their running sum ('total').
pd.concat(
    [
        frame.rename(columns={'count_of_orders': label})
        for label, frame in (
            ('weekly', weekly_orders),
            ('total', weekly_orders.cumsum()),
        )
    ],
    axis=1,
).to_csv(TICKET_DATA / 'weekly_orders.csv')

In [15]:
# Headline totals written to summary.json.
# NOTE(review): the ticket total filters on geography_type 'osward' and keeps
# 'TestEvent' rows, whereas the weekly and per-type outputs filter on 'oslaua'
# (and the weekly one drops 'TestEvent') - confirm the difference is intended.
summary = dict(
    orders=orders.values('count_of_orders').sum(),
    tickets=(
        tickets
        .selecteq('geography_type', 'osward')
        .values('count_of_tickets')
        .sum()
    ),
)

In [16]:
# Persist the headline totals as JSON alongside the CSV outputs.
with (TICKET_DATA / 'summary.json').open('w') as summary_file:
    summary_file.write(json.dumps(summary))

In [17]:
# Ticket totals per ticket 'type' for 'oslaua' rows, largest first, written
# straight to CSV. The summed column is named 'tickets'.
etl.tocsv(
    tickets
    .selecteq('geography_type', 'oslaua')
    .aggregate('type', sum, 'count_of_tickets', field='tickets')
    .sort('tickets', reverse=True),
    TICKET_DATA / 'type.csv',
)