In [None]:
import sys
from pathlib import Path

import pandas as pd

# Absolute path four directory levels above the current working directory.
TOP_DIR = Path('../../../..').resolve()

# Register TOP_DIR on the import path exactly once, so that re-running this
# cell does not keep appending duplicate entries.
if str(TOP_DIR) not in sys.path:
    sys.path.append(str(TOP_DIR))

In [None]:
from lib.util.geo import postcode_formatter, postcode_to_ward_code

In [None]:
# Only the order fields used further down the notebook are loaded.
ticket_columns = [
    'created_at',
    'event_id',
    'event_name',
    'event_date',
    'event_time',
    'number_of_tickets',
    'postcode_from_question',
    'postcode_from_address',
    'status',
]
orders_csv_path = '../../../../working/metrics/ticketing/orders.csv'

# `created_at` and `event_date` are parsed to datetimes at read time.
ticket_data = pd.read_csv(
    orders_csv_path,
    usecols=ticket_columns,
    parse_dates=['created_at', 'event_date'],
)

In [None]:
# Quick sanity check of the loaded orders: column dtypes and non-null counts.
ticket_data.info()

In [None]:
# Every order whose status is anything other than 'cancelled' is treated as
# completed (rows with a missing status are therefore kept).
is_cancelled = ticket_data.status.isin(['cancelled'])

# Selecting rows and columns together via `.loc` yields a new frame that the
# following cells can add columns to.
completed_orders = ticket_data.loc[~is_cancelled, ticket_data.columns]

In [None]:
# Derive a ward code for each order from the best available postcode.
#
# The postcode given in answer to the order question is preferred; when it is
# missing we fall back to the address postcode from the SAME order. That
# requires back-filling across the columns (`axis=1`). The default `bfill()`
# works down the rows (`axis=0`), which would instead copy the question
# postcode of a later, unrelated order into the gap and never consult
# `postcode_from_address` at all.
completed_orders['ward_code'] = (
    completed_orders
        .loc[:, ['postcode_from_question', 'postcode_from_address']]
        .bfill(axis=1)
        # After the row-wise back-fill the first column holds the coalesced value.
        .iloc[:, 0]
        .pipe(postcode_formatter)
        .map(postcode_to_ward_code)
)

Load the events data, with one row per Ticket Tailor ID

In [None]:
def prepare_events(data: pd.DataFrame):
    """Reshape the raw events table into one row per Ticket Tailor ID.

    Args:
        data: Frame with the columns 'Project name', 'Event type' and
            'Ticket Tailor ID'. The ID column may hold several IDs in one
            cell, separated by whitespace and/or commas.

    Returns:
        A new frame with the columns `project_name`, `ticket_tailor_id` and
        `public_event` (whether the event type contains 'Public Event').
        Rows with a missing or empty ID are removed, as are exact duplicate
        rows. The input frame is not modified.
    """
    renamed = data.rename(columns={
        'Project name': 'project_name',
        'Event type': 'event_type',
        'Ticket Tailor ID': 'ticket_tailor_id'
    })

    one_row_per_id = (
        renamed
        .assign(
            public_event=renamed.event_type.str.contains('Public Event'),
            # Each cell becomes a list of IDs, ready to be exploded below.
            ticket_tailor_id=renamed.ticket_tailor_id.str.split(
                pat=r'[\s,]+', regex=True),
        )
        .drop(columns=['event_type'])
        .explode('ticket_tailor_id')
    )

    # Splitting leaves '' where a cell started or ended with a separator, and
    # events without any ID explode to a missing value; neither is usable.
    has_id = one_row_per_id.ticket_tailor_id.notna() & (one_row_per_id.ticket_tailor_id != '')
    return one_row_per_id.loc[has_id].drop_duplicates()


# Read only the three columns that `prepare_events` consumes, then reshape
# them into one row per Ticket Tailor ID.
events = prepare_events(
    pd.read_csv(
        '../../../../working/metrics/events/all.csv',
        usecols=['Project name', 'Event type', 'Ticket Tailor ID'],
    )
)

Ticket Tailor IDs that appear on more than one row of the events data

In [None]:
# Show every row (first occurrence included) whose Ticket Tailor ID is shared
# with at least one other row.
has_shared_id = events.ticket_tailor_id.duplicated(keep=False)
events[has_shared_id]

In [None]:
# Row count before joining the events data, for comparison after the merge.
len(completed_orders)

In [None]:
# Attach project_name / public_event to each order by matching the order's
# event_id against the events' Ticket Tailor IDs. A left join keeps every
# order; orders with no matching event get missing values, and an ID that
# occurs more than once in `events` yields one output row per match.
events_by_id = events.set_index('ticket_tailor_id')
completed_orders = completed_orders.merge(
    events_by_id,
    how='left',
    left_on='event_id',
    right_index=True,
)

In [None]:
# Row count after the merge; a larger number than before the merge means
# some orders matched more than one events row.
len(completed_orders)

In [None]:
# Destination directory for the published ticket extracts; it is created
# (including any missing parents) if it does not exist yet.
OUTPUT_DIR = TOP_DIR / 'data/metrics/events/tickets/'
OUTPUT_DIR.mkdir(exist_ok=True, parents=True)

In [None]:
# Columns published in the per-order extract.
export_columns = [
    'created_at',
    'event_id',
    'event_date',
    'number_of_tickets',
    'ward_code',
    'project_name',
    'public_event',
]

# Sort for a stable file ordering before writing, without the index column.
orders_export = completed_orders.loc[:, export_columns].sort_values(['created_at', 'event_id'])
orders_export.to_csv(OUTPUT_DIR.joinpath('orders.csv'), index=False)

In [None]:
# Write the distinct events seen in the orders, ordered by event date and time.
#
# The file goes to OUTPUT_DIR, the same directory object used for orders.csv,
# rather than a second hard-coded relative path. This keeps the two exports
# together, does not depend on the working directory at the time the cell
# runs, and relies on the directory that was created alongside OUTPUT_DIR.
(
    completed_orders.loc[:, ['event_date', 'event_time', 'event_name', 'event_id', 'project_name', 'public_event']]
      .drop_duplicates()
      .sort_values(['event_date', 'event_time'])
      .to_csv(OUTPUT_DIR.joinpath('events.csv'), index=False)
)

In [None]:
# Horizontal bar chart of total tickets per project, smallest total first.
tickets_by_project = completed_orders.groupby(['project_name']).number_of_tickets.sum()
chart = tickets_by_project.sort_values().plot.barh()

In [None]:
# Bar chart of the running total of tickets, bucketed by calendar month of
# the event date.
# NOTE(review): the 'M' alias is deprecated in newer pandas releases in favour
# of 'ME' - confirm against the pinned pandas version before changing it.
tickets_by_event_date = completed_orders.groupby(['event_date']).number_of_tickets.sum()
monthly_tickets = tickets_by_event_date.resample('M').sum()
chart = monthly_tickets.cumsum().plot.bar()