In [1]:
import os
import pandas as pd
from events import load_event_data, get_first_ward, normalise_ward

In [2]:
# Cut-off used when filtering tickets: the current local time rounded UP to the
# next day boundary, i.e. midnight at the start of tomorrow.
latest_date = pd.Timestamp.today().ceil(freq='D')

In [3]:
# Load the ticket orders, parsing both timestamp columns as datetimes.
tickets = pd.read_csv('../../../../data/metrics/events/tickets/orders.csv', parse_dates=['created_at', 'event_date'])
# `latest_date` is today rounded UP to a day boundary (midnight at the start of
# tomorrow). Compare strictly so that everything dated today is kept, while an
# event dated tomorrow - which sits exactly on that boundary - is not counted
# as having happened "to date".
# NOTE(review): assumes event_date holds naive dates/timestamps - confirm.
tickets = tickets[tickets.event_date < latest_date]

In [4]:
# Load the raw event records via the project helper.
events = load_event_data()

# Reduce the event records to the four columns used by the outputs below:
# ward_code, date, event_unique_identifier and attendances.
events = pd.concat([
    events,
    # Ward derived by the project helpers; anything they leave missing is
    # labelled 'UNKNOWN'.
    # NOTE(review): presumably this yields the `ward_code` column selected
    # below - confirm against get_first_ward / normalise_ward.
    events.pipe(get_first_ward).pipe(normalise_ward).fillna('UNKNOWN'),
    # Total attendance = in-person + online. `add(fill_value=0)` treats a
    # missing figure on ONE side as zero, so an event that only recorded
    # in-person (or only online) attendance keeps that figure instead of
    # becoming NaN. Events with neither figure recorded remain NaN.
    events.actual_audience_size_number_of_participants_in_person.add(
        events.actual_audience_size_number_of_participants_online,
        fill_value=0,
    ).rename('attendances'),
], axis=1).rename(columns={
    'start_date': 'date'
}).loc[:, ['ward_code', 'date', 'event_unique_identifier', 'attendances']]

In [5]:
# Directory that receives every CSV/JSON file written by the cells below.
OUTPUT_DIR = '../../../../docs/metrics/events/signature/_data/'

# Create it (including missing parents); an existing directory is not an error.
os.makedirs(OUTPUT_DIR, exist_ok=True)

Events CSVs

In [6]:
# Per-ward summary: number of events (non-null identifiers) and total
# attendances (as whole numbers), written to events_by_ward.csv.
(
    events
    .groupby('ward_code')
    .agg(
        events=('event_unique_identifier', 'count'),
        attendances=('attendances', 'sum'),
    )
    .astype({'attendances': int})
    .to_csv(os.path.join(OUTPUT_DIR, 'events_by_ward.csv'))
)


In [7]:
# Running totals by month: count events and sum attendances per date, bucket
# the dates into months, then accumulate. Written to events_by_date.csv.
(
    events
    .groupby('date')
    .agg(
        events=('event_unique_identifier', 'count'),
        attendances=('attendances', 'sum'),
    )
    .astype({'attendances': int})
    .resample('M')
    .sum()
    .cumsum()
    .astype(int)
    .to_csv(os.path.join(OUTPUT_DIR, 'events_by_date.csv'))
)

Tickets CSVs

In [8]:
# Per-ward ticket summary: `orders` counts the non-null number_of_tickets
# entries, `tickets` is their sum as whole numbers. Written to tickets_by_ward.csv.
(
    tickets
    .groupby('ward_code')
    .number_of_tickets
    .agg(orders='count', tickets='sum')
    .astype({'tickets': int})
    .to_csv(os.path.join(OUTPUT_DIR, 'tickets_by_ward.csv'))
)

In [9]:
# Running monthly totals of orders and tickets, keyed on the event date:
# aggregate per event_date, bucket into months, then accumulate.
# Written to tickets_by_event_date.csv.
(
    tickets
    .groupby('event_date')
    .number_of_tickets
    .agg(orders='count', tickets='sum')
    .astype({'tickets': int})
    .resample('M')
    .sum()
    .cumsum()
    .to_csv(os.path.join(OUTPUT_DIR, 'tickets_by_event_date.csv'))
)

In [10]:
# Headline figures written to headlines.json. Key order is preserved in the file.
pd.Series({
    # Events with a non-null identifier.
    'total_events': events.event_unique_identifier.count(),
    # Sum of recorded attendances, as a whole number.
    'total_attendances': int(events.attendances.sum()),
    # Earliest and latest event dates in the data.
    'first_event': events.date.min(),
    'latest_event': events.date.max(),
    # Ticket figures cover only the rows kept by the event_date filter applied
    # when `tickets` was loaded.
    'total_ticket_orders_for_events_to_date': tickets.number_of_tickets.count(),
    # Cast to int like every other total in this notebook, so the JSON value is
    # an integer even when number_of_tickets was read as a float column.
    'total_tickets_sold_for_events_to_date': int(tickets.number_of_tickets.sum()),
}).to_json(os.path.join(OUTPUT_DIR, 'headlines.json'), indent=2)

In [11]:
# Sanity check: number of events that have a non-null attendance figure.
events['attendances'].count()

15