In [60]:
import os
import pandas as pd
from events import load_event_data, get_first_ward, normalise_ward

In [61]:
latest_date = pd.Timestamp.today().ceil('D')

In [62]:
tickets = pd.read_csv('../../../../data/metrics/events/tickets/orders.csv', parse_dates=['created_at', 'event_date'])
tickets = tickets[tickets.event_date <= latest_date]

In [63]:
OUTPUT_DIR='../../../../docs/metrics/events/signature/_data/'
os.makedirs(OUTPUT_DIR, exist_ok=True)

In [64]:
events = load_event_data()

events = pd.concat([
  events,
  events.pipe(get_first_ward).pipe(normalise_ward).fillna('UNKNOWN'),
  pd.Series(
      events.actual_audience_size_number_of_participants_in_person + events.actual_audience_size_number_of_participants_online,
      name='attendances'
  )
], axis=1).rename(columns={
    'start_date': 'date'
}).loc[:, ['ward_code', 'date', 'event_unique_identifier', 'attendances', 'actual_audience_size_number_of_participants_in_person', 'actual_audience_size_number_of_participants_online' ]]


In [65]:
print(events.actual_audience_size_number_of_participants_in_person.sum())

129436.0


Events CSVs

In [66]:
pd.DataFrame({
  'events': events.groupby('ward_code').event_unique_identifier.count(),
  'in_person': events.groupby('ward_code').actual_audience_size_number_of_participants_in_person.sum().astype(int),
  'online': events.groupby('ward_code').actual_audience_size_number_of_participants_online.sum().astype(int),
}).to_csv(os.path.join(OUTPUT_DIR, 'events_by_ward.csv'))


In [67]:
pd.DataFrame({
  'events': events.groupby('date').event_unique_identifier.count(),
  'attendances': events.groupby('date').attendances.sum().astype(int),
}).resample('M').sum().cumsum().astype(int).to_csv(os.path.join(OUTPUT_DIR, 'events_by_date.csv'))

Tickets CSV

In [68]:
pd.DataFrame({
  'orders': tickets.groupby('ward_code').number_of_tickets.count(),
  'tickets': tickets.groupby('ward_code').number_of_tickets.sum().astype(int),
}).to_csv(os.path.join(OUTPUT_DIR, 'tickets_by_ward.csv'))

In [69]:
pd.DataFrame({
  'orders': tickets.groupby('event_date').number_of_tickets.count(),
  'tickets': tickets.groupby('event_date').number_of_tickets.sum().astype(int),
}).resample('M').sum().cumsum().to_csv(os.path.join(OUTPUT_DIR, 'tickets_by_event_date.csv'))

In [70]:
pd.Series({
    'total_events': events.event_unique_identifier.count(),
    'total_in_person_attendances': events.actual_audience_size_number_of_participants_in_person.sum().astype(int),
    'total_online_attendances': events.actual_audience_size_number_of_participants_online.sum().astype(int),
    'first_event': events.date.min(),
    'latest_event': events.date.max(),
    'total_ticket_orders_for_events_to_date': tickets.number_of_tickets.count(),
    'total_tickets_sold_for_events_to_date': tickets.number_of_tickets.sum(),
}).to_json(os.path.join(OUTPUT_DIR, 'headlines.json'), indent=2)

In [71]:
print(events.attendances)

2      NaN
3      NaN
4      NaN
5      NaN
6      NaN
        ..
1166   NaN
1167   NaN
1168   NaN
1169   NaN
1170   NaN
Name: attendances, Length: 745, dtype: float64
