In [1]:
import os
import sys
import pandas as pd
import numpy as np

In [2]:
# Put the repository's shared lib directory on the import path (once only)
# so that the project-local `util` package can be imported below.
lib_dir = os.path.abspath('../../../../lib/')
if lib_dir not in sys.path:
    sys.path.append(lib_dir)

import util.geo
import util.convert

In [3]:
# Load the master events table and keep only the rows for The Barn
# ('Produce (Signature)' project type) that are public events and whose
# start date is already in the past at the time the notebook runs.
data = pd.read_csv('../../../../data/metrics/events/master/all.csv', parse_dates=['start_date'])
# The trailing .copy() makes the filtered frame independent, so the column
# assignments made in later cells do not raise SettingWithCopyWarning.
data = data.loc[
    (data.project_type == 'Produce (Signature)') &
    (data.project_name == '05 - The Barn') &
    # Plain substring match; rows with a missing event_type count as non-matching.
    (data.event_type.str.contains('Public Event -', regex=False, na=False)) &
    (data.start_date < pd.Timestamp.now())
].copy()

In [4]:
# Venue wards arrive as text; missing values are replaced by the text '[]'
# before conversion, and each converted entry is reduced to its unique values.
# NOTE(review): util.convert.literal_converter is project code not visible here;
# it presumably parses each string into a Python list — confirm.
wards = util.convert.literal_converter(
    data['ward_(from_venue)'].fillna('[]')
).apply(np.unique)

In [5]:
# Show the events whose venue does not resolve to exactly one distinct ward
# (either none at all, or more than one).
data.loc[wards.apply(len).ne(1)]

Unnamed: 0,event_unique_identifier,project_name,event_name,event_type,season,start_date,airtable_id,actual_audience_size_number_of_participants_in_person,project_type,ward_(from_venue),postcode_(from_venue),end_date,number_of_booked_participants,actual_audience_size_number_of_participants_online
363,05 - The Barn: The Space Digital Interactive P...,05 - The Barn,The Space Digital Interactive Playspace (schoo...,"['Public Event - Digital Experience', 'CLE - C...",['Season 2 - Playing'],2023-05-16,1623,4.0,Produce (Signature),,,2023-05-18,7.0,0.0
543,05 - The Barn: The Space Digital Interactive P...,05 - The Barn,The Space Digital Interactive Playspace,"['Public Event - Digital Experience', 'CLE - C...",['Season 2 - Playing'],2023-05-16,351,128.0,Produce (Signature),,,2023-05-18,7.0,


In [6]:
# Use the first ward listed for each event, or NaN when there is none, then
# pass the result through the project's ward helpers.
# NOTE(review): the util.geo helpers are not visible here; by their names they
# normalise Leeds ward names and map them to ward codes — confirm.
data['ward_code'] = util.geo.ward_name_to_code(
    util.geo.normalise_leeds_wards(
        wards.apply(lambda names: np.nan if len(names) == 0 else names[0])
    )
)

In [7]:
# Total audience per event = in-person + online participants, with a missing
# count on either side treated as zero.
data['audience_size'] = (
    data['actual_audience_size_number_of_participants_in_person'].fillna(0)
    + data['actual_audience_size_number_of_participants_online'].fillna(0)
)

In [8]:
# Directory that receives every CSV/JSON file written by this notebook;
# it is created on demand and left alone if it already exists.
OUT_DIR = '../../../../docs/metrics/events/the-barn/_data/events'
os.makedirs(name=OUT_DIR, exist_ok=True)

In [9]:
# One row per ward code: number of events and total audience.
# Events with no ward code are left out, as groupby skips missing keys by default.
by_ward = (
    data.groupby('ward_code').audience_size
    .agg(events='count', audience='sum')
)
by_ward.to_csv(os.path.join(OUT_DIR, 'by_ward.csv'))

In [10]:
# Event counts and audience totals per start date, rolled up into weeks that
# end on a Friday, with running totals alongside the weekly figures.
by_week = (
    data.groupby('start_date').audience_size
    .agg(events='count', audience='sum')
    .astype({'audience': int})
    .resample('W-FRI').sum()
    .rename_axis('week_ending')
    .assign(
        cumulative_events=lambda weekly: weekly.events.cumsum(),
        cumulative_audience=lambda weekly: weekly.audience.cumsum(),
    )
)
by_week.to_csv(os.path.join(OUT_DIR, 'by_week.csv'))

In [11]:
# Monthly event counts and audience totals, with running totals.
# The figures are aggregated from the events' own start dates rather than from
# the Friday week-ending bins: a week that straddles a month boundary would
# otherwise push its events into the following month.
by_month = (
    data.groupby('start_date').audience_size
    .agg(events='count', audience='sum')
    .astype({'audience': int})
    # 'M' is the month-end frequency alias (spelled 'ME' in newer pandas releases).
    .resample('M').sum()
)
by_month['cumulative_events'] = by_month.events.cumsum()
by_month['cumulative_audience'] = by_month.audience.cumsum()
by_month.index.name = 'month_ending'
by_month.to_csv(os.path.join(OUT_DIR, 'by_month.csv'))

In [12]:
# Headline figures: the running totals from the most recent month, written as
# JSON with the 'cumulative_' prefix replaced by 'total_'.
headlines_path = os.path.join(OUT_DIR, 'headlines.json')
latest_totals = by_month.drop(columns=['events', 'audience']).iloc[-1]
latest_totals.rename(
    lambda label: label.replace('cumulative', 'total')
).to_json(headlines_path)