# My LEEDS 2023

Prepares data for the My LEEDS 2023 page: per-ward, weekly and monthly summaries of events and audience, plus headline totals.

In [1]:
import os
import sys
import pandas as pd
import numpy as np

In [2]:
# Resolve the relative lib directory to an absolute path and put it on the
# import search path, unless it is already there.
lib_dir = os.path.abspath('../../../../lib/')
if lib_dir not in sys.path:
    sys.path.append(lib_dir)

import util.geo
import util.convert

Read in the event data and keep only past or current events (those starting before now) that are tagged with the appropriate project type, project name and a public event type

In [3]:
# Load the master events table (start_date parsed as datetimes) and, in the same
# chain, keep only rows that satisfy all four conditions below.
data = (
    pd.read_csv('../../../../data/metrics/events/master/all.csv', parse_dates=['start_date'])
    .loc[
        lambda events: (
            (events['project_type'] == 'Produce (Signature)')      # signature productions only
            & (events['project_name'] == '12 - My LEEDS 2023')     # this project only
            & (events['event_type'].str.contains('Public Event -'))  # any public event type
            & (events['start_date'] < pd.Timestamp.now())          # already started
        )
    ]
)

Calculate a series holding the distinct wards for each event

In [4]:
# Missing ward cells become the string '[]' before conversion.
# NOTE(review): literal_converter presumably parses each string into a Python list - confirm in util.convert.
ward_lists = util.convert.literal_converter(data['ward_(from_venue)'].fillna(value='[]'))
# np.unique leaves each event with a sorted array of its distinct ward names.
wards = ward_lists.apply(np.unique)

Print a list of events which have no ward, or more than one

In [5]:
# Show every event whose ward array does not contain exactly one entry
# (either no ward at all, or several distinct wards).
ward_counts = wards.apply(len)
data.loc[ward_counts != 1]

Unnamed: 0,event_unique_identifier,project_name,event_name,event_type,season,start_date,airtable_id,actual_audience_size_number_of_participants_in_person,project_type,ward_(from_venue),postcode_(from_venue),end_date,number_of_booked_participants,actual_audience_size_number_of_participants_online
11,12 - My LEEDS 2023: My Leeds Event Day 1 (Live...,12 - My LEEDS 2023,My Leeds Event Day 1 (Live Activation),['Public Event - Performance'],['Season 2 - Playing'],2023-07-08,265,,Produce (Signature),,,2023-07-09,,
506,12 - My LEEDS 2023: The Gift: digital gift lau...,12 - My LEEDS 2023,The Gift: digital gift launch,['Public Event - Digital Experience'],['Season 2 - Playing'],2023-06-19,656,,Produce (Signature),,,2023-06-19,,
570,12 - My LEEDS 2023: The Gift: Garments,12 - My LEEDS 2023,The Gift: Garments,['Public Event - Installation'],['Season 2 - Playing'],2023-06-17,657,,Produce (Signature),,,2023-08-27,,
686,12 - My LEEDS 2023: My LEEDS Summer: Alwoodley...,12 - My LEEDS 2023,My LEEDS Summer: Alwoodley Festival of Sport a...,['Public Event - Performance'],['Season 2 - Playing'],2023-07-09,430,,Produce (Signature),"['Alwoodley', 'Alwoodley', 'Alwoodley', 'Alwoo...","['LS17 7NZ', 'LS17 7PF', 'LS17 5HX', 'LS17 7HL...",2023-07-09,,


Pick the first ward from each event's de-duplicated, sorted list (a missing value when the list is empty) and create a new `ward_code` column from it

In [6]:
# First entry of each ward array; events with an empty array get NaN instead.
first_ward = wards.apply(lambda names: names[0] if len(names) > 0 else np.nan)
# NOTE(review): presumably normalise_leeds_wards cleans up ward names and
# ward_name_to_code maps them to codes - confirm in util.geo.
data['ward_code'] = util.geo.ward_name_to_code(util.geo.normalise_leeds_wards(first_ward))

Calculate audience size as in-person plus online participants, treating missing values as zero

In [7]:
# Total audience per event: in-person plus online, with a missing count on
# either side treated as zero so the sum is never NaN.
in_person = data['actual_audience_size_number_of_participants_in_person'].fillna(0)
online = data['actual_audience_size_number_of_participants_online'].fillna(0)
data['audience_size'] = in_person + online

Create output directory

In [8]:
# Directory that receives the CSV/JSON files written by the cells below.
OUT_DIR = '../../../../docs/metrics/events/my-leeds-2023/_data/events'
# Create the whole directory tree; an already existing directory is not an error.
os.makedirs(name=OUT_DIR, exist_ok=True)

Create counts of events and total audience by ward and save as CSV

In [9]:
# Single pass over the ward groups:
#   events   - number of rows with a non-null audience_size in the ward
#   audience - sum of audience_size in the ward
by_ward = data.groupby('ward_code').audience_size.agg(events='count', audience='sum')
by_ward.to_csv(os.path.join(OUT_DIR, 'by_ward.csv'))

Create summary by week (weeks ending on a Friday), including cumulative totals

In [10]:
# Per-start-date event count and integer audience total.
daily_totals = (
    data.groupby('start_date').audience_size
    .agg(events='count', audience='sum')
    .astype({'audience': int})
)

# Roll the daily figures up into weeks ending on a Friday.
by_week = daily_totals.resample('W-FRI').sum()
by_week.index.name = 'week_ending'

# Running totals across the weeks.
by_week = by_week.assign(
    cumulative_events=lambda weekly: weekly['events'].cumsum(),
    cumulative_audience=lambda weekly: weekly['audience'].cumsum(),
)
by_week.to_csv(os.path.join(OUT_DIR, 'by_week.csv'))

Create summary by month

In [11]:
# Build the monthly summary straight from the event start dates rather than
# from the weekly table. Weekly rows are labelled with the Friday that ends the
# week, so resampling them to months would count an event from the last days of
# one month in the following month whenever its week crosses the month boundary
# (it would also needlessly sum the weekly cumulative columns).
by_month = (
    data.groupby('start_date').audience_size
    .agg(events='count', audience='sum')  # per-day event count and audience total
    .astype({'audience': int})
    # NOTE(review): newer pandas releases prefer the 'ME' alias for month-end;
    # confirm against the pandas version in use before changing it.
    .resample('M').sum()
)
# Running totals across the months.
by_month['cumulative_events'] = by_month.events.cumsum()
by_month['cumulative_audience'] = by_month.audience.cumsum()
by_month.index.name = 'month_ending'
by_month.to_csv(os.path.join(OUT_DIR, 'by_month.csv'))

Create headline totals (events and audience) from the final cumulative figures

In [12]:
# Keep only the running-total columns, take the most recent month, relabel
# 'cumulative_*' as 'total_*', and write that single row out as JSON.
cumulative_only = by_month.drop(columns=['events', 'audience'])
headline_totals = cumulative_only.iloc[-1].rename(
    lambda label: label.replace('cumulative', 'total')
)
headline_totals.to_json(os.path.join(OUT_DIR, 'headlines.json'))