In [2]:
import ast
import os
import sys
from pathlib import Path

import pandas as pd

In [3]:
# Absolute path three directory levels above the current working directory.
TOP_DIR = Path('../../../').resolve()

# Add it to the import search path once, so re-running this cell does not
# append duplicates (presumably to allow importing project-local modules).
if str(TOP_DIR) not in sys.path:
    sys.path.append(str(TOP_DIR))

In [4]:
# Input: the partnerships metrics CSV under the data directory.
MAIN_DATA_DIR = TOP_DIR / 'data/metrics/partnerships'
PARTNERSHIPS_DATA = MAIN_DATA_DIR / 'partnerships.csv'

# Output: directory that receives every CSV/JSON file written by this notebook.
# Created up front (including missing parents); a no-op if it already exists.
SITE_DATA_DIR = TOP_DIR / 'docs/metrics/partnerships/_data'
SITE_DATA_DIR.mkdir(parents=True, exist_ok=True)

In [5]:
# Load the partnerships table, parsing start_date as a datetime column.
partnerships_data = pd.read_csv(PARTNERSHIPS_DATA, parse_dates=['start_date'])

# list_of_countries_involved is stored as text; missing values become the
# literal '[]' and every cell is then parsed with ast.literal_eval.
partnerships_data['list_of_countries_involved'] = (
    partnerships_data['list_of_countries_involved']
    .fillna('[]')
    .apply(ast.literal_eval)
)

Summarise

The country code reference file (`data/reference/country_codes.json`) was created from the world GeoJSON with `jq`:

```sh
jq '[.features[].properties]' docs/_data/geojson/world.geojson > data/reference/country_codes.json
```

In [6]:
# Lookup table indexed by NAME_LONG with a single ADM0_A3 column.
country_codes = (
    pd.read_json(TOP_DIR / 'data/reference/country_codes.json')
    [['ADM0_A3', 'NAME_LONG']]
    .set_index('NAME_LONG')
)

In [7]:
# One row per distinct country: explode the per-partnership country lists,
# count occurrences of each value, and order the result by country label.
countries_data = (
    partnerships_data['list_of_countries_involved']
    .rename('countries')
    .explode()
    .value_counts()
    .to_frame('number_of_partners')
    .sort_index()
)

In [8]:
# Attach the ADM0_A3 code to each counted country. The left join keeps every
# row of countries_data; rows with no match in country_codes get '' from
# fillna instead of NaN.
combined = countries_data.merge(
    country_codes,
    how='left',
    left_on='countries',
    right_index=True,
).fillna('')

combined.to_csv(SITE_DATA_DIR.joinpath('countries.csv'))

In [9]:
# Headline figures written to headlines.json as a flat object of integers.

# Tally the event formats once and look each label up with .get(..., 0):
# indexing value_counts() directly raises KeyError when a format does not
# occur in the data at all.
event_format_counts = partnerships_data['online_in_person_both'].value_counts()

pd.Series({
    'total_partnerships': len(partnerships_data),
    # count() tallies the non-null entries of the column.
    'total_international': partnerships_data.international_element_to_project_.count(),
    'total_unique_countries': len(countries_data),
    'total_events': partnerships_data.total_number_of_events.sum(),
    'total_audience': partnerships_data.audience_numbers.sum(),
    'total_digital_audience': partnerships_data.digital_audiences.sum(),
    'total_participants': partnerships_data.number_of_participants.sum(),
    'total_in_person_events': event_format_counts.get('In person', 0),
    'both_online_in_person_events': event_format_counts.get('Both', 0),
    'online_only_events': event_format_counts.get('Online', 0)
}).astype(int).to_json(
    os.path.join(SITE_DATA_DIR, 'headlines.json'),
    date_format='iso', indent=2
)

Summarise by ward

In [10]:
# Count partner_organisation entries per venue ward code. Missing values in
# every column are replaced with 'UNKNOWN' first, so rows without a ward code
# are grouped under that label instead of being dropped by groupby.
(
    partnerships_data
    .fillna('UNKNOWN')
    .groupby('partner_venue_ward_code')['partner_organisation']
    .count()
    .to_csv(SITE_DATA_DIR.joinpath('partnerships_by_ward.csv'))
)

Summarise by month

In [11]:
# Running total of partnerships over time: count per start_date, sum into
# month-start ('MS') bins, then accumulate across months.
(
    partnerships_data
    .groupby('start_date')['partner_organisation']
    .count()
    .resample('MS')
    .sum()
    .cumsum()
    .to_csv(SITE_DATA_DIR.joinpath('partnerships_by_month.csv'))
)

Event type

In [12]:
# Number of partner_organisation entries per event format, as a two-column
# frame (online_in_person_both, event_type).
# The frame is built first and written in a separate statement: chaining
# .to_csv(path) onto the assignment would bind event_type to None.
event_type = (
    partnerships_data
    .groupby('online_in_person_both')['partner_organisation']
    .count()
    .to_frame('event_type')
    .reset_index()
)
event_type.to_csv(os.path.join(SITE_DATA_DIR, 'event_type.csv'), index=False)

Project Artform

In [13]:
# Single-row table of partner counts with one column per project artform.
# Known artform labels are renamed to snake_case column names; any label not
# listed in the mapping keeps its original spelling.
# The frame is built first and written in a separate statement: chaining
# .to_csv(path) onto the assignment would bind artform to None.
artform = (
    pd.DataFrame({
        'number_of_partners': partnerships_data.groupby('project_artform').partner_organisation.count()
    })
    .T
    .rename(columns={
        'Combined arts': 'combined_arts',
        'Dance': 'dance',
        'Literature': 'literature',
        'Music': 'music',
        'Theatre': 'theatre',
        'Visual art': 'visual_art'
    })
    .reset_index()
)
artform.to_csv(os.path.join(SITE_DATA_DIR, 'artforms.csv'), index=False)

## Project outputs

Clean up the Project Outputs column

In [76]:
# Count how often each project output occurs and lay the counts out as one
# row with a column per output.
# NOTE(review): the cleanup below presumably targets cells holding a
# stringified list such as "['A', 'B']" — confirm against the CSV.
outputs = (
    partnerships_data.project_output
    .str.split(',')
    .explode()
    .str.replace("'", '')
    .str.strip("[,] ")
    .value_counts()
    .to_frame()
    .T
    # Splitting on ',' also breaks up the one label that itself contains
    # commas ("Public Artwork (sculptures, murals, etc.)"). Drop the stray
    # fragments and restore the label below. errors='ignore' keeps the cell
    # working when those fragments are not present in the data.
    .drop(columns=['murals', 'etc.)'], errors='ignore')
    .rename(columns={
        'Public Artwork (sculptures': 'Public Artwork',
        'School event': 'School Event',
    })
)

outputs.to_csv(os.path.join(SITE_DATA_DIR, 'project_outputs.csv'), index=False)