# Volunteering Data

Data extracted from Rosterfy and processed below.

In [31]:
from datetime import datetime, date
from pathlib import Path

import petl as etl

from utils import make_time_series, make_cumulative

In [None]:
# Key dates used as lower bounds when trimming the published time series:
# `induction_launch` filters the checkpoint summary, `first_event` the shift summaries.
induction_launch = date(2024, 9, 12)
first_event = date(2024, 8, 16)

Set up references to paths

In [None]:
# Repository root, relative to the directory this notebook runs from.
ROOT = Path('../')

# Input: published volunteer data files.
DATA = ROOT.joinpath('data', 'published', 'volunteers')

# Outputs: site data folders, one per group of summaries.
_TARGET = ROOT.joinpath('src', 'themes', 'volunteers', '_data')
PEOPLE = _TARGET / 'people'
SHIFTS = _TARGET / 'shifts'

# Make sure both output folders exist before anything is written to them.
for _output_dir in (PEOPLE, SHIFTS):
    _output_dir.mkdir(parents=True, exist_ok=True)

## Checkpoints

Read checkpoints data from CSV process.

In [34]:
# Load the checkpoint counts and pivot them wide: one row per date with one
# column per distinct `checkpoint` value, holding that checkpoint's `count`.
checkpoints = etl.fromcsv(DATA / 'checkpoints.csv')

# Parse the text columns into real dates and integers.
checkpoints = checkpoints.convert('date', etl.dateparser('%Y-%m-%d'))
checkpoints = checkpoints.convert('count', int)

# Order the rows, then spread the checkpoints out into columns.
checkpoints = checkpoints.sort(key=('date', 'checkpoint'))
checkpoints = checkpoints.recast(variablefield='checkpoint', valuefield='count')

# Any cell left as None (presumably a date with no record for that
# checkpoint) is treated as a zero count.
checkpoints = checkpoints.replaceall(None, 0)

Create summarise function. This takes a Pandas dataframe, resamples based on the desired frequency `freq`, adds cumulative counts of each column, converts to a PETL table and renames the date column based on the `title` provided.

In [35]:
def summarise(df, freq, title):
    """Resample a dataframe to `freq` and return it as a PETL table.

    The dataframe is passed through `make_time_series`, resampled at `freq`
    with each bucket summed, passed through `make_cumulative`, and converted
    to a PETL table whose `date` column is renamed to `title`.

    Args:
        df: Pandas dataframe to summarise.
        freq: Pandas resampling frequency string (e.g. 'W-SUN').
        title: Name given to the `date` column in the returned table.

    Returns:
        A PETL table with the date column named `title`.
    """
    series = make_time_series(df)
    bucket_totals = series.resample(freq).sum()
    with_cumulative = make_cumulative(bucket_totals).reset_index()

    table = etl.fromdataframe(with_cumulative)
    # `datetime` is the class here, so `datetime.date` is its unbound method:
    # each timestamp in the column is reduced to a plain calendar date.
    table = table.convert('date', datetime.date)
    return table.rename({'date': title})

Create a weekly summary of checkpoints passed

In [36]:
# Weekly (Sunday-ending) checkpoint summary, trimmed to weeks on or after the
# induction launch, written to the people data folder.
weekly_checkpoints = summarise(
    checkpoints.todataframe(), 'W-SUN', 'Week ending (Sunday)'
)
weekly_checkpoints = weekly_checkpoints.selectge(
    'Week ending (Sunday)', induction_launch
)
weekly_checkpoints.tocsv(PEOPLE / 'checkpoints_weekly.csv')

## Geography

In [37]:
# Split the geography summary on its `type` column and write one CSV per type.
geo = etl.fromcsv(DATA / 'geo-summary.csv')
geo_by_type = geo.facet('type')
for geography in geo_by_type:
    geo_by_type[geography].tocsv(PEOPLE / f'by_geo_{geography}.csv')

## Demographics

In [38]:
# Keep only the demographic categories listed here, then write one CSV per
# category.
demo = etl.fromcsv(DATA / 'demographics.csv')
demo = demo.selectin('category', ['age_range'])

demo_by_category = demo.facet('category')
for category in demo_by_category:
    demo_by_category[category].tocsv(PEOPLE / f'by_demographic_{category}.csv')

## Shifts

In [None]:
# Load the shift records and total them per (date, type).
shifts = etl.fromcsv(DATA / 'shifts.csv')

# Parse the numeric columns; anything left as None afterwards counts as zero.
shifts = shifts.convert(('demand', 'attended'), int)
shifts = shifts.convert('hours', float)
shifts = shifts.replaceall(None, 0)

# One row per date and shift type, carrying total attendance and total hours.
# Only `attended` and `hours` are aggregated, so `demand` is not carried over.
shifts = shifts.aggregate(
    key=['date', 'type'],
    aggregation={
        'attended': ('attended', sum),
        'hours': ('hours', sum),
    },
)

# Turn the ISO date strings into date objects and cache the result, since the
# table is reused by several summaries below.
shifts = shifts.convert('date', date.fromisoformat)
shifts = shifts.cache()

Create some summary functions with a combination of PETL and Pandas

In [40]:
def summarise(table: etl.Table, column, freq) -> etl.Table:
    """Pivot one measure of a shifts table by type and resample it over time.

    NOTE: this definition replaces the earlier `summarise(df, freq, title)`
    in the notebook namespace; cells that rely on the earlier version must be
    run before this one is defined.

    Args:
        table: PETL table with `date` and `type` columns plus `column`.
        column: Name of the measure to summarise (e.g. 'attended' or 'hours').
        freq: Pandas resampling frequency string (e.g. 'W-SUN').

    Returns:
        A PETL table sorted by `date`, with one row per resampled period and
        one column per distinct `type`, holding the summed `column` values.
    """
    narrow = table.cut('date', 'type', column)

    # `recast` expects `reducers` to be a mapping from each variable value
    # (here, each shift type) to a reducer; a bare callable would fail as soon
    # as a (date, type) pair had more than one row. Build the mapping so that
    # duplicates are summed.
    reducers = {kind: sum for kind in set(narrow.values('type'))}

    return (
        narrow
        .recast(variablefield='type', valuefield=column, reducers=reducers)
        # Dates with no rows for a given type come back as None; count as zero.
        .replaceall(None, 0)
        .todataframe()
        .pipe(make_time_series)
        .resample(freq)
        .sum()
        .reset_index()
        .pipe(etl.fromdataframe)
        # `datetime` is the class, so `datetime.date` is its unbound method:
        # each timestamp is reduced to a plain calendar date.
        .convert('date', datetime.date)
        .sort('date')
    )

In [None]:
# Weekly (Sunday-ending) totals for each shift measure, trimmed to weeks on or
# after the first event and written with the date column named `week_ending`.
for _measure, _filename in (
    ('attended', 'attended_by_week.csv'),
    ('hours', 'hours_by_week.csv'),
):
    _weekly = summarise(shifts, _measure, 'W-SUN')
    _weekly = _weekly.selectge('date', first_event)
    _weekly = _weekly.rename('date', 'week_ending')
    _weekly.tocsv(SHIFTS / _filename)

In [None]:
# Running weekly totals for each shift measure. The cumulative sum is taken
# over the full weekly series first and trimmed afterwards, so the first
# published week already includes anything recorded before `first_event`.
for _measure, _filename in (
    ('attended', 'attended_cumulative_by_week.csv'),
    ('hours', 'hours_cumulative_by_week.csv'),
):
    _running = (
        summarise(shifts, _measure, 'W-SUN')
        .todataframe()
        .set_index('date')
        .cumsum()
        .reset_index()
    )
    _published = etl.fromdataframe(_running)
    _published = _published.selectge('date', first_event)
    _published = _published.rename('date', 'week_ending')
    _published.tocsv(SHIFTS / _filename)

### Process summaries

Convert timestamp columns to datetime to work with OI Lume Viz

In [43]:
# checkpoints_monthly['Month ending'] = pd.to_datetime(checkpoints_monthly['Month ending']).dt.strftime('%Y-%m-%d')
# shifts_monthly['Month ending'] = pd.to_datetime(shifts_monthly['month ending']).dt.strftime('%Y-%m-%d')
# shifts_weekly['Month ending'] = pd.to_datetime(shifts_weekly['week ending']).dt.strftime('%Y-%m-%d')

# checkpoints_monthly.to_csv(os.path.join(OUT_DIR, 'checkpoints_monthly.csv'), index=False)
# shifts_monthly.to_csv(os.path.join(OUT_DIR, 'shifts_monthly.csv'), index=False)
# shifts_weekly.to_csv(os.path.join(OUT_DIR, 'shifts_weekly.csv'), index=False)