# Volunteering Data

Data extracted from Rosterfy and processed below.

In [2]:
import petl as etl
from pathlib import Path
from datetime import datetime

In [3]:
# Base directory: the parent of the current working directory.
ROOT = Path('../')
# Input directory holding the volunteer extracts read by this notebook.
DATA = ROOT.joinpath('data/volunteers')
# Output directories for the prepared datasets.
_TARGET = ROOT.joinpath('src/volunteers/_data')
SHIFTS = _TARGET.joinpath('shifts')
# Create the shifts output directory (and any missing parents) up front so
# the later `tocsv` calls have somewhere to write.
SHIFTS.mkdir(parents=True, exist_ok=True)

In [4]:
# Load the raw shifts extract and coerce the numeric columns; the result is
# cached because several later cells read from `data`.
data = (
    etl.fromcsv(DATA / 'shifts-raw.csv')
    .convert(('demand', 'attended'), int)
    .convert('hours', float)
    .cache()
)

Create some summary functions with a combination of PETL and Pandas

In [5]:
import pandas as pd

def summarise(table: etl.Table, column, freq) -> etl.Table:
    """Summarise one measure of a shifts table per shift type and period.

    Args:
        table: PETL table containing 'date', 'type' and `column` fields.
        column: Name of the measure field to pivot and total (this notebook
            uses 'attended' and 'hours').
        freq: Pandas resampling frequency handed to `resample` (this
            notebook uses 'W-FRI' and 'MS').

    Returns:
        A PETL table sorted by 'date', with one row per resampled period and
        one column per distinct 'type' value.
    """
    return (
        # Read from the table that was passed in, not the notebook-global
        # `event_shifts`, so the result does not depend on cell order.
        table
        .cut('date', 'type', column)
        # Pivot to wide form: one column per 'type' value.
        # NOTE(review): petl documents `reducers` as a dict of functions;
        # a bare `sum` presumably only works while each (date, type) pair
        # is unique in `table` - confirm against the petl docs.
        .recast(variablefield='type', valuefield=column, reducers=sum)
        # Types with no row for a given date come through as None.
        .replaceall(None, 0)
        .todataframe()
        .pipe(resample, freq)
        .pipe(etl.fromdataframe)
        # `datetime` is the class (from `from datetime import datetime`), so
        # this calls its `date()` method on each value; presumably the
        # values are pandas Timestamps at this point - confirm.
        .convert('date', datetime.date)
        .sort('date')
    )

def resample(df: pd.DataFrame, freq, date_field='date') -> pd.DataFrame:
    """Sum the columns of `df` over `freq`-sized periods of `date_field`.

    Args:
        df: Frame holding a date column plus the columns to total. It is
            left unmodified.
        freq: Pandas resampling frequency string (e.g. 'W-FRI', 'MS').
        date_field: Name of the column holding the dates.

    Returns:
        A new frame with one row per period, `date_field` restored as an
        ordinary column and the remaining columns summed within each period.
    """
    # Build the date-indexed frame from a copy (`assign` returns a new frame)
    # instead of overwriting the column and index of the caller's object.
    indexed = df.assign(
        **{date_field: pd.DatetimeIndex(df[date_field])}
    ).set_index(date_field)
    return indexed.resample(freq).sum().reset_index()

def make_cumulative(df: pd.DataFrame):
    """Return `df` with every column except 'date' replaced by its running total.

    The 'date' column is moved to the index while the cumulative sum is
    taken, then restored as the first column of the returned frame.
    """
    running_totals = df.set_index('date').cumsum()
    return running_totals.reset_index()

In [6]:
# Total attendance and hours for each (date, type) pair; None values are
# replaced with 0 first so that `sum` can add them up.
event_shifts = (
    data
    .replaceall(None, 0)
    .aggregate(
        key=['date', 'type'],
        aggregation={
            measure: (measure, sum)
            for measure in ('attended', 'hours')
        },
    )
    .cache()
)

In [7]:
# Write per-type totals by week (weeks ending on Friday), one file per measure.
for column, filename in (
    ('attended', 'attended_by_week.csv'),
    ('hours', 'hours_by_week.csv'),
):
    weekly = summarise(event_shifts, column, 'W-FRI')
    weekly.rename('date', 'week_ending').tocsv(SHIFTS / filename)

In [10]:
# Write cumulative per-type totals by month (month-start frequency), one
# file per measure.
for column, filename in (
    ('attended', 'attended_cumulative_by_month.csv'),
    ('hours', 'hours_cumulative_by_month.csv'),
):
    monthly = summarise(event_shifts, column, 'MS').todataframe()
    cumulative = etl.fromdataframe(make_cumulative(monthly))
    cumulative.rename('date', 'month').tocsv(SHIFTS / filename)

### Process summaries

Convert timestamp columns to datetime to work with OI Lume Viz

In [9]:
# NOTE(review): this whole cell is commented out and does nothing when run.
# The names it refers to (checkpoints_monthly, shifts_monthly, shifts_weekly,
# OUT_DIR, os) are not defined in the visible part of this notebook, so it
# would fail if simply uncommented. It also writes 'Month ending' on
# shifts_weekly from the 'week ending' column - confirm that is intended
# before reviving it.
# checkpoints_monthly['Month ending'] = pd.to_datetime(checkpoints_monthly['Month ending']).dt.strftime('%Y-%m-%d')
# shifts_monthly['Month ending'] = pd.to_datetime(shifts_monthly['month ending']).dt.strftime('%Y-%m-%d')
# shifts_weekly['Month ending'] = pd.to_datetime(shifts_weekly['week ending']).dt.strftime('%Y-%m-%d')

# checkpoints_monthly.to_csv(os.path.join(OUT_DIR, 'checkpoints_monthly.csv'), index=False)
# shifts_monthly.to_csv(os.path.join(OUT_DIR, 'shifts_monthly.csv'), index=False)
# shifts_weekly.to_csv(os.path.join(OUT_DIR, 'shifts_weekly.csv'), index=False)