# Vol Forecaster
Creates a volume forecast from the following inputs:
- Baseline weekly volume (current volume 'level' per week).
- MAU Forecast; this defines the volume growth over time.
- intra-week / intra-month seasonality; self explanatory. Created with seasonality extractor notebook.
- forecast offsets; offsets which are added (or subtracted) from the forecast, e.g. for product launch.
- holiday impacts; percentage offsets for holidays which are applied multiplicatively to the forecast (after the forecast offsets have been added) to give the final forecast.

See [this](https://docs.google.com/spreadsheets/d/1xbLEsJI1o42iKuMTvckXg9bpmkb6qPCLWntP3Tpq8yI/edit#gid=1903889456) file for an example of the required data & column naming/formatting conventions. Follow the column names in this file and make sure that all dates are formatted as 'dd/mm/yyyy', percentages as '0.0%', and decimals as '0.0' (no thousands separator, i.e. no ','). This is because the Google Sheets API returns these columns as strings, and we need to parse them here in the script with a known format.

In [1]:
import Bens_forecasting_utils as fc
import pandas as pd
import datetime
import numpy as np
import logging
import sys

###### Set Parameters Here:

In [2]:
# Notebook-wide configuration: forecast date range, Google Sheet ranges to read, and bound multipliers.
parameters = {
    # --- Forecast date range, given as (yyyy, mm, dd) ---
    # Use the first day of a week: baseline() keeps only MAU rows whose
    # 'Week Starting' is on or after the start date.
    # Default start date of baseline().
    # NOTE(review): the original note called this the "older" date, yet it is
    # later than forecast_start_date_2 below - confirm which one is intended.
    'forecast_start_date': datetime.datetime(2024, 12, 23),
    # Second start date; only needed when two forecasts are compared.
    'forecast_start_date_2': datetime.datetime(2024, 9, 30),
    # Last day of the forecast date range.
    'forecast_end_date': datetime.datetime(2025, 12, 31),

    # --- Forecast inputs: Google Sheet id and the A1 ranges read from it ---
    'sheet_id': '1zB9--uEjG3X0kSrer_LYGhtS-DWisxp7ijCf1s-Ei8Y',
    'mau_fc_range': 'mau_fc!A1:I',
    'intra_week_seasonality_range': 'intra_week_seasonality!A1:D',
    'intra_month_seasonality_range': 'intra_month_seasonality!A1:C',
    'business_line_list_range': 'weekly_base_vol!A1:E',
    'forecast_offsets_range': 'offsets!A1:D',
    'holiday_impacts_range': 'holidays!A1:F',

    # --- Multipliers applied to the final forecast to get its upper / lower bound ---
    'forecast_upper_bound': 1.075,
    'forecast_lower_bound': 0.925,
}

###### Calculate Forecast Baseline:

In [3]:
def baseline(start_date=parameters['forecast_start_date']):
    """Build the daily baseline volume forecast for every business line.

    Creates one row per (forecast date, business line) between ``start_date``
    and ``parameters['forecast_end_date']``, then joins the MAU forecast and
    the intra-week / intra-month seasonality tables imported from the Google
    Sheet ranges named in ``parameters``.

    Side effects: configures root logging to stdout (INFO level) and logs a
    warning when forecast weeks are missing from the MAU forecast.

    Args:
        start_date: First day of the forecast. Also used as the cut-off for
            the MAU forecast: only rows whose 'Week Starting' is on or after
            this date contribute to the cumulative growth.

    Returns:
        pandas.DataFrame sorted by business line and date, holding the joined
        inputs plus a 'baseline_vol_forecast' column. Rows that found no
        matching MAU or seasonality entry in the left joins carry NaN there.
    """
    FORMAT = '%(levelname)s: %(message)s'
    logging.basicConfig(level=logging.INFO, stream=sys.stdout, format=FORMAT)

    # Create dataframe with our date range & add columns for calendar week, calendar month, weekday & day of month
    df = pd.DataFrame({'forecast_date': pd.date_range(start=start_date, end=parameters['forecast_end_date'])})
    df['iso_calendar_week'] = df['forecast_date'].dt.strftime('%G-w%V')
    df['calendar_month'] = df['forecast_date'].dt.strftime('%Y-%m')
    df['day_of_week'] = df['forecast_date'].dt.day_of_week + 1  # +1 so Mon=1 as per ISO 8601
    df['day_of_month'] = df['forecast_date'].dt.day

    # Import mau forecast & parse % values (input values must be strings with 0.0% format)
    df_mau_fc = fc.import_gsheet_to_df(parameters['sheet_id'], parameters['mau_fc_range'])
    df_mau_fc['% Change'] = df_mau_fc['% Change'].str.rstrip('%').astype('float') / 100.0
    df_mau_fc['% Change'] = df_mau_fc['% Change'].fillna(0.0)
    df_mau_fc['Week Starting'] = pd.to_datetime(df_mau_fc['Week Starting'], format='%d/%m/%Y')
    # .copy() so the column added below is written to an independent frame and
    # not to a slice of the imported one (avoids SettingWithCopyWarning).
    df_mau_fc = df_mau_fc[df_mau_fc['Week Starting'] >= start_date].copy()
    # Accumulate growth per market in chronological order so the result does not
    # depend on the row order of the sheet; the stable sort keeps the sheet order
    # for equal dates and the assignment re-aligns on the original index.
    # TODO : change language column to something more agnostic
    df_mau_fc['cumulative_%growth'] = (
        df_mau_fc
        .sort_values('Week Starting', kind='stable')
        .groupby('Language/Market/Iban')['% Change']
        .cumsum()
    )

    # Check if we have an mau forecast for every week in our forecast period, and give a warning if not.
    # A set difference (rather than np.setdiff1d) tolerates blank week labels in the sheet.
    missing_weeks = sorted(set(df['iso_calendar_week'].unique()) - set(df_mau_fc['Calendar Week'].unique()))
    if missing_weeks:
        logging.warning(
            '%d week%s missing from the MAU forecast: %s',
            len(missing_weeks),
            's' if len(missing_weeks) > 1 else '',
            missing_weeks,
        )
        logging.warning('These weeks will not have a forecast correctly calculated!!!')

    # Import weekly base volume & intra-week + intra-month seasonality
    df_base_vol = fc.import_gsheet_to_df(parameters['sheet_id'], parameters['business_line_list_range'])
    df_intra_week_seasonality = fc.import_gsheet_to_df(parameters['sheet_id'], parameters['intra_week_seasonality_range'])
    df_intra_month_seasonality = fc.import_gsheet_to_df(parameters['sheet_id'], parameters['intra_month_seasonality_range'])

    # The sheet import delivers these columns as text, so convert them to floats before joining / multiplying.
    for frame, column in (
        (df_intra_week_seasonality, 'dow'),
        (df_intra_month_seasonality, 'dom'),
        (df_intra_month_seasonality, 'intra_month_seasonality'),
        (df_intra_week_seasonality, 'intra_week_distro'),
        (df_base_vol, 'weekly_base_vol'),
    ):
        frame[column] = pd.to_numeric(frame[column], downcast="float")

    # Cross merge df (containing our list of forecast dates) with base_vol to get cartesian product of all dates & business lines
    df = df.merge(df_base_vol, how='cross')
    # Join MAU fc, intra-week & intra-month seasonality to get mega df with everything we need to calculate the final baseline volume forecast
    df = df.merge(df_mau_fc, how='left', left_on=['language/market/iban', 'iso_calendar_week'], right_on=['Language/Market/Iban', 'Calendar Week'])
    df = df.merge(df_intra_week_seasonality, how='left', left_on=['business_line_alias', 'day_of_week'], right_on=['business_line_alias', 'dow'])
    df = df.merge(df_intra_month_seasonality, how='left', left_on=['business_line_alias', 'day_of_month'], right_on=['business_line_alias', 'dom'])
    df = df.sort_values(by=['business_line_alias', 'forecast_date'])

    # Calculate daily baseline vol forecast by multiplying weekly baseline vol by growth, intra-week, & intra-month seasonality.
    # NOTE: any factor left NaN by the left joins above makes the forecast NaN for that row.
    df['baseline_vol_forecast'] = (
        df['weekly_base_vol']
        * (1 + df['cumulative_%growth'])
        * df['intra_week_distro']
        * (1 + df['intra_month_seasonality'])
    )

    return df


In [4]:
# Build the baseline forecast from the primary start date.
baseline_1 = baseline(start_date=parameters['forecast_start_date'])
# Optional second run from the comparison start date; enable only when two forecasts are compared.
#baseline_2 = baseline(start_date=parameters['forecast_start_date_2'])

INFO: Importing range: 'mau_fc!A1:I' from G-sheet with id: '1zB9--uEjG3X0kSrer_LYGhtS-DWisxp7ijCf1s-Ei8Y'
INFO:     G-Sheets API Credentials: token.json exists
INFO: Importing range: 'weekly_base_vol!A1:E' from G-sheet with id: '1zB9--uEjG3X0kSrer_LYGhtS-DWisxp7ijCf1s-Ei8Y'
INFO:     G-Sheets API Credentials: token.json exists
INFO: Importing range: 'intra_week_seasonality!A1:D' from G-sheet with id: '1zB9--uEjG3X0kSrer_LYGhtS-DWisxp7ijCf1s-Ei8Y'
INFO:     G-Sheets API Credentials: token.json exists
INFO: Importing range: 'intra_month_seasonality!A1:C' from G-sheet with id: '1zB9--uEjG3X0kSrer_LYGhtS-DWisxp7ijCf1s-Ei8Y'
INFO:     G-Sheets API Credentials: token.json exists


In [5]:
# Display the baseline dataframe for a visual check of the joined inputs.
baseline_1

Unnamed: 0,forecast_date,iso_calendar_week,calendar_month,day_of_week,day_of_month,business_line_alias,language/market/iban,channel,complaint_type,weekly_base_vol,...,Type,MAU,% Change,cumulative_%growth,dow,seasonality,intra_week_distro,dom,intra_month_seasonality,baseline_vol_forecast
2,2024-12-23,2024-w52,2024-12,1,23,ops-cmp-DEU-Authority-AFC & AML,DEU,AFC & AML,Authority,4.0,...,interpolated,817944,0.0047,0.0047,1.0,-0.12033014,0.175934,23.0,-0.156002,0.596743
152,2024-12-24,2024-w52,2024-12,2,24,ops-cmp-DEU-Authority-AFC & AML,DEU,AFC & AML,Authority,4.0,...,interpolated,817944,0.0047,0.0047,2.0,-0.055169411,0.188966,24.0,-0.154129,0.642369
302,2024-12-25,2024-w52,2024-12,3,25,ops-cmp-DEU-Authority-AFC & AML,DEU,AFC & AML,Authority,4.0,...,interpolated,817944,0.0047,0.0047,3.0,-0.07688964903,0.184622,25.0,-0.141026,0.637323
452,2024-12-26,2024-w52,2024-12,4,26,ops-cmp-DEU-Authority-AFC & AML,DEU,AFC & AML,Authority,4.0,...,interpolated,817944,0.0047,0.0047,4.0,0.3292789757,0.265856,26.0,-0.115221,0.945317
602,2024-12-27,2024-w52,2024-12,5,27,ops-cmp-DEU-Authority-AFC & AML,DEU,AFC & AML,Authority,4.0,...,interpolated,817944,0.0047,0.0047,5.0,-0.07688964903,0.184622,27.0,-0.075242,0.686133
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
55499,2025-12-27,2025-w52,2025-12,6,27,ops-cmp-ITA-NonAuthority-Products & Services,ITA,Products & Services,NonAuthority,0.0,...,interpolated,247504,0.0017,0.0967,6.0,0,0.166667,,,
55649,2025-12-28,2025-w52,2025-12,7,28,ops-cmp-ITA-NonAuthority-Products & Services,ITA,Products & Services,NonAuthority,0.0,...,interpolated,247504,0.0017,0.0967,,,,,,
55799,2025-12-29,2026-w01,2025-12,1,29,ops-cmp-ITA-NonAuthority-Products & Services,ITA,Products & Services,NonAuthority,0.0,...,abs,248320,0.0033,0.1000,1.0,0,0.166667,,,
55949,2025-12-30,2026-w01,2025-12,2,30,ops-cmp-ITA-NonAuthority-Products & Services,ITA,Products & Services,NonAuthority,0.0,...,abs,248320,0.0033,0.1000,2.0,0,0.166667,30.0,0.218477,0.000000


###### Add Forecast Offsets + Holidays:

In [6]:
def offset_df(df):
    """Turn a baseline forecast into the final forecast.

    Steps, in order:
      1. import the forecast offsets and add each one to the baseline (additive),
      2. import the holiday impacts and apply them as a percentage of the
         offset-adjusted forecast (multiplicative),
      3. floor negative forecasts at zero and derive the upper / lower bounds,
      4. keep and rename only the output columns.

    Args:
        df: Baseline dataframe; must contain 'forecast_date',
            'business_line_alias', 'baseline_vol_forecast' and the other
            columns selected for the output below.

    Returns:
        pandas.DataFrame restricted to the output columns followed by one
        column per offset name, with 'MAU' renamed to 'mau_forecast' and
        'channel' renamed to 'reason'.
    """
    sheet_id = parameters['sheet_id']
    join_left = ['forecast_date', 'business_line_alias']

    # ---- Forecast offsets (additive) ----
    offsets_long = fc.import_gsheet_to_df(sheet_id, parameters['forecast_offsets_range'])
    offset_names = offsets_long['offset_name'].unique().tolist()
    offsets_long['impact_date'] = pd.to_datetime(offsets_long['impact_date'], format='%d/%m/%Y')
    offsets_long['fc_offset'] = pd.to_numeric(offsets_long['fc_offset'], downcast="float")
    # One column per offset name, one row per (date, business line); absent combinations count as 0.
    offsets_wide = (
        offsets_long
        .pivot(index=['impact_date', 'business_line_alias'], columns='offset_name', values='fc_offset')
        .fillna(0.0)
        .reset_index()
    )

    df = df.merge(offsets_wide, how='left', left_on=join_left, right_on=['impact_date', 'business_line_alias'])
    # Start from the baseline, then add every offset column on top of it.
    df['forecast_incl_offsets'] = df['baseline_vol_forecast']
    for offset_name in offset_names:
        df[offset_name] = df[offset_name].fillna(0.0)  # dates without this offset
        df['forecast_incl_offsets'] = df['forecast_incl_offsets'] + df[offset_name]
    df = df.drop(columns='impact_date')

    # ---- Holiday impacts (multiplicative) ----
    holidays = fc.import_gsheet_to_df(sheet_id, parameters['holiday_impacts_range'])
    holidays['date'] = pd.to_datetime(holidays['date'], format='%d/%m/%Y')
    holidays['holiday_offset_%'] = holidays['holiday_offset_%'].str.rstrip('%').astype('float') / 100.0

    df = df.merge(holidays, how='left', left_on=join_left, right_on=['date', 'business_line_alias'])
    df['holiday_offset_%'] = df['holiday_offset_%'].fillna(0.0)  # non-holidays: no impact
    df['holiday_vol_offset'] = df['forecast_incl_offsets'] * df['holiday_offset_%']
    df['final_vol_forecast'] = df['forecast_incl_offsets'] + df['holiday_vol_offset']

    # A forecast volume cannot be negative.
    df['final_vol_forecast'] = np.where(df['final_vol_forecast'] < 0, 0, df['final_vol_forecast'])

    # ---- Upper and lower bound ----
    df['final_vol_forecast_lower_bound'] = df['final_vol_forecast'] * parameters['forecast_lower_bound']
    df['final_vol_forecast_upper_bound'] = df['final_vol_forecast'] * parameters['forecast_upper_bound']

    # ---- Output: fixed columns first, then one column per offset; everything else is discarded ----
    forecast_columns = ['final_vol_forecast', 'final_vol_forecast_upper_bound', 'final_vol_forecast_lower_bound']
    output_columns = (
        ['forecast_date', 'language/market/iban', 'channel', 'complaint_type']
        + forecast_columns
        + ['iso_calendar_week', 'calendar_month',
           'day_of_week', 'day_of_month', 'business_line_alias', 'MAU',
           'intra_week_distro', 'intra_month_seasonality', 'baseline_vol_forecast',
           'holiday_name', 'holiday_offset_%', 'holiday_vol_offset', 'weekly_base_vol', 'cumulative_%growth']
        + offset_names
    )
    df = df[output_columns]
    df = df.rename(columns={'MAU': 'mau_forecast', 'channel': 'reason'})

    # Rows whose inputs were missing end up NaN; report them as zero volume.
    df[forecast_columns] = df[forecast_columns].fillna(0)

    return df

In [7]:
# Apply forecast offsets and holiday impacts to the baseline to get the final forecast.
df = offset_df(df=baseline_1)
# Optional: same for the comparison baseline; enable only when two forecasts are compared.
#df_2 = offset_df(df=baseline_2)

INFO: Importing range: 'offsets!A1:D' from G-sheet with id: '1zB9--uEjG3X0kSrer_LYGhtS-DWisxp7ijCf1s-Ei8Y'
INFO:     G-Sheets API Credentials: token.json exists
INFO: Importing range: 'holidays!A1:F' from G-sheet with id: '1zB9--uEjG3X0kSrer_LYGhtS-DWisxp7ijCf1s-Ei8Y'
INFO:     G-Sheets API Credentials: token.json exists


### Run this function only when you want to compare the forecasts for two start dates

def final(df_1=None, df_2=None):
    """Merge two forecast runs side by side so they can be compared.

    Every row and column of ``df_1`` is kept. From ``df_2`` only the merge
    keys and the comparison metrics are used; each metric is added with a
    ``_2`` suffix directly after its ``df_1`` counterpart.

    Args:
        df_1: Primary forecast dataframe. When omitted, a module-level
            variable named ``df_1`` is looked up at call time.
        df_2: Comparison forecast dataframe (not modified). When omitted, a
            module-level variable named ``df_2`` is looked up at call time.

    Returns:
        pandas.DataFrame: ``df_1`` left-joined with the ``_2`` metrics of
        ``df_2`` on forecast date, market, reason/channel and business line.

    Raises:
        ValueError: If a dataframe is neither passed in nor available as a
            module-level variable.
        KeyError: If neither 'reason' nor 'channel' exists in both dataframes.
    """
    # Resolve the defaults at call time: evaluating df_1 / df_2 in the signature
    # raises NameError at definition time whenever those variables do not exist yet.
    if df_1 is None:
        df_1 = globals().get('df_1')
    if df_2 is None:
        df_2 = globals().get('df_2')
    if df_1 is None or df_2 is None:
        raise ValueError('final() needs two forecast dataframes: pass df_1 and df_2 explicitly.')

    # offset_df() renames 'channel' to 'reason'; accept either name so frames
    # produced before or after that rename can be compared.
    channel_column = next(
        (name for name in ('reason', 'channel') if name in df_1.columns and name in df_2.columns),
        None,
    )
    if channel_column is None:
        raise KeyError("Both dataframes need a 'reason' (or 'channel') column to merge on.")

    merging_columns = ['forecast_date', 'language/market/iban', channel_column, 'business_line_alias']
    y_columns = ['final_vol_forecast', 'final_vol_forecast_upper_bound', 'final_vol_forecast_lower_bound', 'cumulative_%growth']

    # rename() returns a new frame, so the caller's df_2 is left untouched.
    comparison = df_2[merging_columns + y_columns].rename(columns={name: name + '_2' for name in y_columns})

    merge = pd.merge(df_1, comparison, how='left', on=merging_columns)

    # Keep df_1's column order and place every '<metric>_2' right after '<metric>'.
    # Deriving the list from df_1 (instead of hard-coding offset / holiday column
    # names) keeps this working whatever offsets are configured in the sheet.
    ordered_columns = []
    for name in df_1.columns:
        ordered_columns.append(name)
        if name in y_columns:
            ordered_columns.append(name + '_2')
    ordered_columns += [name for name in merge.columns if name not in ordered_columns]

    return merge[ordered_columns]
#df = final(df_1=df, df_2=df_2)

###### Export Forecast to GSheet:

In [8]:
# The date column must be turned into strings because the datetime type is not JSON serialisable.
# pd.to_datetime() first makes this cell safe to re-run: after one run the column already
# holds strings, and calling .dt on it directly would raise AttributeError.
df['forecast_date'] = pd.to_datetime(df['forecast_date']).dt.strftime('%Y-%m-%d %H:%M:%S')

# export parameters:
gsheet_export_params = dict(

    df                 = df,
    gsheet_id          = parameters['sheet_id'],
    gsheet_tab_name    = 'fc_output',
    include_df_headers = True,
    tab_colour         = (0.0, 0.0, 0.0) #RGB tab colour
)

# Write the forecast to the output tab of the same Google Sheet the inputs were read from.
fc.export_df_to_google_sheet(**gsheet_export_params)

INFO:     G-Sheets API Credentials: token.json exists
INFO: couldn't create new sheet with name 'fc_output', it may already exist.
INFO:     G-Sheets API Credentials: token.json exists
INFO: {1234222} cells updated.


In [9]:
# Preview the first rows of the exported forecast.
df.head()

Unnamed: 0,forecast_date,language/market/iban,reason,complaint_type,final_vol_forecast,final_vol_forecast_upper_bound,final_vol_forecast_lower_bound,iso_calendar_week,calendar_month,day_of_week,...,mau_forecast,intra_week_distro,intra_month_seasonality,baseline_vol_forecast,holiday_name,holiday_offset_%,holiday_vol_offset,weekly_base_vol,cumulative_%growth,Growth_cap_lift
0,2024-12-23 00:00:00,DEU,AFC & AML,Authority,0.548885,0.590051,0.507718,2024-w52,2024-12,1,...,817944,0.175934,-0.156002,0.596743,Christmas Day -2d,-0.0802,-0.047859,4.0,0.0047,0.0
1,2024-12-24 00:00:00,DEU,AFC & AML,Authority,0.344117,0.369926,0.318308,2024-w52,2024-12,2,...,817944,0.188966,-0.154129,0.642369,Christmas Day -1d,-0.4643,-0.298252,4.0,0.0047,0.0
2,2024-12-25 00:00:00,DEU,AFC & AML,Authority,0.198717,0.213621,0.183814,2024-w52,2024-12,3,...,817944,0.184622,-0.141026,0.637323,Christmas Day,-0.6882,-0.438606,4.0,0.0047,0.0
3,2024-12-26 00:00:00,DEU,AFC & AML,Authority,0.635253,0.682897,0.587609,2024-w52,2024-12,4,...,817944,0.265856,-0.115221,0.945317,St. Stephen's Day,-0.328,-0.310064,4.0,0.0047,0.0
4,2024-12-27 00:00:00,DEU,AFC & AML,Authority,0.660815,0.710376,0.611254,2024-w52,2024-12,5,...,817944,0.184622,-0.075242,0.686133,St. Stephen's Day +1d,-0.0369,-0.025318,4.0,0.0047,0.0
