In [1]:
from pathlib import Path

import numpy as np
import pandas as pd

In [2]:
def load_transactions(path):
    """Read one transaction export and keep only rows with no DeleteById.

    Parameters
    ----------
    path : str
        Location of the CSV file. It must contain the columns CreateDate,
        Gross, Store, DeleteById, HeadCount and Tags.

    Returns
    -------
    pd.DataFrame
        The rows whose DeleteById is missing, without the DeleteById column.
    """
    raw = pd.read_csv(
        path,
        parse_dates=['CreateDate'],
        date_format='%Y-%m-%d %H:%M:%S',
        dtype={'Store': 'category', 'Gross': 'float64', 'HeadCount': 'int64'},
        usecols=['CreateDate', 'Gross', 'Store', 'DeleteById', 'HeadCount', 'Tags'],
    )
    # Rows that carry a DeleteById (presumably deleted transactions - confirm
    # with the data owner) are blanked out entirely by where() and then removed
    # by dropna(how='all'). The NaN masking also upcasts HeadCount to float.
    return (raw
            .where(lambda x: x['DeleteById'].isna())
            .dropna(how='all')
            .drop(columns=['DeleteById']))


# Both exports share one schema, so they go through the same loader.
lido = load_transactions('assets/lido_transactions.csv')
yanzi = load_transactions('assets/yanzi_transactions.csv')

transactions = pd.concat([lido, yanzi], ignore_index=True)
transactions.head()

Unnamed: 0,HeadCount,Gross,Tags,Store,CreateDate
0,1.0,255.0,#DineIn,Erod,2022-03-23 08:54:52
1,1.0,255.0,#DineIn,Erod,2022-03-23 09:00:02
2,3.0,1005.0,#DineIn#SeniorDiscount#VatExempt,Erod,2022-03-23 09:01:29
3,1.0,515.0,TakeOut#Grab,Erod,2022-03-23 09:39:35
4,1.0,1547.0,Delivery#FoodPanda,Erod,2022-03-23 09:44:01


In [3]:
# Reporting window, inclusive on both ends. `start` and `end` stay plain
# `date` objects because the export file name interpolates them as YYYY-MM-DD.
start = pd.Timestamp('2025-03-01').date()
end = pd.Timestamp('2025-03-31').date()

# normalize() floors every timestamp to midnight, so the comparison stays in
# datetime64 instead of building an object column of Python dates via .dt.date.
in_window = (transactions['CreateDate']
             .dt.normalize()
             .between(pd.Timestamp(start), pd.Timestamp(end)))
transactions = transactions.loc[in_window]
transactions

Unnamed: 0,HeadCount,Gross,Tags,Store,CreateDate
235035,1.0,1194.0,#TakeOut#Grab,Cainta,2025-03-01 08:00:20
235036,7.0,1895.0,#DineIn#SeniorDiscount#VatExempt,Cainta,2025-03-01 08:05:47
235037,3.0,770.0,#DineIn#Pwd#VatExempt#SeniorDiscount,Cainta,2025-03-01 08:18:49
235038,1.0,435.0,#TakeOut#Grab,Cainta,2025-03-01 08:27:39
235039,1.0,1338.0,#TakeOut#Grab,Cainta,2025-03-01 08:38:33
...,...,...,...,...,...
340977,3.0,897.0,#DineIn#SeniorDiscount#VatExempt,Marikina,2025-03-31 20:26:05
340978,5.0,2228.0,#DineIn,Marikina,2025-03-31 20:41:54
340979,1.0,345.0,#DineIn#SeniorDiscount#VatExempt,Marikina,2025-03-31 20:45:53
340980,1.0,565.0,#TakeOut#Grab,Marikina,2025-03-31 21:51:34


# Compute gross total stats

In [4]:
# Overall figures per calendar day and store. Named aggregation yields the
# final column names directly, in the order sum / count / mean of Gross
# followed by the HeadCount sum.
totals = (
    transactions
    .groupby([transactions['CreateDate'].dt.date, 'Store'], observed=False)
    .agg(
        TotalGross=('Gross', 'sum'),
        TotalTransactionCount=('Gross', 'count'),
        TotalAverageCheque=('Gross', 'mean'),
        TotalHeadCount=('HeadCount', 'sum'),
    )
)

# Compute stats for each category

In [5]:
def get_aggregated_data(cond, col_name, data=None) -> pd.DataFrame:
    """Aggregate the rows selected by ``cond`` per calendar day and store.

    Parameters
    ----------
    cond : boolean Series or callable
        Handed unchanged to ``DataFrame.where``. Rows for which it is False
        are blanked to NaN and therefore fall out of the grouping.
    col_name : str
        Prefix for the four output columns.
    data : pd.DataFrame, optional
        Frame to aggregate. It must provide the ``CreateDate``, ``Store``,
        ``Gross`` and ``HeadCount`` columns. When omitted, the notebook-level
        ``transactions`` frame is used.

    Returns
    -------
    pd.DataFrame
        Indexed by (date of ``CreateDate``, ``Store``) with the columns
        ``<col_name>Gross``, ``<col_name>HeadCount``,
        ``<col_name>TransactionCount`` and ``<col_name>AverageCheque``.
        Missing values are replaced by 0 and figures rounded to 5 decimals.
    """
    # The global is only looked up when no frame is passed explicitly, so the
    # function can be exercised without any notebook state.
    source = transactions if data is None else data
    masked = source.where(cond)

    per_day_store = masked.groupby([masked['CreateDate'].dt.date, 'Store'], observed=False)

    # Named aggregation creates the prefixed columns directly, in output order.
    summary = per_day_store.agg(**{
        f'{col_name}Gross': ('Gross', 'sum'),
        f'{col_name}HeadCount': ('HeadCount', 'sum'),
        f'{col_name}TransactionCount': ('Gross', 'count'),
        f'{col_name}AverageCheque': ('Gross', 'mean'),
    })

    return summary.fillna(0).round(5)

In [7]:
# The report starts from the overall totals and gains four columns per category.
final = totals

# Categories identified by a tag: transaction types first, then third-party vendors.
transaction_types = ['DineIn', 'TakeOut', 'Function', 'Institutional', 'Delivery']
third_parties = ['FoodPanda', 'Ons', 'Grab']

for label in [*transaction_types, *third_parties]:
    # str.contains searches anywhere in the Tags string; rows without tags never match.
    tag_stats = get_aggregated_data(lambda df: df['Tags'].str.contains(label, na=False), label)
    final = final.join(tag_stats, on=['CreateDate', 'Store'], how='left')


# Day parts: inclusive hour ranges applied to the transaction timestamp.
hour_of_day = transactions['CreateDate'].dt.hour
conditions = {
    part: hour_of_day.between(first_hour, last_hour)
    for part, (first_hour, last_hour) in {
        'Breakfast': (7, 10),
        'Lunch': (11, 14),
        'Merienda': (15, 17),
        'Dinner': (18, 24),
    }.items()
}

for part, in_part in conditions.items():
    part_stats = get_aggregated_data(in_part, part)
    final = final.join(part_stats, on=['CreateDate', 'Store'], how='left')

# Cleaning up the output

In [None]:
# Output column order: every category contributes the same four metrics.
# Generating the names keeps the list in step with the category prefixes
# instead of maintaining 52 hand-typed strings.
metric_suffixes = ['Gross', 'HeadCount', 'TransactionCount', 'AverageCheque']
category_prefixes = [
    'Total',
    'DineIn', 'TakeOut', 'Function', 'Institutional', 'Delivery',
    'Breakfast', 'Lunch', 'Dinner', 'Merienda',
    'FoodPanda', 'Ons', 'Grab',
]
columns = [f'{prefix}{suffix}' for prefix in category_prefixes for suffix in metric_suffixes]

final = final[columns]

# to_csv does not create missing folders, so make sure the target exists;
# otherwise a run on a fresh checkout fails at the very last step.
output_dir = Path('outputs')
output_dir.mkdir(parents=True, exist_ok=True)

final.fillna(0).round(5).to_csv(output_dir / f"daily_sales_report-{start}-{end}.csv")