In [1]:
import os
import sys
import numpy as np
import pandas as pd
import warnings
from datetime import date, datetime
import statsmodels.api as sm
from IPython.core.display import display, HTML
import snowflake.connector 
import matplotlib.pyplot as plt
import json
from scipy import stats

warnings.filterwarnings('ignore')
display(HTML("<style>.container { width:100% !important; }</style>"))
pd.set_option('display.max_rows', 550)
pd.set_option('display.max_columns', 550)
import snowflake.connector
cnx = snowflake.connector.connect(
    user='nmittleman@toasttab.com',
    account='toast.us-east-1',
    authenticator='externalbrowser'
    )

%matplotlib inline

def QueryDWH(query, cnx, col=[]):
    cur = cnx.cursor()
    cur.execute(query)
    # If there is no col input, then get the columns from the return of the
    # snowflake query
    if not col:
        df = pd.DataFrame(cur.fetchall())
        if not df.empty:
            df.columns = [desc[0] for desc in cur.description]
    else:
        df = pd.DataFrame(cur.fetchall(), columns=col)

    return df
QueryDWH("""USE DATABASE TOAST""", cnx)
QueryDWH("""USE WAREHOUSE TOAST_WH""", cnx)

# 25th Percentile
def q25(x):
    return x.quantile(0.25)

# 50th Percentile
def q50(x):
    return x.quantile(0.5)

# 90th Percentile
def q75(x):
    return x.quantile(0.75)

  from pandas import (to_datetime, Int64Index, DatetimeIndex, Period,
  from pandas import (to_datetime, Int64Index, DatetimeIndex, Period,
  warn_incompatible_dep(


# Snowflake Queries

## First Query Gets Customers details from the customer table <br>


In [2]:
query = """
        WITH max_dt AS 
        (SELECT customer_id, MAX(dt) AS dt
        FROM payments.daily_customer_details
        GROUP BY 1)
        SELECT a.customer_id
             , a.region
             , a.restaurant_type
             , a.restaurant_service_model
             , a.pos_first_order_date
             , DATE_TRUNC('Quarter', a.pos_first_order_date)
        FROM payments.daily_customer_details a
        INNER JOIN max_dt b
        ON a.customer_id = b.customer_id
        AND a.dt = b.dt;
        """

cust = QueryDWH(query, cnx, ['Customer Id', 'Region', 'Type', 'Service Model', 'FOD', 'FOQ'])
cust['FOD'] = pd.to_datetime(cust['FOD']).dt.date
cust['FOQ'] = pd.to_datetime(cust['FOQ']).dt.date

## Second Query gets the modules that a customer has for each quarter


In [3]:
query = """
        WITH cust_module AS
            (SELECT  customer_id
                   , DATE_TRUNC('Quarter', dt) AS quarter
                   , SUM(IFF(product_name IN ('Software Monthly Subscription',
                                              'Additional Tablet Monthly Software Subscription',
                                              'Caller ID Monthly Subscription'), saas_quantity, 0)) AS core_count
                   , SUM(IFF(product_name IN ('Enterprise Data Management Monthly Subscription',
                                              'Enterprise Software Monthly Subscription'), saas_quantity, 0)) AS ent_count 
                   , SUM(IFF(product_name IN ('Handheld Monthly Software Subscription'), saas_quantity, 0)) AS hh_count
                   , SUM(IFF(product_name IN ('Online Ordering Monthly Subscription'), saas_quantity, 0)) AS oo_count
                   , SUM(IFF(product_name IN ('Gift Card Program Monthly Subscription'), saas_quantity, 0)) AS gc_count
                   , SUM(IFF(product_name IN ('API Monthly Subscription'), saas_quantity, 0)) AS api_count
                   , SUM(IFF(product_name IN ('Loyalty Program Monthly Subscription'), saas_quantity, 0)) AS loy_count
                   , SUM(IFF(product_name IN ('Marketing Monthly Subscription - 1st Location', 'Marketing Monthly Subscription - Additional Location', 'CRM Monthly Subscription'), saas_quantity, 0)) AS marketing_count
                   , SUM(IFF(product_name IN ('Toast Order & Pay', 'Toast Mobile Order & Pay'), saas_quantity, 0)) AS opt_count
                   , SUM(IFF(product_name IN ('Kitchen Display Screen Monthly Subscription'), saas_quantity, 0)) AS kds_count
                   , SUM(IFF(product_name IN ('Kiosk Monthly Subscription'), saas_quantity, 0)) AS kiosk_count
                   , SUM(IFF(product_name IN ('Inventory Monthly Subscription'), saas_quantity, 0)) AS inv_count
               , SUM(IFF(product_name IN ('xtraCHEF Pro' , 'Invoice Automation', 'Recipe Costing', 'Inventory Management'), saas_quantity, 0)) AS xc_count
               , SUM(IFF(product_name IN ('DoorDash Integration', 'GrubHub Integration', 'UberEats Integration') AND dt >= '2021-04-01', saas_quantity, 0)) AS three_pd_count
               
               ,  IFF(ent_count > 0, 1, 0) + IFF(hh_count > 0, 1, 0) + IFF(oo_count > 0, 1, 0) + IFF(gc_count > 0, 1, 0) 
                + IFF(api_count > 0, 1, 0) + IFF(loy_count > 0, 1, 0) + IFF(marketing_count > 0, 1, 0) + IFF(kds_count > 0, 1, 0) 
                + IFF(opt_count > 0, 1, 0) + IFF(kiosk_count > 0, 1, 0) + IFF(inv_count > 0, 1, 0) + IFF(three_pd_count > 0, 1, 0)
                + IFF(xc_count > 0, 1, 0) AS module_count_live_today
              FROM analytics_core_arr.saas_daily_status
              WHERE LAST_DAY(dt, 'Quarter') = dt
              AND dt >= '2018-01-01'
              AND pos_status = 'Live'
              AND saas_status = 'Live'
              GROUP BY 1,2), tds AS 
              (SELECT DATE_TRUNC('Quarter', dt) AS quarter
                    , customer_id
                    , SUM(TDS_TRANSACTIONS) AS tds_tx
                    , IFF(tds_tx > 0, 1, 0) AS tds_count
                 FROM payments.daily_customer_details 
                 WHERE LAST_DAY(dt, 'Quarter') = LAST_DAY(dt, 'Month')
                 GROUP BY 1,2
               ), ec AS 

              (        SELECT DISTINCT
                        DATE_TRUNC('Quarter', p.check_date) AS quarter
                      , t.customer_id
                      , 1 AS ec
                    FROM source_estratex.payroll_earning_historical_entity_current e
                    LEFT JOIN source_estratex.payroll_historical_entity_current p
                        ON e.estratex_payroll_uuid = p.estratex_payroll_uuid
                    LEFT JOIN source_estratex.location_entity_current l
                        ON e.estratex_location_uuid = l.estratex_location_uuid
                    INNER JOIN analytics_core.toastorders_customer_bridge t
                    ON l.restaurant_guid = t.toastorders_restaurant_guid
                    WHERE DATE_TRUNC(month, p.check_date) >= '2018-01-01'),

              live_count AS
            (SELECT  DATE_TRUNC('Quarter', dt) AS quarter
                   , customer_id
                   , 1 AS live_locs
              FROM payments.daily_customer_details
              WHERE LAST_DAY(dt, 'Quarter') = dt
              AND dt >= '2018-01-01'
              AND pos_status = 'Live')
            (SELECT c.customer_id
                 , c.quarter
                 , DATE_TRUNC('Quarter', DATEADD('Days', -1, c.quarter))
                 , ZEROIFNULL(b.tds_count) AS  tds_count
                 , a.core_count
                 , a.ent_count 
                 , a.hh_count
                 , a.oo_count
                 , a.gc_count
                 , a.api_count
                 , a.loy_count
                 , a.marketing_count
                 , a.opt_count
                 , a.kds_count
                 , a.kiosk_count
                 , a.inv_count
                 , a.three_pd_count
                 , a.xc_count
                 , ZEROIFNULL(d.ec) AS ec_count
                 , a.module_count_live_today + ZEROIFNULL(b.tds_count) + ZEROIFNULL(d.ec) AS unique_mod
            FROM live_count c
            LEFT JOIN cust_module a
            ON a.quarter = c.quarter
            AND a.customer_id = c.customer_id
            LEFT JOIN tds b
            ON a.quarter = b.quarter
            AND a.customer_id = b.customer_id
            LEFT JOIN ec d
            ON a.quarter = d.quarter
            AND a.customer_id = d.customer_id)
        """

modCounts = QueryDWH(query, cnx, ['Customer Id', 'Quarter', 'Prev Quarter', 'TDS','Core', 'Ent', 'HH', 'OO', 'GC', 'API', 'Loy', 'Mkt', 'OPT', 'KDS', 'Kiosk', 'Inv', '3PD', 'XC', 'EC', 'Mod Count'])

for col in ['TDS','Core', 'Ent', 'HH', 'OO', 'GC', 'API', 'Loy', 'Mkt', 'OPT', 'KDS', 'Kiosk', 'Inv', '3PD', 'XC', 'EC', 'Mod Count']:
    modCounts[col] = modCounts[col].astype(float)



In [4]:
# Set data columns to date type
# Create a 'Next Year' column - this is actually 15 months from the quarter start, because what we want 
# is the end of next year. for example. for Q1 - 2021, the Quarter date is 1/1/2021. and one year from the end
# of Q1 - 2021 is the start of Q2 - 2022. hence 15 months
modCounts['Quarter'] = pd.to_datetime(modCounts['Quarter']).dt.date
modCounts['Prev Quarter'] = pd.to_datetime(modCounts['Prev Quarter']).dt.date
modCounts['Next Year'] = modCounts['Quarter'] + pd.DateOffset(months=15)
modCounts['Next Year'] = pd.to_datetime(modCounts['Next Year']).dt.date

In [5]:
# Create a dataframe that has the modules for each quarter as well as the modules at the start of 
# the one year prior
modCountsLastYear = modCounts.copy()
for col in ['TDS','Core', 'Ent', 'HH', 'OO', 'GC', 'API', 'Loy', 'Mkt', 'OPT', 'KDS', 'Kiosk', 'Inv', '3PD', 'XC', 'EC', 'Mod Count', 'Loans']:
    modCountsLastYear.rename(columns = {col: f'{col} Last Year'}, inplace=True)
    
yoyModCounts = modCounts[['Customer Id', 'Quarter', 'Prev Quarter']].\
                    merge(modCounts.drop(['Prev Quarter', 'Next Year', 'Next Quarter'], axis=1, errors='ignore').rename(columns = {'Quarter': 'Quarter'}) ).\
                    merge(modCountsLastYear.drop(['Prev Quarter', 'Quarter', 'Next Quarter'], axis=1, errors='ignore').rename(columns = {'Next Year': 'Quarter'} ))
yoyModCounts['Mod Count Change'] = yoyModCounts['Mod Count'] - yoyModCounts['Mod Count Last Year']

## Third Query gets the GMV and GMV per Day for each customer for each quarter

In [6]:
query = """
            SELECT CUSTOMER_ID
                , DATE_TRUNC('Quarter', dt) AS quarter
                , SUM(gmv) AS gmv
                , COUNT(DISTINCT IFF(gmv > 0, dt, NULL)) AS active_days               
                , MIN(IFF(gmv > 0, gmv, NULL)) AS min_gmv_per_day
                , APPROX_PERCENTILE(IFF(gmv > 0, gmv, NULL), 0.25 ) AS gmv_25th
                , MEDIAN(IFF(gmv > 0, gmv, NULL)) AS median_gmv_per_day            
                , APPROX_PERCENTILE(IFF(gmv > 0, gmv, NULL), 0.75 ) AS gmv_75th
                , MAX(IFF(gmv > 0, gmv, NULL)) AS max_gmv_per_day
            FROM PAYMENTS.daily_customer_details
            WHERE pos_status = 'Live'
            AND pos_first_order_date < DATE_TRUNC('Quarter' ,dt)
            GROUP BY 1, 2;
        """
quarterGMV = QueryDWH(query, cnx, ['Customer Id', 'Quarter', 'GMV', 'Active Days', 'Min GMV PD', '25th Pct GMV PD', 'Med GMV PD', '75th Pct GMV PD', 'Max GMV PD'])
for col in ['GMV', 'Active Days', 'Min GMV PD', '25th Pct GMV PD', 'Med GMV PD', '75th Pct GMV PD', 'Max GMV PD']:
    quarterGMV[col] = quarterGMV[col].astype(float)
    

# Here we actually want to use 12 months from the quarter because we are comparing all GMV in one quarter
# to all GMV in the same quarter the following year
quarterGMV['Quarter'] = pd.to_datetime(quarterGMV['Quarter']).dt.date
quarterGMV['Next Year'] = quarterGMV['Quarter'] + pd.DateOffset(months=12)
quarterGMV['Next Year'] = pd.to_datetime(quarterGMV['Next Year']).dt.date

# Combine GMV, Module, and Customer Data


In [7]:
# Join the quarter GMV to itself to get the GMV from Prev Year
yoyGMV = quarterGMV.merge(quarterGMV[['Customer Id', 'Next Year', 'GMV', 'Active Days',  'Med GMV PD']].\
                          rename(columns = {'Next Year': 'Quarter', 
                                            'GMV': 'GMV Last Year', 
                                            'Active Days': 'Active Days Last Year',
                                            'Med GMV PD': 'Med GMV PD Last Year'}) ).\
drop(['Next Year', 'Min GMV PD', '25th Pct GMV PD', '75th Pct GMV PD', 'Max GMV PD'], axis=1, errors='ignore')

# We define the GMV per Day as the average GMV on a given day
yoyGMV['GMV Per Day'] = yoyGMV['GMV'] / yoyGMV['Active Days']
yoyGMV['GMV Per Day Last Year'] = yoyGMV['GMV Last Year'] / yoyGMV['Active Days Last Year']

# We define growth rate in two ways
# Here GMV Growth is how much total GMV grew Year over Year
yoyGMV['GMV Growth'] = yoyGMV['GMV'] - yoyGMV['GMV Last Year']
yoyGMV['GMV Growth Percent'] = np.round(100 * yoyGMV['GMV Growth'] / yoyGMV['GMV Last Year'], 2)

# Here GMV Per Day Growth is how much average GMV Per Day grew
yoyGMV['GMV Per Day Growth'] = yoyGMV['GMV Per Day'] - yoyGMV['GMV Per Day Last Year']
yoyGMV['GMV Per Day Growth Percent'] = np.round(100 * yoyGMV['GMV Per Day Growth'] / yoyGMV['GMV Per Day Last Year'], 2)

# Here Median GMV Per Day Growth is how much average GMV Per Day grew
yoyGMV['Med GMV Per Day Growth'] = yoyGMV['Med GMV PD'] - yoyGMV['Med GMV PD Last Year']
yoyGMV['Med GMV Per Day Growth Percent'] = np.round(100 * yoyGMV['Med GMV Per Day Growth'] / yoyGMV['Med GMV PD Last Year'], 2)


In [8]:
# Join the GMV and Module information to create a dataset that has Year over Year GMV growth and the modules that 
# each customer has in the current quarter and quarter one year previous
yoyCompare = yoyGMV.merge(yoyModCounts)
yoyCompare['Guest Count'] = yoyCompare[['OO', 'GC', 'Loy', 'Mkt']].sum(axis=1)
yoyCompare['Guest Count Last Year'] = yoyCompare[['OO Last Year', 'GC Last Year', 'Loy Last Year', 'Mkt Last Year']].sum(axis=1)
for mod in ['TDS','Core', 'Ent', 'HH', 'OO', 'GC', 'API', 'Loy', 'Mkt', 'OPT', 'KDS', 'Kiosk', 'Inv', '3PD', 'XC', 'EC', 'Mod Count', 'Guest Count']:
    yoyCompare[f'{mod} Change'] = (yoyCompare[mod] - yoyCompare[f'{mod} Last Year']).fillna(0)


# Get the Year over Year Growth for Customers that ADDED modules

In [9]:
for mod in ['TDS','Core', 'Ent', 'HH', 'OO', 'GC', 'API', 'Loy', 'Mkt', 'OPT', 'KDS', 'Kiosk', 'Inv', '3PD', 'XC', 'EC', 'Mod Count', 'Guest Count']:

    comp = yoyCompare[yoyCompare['GMV Per Day Growth Percent'] > -50]
    comp = comp[comp['GMV Per Day Growth Percent'] < 200]
    
    comp[f'Mod Groups'] = 'No Change'
    comp[f'Mod Groups'] = np.where(comp[f'{mod} Change'] > 0, 'Added', comp[f'Mod Groups'])
    comp[f'Mod Groups'] = np.where(comp[f'{mod} Change'] <= 0, 'Did Not Add', comp[f'Mod Groups'])


    results = pd.DataFrame()
    for quarter in comp.sort_values('Quarter')['Quarter'].unique().tolist():
        tmp = comp[comp['Quarter'] == quarter]

        agg = tmp.groupby(['Mod Groups']).agg({'Customer Id': 'nunique','GMV Per Day Growth Percent': q50, 'GMV': 'sum', 'GMV Last Year': 'sum'})
        agg['Avg'] = np.round(100 * (agg['GMV'] - agg['GMV Last Year']) / agg['GMV Last Year'], 2)
        if len(agg) == 2:
            pVal = stats.ttest_ind(tmp[tmp['Mod Groups'] == 'Added']['GMV Per Day Growth Percent'].to_numpy(), tmp[tmp['Mod Groups'] == 'Did Not Add']['GMV Per Day Growth Percent'].to_numpy())[1]
            pVal = np.round(pVal, 4)
            tmpResults = pd.DataFrame({'# Customers Added Modules': [agg.loc['Added']['Customer Id']],
                                       '# Customers Did Not Add Modules': [agg.loc['Did Not Add']['Customer Id']],
                                       'Added - Typical Growth ': [agg.loc['Added']['GMV Per Day Growth Percent']],
                                       'Did Not Add - Typical Growth': [agg.loc['Did Not Add']['GMV Per Day Growth Percent']],
                                       'Typical Growth Difference': [agg.loc['Added']['GMV Per Day Growth Percent'] - agg.loc['Did Not Add']['GMV Per Day Growth Percent']],
                                       'Growth Rate Difference': [np.round(100 * (agg.loc['Added']['GMV Per Day Growth Percent'] - agg.loc['Did Not Add']['GMV Per Day Growth Percent']) / (agg.loc['Did Not Add']['GMV Per Day Growth Percent']), 2)],

                                       'pVal': [pVal]}, index=[quarter])
            results = pd.concat([results, tmpResults])
    print(mod)
    display(results)
    results.to_csv(f'./Spark 2022 Files/{mod} Added.csv')

TDS


Unnamed: 0,# Customers Added Modules,# Customers Did Not Add Modules,Added - Typical Growth,Did Not Add - Typical Growth,Typical Growth Difference,Growth Rate Difference,pVal
2020-01-01,1.0,11814.0,-15.26,-6.44,-8.82,136.96,
2020-04-01,239.0,8250.0,-30.27,-20.19,-10.08,49.93,0.0
2020-07-01,591.0,13016.0,-16.91,-7.53,-9.38,124.57,0.0
2020-10-01,990.0,14891.0,-13.49,-7.75,-5.74,74.06,0.0
2021-01-01,1652.0,17727.0,2.5,6.13,-3.63,-59.22,0.0004
2021-04-01,1665.0,18708.0,63.82,56.1,7.72,13.76,0.0
2021-07-01,1491.0,24486.0,36.45,29.78,6.67,22.4,0.0001
2021-10-01,1527.0,28032.0,36.15,32.525,3.625,11.15,0.0174
2022-01-01,1336.0,30985.0,22.095,19.59,2.505,12.79,0.0178
2022-04-01,1023.0,35330.0,11.16,7.805,3.355,42.99,0.0


Core


Unnamed: 0,# Customers Added Modules,# Customers Did Not Add Modules,Added - Typical Growth,Did Not Add - Typical Growth,Typical Growth Difference,Growth Rate Difference,pVal
2019-04-01,462.0,6221.0,5.755,2.03,3.725,183.5,0.0
2019-07-01,355.0,8190.0,6.54,1.4,5.14,367.14,0.0
2019-10-01,382.0,9773.0,7.3,2.21,5.09,230.32,0.0
2020-01-01,481.0,11334.0,-3.27,-6.54,3.27,-50.0,0.0
2020-04-01,439.0,8050.0,-18.91,-20.535,1.625,-7.91,0.0482
2020-07-01,627.0,12980.0,-0.05,-8.36,8.31,-99.4,0.0
2020-10-01,733.0,15148.0,-1.46,-8.56,7.1,-82.94,0.0
2021-01-01,900.0,18479.0,17.735,5.24,12.495,238.45,0.0
2021-04-01,869.0,19504.0,69.53,56.135,13.395,23.86,0.0
2021-07-01,1394.0,24583.0,39.25,29.68,9.57,32.24,0.0


Ent


Unnamed: 0,# Customers Added Modules,# Customers Did Not Add Modules,Added - Typical Growth,Did Not Add - Typical Growth,Typical Growth Difference,Growth Rate Difference,pVal
2019-04-01,49.0,6634.0,5.79,2.26,3.53,156.19,0.1962
2019-07-01,49.0,8496.0,4.18,1.635,2.545,155.66,0.9966
2019-10-01,79.0,10076.0,2.45,2.37,0.08,3.38,0.8584
2020-01-01,107.0,11708.0,-7.07,-6.42,-0.65,10.12,0.2426
2020-04-01,130.0,8359.0,-22.975,-20.39,-2.585,12.68,0.2114
2020-07-01,201.0,13406.0,-10.15,-8.0,-2.15,26.88,0.4128
2020-10-01,243.0,15638.0,-3.93,-8.295,4.365,-52.62,0.0954
2021-01-01,278.0,19101.0,9.98,5.64,4.34,76.95,0.6934
2021-04-01,188.0,20185.0,57.66,56.85,0.81,1.42,0.5032
2021-07-01,247.0,25730.0,28.41,30.18,-1.77,-5.86,0.6326


HH


Unnamed: 0,# Customers Added Modules,# Customers Did Not Add Modules,Added - Typical Growth,Did Not Add - Typical Growth,Typical Growth Difference,Growth Rate Difference,pVal
2019-04-01,421.0,6262.0,6.09,2.03,4.06,200.0,0.0
2019-07-01,630.0,7915.0,5.34,1.33,4.01,301.5,0.0
2019-10-01,748.0,9407.0,6.335,2.02,4.315,213.61,0.0
2020-01-01,925.0,10890.0,-5.22,-6.52,1.3,-19.94,0.0
2020-04-01,857.0,7632.0,-18.91,-20.59,1.68,-8.16,0.0911
2020-07-01,1381.0,12226.0,-4.29,-8.39,4.1,-48.87,0.0
2020-10-01,1586.0,14295.0,-4.945,-8.53,3.585,-42.03,0.0
2021-01-01,2195.0,17184.0,12.15,4.88,7.27,148.98,0.0
2021-04-01,2504.0,17869.0,67.03,55.54,11.49,20.69,0.0
2021-07-01,3792.0,22185.0,37.93,28.93,9.0,31.11,0.0


OO


Unnamed: 0,# Customers Added Modules,# Customers Did Not Add Modules,Added - Typical Growth,Did Not Add - Typical Growth,Typical Growth Difference,Growth Rate Difference,pVal
2019-04-01,273.0,6410.0,0.79,2.425,-1.635,-67.42,0.4046
2019-07-01,309.0,8236.0,2.34,1.625,0.715,44.0,0.3292
2019-10-01,393.0,9762.0,4.05,2.295,1.755,76.47,0.0031
2020-01-01,1180.0,10635.0,-6.825,-6.39,-0.435,6.81,0.7122
2020-04-01,1299.0,7190.0,-25.95,-19.38,-6.57,33.9,0.0
2020-07-01,2392.0,11215.0,-12.705,-6.95,-5.755,82.81,0.0
2020-10-01,3154.0,12727.0,-11.845,-7.06,-4.785,67.78,0.0
2021-01-01,3909.0,15470.0,2.1,6.81,-4.71,-69.16,0.0
2021-04-01,2399.0,17974.0,72.11,55.135,16.975,30.79,0.0
2021-07-01,1441.0,24536.0,37.1,29.89,7.21,24.12,0.0


GC


Unnamed: 0,# Customers Added Modules,# Customers Did Not Add Modules,Added - Typical Growth,Did Not Add - Typical Growth,Typical Growth Difference,Growth Rate Difference,pVal
2019-04-01,182.0,6501.0,5.22,2.17,3.05,140.55,0.0001
2019-07-01,258.0,8287.0,5.12,1.55,3.57,230.32,0.0
2019-10-01,390.0,9765.0,4.965,2.24,2.725,121.65,0.0
2020-01-01,408.0,11407.0,-4.24,-6.5,2.26,-34.77,0.0
2020-04-01,333.0,8156.0,-15.38,-20.6,5.22,-25.34,0.0
2020-07-01,526.0,13081.0,-3.52,-8.18,4.66,-56.97,0.0008
2020-10-01,827.0,15054.0,-6.55,-8.29,1.74,-20.99,0.0007
2021-01-01,828.0,18551.0,7.985,5.63,2.355,41.83,0.0017
2021-04-01,806.0,19567.0,57.76,56.84,0.92,1.62,0.8288
2021-07-01,1014.0,24963.0,29.295,30.21,-0.915,-3.03,0.9924


API


Unnamed: 0,# Customers Added Modules,# Customers Did Not Add Modules,Added - Typical Growth,Did Not Add - Typical Growth,Typical Growth Difference,Growth Rate Difference,pVal
2019-04-01,637.0,6046.0,4.01,2.095,1.915,91.41,0.0218
2019-07-01,746.0,7799.0,2.38,1.53,0.85,55.56,0.1632
2019-10-01,847.0,9308.0,2.58,2.33,0.25,10.73,0.0698
2020-01-01,1092.0,10723.0,-6.665,-6.4,-0.265,4.14,0.5544
2020-04-01,647.0,7842.0,-22.56,-20.31,-2.25,11.08,0.8835
2020-07-01,967.0,12640.0,-7.97,-8.04,0.07,-0.87,0.063
2020-10-01,1099.0,14782.0,-7.2,-8.275,1.075,-12.99,0.0024
2021-01-01,1263.0,18116.0,10.38,5.47,4.91,89.76,0.0
2021-04-01,1192.0,19181.0,60.715,56.69,4.025,7.1,0.2048
2021-07-01,1628.0,24349.0,33.37,29.97,3.4,11.34,0.0908


Loy


Unnamed: 0,# Customers Added Modules,# Customers Did Not Add Modules,Added - Typical Growth,Did Not Add - Typical Growth,Typical Growth Difference,Growth Rate Difference,pVal
2019-04-01,134.0,6549.0,5.37,2.23,3.14,140.81,0.012
2019-07-01,199.0,8346.0,3.72,1.625,2.095,128.92,0.1478
2019-10-01,277.0,9878.0,4.57,2.32,2.25,96.98,0.0136
2020-01-01,332.0,11483.0,-5.74,-6.46,0.72,-11.15,0.9947
2020-04-01,252.0,8237.0,-24.175,-20.34,-3.835,18.85,0.5154
2020-07-01,416.0,13191.0,-11.515,-7.93,-3.585,45.21,0.3827
2020-10-01,588.0,15293.0,-9.64,-8.12,-1.52,18.72,0.6225
2021-01-01,709.0,18670.0,4.72,5.805,-1.085,-18.69,0.2418
2021-04-01,754.0,19619.0,64.415,56.55,7.865,13.91,0.0002
2021-07-01,914.0,25063.0,34.525,30.0,4.525,15.08,0.0721


Mkt


Unnamed: 0,# Customers Added Modules,# Customers Did Not Add Modules,Added - Typical Growth,Did Not Add - Typical Growth,Typical Growth Difference,Growth Rate Difference,pVal
2019-10-01,3.0,10152.0,25.93,2.37,23.56,994.09,0.1524
2020-01-01,307.0,11508.0,-7.46,-6.42,-1.04,16.2,0.649
2020-04-01,345.0,8144.0,-19.01,-20.485,1.475,-7.2,0.2417
2020-07-01,688.0,12919.0,-10.27,-7.85,-2.42,30.83,0.6835
2020-10-01,1116.0,14765.0,-9.025,-8.12,-0.905,11.15,0.7143
2021-01-01,1666.0,17713.0,5.88,5.72,0.16,2.8,0.5374
2021-04-01,1512.0,18861.0,61.83,56.32,5.51,9.78,0.0005
2021-07-01,1818.0,24159.0,33.55,29.91,3.64,12.17,0.0775
2021-10-01,861.0,28698.0,32.12,32.815,-0.695,-2.12,0.6402
2022-01-01,791.0,31530.0,18.7,19.7,-1.0,-5.08,0.4265


OPT


Unnamed: 0,# Customers Added Modules,# Customers Did Not Add Modules,Added - Typical Growth,Did Not Add - Typical Growth,Typical Growth Difference,Growth Rate Difference,pVal
2020-07-01,221.0,13386.0,-19.23,-7.79,-11.44,146.85,0.0
2020-10-01,379.0,15502.0,-16.33,-7.925,-8.405,106.06,0.0
2021-01-01,679.0,18700.0,-4.14,6.16,-10.3,-167.21,0.0
2021-04-01,919.0,19454.0,79.24,55.88,23.36,41.8,0.0
2021-07-01,1464.0,24513.0,41.745,29.53,12.215,41.36,0.0
2021-10-01,1360.0,28199.0,41.91,32.34,9.57,29.59,0.0
2022-01-01,1296.0,31025.0,24.49,19.5,4.99,25.59,0.0
2022-04-01,1303.0,35050.0,9.2,7.84,1.36,17.35,0.0
2022-07-01,1009.0,39336.0,6.57,4.35,2.22,51.03,0.0


KDS


Unnamed: 0,# Customers Added Modules,# Customers Did Not Add Modules,Added - Typical Growth,Did Not Add - Typical Growth,Typical Growth Difference,Growth Rate Difference,pVal
2019-04-01,189.0,6494.0,5.57,2.185,3.385,154.92,0.0065
2019-07-01,230.0,8315.0,5.035,1.53,3.505,229.08,0.0004
2019-10-01,272.0,9883.0,6.72,2.26,4.46,197.35,0.0
2020-01-01,337.0,11478.0,-4.91,-6.46,1.55,-23.99,0.0123
2020-04-01,357.0,8132.0,-20.84,-20.415,-0.425,2.08,0.6568
2020-07-01,596.0,13011.0,-6.43,-8.13,1.7,-20.91,0.0235
2020-10-01,722.0,15159.0,-2.01,-8.46,6.45,-76.24,0.0
2021-01-01,938.0,18441.0,13.625,5.31,8.315,156.59,0.0
2021-04-01,1107.0,19266.0,62.61,56.555,6.055,10.71,0.0001
2021-07-01,1534.0,24443.0,35.45,29.85,5.6,18.76,0.0004


Kiosk


Unnamed: 0,# Customers Added Modules,# Customers Did Not Add Modules,Added - Typical Growth,Did Not Add - Typical Growth,Typical Growth Difference,Growth Rate Difference,pVal
2019-04-01,62.0,6621.0,-3.745,2.38,-6.125,-257.35,0.0277
2019-07-01,90.0,8455.0,1.19,1.66,-0.47,-28.31,0.1765
2019-10-01,103.0,10052.0,2.93,2.365,0.565,23.89,0.8923
2020-01-01,99.0,11716.0,-6.78,-6.44,-0.34,5.28,0.0309
2020-04-01,80.0,8409.0,-20.47,-20.43,-0.04,0.2,0.1632
2020-07-01,116.0,13491.0,-9.265,-8.01,-1.255,15.67,0.28
2020-10-01,142.0,15739.0,-9.49,-8.17,-1.32,16.16,0.4005
2021-01-01,154.0,19225.0,8.045,5.72,2.325,40.65,0.4489
2021-04-01,175.0,20198.0,42.64,56.97,-14.33,-25.15,0.016
2021-07-01,221.0,25756.0,34.96,30.16,4.8,15.92,0.1995


Inv


Unnamed: 0,# Customers Added Modules,# Customers Did Not Add Modules,Added - Typical Growth,Did Not Add - Typical Growth,Typical Growth Difference,Growth Rate Difference,pVal
2019-04-01,46.0,6637.0,6.985,2.27,4.715,207.71,0.4553
2019-07-01,57.0,8488.0,7.04,1.61,5.43,337.27,0.1195
2019-10-01,86.0,10069.0,6.39,2.33,4.06,174.25,0.0055
2020-01-01,104.0,11711.0,-4.69,-6.46,1.77,-27.4,0.1904
2020-04-01,72.0,8417.0,-17.72,-20.43,2.71,-13.26,0.729
2020-07-01,127.0,13480.0,-7.62,-8.04,0.42,-5.22,0.0049
2020-10-01,140.0,15741.0,-5.795,-8.22,2.425,-29.5,0.3246
2021-01-01,173.0,19206.0,11.44,5.67,5.77,101.76,0.0049
2021-04-01,165.0,20208.0,78.24,56.7,21.54,37.99,0.0
2021-07-01,172.0,25805.0,33.56,30.16,3.4,11.27,0.1728


3PD


Unnamed: 0,# Customers Added Modules,# Customers Did Not Add Modules,Added - Typical Growth,Did Not Add - Typical Growth,Typical Growth Difference,Growth Rate Difference,pVal
2021-04-01,1056.0,19317.0,57.14,56.85,0.29,0.51,0.9627
2021-07-01,2317.0,23660.0,33.32,29.87,3.45,11.55,0.4552
2021-10-01,4665.0,24894.0,35.61,32.27,3.34,10.35,0.0068
2022-01-01,6005.0,26316.0,22.69,18.99,3.7,19.48,0.0
2022-04-01,7171.0,29182.0,10.04,7.36,2.68,36.41,0.0
2022-07-01,7521.0,32824.0,4.99,4.29,0.7,16.32,0.0232


XC


Unnamed: 0,# Customers Added Modules,# Customers Did Not Add Modules,Added - Typical Growth,Did Not Add - Typical Growth,Typical Growth Difference,Growth Rate Difference,pVal
2021-10-01,50.0,29509.0,39.835,32.79,7.045,21.49,0.1549
2022-01-01,246.0,32075.0,20.02,19.67,0.35,1.78,0.6433
2022-04-01,496.0,35857.0,10.19,7.87,2.32,29.48,0.3112
2022-07-01,901.0,39444.0,5.18,4.38,0.8,18.26,0.137


EC


Unnamed: 0,# Customers Added Modules,# Customers Did Not Add Modules,Added - Typical Growth,Did Not Add - Typical Growth,Typical Growth Difference,Growth Rate Difference,pVal
2019-04-01,136.0,6547.0,3.915,2.27,1.645,72.47,0.8793
2019-07-01,181.0,8364.0,2.52,1.64,0.88,53.66,0.7082
2019-10-01,348.0,9807.0,2.11,2.37,-0.26,-10.97,0.6521
2020-01-01,478.0,11337.0,-7.405,-6.39,-1.015,15.88,0.3899
2020-04-01,350.0,8139.0,-23.73,-20.31,-3.42,16.84,0.1025
2020-07-01,513.0,13094.0,-9.52,-7.93,-1.59,20.05,0.1361
2020-10-01,555.0,15326.0,-9.72,-8.13,-1.59,19.56,0.8756
2021-01-01,696.0,18683.0,5.64,5.74,-0.1,-1.74,0.8038
2021-04-01,564.0,19809.0,61.145,56.76,4.385,7.73,0.2996
2021-07-01,822.0,25155.0,38.45,29.92,8.53,28.51,0.0


Mod Count


Unnamed: 0,# Customers Added Modules,# Customers Did Not Add Modules,Added - Typical Growth,Did Not Add - Typical Growth,Typical Growth Difference,Growth Rate Difference,pVal
2019-04-01,1492.0,5191.0,3.89,1.88,2.01,106.91,0.0
2019-07-01,1942.0,6603.0,3.14,1.12,2.02,180.36,0.0
2019-10-01,2432.0,7723.0,3.925,1.81,2.115,116.85,0.0
2020-01-01,3532.0,8283.0,-6.295,-6.52,0.225,-3.45,0.0439
2020-04-01,3090.0,5399.0,-22.545,-19.16,-3.385,17.67,0.0003
2020-07-01,5136.0,8471.0,-10.525,-6.38,-4.145,64.97,0.0
2020-10-01,6435.0,9446.0,-10.08,-6.74,-3.34,49.55,0.004
2021-01-01,8183.0,11196.0,4.74,6.46,-1.72,-26.63,0.1602
2021-04-01,7922.0,12451.0,63.525,52.97,10.555,19.93,0.0
2021-07-01,9332.0,16645.0,34.205,28.41,5.795,20.4,0.0


Guest Count


Unnamed: 0,# Customers Added Modules,# Customers Did Not Add Modules,Added - Typical Growth,Did Not Add - Typical Growth,Typical Growth Difference,Growth Rate Difference,pVal
2019-04-01,509.0,6174.0,2.69,2.25,0.44,19.56,0.0004
2019-07-01,667.0,7878.0,2.78,1.51,1.27,84.11,0.0001
2019-10-01,905.0,9250.0,4.64,2.165,2.475,114.32,0.0
2020-01-01,1873.0,9942.0,-6.31,-6.46,0.15,-2.32,0.0812
2020-04-01,1851.0,6638.0,-24.16,-19.55,-4.61,23.58,0.0004
2020-07-01,3289.0,10318.0,-11.58,-6.905,-4.675,67.7,0.0
2020-10-01,4490.0,11391.0,-10.585,-7.05,-3.535,50.14,0.0001
2021-01-01,5625.0,13754.0,3.68,6.73,-3.05,-45.32,0.0
2021-04-01,4310.0,16063.0,66.225,54.55,11.675,21.4,0.0
2021-07-01,4065.0,21912.0,34.2,29.57,4.63,15.66,0.0


# See Year over Year GVM Growth by Customers with Growth Package


In [10]:
# Here we look at customers that had components of the growth package
# OO, GC, Mkt, Loy, and OPT for fun!
# We then look at customers who had different groupings of those modules over the course of the year
# ie had OO and Loy at the start of a 12 month period and at the end
# and see if their growth rates compare to customers without any Growth Package modules
results = pd.DataFrame()
for quarter in comp[comp['Quarter'].isin([date(2022,4,1)]) ].sort_values('Quarter')['Quarter'].unique().tolist():

    comp = cust.merge(yoyCompare[yoyCompare['GMV Per Day Growth Percent'] > -50])
    comp = comp[comp['GMV Per Day Growth Percent'] < 200]

    tmp = comp[comp['Quarter'] == quarter]
    tmp['Mod Groups'] = 'Didnt Have'
    tmp['Mod Groups'] = np.where((tmp[['OO', 'GC', 'Loy', 'Mkt']].sum(axis=1) > 0), 'Had', tmp['Mod Groups'])
    
    tmp['Mod Tmp'] = ''
    tmp['Mod Tmp'] = np.where((tmp['OO'] > 0) & (tmp['OO Last Year'] > 0), tmp['Mod Tmp'] + ' OO', tmp['Mod Tmp'])
    tmp['Mod Tmp'] = np.where((tmp['GC'] > 0) & (tmp['GC Last Year'] > 0), tmp['Mod Tmp'] + ' GC', tmp['Mod Tmp'])
    tmp['Mod Tmp'] = np.where((tmp['Loy'] > 0) & (tmp['Loy Last Year'] > 0), tmp['Mod Tmp'] + ' Loy', tmp['Mod Tmp'])
    tmp['Mod Tmp'] = np.where((tmp['Mkt'] > 0) & (tmp['Mkt Last Year'] > 0), tmp['Mod Tmp'] + ' Mkt', tmp['Mod Tmp'])

    
    agg = tmp.groupby(['Mod Tmp']).agg({'Customer Id': 'nunique','GMV Per Day': 'mean','GMV Per Day Last Year': 'mean','GMV Per Day Growth Percent': q50})
    agg['Avg'] = np.round(100 * (agg['GMV Per Day'] - agg['GMV Per Day Last Year']) / agg['GMV Per Day Last Year'], 2)
    agg.sort_values('Customer Id', ascending=False, inplace=True)
    listTmp = agg[agg['Customer Id'] > 500].index.tolist()
    
    for mod in listTmp + ['All']:
        if mod == 'All':
            newTmp = tmp.copy()
        else:
            newTmp = tmp[tmp['Mod Tmp'].isin([mod, ''])]
        agg = newTmp.groupby(['Mod Groups']).agg({'Customer Id': 'nunique','GMV Per Day': 'mean','GMV Per Day Last Year': 'mean','GMV Per Day Growth Percent': q50})
        agg['Avg'] = np.round(100 * (agg['GMV Per Day'] - agg['GMV Per Day Last Year']) / agg['GMV Per Day Last Year'], 2)

        if len(agg) == 2:
            pVal = stats.ttest_ind(newTmp[newTmp['Mod Groups'] == 'Didnt Have']['GMV Per Day Growth Percent'].to_numpy(), newTmp[newTmp['Mod Groups'] == 'Had']['GMV Per Day Growth Percent'].to_numpy())[1]
            pVal = np.round(pVal, 4)
            
            index = pd.MultiIndex.from_tuples([(quarter, mod)], names=["Quarter", "Module"])
            tmpResults = pd.DataFrame({'# Customers Had Modules': [agg.loc['Had']['Customer Id']],
                                       '# Customers Did Not Have Modules': [agg.loc['Didnt Have']['Customer Id']],
                                       'Had - Typical Growth ': [agg.loc['Had']['GMV Per Day Growth Percent']],
                                       'Did Not Have - Typical Growth': [agg.loc['Didnt Have']['GMV Per Day Growth Percent']],
                                       'Typical Growth Difference': [agg.loc['Had']['GMV Per Day Growth Percent'] - agg.loc['Didnt Have']['GMV Per Day Growth Percent']],
                                       'Growth Rate Difference': [np.round(100 * (agg.loc['Had']['GMV Per Day Growth Percent'] - agg.loc['Didnt Have']['GMV Per Day Growth Percent']) / (agg.loc['Didnt Have']['GMV Per Day Growth Percent']), 2)],
                                       'pVal': [pVal]}, index=index)
            results = pd.concat([results, tmpResults])
display(results.sort_values(['Module', 'Quarter']))
results.to_csv(f'./Spark 2022 Files/Growth Package Had.csv')

Unnamed: 0_level_0,Unnamed: 1_level_0,# Customers Had Modules,# Customers Did Not Have Modules,Had - Typical Growth,Did Not Have - Typical Growth,Typical Growth Difference,Growth Rate Difference,pVal
Quarter,Module,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2022-04-01,,524.0,6496.0,11.895,5.395,6.5,120.48,0.0
2022-04-01,GC,4501.0,6510.0,8.53,5.43,3.1,57.09,0.0
2022-04-01,GC Loy,2123.0,6496.0,7.17,5.395,1.775,32.9,0.0078
2022-04-01,OO,4347.0,6509.0,8.56,5.43,3.13,57.64,0.0
2022-04-01,OO GC,8406.0,6497.0,9.56,5.38,4.18,77.7,0.0
2022-04-01,OO GC Loy,9401.0,6496.0,7.87,5.395,2.475,45.88,0.1405
2022-04-01,OO GC Loy Mkt,1467.0,6496.0,9.6,5.395,4.205,77.94,0.0
2022-04-01,OO Loy,1603.0,6496.0,9.18,5.395,3.785,70.16,0.0
2022-04-01,All,29820.0,6533.0,8.35,5.46,2.89,52.93,0.0


In [11]:
# Here we look at customers that had cOPT
# We then look at customers who had different groupings of those modules over the course of the year
# ie had OO and Loy at the start of a 12 month period and at the end
# and see if their growth rates compare to customers without any Growth Package modules
results = pd.DataFrame()
for quarter in comp[comp['Quarter'].isin([date(2022,4,1)]) ].sort_values('Quarter')['Quarter'].unique().tolist():

    comp = cust.merge(yoyCompare[yoyCompare['GMV Per Day Growth Percent'] > -50])
    comp = comp[comp['GMV Per Day Growth Percent'] < 200]

    tmp = comp[comp['Quarter'] == quarter]
    tmp['Mod Groups'] = 'Didnt Have'
    tmp['Mod Groups'] = np.where((tmp[['OPT']].sum(axis=1) > 0), 'Had', tmp['Mod Groups'])
    
    tmp['Mod Tmp'] = ''
    tmp['Mod Tmp'] = np.where((tmp['OPT'] > 0) & (tmp['OPT Last Year'] > 0), tmp['Mod Tmp'] + ' OPT', tmp['Mod Tmp'])

    
    agg = tmp.groupby(['Mod Tmp']).agg({'Customer Id': 'nunique','GMV Per Day': 'mean','GMV Per Day Last Year': 'mean','GMV Per Day Growth Percent': q50})
    agg['Avg'] = np.round(100 * (agg['GMV Per Day'] - agg['GMV Per Day Last Year']) / agg['GMV Per Day Last Year'], 2)
    agg.sort_values('Customer Id', ascending=False, inplace=True)
    listTmp = agg[agg['Customer Id'] > 500].index.tolist()
    
    for mod in listTmp + ['All']:
        if mod == 'All':
            newTmp = tmp.copy()
        else:
            newTmp = tmp[tmp['Mod Tmp'].isin([mod, ''])]
        agg = newTmp.groupby(['Mod Groups']).agg({'Customer Id': 'nunique','GMV Per Day': 'mean','GMV Per Day Last Year': 'mean','GMV Per Day Growth Percent': q50})
        agg['Avg'] = np.round(100 * (agg['GMV Per Day'] - agg['GMV Per Day Last Year']) / agg['GMV Per Day Last Year'], 2)

        if len(agg) == 2:
            pVal = stats.ttest_ind(newTmp[newTmp['Mod Groups'] == 'Didnt Have']['GMV Per Day Growth Percent'].to_numpy(), newTmp[newTmp['Mod Groups'] == 'Had']['GMV Per Day Growth Percent'].to_numpy())[1]
            pVal = np.round(pVal, 4)
            
            index = pd.MultiIndex.from_tuples([(quarter, mod)], names=["Quarter", "Module"])
            tmpResults = pd.DataFrame({'# Customers Had Modules': [agg.loc['Had']['Customer Id']],
                                       '# Customers Did Not Have Modules': [agg.loc['Didnt Have']['Customer Id']],
                                       'Had - Typical Growth ': [agg.loc['Had']['GMV Per Day Growth Percent']],
                                       'Did Not Have - Typical Growth': [agg.loc['Didnt Have']['GMV Per Day Growth Percent']],
                                       'Typical Growth Difference': [agg.loc['Had']['GMV Per Day Growth Percent'] - agg.loc['Didnt Have']['GMV Per Day Growth Percent']],
                                       'Growth Rate Difference': [np.round(100 * (agg.loc['Had']['GMV Per Day Growth Percent'] - agg.loc['Didnt Have']['GMV Per Day Growth Percent']) / (agg.loc['Didnt Have']['GMV Per Day Growth Percent']), 2)],
                                       'pVal': [pVal]}, index=index)
            results = pd.concat([results, tmpResults])
display(results.sort_values(['Module', 'Quarter']))
results.to_csv(f'./Spark 2022 Files/OPT Had.csv')

Unnamed: 0_level_0,Unnamed: 1_level_0,# Customers Had Modules,# Customers Did Not Have Modules,Had - Typical Growth,Did Not Have - Typical Growth,Typical Growth Difference,Growth Rate Difference,pVal
Quarter,Module,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2022-04-01,,1290.0,33533.0,9.24,7.71,1.53,19.84,0.0
2022-04-01,OPT,2820.0,33533.0,10.755,7.71,3.045,39.49,0.0
2022-04-01,All,2820.0,33533.0,10.755,7.71,3.045,39.49,0.0
