In [2]:
import numpy as np
import pandas as pd
from google.cloud import bigquery
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats
import plotly.express as px
import db_dtypes
import bigframes.pandas as bpd
from IPython.display import display, HTML
import plotly.express as px
import seaborn as sns
import matplotlib.pyplot as plt
import math
import statsmodels.api as sm
from datetime import datetime
from typing import Union
import logging
import sys

# Configure logging
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s %(levelname)s:%(message)s',
    handlers=[
        logging.StreamHandler(sys.stdout)
    ]
)


growth_data = """

WITH holdout_entities AS (
  SELECT
        entity_id
       ,`Release Date` as release_date
  FROM `logistics-data-storage-staging.long_term_pricing.global_holdout_rollout`
  WHERE `Release Date` < DATE_SUB(CURRENT_DATE(), INTERVAL EXTRACT(DAYOFWEEK FROM CURRENT_DATE())- 2 DAY)
),
customer_information AS (
  SELECT
         e.entity_id
        ,e.release_date
        ,IF(d.created_date <= e.release_date, FALSE, COALESCE(is_customer_holdout, FALSE)) AS is_customer_holdout
        ,customer_id
  FROM `fulfillment-dwh-production.cl.dps_holdout_users` AS d
  JOIN holdout_entities AS e
        ON d.entity_id = e.entity_id
  WHERE d.created_date >= date('2025-01-01')
    AND created_date < CURRENT_DATE()  -- data is not full/not present for current date
    AND customer_id IS NOT NULL
    AND customer_id NOT IN UNNEST(ARRAY(SELECT id FROM `fulfillment-dwh-production.cl._bad_dps_logs_ids`)) -- get rid of ids like 'null', 'test' etc
    --AND d.entity_id in ('FP_MM','GV_CI', 'PY_UY','GV_BA','PY_NI','TB_IQ','TB_OM','PY_HN','TB_KW','GV_ME','PO_FI','FP_SG','FP_TH','HS_SA','FY_CY','TB_JO','GV_RS','GV_UG','GV_NG','NP_HU')
  GROUP BY 1, 2, 3, 4
)
  SELECT 
     e.entity_id
    ,e.customer_id customer_id
    ,e.is_customer_holdout
    ,COUNT(DISTINCT case when dps.created_date <= e.release_date then dps.platform_order_code end) AS orders_pre
    ,COUNT(DISTINCT case when dps.created_date > e.release_date then dps.platform_order_code end) AS orders_post
    ,SUM(case when dps.created_date <= e.release_date then dps.fully_loaded_gross_profit_eur end) AS flgp_pre
    ,SUM(case when dps.created_date > e.release_date then dps.fully_loaded_gross_profit_eur end) AS flgp_post
  FROM customer_information e
  LEFT JOIN `fulfillment-dwh-production.cl.dps_sessions_mapped_to_orders` dps
    ON dps.entity_id = e.entity_id
    AND dps.dps_customer_id = e.customer_id
  WHERE created_date >= DATE_SUB(release_date, INTERVAL 8 WEEK)
  AND created_date < CURRENT_DATE
  AND platform_order_code IS NOT NULL
  AND is_own_delivery
  AND is_sent
  --AND dps.entity_id in ('FP_MM','GV_CI', 'PY_UY','GV_BA','PY_NI','TB_IQ','TB_OM','PY_HN','TB_KW','GV_ME','PO_FI','FP_SG','FP_TH','HS_SA','FY_CY','TB_JO','GV_RS','GV_UG','GV_NG','NP_HU')
  AND vendor_vertical_parent in ('Restaurant','restaurant','restaurants')
  GROUP BY 1,2,3

  """


# mkt_data = """
  
#   WITH holdout_entities AS (
#   SELECT
#         entity_id
#        ,`Release Date` as release_date
#   FROM `logistics-data-storage-staging.long_term_pricing.global_holdout_rollout`
#   WHERE `Release Date` < DATE_SUB(CURRENT_DATE(), INTERVAL EXTRACT(DAYOFWEEK FROM CURRENT_DATE())- 2 DAY)
# ),
# customer_information AS (
#   SELECT
#          e.entity_id
#         ,e.release_date
#         ,IF(d.created_date <= e.release_date, FALSE, COALESCE(is_customer_holdout, FALSE)) AS is_customer_holdout
#         ,customer_id
#   FROM `fulfillment-dwh-production.cl.dps_holdout_users` AS d
#   JOIN holdout_entities AS e
#         ON d.entity_id = e.entity_id
#   WHERE d.created_date >= date('2025-01-01')
#     AND created_date < CURRENT_DATE()  -- data is not full/not present for current date
#     AND customer_id IS NOT NULL
#     AND customer_id NOT IN UNNEST(ARRAY(SELECT id FROM `fulfillment-dwh-production.cl._bad_dps_logs_ids`)) -- get rid of ids like 'null', 'test' etc
#   GROUP BY 1, 2, 3, 4
# )
#   SELECT 
#      e.entity_id
#     ,e.customer_id customer_id
#     ,e.is_customer_holdout
#     ,COUNT(DISTINCT case when mkt.order_date <= e.release_date then mkt.order_id end) AS orders_pre
#     ,COUNT(DISTINCT case when mkt.order_date > e.release_date then mkt.order_id end) AS orders_post
#     ,SUM(case when mkt.order_date <= e.release_date then mkt.analytical_profit end) AS analytical_profit_pre
#     ,SUM(case when mkt.order_date > e.release_date then mkt.analytical_profit end) AS analytical_profit_post
#     ,SUM(case when mkt.order_date <= e.release_date then mkt.local_analytical_profit end) AS local_analytical_profit_pre
#     ,SUM(case when mkt.order_date > e.release_date then mkt.local_analytical_profit end) AS local_analytical_profit_post
#     ,SUM(case when mkt.order_date <= e.release_date then mkt.revenue_net end) AS revenue_pre
#     ,SUM(case when mkt.order_date > e.release_date then mkt.revenue_net end) AS revenue_post
#     ,SUM(case when mkt.order_date <= e.release_date then mkt.cost_of_sales end) AS cost_of_sales_pre
#     ,SUM(case when mkt.order_date > e.release_date then mkt.cost_of_sales end) AS cost_of_sales_post
#   FROM customer_information e
#   LEFT JOIN `fulfillment-dwh-production.cl.dps_sessions_mapped_to_orders` dps
#     ON dps.entity_id = e.entity_id
#     AND dps.dps_customer_id = e.customer_id 
#   LEFT JOIN fulfillment-dwh-production.curated_data_shared_mkt.bima_order_profitability mkt
#     ON mkt.global_entity_id = dps.entity_id
#     AND mkt.order_id = dps.platform_order_code
#   WHERE order_date >= DATE_SUB(release_date, INTERVAL 8 WEEK)
#   AND order_date < CURRENT_DATE
#   and created_date >= DATE_SUB(release_date, INTERVAL 8 WEEK)
#   and created_date < CURRENT_DATE
#   AND platform_order_code IS NOT NULL
#   AND is_own_delivery
#   AND is_sent
#   AND e.entity_id is not null
#   AND vendor_vertical_parent in ('Restaurant','restaurant','restaurants')
#   GROUP BY 1,2,3
  
#   """

mkt_data = """
  
  WITH holdout_entities AS (
  SELECT
        entity_id
       ,`Release Date` as release_date
  FROM `logistics-data-storage-staging.long_term_pricing.global_holdout_rollout`
  WHERE `Release Date` < DATE_SUB(CURRENT_DATE(), INTERVAL EXTRACT(DAYOFWEEK FROM CURRENT_DATE())- 2 DAY)
),
customer_information AS (
  SELECT
         e.entity_id
        ,e.release_date
        ,IF(d.created_date <= e.release_date, FALSE, COALESCE(is_customer_holdout, FALSE)) AS is_customer_holdout
        ,customer_id
  FROM `fulfillment-dwh-production.cl.dps_holdout_users` AS d
  JOIN holdout_entities AS e
        ON d.entity_id = e.entity_id
  WHERE d.created_date >= date('2025-01-01')
    AND created_date < CURRENT_DATE()  -- data is not full/not present for current date
    AND customer_id IS NOT NULL
    AND customer_id NOT IN UNNEST(ARRAY(SELECT id FROM `fulfillment-dwh-production.cl._bad_dps_logs_ids`)) -- get rid of ids like 'null', 'test' etc
  GROUP BY 1, 2, 3, 4
)
  SELECT 
     e.entity_id
    ,e.customer_id customer_id
    ,e.is_customer_holdout
    ,COUNT(DISTINCT case when mkt.order_date <= e.release_date then mkt.order_id end) AS orders_pre
    ,COUNT(DISTINCT case when mkt.order_date > e.release_date then mkt.order_id end) AS orders_post
    ,SUM(case when mkt.order_date <= e.release_date then mkt.analytical_profit end) AS analytical_profit_pre
    ,SUM(case when mkt.order_date > e.release_date then mkt.analytical_profit end) AS analytical_profit_post
    ,SUM(case when mkt.order_date <= e.release_date then mkt.local_analytical_profit end) AS local_analytical_profit_pre
    ,SUM(case when mkt.order_date > e.release_date then mkt.local_analytical_profit end) AS local_analytical_profit_post
    ,SUM(case when mkt.order_date <= e.release_date then mkt.revenue_net end) AS revenue_pre
    ,SUM(case when mkt.order_date > e.release_date then mkt.revenue_net end) AS revenue_post
    ,SUM(case when mkt.order_date <= e.release_date then mkt.cost_of_sales end) AS cost_of_sales_pre
    ,SUM(case when mkt.order_date > e.release_date then mkt.cost_of_sales end) AS cost_of_sales_post

    ,SUM(case when mkt.order_date <= e.release_date then mkt.dh_funded_basket_incentives end) AS dh_funded_basket_incentives_pre
    ,SUM(case when mkt.order_date > e.release_date then mkt.dh_funded_basket_incentives end) AS dh_funded_basket_incentives_post

    ,SUM(case when mkt.order_date <= e.release_date then mkt.dh_funded_voucher_local end) AS dh_funded_voucher_local_pre
    ,SUM(case when mkt.order_date > e.release_date then mkt.dh_funded_voucher_local end) AS dh_funded_voucher_local_post

    ,SUM(case when mkt.order_date <= e.release_date then mkt.dh_funded_voucher_local end) AS dh_funded_voucher_local_pre
    SUM(case when mkt.order_date > e.release_date then mkt.dh_funded_voucher_local end) AS dh_funded_voucher_local_post
  FROM customer_information e
  LEFT JOIN `fulfillment-dwh-production.cl.dps_sessions_mapped_to_orders` dps
    ON dps.entity_id = e.entity_id
    AND dps.dps_customer_id = e.customer_id 
  LEFT JOIN fulfillment-dwh-production.curated_data_shared_mkt.bima_order_profitability mkt
    ON mkt.global_entity_id = dps.entity_id
    AND mkt.order_id = dps.platform_order_code
  WHERE order_date >= DATE_SUB(release_date, INTERVAL 8 WEEK)
    AND order_date < CURRENT_DATE
    and created_date >= DATE_SUB(release_date, INTERVAL 8 WEEK)
    and created_date < CURRENT_DATE
    AND platform_order_code IS NOT NULL
    AND is_own_delivery
    AND is_sent
    AND e.entity_id is not null
    AND vendor_vertical_parent in ('Restaurant','restaurant','restaurants')
  GROUP BY 1,2,3
  
"""


marketing_data = """
WITH eligible_holdout_entities AS (
  SELECT
        entity_id
       ,`Release Date` as release_date
  FROM `logistics-data-storage-staging.long_term_pricing.global_holdout_rollout`
  WHERE `Release Date` < DATE_SUB(CURRENT_DATE(), INTERVAL EXTRACT(DAYOFWEEK FROM CURRENT_DATE())- 2 DAY)
),
customer_information AS (
  SELECT
         e.entity_id
        ,e.release_date
        ,COUNT(DISTINCT CASE WHEN is_customer_holdout IS True THEN customer_id END) AS holdout_customers
        ,COUNT(DISTINCT CASE WHEN is_customer_holdout IS False THEN customer_id END) AS non_holdout_customers
        ,COUNT(DISTINCT customer_id) AS all_customers
  FROM `fulfillment-dwh-production.cl.dps_holdout_users` AS d
  JOIN eligible_holdout_entities AS e
  ON d.entity_id = e.entity_id
  WHERE d.created_date >= DATE('2025-01-01')
    AND customer_id NOT IN UNNEST(ARRAY(SELECT id FROM `fulfillment-dwh-production.cl._bad_dps_logs_ids`))
    AND d.created_date > e.release_date
    AND customer_id IS NOT NULL
  GROUP BY e.entity_id, e.release_date
), 
orders as (
select
      DATE_SUB(CURRENT_DATE(), INTERVAL EXTRACT(DAYOFWEEK FROM CURRENT_DATE()) - 2 DAY) AS analysis_week 
    ,global_entity_id
    ,release_date
    ,holdout_customers
    ,non_holdout_customers
    ,all_customers
    ,count(distinct case when dps.is_customer_holdout then op.order_id end) holdout_orders
    ,count(distinct case when dps.is_customer_holdout is false then op.order_id end) non_holdout_orders
    ,sum(case when dps.is_customer_holdout then op.gmv end) holdout_gmv
    ,sum(case when dps.is_customer_holdout is false then op.gmv end) non_holdout_gmv
    ,sum(case when dps.is_customer_holdout then op.gfv end) holdout_gfv
    ,sum(case when dps.is_customer_holdout is false then op.gfv end) non_holdout_gfv
    ,sum(case when dps.is_customer_holdout then op.analytical_profit end) flgp_holdout
    ,sum(case when dps.is_customer_holdout is false then op.analytical_profit end) flgp_non_holdout
    ,sum(case when dps.is_customer_holdout then op.revenue_net end) holdout_revenue_net
    ,sum(case when dps.is_customer_holdout is false then op.revenue_net end) non_holdout_revenue_net
    ,sum(case when dps.is_customer_holdout then op.delivery_costs end) holdout_delivery_costs
    ,sum(case when dps.is_customer_holdout is false then op.delivery_costs end) non_holdout_delivery_costs
    ,sum(case when dps.is_customer_holdout then op.delivery_fee_net end) holdout_delivery_fee_net
    ,sum(case when dps.is_customer_holdout is false then op.delivery_fee_net end) non_holdout_delivery_fee_net
    ,sum(case when dps.is_customer_holdout then dh_funded_basket_incentives end) holdout_dh_funded_basket_incentives
    ,sum(case when dps.is_customer_holdout is false then dh_funded_basket_incentives end) non_holdout_dh_funded_basket_incentives
    ,sum(case when dps.is_customer_holdout then cost_of_sales end) holdout_cost_of_sales
    ,sum(case when dps.is_customer_holdout is false then cost_of_sales end) non_holdout_cost_of_sales
    ,sum(case when dps.is_customer_holdout then cost_of_goods_sold end) holdout_cost_of_goods_sold
    ,sum(case when dps.is_customer_holdout is false then cost_of_goods_sold end) non_holdout_cost_of_goods_sold
    ,sum(case when dps.is_customer_holdout then payment_costs end) holdout_payment_costs
    ,sum(case when dps.is_customer_holdout is false then payment_costs end) non_holdout_payment_costs
    ,sum(case when dps.is_customer_holdout then recooks_and_wastage end) holdout_recooks_and_wastage
    ,sum(case when dps.is_customer_holdout is false then recooks_and_wastage end) non_holdout_recooks_and_wastage
    ,sum(case when dps.is_customer_holdout then refunds_and_compensation end) holdout_refunds_and_compensation
    ,sum(case when dps.is_customer_holdout is false then refunds_and_compensation end) non_holdout_refunds_and_compensation
    ,sum(case when dps.is_customer_holdout then semi_variable_delivery_costs end) holdout_semi_variable_delivery_costs
    ,sum(case when dps.is_customer_holdout is false then semi_variable_delivery_costs end) non_holdout_semi_variable_delivery_costs
    ,sum(case when dps.is_customer_holdout then contact_center_costs end) holdout_contact_center_costs
    ,sum(case when dps.is_customer_holdout is false then contact_center_costs end) non_holdout_contact_center_costs
    ,sum(case when dps.is_customer_holdout then other_non_specific_delivery_costs end) holdout_other_non_specific_delivery_costs
    ,sum(case when dps.is_customer_holdout is false then other_non_specific_delivery_costs end) non_holdout_other_non_specific_delivery_costs
    
    ,sum(case when dps.is_customer_holdout then commission_fee_net end) holdout_commission_fee_net
    ,sum(case when dps.is_customer_holdout is false then commission_fee_net end) non_holdout_commission_fee_net

    ,sum(case when dps.is_customer_holdout then sale_of_goods_revenue_net end) holdout_sale_of_goods_revenue_net
    ,sum(case when dps.is_customer_holdout is false then sale_of_goods_revenue_net end) non_holdout_sale_of_goods_revenue_net
    
    ,sum(case when dps.is_customer_holdout then card_fee_net end) holdout_card_fee_net
    ,sum(case when dps.is_customer_holdout is false then card_fee_net end) non_holdout_card_fee_net

    ,sum(case when dps.is_customer_holdout then delivery_fee_net end) holdout_delivery_fee_net
    ,sum(case when dps.is_customer_holdout is false then delivery_fee_net end) non_holdout_delivery_fee_net

    ,sum(case when dps.is_customer_holdout then priority_fee_net end) holdout_priority_fee_net
    ,sum(case when dps.is_customer_holdout is false then priority_fee_net end) non_holdout_priority_fee_net

    ,sum(case when dps.is_customer_holdout then bad_weather_fee_net end) holdout_bad_weather_fee_net
    ,sum(case when dps.is_customer_holdout is false then bad_weather_fee_net end) non_holdout_bad_weather_fee_net

    ,sum(case when dps.is_customer_holdout then vendor_funded_delivery_fee_subsidy_net end) holdout_vendor_funded_delivery_fee_subsidy_net
    ,sum(case when dps.is_customer_holdout is false then vendor_funded_delivery_fee_subsidy_net end) non_holdout_vendor_funded_delivery_fee_subsidy_net

    ,sum(case when dps.is_customer_holdout then service_fee_net end) holdout_service_fee_net
    ,sum(case when dps.is_customer_holdout is false then service_fee_net end) non_holdout_service_fee_net

    ,sum(case when dps.is_customer_holdout then listing_fee_net end) holdout_listing_fee_net
    ,sum(case when dps.is_customer_holdout is false then listing_fee_net end) non_holdout_listing_fee_net

    ,sum(case when dps.is_customer_holdout then sublease_revenue_net end) holdout_sublease_revenue_net
    ,sum(case when dps.is_customer_holdout is false then sublease_revenue_net end) non_holdout_sublease_revenue_net

    ,sum(case when dps.is_customer_holdout then ads_revenue_net end) holdout_ads_revenue_net
    ,sum(case when dps.is_customer_holdout is false then ads_revenue_net end) non_holdout_ads_revenue_net

    ,sum(case when dps.is_customer_holdout then joker_vendor_fee_net end) holdout_joker_vendor_fee_net
    ,sum(case when dps.is_customer_holdout is false then joker_vendor_fee_net end) non_holdout_joker_vendor_fee_net

    ,sum(case when dps.is_customer_holdout then other_ncr_net end) holdout_other_ncr_net
    ,sum(case when dps.is_customer_holdout is false then other_ncr_net end) non_holdout_other_ncr_net

    ,sum(case when dps.is_customer_holdout then small_basket_fee_net end) holdout_small_basket_fee_net
    ,sum(case when dps.is_customer_holdout is false then small_basket_fee_net end) non_holdout_small_basket_fee_net

    ,sum(case when dps.is_customer_holdout then subscription_fee_net end) holdout_subscription_fee_net
    ,sum(case when dps.is_customer_holdout is false then subscription_fee_net end) non_holdout_subscription_fee_net

    ,sum(case when dps.is_customer_holdout then subscription_vendor_subsidy_net end) holdout_subscription_vendor_subsidy_net
    ,sum(case when dps.is_customer_holdout is false then subscription_vendor_subsidy_net end) non_holdout_subscription_vendor_subsidy_net

    ,sum(case when dps.is_customer_holdout then other_revenue_net end) holdout_other_revenue_net
    ,sum(case when dps.is_customer_holdout is false then other_revenue_net end) non_holdout_other_revenue_net

from fulfillment-dwh-production.curated_data_shared_mkt.bima_order_profitability op
JOIN customer_information usi 
    ON usi.entity_id = op.global_entity_id
JOIN `fulfillment-dwh-production.cl.dps_sessions_mapped_to_orders` dps 
    ON dps.entity_id = op.global_entity_id
    and dps.platform_order_code = op.order_id
    and dps.created_date >= '2025-01-01'
    and dps.created_date > usi.release_date
WHERE op.order_date > usi.release_date
    AND op.order_date >= '2025-01-01'
    AND dps.is_own_delivery
    AND dps.is_sent 
    AND vendor_vertical_parent in ('Restaurant','restaurant','restaurants')
GROUP BY 1,2,3,4,5,6
)
select * 
from orders

"""

In [3]:
project_id = "logistics-customer-staging"
logging.info(f"Initializing BigQuery client for project: {project_id}")

try:
    # Initialize BigQuery client
    client = bigquery.Client(project=project_id)
except Exception as e:
    logging.error(f"Failed to initialize BigQuery client: {e}")
    sys.exit(1)

#growth_df = client.query(growth_data).to_dataframe()
#mkt_df =  client.query(mkt_data).to_dataframe()

2025-02-13 12:51:40,778 INFO:Initializing BigQuery client for project: logistics-customer-staging






In [4]:
mkt_df = client.query(marketing_data).to_dataframe()

In [14]:
mkt_df.to_csv('raw_data.csv', index=False) 

In [5]:
mkt_df.dtypes

analysis_week                                   dbdate
global_entity_id                                object
release_date                                    dbdate
holdout_customers                                Int64
non_holdout_customers                            Int64
                                                ...   
non_holdout_subscription_fee_net               float64
holdout_subscription_vendor_subsidy_net        float64
non_holdout_subscription_vendor_subsidy_net    float64
holdout_other_revenue_net                      float64
non_holdout_other_revenue_net                  float64
Length: 72, dtype: object

In [6]:
mkt_df['discount_correction'] = mkt_df['non_holdout_dh_funded_basket_incentives'] / mkt_df['holdout_dh_funded_basket_incentives']

mkt_df['non_holdout_discount_o_df'] = mkt_df['non_holdout_dh_funded_basket_incentives'] / mkt_df['non_holdout_delivery_fee_net']

mkt_df['holdout_discount_o_df'] = mkt_df['holdout_dh_funded_basket_incentives'] / mkt_df['holdout_delivery_fee_net']

mkt_df['discount_correction_holdout'] = mkt_df['non_holdout_discount_o_df'] / mkt_df['holdout_discount_o_df']

mkt_df['holdout_dh_funded_basket_incentives_corrected'] = mkt_df['discount_correction_holdout'] * mkt_df['holdout_dh_funded_basket_incentives']

mkt_df[['holdout_dh_funded_basket_incentives_corrected','holdout_dh_funded_basket_incentives']]

mkt_df['flgp_holdout_corrected'] = mkt_df['flgp_holdout'] + mkt_df['holdout_dh_funded_basket_incentives_corrected']



In [10]:
mkt_df['scaled_holdout_total_flgp_cuped'] = (mkt_df['flgp_holdout_corrected'] / mkt_df['holdout_customers']) * mkt_df['non_holdout_customers']
mkt_df['scaled_holdout_total_orders_cuped'] = (mkt_df['holdout_orders'] / mkt_df['holdout_customers']) * mkt_df['non_holdout_customers']

mkt_df['holdout_flgp_per_order_cuped'] = np.where(
    mkt_df['holdout_orders'] != 0, 
    mkt_df['flgp_holdout_corrected'] / mkt_df['holdout_orders'], 
    np.nan
)

mkt_df['non_holdout_flgp_per_order_cuped'] = np.where(
    mkt_df['non_holdout_orders'] != 0, 
    mkt_df['flgp_non_holdout'] / mkt_df['non_holdout_orders'], 
    np.nan
)

mkt_df['holdout_orders_per_user_cuped'] = np.where(
    mkt_df['holdout_customers'] != 0, 
    mkt_df['holdout_orders'] / mkt_df['holdout_customers'], 
    np.nan
)

mkt_df['non_holdout_orders_per_user_cuped'] = np.where(
    mkt_df['non_holdout_customers'] != 0, 
    mkt_df['non_holdout_orders'] / mkt_df['non_holdout_customers'], 
    np.nan
)

mkt_df['incremental_orders_cuped'] = mkt_df['non_holdout_orders'] - mkt_df['scaled_holdout_total_orders_cuped']
mkt_df['incremental_flgp_cuped'] = mkt_df['flgp_non_holdout'] - mkt_df['scaled_holdout_total_flgp_cuped']

mkt_df['sustainable_growth'] = np.where(
    mkt_df['scaled_holdout_total_orders_cuped'] != 0, 
    ((mkt_df['incremental_orders_cuped'] + 
      (mkt_df['incremental_flgp_cuped'] / mkt_df['non_holdout_flgp_per_order_cuped'])) 
     / mkt_df['scaled_holdout_total_orders_cuped']) * 100, 
    np.nan
)

In [12]:
mkt_df[mkt_df['global_entity_id'] == 'PO_FI']['non_holdout_dh_funded_basket_incentives'] / mkt_df[mkt_df['global_entity_id'] == 'PO_FI']['non_holdout_delivery_fee_net']

23   -0.329079
dtype: float64

In [13]:
mkt_df[mkt_df['global_entity_id'] == 'PO_FI']['holdout_dh_funded_basket_incentives_corrected'] / mkt_df[mkt_df['global_entity_id'] == 'PO_FI']['holdout_delivery_fee_net']

23   -0.329079
dtype: float64

In [None]:
# Normalize for Population Differences
holdout_user_count = data['is_customer_holdout'].sum()
non_holdout_user_count = (~data['is_customer_holdout']).sum()

scaled_holdout_total_flgp_cuped = (holdout_total_flgp_cuped / holdout_user_count) * non_holdout_user_count if holdout_user_count != 0 else np.nan
scaled_holdout_total_orders_cuped = (holdout_total_orders_cuped / holdout_user_count) * non_holdout_user_count if holdout_user_count != 0 else np.nan

# FLGP per Order and Orders per User (CUPED)
holdout_flgp_per_order_cuped = holdout_total_flgp_cuped / holdout_total_orders_cuped if holdout_total_orders_cuped != 0 else np.nan
non_holdout_flgp_per_order_cuped = non_holdout_total_flgp_cuped / non_holdout_total_orders_cuped if non_holdout_total_orders_cuped != 0 else np.nan

holdout_orders_per_user_cuped = holdout_total_orders_cuped / holdout_user_count if holdout_user_count != 0 else np.nan
non_holdout_orders_per_user_cuped = non_holdout_total_orders_cuped / non_holdout_user_count if non_holdout_user_count != 0 else np.nan

# Incremental Differences (CUPED)
incremental_orders_cuped = non_holdout_total_orders_cuped - scaled_holdout_total_orders_cuped
incremental_flgp_cuped = non_holdout_total_flgp_cuped - scaled_holdout_total_flgp_cuped

# Percentage Changes (CUPED)
percentage_change_orders_cuped = ((incremental_orders_cuped) / abs(scaled_holdout_total_orders_cuped)) * 100 if scaled_holdout_total_orders_cuped != 0 else np.nan
percentage_change_flgp_cuped = ((incremental_flgp_cuped) / abs(scaled_holdout_total_flgp_cuped)) * 100 if scaled_holdout_total_flgp_cuped != 0 else np.nan

# Sustainable Growth Calculation
sustainable_growth = ((incremental_orders_cuped + (incremental_flgp_cuped / non_holdout_flgp_per_order_cuped)) / scaled_holdout_total_orders_cuped) * 100 if scaled_holdout_total_orders_cuped != 0 else np.nan

In [None]:
    sustainable_growth = ((incremental_orders_cuped + (incremental_flgp_cuped / non_holdout_flgp_per_order_cuped)) / scaled_holdout_total_orders_cuped) * 100 if scaled_holdout_total_orders_cuped != 0 else np.nan


In [510]:
mkt_df['holdout_delivery_fee_per_order'] = mkt_df['holdout_delivery_fee_net'] / mkt_df['holdout_orders'] 
mkt_df['non_holdout_delivery_fee_per_order'] = mkt_df['non_holdout_delivery_fee_net'] / mkt_df['non_holdout_orders'] 

mkt_df['holdout_discount_per_order'] = mkt_df['holdout_dh_funded_basket_incentives'] / mkt_df['holdout_orders'] 
mkt_df['non_holdout_discount_per_order'] = mkt_df['non_holdout_dh_funded_basket_incentives'] / mkt_df['non_holdout_orders'] 

mkt_df['holdout_revenue_per_order'] = mkt_df['holdout_revenue_net'] / mkt_df['holdout_orders'] 
mkt_df['non_holdout_revenue_per_order'] = mkt_df['non_holdout_revenue_net'] / mkt_df['non_holdout_orders'] 

mkt_df['holdout_cos_per_order'] = mkt_df['holdout_cost_of_sales'] / mkt_df['holdout_orders'] 
mkt_df['non_holdout_cos_per_order'] = mkt_df['non_holdout_cost_of_sales'] / mkt_df['non_holdout_orders'] 

mkt_df['holdout_delivery_costs_per_order'] = mkt_df['holdout_delivery_costs'] / mkt_df['holdout_orders'] 
mkt_df['non_holdout_delivery_costs_per_order'] = mkt_df['non_holdout_delivery_costs'] / mkt_df['non_holdout_orders'] 

mkt_df['holdout_cost_of_goods_sold_per_order'] = mkt_df['holdout_cost_of_goods_sold'] / mkt_df['holdout_orders'] 
mkt_df['non_holdout_holdout_cost_of_goods_sold_per_order'] = mkt_df['non_holdout_cost_of_goods_sold'] / mkt_df['non_holdout_orders'] 

mkt_df['holdout_payment_costs_per_order'] = mkt_df['holdout_payment_costs'] / mkt_df['holdout_orders'] 
mkt_df['non_holdout_payment_costs_per_order'] = mkt_df['non_holdout_payment_costs'] / mkt_df['non_holdout_orders']

mkt_df['holdout_recooks_and_wastage_per_order'] = mkt_df['holdout_recooks_and_wastage'] / mkt_df['holdout_orders'] 
mkt_df['non_holdout_recooks_and_wastage_per_order'] = mkt_df['non_holdout_recooks_and_wastage'] / mkt_df['non_holdout_orders']

mkt_df['holdout_refunds_and_compensation_per_order'] = mkt_df['holdout_refunds_and_compensation'] / mkt_df['holdout_orders'] 
mkt_df['non_holdout_refunds_and_compensation_per_order'] = mkt_df['non_holdout_refunds_and_compensation'] / mkt_df['non_holdout_orders']

mkt_df['holdout_semi_variable_delivery_costs_per_order'] = mkt_df['holdout_semi_variable_delivery_costs'] / mkt_df['holdout_orders'] 
mkt_df['non_holdout_semi_variable_delivery_costs_per_order'] = mkt_df['non_holdout_semi_variable_delivery_costs'] / mkt_df['non_holdout_orders']

mkt_df['holdout_contact_center_costs_per_order'] = mkt_df['holdout_contact_center_costs'] / mkt_df['holdout_orders'] 
mkt_df['non_holdout_contact_center_costs_per_order'] = mkt_df['non_holdout_contact_center_costs'] / mkt_df['non_holdout_orders']

mkt_df['holdout_other_non_specific_delivery_costs_per_order'] = mkt_df['holdout_other_non_specific_delivery_costs'] / mkt_df['holdout_orders'] 
mkt_df['non_holdout_other_non_specific_delivery_costs_per_order'] = mkt_df['non_holdout_other_non_specific_delivery_costs'] / mkt_df['non_holdout_orders']

mkt_df['holdout_commission_fee_net_per_order'] = mkt_df['holdout_commission_fee_net'] / mkt_df['holdout_orders'] 
mkt_df['non_holdout_commission_fee_net_per_order'] = mkt_df['non_holdout_commission_fee_net'] / mkt_df['non_holdout_orders']

mkt_df['holdout_sale_of_goods_revenue_net_per_order'] = mkt_df['holdout_sale_of_goods_revenue_net'] / mkt_df['holdout_orders'] 
mkt_df['non_holdout_sale_of_goods_revenue_net_per_order'] = mkt_df['non_holdout_sale_of_goods_revenue_net'] / mkt_df['non_holdout_orders']

mkt_df['holdout_card_fee_net_per_order'] = mkt_df['holdout_card_fee_net'] / mkt_df['holdout_orders'] 
mkt_df['non_holdout_card_fee_net_per_order'] = mkt_df['non_holdout_card_fee_net'] / mkt_df['non_holdout_orders']

mkt_df['holdout_priority_fee_net_per_order'] = mkt_df['holdout_priority_fee_net'] / mkt_df['holdout_orders'] 
mkt_df['non_holdout_priority_fee_net_per_order'] = mkt_df['non_holdout_priority_fee_net'] / mkt_df['non_holdout_orders']

mkt_df['holdout_bad_weather_fee_net_per_order'] = mkt_df['holdout_bad_weather_fee_net'] / mkt_df['holdout_orders'] 
mkt_df['non_holdout_bad_weather_fee_net_per_order'] = mkt_df['non_holdout_bad_weather_fee_net'] / mkt_df['non_holdout_orders']

mkt_df['holdout_vendor_funded_delivery_fee_subsidy_net_per_order'] = mkt_df['holdout_vendor_funded_delivery_fee_subsidy_net'] / mkt_df['holdout_orders'] 
mkt_df['non_holdout_vendor_funded_delivery_fee_subsidy_net_per_order'] = mkt_df['non_holdout_vendor_funded_delivery_fee_subsidy_net'] / mkt_df['non_holdout_orders']

mkt_df['holdout_service_fee_net_per_order'] = mkt_df['holdout_service_fee_net'] / mkt_df['holdout_orders'] 
mkt_df['non_holdout_service_fee_net_per_order'] = mkt_df['non_holdout_service_fee_net'] / mkt_df['non_holdout_orders']

mkt_df['holdout_listing_fee_net_per_order'] = mkt_df['holdout_listing_fee_net'] / mkt_df['holdout_orders'] 
mkt_df['non_holdout_listing_fee_net_per_order'] = mkt_df['non_holdout_listing_fee_net'] / mkt_df['non_holdout_orders']

mkt_df['holdout_sublease_revenue_net_per_order'] = mkt_df['holdout_sublease_revenue_net'] / mkt_df['holdout_orders'] 
mkt_df['non_holdout_sublease_revenue_net_per_order'] = mkt_df['non_holdout_sublease_revenue_net'] / mkt_df['non_holdout_orders']

mkt_df['holdout_ads_revenue_net_per_order'] = mkt_df['holdout_ads_revenue_net'] / mkt_df['holdout_orders'] 
mkt_df['non_holdout_ads_revenue_net_per_order'] = mkt_df['non_holdout_ads_revenue_net'] / mkt_df['non_holdout_orders']

mkt_df['holdout_joker_vendor_fee_net_per_order'] = mkt_df['holdout_joker_vendor_fee_net'] / mkt_df['holdout_orders'] 
mkt_df['non_holdout_joker_vendor_fee_net_per_order'] = mkt_df['non_holdout_joker_vendor_fee_net'] / mkt_df['non_holdout_orders']

mkt_df['holdout_other_ncr_net_per_order'] = mkt_df['holdout_other_ncr_net'] / mkt_df['holdout_orders'] 
mkt_df['non_holdout_other_ncr_net_per_order'] = mkt_df['non_holdout_other_ncr_net'] / mkt_df['non_holdout_orders']

mkt_df['holdout_small_basket_fee_net_per_order'] = mkt_df['holdout_small_basket_fee_net'] / mkt_df['holdout_orders'] 
mkt_df['non_holdout_small_basket_fee_net_per_order'] = mkt_df['non_holdout_small_basket_fee_net'] / mkt_df['non_holdout_orders']

mkt_df['holdout_subscription_fee_net_per_order'] = mkt_df['holdout_subscription_fee_net'] / mkt_df['holdout_orders'] 
mkt_df['non_holdout_subscription_fee_net_per_order'] = mkt_df['non_holdout_subscription_fee_net'] / mkt_df['non_holdout_orders']

mkt_df['holdout_other_revenue_net_per_order'] = mkt_df['holdout_other_revenue_net'] / mkt_df['holdout_orders'] 
mkt_df['non_holdout_other_revenue_net_per_order'] = mkt_df['non_holdout_other_revenue_net'] / mkt_df['non_holdout_orders']

# tmp_cos = mkt_df[['global_entity_id','holdout_cost_of_goods_sold_per_order','non_holdout_holdout_cost_of_goods_sold_per_order',
# 'holdout_recooks_and_wastage_per_order','non_holdout_recooks_and_wastage_per_order',
# 'holdout_refunds_and_compensation_per_order','non_holdout_refunds_and_compensation_per_order',
# 'holdout_semi_variable_delivery_costs_per_order','non_holdout_semi_variable_delivery_costs_per_order',
# 'holdout_contact_center_costs_per_order','non_holdout_contact_center_costs_per_order',
# 'holdout_other_non_specific_delivery_costs_per_order','non_holdout_other_non_specific_delivery_costs_per_order',
# 'holdout_delivery_costs_per_order','non_holdout_delivery_costs_per_order','holdout_payment_costs_per_order',
# 'non_holdout_payment_costs_per_order', 'holdout_cos_per_order','non_holdout_cos_per_order', 'sustainable_growth', 
# ]]

# tmp_cos['sustainable_growth'] = 100 * tmp_cos['sustainable_growth']

# tmp_cos.sort_values(by = 'sustainable_growth').transpose()

# tmp_revenue = mkt_df[['global_entity_id',
# 'holdout_commission_fee_net_per_order', 'non_holdout_commission_fee_net_per_order',
# 'holdout_sale_of_goods_revenue_net_per_order','non_holdout_sale_of_goods_revenue_net_per_order',
# 'holdout_card_fee_net_per_order','non_holdout_card_fee_net_per_order',
# 'holdout_delivery_fee_per_order','non_holdout_delivery_fee_per_order',
# 'holdout_priority_fee_net_per_order','non_holdout_priority_fee_net_per_order',
# 'holdout_bad_weather_fee_net_per_order','non_holdout_bad_weather_fee_net_per_order',
# 'holdout_vendor_funded_delivery_fee_subsidy_net_per_order','non_holdout_vendor_funded_delivery_fee_subsidy_net_per_order',
# 'holdout_service_fee_net_per_order','non_holdout_service_fee_net_per_order',
# 'holdout_listing_fee_net_per_order','non_holdout_listing_fee_net_per_order',
# 'holdout_sublease_revenue_net_per_order','non_holdout_sublease_revenue_net_per_order',
# 'holdout_ads_revenue_net_per_order','non_holdout_ads_revenue_net_per_order',
# 'holdout_joker_vendor_fee_net_per_order','non_holdout_joker_vendor_fee_net_per_order',
# 'holdout_other_ncr_net_per_order','non_holdout_other_ncr_net_per_order',
# 'holdout_small_basket_fee_net_per_order','non_holdout_small_basket_fee_net_per_order',
# 'holdout_subscription_fee_net_per_order','non_holdout_subscription_fee_net_per_order',
# 'holdout_other_revenue_net_per_order','non_holdout_other_revenue_net_per_order',
# 'holdout_revenue_per_order','non_holdout_revenue_per_order',
# 'sustainable_growth'
# ]]

# tmp_revenue['sustainable_growth'] = 100 * tmp_revenue['sustainable_growth']

# tmp_revenue.sort_values(by = 'sustainable_growth').transpose()


tmp_combined = mkt_df[['global_entity_id',
'holdout_discount_per_order', 'non_holdout_discount_per_order',
'holdout_cos_per_order', 'non_holdout_cos_per_order',
'holdout_revenue_per_order', 'non_holdout_revenue_per_order',
'flgpo_holdout', 'flgpo_non_holdout',
'flgp_growth', 'orders_growth',
'sustainable_growth'
]]

tmp_combined['discount_diff'] = 100 * ((tmp_combined['non_holdout_discount_per_order'] - tmp_combined['holdout_discount_per_order']) / tmp_combined['holdout_discount_per_order'])
tmp_combined['cos_diff'] = 100 * ((tmp_combined['non_holdout_cos_per_order'] - tmp_combined['holdout_cos_per_order']) / tmp_combined['holdout_cos_per_order'] )
tmp_combined['revenue_diff'] = 100 * ((tmp_combined['non_holdout_revenue_per_order'] - tmp_combined['holdout_revenue_per_order']) / tmp_combined['holdout_revenue_per_order'] )


tmp_combined['sustainable_growth'] = 100 * tmp_combined['sustainable_growth']

tmp_combined.sort_values(by = 'sustainable_growth').transpose()



KeyError: "['flgpo_holdout', 'flgpo_non_holdout', 'flgp_growth', 'orders_growth'] not in index"

In [493]:
mkt_df.columns

Index(['analysis_week', 'global_entity_id', 'release_date',
       'holdout_customers', 'non_holdout_customers', 'all_customers',
       'holdout_orders', 'non_holdout_orders', 'holdout_gmv',
       'non_holdout_gmv',
       ...
       'holdout_joker_vendor_fee_net_per_order',
       'non_holdout_joker_vendor_fee_net_per_order',
       'holdout_other_ncr_net_per_order',
       'non_holdout_other_ncr_net_per_order',
       'holdout_small_basket_fee_net_per_order',
       'non_holdout_small_basket_fee_net_per_order',
       'holdout_subscription_fee_net_per_order',
       'non_holdout_subscription_fee_net_per_order',
       'holdout_other_revenue_net_per_order',
       'non_holdout_other_revenue_net_per_order'],
      dtype='object', length=137)

In [None]:
mkt_df['delio']

In [495]:
mkt_df[mkt_df['global_entity_id'] == 'PO_FI']['holdout_dh_funded_basket_incentives'] / mkt_df[mkt_df['global_entity_id'] == 'PO_FI']['holdout_delivery_fee_net']

6   -0.244227
dtype: float64

In [496]:
mkt_df[mkt_df['global_entity_id'] == 'PO_FI']['non_holdout_dh_funded_basket_incentives'] / mkt_df[mkt_df['global_entity_id'] == 'PO_FI']['non_holdout_delivery_fee_net']

6   -0.329079
dtype: float64

In [499]:
((-0.329079 / -0.244227) * mkt_df[mkt_df['global_entity_id'] == 'PO_FI']['holdout_dh_funded_basket_incentives']) / mkt_df[mkt_df['global_entity_id'] == 'PO_FI']['holdout_delivery_fee_net']


6   -0.329078
dtype: float64

In [466]:
import numpy as np
import pandas as pd
from scipy.stats import ttest_ind

def calculate_sustainable_growth(data, pre_profit, post_profit):
    
    data = data.copy()

    entity = data['entity_id'].iloc[0]

    # Apply CUPED to FLGP
    data_flgp = data.dropna(subset=[pre_profit, post_profit])
    theta_flgp = np.cov(data_flgp[pre_profit], data_flgp[post_profit])[0, 1] / np.var(data_flgp[pre_profit])
    data_flgp['flgp_post_cuped'] = data_flgp[post_profit] - theta_flgp * (data_flgp[pre_profit] - data_flgp[pre_profit].mean())

    # Apply CUPED to Orders
    data_orders = data.dropna(subset=['orders_pre', 'orders_post'])
    theta_orders = np.cov(data_orders['orders_pre'], data_orders['orders_post'])[0, 1] / np.var(data_orders['orders_pre'])
    data_orders['orders_post_cuped'] = data_orders['orders_post'] - theta_orders * (data_orders['orders_pre'] - data_orders['orders_pre'].mean())

    # Total FLGP and Orders (CUPED)
    holdout_total_flgp_cuped = data_flgp.loc[data_flgp['is_customer_holdout'], 'flgp_post_cuped'].sum()
    non_holdout_total_flgp_cuped = data_flgp.loc[~data_flgp['is_customer_holdout'], 'flgp_post_cuped'].sum()

    holdout_total_orders_cuped = data_orders.loc[data_orders['is_customer_holdout'], 'orders_post_cuped'].sum()
    non_holdout_total_orders_cuped = data_orders.loc[~data_orders['is_customer_holdout'], 'orders_post_cuped'].sum()

    # Normalize for Population Differences
    holdout_user_count = data['is_customer_holdout'].sum()
    non_holdout_user_count = (~data['is_customer_holdout']).sum()

    scaled_holdout_total_flgp_cuped = (holdout_total_flgp_cuped / holdout_user_count) * non_holdout_user_count if holdout_user_count != 0 else np.nan
    scaled_holdout_total_orders_cuped = (holdout_total_orders_cuped / holdout_user_count) * non_holdout_user_count if holdout_user_count != 0 else np.nan

    # FLGP per Order and Orders per User (CUPED)
    holdout_flgp_per_order_cuped = holdout_total_flgp_cuped / holdout_total_orders_cuped if holdout_total_orders_cuped != 0 else np.nan
    non_holdout_flgp_per_order_cuped = non_holdout_total_flgp_cuped / non_holdout_total_orders_cuped if non_holdout_total_orders_cuped != 0 else np.nan

    holdout_orders_per_user_cuped = holdout_total_orders_cuped / holdout_user_count if holdout_user_count != 0 else np.nan
    non_holdout_orders_per_user_cuped = non_holdout_total_orders_cuped / non_holdout_user_count if non_holdout_user_count != 0 else np.nan

    # Incremental Differences (CUPED)
    incremental_orders_cuped = non_holdout_total_orders_cuped - scaled_holdout_total_orders_cuped
    incremental_flgp_cuped = non_holdout_total_flgp_cuped - scaled_holdout_total_flgp_cuped

    # Percentage Changes (CUPED)
    percentage_change_orders_cuped = ((incremental_orders_cuped) / abs(scaled_holdout_total_orders_cuped)) * 100 if scaled_holdout_total_orders_cuped != 0 else np.nan
    percentage_change_flgp_cuped = ((incremental_flgp_cuped) / abs(scaled_holdout_total_flgp_cuped)) * 100 if scaled_holdout_total_flgp_cuped != 0 else np.nan

    # Sustainable Growth Calculation
    sustainable_growth = ((incremental_orders_cuped + (incremental_flgp_cuped / non_holdout_flgp_per_order_cuped)) / scaled_holdout_total_orders_cuped) * 100 if scaled_holdout_total_orders_cuped != 0 else np.nan

    # T-tests for significance
    t_stat_orders, p_value_orders = ttest_ind(
        data_orders.loc[data_orders['is_customer_holdout'], 'orders_post_cuped'],
        data_orders.loc[~data_orders['is_customer_holdout'], 'orders_post_cuped'],
        equal_var=False
    )

    t_stat_flgp, p_value_flgp = ttest_ind(
        data_flgp.loc[data_flgp['is_customer_holdout'], 'flgp_post_cuped'],
        data_flgp.loc[~data_flgp['is_customer_holdout'], 'flgp_post_cuped'],
        equal_var=False
    )

    return {
        'entity': entity,
        'sustainable_growth': sustainable_growth,
        'percentage_change_orders_cuped': percentage_change_orders_cuped,
        'incremental_orders_cuped': incremental_orders_cuped,
        'non_holdout_total_orders_cuped':non_holdout_total_orders_cuped,
        'holdout_total_orders_cuped':scaled_holdout_total_orders_cuped,
        't_stat_orders': t_stat_orders,
        'p_value_orders': p_value_orders,
        'percentage_change_flgp_cuped': percentage_change_flgp_cuped,
        'incremental_flgp_cuped': incremental_flgp_cuped,
        'non_holdout_total_flgp_cuped': non_holdout_total_flgp_cuped,
        'holdout_total_flgp_cuped':scaled_holdout_total_flgp_cuped,
        't_stat_flgp': t_stat_flgp,
        'p_value_flgp': p_value_flgp,
        'holdout_flgp_per_order_cuped': holdout_flgp_per_order_cuped,
        'non_holdout_flgp_per_order_cuped': non_holdout_flgp_per_order_cuped,
        'holdout_orders_per_user_cuped': holdout_orders_per_user_cuped,
        'non_holdout_orders_per_user_cuped': non_holdout_orders_per_user_cuped
    }


# results = []
# for entity in mkt_df['entity_id'].unique():
#     entity_data = mkt_df[mkt_df['entity_id'] == entity]
#     result = calculate_sustainable_growth(entity_data, 'local_analytical_profit_pre','local_analytical_profit_post')
#     result['entity_id'] = entity
#     results.append(result)

# pd.DataFrame(results)

mkt_df['profit_pre'] = mkt_df['revenue_pre'] + mkt_df['cost_of_sales_pre']
mkt_df['profit_post'] = mkt_df['revenue_pre'] + mkt_df['cost_of_sales_pre']

# Define the list of pre and post metrics
metric_pairs = [
    ('analytical_profit_pre', 'analytical_profit_post'),
    ('local_analytical_profit_pre', 'local_analytical_profit_post'),
    ('profit_pre', 'profit_post')
]

results = []

# Loop through each metric pair
for pre_metric, post_metric in metric_pairs:
    for entity in mkt_df['entity_id'].unique():
        entity_data = mkt_df[mkt_df['entity_id'] == entity]
        result = calculate_sustainable_growth(entity_data, pre_metric, post_metric)
        
        # Add entity_id and metric information to the result
        result['metric_used'] = f"{pre_metric}_vs_{post_metric}"
        
        results.append(result)

# Convert results to DataFrame
final_results_df = pd.DataFrame(results)


final_results_df = final_results_df.dropna(subset= 'sustainable_growth')

final_results_df.to_csv('profitable_growth.csv', index=False) 

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data_flgp['flgp_post_cuped'] = data_flgp[post_profit] - theta_flgp * (data_flgp[pre_profit] - data_flgp[pre_profit].mean())
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data_flgp['flgp_post_cuped'] = data_flgp[post_profit] - theta_flgp * (data_flgp[pre_profit] - data_flgp[pre_profit].mean())
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/inde

Unnamed: 0,entity,sustainable_growth,percentage_change_orders_cuped,incremental_orders_cuped,non_holdout_total_orders_cuped,holdout_total_orders_cuped,t_stat_orders,p_value_orders,percentage_change_flgp_cuped,incremental_flgp_cuped,non_holdout_total_flgp_cuped,holdout_total_flgp_cuped,t_stat_flgp,p_value_flgp,holdout_flgp_per_order_cuped,non_holdout_flgp_per_order_cuped,holdout_orders_per_user_cuped,non_holdout_orders_per_user_cuped,entity_id
0,EF_GR,7.729787,6.11063,138111.699112,2398299.0,2260188.0,-8.072846,7.224959e-16,1.549559,33514.267634,2196340.0,2162826.0,-0.441929,0.6585492,0.956923,0.915791,2.178339,2.311449,EF_GR
1,FP_TW,1.835712,0.844197,78427.237009,9368589.0,9290161.0,-2.193176,0.02829853,0.992978,62693.155414,6376345.0,6313652.0,-1.534955,0.1248041,0.679606,0.680609,3.269494,3.297095,FP_TW
2,YS_TR,-32.182999,1.723559,143208.367893,8452083.0,8308874.0,-4.254027,2.101781e-05,24.999284,514917.054223,-1544810.0,-2059727.0,-7.681676,1.603083e-14,-0.247895,-0.182773,1.985008,2.019221,YS_TR
3,PY_AR,1.53274,0.787178,61342.206187,7854017.0,7792675.0,-2.037969,0.04155684,0.745252,102134.123058,13806770.0,13704640.0,-2.840333,0.004509081,1.758657,1.757925,2.380435,2.399173,PY_AR
4,MJM_AT,3.401775,-0.075154,-707.678628,940925.0,941632.7,0.071441,0.9430481,3.604981,118602.074819,3408552.0,3289950.0,-1.746552,0.08077751,3.493878,3.622554,2.288688,2.286968,MJM_AT
5,FP_MY,2.536091,-0.468499,-24918.010045,5293769.0,5318687.0,0.958192,0.33797,3.112697,101758.162683,3370890.0,3269132.0,-3.738859,0.000185227,0.61465,0.636766,1.9025,1.893587,FP_MY
6,PY_EC,-1.420451,1.862626,18538.614435,1013833.0,995294.7,-1.586408,0.112679,-3.122407,-27440.173565,851374.6,878814.8,1.642152,0.1006245,0.882969,0.839758,2.125594,2.165186,PY_EC
7,DJ_CZ,-0.983975,0.144272,1662.130284,1153745.0,1152083.0,-0.146085,0.8838573,-1.11407,-13753.049398,1220734.0,1234487.0,0.694165,0.4876073,1.071526,1.058062,2.250641,2.253888,DJ_CZ
8,NP_HU,-0.438168,-0.027398,-306.15843,1117159.0,1117465.0,0.026504,0.9788557,-0.409202,-3593.909816,874679.9,878273.8,-0.102614,0.918273,0.785952,0.78295,2.397249,2.396592,NP_HU
9,FP_MM,0.771945,1.48277,10888.645986,745233.7,734345.1,-1.007453,0.3137495,-0.695567,-3391.743385,484231.3,487623.0,-0.098895,0.9212277,0.664024,0.649771,2.016146,2.046041,FP_MM
