In [429]:
import numpy as np
import pandas as pd
from google.cloud import bigquery
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats
import plotly.express as px
import db_dtypes
import bigframes.pandas as bpd
from IPython.display import display, HTML
import plotly.express as px
import seaborn as sns
import matplotlib.pyplot as plt
import math
import statsmodels.api as sm
from datetime import datetime
from typing import Union
import logging
import sys

# Configure logging
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s %(levelname)s:%(message)s',
    handlers=[
        logging.StreamHandler(sys.stdout)
    ]
)


growth_data = """

WITH holdout_entities AS (
  SELECT
        entity_id
       ,`Release Date` as release_date
  FROM `logistics-data-storage-staging.long_term_pricing.global_holdout_rollout`
  WHERE `Release Date` < DATE_SUB(CURRENT_DATE(), INTERVAL EXTRACT(DAYOFWEEK FROM CURRENT_DATE())- 2 DAY)
),
customer_information AS (
  SELECT
         e.entity_id
        ,e.release_date
        ,IF(d.created_date <= e.release_date, FALSE, COALESCE(is_customer_holdout, FALSE)) AS is_customer_holdout
        ,customer_id
  FROM `fulfillment-dwh-production.cl.dps_holdout_users` AS d
  JOIN holdout_entities AS e
        ON d.entity_id = e.entity_id
  WHERE d.created_date >= date('2025-01-01')
    AND created_date < CURRENT_DATE()  -- data is not full/not present for current date
    AND customer_id IS NOT NULL
    AND customer_id NOT IN UNNEST(ARRAY(SELECT id FROM `fulfillment-dwh-production.cl._bad_dps_logs_ids`)) -- get rid of ids like 'null', 'test' etc
    --AND d.entity_id in ('FP_MM','GV_CI', 'PY_UY','GV_BA','PY_NI','TB_IQ','TB_OM','PY_HN','TB_KW','GV_ME','PO_FI','FP_SG','FP_TH','HS_SA','FY_CY','TB_JO','GV_RS','GV_UG','GV_NG','NP_HU')
  GROUP BY 1, 2, 3, 4
)
  SELECT 
     e.entity_id
    ,e.customer_id customer_id
    ,e.is_customer_holdout
    ,COUNT(DISTINCT case when dps.created_date <= e.release_date then dps.platform_order_code end) AS orders_pre
    ,COUNT(DISTINCT case when dps.created_date > e.release_date then dps.platform_order_code end) AS orders_post
    ,SUM(case when dps.created_date <= e.release_date then dps.fully_loaded_gross_profit_eur end) AS flgp_pre
    ,SUM(case when dps.created_date > e.release_date then dps.fully_loaded_gross_profit_eur end) AS flgp_post
  FROM customer_information e
  LEFT JOIN `fulfillment-dwh-production.cl.dps_sessions_mapped_to_orders` dps
    ON dps.entity_id = e.entity_id
    AND dps.dps_customer_id = e.customer_id
  WHERE created_date >= DATE_SUB(release_date, INTERVAL 8 WEEK)
  AND created_date < CURRENT_DATE
  AND platform_order_code IS NOT NULL
  AND is_own_delivery
  AND is_sent
  --AND dps.entity_id in ('FP_MM','GV_CI', 'PY_UY','GV_BA','PY_NI','TB_IQ','TB_OM','PY_HN','TB_KW','GV_ME','PO_FI','FP_SG','FP_TH','HS_SA','FY_CY','TB_JO','GV_RS','GV_UG','GV_NG','NP_HU')
  AND vendor_vertical_parent in ('Restaurant','restaurant','restaurants')
  GROUP BY 1,2,3

  """


mkt_data = """
  
  WITH holdout_entities AS (
  SELECT
        entity_id
       ,`Release Date` as release_date
  FROM `logistics-data-storage-staging.long_term_pricing.global_holdout_rollout`
  WHERE `Release Date` < DATE_SUB(CURRENT_DATE(), INTERVAL EXTRACT(DAYOFWEEK FROM CURRENT_DATE())- 2 DAY)
),
customer_information AS (
  SELECT
         e.entity_id
        ,e.release_date
        ,IF(d.created_date <= e.release_date, FALSE, COALESCE(is_customer_holdout, FALSE)) AS is_customer_holdout
        ,customer_id
  FROM `fulfillment-dwh-production.cl.dps_holdout_users` AS d
  JOIN holdout_entities AS e
        ON d.entity_id = e.entity_id
  WHERE d.created_date >= date('2025-01-01')
    AND created_date < CURRENT_DATE()  -- data is not full/not present for current date
    AND customer_id IS NOT NULL
    AND customer_id NOT IN UNNEST(ARRAY(SELECT id FROM `fulfillment-dwh-production.cl._bad_dps_logs_ids`)) -- get rid of ids like 'null', 'test' etc
  GROUP BY 1, 2, 3, 4
)
  SELECT 
     e.entity_id
    ,e.customer_id customer_id
    ,e.is_customer_holdout
    ,COUNT(DISTINCT case when mkt.order_date <= e.release_date then mkt.order_id end) AS orders_pre
    ,COUNT(DISTINCT case when mkt.order_date > e.release_date then mkt.order_id end) AS orders_post
    ,SUM(case when mkt.order_date <= e.release_date then mkt.analytical_profit end) AS analytical_profit_pre
    ,SUM(case when mkt.order_date > e.release_date then mkt.analytical_profit end) AS analytical_profit_post
    ,SUM(case when mkt.order_date <= e.release_date then mkt.local_analytical_profit end) AS local_analytical_profit_pre
    ,SUM(case when mkt.order_date > e.release_date then mkt.local_analytical_profit end) AS local_analytical_profit_post
    ,SUM(case when mkt.order_date <= e.release_date then mkt.revenue_net end) AS revenue_pre
    ,SUM(case when mkt.order_date > e.release_date then mkt.revenue_net end) AS revenue_post
    ,SUM(case when mkt.order_date <= e.release_date then mkt.cost_of_sales end) AS cost_of_sales_pre
    ,SUM(case when mkt.order_date > e.release_date then mkt.cost_of_sales end) AS cost_of_sales_post
  FROM customer_information e
  LEFT JOIN `fulfillment-dwh-production.cl.dps_sessions_mapped_to_orders` dps
    ON dps.entity_id = e.entity_id
    AND dps.dps_customer_id = e.customer_id 
  LEFT JOIN fulfillment-dwh-production.curated_data_shared_mkt.bima_order_profitability mkt
    ON mkt.global_entity_id = dps.entity_id
    AND mkt.order_id = dps.platform_order_code
  WHERE order_date >= DATE_SUB(release_date, INTERVAL 8 WEEK)
  AND order_date < CURRENT_DATE
  and created_date >= DATE_SUB(release_date, INTERVAL 8 WEEK)
  and created_date < CURRENT_DATE
  AND platform_order_code IS NOT NULL
  AND is_own_delivery
  AND is_sent
  AND e.entity_id is not null
  AND vendor_vertical_parent in ('Restaurant','restaurant','restaurants')
  GROUP BY 1,2,3
  
  """


In [430]:
project_id = "logistics-customer-staging"
logging.info(f"Initializing BigQuery client for project: {project_id}")

try:
    # Initialize BigQuery client
    client = bigquery.Client(project=project_id)
except Exception as e:
    logging.error(f"Failed to initialize BigQuery client: {e}")
    sys.exit(1)

growth_df = client.query(growth_data).to_dataframe()

mkt_df =  client.query(mkt_data).to_dataframe()

2025-02-11 22:46:45,977 INFO:Initializing BigQuery client for project: logistics-customer-staging






In [466]:
import numpy as np
import pandas as pd
from scipy.stats import ttest_ind

def calculate_sustainable_growth(data, pre_profit, post_profit):
    
    data = data.copy()

    entity = data['entity_id'].iloc[0]

    # Apply CUPED to FLGP
    data_flgp = data.dropna(subset=[pre_profit, post_profit])
    theta_flgp = np.cov(data_flgp[pre_profit], data_flgp[post_profit])[0, 1] / np.var(data_flgp[pre_profit])
    data_flgp['flgp_post_cuped'] = data_flgp[post_profit] - theta_flgp * (data_flgp[pre_profit] - data_flgp[pre_profit].mean())

    # Apply CUPED to Orders
    data_orders = data.dropna(subset=['orders_pre', 'orders_post'])
    theta_orders = np.cov(data_orders['orders_pre'], data_orders['orders_post'])[0, 1] / np.var(data_orders['orders_pre'])
    data_orders['orders_post_cuped'] = data_orders['orders_post'] - theta_orders * (data_orders['orders_pre'] - data_orders['orders_pre'].mean())

    # Total FLGP and Orders (CUPED)
    holdout_total_flgp_cuped = data_flgp.loc[data_flgp['is_customer_holdout'], 'flgp_post_cuped'].sum()
    non_holdout_total_flgp_cuped = data_flgp.loc[~data_flgp['is_customer_holdout'], 'flgp_post_cuped'].sum()

    holdout_total_orders_cuped = data_orders.loc[data_orders['is_customer_holdout'], 'orders_post_cuped'].sum()
    non_holdout_total_orders_cuped = data_orders.loc[~data_orders['is_customer_holdout'], 'orders_post_cuped'].sum()

    # Normalize for Population Differences
    holdout_user_count = data['is_customer_holdout'].sum()
    non_holdout_user_count = (~data['is_customer_holdout']).sum()

    scaled_holdout_total_flgp_cuped = (holdout_total_flgp_cuped / holdout_user_count) * non_holdout_user_count if holdout_user_count != 0 else np.nan
    scaled_holdout_total_orders_cuped = (holdout_total_orders_cuped / holdout_user_count) * non_holdout_user_count if holdout_user_count != 0 else np.nan

    # FLGP per Order and Orders per User (CUPED)
    holdout_flgp_per_order_cuped = holdout_total_flgp_cuped / holdout_total_orders_cuped if holdout_total_orders_cuped != 0 else np.nan
    non_holdout_flgp_per_order_cuped = non_holdout_total_flgp_cuped / non_holdout_total_orders_cuped if non_holdout_total_orders_cuped != 0 else np.nan

    holdout_orders_per_user_cuped = holdout_total_orders_cuped / holdout_user_count if holdout_user_count != 0 else np.nan
    non_holdout_orders_per_user_cuped = non_holdout_total_orders_cuped / non_holdout_user_count if non_holdout_user_count != 0 else np.nan

    # Incremental Differences (CUPED)
    incremental_orders_cuped = non_holdout_total_orders_cuped - scaled_holdout_total_orders_cuped
    incremental_flgp_cuped = non_holdout_total_flgp_cuped - scaled_holdout_total_flgp_cuped

    # Percentage Changes (CUPED)
    percentage_change_orders_cuped = ((incremental_orders_cuped) / abs(scaled_holdout_total_orders_cuped)) * 100 if scaled_holdout_total_orders_cuped != 0 else np.nan
    percentage_change_flgp_cuped = ((incremental_flgp_cuped) / abs(scaled_holdout_total_flgp_cuped)) * 100 if scaled_holdout_total_flgp_cuped != 0 else np.nan

    # Sustainable Growth Calculation
    sustainable_growth = ((incremental_orders_cuped + (incremental_flgp_cuped / non_holdout_flgp_per_order_cuped)) / scaled_holdout_total_orders_cuped) * 100 if scaled_holdout_total_orders_cuped != 0 else np.nan

    # T-tests for significance
    t_stat_orders, p_value_orders = ttest_ind(
        data_orders.loc[data_orders['is_customer_holdout'], 'orders_post_cuped'],
        data_orders.loc[~data_orders['is_customer_holdout'], 'orders_post_cuped'],
        equal_var=False
    )

    t_stat_flgp, p_value_flgp = ttest_ind(
        data_flgp.loc[data_flgp['is_customer_holdout'], 'flgp_post_cuped'],
        data_flgp.loc[~data_flgp['is_customer_holdout'], 'flgp_post_cuped'],
        equal_var=False
    )

    return {
        'entity': entity,
        'sustainable_growth': sustainable_growth,
        'percentage_change_orders_cuped': percentage_change_orders_cuped,
        'incremental_orders_cuped': incremental_orders_cuped,
        'non_holdout_total_orders_cuped':non_holdout_total_orders_cuped,
        'holdout_total_orders_cuped':scaled_holdout_total_orders_cuped,
        't_stat_orders': t_stat_orders,
        'p_value_orders': p_value_orders,
        'percentage_change_flgp_cuped': percentage_change_flgp_cuped,
        'incremental_flgp_cuped': incremental_flgp_cuped,
        'non_holdout_total_flgp_cuped': non_holdout_total_flgp_cuped,
        'holdout_total_flgp_cuped':scaled_holdout_total_flgp_cuped,
        't_stat_flgp': t_stat_flgp,
        'p_value_flgp': p_value_flgp,
        'holdout_flgp_per_order_cuped': holdout_flgp_per_order_cuped,
        'non_holdout_flgp_per_order_cuped': non_holdout_flgp_per_order_cuped,
        'holdout_orders_per_user_cuped': holdout_orders_per_user_cuped,
        'non_holdout_orders_per_user_cuped': non_holdout_orders_per_user_cuped
    }


# results = []
# for entity in mkt_df['entity_id'].unique():
#     entity_data = mkt_df[mkt_df['entity_id'] == entity]
#     result = calculate_sustainable_growth(entity_data, 'local_analytical_profit_pre','local_analytical_profit_post')
#     result['entity_id'] = entity
#     results.append(result)

# pd.DataFrame(results)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data_flgp['flgp_post_cuped'] = data_flgp[post_profit] - theta_flgp * (data_flgp[pre_profit] - data_flgp[pre_profit].mean())
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data_flgp['flgp_post_cuped'] = data_flgp[post_profit] - theta_flgp * (data_flgp[pre_profit] - data_flgp[pre_profit].mean())
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/inde

Unnamed: 0,entity,sustainable_growth,percentage_change_orders_cuped,incremental_orders_cuped,non_holdout_total_orders_cuped,holdout_total_orders_cuped,t_stat_orders,p_value_orders,percentage_change_flgp_cuped,incremental_flgp_cuped,non_holdout_total_flgp_cuped,holdout_total_flgp_cuped,t_stat_flgp,p_value_flgp,holdout_flgp_per_order_cuped,non_holdout_flgp_per_order_cuped,holdout_orders_per_user_cuped,non_holdout_orders_per_user_cuped,entity_id
0,EF_GR,7.729787,6.11063,138111.699112,2398299.0,2260188.0,-8.072846,7.224959e-16,1.549559,33514.267634,2196340.0,2162826.0,-0.441929,0.6585492,0.956923,0.915791,2.178339,2.311449,EF_GR
1,FP_TW,1.835712,0.844197,78427.237009,9368589.0,9290161.0,-2.193176,0.02829853,0.992978,62693.155414,6376345.0,6313652.0,-1.534955,0.1248041,0.679606,0.680609,3.269494,3.297095,FP_TW
2,YS_TR,-32.182999,1.723559,143208.367893,8452083.0,8308874.0,-4.254027,2.101781e-05,24.999284,514917.054223,-1544810.0,-2059727.0,-7.681676,1.603083e-14,-0.247895,-0.182773,1.985008,2.019221,YS_TR
3,PY_AR,1.53274,0.787178,61342.206187,7854017.0,7792675.0,-2.037969,0.04155684,0.745252,102134.123058,13806770.0,13704640.0,-2.840333,0.004509081,1.758657,1.757925,2.380435,2.399173,PY_AR
4,MJM_AT,3.401775,-0.075154,-707.678628,940925.0,941632.7,0.071441,0.9430481,3.604981,118602.074819,3408552.0,3289950.0,-1.746552,0.08077751,3.493878,3.622554,2.288688,2.286968,MJM_AT
5,FP_MY,2.536091,-0.468499,-24918.010045,5293769.0,5318687.0,0.958192,0.33797,3.112697,101758.162683,3370890.0,3269132.0,-3.738859,0.000185227,0.61465,0.636766,1.9025,1.893587,FP_MY
6,PY_EC,-1.420451,1.862626,18538.614435,1013833.0,995294.7,-1.586408,0.112679,-3.122407,-27440.173565,851374.6,878814.8,1.642152,0.1006245,0.882969,0.839758,2.125594,2.165186,PY_EC
7,DJ_CZ,-0.983975,0.144272,1662.130284,1153745.0,1152083.0,-0.146085,0.8838573,-1.11407,-13753.049398,1220734.0,1234487.0,0.694165,0.4876073,1.071526,1.058062,2.250641,2.253888,DJ_CZ
8,NP_HU,-0.438168,-0.027398,-306.15843,1117159.0,1117465.0,0.026504,0.9788557,-0.409202,-3593.909816,874679.9,878273.8,-0.102614,0.918273,0.785952,0.78295,2.397249,2.396592,NP_HU
9,FP_MM,0.771945,1.48277,10888.645986,745233.7,734345.1,-1.007453,0.3137495,-0.695567,-3391.743385,484231.3,487623.0,-0.098895,0.9212277,0.664024,0.649771,2.016146,2.046041,FP_MM


In [471]:
mkt_df['profit_pre'] = mkt_df['revenue_pre'] + mkt_df['cost_of_sales_pre']
mkt_df['profit_post'] = mkt_df['revenue_pre'] + mkt_df['cost_of_sales_pre']

# Define the list of pre and post metrics
metric_pairs = [
    ('analytical_profit_pre', 'analytical_profit_post'),
    ('local_analytical_profit_pre', 'local_analytical_profit_post'),
    ('profit_pre', 'profit_post')
]

results = []

# Loop through each metric pair
for pre_metric, post_metric in metric_pairs:
    for entity in mkt_df['entity_id'].unique():
        entity_data = mkt_df[mkt_df['entity_id'] == entity]
        result = calculate_sustainable_growth(entity_data, pre_metric, post_metric)
        
        # Add entity_id and metric information to the result
        result['entity_id'] = entity
        result['metric_used'] = f"{pre_metric}_vs_{post_metric}"
        
        results.append(result)

# Convert results to DataFrame
final_results_df = pd.DataFrame(results)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data_flgp['flgp_post_cuped'] = data_flgp[post_profit] - theta_flgp * (data_flgp[pre_profit] - data_flgp[pre_profit].mean())
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data_flgp['flgp_post_cuped'] = data_flgp[post_profit] - theta_flgp * (data_flgp[pre_profit] - data_flgp[pre_profit].mean())
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/inde

In [475]:
final_results_df = final_results_df.dropna(subset= 'sustainable_growth')

final_results_df.to_csv('profitable_growth.csv', index=False) 

Unnamed: 0,entity,sustainable_growth,percentage_change_orders_cuped,incremental_orders_cuped,non_holdout_total_orders_cuped,holdout_total_orders_cuped,t_stat_orders,p_value_orders,percentage_change_flgp_cuped,incremental_flgp_cuped,non_holdout_total_flgp_cuped,holdout_total_flgp_cuped,t_stat_flgp,p_value_flgp,holdout_flgp_per_order_cuped,non_holdout_flgp_per_order_cuped,holdout_orders_per_user_cuped,non_holdout_orders_per_user_cuped,entity_id,metric_used
0,EF_GR,7.729787,6.110630,138111.699112,2.398299e+06,2.260188e+06,-8.072846,7.224959e-16,1.549559,33514.267634,2.196340e+06,2.162826e+06,-0.441929,6.585492e-01,0.956923,0.915791,2.178339,2.311449,EF_GR,analytical_profit_pre_vs_analytical_profit_post
1,FP_TW,1.809193,0.844197,78427.237009,9.368589e+06,9.290161e+06,-2.193176,2.829853e-02,0.966163,93392.018397,9.759671e+06,9.666278e+06,-2.048362,4.053218e-02,1.040486,1.041744,3.269494,3.297095,FP_TW,analytical_profit_pre_vs_analytical_profit_post
2,YS_TR,5.855582,1.723559,143208.367893,8.452083e+06,8.308874e+06,-4.254027,2.101781e-05,4.233997,708256.324910,1.743610e+07,1.672784e+07,-9.226161,2.926753e-20,2.013250,2.062935,1.985008,2.019221,YS_TR,analytical_profit_pre_vs_analytical_profit_post
3,PY_AR,1.428210,0.787178,61342.206187,7.854017e+06,7.792675e+06,-2.037969,4.155684e-02,0.640097,97936.653707,1.539823e+07,1.530029e+07,-2.765591,5.684768e-03,1.963420,1.960555,2.380435,2.399173,PY_AR,analytical_profit_pre_vs_analytical_profit_post
4,MJM_AT,3.401393,-0.075154,-707.678628,9.409250e+05,9.416327e+05,0.071441,9.430481e-01,3.604571,118588.565262,3.408538e+06,3.289950e+06,-1.746318,8.081812e-02,3.493878,3.622540,2.288688,2.286968,MJM_AT,analytical_profit_pre_vs_analytical_profit_post
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
107,PY_CR,1.904478,2.927063,6488.169666,2.281496e+05,2.216614e+05,-1.289456,1.973481e-01,-0.983731,-2354.326607,2.369720e+05,2.393263e+05,0.610485,5.416052e-01,1.079693,1.038669,1.690717,1.740205,PY_CR,profit_pre_vs_profit_post
108,FP_BD,1.998459,2.774880,21627.297884,8.010231e+05,7.793958e+05,-2.321433,2.027669e-02,-0.749793,-19700.772285,2.607793e+06,2.627493e+06,1.010311,3.123648e-01,3.371193,3.255577,1.087418,1.117592,FP_BD,profit_pre_vs_profit_post
109,PO_FI,1.977270,-0.369520,-1070.341687,2.885873e+05,2.896577e+05,0.203520,8.387389e-01,2.412316,8620.469061,3.659729e+05,3.573525e+05,-1.476519,1.399085e-01,1.233706,1.268153,1.417743,1.412504,PO_FI,profit_pre_vs_profit_post
110,PY_UY,1.588543,2.085454,17071.950813,8.356922e+05,8.186202e+05,-1.743124,8.135735e-02,-0.484402,-11733.445726,2.410519e+06,2.422252e+06,1.333777,1.823307e-01,2.958945,2.884458,2.554604,2.607879,PY_UY,profit_pre_vs_profit_post


In [470]:
mkt_df.columns

Index(['entity_id', 'customer_id', 'is_customer_holdout', 'orders_pre',
       'orders_post', 'analytical_profit_pre', 'analytical_profit_post',
       'local_analytical_profit_pre', 'local_analytical_profit_post',
       'revenue_pre', 'revenue_post', 'cost_of_sales_pre',
       'cost_of_sales_post', 'flgp_pre', 'flgp_post', 'profit_pre',
       'profit_post'],
      dtype='object')

In [None]:
import numpy as np
import pandas as pd

def calculate_sustainable_growth(data):
    
    data = data.copy()

    entity = data['entity_id'].iloc[0]

    # Apply CUPED to FLGP
    data_flgp = data.dropna(subset=['flgp_pre', 'flgp_post'])
    theta_flgp = np.cov(data_flgp['flgp_pre'], data_flgp['flgp_post'])[0, 1] / np.var(data_flgp['flgp_pre'])
    data_flgp['flgp_post_cuped'] = data_flgp['flgp_post'] - theta_flgp * (data_flgp['flgp_pre'] - data_flgp['flgp_pre'].mean())

    # Apply CUPED to Orders
    data_orders = data.dropna(subset=['orders_pre', 'orders_post'])
    theta_orders = np.cov(data_orders['orders_pre'], data_orders['orders_post'])[0, 1] / np.var(data_orders['orders_pre'])
    data_orders['orders_post_cuped'] = data_orders['orders_post'] - theta_orders * (data_orders['orders_pre'] - data_orders['orders_pre'].mean())

    # Total FLGP and Orders (CUPED)
    holdout_total_flgp_cuped = data_flgp.loc[data_flgp['is_customer_holdout'], 'flgp_post_cuped'].sum()
    non_holdout_total_flgp_cuped = data_flgp.loc[~data_flgp['is_customer_holdout'], 'flgp_post_cuped'].sum()

    holdout_total_orders_cuped = data_orders.loc[data_orders['is_customer_holdout'], 'orders_post_cuped'].sum()
    non_holdout_total_orders_cuped = data_orders.loc[~data_orders['is_customer_holdout'], 'orders_post_cuped'].sum()

    # Normalize for Population Differences
    holdout_user_count = data['is_customer_holdout'].sum()
    non_holdout_user_count = (~data['is_customer_holdout']).sum()

    scaled_holdout_total_flgp_cuped = (holdout_total_flgp_cuped / holdout_user_count) * non_holdout_user_count if holdout_user_count != 0 else np.nan
    scaled_holdout_total_orders_cuped = (holdout_total_orders_cuped / holdout_user_count) * non_holdout_user_count if holdout_user_count != 0 else np.nan

    # FLGP per Order and Orders per User (CUPED)
    holdout_flgp_per_order_cuped = holdout_total_flgp_cuped / holdout_total_orders_cuped if holdout_total_orders_cuped != 0 else np.nan
    non_holdout_flgp_per_order_cuped = non_holdout_total_flgp_cuped / non_holdout_total_orders_cuped if non_holdout_total_orders_cuped != 0 else np.nan

    holdout_orders_per_user_cuped = holdout_total_orders_cuped / holdout_user_count if holdout_user_count != 0 else np.nan
    non_holdout_orders_per_user_cuped = non_holdout_total_orders_cuped / non_holdout_user_count if non_holdout_user_count != 0 else np.nan

    # Incremental Differences (CUPED)
    incremental_orders_cuped = non_holdout_total_orders_cuped - scaled_holdout_total_orders_cuped
    incremental_flgp_cuped = non_holdout_total_flgp_cuped - scaled_holdout_total_flgp_cuped

    # Percentage Changes (CUPED)
    percentage_change_orders_cuped = ((incremental_orders_cuped) / abs(scaled_holdout_total_orders_cuped)) * 100 if scaled_holdout_total_orders_cuped != 0 else np.nan
    percentage_change_flgp_cuped = ((incremental_flgp_cuped) / abs(scaled_holdout_total_flgp_cuped)) * 100 if scaled_holdout_total_flgp_cuped != 0 else np.nan

    # Sustainable Growth Calculation
    sustainable_growth = ((incremental_orders_cuped + (incremental_flgp_cuped / non_holdout_flgp_per_order_cuped)) / scaled_holdout_total_orders_cuped) * 100 if scaled_holdout_total_orders_cuped != 0 else np.nan

    return {
        'entity': entity,
        'sustainable_growth': sustainable_growth,
        'incremental_orders_cuped': incremental_orders_cuped,
        'incremental_flgp_cuped': incremental_flgp_cuped,
        'percentage_change_orders_cuped': percentage_change_orders_cuped,
        'percentage_change_flgp_cuped': percentage_change_flgp_cuped,
        'holdout_flgp_per_order_cuped': holdout_flgp_per_order_cuped,
        'non_holdout_flgp_per_order_cuped': non_holdout_flgp_per_order_cuped,
        'holdout_orders_per_user_cuped': holdout_orders_per_user_cuped,
        'non_holdout_orders_per_user_cuped': non_holdout_orders_per_user_cuped,
        'scaled_holdout_total_orders_cuped': scaled_holdout_total_orders_cuped,
        'non_holdout_total_orders_cuped': non_holdout_total_orders_cuped,
        'non_holdout_total_flgp_cuped': non_holdout_total_flgp_cuped,
        'scaled_holdout_total_flgp_cuped': scaled_holdout_total_flgp_cuped
    }


#calculate_sustainable_growth(growth_df)


results = []
for entity in growth_df['entity_id'].unique():
    entity_data = growth_df[growth_df['entity_id'] == entity]
    result = calculate_sustainable_growth(entity_data)
    results.append(result)

pd.DataFrame(results)

In [432]:
mkt_df['flgp_pre'] = mkt_df['analytical_profit_pre']
mkt_df['flgp_post'] = mkt_df['analytical_profit_post']

# mkt_df['flgp_pre'] = mkt_df['local_analytical_profit_pre']
# mkt_df['flgp_post'] = mkt_df['local_analytical_profit_post']

# mkt_df['flgp_pre'] = mkt_df['revenue_pre'] + mkt_df['cost_of_sales_pre']
# mkt_df['flgp_post'] = mkt_df['revenue_post'] + mkt_df['cost_of_sales_post']


In [450]:
# Drop NaNs for relevant columns
cleaned_df = mkt_df.dropna(subset=['flgp_post', 'orders_post'])

# Filter for specific entity and perform groupby aggregation
a = cleaned_df[cleaned_df['entity_id'] == 'PO_FI'].groupby('is_customer_holdout').agg({
    'flgp_post': 'sum',
    'orders_post': 'sum'
}).reset_index()

# Display the result
print(a['flgp_post'] / a['orders_post'])

0    0.304443
1    0.870967
dtype: Float64


In [458]:
import numpy as np
import pandas as pd
from scipy.stats import ttest_ind

# Function to calculate sustainable growth with CUPED adjustments
def calculate_sustainable_growth(data):
    data = data.copy()
    entity = data['entity_id'].iloc[0]

    # Handle missing data
    data = data.dropna(subset=['flgp_pre', 'flgp_post', 'orders_pre', 'orders_post'])

    # Apply CUPED to FLGP
    data_flgp = data.dropna(subset=['flgp_pre', 'flgp_post'])
    theta_flgp = np.cov(data_flgp['flgp_pre'], data_flgp['flgp_post'])[0, 1] / np.var(data_flgp['flgp_pre'])
    data_flgp['flgp_post_cuped'] = data_flgp['flgp_post'] - theta_flgp * (data_flgp['flgp_pre'] - data_flgp['flgp_pre'].mean())

    # Calculate FLGP per order
    flgp_per_order_pre = data['flgp_pre'] / data['orders_pre']
    flgp_per_order_post = data['flgp_post'] / data['orders_post']

    # Apply CUPED on FLGP per order
    theta_flgp_per_order = np.cov(flgp_per_order_pre, flgp_per_order_post)[0, 1] / np.var(flgp_per_order_pre)
    flgp_per_order_post_cuped = flgp_per_order_post - theta_flgp_per_order * (flgp_per_order_pre - flgp_per_order_pre.mean())

    # Split data into holdout and non-holdout
    holdout = data['is_customer_holdout'].values
    holdout_post_cuped = flgp_per_order_post_cuped[holdout]
    non_holdout_post_cuped = flgp_per_order_post_cuped[~holdout]

    # Total FLGP (CUPED)
    holdout_total_flgp_cuped = data_flgp.loc[data_flgp['is_customer_holdout'], 'flgp_post_cuped'].sum()
    non_holdout_total_flgp_cuped = data_flgp.loc[~data_flgp['is_customer_holdout'], 'flgp_post_cuped'].sum()

    # Total Orders (CUPED)
    holdout_total_orders_cuped = data.loc[data['is_customer_holdout'], 'orders_post'].sum()
    non_holdout_total_orders_cuped = data.loc[~data['is_customer_holdout'], 'orders_post'].sum()

    # FLGP per Order (CUPED)
    holdout_flgp_per_order_cuped = holdout_total_flgp_cuped / holdout_total_orders_cuped if holdout_total_orders_cuped != 0 else np.nan
    non_holdout_flgp_per_order_cuped = non_holdout_total_flgp_cuped / non_holdout_total_orders_cuped if non_holdout_total_orders_cuped != 0 else np.nan

    # Normalize for Population Differences
    holdout_user_count = data['is_customer_holdout'].sum()
    non_holdout_user_count = (~data['is_customer_holdout']).sum()

    scaled_holdout_total_flgp_cuped = (holdout_total_flgp_cuped / holdout_user_count) * non_holdout_user_count if holdout_user_count != 0 else np.nan
    scaled_holdout_total_orders_cuped = (holdout_total_orders_cuped / holdout_user_count) * non_holdout_user_count if holdout_user_count != 0 else np.nan

    # Incremental Differences (CUPED)
    incremental_flgp_cuped = non_holdout_total_flgp_cuped - scaled_holdout_total_flgp_cuped
    incremental_orders_cuped = non_holdout_total_orders_cuped - scaled_holdout_total_orders_cuped

    # Sustainable Growth Calculation using FLGP per Order CUPED
    additional_orders_from_flgp = incremental_flgp_cuped / non_holdout_post_cuped.() if non_holdout_post_cuped != 0 else np.nan
    sustainable_growth = ((incremental_orders_cuped + additional_orders_from_flgp) / scaled_holdout_total_orders_cuped) * 100 if scaled_holdout_total_orders_cuped != 0 else np.nan

    return {
        'entity': entity,
        'sustainable_growth': sustainable_growth,
        'incremental_orders_cuped': incremental_orders_cuped,
        'incremental_flgp_cuped': incremental_flgp_cuped,
        'holdout_flgp_per_order_cuped': holdout_flgp_per_order_cuped,
        'non_holdout_flgp_per_order_cuped': non_holdout_flgp_per_order_cuped,
        'scaled_holdout_total_orders_cuped': scaled_holdout_total_orders_cuped,
        'non_holdout_total_orders_cuped': non_holdout_total_orders_cuped,
        'non_holdout_total_flgp_cuped': non_holdout_total_flgp_cuped,
        'scaled_holdout_total_flgp_cuped': scaled_holdout_total_flgp_cuped
    }

# Process data for each entity
results = []
for entity in mkt_df['entity_id'].unique():
    entity_data = mkt_df[mkt_df['entity_id'] == entity]
    result = calculate_sustainable_growth(entity_data)
    results.append(result)

# Convert to DataFrame for visualization
results_df = pd.DataFrame(results)
print(results_df[['entity', 'sustainable_growth', 'holdout_flgp_per_order_cuped', 'non_holdout_flgp_per_order_cuped']])


ValueError: The truth value of a Series is ambiguous. Use a.empty, a.bool(), a.item(), a.any() or a.all().

In [456]:
results_df

Unnamed: 0,entity,sustainable_growth,incremental_orders_cuped,incremental_flgp_cuped,holdout_flgp_per_order_cuped,non_holdout_flgp_per_order_cuped,scaled_holdout_total_orders_cuped,non_holdout_total_orders_cuped,non_holdout_total_flgp_cuped,scaled_holdout_total_flgp_cuped
0,EF_GR,5.046343,83627.809122,17829.010498,1.101382,1.065353,1977980.0,2061608,2196340.0,2178511.0
1,FP_TW,1.640263,35903.674157,118702.291842,1.098355,1.107349,8777644.0,8813548,9759671.0,9640968.0
2,YS_TR,6.483732,111649.218142,817311.71736,2.330566,2.407488,7130795.0,7242444,17436100.0,16618790.0
3,PY_AR,3.097309,109595.539217,222153.678987,2.26192,2.258145,6709378.0,6818974,15398230.0,15176080.0
4,MJM_AT,2.621031,-3493.584805,100925.289648,4.073673,4.216114,811948.6,808455,3408538.0,3307613.0
5,FP_MY,3.492495,30439.708516,117327.056615,0.898701,0.918091,4609641.0,4640081,4260018.0,4142691.0
6,PY_EC,-2.464113,8179.280983,-29555.110895,1.029443,0.984307,833181.7,841361,828157.6,857712.7
7,DJ_CZ,-2.686371,-9645.725479,-21119.33278,1.270725,1.26157,977736.7,968091,1221315.0,1242434.0
8,NP_HU,1.298831,8839.404729,3106.661052,0.92918,0.923786,937985.6,946825,874663.9,871557.3
9,FP_MM,3.239033,17440.038005,3099.569418,0.956327,0.935626,638498.0,655938,613712.4,610612.8


In [453]:
import numpy as np
import pandas as pd
from scipy.stats import ttest_ind

# Function to calculate sustainable growth with CUPED adjustments
def calculate_sustainable_growth(data):
    data = data.copy()
    entity = data['entity_id'].iloc[0]

    # Handle missing data
    data = data.dropna(subset=['flgp_pre', 'flgp_post', 'orders_pre', 'orders_post'])

    # Calculate FLGP per order
    flgp_per_order_pre = data['flgp_pre'] / data['orders_pre']
    flgp_per_order_post = data['flgp_post'] / data['orders_post']

    # Apply CUPED on FLGP per order
    theta_flgp_per_order = np.cov(flgp_per_order_pre, flgp_per_order_post)[0, 1] / np.var(flgp_per_order_pre)
    flgp_per_order_post_cuped = flgp_per_order_post - theta_flgp_per_order * (flgp_per_order_pre - flgp_per_order_pre.mean())

    # Split data into holdout and non-holdout
    holdout = data['is_customer_holdout'].values
    holdout_post_cuped = flgp_per_order_post_cuped[holdout]
    non_holdout_post_cuped = flgp_per_order_post_cuped[~holdout]

    # Total FLGP and Orders (CUPED)
    holdout_total_flgp_cuped = (holdout_post_cuped * data.loc[data['is_customer_holdout'], 'orders_post'].values).sum()
    non_holdout_total_flgp_cuped = (non_holdout_post_cuped * data.loc[~data['is_customer_holdout'], 'orders_post'].values).sum()

    holdout_total_orders = data.loc[data['is_customer_holdout'], 'orders_post'].sum()
    non_holdout_total_orders = data.loc[~data['is_customer_holdout'], 'orders_post'].sum()

    # Normalize for Population Differences
    holdout_user_count = data['is_customer_holdout'].sum()
    non_holdout_user_count = (~data['is_customer_holdout']).sum()

    scaled_holdout_total_flgp_cuped = (holdout_total_flgp_cuped / holdout_user_count) * non_holdout_user_count if holdout_user_count != 0 else np.nan
    scaled_holdout_total_orders = (holdout_total_orders / holdout_user_count) * non_holdout_user_count if holdout_user_count != 0 else np.nan

    # FLGP per Order (CUPED)
    holdout_flgp_per_order_cuped = holdout_total_flgp_cuped / holdout_total_orders if holdout_total_orders != 0 else np.nan
    non_holdout_flgp_per_order_cuped = non_holdout_total_flgp_cuped / non_holdout_total_orders if non_holdout_total_orders != 0 else np.nan

    # Incremental Differences (CUPED)
    incremental_flgp_cuped = non_holdout_total_flgp_cuped - scaled_holdout_total_flgp_cuped
    incremental_orders = non_holdout_total_orders - scaled_holdout_total_orders

    # Percentage Changes (CUPED)
    percentage_change_flgp_cuped = ((incremental_flgp_cuped) / abs(scaled_holdout_total_flgp_cuped)) * 100 if scaled_holdout_total_flgp_cuped != 0 else np.nan
    percentage_change_orders = ((incremental_orders) / abs(scaled_holdout_total_orders)) * 100 if scaled_holdout_total_orders != 0 else np.nan

    # Sustainable Growth Calculation
    sustainable_growth = ((incremental_orders + (incremental_flgp_cuped / non_holdout_flgp_per_order_cuped)) / scaled_holdout_total_orders) * 100 if scaled_holdout_total_orders != 0 else np.nan

    return {
        'entity': entity,
        'sustainable_growth': sustainable_growth,
        'incremental_orders': incremental_orders,
        'incremental_flgp_cuped': incremental_flgp_cuped,
        'percentage_change_orders': percentage_change_orders,
        'percentage_change_flgp_cuped': percentage_change_flgp_cuped,
        'holdout_flgp_per_order_cuped': holdout_flgp_per_order_cuped,
        'non_holdout_flgp_per_order_cuped': non_holdout_flgp_per_order_cuped,
        'scaled_holdout_total_orders': scaled_holdout_total_orders,
        'non_holdout_total_orders': non_holdout_total_orders,
        'non_holdout_total_flgp_cuped': non_holdout_total_flgp_cuped,
        'scaled_holdout_total_flgp_cuped': scaled_holdout_total_flgp_cuped
    }

# Process data for each entity
results = []
for entity in mkt_df['entity_id'].unique():
    entity_data = mkt_df[mkt_df['entity_id'] == entity]
    result = calculate_sustainable_growth(entity_data)
    results.append(result)

# Convert to DataFrame for visualization
results_df = pd.DataFrame(results)
print(results_df[['entity', 'sustainable_growth', 'holdout_flgp_per_order_cuped', 'non_holdout_flgp_per_order_cuped']])


  avg = a.mean(axis, **keepdims_kw)
  ret = um.true_divide(
  theta_flgp_per_order = np.cov(flgp_per_order_pre, flgp_per_order_post)[0, 1] / np.var(flgp_per_order_pre)
  c *= np.true_divide(1, fact)
  c *= np.true_divide(1, fact)
  avg = a.mean(axis, **keepdims_kw)
  ret = um.true_divide(
  theta_flgp_per_order = np.cov(flgp_per_order_pre, flgp_per_order_post)[0, 1] / np.var(flgp_per_order_pre)
  c *= np.true_divide(1, fact)
  c *= np.true_divide(1, fact)
  avg = a.mean(axis, **keepdims_kw)
  ret = um.true_divide(
  theta_flgp_per_order = np.cov(flgp_per_order_pre, flgp_per_order_post)[0, 1] / np.var(flgp_per_order_pre)
  c *= np.true_divide(1, fact)
  c *= np.true_divide(1, fact)
  avg = a.mean(axis, **keepdims_kw)
  ret = um.true_divide(
  theta_flgp_per_order = np.cov(flgp_per_order_pre, flgp_per_order_post)[0, 1] / np.var(flgp_per_order_pre)
  c *= np.true_divide(1, fact)
  c *= np.true_divide(1, fact)
  avg = a.mean(axis, **keepdims_kw)
  ret = um.true_divide(
  theta_flgp_per_ord

    entity  sustainable_growth  holdout_flgp_per_order_cuped  \
0    EF_GR            6.702903                      1.133566   
1    FP_TW            1.256262                      1.136943   
2    YS_TR            6.512142                      2.309508   
3    PY_AR            3.429563                      2.226056   
4   MJM_AT            2.338526                      4.205267   
5    FP_MY            4.010636                      0.845244   
6    PY_EC           -2.925274                      1.018851   
7    DJ_CZ           -3.096751                      1.367239   
8    NP_HU           -0.506632                      0.976970   
9    FP_MM            3.479059                      0.919002   
10   PY_BO            4.129248                      1.072168   
11   PY_PE            5.549271                      0.725680   
12   PY_SV            4.702858                      1.423019   
13   PY_PY           -0.734180                      1.438724   
14   FP_PH            4.572125          

  avg = a.mean(axis, **keepdims_kw)
  ret = um.true_divide(
  theta_flgp_per_order = np.cov(flgp_per_order_pre, flgp_per_order_post)[0, 1] / np.var(flgp_per_order_pre)
  c *= np.true_divide(1, fact)
  c *= np.true_divide(1, fact)
  avg = a.mean(axis, **keepdims_kw)
  ret = um.true_divide(
  theta_flgp_per_order = np.cov(flgp_per_order_pre, flgp_per_order_post)[0, 1] / np.var(flgp_per_order_pre)
  c *= np.true_divide(1, fact)
  c *= np.true_divide(1, fact)


In [454]:
results_df

Unnamed: 0,entity,sustainable_growth,incremental_orders,incremental_flgp_cuped,percentage_change_orders,percentage_change_flgp_cuped,holdout_flgp_per_order_cuped,non_holdout_flgp_per_order_cuped,scaled_holdout_total_orders,non_holdout_total_orders,non_holdout_total_flgp_cuped,scaled_holdout_total_flgp_cuped
0,EF_GR,6.702903,83627.809122,54536.873028,4.22794,2.432325,1.133566,1.114037,1977980.0,2061608,2296707.0,2242170.0
1,FP_TW,1.256262,35903.674157,84922.655887,0.409035,0.850956,1.136943,1.141947,8777644.0,8813548,10064600.0,9979680.0
2,YS_TR,6.512142,111649.218142,843108.52579,1.565733,5.119482,2.309508,2.390317,7130795.0,7242444,17311740.0,16468630.0
3,PY_AR,3.429563,109595.539217,268691.905361,1.633468,1.799021,2.226056,2.229682,6709378.0,6818974,15204150.0,14935450.0
4,MJM_AT,2.338526,-3493.584805,97663.851226,-0.430272,2.860301,4.205267,4.344242,811948.6,808455,3512124.0,3414461.0
5,FP_MY,4.010636,30439.708516,134144.710837,0.660349,3.442899,0.845244,0.868609,4609641.0,4640081,4030416.0,3896271.0
6,PY_EC,-2.925274,8179.280983,-31619.990419,0.981692,-3.72487,1.018851,0.971365,833181.7,841361,817268.3,848888.3
7,DJ_CZ,-3.096751,-9645.725479,-27895.878518,-0.986536,-2.086766,1.367239,1.352046,977736.7,968091,1308903.0,1336799.0
8,NP_HU,-0.506632,8839.404729,-12968.402034,0.942382,-1.415172,0.97697,0.954152,937985.6,946825,903415.3,916383.7
9,FP_MM,3.479059,17440.038005,4301.68958,2.731416,0.733099,0.919002,0.901126,638498.0,655938,591082.8,586781.1


In [445]:
def analyze_cuped_flgp_per_order(data):
    results = []

    for entity in data['entity_id'].unique():
        entity_data = data[data['entity_id'] == entity].copy()

        # Handle missing data
        entity_data = entity_data.dropna(subset=['flgp_pre', 'flgp_post', 'orders_pre', 'orders_post'])

        # Calculate FLGP per order
        flgp_per_order_pre = entity_data['flgp_pre'] / entity_data['orders_pre']
        flgp_per_order_post = entity_data['flgp_post'] / entity_data['orders_post']

        # Apply CUPED
        theta = np.cov(flgp_per_order_pre, flgp_per_order_post)[0, 1] / np.var(flgp_per_order_pre)
        flgp_per_order_post_cuped = flgp_per_order_post - theta * (flgp_per_order_pre - flgp_per_order_pre.mean())

        # Split data into holdout and non-holdout
        holdout = entity_data['is_customer_holdout'].values
        holdout_post = flgp_per_order_post[holdout]
        non_holdout_post = flgp_per_order_post[~holdout]

        holdout_post_cuped = flgp_per_order_post_cuped[holdout]
        non_holdout_post_cuped = flgp_per_order_post_cuped[~holdout]

        # T-tests
        t_stat_before, p_value_before = ttest_ind(non_holdout_post, holdout_post, equal_var=False, nan_policy='omit')
        t_stat_after, p_value_after = ttest_ind(non_holdout_post_cuped, holdout_post_cuped, equal_var=False, nan_policy='omit')

        # Averages
        avg_holdout_post = np.mean(holdout_post) if holdout_post.size > 0 else np.nan
        avg_non_holdout_post = np.mean(non_holdout_post) if non_holdout_post.size > 0 else np.nan

        avg_holdout_post_cuped = np.mean(holdout_post_cuped) if holdout_post_cuped.size > 0 else np.nan
        avg_non_holdout_post_cuped = np.mean(non_holdout_post_cuped) if non_holdout_post_cuped.size > 0 else np.nan

        # Percentage Change After CUPED
        percentage_change_cuped = ((avg_non_holdout_post_cuped - avg_holdout_post_cuped) / abs(avg_holdout_post_cuped)) * 100 if avg_holdout_post_cuped != 0 else np.nan

        results.append({
            'entity': entity,
            't_stat_before': t_stat_before,
            'p_value_before': p_value_before,
            't_stat_after': t_stat_after,
            'p_value_after': p_value_after,
            'avg_holdout_post': avg_holdout_post,
            'avg_non_holdout_post': avg_non_holdout_post,
            'avg_holdout_post_cuped': avg_holdout_post_cuped,
            'avg_non_holdout_post_cuped': avg_non_holdout_post_cuped,
            'percentage_change_cuped': percentage_change_cuped
        })

    return pd.DataFrame(results)

# Apply the function to your dataset
results_df = analyze_cuped_flgp_per_order(mkt_df)


  avg = a.mean(axis, **keepdims_kw)
  ret = um.true_divide(
  theta = np.cov(flgp_per_order_pre, flgp_per_order_post)[0, 1] / np.var(flgp_per_order_pre)
  c *= np.true_divide(1, fact)
  c *= np.true_divide(1, fact)
  t_stat_before, p_value_before = ttest_ind(non_holdout_post, holdout_post, equal_var=False, nan_policy='omit')
  t_stat_after, p_value_after = ttest_ind(non_holdout_post_cuped, holdout_post_cuped, equal_var=False, nan_policy='omit')
  avg = a.mean(axis, **keepdims_kw)
  ret = um.true_divide(
  theta = np.cov(flgp_per_order_pre, flgp_per_order_post)[0, 1] / np.var(flgp_per_order_pre)
  c *= np.true_divide(1, fact)
  c *= np.true_divide(1, fact)
  t_stat_before, p_value_before = ttest_ind(non_holdout_post, holdout_post, equal_var=False, nan_policy='omit')
  t_stat_after, p_value_after = ttest_ind(non_holdout_post_cuped, holdout_post_cuped, equal_var=False, nan_policy='omit')
  avg = a.mean(axis, **keepdims_kw)
  ret = um.true_divide(
  theta = np.cov(flgp_per_order_pre, flgp_

In [443]:
results_df

Unnamed: 0,entity,t_stat_before,p_value_before,t_stat_after,p_value_after,avg_holdout_post,avg_non_holdout_post,avg_holdout_post_cuped,avg_non_holdout_post_cuped,percentage_change_cuped
0,EF_GR,-0.077374,0.9383276,-0.281913,0.778015,1.194842,1.192779,1.199559,1.192686,-0.57299
1,FP_TW,2.979098,0.002893012,3.340604,0.0008368579,1.221005,1.244268,1.221145,1.244265,1.893328
2,YS_TR,7.996709,1.309313e-15,8.324273,8.729735e-17,2.310452,2.385093,2.312665,2.385048,3.129877
3,PY_AR,0.549933,0.5823689,0.847791,0.3965596,2.262631,2.268799,2.260175,2.268849,0.383745
4,MJM_AT,0.582807,0.5600497,0.913783,0.3608759,4.597794,4.640166,4.579144,4.640545,1.34088
5,FP_MY,3.896904,9.765881e-05,3.902426,9.545808e-05,0.848267,0.876974,0.851015,0.876918,3.043747
6,PY_EC,-1.718055,0.08585159,-2.416833,0.01569351,1.074256,1.037886,1.084606,1.037679,-4.326664
7,DJ_CZ,0.104013,0.917163,-0.232128,0.8164469,1.573238,1.577266,1.584996,1.57703,-0.502567
8,NP_HU,-0.174613,0.8613904,0.177711,0.8589571,1.031977,1.025668,1.019988,1.025912,0.5808
9,FP_MM,0.125503,0.9001328,-0.199702,0.841725,0.904025,0.90591,0.908526,0.905819,-0.297966


In [None]:
# import numpy as np
# import pandas as pd
# from sklearn.preprocessing import StandardScaler
# from sklearn.cluster import KMeans

# # Convert 'date' column to datetime
# woowa_df['created_date_local'] = pd.to_datetime(woowa_df['created_date_local'])

# # Group by 'zone_id' and aggregate the required metrics
# zone_df = woowa_df.groupby('zone_id').agg(
#     delivery_distance_actual_m=('delivery_distance_actual_m', 'mean'),
#     straight_line_distance_m=('delivery_distance_m', 'mean'),
#     total_orders=('platform_order_code', 'nunique'),
#     min_date=('created_date_local', 'min'),
#     max_date=('created_date_local', 'max'),
#     marketing_classification= ('regional_classification_id', 'max')
# ).reset_index()

# # Calculate the difference in months between min and max date for each 'zone_id'
# zone_df['total_months'] = ((zone_df['max_date'].dt.year - zone_df['min_date'].dt.year) * 12 +
#                      (zone_df['max_date'].dt.month - zone_df['min_date'].dt.month) +
#                      ((zone_df['max_date'].dt.day - zone_df['min_date'].dt.day) > 0).astype(int))

# # calculate the monthly orders for each zone
# zone_df['monthly_orders'] = zone_df['total_orders'] / zone_df['total_months']

# # 1. Log transformation for `total_orders` and 'monthly_orders'
# zone_df['log_monthly_orders'] = np.log1p(zone_df['monthly_orders'])  # log(1 + x) to handle zeros
# zone_df['log_total_orders'] = np.log1p(zone_df['total_orders'])  # log(1 + x) to handle zeros
# zone_df['log_delivery_distance_actual_m'] = np.log1p(zone_df['delivery_distance_actual_m'])  # log(1 + x) to handle zeros
# zone_df['log_straight_line_distance_m'] = np.log1p(zone_df['straight_line_distance_m'])  # log(1 + x) to handle zeros

# # # 2. Z-score scaling for the distance features
# # scaler = StandardScaler()
# # zone_df[['z_delivery_distance_actual_m', 'z_straight_line_distance_m']] = scaler.fit_transform(
# #     zone_df[['delivery_distance_actual_m', 'straight_line_distance_m']]
# # )

# # 3. K-Means Clustering
# def apply_kmeans(data, n_clusters, random_state=42):
#     model = KMeans(n_clusters=n_clusters, random_state=random_state)
#     model.fit(data)
#     return model.labels_

# # Apply K-Means for each feature independently
# zone_df['cluster_delivery_distance'] = apply_kmeans(zone_df[['log_delivery_distance_actual_m']], n_clusters=3)
# zone_df['cluster_straight_line_distance'] = apply_kmeans(zone_df[['log_straight_line_distance_m']], n_clusters=3)
# zone_df['cluster_log_monthly_orders'] = apply_kmeans(zone_df[['log_monthly_orders']], n_clusters=3)
# zone_df['cluster_log_total_orders'] = apply_kmeans(zone_df[['log_total_orders']], n_clusters=3)
# zone_df['cluster_log_order_straight_d'] = apply_kmeans(zone_df[['log_total_orders','log_straight_line_distance_m']], n_clusters=4)

TypeError: agg function failed [how->mean,dtype->object]

In [433]:
import numpy as np
import pandas as pd

def calculate_sustainable_growth(data):
    
    data = data.copy()

    entity = data['entity_id'].iloc[0]

    # Apply CUPED to FLGP
    data_flgp = data.dropna(subset=['flgp_pre', 'flgp_post'])
    theta_flgp = np.cov(data_flgp['flgp_pre'], data_flgp['flgp_post'])[0, 1] / np.var(data_flgp['flgp_pre'])
    data_flgp['flgp_post_cuped'] = data_flgp['flgp_post'] - theta_flgp * (data_flgp['flgp_pre'] - data_flgp['flgp_pre'].mean())

    # Apply CUPED to Orders
    data_orders = data.dropna(subset=['orders_pre', 'orders_post'])
    theta_orders = np.cov(data_orders['orders_pre'], data_orders['orders_post'])[0, 1] / np.var(data_orders['orders_pre'])
    data_orders['orders_post_cuped'] = data_orders['orders_post'] - theta_orders * (data_orders['orders_pre'] - data_orders['orders_pre'].mean())

    # Total FLGP and Orders (CUPED)
    holdout_total_flgp_cuped = data_flgp.loc[data_flgp['is_customer_holdout'], 'flgp_post_cuped'].sum()
    non_holdout_total_flgp_cuped = data_flgp.loc[~data_flgp['is_customer_holdout'], 'flgp_post_cuped'].sum()

    holdout_total_orders_cuped = data_orders.loc[data_orders['is_customer_holdout'], 'orders_post_cuped'].sum()
    non_holdout_total_orders_cuped = data_orders.loc[~data_orders['is_customer_holdout'], 'orders_post_cuped'].sum()

    # Normalize for Population Differences
    holdout_user_count = data['is_customer_holdout'].sum()
    non_holdout_user_count = (~data['is_customer_holdout']).sum()

    scaled_holdout_total_flgp_cuped = (holdout_total_flgp_cuped / holdout_user_count) * non_holdout_user_count if holdout_user_count != 0 else np.nan
    scaled_holdout_total_orders_cuped = (holdout_total_orders_cuped / holdout_user_count) * non_holdout_user_count if holdout_user_count != 0 else np.nan

    # FLGP per Order and Orders per User (CUPED)
    holdout_flgp_per_order_cuped = holdout_total_flgp_cuped / holdout_total_orders_cuped if holdout_total_orders_cuped != 0 else np.nan
    non_holdout_flgp_per_order_cuped = non_holdout_total_flgp_cuped / non_holdout_total_orders_cuped if non_holdout_total_orders_cuped != 0 else np.nan

    holdout_orders_per_user_cuped = holdout_total_orders_cuped / holdout_user_count if holdout_user_count != 0 else np.nan
    non_holdout_orders_per_user_cuped = non_holdout_total_orders_cuped / non_holdout_user_count if non_holdout_user_count != 0 else np.nan

    # Incremental Differences (CUPED)
    incremental_orders_cuped = non_holdout_total_orders_cuped - scaled_holdout_total_orders_cuped
    incremental_flgp_cuped = non_holdout_total_flgp_cuped - scaled_holdout_total_flgp_cuped

    # Percentage Changes (CUPED)
    percentage_change_orders_cuped = ((incremental_orders_cuped) / abs(scaled_holdout_total_orders_cuped)) * 100 if scaled_holdout_total_orders_cuped != 0 else np.nan
    percentage_change_flgp_cuped = ((incremental_flgp_cuped) / abs(scaled_holdout_total_flgp_cuped)) * 100 if scaled_holdout_total_flgp_cuped != 0 else np.nan

    # Sustainable Growth Calculation
    sustainable_growth = ((incremental_orders_cuped + (incremental_flgp_cuped / non_holdout_flgp_per_order_cuped)) / scaled_holdout_total_orders_cuped) * 100 if scaled_holdout_total_orders_cuped != 0 else np.nan

    return {
        'entity': entity,
        'sustainable_growth': sustainable_growth,
        'incremental_orders_cuped': incremental_orders_cuped,
        'incremental_flgp_cuped': incremental_flgp_cuped,
        'percentage_change_orders_cuped': percentage_change_orders_cuped,
        'percentage_change_flgp_cuped': percentage_change_flgp_cuped,
        'holdout_flgp_per_order_cuped': holdout_flgp_per_order_cuped,
        'non_holdout_flgp_per_order_cuped': non_holdout_flgp_per_order_cuped,
        'holdout_orders_per_user_cuped': holdout_orders_per_user_cuped,
        'non_holdout_orders_per_user_cuped': non_holdout_orders_per_user_cuped,
        'scaled_holdout_total_orders_cuped': scaled_holdout_total_orders_cuped,
        'non_holdout_total_orders_cuped': non_holdout_total_orders_cuped,
        'non_holdout_total_flgp_cuped': non_holdout_total_flgp_cuped,
        'scaled_holdout_total_flgp_cuped': scaled_holdout_total_flgp_cuped
    }


#calculate_sustainable_growth(growth_df)


results = []
for entity in mkt_df['entity_id'].unique():
    entity_data = mkt_df[mkt_df['entity_id'] == entity]
    result = calculate_sustainable_growth(entity_data)
    results.append(result)

pd.DataFrame(results)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data_flgp['flgp_post_cuped'] = data_flgp['flgp_post'] - theta_flgp * (data_flgp['flgp_pre'] - data_flgp['flgp_pre'].mean())
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data_flgp['flgp_post_cuped'] = data_flgp['flgp_post'] - theta_flgp * (data_flgp['flgp_pre'] - data_flgp['flgp_pre'].mean())
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/inde

Unnamed: 0,entity,sustainable_growth,incremental_orders_cuped,incremental_flgp_cuped,percentage_change_orders_cuped,percentage_change_flgp_cuped,holdout_flgp_per_order_cuped,non_holdout_flgp_per_order_cuped,holdout_orders_per_user_cuped,non_holdout_orders_per_user_cuped,scaled_holdout_total_orders_cuped,non_holdout_total_orders_cuped,non_holdout_total_flgp_cuped,scaled_holdout_total_flgp_cuped
0,EF_GR,7.729787,138111.699112,33514.267634,6.11063,1.549559,0.956923,0.915791,2.178339,2.311449,2260188.0,2398299.0,2196340.0,2162826.0
1,FP_TW,1.809193,78427.237009,93392.018397,0.844197,0.966163,1.040486,1.041744,3.269494,3.297095,9290161.0,9368589.0,9759671.0,9666278.0
2,YS_TR,5.855582,143208.367893,708256.32491,1.723559,4.233997,2.01325,2.062935,1.985008,2.019221,8308874.0,8452083.0,17436100.0,16727840.0
3,PY_AR,1.42821,61342.206187,97936.653707,0.787178,0.640097,1.96342,1.960555,2.380435,2.399173,7792675.0,7854017.0,15398230.0,15300290.0
4,MJM_AT,3.401393,-707.678628,118588.565262,-0.075154,3.604571,3.493878,3.62254,2.288688,2.286968,941632.7,940925.0,3408538.0,3289950.0
5,FP_MY,1.814275,-24918.010045,97704.348202,-0.468499,2.347357,0.782583,0.804723,1.9025,1.893587,5318687.0,5293769.0,4260018.0,4162314.0
6,PY_EC,-1.462751,18538.614435,-27035.783598,1.862626,-3.161365,0.859236,0.816858,2.125594,2.165186,995294.7,1013833.0,828157.6,855193.4
7,DJ_CZ,-0.925941,1662.130284,-13051.833049,0.144272,-1.057371,1.071422,1.058566,2.250641,2.253888,1152083.0,1153745.0,1221315.0,1234367.0
8,NP_HU,-0.439964,-306.15843,-3609.561069,-0.027398,-0.410984,0.785951,0.782936,2.397249,2.396592,1117465.0,1117159.0,874663.9,878273.5
9,FP_MM,1.063984,10888.645986,-2532.584785,1.48277,-0.41097,0.839176,0.823517,2.016146,2.046041,734345.1,745233.7,613712.4,616245.0


In [440]:
import numpy as np
import pandas as pd

def calculate_sustainable_growth(data):
    
    data = data.copy()

    entity = data['entity_id'].iloc[0]

    # Ensure consistent data for CUPED (both FLGP and Orders must have non-null values)
    consistent_data = data.copy()

    # Apply CUPED to FLGP
    theta_flgp = np.cov(consistent_data['flgp_pre'], consistent_data['flgp_post'])[0, 1] / np.var(consistent_data['flgp_pre'])
    consistent_data['flgp_post_cuped'] = consistent_data['flgp_post'] - theta_flgp * (consistent_data['flgp_pre'] - consistent_data['flgp_pre'].mean())

    # Apply CUPED to Orders
    theta_orders = np.cov(consistent_data['orders_pre'], consistent_data['orders_post'])[0, 1] / np.var(consistent_data['orders_pre'])
    consistent_data['orders_post_cuped'] = consistent_data['orders_post'] - theta_orders * (consistent_data['orders_pre'] - consistent_data['orders_pre'].mean())

    # Total FLGP and Orders (CUPED)
    holdout_total_flgp_cuped = consistent_data.loc[consistent_data['is_customer_holdout'], 'flgp_post_cuped'].sum()
    non_holdout_total_flgp_cuped = consistent_data.loc[~consistent_data['is_customer_holdout'], 'flgp_post_cuped'].sum()

    holdout_total_orders_cuped = consistent_data.loc[consistent_data['is_customer_holdout'], 'orders_post_cuped'].sum()
    non_holdout_total_orders_cuped = consistent_data.loc[~consistent_data['is_customer_holdout'], 'orders_post_cuped'].sum()

    # Normalize for Population Differences
    holdout_user_count = consistent_data['is_customer_holdout'].sum()
    non_holdout_user_count = (~consistent_data['is_customer_holdout']).sum()

    scaled_holdout_total_flgp_cuped = (holdout_total_flgp_cuped / holdout_user_count) * non_holdout_user_count if holdout_user_count != 0 else np.nan
    scaled_holdout_total_orders_cuped = (holdout_total_orders_cuped / holdout_user_count) * non_holdout_user_count if holdout_user_count != 0 else np.nan

    # FLGP per Order and Orders per User (CUPED)
    holdout_flgp_per_order_cuped = holdout_total_flgp_cuped / holdout_total_orders_cuped if holdout_total_orders_cuped != 0 else np.nan
    non_holdout_flgp_per_order_cuped = non_holdout_total_flgp_cuped / non_holdout_total_orders_cuped if non_holdout_total_orders_cuped != 0 else np.nan

    holdout_orders_per_user_cuped = holdout_total_orders_cuped / holdout_user_count if holdout_user_count != 0 else np.nan
    non_holdout_orders_per_user_cuped = non_holdout_total_orders_cuped / non_holdout_user_count if non_holdout_user_count != 0 else np.nan

    # Incremental Differences (CUPED)
    incremental_orders_cuped = non_holdout_total_orders_cuped - scaled_holdout_total_orders_cuped
    incremental_flgp_cuped = non_holdout_total_flgp_cuped - scaled_holdout_total_flgp_cuped

    # Percentage Changes (CUPED)
    percentage_change_orders_cuped = ((incremental_orders_cuped) / abs(scaled_holdout_total_orders_cuped)) * 100 if scaled_holdout_total_orders_cuped != 0 else np.nan
    percentage_change_flgp_cuped = ((incremental_flgp_cuped) / abs(scaled_holdout_total_flgp_cuped)) * 100 if scaled_holdout_total_flgp_cuped != 0 else np.nan

    # Sustainable Growth Calculation
    sustainable_growth = ((incremental_orders_cuped + (incremental_flgp_cuped / non_holdout_flgp_per_order_cuped)) / scaled_holdout_total_orders_cuped) * 100 if scaled_holdout_total_orders_cuped != 0 else np.nan

    return {
        'entity': entity,
        'sustainable_growth': sustainable_growth,
        'incremental_orders_cuped': incremental_orders_cuped,
        'incremental_flgp_cuped': incremental_flgp_cuped,
        'percentage_change_orders_cuped': percentage_change_orders_cuped,
        'percentage_change_flgp_cuped': percentage_change_flgp_cuped,
        'holdout_flgp_per_order_cuped': holdout_flgp_per_order_cuped,
        'non_holdout_flgp_per_order_cuped': non_holdout_flgp_per_order_cuped,
        'holdout_orders_per_user_cuped': holdout_orders_per_user_cuped,
        'non_holdout_orders_per_user_cuped': non_holdout_orders_per_user_cuped,
        'scaled_holdout_total_orders_cuped': scaled_holdout_total_orders_cuped,
        'non_holdout_total_orders_cuped': non_holdout_total_orders_cuped,
        'non_holdout_total_flgp_cuped': non_holdout_total_flgp_cuped,
        'scaled_holdout_total_flgp_cuped': scaled_holdout_total_flgp_cuped
    }


# Process data for each entity
results = []
for entity in mkt_df['entity_id'].unique():
    entity_data = mkt_df[mkt_df['entity_id'] == entity]
    result = calculate_sustainable_growth(entity_data)
    results.append(result)

# Convert to DataFrame for visualization
pd.DataFrame(results)


  sustainable_growth = ((incremental_orders_cuped + (incremental_flgp_cuped / non_holdout_flgp_per_order_cuped)) / scaled_holdout_total_orders_cuped) * 100 if scaled_holdout_total_orders_cuped != 0 else np.nan
  sustainable_growth = ((incremental_orders_cuped + (incremental_flgp_cuped / non_holdout_flgp_per_order_cuped)) / scaled_holdout_total_orders_cuped) * 100 if scaled_holdout_total_orders_cuped != 0 else np.nan
  sustainable_growth = ((incremental_orders_cuped + (incremental_flgp_cuped / non_holdout_flgp_per_order_cuped)) / scaled_holdout_total_orders_cuped) * 100 if scaled_holdout_total_orders_cuped != 0 else np.nan
  sustainable_growth = ((incremental_orders_cuped + (incremental_flgp_cuped / non_holdout_flgp_per_order_cuped)) / scaled_holdout_total_orders_cuped) * 100 if scaled_holdout_total_orders_cuped != 0 else np.nan
  sustainable_growth = ((incremental_orders_cuped + (incremental_flgp_cuped / non_holdout_flgp_per_order_cuped)) / scaled_holdout_total_orders_cuped) * 100 if s

Unnamed: 0,entity,sustainable_growth,incremental_orders_cuped,incremental_flgp_cuped,percentage_change_orders_cuped,percentage_change_flgp_cuped,holdout_flgp_per_order_cuped,non_holdout_flgp_per_order_cuped,holdout_orders_per_user_cuped,non_holdout_orders_per_user_cuped,scaled_holdout_total_orders_cuped,non_holdout_total_orders_cuped,non_holdout_total_flgp_cuped,scaled_holdout_total_flgp_cuped
0,EF_GR,,138111.699112,0.0,6.11063,,0.0,0.0,2.178339,2.311449,2260188.0,2398299.0,0.0,0.0
1,FP_TW,,78427.237009,0.0,0.844197,,0.0,0.0,3.269494,3.297095,9290161.0,9368589.0,0.0,0.0
2,YS_TR,,143208.367893,0.0,1.723559,,0.0,0.0,1.985008,2.019221,8308874.0,8452083.0,0.0,0.0
3,PY_AR,,61342.206187,0.0,0.787178,,0.0,0.0,2.380435,2.399173,7792675.0,7854017.0,0.0,0.0
4,MJM_AT,,-707.678628,0.0,-0.075154,,0.0,0.0,2.288688,2.286968,941632.7,940925.0,0.0,0.0
5,FP_MY,,-24918.010045,0.0,-0.468499,,0.0,0.0,1.9025,1.893587,5318687.0,5293769.0,0.0,0.0
6,PY_EC,,18538.614435,0.0,1.862626,,0.0,0.0,2.125594,2.165186,995294.7,1013833.0,0.0,0.0
7,DJ_CZ,,1662.130284,0.0,0.144272,,0.0,0.0,2.250641,2.253888,1152083.0,1153745.0,0.0,0.0
8,NP_HU,,-306.15843,0.0,-0.027398,,0.0,0.0,2.397249,2.396592,1117465.0,1117159.0,0.0,0.0
9,FP_MM,,10888.645986,0.0,1.48277,,0.0,0.0,2.016146,2.046041,734345.1,745233.7,0.0,0.0


In [434]:
import numpy as np
import pandas as pd

def calculate_sustainable_growth(data):
    
    data = data.copy()

    entity = data['entity_id'].iloc[0]

    # Apply CUPED to FLGP
    data_flgp = data.dropna(subset=['flgp_pre', 'flgp_post'])
    theta_flgp = np.cov(data_flgp['flgp_pre'], data_flgp['flgp_post'])[0, 1] / np.var(data_flgp['flgp_pre'])
    data_flgp['flgp_post_cuped'] = data_flgp['flgp_post'] - theta_flgp * (data_flgp['flgp_pre'] - data_flgp['flgp_pre'].mean())

    # Apply CUPED to Orders
    data_orders = data.dropna(subset=['orders_pre', 'orders_post'])
    theta_orders = np.cov(data_orders['orders_pre'], data_orders['orders_post'])[0, 1] / np.var(data_orders['orders_pre'])
    data_orders['orders_post_cuped'] = data_orders['orders_post'] - theta_orders * (data_orders['orders_pre'] - data_orders['orders_pre'].mean())

    # Total FLGP and Orders (CUPED)
    holdout_total_flgp_cuped = data_flgp.loc[data_flgp['is_customer_holdout'], 'flgp_post_cuped'].sum()
    non_holdout_total_flgp_cuped = data_flgp.loc[~data_flgp['is_customer_holdout'], 'flgp_post_cuped'].sum()

    holdout_total_orders_cuped = data_orders.loc[data_orders['is_customer_holdout'], 'orders_post_cuped'].sum()
    non_holdout_total_orders_cuped = data_orders.loc[~data_orders['is_customer_holdout'], 'orders_post_cuped'].sum()

    # Normalize for Population Differences
    holdout_user_count = data['is_customer_holdout'].sum()
    non_holdout_user_count = (~data['is_customer_holdout']).sum()

    scaled_holdout_total_flgp_cuped = (holdout_total_flgp_cuped / holdout_user_count) * non_holdout_user_count if holdout_user_count != 0 else np.nan
    scaled_holdout_total_orders_cuped = (holdout_total_orders_cuped / holdout_user_count) * non_holdout_user_count if holdout_user_count != 0 else np.nan

    # FLGP per Order and Orders per User (CUPED)
    holdout_flgp_per_order_cuped = holdout_total_flgp_cuped / holdout_total_orders_cuped if holdout_total_orders_cuped != 0 else np.nan
    non_holdout_flgp_per_order_cuped = non_holdout_total_flgp_cuped / non_holdout_total_orders_cuped if non_holdout_total_orders_cuped != 0 else np.nan

    holdout_orders_per_user_cuped = holdout_total_orders_cuped / holdout_user_count if holdout_user_count != 0 else np.nan
    non_holdout_orders_per_user_cuped = non_holdout_total_orders_cuped / non_holdout_user_count if non_holdout_user_count != 0 else np.nan

    # Incremental Differences (CUPED)
    incremental_orders_cuped = non_holdout_total_orders_cuped - scaled_holdout_total_orders_cuped
    incremental_flgp_cuped = non_holdout_total_flgp_cuped - scaled_holdout_total_flgp_cuped

    # Percentage Changes (CUPED)
    percentage_change_orders_cuped = ((incremental_orders_cuped) / abs(scaled_holdout_total_orders_cuped)) * 100 if scaled_holdout_total_orders_cuped != 0 else np.nan
    percentage_change_flgp_cuped = ((incremental_flgp_cuped) / abs(scaled_holdout_total_flgp_cuped)) * 100 if scaled_holdout_total_flgp_cuped != 0 else np.nan

    # Sustainable Growth Calculation
    sustainable_growth = ((incremental_orders_cuped + (incremental_flgp_cuped / non_holdout_flgp_per_order_cuped)) / scaled_holdout_total_orders_cuped) * 100 if scaled_holdout_total_orders_cuped != 0 else np.nan

    return {
        'entity': entity,
        'sustainable_growth': sustainable_growth,
        'incremental_orders_cuped': incremental_orders_cuped,
        'incremental_flgp_cuped': incremental_flgp_cuped,
        'percentage_change_orders_cuped': percentage_change_orders_cuped,
        'percentage_change_flgp_cuped': percentage_change_flgp_cuped,
        'holdout_flgp_per_order_cuped': holdout_flgp_per_order_cuped,
        'non_holdout_flgp_per_order_cuped': non_holdout_flgp_per_order_cuped,
        'holdout_orders_per_user_cuped': holdout_orders_per_user_cuped,
        'non_holdout_orders_per_user_cuped': non_holdout_orders_per_user_cuped,
        'scaled_holdout_total_orders_cuped': scaled_holdout_total_orders_cuped,
        'non_holdout_total_orders_cuped': non_holdout_total_orders_cuped,
        'non_holdout_total_flgp_cuped': non_holdout_total_flgp_cuped,
        'scaled_holdout_total_flgp_cuped': scaled_holdout_total_flgp_cuped
    }


#calculate_sustainable_growth(growth_df)


results = []
for entity in growth_df['entity_id'].unique():
    entity_data = growth_df[growth_df['entity_id'] == entity]
    result = calculate_sustainable_growth(entity_data)
    results.append(result)

pd.DataFrame(results)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data_flgp['flgp_post_cuped'] = data_flgp['flgp_post'] - theta_flgp * (data_flgp['flgp_pre'] - data_flgp['flgp_pre'].mean())
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data_flgp['flgp_post_cuped'] = data_flgp['flgp_post'] - theta_flgp * (data_flgp['flgp_pre'] - data_flgp['flgp_pre'].mean())
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/inde

Unnamed: 0,entity,sustainable_growth,incremental_orders_cuped,incremental_flgp_cuped,percentage_change_orders_cuped,percentage_change_flgp_cuped,holdout_flgp_per_order_cuped,non_holdout_flgp_per_order_cuped,holdout_orders_per_user_cuped,non_holdout_orders_per_user_cuped,scaled_holdout_total_orders_cuped,non_holdout_total_orders_cuped,non_holdout_total_flgp_cuped,scaled_holdout_total_flgp_cuped
0,TB_KW,5.20573,62630.521326,278085.989648,1.369434,3.933325,1.545875,1.584974,4.942683,5.010369,4573459.0,4636090.0,7348083.0,7069997.0
1,NP_HU,-6.087691,-451.545453,-22097.966183,-0.040403,-5.704617,0.346611,0.32697,2.397477,2.396508,1117593.0,1117142.0,365271.9,387369.8
2,PY_AR,1.959808,61035.332413,131408.977231,0.774959,1.189726,1.402412,1.408184,2.403514,2.422141,7875942.0,7936977.0,11176730.0,11045320.0
3,AP_PA,-1.117159,18120.753392,-42890.356423,1.96536,-2.934394,1.585284,1.509106,2.538482,2.588372,922007.0,940127.8,1418752.0,1461643.0
4,EF_GR,7.729787,138111.699112,33514.267634,6.11063,1.549559,0.956923,0.915791,2.178339,2.311449,2260188.0,2398299.0,2196340.0,2162826.0
5,FP_TH,3.69653,12711.235313,21956.00623,0.858698,2.895131,0.512316,0.52266,2.086541,2.104458,1480292.0,1493003.0,780333.0,758377.0
6,HF_EG,10.187228,306731.794673,174457.213054,4.395453,5.873791,0.425613,0.43164,3.575357,3.73251,6978388.0,7285120.0,3144553.0,2970096.0
7,PY_PE,-1.055107,22710.521546,-13998.599588,1.404214,-2.367838,0.365543,0.351946,2.114728,2.144424,1617312.0,1640023.0,577198.9,591197.5
8,TB_AE,2.196914,111908.709401,222609.221847,1.202605,0.992242,2.41093,2.405919,5.003414,5.063585,9305524.0,9417433.0,22657580.0,22434970.0
9,TB_JO,-0.896654,-36440.282263,3775.976886,-2.378375,1.541213,0.159906,0.166327,2.34172,2.286025,1532150.0,1495710.0,248776.2,245000.3


In [377]:
import numpy as np
import pandas as pd
from scipy.stats import ttest_ind

def calculate_sustainable_growth(data):
    
    data = data.copy()

    # Apply CUPED to FLGP
    data_flgp = data.dropna(subset=['flgp_pre', 'flgp_post'])
    theta_flgp = np.cov(data_flgp['flgp_pre'], data_flgp['flgp_post'])[0, 1] / np.var(data_flgp['flgp_pre'])
    data_flgp['flgp_post_cuped'] = data_flgp['flgp_post'] - theta_flgp * (data_flgp['flgp_pre'] - data_flgp['flgp_pre'].mean())

    # Apply CUPED to Orders
    data_orders = data.dropna(subset=['orders_pre', 'orders_post'])
    theta_orders = np.cov(data_orders['orders_pre'], data_orders['orders_post'])[0, 1] / np.var(data_orders['orders_pre'])
    data_orders['orders_post_cuped'] = data_orders['orders_post'] - theta_orders * (data_orders['orders_pre'] - data_orders['orders_pre'].mean())

    # T-tests for significance
    t_stat_orders, p_value_orders = ttest_ind(
        data_orders.loc[data_orders['is_customer_holdout'], 'orders_post_cuped'],
        data_orders.loc[~data_orders['is_customer_holdout'], 'orders_post_cuped'],
        equal_var=False
    )

    t_stat_flgp, p_value_flgp = ttest_ind(
        data_flgp.loc[data_flgp['is_customer_holdout'], 'flgp_post_cuped'],
        data_flgp.loc[~data_flgp['is_customer_holdout'], 'flgp_post_cuped'],
        equal_var=False
    )

    # Total FLGP and Orders (CUPED)
    holdout_total_flgp_cuped = data_flgp.loc[data_flgp['is_customer_holdout'], 'flgp_post_cuped'].sum()
    non_holdout_total_flgp_cuped = data_flgp.loc[~data_flgp['is_customer_holdout'], 'flgp_post_cuped'].sum()

    holdout_total_orders_cuped = data_orders.loc[data_orders['is_customer_holdout'], 'orders_post_cuped'].sum()
    non_holdout_total_orders_cuped = data_orders.loc[~data_orders['is_customer_holdout'], 'orders_post_cuped'].sum()

    # Normalize for Population Differences
    holdout_user_count = data['is_customer_holdout'].sum()
    non_holdout_user_count = (~data['is_customer_holdout']).sum()

    scaled_holdout_total_flgp_cuped = (holdout_total_flgp_cuped / holdout_user_count) * non_holdout_user_count if holdout_user_count != 0 else np.nan
    scaled_holdout_total_orders_cuped = (holdout_total_orders_cuped / holdout_user_count) * non_holdout_user_count if holdout_user_count != 0 else np.nan

    # FLGP per Order and Orders per User (CUPED)
    holdout_flgp_per_order_cuped = holdout_total_flgp_cuped / holdout_total_orders_cuped if holdout_total_orders_cuped != 0 else np.nan
    non_holdout_flgp_per_order_cuped = non_holdout_total_flgp_cuped / non_holdout_total_orders_cuped if non_holdout_total_orders_cuped != 0 else np.nan

    holdout_orders_per_user_cuped = holdout_total_orders_cuped / holdout_user_count if holdout_user_count != 0 else np.nan
    non_holdout_orders_per_user_cuped = non_holdout_total_orders_cuped / non_holdout_user_count if non_holdout_user_count != 0 else np.nan

    # Incremental Differences (CUPED)
    incremental_orders_cuped = non_holdout_total_orders_cuped - scaled_holdout_total_orders_cuped
    incremental_flgp_cuped = non_holdout_total_flgp_cuped - scaled_holdout_total_flgp_cuped

    # Percentage Changes (CUPED)
    percentage_change_orders_cuped = ((incremental_orders_cuped) / abs(scaled_holdout_total_orders_cuped)) * 100 if scaled_holdout_total_orders_cuped != 0 else np.nan
    percentage_change_flgp_cuped = ((incremental_flgp_cuped) / abs(scaled_holdout_total_flgp_cuped)) * 100 if scaled_holdout_total_flgp_cuped != 0 else np.nan

    # Sustainable Growth Calculation with significance filter
    if p_value_orders < 0.05 and p_value_flgp < 0.05:
        sustainable_growth = ((incremental_orders_cuped + (incremental_flgp_cuped / non_holdout_flgp_per_order_cuped)) / scaled_holdout_total_orders_cuped) * 100 if scaled_holdout_total_orders_cuped != 0 else np.nan
    elif p_value_orders < 0.05:
        sustainable_growth = (incremental_orders_cuped / scaled_holdout_total_orders_cuped) * 100 if scaled_holdout_total_orders_cuped != 0 else np.nan
    elif p_value_flgp < 0.05:
        sustainable_growth = ((incremental_flgp_cuped / non_holdout_flgp_per_order_cuped) / scaled_holdout_total_orders_cuped) * 100 if scaled_holdout_total_orders_cuped != 0 else np.nan
    else:
        sustainable_growth = 0

    return {
        'sustainable_growth': sustainable_growth,
        'incremental_orders_cuped': incremental_orders_cuped if p_value_orders < 0.05 else 0,
        'incremental_flgp_cuped': incremental_flgp_cuped if p_value_flgp < 0.05 else 0,
        'percentage_change_orders_cuped': percentage_change_orders_cuped if p_value_orders < 0.05 else 0,
        'percentage_change_flgp_cuped': percentage_change_flgp_cuped if p_value_flgp < 0.05 else 0,
        'holdout_flgp_per_order_cuped': holdout_flgp_per_order_cuped,
        'non_holdout_flgp_per_order_cuped': non_holdout_flgp_per_order_cuped,
        'holdout_orders_per_user_cuped': holdout_orders_per_user_cuped,
        'non_holdout_orders_per_user_cuped': non_holdout_orders_per_user_cuped,
        'scaled_holdout_total_orders_cuped': scaled_holdout_total_orders_cuped,
        'non_holdout_total_orders_cuped': non_holdout_total_orders_cuped,
        'non_holdout_total_flgp_cuped': non_holdout_total_flgp_cuped,
        'scaled_holdout_total_flgp_cuped': scaled_holdout_total_flgp_cuped,
        't_stat_orders': t_stat_orders,
        'p_value_orders': p_value_orders,
        't_stat_flgp': t_stat_flgp,
        'p_value_flgp': p_value_flgp
    }


results = []
for entity in growth_df['entity_id'].unique():
    entity_data = growth_df[growth_df['entity_id'] == entity]
    result = calculate_sustainable_growth(entity_data)
    result['entity_id'] = entity
    results.append(result)

pd.DataFrame(results)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data_flgp['flgp_post_cuped'] = data_flgp['flgp_post'] - theta_flgp * (data_flgp['flgp_pre'] - data_flgp['flgp_pre'].mean())
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data_flgp['flgp_post_cuped'] = data_flgp['flgp_post'] - theta_flgp * (data_flgp['flgp_pre'] - data_flgp['flgp_pre'].mean())
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/inde

Unnamed: 0,sustainable_growth,incremental_orders_cuped,incremental_flgp_cuped,percentage_change_orders_cuped,percentage_change_flgp_cuped,holdout_flgp_per_order_cuped,non_holdout_flgp_per_order_cuped,holdout_orders_per_user_cuped,non_holdout_orders_per_user_cuped,scaled_holdout_total_orders_cuped,non_holdout_total_orders_cuped,non_holdout_total_flgp_cuped,scaled_holdout_total_flgp_cuped,t_stat_orders,p_value_orders,t_stat_flgp,p_value_flgp,entity_id
0,5.20573,62630.521326,278085.989648,1.369434,3.933325,1.545875,1.584974,4.942683,5.010369,4573459.0,4636090.0,7348083.0,7069997.0,-2.40529,0.016169,-3.102915,0.001920087,TB_KW
1,0.0,0.0,0.0,0.0,0.0,0.512316,0.52266,2.086541,2.104458,1480292.0,1493003.0,780333.0,758377.0,-0.811431,0.417131,-0.841388,0.4001645,FP_TH
2,0.0,0.0,0.0,0.0,0.0,0.346611,0.32697,2.397477,2.396508,1117593.0,1117142.0,365271.9,387369.8,0.039099,0.968813,0.653789,0.5132763,NP_HU
3,2.093355,106787.858908,0.0,2.093355,0.0,1.157772,1.116935,1.547378,1.57977,5101277.0,5208065.0,5817068.0,5906114.0,-4.366475,1.3e-05,1.429839,0.1527738,HS_SA
4,152.711307,-50934.931689,567724.803717,-2.303958,270.442022,-0.094956,0.165662,3.900626,3.810757,2210758.0,2159823.0,357800.0,-209924.8,2.495507,0.012591,-8.735763,3.025296e-18,FP_SG
5,0.0,0.0,0.0,0.0,0.0,1.322854,1.256943,3.030671,2.996799,483970.9,478561.9,601524.9,640222.8,0.645188,0.518851,0.96297,0.3356803,FY_CY
6,0.0,0.0,0.0,0.0,0.0,0.696914,0.696624,2.695149,2.735235,1036619.0,1052037.0,732873.7,722434.4,-1.074723,0.282531,-0.711069,0.4770838,FP_MM
7,-2.378375,-36440.282263,0.0,-2.378375,0.0,0.159906,0.166327,2.34172,2.286025,1532150.0,1495710.0,248776.2,245000.3,2.561887,0.010421,-0.277358,0.7815132,TB_JO
8,0.0,0.0,0.0,0.0,0.0,0.630914,0.649315,3.748408,3.74849,1781791.0,1781830.0,1156968.0,1124157.0,-0.002414,0.998074,-0.746264,0.4555359,TB_OM
9,2.215725,43126.673303,0.0,2.215725,0.0,0.648727,0.657167,2.909673,2.974143,1946391.0,1989518.0,1307445.0,1262677.0,-2.534445,0.011273,-1.556027,0.1197412,TB_IQ


In [374]:
import numpy as np
import pandas as pd
from scipy.stats import ttest_ind

def calculate_sustainable_growth(data):
    
    data = data.copy()

    # Apply CUPED to FLGP
    data_flgp = data.dropna(subset=['flgp_pre', 'flgp_post'])
    theta_flgp = np.cov(data_flgp['flgp_pre'], data_flgp['flgp_post'])[0, 1] / np.var(data_flgp['flgp_pre'])
    data_flgp['flgp_post_cuped'] = data_flgp['flgp_post'] - theta_flgp * (data_flgp['flgp_pre'] - data_flgp['flgp_pre'].mean())

    # Apply CUPED to Orders
    data_orders = data.dropna(subset=['orders_pre', 'orders_post'])
    theta_orders = np.cov(data_orders['orders_pre'], data_orders['orders_post'])[0, 1] / np.var(data_orders['orders_pre'])
    data_orders['orders_post_cuped'] = data_orders['orders_post'] - theta_orders * (data_orders['orders_pre'] - data_orders['orders_pre'].mean())

    # Total FLGP and Orders (CUPED)
    holdout_total_flgp_cuped = data_flgp.loc[data_flgp['is_customer_holdout'], 'flgp_post_cuped'].sum()
    non_holdout_total_flgp_cuped = data_flgp.loc[~data_flgp['is_customer_holdout'], 'flgp_post_cuped'].sum()

    holdout_total_orders_cuped = data_orders.loc[data_orders['is_customer_holdout'], 'orders_post_cuped'].sum()
    non_holdout_total_orders_cuped = data_orders.loc[~data_orders['is_customer_holdout'], 'orders_post_cuped'].sum()

    # Normalize for Population Differences
    holdout_user_count = data['is_customer_holdout'].sum()
    non_holdout_user_count = (~data['is_customer_holdout']).sum()

    scaled_holdout_total_flgp_cuped = (holdout_total_flgp_cuped / holdout_user_count) * non_holdout_user_count if holdout_user_count != 0 else np.nan
    scaled_holdout_total_orders_cuped = (holdout_total_orders_cuped / holdout_user_count) * non_holdout_user_count if holdout_user_count != 0 else np.nan

    # FLGP per Order and Orders per User (CUPED)
    holdout_flgp_per_order_cuped = holdout_total_flgp_cuped / holdout_total_orders_cuped if holdout_total_orders_cuped != 0 else np.nan
    non_holdout_flgp_per_order_cuped = non_holdout_total_flgp_cuped / non_holdout_total_orders_cuped if non_holdout_total_orders_cuped != 0 else np.nan

    holdout_orders_per_user_cuped = holdout_total_orders_cuped / holdout_user_count if holdout_user_count != 0 else np.nan
    non_holdout_orders_per_user_cuped = non_holdout_total_orders_cuped / non_holdout_user_count if non_holdout_user_count != 0 else np.nan

    # Incremental Differences (CUPED)
    incremental_orders_cuped = non_holdout_total_orders_cuped - scaled_holdout_total_orders_cuped
    incremental_flgp_cuped = non_holdout_total_flgp_cuped - scaled_holdout_total_flgp_cuped

    # Percentage Changes (CUPED)
    percentage_change_orders_cuped = ((incremental_orders_cuped) / abs(scaled_holdout_total_orders_cuped)) * 100 if scaled_holdout_total_orders_cuped != 0 else np.nan
    percentage_change_flgp_cuped = ((incremental_flgp_cuped) / abs(scaled_holdout_total_flgp_cuped)) * 100 if scaled_holdout_total_flgp_cuped != 0 else np.nan

    # Sustainable Growth Calculation
    sustainable_growth = ((incremental_orders_cuped + (incremental_flgp_cuped / non_holdout_flgp_per_order_cuped)) / scaled_holdout_total_orders_cuped) * 100 if scaled_holdout_total_orders_cuped != 0 else np.nan

    # T-tests for significance
    t_stat_orders, p_value_orders = ttest_ind(
        data_orders.loc[data_orders['is_customer_holdout'], 'orders_post_cuped'],
        data_orders.loc[~data_orders['is_customer_holdout'], 'orders_post_cuped'],
        equal_var=False
    )

    t_stat_flgp, p_value_flgp = ttest_ind(
        data_flgp.loc[data_flgp['is_customer_holdout'], 'flgp_post_cuped'],
        data_flgp.loc[~data_flgp['is_customer_holdout'], 'flgp_post_cuped'],
        equal_var=False
    )

    return {
        'sustainable_growth': sustainable_growth,
        'incremental_orders_cuped': incremental_orders_cuped,
        'incremental_flgp_cuped': incremental_flgp_cuped,
        'percentage_change_orders_cuped': percentage_change_orders_cuped,
        'percentage_change_flgp_cuped': percentage_change_flgp_cuped,
        'holdout_flgp_per_order_cuped': holdout_flgp_per_order_cuped,
        'non_holdout_flgp_per_order_cuped': non_holdout_flgp_per_order_cuped,
        'holdout_orders_per_user_cuped': holdout_orders_per_user_cuped,
        'non_holdout_orders_per_user_cuped': non_holdout_orders_per_user_cuped,
        'scaled_holdout_total_orders_cuped': scaled_holdout_total_orders_cuped,
        'non_holdout_total_orders_cuped': non_holdout_total_orders_cuped,
        'non_holdout_total_flgp_cuped': non_holdout_total_flgp_cuped,
        'scaled_holdout_total_flgp_cuped': scaled_holdout_total_flgp_cuped,
        't_stat_orders': t_stat_orders,
        'p_value_orders': p_value_orders,
        't_stat_flgp': t_stat_flgp,
        'p_value_flgp': p_value_flgp
    }


results = []
for entity in growth_df['entity_id'].unique():
    entity_data = growth_df[growth_df['entity_id'] == entity]
    result = calculate_sustainable_growth(entity_data)
    result['entity_id'] = entity
    results.append(result)

pd.DataFrame(results)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data_flgp['flgp_post_cuped'] = data_flgp['flgp_post'] - theta_flgp * (data_flgp['flgp_pre'] - data_flgp['flgp_pre'].mean())
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data_flgp['flgp_post_cuped'] = data_flgp['flgp_post'] - theta_flgp * (data_flgp['flgp_pre'] - data_flgp['flgp_pre'].mean())
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/inde

Unnamed: 0,sustainable_growth,incremental_orders_cuped,incremental_flgp_cuped,percentage_change_orders_cuped,percentage_change_flgp_cuped,holdout_flgp_per_order_cuped,non_holdout_flgp_per_order_cuped,holdout_orders_per_user_cuped,non_holdout_orders_per_user_cuped,scaled_holdout_total_orders_cuped,non_holdout_total_orders_cuped,non_holdout_total_flgp_cuped,scaled_holdout_total_flgp_cuped,t_stat_orders,p_value_orders,t_stat_flgp,p_value_flgp,entity_id
0,5.20573,62630.521326,278085.989648,1.369434,3.933325,1.545875,1.584974,4.942683,5.010369,4573459.0,4636090.0,7348083.0,7069997.0,-2.40529,0.016169,-3.102915,0.001920087,TB_KW
1,3.69653,12711.235313,21956.00623,0.858698,2.895131,0.512316,0.52266,2.086541,2.104458,1480292.0,1493003.0,780333.0,758377.0,-0.811431,0.417131,-0.841388,0.4001645,FP_TH
2,-6.087691,-451.545453,-22097.966183,-0.040403,-5.704617,0.346611,0.32697,2.397477,2.396508,1117593.0,1117142.0,365271.9,387369.8,0.039099,0.968813,0.653789,0.5132763,NP_HU
3,0.530547,106787.858908,-89045.569885,2.093355,-1.507685,1.157772,1.116935,1.547378,1.57977,5101277.0,5208065.0,5817068.0,5906114.0,-4.366475,1.3e-05,1.429839,0.1527738,HS_SA
4,152.711307,-50934.931689,567724.803717,-2.303958,270.442022,-0.094956,0.165662,3.900626,3.810757,2210758.0,2159823.0,357800.0,-209924.8,2.495507,0.012591,-8.735763,3.025296e-18,FP_SG
5,-7.479041,-5409.04049,-38697.924111,-1.117637,-6.044446,1.322854,1.256943,3.030671,2.996799,483970.9,478561.9,601524.9,640222.8,0.645188,0.518851,0.96297,0.3356803,FY_CY
6,2.932932,15417.786582,10439.286924,1.487315,1.445015,0.696914,0.696624,2.695149,2.735235,1036619.0,1052037.0,732873.7,722434.4,-1.074723,0.282531,-0.711069,0.4770838,FP_MM
7,-0.896654,-36440.282263,3775.976886,-2.378375,1.541213,0.159906,0.166327,2.34172,2.286025,1532150.0,1495710.0,248776.2,245000.3,2.561887,0.010421,-0.277358,0.7815132,TB_JO
8,2.838187,39.229546,32810.726563,0.002202,2.918695,0.630914,0.649315,3.748408,3.74849,1781791.0,1781830.0,1156968.0,1124157.0,-0.002414,0.998074,-0.746264,0.4555359,TB_OM
9,5.715644,43126.673303,44767.575477,2.215725,3.545449,0.648727,0.657167,2.909673,2.974143,1946391.0,1989518.0,1307445.0,1262677.0,-2.534445,0.011273,-1.556027,0.1197412,TB_IQ
