### Import packages

In [1]:
import re
import warnings
from datetime import datetime

import pandas as pd
from google.cloud import bigquery
from google.cloud import bigquery_storage
warnings.filterwarnings(action="ignore")

### User-defined global inputs

In [7]:
# --- BigQuery settings ---
# The project that is displayed at the top of your BQ console
billing_project = "logistics-customer-staging"
# For Global teams, use "cl". For local and regional teams, use "curated_data_shared"
dataset = "cl"

# --- Test selection ---
# Entity ID(s) that have the tests you want to analyze
entities_of_choice = [
    "HS_SA",
]

# When True, only the tests named in `custom_test_list` are analyzed.
# When False, every AB test of the chosen entities whose name contains one of the
# keywords in `test_keyword_list` is analyzed instead.
is_use_defined_test_names = True
custom_test_list = [
    "SA_20230712_R_00_O_Riyadh_ Locals elasticity test",
]

# Keywords matched against the test name (only used when `is_use_defined_test_names` is False)
test_keyword_list = [
    "elasticity",
    "customer_location",
]

#############################################################################################################################################  
**<center>#--------STARTING HERE, YOU DON'T NEED TO DEFINE ANY OTHER INPUTS--------#</center>**  
#############################################################################################################################################

### Instantiate a BQ client

In [8]:
# BigQuery client used to run the queries in the cells below; it is bound to the `billing_project` defined above
client = bigquery.Client(project=billing_project)
# BigQuery Storage read client; it is handed to `to_dataframe()` in the cells below when query results are downloaded
bqstorage_client = bigquery_storage.BigQueryReadClient()

### Pull the names of the elasticity tests

In [9]:
# Fetch every AB test that is configured for the selected entities.
# The table path cannot be bound as a query parameter, so the dataset name is still interpolated
# into the SQL text. The entity IDs are passed as an array query parameter instead of being spliced
# into the string, so quotes or other special characters in the values cannot break the query.
test_name_query = """
    SELECT DISTINCT
        entity_id,
        test_name,
        test_id,
        test_start_date,
        test_end_date,
        experiment_type
    FROM `fulfillment-dwh-production.{dataset}.dps_experiment_setups`
    WHERE LOWER(experiment_type) = "ab" AND entity_id IN UNNEST(@entities_of_choice)
    ORDER BY test_end_date DESC
""".format(dataset=dataset)

test_name_job_config = bigquery.QueryJobConfig(
    query_parameters=[
        bigquery.ArrayQueryParameter("entities_of_choice", "STRING", list(entities_of_choice)),
    ]
)

# to_dataframe() already returns a pandas DataFrame, so no additional pd.DataFrame() wrapper is needed
df_all_ab_tests = (
    client.query(query=test_name_query, job_config=test_name_job_config)
    .result()
    .to_dataframe(progress_bar_type="tqdm", bqstorage_client=bqstorage_client)
)

# If is_use_defined_test_names is True, keep only the tests in custom_test_list.
# Otherwise, keep the tests whose name contains any of the keywords in test_keyword_list.
if is_use_defined_test_names:
    df_test_names = df_all_ab_tests[df_all_ab_tests["test_name"].isin(custom_test_list)]
else:
    # Escape the keywords so that they are matched literally and not interpreted as regex syntax.
    # na=False treats rows without a test name as "no match" instead of producing NaN in the mask.
    keyword_pattern = "|".join(re.escape(keyword) for keyword in test_keyword_list)
    df_test_names = df_all_ab_tests[
        df_all_ab_tests["test_name"].str.contains(keyword_pattern, case=False, na=False)
    ]

# Fail early with a clear message instead of an obscure error when computing the lookback window below
if df_test_names.empty:
    raise ValueError(
        "No AB tests matched the selection. Check entities_of_choice, custom_test_list and test_keyword_list."
    )

# Create lists of test names and test IDs out of df_test_names
list_test_names = df_test_names["test_name"].tolist()
list_test_ids = df_test_names["test_id"].tolist()

# Define the lookback window for the analysis: earliest test start date to latest test end date
# NOTE(review): if every selected test has a NULL test_end_date, max() is NaT and the end-date line fails —
# confirm how tests without an end date should be handled
earliest_test_start = df_test_names["test_start_date"].min()
latest_test_end = df_test_names["test_end_date"].max()
lookback_period_start_date = earliest_test_start.date().strftime("%Y-%m-%d")
lookback_period_end_date = latest_test_end.date().strftime("%Y-%m-%d")

Downloading: 100%|██████████|


### Pull the test data

In [20]:
# Pull the conversion-rate (CVR) and average delivery fee per test variant for the selected tests.
#   - cvr_raw_data / cvr_agg_data: session events per test variant, aggregated into cvr and cvr3
#   - orders_raw_data / orders_agg_data: own-delivery test orders, aggregated into the average delivery fee
# The final SELECT joins both aggregates on (entity_id, test_name, test_variant).
# NOTE(review): cvr3 is computed in cvr_agg_data but is not part of the final SELECT — confirm whether it is needed.
#
# Only the dataset name is interpolated into the SQL text (table paths cannot be query parameters).
# The lookback dates and the test names are bound as query parameters, so a test name that contains
# a double quote or other special characters cannot break the query.
if not list_test_names:
    raise ValueError("list_test_names is empty. Select at least one test before pulling the test data.")

test_data_query = """
	WITH cvr_raw_data AS (
		SELECT
			ent.segment AS region,
			x.entity_id,
			x.created_date,
			x.perseus_session_id,
			x.perseus_session_created_at,
			x.sessions.perseus_client_id,
			x.dps_session_id,
			x.sessions.variant AS test_variant,
			x.sessions.experiment_id,
			tst.test_name,
			x.perseus_location,
			
			e.vendor_code,
			e.event_action,
			e.event_time,
			e.vendor_delivery_fee,
			sch.scheme_id,
			sch.vendor_price_scheme_type
		FROM `fulfillment-dwh-production.{dataset}.dps_sessions_mapped_to_perseus_sessions` x
		LEFT JOIN UNNEST(events) AS e
		LEFT JOIN UNNEST(schemes) AS sch
		LEFT JOIN `fulfillment-dwh-production.{dataset}.dps_experiment_setups` tst ON x.entity_id = tst.entity_id AND x.sessions.experiment_id = tst.test_id
		INNER JOIN `fulfillment-dwh-production.curated_data_shared_coredata.global_entities` ent ON x.entity_id = ent.global_entity_id -- Get the region associated with every entity_id
		WHERE TRUE
			AND x.created_date BETWEEN DATE(@lookback_period_start_date) AND DATE(@lookback_period_end_date) -- Filter for the right lookback window
			AND tst.test_name IN UNNEST(@test_names) -- Filter for the right tests
			AND x.sessions.variant != "Original" -- Exclude orders from ASAs
			AND e.vertical_parent_in_test = TRUE -- Only include sessions that have the same parent vertical as the one configured in the test
		-- This part eliminates duplicate events for the same session, event_action, and vendor code
		QUALIFY ROW_NUMBER() OVER (
			PARTITION BY x.entity_id, tst.test_name, x.perseus_session_id, x.sessions.perseus_client_id, e.event_action, e.vendor_code
			ORDER BY e.event_time DESC
		) = 1
    ),
    
    cvr_agg_data AS (
		SELECT
			region,
			entity_id,
			test_name,
			test_variant,
			COALESCE(ROUND(COUNT(DISTINCT CASE WHEN event_action = "transaction" THEN perseus_session_id END) / NULLIF(COUNT(DISTINCT perseus_session_id), 0), 5), 0) AS cvr,
			COALESCE(
				ROUND(
					COUNT(DISTINCT CASE WHEN event_action = "transaction" THEN perseus_session_id END) /
					NULLIF(COUNT(DISTINCT CASE WHEN event_action = "shop_details.loaded" THEN perseus_session_id END), 0)
				, 5)
			, 0) AS cvr3
		FROM cvr_raw_data
		GROUP BY 1, 2, 3, 4
    ),
    
    orders_raw_data AS (
		SELECT
			-- Identifiers and supplementary fields     
			-- Date and time
			a.created_date AS created_date_utc,
			
			-- Location of order
			ent.segment AS region,
			a.entity_id,
            a.city_name,
			a.city_id,
			a.zone_name,
			a.zone_id,

			-- Order/customer/vendor identifiers
			a.test_variant,
			a.test_id,
			b.test_name,
			a.platform_order_code,
			a.scheme_id,
			a.vendor_price_scheme_type,	-- The assignment type of the scheme to the vendor during the time of the order, such as "Automatic", "Manual", "Campaign", and "Country Fallback"
			a.vendor_id,
			
			-- Business KPIs
			a.delivery_fee_local
		FROM `fulfillment-dwh-production.{dataset}.dps_sessions_mapped_to_orders` a
		LEFT JOIN `fulfillment-dwh-production.{dataset}.dps_test_orders` b ON a.entity_id = b.entity_id AND a.order_id = b.order_id
		INNER JOIN `fulfillment-dwh-production.curated_data_shared_coredata.global_entities` ent ON a.entity_id = ent.global_entity_id -- Get the region associated with every entity_id
		WHERE TRUE
			AND a.created_date BETWEEN DATE(@lookback_period_start_date) AND DATE(@lookback_period_end_date)
			AND b.test_name IN UNNEST(@test_names)
			AND a.is_own_delivery -- OD orders only
			AND a.test_variant != "Original" -- Exclude orders from ASAs
			AND a.is_match_test_vertical
	),
    
    orders_agg_data AS (
		SELECT
        	region,
			entity_id,
			test_name,
			test_variant,
            AVG(delivery_fee_local) AS avg_df_local,
		FROM orders_raw_data
		GROUP BY 1, 2, 3, 4
    )
    
    -- Join the CVR and order tables
    SELECT
		cvr.region,
		cvr.entity_id,
		cvr.test_name,
		cvr.test_variant,
		cvr.cvr,
		ord.avg_df_local
	FROM cvr_agg_data cvr
	LEFT JOIN orders_agg_data ord USING(entity_id, test_name, test_variant)
	ORDER BY 1, 2, 3, 4
""".format(dataset=dataset)

# The dates are bound as STRING parameters and converted with DATE() inside the query,
# which mirrors the DATE("YYYY-MM-DD") literals used before
test_data_job_config = bigquery.QueryJobConfig(
    query_parameters=[
        bigquery.ScalarQueryParameter("lookback_period_start_date", "STRING", lookback_period_start_date),
        bigquery.ScalarQueryParameter("lookback_period_end_date", "STRING", lookback_period_end_date),
        bigquery.ArrayQueryParameter("test_names", "STRING", list(list_test_names)),
    ]
)

# to_dataframe() already returns a pandas DataFrame, so no additional pd.DataFrame() wrapper is needed
df_test_data = (
    client.query(query=test_data_query, job_config=test_data_job_config)
    .result()
    .to_dataframe(bqstorage_client=bqstorage_client, progress_bar_type="tqdm")
)

# Print the length of the data frame
print(f"The length of the data frame is: {len(df_test_data)}")

# Display the head of df_test_data
df_test_data.head()

Downloading: 100%|██████████|
The length of the data frame is: 5


Unnamed: 0,region,entity_id,test_name,test_variant,cvr,avg_df_local
0,MENA,HS_SA,SA_20230712_R_00_O_Riyadh_ Locals elasticity test,Control,0.37435,17.245147208
1,MENA,HS_SA,SA_20230712_R_00_O_Riyadh_ Locals elasticity test,Variation1,0.36915,18.985161982
2,MENA,HS_SA,SA_20230712_R_00_O_Riyadh_ Locals elasticity test,Variation2,0.37064,18.131288756
3,MENA,HS_SA,SA_20230712_R_00_O_Riyadh_ Locals elasticity test,Variation3,0.37822,16.358055118
4,MENA,HS_SA,SA_20230712_R_00_O_Riyadh_ Locals elasticity test,Variation4,0.37986,15.469319561
