# Introduction

In [1]:
# load packages
#import folium
#from folium import FeatureGroup, LayerControl
#from folium.plugins import TimestampedGeoJson
#from folium import plugins
import datetime
import json
import os
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from pandas.io import gbq
from google.cloud import bigquery
import plotly.offline as pyo
import plotly.graph_objs as go
import plotly.express as px
from plotly.subplots import make_subplots
import plotly.graph_objects as go
#import geopandas
from shapely import wkt
import scipy
from scipy import signal
%matplotlib inline
from google.colab import drive # to allow colab save file in my drive
drive.mount('/content/drive')
from google.colab import auth
auth.authenticate_user()
print('Authenticated')
cwd=os.getcwd()
cwd
%unload_ext google.colab.data_table

Mounted at /content/drive
Authenticated
The google.colab.data_table extension is not loaded.


In [2]:
# define project id
project_id = "fulfillment-dwh-production"
client = bigquery.Client(project = project_id)
%cd "/content/drive/Shared drives/Global Pricing/2 - Entities/APAC/Singapore/7. DPS testing/1. DPS_ABtest_20200817"
%pwd
%ls

[Errno 2] No such file or directory: '/content/drive/Shared drives/Global Pricing/2 - Entities/APAC/Singapore/7. DPS testing/1. DPS_ABtest_20200817'
/content
adc.json  [0m[01;34mdrive[0m/  [01;34msample_data[0m/


# 2. Data analysis

## Sanity Check

In [None]:
# Pre-condition check
query_user = """
WITH city_data AS (
  SELECT p.entity_id
    , country_code
    , ci.name AS city_name
    , ci.id AS city_id
    , zo.shape AS zone_shape 
    , zo.name AS zone_name
    , zo.id AS zone_id
  FROM cl.countries co
  LEFT JOIN UNNEST(co.platforms) p
  LEFT JOIN UNNEST(co.cities) ci
  LEFT JOIN UNNEST(ci.zones) zo
  WHERE country_code = "sg"
  and zo.id = 35
)
 
SELECT  
cast(DATETIME(created_at, "Asia/Singapore") as date) as local_date
--, extract(hour from created_at) as hour
, customer.variant
, count(distinct customer.id) as user_count
FROM `fulfillment-dwh-production.cl.dynamic_pricing_user_sessions` s
left join city_data cd ON s.entity_id = cd.entity_id
WHERE cast(DATETIME(created_at, "Asia/Singapore") as date) between "2020-08-21" and "2020-08-31"
and created_date between "2020-08-20" and "2020-09-01"
and s.entity_id = "FP_SG"
and customer.variant in ("Variation1", "Control")
and ST_CONTAINS(cd.zone_shape, customer.location) IS TRUE
group by 1,2
    """

user = client.query(query_user).to_dataframe()
user.head()

Unnamed: 0,local_date,variant,user_count
0,2020-08-22,Variation1,4023
1,2020-08-29,Variation1,4026
2,2020-08-24,Control,13704
3,2020-08-30,Control,15777
4,2020-08-23,Control,16846


In [None]:
user_group = user.pivot_table("user_count", index=["local_date"], columns ="variant", aggfunc = "sum")
user_group = user_group.reset_index()
user_group["Control1"] = user_group["Control"]/4
user_group["delta"]=user_group["Variation1"]/user_group["Control1"]-1
#user_group['delta'] = user_group['delta'].astype(float).map(lambda n: '{:.2%}'.format(n))
user_group

variant,local_date,Control,Variation1,Control1,delta
0,2020-08-21,14115,3539,3528.75,0.002905
1,2020-08-22,15810,4023,3952.5,0.017837
2,2020-08-23,16846,4233,4211.5,0.005105
3,2020-08-24,13704,3497,3426.0,0.020724
4,2020-08-25,13820,3511,3455.0,0.016208
5,2020-08-26,13480,3451,3370.0,0.024036
6,2020-08-27,13042,3438,3260.5,0.05444
7,2020-08-28,14010,3580,3502.5,0.022127
8,2020-08-29,15712,4026,3928.0,0.024949
9,2020-08-30,15777,4175,3944.25,0.058503


In [None]:
fig = go.Figure()
fig = make_subplots(specs=[[{"secondary_y": True}]])
fig.add_trace(go.Scatter(x =user_group.local_date, y=user_group.Control1, mode="lines", name ="control"), secondary_y=False)
fig.add_trace(go.Scatter(x =user_group.local_date, y=user_group.Variation1, mode="lines", name ="variation"), secondary_y=False)
fig.add_trace(go.Scatter(x =user_group.local_date, y=user_group.delta, mode="lines", name ="delta", line = dict(color='royalblue', width=4, dash='dash')), secondary_y=True)
# Add figure title
fig.update_layout(title_text="Daily users in Control and Test")
# Set x-axis title
fig.update_xaxes(title_text="Date")
# Set y-axes titles
fig.update_yaxes(title_text="Daily user amount", showgrid=False, secondary_y=False)
fig.update_yaxes(title_text="Delta % difference of Variation1 vs Control", showgrid=False,secondary_y=True)
fig.show()

## 2.1. Economics Analysis 

In [6]:
query1 = """
WITH 

costs as (
  select
    p.entity_id,
    l.platform_order_code platform_order_code,
    sum(p.delivery_costs) delivery_costs,
    sum(p.delivery_costs_eur) delivery_costs_eur
  from 
    (select
      entity_id,
      country_code,
      created_date,
      order_id,
      delivery_costs,
      delivery_costs_eur,
      row_number() over(partition by entity_id, order_id order by created_date desc) as rank
    from cl.utr_timings) p
  left join cl.orders l on p.order_id = l.order_id and p.country_code = l.country_code
  where p.entity_id = "FP_SG" and p.country_code = "sg"
    and rank = 1
  group by 1,2)

select
zone_name
, zone_id
, vertical_type
, operating_system
--, created_date
, cast(DATETIME(created_at, timezone) as date) as local_date
, extract(hour from (DATETIME(created_at, timezone))) as local_hour
, cast(c.commission_local as float64) commission_local
, cast(c.commission_eur as float64) commission_eur
, vendor_code
, platform_order_code_ga
, variant
, dps_delivery_fee_local
, dps_surge_fee_local
, dps_travel_time_local
, gmv_eur
, gfv_eur
, delivery_fee_eur
, delivery_fee_local_accounting
, travel_time_distance_km
, mean_delay
, travel_time
, to_customer_time
, to_vendor_time
, delivery_distance
, actual_delivery_time
, order_delay_mins
, delivery_costs
, delivery_costs_eur
from cl._dps_sessions_mapped_to_orders o
left join pandata_raw_il_backend_latest.fct_order_commissions c on o.entity_id = c.global_entity_id and o.platform_order_code_ga = c.order_code
#left join `dhh---analytics-apac.pandata.fct_orders` p on o.platform_order_code = p.order_code_google and p.rdbms_id = 15
#left join cl.orders co on o.entity_id = co.entity.id and o.platform_order_code_ga = co.platform_order_code
left join costs cos on cos.entity_id =o.entity_id and cos.platform_order_code = o.platform_order_code
where o.entity_id = "FP_SG"
and o.zone_id = 35
and cast(DATETIME(created_at, timezone) as date) between "2020-08-21" and "2020-08-31"
and variant in ("Variation1","Control")
and platform_order_code_ga is not null
group by 1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28
    """

df = client.query(query1).to_dataframe()
df.head()

Unnamed: 0,zone_name,zone_id,vertical_type,operating_system,local_date,local_hour,commission_local,commission_eur,vendor_code,platform_order_code_ga,variant,dps_delivery_fee_local,dps_surge_fee_local,dps_travel_time_local,gmv_eur,gfv_eur,delivery_fee_eur,delivery_fee_local_accounting,travel_time_distance_km,mean_delay,travel_time,to_customer_time,to_vendor_time,delivery_distance,actual_delivery_time,order_delay_mins,delivery_costs,delivery_costs_eur
0,Yishun,35,restaurants,Android,2020-08-27,8,1.32,0.82,v7kh,v7kh-n7w2,Variation1,1.99,-1.5,3.49,5.035664,3.804587,1.231078,1.99,0.59859,7.137513,2.7,4.53,0.47,0.689,9.3,-5.58,4.068,2.516
1,Yishun,35,restaurants,Android,2020-08-27,10,1.29,0.8,y6db,y6db-zdlj,Variation1,1.99,-1.5,3.49,4.633553,3.402476,1.231078,1.99,0.254566,3.018182,1.15,1.15,0.0,0.351,8.28,-4.62,4.572,2.828
2,Yishun,35,restaurants,Android,2020-08-27,17,3.33,2.06,w5bv,w5bv-gbia,Variation1,0.49,-0.5,0.99,6.736902,6.433772,0.30313,0.49,0.183075,1.891707,0.83,0.03,3.12,0.185,7.27,-6.62,4.679,2.895
3,Yishun,35,supermarket,iOS,2020-08-23,13,0.83,0.51,x4ee,x4ee-d1yg,Control,1.49,0.0,1.49,7.349532,6.428523,0.92101,1.49,0.574599,20.983419,2.6,9.77,9.57,0.833,38.13,4.23,4.582,2.825
4,Yishun,35,restaurants,Android,2020-08-30,11,1.57,0.97,t5mv,t5mv-o6dt,Variation1,1.99,-1.5,3.49,5.558785,4.328308,1.230476,1.99,0.811351,4.411518,3.66,5.9,6.42,0.977,16.72,-0.2,6.264,3.875


In [8]:
df.columns

Index(['zone_name', 'zone_id', 'vertical_type', 'operating_system',
       'local_date', 'local_hour', 'commission_local', 'commission_eur',
       'vendor_code', 'platform_order_code_ga', 'variant',
       'dps_delivery_fee_local', 'dps_surge_fee_local',
       'dps_travel_time_local', 'gmv_eur', 'gfv_eur', 'delivery_fee_eur',
       'delivery_fee_local_accounting', 'travel_time_distance_km',
       'mean_delay', 'travel_time', 'to_customer_time', 'to_vendor_time',
       'delivery_distance', 'actual_delivery_time', 'order_delay_mins',
       'delivery_costs', 'delivery_costs_eur', 'profit_eur'],
      dtype='object')

In [10]:
df["profit_eur"] = df["commission_eur"] +  df["delivery_fee_eur"]/1.07 - df["delivery_costs_eur"]
df_copy = df[['zone_name', 'zone_id', 
       'local_date', 'local_hour', 'commission_local', 'commission_eur',
       'vendor_code', 'platform_order_code_ga', 'variant',
        'gmv_eur', 'gfv_eur', 'delivery_fee_eur',
       'delivery_fee_local_accounting', 'travel_time_distance_km',
        'travel_time', 'delivery_distance', 'actual_delivery_time', 'order_delay_mins',
       'delivery_costs', 'delivery_costs_eur', 'profit_eur']].copy().drop_duplicates()

In [11]:
check = df_copy.pivot_table(values = ["delivery_fee_eur","profit_eur"], 
                            #index=["local_date", "hour_block"],
                        columns = "variant",
                       aggfunc ={"delivery_fee_eur":"mean","profit_eur":"sum"}).reset_index()
check

variant,index,Control,Variation1
0,delivery_fee_eur,1.621854,1.310138
1,profit_eur,-6776.889143,-5946.017491


### 2.1.1. Orders

In [None]:
# define stage of the setup
conditions = [
    (df['mean_delay'] <= 7.15),
    (df['mean_delay'] > 7.15) & (df['mean_delay'] <= 10.8),
    (df['mean_delay'] > 10.8)
    ]
values = ['step1', 'step2', 'default']
df["stage"] = np.select(conditions,values)
df.head()

Unnamed: 0,zone_name,zone_id,vertical_type,operating_system,local_date,local_hour,vendor_code,platform_order_code,platform_order_code_ga,variant,dps_delivery_fee_local,dps_surge_fee_local,dps_travel_time_local,gmv_eur,gfv_eur,delivery_fee_eur,delivery_fee_local_accounting,travel_time_distance_km,mean_delay,travel_time,to_customer_time,to_vendor_time,delivery_distance,actual_delivery_time,order_delay_mins,stage
0,Yishun,35,restaurants,Android,2020-08-27,20,g2ze,g2ze-xxj8,g2ze-xxj8,Control,3.49,0.0,3.49,5.252185,3.402476,1.84971,2.99,0.900381,7.351311,4.07,7.73,0.12,0.807,22.62,2.65,step2
1,Yishun,35,restaurants,iOS,2020-08-26,18,s0oz,s0oz-ln2f,s0oz-ln2f,Variation1,2.49,-1.0,3.49,17.412413,15.874388,1.538024,2.49,0.893564,5.211829,4.04,9.25,0.97,0.987,29.25,7.35,step1
2,Yishun,35,restaurants,Android,2020-08-28,19,x2tf,x2tf-qvlb,x2tf-qvlb,Variation1,1.99,-1.5,3.49,63.05246,61.819781,1.232679,1.99,0.474948,9.995449,2.15,5.85,9.47,0.508,58.82,17.95,step2
3,Yishun,35,restaurants,iOS,2020-08-25,20,x3cs,x3cs-k1yg,x3cs-k1yg,Control,3.49,0.0,3.49,7.851848,6.001807,1.850041,2.99,0.852141,9.198578,3.85,2.72,14.65,1.035,23.07,-0.83,step2
4,Yishun,35,restaurants,Android,2020-08-31,11,v4oy,v4oy-pg3h,v4oy-pg3h,Control,4.99,0.0,4.99,18.949879,15.866739,3.08314,4.99,2.841879,3.820641,12.84,13.9,7.6,3.016,25.95,-3.93,step1


In [None]:
order_sum = df.pivot_table("platform_order_code_ga", index=["local_date"], columns ="variant", aggfunc = "count")
order_sum = order_sum.reset_index()
order_sum["Control1"] = order_sum["Control"]/4
order_sum["delta"]=order_sum["Variation1"]/order_sum["Control1"] -1
order_sum
#order_sum["Variation1"].sum()/order_sum["Control1"].sum()

variant,local_date,Control,Variation1,Control1,delta
0,2020-08-21,3891,1082,972.75,0.11231
1,2020-08-22,4617,1210,1154.25,0.0483
2,2020-08-23,4771,1252,1192.75,0.049675
3,2020-08-24,3497,950,874.25,0.086646
4,2020-08-25,3642,1015,910.5,0.114772
5,2020-08-26,3582,1033,895.5,0.153546
6,2020-08-27,3741,1060,935.25,0.133387
7,2020-08-28,3994,1060,998.5,0.061592
8,2020-08-29,4753,1241,1188.25,0.044393
9,2020-08-30,4732,1316,1183.0,0.112426


In [None]:
fig = go.Figure()
fig = make_subplots(specs=[[{"secondary_y": True}]])
fig.add_trace(go.Scatter(x =order_sum.local_date, y=order_sum.Control1, mode="lines", name ="control"), secondary_y=False)
fig.add_trace(go.Scatter(x =order_sum.local_date, y=order_sum.Variation1, mode="lines", name ="variation1"), secondary_y=False)
fig.add_trace(go.Scatter(x =order_sum.local_date, y=order_sum.delta, mode="lines", name ="delta", line = dict(color='royalblue', width=4, dash='dash')), secondary_y=True)
# Add figure title
fig.update_layout(title_text="Daily users in Control and Test")
# Set x-axis title
fig.update_xaxes(title_text="Date")
# Set y-axes titles
fig.update_yaxes(title_text="Daily user amount", showgrid=False, secondary_y=False)
fig.update_yaxes(title_text="Delta % difference of Variation1 vs Control", showgrid=False,secondary_y=True)
fig.show()

In [None]:
order_hour = df.pivot_table("platform_order_code_ga", index=["local_hour"], columns ="variant", aggfunc = "count")
order_hour = order_hour.reset_index()
order_hour = order_hour.fillna(0)
order_hour["Control1"] = order_hour["Control"]/4
order_hour["delta"]=order_hour["Variation1"]/order_hour["Control1"] -1
order_hour["Control_pp"] = order_hour["Control1"]/order_hour["Control1"].sum()
order_hour["Variation_pp"] = order_hour["Variation1"]/order_hour["Variation1"].sum()
order_hour.drop(order_hour[order_hour.local_hour < 8].index, inplace=True)
order_hour

variant,local_hour,Control,Variation1,Control1,delta,Control_pp,Variation_pp
5,8,975.0,261.0,243.75,0.070769,0.022421,0.021942
6,9,1543.0,429.0,385.75,0.112119,0.035483,0.036066
7,10,2148.0,670.0,537.0,0.247672,0.049395,0.056326
8,11,4124.0,1128.0,1031.0,0.094083,0.094835,0.09483
9,12,4218.0,1131.0,1054.5,0.072546,0.096997,0.095082
10,13,3110.0,863.0,777.5,0.109968,0.071517,0.072551
11,14,2561.0,718.0,640.25,0.121437,0.058893,0.060361
12,15,2361.0,586.0,590.25,-0.0072,0.054293,0.049264
13,16,2675.0,736.0,668.75,0.100561,0.061514,0.061875
14,17,3844.0,1123.0,961.0,0.168574,0.088396,0.094409


In [None]:
fig = go.Figure()
fig = make_subplots(specs=[[{"secondary_y": True}]])
fig.add_trace(go.Bar(x =order_hour.local_hour, y=order_hour.Control1, name ="control"), secondary_y=False)
fig.add_trace(go.Bar(x =order_hour.local_hour, y=order_hour.Variation1,name ="variation1"), secondary_y=False)
fig.add_trace(go.Scatter(x =order_hour.local_hour, y=order_hour.delta, mode="lines", name ="delta", line = dict(color='gold', width=4, dash='dash')), secondary_y=True)
# Add figure title
#fig.update_layout(title_text="Daily users in Control and Test")
# Set x-axis title
fig.update_xaxes(title_text="Hour")
# Set y-axes titles
fig.update_yaxes(title_text="Order amount", showgrid=False, secondary_y=False)
fig.update_yaxes(title_text="Delta % difference of Variation1 vs Control", showgrid=False,secondary_y=True)
fig.show()

In [None]:
fig = go.Figure()
fig = make_subplots(specs=[[{"secondary_y": True}]])
fig.add_trace(go.Bar(x =order_hour.local_hour, y=order_hour.Control1, name ="control"), secondary_y=False)
fig.add_trace(go.Bar(x =order_hour.local_hour, y=order_hour.Variation1,name ="variation"), secondary_y=False)
fig.add_trace(go.Scatter(x =order_hour.local_hour, y=order_hour.delta, mode="lines", name ="delta", line = dict(color='gold', width=4, dash='dash')), secondary_y=True)
# Add figure title
#fig.update_layout(title_text="Daily users in Control and Test")
# Set x-axis title
fig.update_xaxes(title_text="Hour")
# Set y-axes titles
fig.update_yaxes(title_text="Share of orders", showgrid=False, secondary_y=False)
fig.update_yaxes(title_text="Delta % difference of Variation1 vs Control", showgrid=False,secondary_y=True)
fig.show()

In [None]:
fig = go.Figure()
fig = make_subplots(specs=[[{"secondary_y": True}]])
fig.add_trace(go.Bar(x =order_hour.local_hour, y=order_hour.Control_pp, name ="control"), secondary_y=False)
fig.add_trace(go.Bar(x =order_hour.local_hour, y=order_hour.Variation_pp,name ="variation"), secondary_y=False)
#fig.add_trace(go.Scatter(x =order_hour.local_hour, y=order_hour.delta, mode="lines", name ="delta", line = dict(color='gold', width=4, dash='dash')), secondary_y=True)
# Add figure title
#fig.update_layout(title_text="Daily users in Control and Test")
# Set x-axis title
fig.update_xaxes(title_text="Hour")
# Set y-axes titles
fig.update_yaxes(title_text="Share of orders", showgrid=False, secondary_y=False)
fig.update_yaxes(title_text="Delta % difference of Variation1 vs Control", showgrid=False,secondary_y=True)
fig.show()

In [None]:
fig = go.Figure()
fig = make_subplots(specs=[[{"secondary_y": True}]])
fig.add_trace(go.Scatter(x =order_hour.local_hour, y=order_hour.Control_pp, name ="control"), secondary_y=False)
fig.add_trace(go.Scatter(x =order_hour.local_hour, y=order_hour.Variation_pp,name ="variation"), secondary_y=False)
#fig.add_trace(go.Scatter(x =order_hour.local_hour, y=order_hour.delta, mode="lines", name ="delta", line = dict(color='gold', width=4, dash='dash')), secondary_y=True)
# Add figure title
#fig.update_layout(title_text="Daily users in Control and Test")
# Set x-axis title
fig.update_xaxes(title_text="Hour")
# Set y-axes titles
fig.update_yaxes(title_text="Share of orders", showgrid=False, secondary_y=False)
fig.update_yaxes(title_text="Delta % difference of Variation1 vs Control", showgrid=False,secondary_y=True)
fig.show()

In [None]:
order = df.groupby(["stage","local_date"], as_index = False).agg({"platform_order_code_ga":"count"})
order = order[order.stage != "0"]
#order = order.reset_index()
fig = px.bar(order, x="local_date",y="platform_order_code_ga", color="stage")
# Add figure title
#fig.update_layout(title_text="Daily users in Control and Test")
# Set x-axis title
fig.update_xaxes(title_text="Date")
# Set y-axes titles
fig.update_yaxes(title_text="Order amount", showgrid=False)

fig.show()

In [None]:
pivot = df.pivot_table("platform_order_code_ga", index=["stage","local_date"], columns ="variant", aggfunc = "count")
pivot["control1"] = pivot["Control"]/4
pivot["delta"] = pivot["Variation1"]/pivot["control1"]-1
pivot['delta'] = pivot['delta'].astype(float).map(lambda n: '{:.2%}'.format(n))
pivot = pivot.reset_index()
pivot.head()

variant,stage,local_date,Control,Variation1,control1,delta
0,default,2020-08-21,290,69,72.5,-4.83%
1,default,2020-08-22,3515,883,878.75,0.48%
2,default,2020-08-23,3888,1005,972.0,3.40%
3,default,2020-08-24,221,57,55.25,3.17%
4,default,2020-08-25,838,223,209.5,6.44%


In [None]:
pivot1 = df.pivot_table("platform_order_code_ga", index=["stage"], columns ="variant", aggfunc = "count")
pivot1["control1"] = pivot1["Control"]/4
pivot1["delta"] = pivot1["Variation1"]/pivot1["control1"]-1
pivot1['delta'] = pivot1['delta'].astype(float).map(lambda n: '{:.2%}'.format(n))
pivot1

variant,Control,Variation1,control1,delta
stage,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
default,14119,3567,3529.75,1.06%
step1,16627,4763,4156.75,14.58%
step2,12740,3565,3185.0,11.93%


In [None]:
fig = px.line(pivot, x="local_date", y="delta", color="stage", text = "delta")
# Set x-axis title
fig.update_xaxes(title_text="Date")
# Set y-axes titles
fig.update_yaxes(title_text="delta (variation1 vs control)", showgrid=False)
fig.show()

### 2.1.2. Delivery Fee

In [None]:
df_avg = df.pivot_table("dps_delivery_fee", index=["local_date"], columns ="variant", aggfunc = "mean")
df_avg = df_avg.reset_index()
df_avg["delta"]=df_avg["Variation1"]/df_avg["Control"] -1
df_avg

variant,local_date,Control,Variation1,delta
0,2020-08-21,3.360708,2.414011,-0.281696
1,2020-08-22,3.406098,2.9727,-0.127242
2,2020-08-23,3.378992,3.112102,-0.078985
3,2020-08-24,3.36425,2.42229,-0.279991
4,2020-08-25,3.321451,2.580476,-0.223088
5,2020-08-26,3.348164,2.323138,-0.306146
6,2020-08-27,3.349389,2.317315,-0.308138
7,2020-08-28,3.352915,2.371294,-0.292766
8,2020-08-29,3.383627,2.944893,-0.129664
9,2020-08-30,3.39705,2.733006,-0.195476


In [None]:
df.head()

Unnamed: 0,zone_name,zone_id,vertical_type,operating_system,created_date,local_date,local_hour,vendor_code,platform_order_code,platform_order_code_ga,variant,dps_delivery_fee,dps_surge_fee,dps_travel_time,gmv_eur,gfv_eur,delivery_fee_eur,delivery_fee_local_accounting,travel_time_distance_km,delay,mean_delay,travel_time,to_customer_time,to_vendor_time,delivery_distance,actual_delivery_time,order_delay_mins,stage
0,Yishun,35,restaurants,iOS,2020-08-31,2020-08-31,18,v5oy,v5oy-lg8g,v5oy-lg8g,Control,4.24,0.0,4.24,10.154003,8.004975,2.149028,3.49,0.561262,4.882174,3.679979,2.53,5.48,0.0,0.79,13.05,-0.87,step1
1,Yishun,35,restaurants,Android,2020-08-31,2020-08-31,17,y0gg,y0gg-sh94,y0gg-sh94,Variation1,2.045556,-1.444444,3.49,6.274669,5.049292,1.225377,1.99,1.056809,5.069937,3.574998,4.76,8.77,0.0,1.38,13.98,-3.92,step1
2,Yishun,35,restaurants,Android,2020-08-31,2020-08-31,12,v2ej,v2ej-btm2,v2ej-btm2,Control,3.656667,0.0,3.656667,17.296905,14.839993,0.0,3.99,1.297857,10.050899,8.121974,5.84,,4.18,1.83,26.53,,step2
3,Yishun,35,restaurants,iOS,2020-08-31,2020-08-31,18,w2uu,w2uu-aiol,w2uu-aiol,Variation1,0.956667,-0.033333,0.99,14.772258,14.162649,0.60961,0.99,0.16583,5.106033,3.864903,0.75,0.0,1.83,0.19,12.43,-5.47,step1
4,Yishun,35,restaurants,Android,2020-08-31,2020-08-31,18,g6sf,g6sf-j5wv,g6sf-j5wv,Variation1,3.39,-0.6,3.99,6.151516,3.694604,2.456912,3.99,1.545201,5.545002,4.224064,6.95,5.57,5.87,1.96,17.13,-0.77,step1


In [None]:
df_sum = df.pivot_table(["dps_delivery_fee","platform_order_code_ga"], columns ="variant", aggfunc={'dps_delivery_fee' : 'sum', 'platform_order_code_ga' : 'count'})
df_sum

variant,Control,Variation1
dps_delivery_fee,142605.419616,30419.384133
platform_order_code_ga,43486.0,11895.0


In [None]:
fig = go.Figure()
fig = make_subplots(specs=[[{"secondary_y": True}]])
fig.add_trace(go.Scatter(x =df_avg.local_date, y=df_avg.Control, mode="lines", name ="control"), secondary_y=False)
fig.add_trace(go.Scatter(x =df_avg.local_date, y=df_avg.Variation1, mode="lines", name ="variation1"), secondary_y=False)
fig.add_trace(go.Scatter(x =df_avg.local_date, y=df_avg.delta, mode="lines", name ="delta", line = dict(color='gray', width=4, dash='dash')), secondary_y=True)
# Add figure title
fig.update_layout(title_text="Daily avg. DF of Control and Test")
# Set x-axis title
fig.update_xaxes(title_text="Date")
# Set y-axes titles
fig.update_yaxes(title_text="Avg. DF", showgrid=False, secondary_y=False)
fig.update_yaxes(title_text="Delta % difference of Variation1 vs Control", showgrid=False,secondary_y=True)
fig.show()

### 2.1.3. Avg. Basket Value

In [None]:
food_avg = df.pivot_table("gfv_eur", index=["local_date"], columns ="variant", aggfunc = "mean")
food_avg = food_avg.reset_index()
food_avg["delta"]=food_avg["Variation1"]/food_avg["Control"] -1
food_avg

variant,local_date,Control,Variation1,delta
0,2020-08-21,11.758203,12.082276,0.027561
1,2020-08-22,13.434776,13.202012,-0.017326
2,2020-08-23,12.919205,13.201155,0.021824
3,2020-08-24,11.329478,10.855309,-0.041853
4,2020-08-25,11.567414,11.312833,-0.022008
5,2020-08-26,11.403276,11.700812,0.026092
6,2020-08-27,11.96628,11.346428,-0.0518
7,2020-08-28,12.346424,11.791457,-0.04495
8,2020-08-29,13.617534,12.863906,-0.055343
9,2020-08-30,13.425436,12.986135,-0.032722


In [None]:
fig = go.Figure()
fig = make_subplots(specs=[[{"secondary_y": True}]])
fig.add_trace(go.Scatter(x =food_avg.local_date, y=food_avg.Control, mode="lines", name ="control"), secondary_y=False)
fig.add_trace(go.Scatter(x =food_avg.local_date, y=food_avg.Variation1, mode="lines", name ="variation1"), secondary_y=False)
fig.add_trace(go.Scatter(x =food_avg.local_date, y=food_avg.delta, mode="lines", name ="delta", line = dict(color='gray', width=4, dash='dash')), secondary_y=True)
# Add figure title
fig.update_layout(title_text="Daily avg. Basket Value of Control and Test")
# Set x-axis title
fig.update_xaxes(title_text="Date")
# Set y-axes titles
fig.update_yaxes(title_text="Avg. Basket Value", showgrid=False, secondary_y=False)
fig.update_yaxes(title_text="Delta % difference of Variation1 vs Control", showgrid=False,secondary_y=True)
fig.show()

## 2.2. Logistics Metrics

In [None]:
km_avg = df.pivot_table("travel_time_distance_km", index=["local_date"], columns ="variant", aggfunc = "mean")
km_avg = km_avg.reset_index()
km_avg["delta"]=km_avg["Variation1"]/km_avg["Control"] -1
km_avg

variant,local_date,Control,Variation1,delta
0,2020-08-21,1.529406,1.516239,-0.008609
1,2020-08-22,1.563623,1.599328,0.022835
2,2020-08-23,1.460238,1.526727,0.045533
3,2020-08-24,1.54856,1.477932,-0.045609
4,2020-08-25,1.525759,1.494771,-0.02031
5,2020-08-26,1.558604,1.532819,-0.016544
6,2020-08-27,1.525153,1.474327,-0.033325
7,2020-08-28,1.56683,1.444034,-0.078372
8,2020-08-29,1.592539,1.576208,-0.010255
9,2020-08-30,1.601607,1.559719,-0.026154


In [None]:
fig = go.Figure()
fig = make_subplots(specs=[[{"secondary_y": True}]])
fig.add_trace(go.Scatter(x =km_avg.local_date, y=km_avg.Control, mode="lines", name ="control"), secondary_y=False)
fig.add_trace(go.Scatter(x =km_avg.local_date, y=km_avg.Variation1, mode="lines", name ="variation1"), secondary_y=False)
fig.add_trace(go.Scatter(x =km_avg.local_date, y=km_avg.delta, mode="lines", name ="delta", line = dict(color='gray', width=4, dash='dash')), secondary_y=True)
# Add figure title
fig.update_layout(title_text="Avg. Basket Value of Control and Test")
# Set x-axis title
fig.update_xaxes(title_text="Date")
# Set y-axes titles
fig.update_yaxes(title_text="Avg. Distance", showgrid=False, secondary_y=False)
fig.update_yaxes(title_text="Delta % difference of Variation1 vs Control", showgrid=False,secondary_y=True)
fig.show()

In [None]:
df["tt"] = df["travel_time"].round(1)
df.head(5)

Unnamed: 0,zone_name,zone_id,vertical_type,operating_system,created_date,local_date,local_hour,vendor_code,platform_order_code,platform_order_code_ga,variant,dps_delivery_fee,dps_surge_fee,dps_travel_time,gmv_eur,gfv_eur,delivery_fee_eur,delivery_fee_local_accounting,travel_time_distance_km,delay,mean_delay,travel_time,to_customer_time,to_vendor_time,delivery_distance,actual_delivery_time,order_delay_mins,stage,tt
0,Yishun,35,restaurants,iOS,2020-08-31,2020-08-31,18,v5oy,v5oy-lg8g,v5oy-lg8g,Control,4.24,0.0,4.24,10.154003,8.004975,2.149028,3.49,0.561262,4.882174,3.679979,2.53,5.48,0.0,0.79,13.05,-0.87,step1,2.5
1,Yishun,35,restaurants,Android,2020-08-31,2020-08-31,17,y0gg,y0gg-sh94,y0gg-sh94,Variation1,2.045556,-1.444444,3.49,6.274669,5.049292,1.225377,1.99,1.056809,5.069937,3.574998,4.76,8.77,0.0,1.38,13.98,-3.92,step1,4.8
2,Yishun,35,restaurants,Android,2020-08-31,2020-08-31,12,v2ej,v2ej-btm2,v2ej-btm2,Control,3.656667,0.0,3.656667,17.296905,14.839993,0.0,3.99,1.297857,10.050899,8.121974,5.84,,4.18,1.83,26.53,,step2,5.8
3,Yishun,35,restaurants,iOS,2020-08-31,2020-08-31,18,w2uu,w2uu-aiol,w2uu-aiol,Variation1,0.956667,-0.033333,0.99,14.772258,14.162649,0.60961,0.99,0.16583,5.106033,3.864903,0.75,0.0,1.83,0.19,12.43,-5.47,step1,0.8
4,Yishun,35,restaurants,Android,2020-08-31,2020-08-31,18,g6sf,g6sf-j5wv,g6sf-j5wv,Variation1,3.39,-0.6,3.99,6.151516,3.694604,2.456912,3.99,1.545201,5.545002,4.224064,6.95,5.57,5.87,1.96,17.13,-0.77,step1,7.0


In [None]:
df_first = df[df["local_date"].isin([datetime.date(2020, 8, 21), datetime.date(2020, 8, 22),datetime.date(2020, 8, 23), datetime.date(2020, 8, 24)])]
df_second = df[~df["local_date"].isin([datetime.date(2020, 8, 21), datetime.date(2020, 8, 22),datetime.date(2020, 8, 23), datetime.date(2020, 8, 24)])]

In [None]:
tt_order = df.pivot_table("platform_order_code_ga", index=["tt"], columns ="variant", aggfunc = "count")
tt_order["control1"] = tt_order["Control"]/4
tt_order["control1_pp"] = tt_order["control1"]/tt_order["control1"].sum()
tt_order["Variation1_pp"] = tt_order["Variation1"]/tt_order["Variation1"].sum()
tt_order = tt_order.reset_index()
tt_order.head()

variant,tt,Control,Variation1,control1,control1_pp,Variation1_pp
0,0.0,12.0,5.0,3.0,0.000276,0.00042
1,0.1,23.0,11.0,5.75,0.000529,0.000925
2,0.2,42.0,13.0,10.5,0.000966,0.001093
3,0.3,83.0,16.0,20.75,0.001909,0.001345
4,0.4,117.0,20.0,29.25,0.002691,0.001681


In [None]:
#first testing period (Aug. 21st - Aug. 24th)
tt_order_first = df_first.pivot_table("platform_order_code_ga", index=["tt"], columns ="variant", aggfunc = "count")
tt_order_first["control1"] = tt_order_first["Control"]/4
tt_order_first["control1_pp"] = tt_order_first["control1"]/tt_order_first["control1"].sum()
tt_order_first["Variation1_pp"] = tt_order_first["Variation1"]/tt_order_first["Variation1"].sum()
tt_order_first = tt_order_first.reset_index()
tt_order_first.head()

variant,tt,Control,Variation1,control1,control1_pp,Variation1_pp
0,0.0,4.0,1.0,1.0,0.000238,0.000223
1,0.1,6.0,5.0,1.5,0.000358,0.001113
2,0.2,20.0,4.0,5.0,0.001192,0.00089
3,0.3,30.0,5.0,7.5,0.001788,0.001113
4,0.4,48.0,8.0,12.0,0.002861,0.00178


In [None]:
#second testing period (Aug. 21st - Aug. 24th)
tt_order_second = df_second.pivot_table("platform_order_code_ga", index=["tt"], columns ="variant", aggfunc = "count")
tt_order_second["control1"] = tt_order_second["Control"]/4
tt_order_second["control1_pp"] = tt_order_second["control1"]/tt_order_second["control1"].sum()
tt_order_second["Variation1_pp"] = tt_order_second["Variation1"]/tt_order_second["Variation1"].sum()
tt_order_second = tt_order_second.reset_index()
tt_order_second.head()

variant,tt,Control,Variation1,control1,control1_pp,Variation1_pp
0,0.0,8.0,4.0,2.0,0.0003,0.00054
1,0.1,17.0,6.0,4.25,0.000636,0.000811
2,0.2,22.0,9.0,5.5,0.000824,0.001216
3,0.3,53.0,11.0,13.25,0.001984,0.001486
4,0.4,69.0,12.0,17.25,0.002583,0.001621


In [None]:
fig = go.Figure()
fig = make_subplots(specs=[[{"secondary_y": True}]])
fig.add_trace(go.Scatter(x =tt_order.tt, y=tt_order.control1, mode="lines", name ="control"), secondary_y=False)
fig.add_trace(go.Scatter(x =tt_order.tt, y=tt_order.Variation1, mode="lines", name ="variation1"), secondary_y=False)
#fig.add_trace(go.Scatter(x =tt_order.tt, y=tt_order.delta, mode="lines", name ="delta", line = dict(color='gray', width=4, dash='dash')), secondary_y=True)
# Add figure title
fig.update_layout(title_text="Avg. Basket Value of Control and Test")
# Set x-axis title
fig.update_xaxes(title_text="Date")
# Set y-axes titles
fig.update_yaxes(title_text="Avg. Distance", showgrid=False, secondary_y=False)
fig.update_yaxes(title_text="Delta % difference of Variation1 vs Control", showgrid=False,secondary_y=True)
fig.show()

In [None]:
fig = go.Figure()
fig = make_subplots(specs=[[{"secondary_y": True}]])
fig.add_trace(go.Scatter(x =tt_order.tt, y=tt_order.control1_pp, mode="lines", name ="control"), secondary_y=False)
fig.add_trace(go.Scatter(x =tt_order.tt, y=tt_order.Variation1_pp, mode="lines", name ="Variation1"), secondary_y=False)
#fig.add_trace(go.Scatter(x =tt_order.tt, y=tt_order.delta, mode="lines", name ="delta", line = dict(color='gray', width=4, dash='dash')), secondary_y=True)
# Add figure title
fig.update_layout(title_text="Order across tt of Control and Test")
# Set x-axis title
fig.update_xaxes(title_text="Date")
# Set y-axes titles
fig.update_yaxes(title_text="Avg. Distance", showgrid=False, secondary_y=False)
fig.update_yaxes(title_text="Delta % difference of Variation1% vs Control", showgrid=False,secondary_y=True)
fig.show()

In [None]:
import scipy
from scipy import signal
fig = go.Figure()
fig = make_subplots(specs=[[{"secondary_y": True}]])
fig.add_trace(go.Scatter(x =tt_order.tt, y=signal.savgol_filter(tt_order.control1,53, 3), mode="markers", name ="control"), secondary_y=False)
fig.add_trace(go.Scatter(x =tt_order.tt, y=signal.savgol_filter(tt_order.Variation1,53,3), mode="markers", name ="variation1"), secondary_y=False)
#fig.add_trace(go.Scatter(x =tt_order.tt, y=tt_order.delta, mode="lines", name ="delta", line = dict(color='gray', width=4, dash='dash')), secondary_y=True)
# Add figure title
fig.update_layout(title_text="Order across tt of Control and Test")
# Set x-axis title
fig.update_xaxes(title_text="Travel Time")
# Set y-axes titles
fig.update_yaxes(title_text="Orders", showgrid=False, secondary_y=False)
fig.update_yaxes(title_text="Delta % difference of Variation1 vs Control", showgrid=False,secondary_y=True)
fig.show()

In [None]:
# First period
fig = go.Figure()
fig = make_subplots(specs=[[{"secondary_y": True}]])
fig.add_trace(go.Scatter(x =tt_order_first.tt, y=signal.savgol_filter(tt_order.control1_pp,53, 3), mode="lines", name ="control"), secondary_y=False)
fig.add_trace(go.Scatter(x =tt_order_first.tt, y=signal.savgol_filter(tt_order.Variation1_pp,53,3), mode="lines", name ="variation1"), secondary_y=False)
#fig.add_trace(go.Scatter(x =tt_order.tt, y=tt_order.delta, mode="lines", name ="delta", line = dict(color='gray', width=4, dash='dash')), secondary_y=True)
# Add figure title
fig.update_layout(title_text="Order across tt of Control and Test")
# Set x-axis title
fig.update_xaxes(title_text="Travel Time (in minutes)")
# Set y-axes titles
fig.update_yaxes(title_text="Share of orders", showgrid=False, secondary_y=False)
fig.update_yaxes(title_text="Delta % difference of Variation1 vs Control", showgrid=False,secondary_y=True)
fig.show()

In [None]:
# second period
fig = go.Figure()
fig = make_subplots(specs=[[{"secondary_y": True}]])
fig.add_trace(go.Scatter(x =tt_order_second.tt, y=signal.savgol_filter(tt_order_second.control1_pp,53, 3), mode="lines", name ="control"), secondary_y=False)
fig.add_trace(go.Scatter(x =tt_order_second.tt, y=signal.savgol_filter(tt_order_second.Variation1_pp,53,3), mode="lines", name ="variation"), secondary_y=False)
#fig.add_trace(go.Scatter(x =tt_order_second.tt, y=tt_order_second.delta, mode="lines", name ="delta", line = dict(color='gray', width=4, dash='dash')), secondary_y=True)
# Add figure title
fig.update_layout(title_text="Order across tt of Control and Test")
# Set x-axis title
fig.update_xaxes(title_text="Travel Time (in minutes)")
# Set y-axes titles
fig.update_yaxes(title_text="Share of orders", showgrid=False, secondary_y=False)
fig.update_yaxes(title_text="Delta % difference of Variation1 vs Control", showgrid=False,secondary_y=True)
fig.show()

# Further application

In [None]:
query1 = """

select
o.platform_order_code,
z.name zone_name,
z.id zone_id,
vendor.id vendor_id,
vendor.vendor_code vendor_code,
vendor.name vendor_name,
cast(DATETIME(o.created_at, o.timezone) as date) as local_date,
extract(hour from (DATETIME(o.created_at, o.timezone))) as local_hour,
cast(o.order_placed_at as date) order_date,
p.drive_time_value,
st_distance(vendor.location,customer.location) as delivery_distance,
CASE 
    WHEN cast(DATETIME(o.created_at, o.timezone) as date) < "2020-08-20" THEN 'before_test'
    ELSE 'after_rollout'
  END
  AS period
from fulfillment-dwh-production.cl.orders o
left join unnest(deliveries) d
left join unnest(porygon) p
left join cl.countries on o.country_code = countries.country_code
left join unnest(cities) c on c.id = o.city_id
left join unnest(zones) z on z.id = o.zone_id
where
o.country_code = 'sg'
and o.order_status = 'completed'
and d.delivery_status = 'completed'
and (cast(DATETIME(o.created_at, o.timezone) as date) between '2020-08-13' and '2020-08-19' or cast(DATETIME(o.created_at, o.timezone) as date) between '2020-09-05' and '2020-09-11')
and p.vehicle_profile = 'default'
and z.id in (9,35)

    """

app = client.query(query1).to_dataframe()


In [None]:
app["travel_time"] = 4.51650006498*app["delivery_distance"]/1000
app["tt"] = app["travel_time"].round(1)
app.head()

Unnamed: 0,platform_order_code,zone_name,zone_id,vendor_id,vendor_code,vendor_name,local_date,local_hour,order_date,drive_time_value,delivery_distance,period,travel_time,tt
0,x6ts-jtyk,Woodlands,9,84227,x6ts,KPT 78 Tze Char (Woodlands),2020-09-09,0,2020-09-08,5,706.684957,after_rollout,3.191743,3.2
1,s2po-ruxk,Woodlands,9,1031,s2po,The Coffee Bean & Tea Leaf (Causeway Point),2020-09-09,3,2020-09-08,13,1632.607843,after_rollout,7.373673,7.4
2,x1sy-8e0n,Woodlands,9,86558,x1sy,pandamart (Woodlands 11),2020-09-09,0,2020-09-08,16,2720.859751,after_rollout,12.288763,12.3
3,x1sy-vr47,Woodlands,9,86558,x1sy,pandamart (Woodlands 11),2020-09-09,2,2020-09-08,16,2852.490722,after_rollout,12.883275,12.9
4,s2er-vwy5,Woodlands,9,10805,s2er,Hai Zhong Bao Live Seafood (Woodlands),2020-09-08,21,2020-09-08,13,2164.943378,after_rollout,9.777967,9.8


## Pre/post analytics

In [None]:
app_woodlands = app[app["zone_id"]==9]
app_yishun = app[app["zone_id"]==35]
app_yishun.head()

Unnamed: 0,platform_order_code,zone_name,zone_id,vendor_id,vendor_code,vendor_name,local_date,local_hour,order_date,drive_time_value,delivery_distance,period,travel_time,tt
10,v8fr-d663,Yishun,35,87126,v8fr,Srisun Signature,2020-09-09,0,2020-09-08,6,761.972768,after_rollout,3.44145,3.4
11,d3yx-hlh1,Yishun,35,112189,d3yx,Al Zara,2020-09-09,4,2020-09-08,4,282.179623,after_rollout,1.274464,1.3
13,x0ud-0bfl,Yishun,35,87659,x0ud,Haji S.M Hatheem Family Food,2020-09-08,23,2020-09-08,9,770.623375,after_rollout,3.480521,3.5
21,y6io-s0wd,Yishun,35,98308,y6io,M.A.D Kitchen (Yishun),2020-09-09,2,2020-09-08,14,2341.142741,after_rollout,10.573771,10.6
26,z9fd-c0lv,Yishun,35,99872,z9fd,7-Eleven (Sembawang Close),2020-09-09,2,2020-09-08,6,675.540209,after_rollout,3.051077,3.1


In [None]:
hour_woodlands = app_woodlands.pivot_table("platform_order_code", index=["local_hour"], columns ="period", aggfunc = "count")
hour_woodlands = hour_woodlands.reset_index()
hour_woodlands = hour_woodlands.fillna(0)
hour_woodlands["delta"]=hour_woodlands["after_rollout"]/hour["before_test"] -1
hour_woodlands["before_pp"] = hour_woodlands["before_test"]/hour["before_test"].sum()
hour_woodlands["after_pp"] = hour_woodlands["after_rollout"]/hour["after_rollout"].sum()

In [None]:
hour_yishun = app_yishun.pivot_table("platform_order_code", index=["local_hour"], columns ="period", aggfunc = "count")
hour_yishun = hour_yishun.reset_index()
hour_yishun = hour_yishun.fillna(0)
hour_yishun["delta"]=hour_yishun["after_rollout"]/hour["before_test"] -1
hour_yishun["before_pp"] = hour_yishun["before_test"]/hour["before_test"].sum()
hour_yishun["after_pp"] = hour_yishun["after_rollout"]/hour["after_rollout"].sum()
#hour_yishun.drop(hour_yishun[hour_yishun.local_hour < 8].index, inplace=True)

In [None]:
fig = go.Figure()
fig = make_subplots(specs=[[{"secondary_y": True}]])
fig.add_trace(go.Scatter(x =hour_woodlands.local_hour, y=hour_woodlands.before_pp, name ="before A/B testing"), secondary_y=False)
fig.add_trace(go.Scatter(x =hour_woodlands.local_hour, y=hour_woodlands.after_pp,name ="after rollout"), secondary_y=False)
#fig.add_trace(go.Scatter(x =hour_woodlands.local_hour, y=hour.delta, mode="lines", name ="delta", line = dict(color='gold', width=4, dash='dash')), secondary_y=True)
# Add figure title
#fig.update_layout(title_text="Daily users in Control and Test")
# Set x-axis title
fig.update_xaxes(title_text="Hour")
# Set y-axes titles
fig.update_yaxes(title_text="Share of orders", showgrid=False, secondary_y=False)
fig.update_yaxes(title_text="Delta % difference of Variation1 vs Control", showgrid=False,secondary_y=True)
fig.show()

In [None]:
fig = go.Figure()
fig = make_subplots(specs=[[{"secondary_y": True}]])
fig.add_trace(go.Scatter(x =hour_yishun.local_hour, y=hour_yishun.before_pp, name ="before A/B testing"), secondary_y=False)
fig.add_trace(go.Scatter(x =hour_yishun.local_hour, y=hour_yishun.after_pp,name ="after rollout"), secondary_y=False)
#fig.add_trace(go.Scatter(x =hour_yishun.local_hour, y=hour.delta, mode="lines", name ="delta", line = dict(color='gold', width=4, dash='dash')), secondary_y=True)
# Add figure title
#fig.update_layout(title_text="Daily users in Control and Test")
# Set x-axis title
fig.update_xaxes(title_text="Hour")
# Set y-axes titles
fig.update_yaxes(title_text="Share of orders", showgrid=False, secondary_y=False)
fig.update_yaxes(title_text="Delta % difference of Variation1 vs Control", showgrid=False,secondary_y=True)
fig.show()

In [None]:
tt_woodlands = app_woodlands.pivot_table("platform_order_code", index=["tt"], columns ="period", aggfunc = "count")
tt_woodlands["before_pp"] = tt_woodlands["before_test"]/(tt_woodlands["before_test"].sum())
tt_woodlands["after_pp"] = tt_woodlands["after_rollout"]/(tt_woodlands["after_rollout"].sum())
tt_woodlands = tt_woodlands.reset_index()
tt_woodlands

period,tt,after_rollout,before_test,before_pp,after_pp
0,0.0,23.0,14.0,0.000335,0.000556
1,0.1,8.0,15.0,0.000359,0.000193
2,0.2,61.0,77.0,0.001845,0.001474
3,0.3,56.0,62.0,0.001486,0.001353
4,0.4,97.0,83.0,0.001989,0.002344
...,...,...,...,...,...
237,23.7,1.0,,,0.000024
238,23.8,1.0,6.0,0.000144,0.000024
239,23.9,1.0,1.0,0.000024,0.000024
240,24.1,2.0,,,0.000048


In [None]:
tt_yishun = app_yishun.pivot_table("platform_order_code", index=["tt"], columns ="period", aggfunc = "count")
tt_yishun["before_pp"] = tt_yishun["before_test"]/(tt_yishun["before_test"].sum())
tt_yishun["after_pp"] = tt_yishun["after_rollout"]/(tt_yishun["after_rollout"].sum())
tt_yishun = tt_yishun.reset_index()
tt_yishun.head()

period,tt,after_rollout,before_test,before_pp,after_pp
0,0.0,12.0,6.0,0.00014,0.00028
1,0.1,23.0,33.0,0.00077,0.000537
2,0.2,37.0,40.0,0.000934,0.000863
3,0.3,80.0,78.0,0.001821,0.001867
4,0.4,115.0,110.0,0.002568,0.002683


In [None]:
# TT woodlands
fig = go.Figure()
fig = make_subplots(specs=[[{"secondary_y": True}]])
fig.add_trace(go.Scatter(x =tt_woodlands.tt, y=signal.savgol_filter(tt_woodlands.before_pp,53, 3), mode="lines", name ="before test"), secondary_y=False)
fig.add_trace(go.Scatter(x =tt_woodlands.tt, y=signal.savgol_filter(tt_woodlands.after_pp,53,3), mode="lines", name ="after rollout"), secondary_y=False)
#fig.add_trace(go.Scatter(x =tt_woodlands.tt, y=tt_woodlands.delta, mode="lines", name ="delta", line = dict(color='gray', width=4, dash='dash')), secondary_y=True)
# Add figure title
#fig.update_layout(title_text="Order across tt of Control and Test")
# Set x-axis title
fig.update_xaxes(title_text="Travel Time (in minutes)")
# Set y-axes titles
fig.update_yaxes(title_text="Share of orders", showgrid=False, secondary_y=False)
#fig.update_yaxes(title_text="Delta % difference of Variation1 vs Control", showgrid=False,secondary_y=True)
fig.show()

In [None]:
# TT woodlands
fig = go.Figure()
fig = make_subplots(specs=[[{"secondary_y": True}]])
fig.add_trace(go.Scatter(x =tt_yishun.tt, y=signal.savgol_filter(tt_yishun.before_pp,53, 3), mode="lines", name ="before test"), secondary_y=False)
fig.add_trace(go.Scatter(x =tt_yishun.tt, y=signal.savgol_filter(tt_yishun.after_pp,53,3), mode="lines", name ="after rollout"), secondary_y=False)
#fig.add_trace(go.Scatter(x =tt_yishun.tt, y=tt_yishun.delta, mode="lines", name ="delta", line = dict(color='gray', width=4, dash='dash')), secondary_y=True)
# Add figure title
fig.update_layout(title_text="Order across tt of Control and Test")
# Set x-axis title
fig.update_xaxes(title_text="Travel Time (in minutes)")
# Set y-axes titles
fig.update_yaxes(title_text="Share of orders", showgrid=False, secondary_y=False)
fig.update_yaxes(title_text="Delta % difference of Variation1 vs Control", showgrid=False,secondary_y=True)
fig.show()