In [1]:
import pandas as pd

In [2]:
# Load the A/B test statistics export that the later cells inspect and aggregate.
stats_csv = 'ab_test_stats.csv'
df = pd.read_csv(stats_csv)

In [None]:
# Inspect the column names of the loaded table; the aggregation cell assigns a
# sum/mean rule to each of the non-key columns listed here.
df.columns

Index(['test_name', 'target_group', 'test_variant', 'orders', 'users',
       'avg_df', 'avg_fv', 'avg_commission', 'avg_delivery_cost',
       'avg_distance_km', 'avg_gmv', 'avg_revenue', 'avg_travel_time',
       'avg_to_customer_time', 'vendor_rdf_fee', 'trade_mktg_fee',
       'vendor_hplus_fee', 'user_paid_fee', 'DF_Revenue'],
      dtype='object')

In [15]:
# Aggregation rule per KPI column: the two volume counters are summed across
# the rows of a group, every other metric gets a plain (unweighted) mean.
# NOTE(review): an unweighted mean over avg_* rows is a mean of means; if the
# rows of a group carry different order counts it will differ from the
# order-weighted average -- confirm this is the intended KPI definition.
# NOTE(review): summing `users` presumably counts a user once per input row --
# verify that this cannot double-count users within a group.
summed_columns = ['orders', 'users']
averaged_columns = [
    'avg_df',
    'avg_fv',
    'avg_commission',
    'avg_delivery_cost',
    'avg_distance_km',
    'avg_gmv',
    'avg_revenue',
    'avg_travel_time',
    'avg_to_customer_time',
    'vendor_rdf_fee',
    'trade_mktg_fee',
    'vendor_hplus_fee',
    'user_paid_fee',
    'DF_Revenue',
]
# Dict order (sums first, then means) fixes the column order of the result.
agg = {
    **{column: 'sum' for column in summed_columns},
    **{column: 'mean' for column in averaged_columns},
}

# Collapse the table to one row per test / target group / variant, keeping the
# grouping keys as ordinary columns, then round the values to 2 decimals.
group_keys = ['test_name', 'target_group', 'test_variant']
grouped = df.groupby(group_keys, as_index=False)
kpis = grouped.agg(agg).round(2)

# kpis now has one row per test_name/target_group/test_variant



In [16]:
# Show the aggregated table: one row per test_name / target_group / test_variant.
kpis

Unnamed: 0,test_name,target_group,test_variant,orders,users,avg_df,avg_fv,avg_commission,avg_delivery_cost,avg_distance_km,avg_gmv,avg_revenue,avg_travel_time,avg_to_customer_time,vendor_rdf_fee,trade_mktg_fee,vendor_hplus_fee,user_paid_fee,DF_Revenue
0,SA_20250414_L_G0_O_Tier_1_RDF_Removing_MOV,Target Group 2,Control,10883,9768,1.89,75.78,4.82,11.22,3.32,75.78,15.89,22.98,9.44,4.93,0.0,4.15,1.55,10.63
1,SA_20250414_L_G0_O_Tier_1_RDF_Removing_MOV,Target Group 2,Variation1,11063,9887,2.87,74.88,4.75,16.16,3.25,74.88,20.67,22.52,9.32,4.78,0.0,3.98,2.28,11.04
2,SA_20250414_L_G0_O_Tier_1_RDF_Removing_MOV,Target Group 3,Control,25653,20028,1.42,93.61,4.08,12.33,5.11,93.61,16.29,48.05,12.82,2.58,0.0,2.1,1.19,5.87
3,SA_20250414_L_G0_O_Tier_1_RDF_Removing_MOV,Target Group 3,Variation1,23819,18985,1.62,96.6,4.21,17.33,5.12,96.6,21.38,48.9,12.9,2.59,0.0,2.25,1.35,6.19
4,SA_20250414_L_G0_O_Tier_1_RDF_Removing_MOV,Target Group 4,Control,146,140,2.42,47.55,2.85,13.22,6.15,47.55,15.79,27.35,11.92,5.98,0.0,4.87,2.02,12.87
5,SA_20250414_L_G0_O_Tier_1_RDF_Removing_MOV,Target Group 4,Variation1,133,128,3.77,48.09,2.91,18.12,5.65,48.09,20.73,28.92,11.67,5.89,0.0,5.41,2.92,14.22
6,SA_20250414_L_G0_O_Tier_1_Removing_MOV,Target Group 1,Control,288,258,8.03,56.33,4.36,11.37,3.54,56.33,15.2,35.34,9.9,0.0,0.0,0.0,6.47,6.47
7,SA_20250414_L_G0_O_Tier_1_Removing_MOV,Target Group 1,Variation1,272,238,9.48,59.17,4.64,12.57,3.52,59.17,16.75,35.84,9.96,0.0,0.0,0.0,7.46,7.46
8,SA_20250414_L_G0_O_Tier_1_Removing_MOV,Target Group 2,Control,46821,33596,3.06,98.13,4.22,13.64,5.19,98.13,17.73,48.93,13.13,2.18,0.0,1.64,2.57,6.39
9,SA_20250414_L_G0_O_Tier_1_Removing_MOV,Target Group 2,Variation1,46164,33250,2.98,96.9,4.18,13.73,5.18,96.9,17.8,48.85,13.2,2.21,0.0,1.68,2.51,6.39


In [17]:
# Persist the aggregated KPI table for the dashboard code, which reads this
# file back; the row index is left out so the file holds only the KPI columns.
kpis_csv = 'kpis.csv'
kpis.to_csv(kpis_csv, index=False)

In [18]:
# app.py
import streamlit as st
import pandas as pd
import plotly.express as px

@st.cache_data
def load_data():
    """Return the KPI table stored in ``kpis.csv`` as a DataFrame.

    The function is wrapped in ``st.cache_data`` so Streamlit can reuse the
    loaded table instead of re-reading the file on every script rerun.
    """
    # Swap this read for your own source (another CSV, a SQL query, ...) if needed.
    kpi_table = pd.read_csv("kpis.csv")
    return kpi_table

# NOTE: this is a Streamlit script; save it as app.py and start it with
# `streamlit run app.py` rather than executing it as a notebook cell.
#
# The table is bound to its own name so that executing this code inside the
# notebook does not overwrite the raw `df` that the aggregation cell groups.
kpi_df = load_data()

st.title("A/B Test Metrics Dashboard")

# Single source of truth for the plottable KPI columns. Both sidebar pickers
# use it, so a metric chosen in one is always a valid option in the other.
METRICS = [
    "orders", "users", "avg_df", "avg_fv", "avg_commission",
    "avg_delivery_cost", "avg_distance_km", "avg_gmv", "avg_revenue",
    "avg_travel_time", "avg_to_customer_time", "vendor_rdf_fee",
    "trade_mktg_fee", "vendor_hplus_fee", "user_paid_fee", "DF_Revenue",
]

# —— Sidebar filters ——
test = st.sidebar.selectbox("Test name", kpi_df["test_name"].unique())
# Only offer the target groups that exist for the selected test.
tg = st.sidebar.selectbox(
    "Target group",
    kpi_df.loc[kpi_df["test_name"] == test, "target_group"].unique(),
)
# Rows for the chosen test and target group.
sub = kpi_df[(kpi_df["test_name"] == test) & (kpi_df["target_group"] == tg)]

metric = st.sidebar.selectbox("Which metric to compare?", METRICS)

# —— Main chart ——
# One bar per variant for the selected metric.
fig = px.bar(
    sub,
    x="test_variant",
    y=metric,
    color="test_variant",
    text_auto=True,
    title=f"{metric} by Variant"
)
st.plotly_chart(fig, use_container_width=True)

# —— Multi-metric view (optional) ——
if st.sidebar.checkbox("Show multiple metrics"):
    # Options come from METRICS instead of the positional slice `columns[4:]`:
    # that slice began at "users" and dropped "orders", so the default
    # [metric] was not among the options whenever "orders" was selected.
    to_plot = st.sidebar.multiselect("Select metrics", METRICS, default=[metric])
    if to_plot:
        # Long format: one row per (variant, metric) pair for the line chart.
        dfm = sub.melt(
            id_vars=["test_variant"],
            value_vars=to_plot,
            var_name="metric",
            value_name="value"
        )
        fig2 = px.line(
            dfm, x="metric", y="value", color="test_variant", markers=True,
            title="Comparison across metrics"
        )
        st.plotly_chart(fig2, use_container_width=True)
    else:
        # Every metric was deselected: there is nothing to melt or plot.
        st.info("Select at least one metric to compare.")

# —— Data table ——
st.subheader("Underlying data")
st.dataframe(sub.reset_index(drop=True))


2025-04-24 15:12:48.657 
  command:

    streamlit run /Users/khalid.alnujaidi/Desktop/exploration_code/venv/lib/python3.13/site-packages/ipykernel_launcher.py [ARGUMENTS]
2025-04-24 15:12:48.664 Session state does not function when running a script without `streamlit run`


DeltaGenerator()

In [19]:
# List the columns of the aggregated table: the three grouping keys followed
# by the KPI columns.
kpis.columns

Index(['test_name', 'target_group', 'test_variant', 'orders', 'users',
       'avg_df', 'avg_fv', 'avg_commission', 'avg_delivery_cost',
       'avg_distance_km', 'avg_gmv', 'avg_revenue', 'avg_travel_time',
       'avg_to_customer_time', 'vendor_rdf_fee', 'trade_mktg_fee',
       'vendor_hplus_fee', 'user_paid_fee', 'DF_Revenue'],
      dtype='object')