Assume that you have run five display ad campaigns, each with 1000 exposures. The data are given here in the same format as in the in-class example: the raw clicks data are in clicks.dataset.2.xlsx and the post-click transaction profit volumes are in volumes.dataset.2.xlsx.

In [1]:
import pandas as pd
import jax
import jax.numpy as jnp
# from jax.experimental import stax


dataVol = pd.read_csv("./Data/volumes.dataset.2.csv")
dataClicks=pd.DataFrame(data={'ad':[1,2,3,4,5],'clicks':[52,38,51,45,25]})
NOoFExposures=10000

In [2]:
dataVol

Unnamed: 0,cust,ad,volume
0,1,1,32
1,2,1,54
2,3,1,31
3,4,1,24
4,5,1,42
...,...,...,...
206,207,5,47
207,208,5,61
208,209,5,63
209,210,5,102


In [3]:
import jax
import jax.numpy as jnp
import pandas as pd
import plotly.express as px

# ---------------------------
# INPUTS
# ---------------------------
total_clicks = 1000
successes = jnp.array(dataClicks['clicks'])  # Success clicks for Product A, B, C
n_samples = 10_000
audience_size = 1000

# RNG setup
key = jax.random.PRNGKey(100)
keys = jax.random.split(key, num=5)

# ---------------------------
# SIMULATE BETA DISTRIBUTIONS
# ---------------------------
samples = []
for i in range(5):
    alpha = successes[i]
    beta = total_clicks - alpha
    samples.append(jax.random.beta(keys[i], a=alpha, b=beta, shape=(n_samples,)))

# Stack into shape (n_samples, 3)
samples_matrix = jnp.stack(samples, axis=1)  # (10000, 3)

# Pick product with highest CTR per sample
winner_index = jnp.argmax(samples_matrix, axis=1)  # (10000,)

# Count wins for each product
product_wins = jnp.bincount(winner_index, length=5)

# ---------------------------
# CONVERT TO TRANSACTION VOLUME
# ---------------------------
# For each sample, take CTR of winning product and multiply by audience
winning_ctrs = jnp.take_along_axis(samples_matrix, winner_index[:, None], axis=1).squeeze()
transaction_volume = jnp.round(winning_ctrs * audience_size)

# ---------------------------
# VISUALIZATION
# ---------------------------
# Convert to DataFrame
df = pd.DataFrame({
    'Winning CTR': winning_ctrs,
    'Winning Product': winner_index,
    'Transaction Volume': transaction_volume
})

# Bar chart for win counts
df_count = pd.DataFrame({
    'Product': ['Product A', 'Product B', 'Product C','Product D','Product E'],
    'Wins': list(map(int, product_wins))
})

fig1 = px.bar(df_count, x='Product', y='Wins', title="Winning Counts (Best CTR in Each Sample)", text='Wins')
fig1.update_layout(template="plotly_white")
fig1.show()

# Histogram of Transaction Volumes
fig2 = px.histogram(df, x='Transaction Volume', nbins=50,
                    title="Predicted Transaction Volume Distribution (Winning Product)",
                    labels={'Transaction Volume': 'Predicted Buys'})
fig2.update_layout(template="plotly_white")
fig2.show()

I0000 00:00:1744958110.601088  467174 service.cc:145] XLA service 0x10fc580f0 initialized for platform METAL (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1744958110.601096  467174 service.cc:153]   StreamExecutor device (0): Metal, <undefined>
I0000 00:00:1744958110.602375  467174 mps_client.cc:406] Using Simple allocator.
I0000 00:00:1744958110.602389  467174 mps_client.cc:384] XLA backend will use up to 12884443136 bytes on device 0 for SimpleAllocator.


Metal device set to: Apple M3 Pro

systemMemory: 18.00 GB
maxCacheSize: 6.00 GB



In [4]:
samples_matrix

Array([[0.04370388, 0.03995913, 0.04895058, 0.0456165 , 0.03344988],
       [0.05496478, 0.04341011, 0.0463552 , 0.05018673, 0.02383765],
       [0.04487404, 0.03763629, 0.05204305, 0.05106136, 0.02428964],
       ...,
       [0.06380044, 0.04932219, 0.05372023, 0.03889121, 0.03254651],
       [0.05715976, 0.04009878, 0.04663163, 0.05524452, 0.02390727],
       [0.04824201, 0.02508076, 0.06058313, 0.04437528, 0.02085335]],      dtype=float32)

In [5]:
winner_index

Array([2, 0, 2, ..., 0, 0, 2], dtype=int32)

In [6]:
product_wins

Array([4633,  153, 4012, 1202,    0], dtype=int32)

In [7]:
winning_ctrs

Array([0.04895058, 0.05496478, 0.05204305, ..., 0.06380044, 0.05715976,
       0.06058313], dtype=float32)

In [8]:
import pandas as pd

VolumeD= pd.read_csv('./Data/volumes.dataset.2.csv')
VolumeDjnp = jnp.array(VolumeD)

In [9]:
import pandas as pd
import numpy as np
from scipy import stats
import plotly.express as px

# ---- Step 2: Generate T-distribution samples per ad ----
samples_dict = {}
n_samples = 5000

for ad_id, group in VolumeD.iloc[:,1:].groupby('ad'):
    volumes = group['volume'].values
    mean = np.mean(volumes)
    std = np.std(volumes, ddof=1)
    dfree = len(volumes) - 1

    # Sample from T-distribution
    t_samples = stats.t.rvs(dfree, loc=mean, scale=std, size=n_samples)
    samples_dict[ad_id] = t_samples

# ---- Step 3: Flatten for visualization ----
flat_samples = []

for ad_id, samples in samples_dict.items():
    flat_samples.extend([{'ad': ad_id, 'sampled_volume': v} for v in samples])

samples_df = pd.DataFrame(flat_samples)

In [10]:
import plotly.express as px

fig = px.histogram(
    samples_df,
    x="sampled_volume",
    color="ad",
    nbins=60,
    title="T-distribution Sampling of Ad Volumes (5000 Samples per Ad)",
    labels={"sampled_volume": "Simulated Volume"},
    opacity=0.7,
    barmode="overlay"
)
fig.update_layout(template="plotly_white")
fig.show()

In [12]:
samples_df.mean(axis=1)

0        12.049506
1        19.761933
2        17.027611
3        26.166573
4        22.021808
           ...    
24995    60.223014
24996    58.430832
24997    61.187930
24998    50.111244
24999    43.715093
Length: 25000, dtype: float64

In [20]:
samples_df.groupby('ad')['sampled_volume'].mean()

ad
1    42.289741
2    56.627875
3    41.335482
4    48.198777
5    90.354618
Name: sampled_volume, dtype: float64