Assume that you have run five display ad campaigns, each with 1000 exposures. The data are given here in the same format as in the in-class example: the raw clicks data are in clicks.dataset.2.xlsx and the post-click transaction profit volumes are in volumes.dataset.2.xlsx.

In [24]:
import pandas as pd
import jax
import jax.numpy as jnp
import pandas as pd
import numpy as np
from scipy import stats
import plotly.express as px
import jax
import jax.numpy as jnp
import pandas as pd
import plotly.express as px
import pandas as pd
import plotly.express as px
import jax.scipy.special as jsp
# from jax.experimental import stax

# Post-click transaction volumes; the CSV is parsed once and reused below.
dataVol = pd.read_csv("./Data/volumes.dataset.2.csv")
# Click counts per ad campaign, hard-coded here rather than read from a file.
dataClicks = pd.DataFrame(data={'ad': [1, 2, 3, 4, 5], 'clicks': [52, 38, 51, 45, 25]})
# Despite the name, this is the number of exposures per campaign: the
# simulation cell uses (total_clicks - clicks) as the count of non-clicks.
total_clicks = 1_000
# Number of posterior draws per ad.
n_samples = 100_000
# Independent copy so that edits to one frame never leak into the other.
VolumeD = dataVol.copy()
VolumeDjnp = jnp.array(VolumeD)

In [2]:
# Show the volumes table read from the CSV above.
dataVol

Unnamed: 0,cust,ad,volume
0,1,1,32
1,2,1,54
2,3,1,31
3,4,1,24
4,5,1,42
...,...,...,...
206,207,5,47
207,208,5,61
208,209,5,63
209,210,5,102


In [50]:
# ---------------------------
# INPUTS
# ---------------------------

successes = jnp.array(dataClicks['clicks'])  # observed clicks, one entry per ad campaign
# RNG setup: split the root key into one subkey per ad so every ad's
# posterior draws come from their own random stream.
key = jax.random.PRNGKey(100)
keys = jax.random.split(key, num=successes.shape[0])

# Step 1: Initial Prior (assume prior knowledge or weak prior)
alpha_prior = 1  # 1 success
beta_prior = 1   # 1 failure → equivalent to uniform

# Function to compute Beta PDF
def beta_pdf(x, alpha, beta):
    """Evaluate the Beta(alpha, beta) probability density at ``x``.

    The density is assembled in log space and exponentiated at the end.
    Multiplying ``x**(alpha-1)`` by ``(1-x)**(beta-1)`` and dividing by
    ``B(alpha, beta)`` directly underflows in float32 once the shape
    parameters reach the hundreds (each factor becomes 0, giving 0/0 = NaN).

    Args:
        x: Scalar or array-like of evaluation points.
        alpha: First shape parameter.
        beta: Second shape parameter.

    Returns:
        A JAX array of density values with the broadcast shape of the inputs.
    """
    x = jnp.array(x)
    a = jnp.asarray(alpha)
    b = jnp.asarray(beta)
    # xlogy / xlog1py return 0 when the multiplier is 0, which keeps the
    # 0**0 == 1 convention at the boundaries when alpha or beta equals 1.
    log_density = (
        jsp.xlogy(a - 1, x)
        + jsp.xlog1py(b - 1, -x)
        - jsp.betaln(a, b)
    )
    return jnp.exp(log_density)

# ---------------------------
# SIMULATE BETA DISTRIBUTIONS
# ---------------------------
# For every ad, draw n_samples click-through rates from its Beta posterior
# and evaluate the posterior density at each draw.
n_ads = successes.shape[0]
ctr_columns = []
pdf_columns = []
for i in range(n_ads):
    # Conjugate update: clicks are added to alpha, non-click exposures to beta.
    alpha_post = alpha_prior + successes[i]
    beta_post = beta_prior + (total_clicks - successes[i])
    x = jax.random.beta(keys[i], a=alpha_post, b=beta_post, shape=(n_samples,))
    probabilities = jax.scipy.stats.beta.pdf(x, alpha_post, beta_post)
    ctr_columns.append(x)
    pdf_columns.append(probabilities)

# One row per draw, one column per ad: shape (n_samples, n_ads).
ctrMatrix = jnp.stack(ctr_columns, axis=1)
ctrMatrixProb = jnp.stack(pdf_columns, axis=1)

# Pick the ad with the highest CTR in each draw. The argmax must run over
# ALL columns: dropping the first column here would exclude the first ad
# from ever winning and shift every winner index down by one.
winner_index = jnp.argmax(ctrMatrix, axis=1)  # (n_samples,)

# Count wins for each ad
product_wins = jnp.bincount(winner_index, length=n_ads)

# CTR of the winning ad in each draw
winning_ctrs = jnp.take_along_axis(ctrMatrix, winner_index[:, None], axis=1).squeeze(axis=1)
# ---------------------------
# VISUALIZATION
# ---------------------------
# One row per posterior draw: the winning ad's index and its sampled CTR.
# The JAX arrays are converted to NumPy explicitly before going into pandas.
ctrDF = pd.DataFrame({
    'Winning CTR': np.asarray(winning_ctrs),
    'Winning Product': np.asarray(winner_index),
})

# 0-based winner index -> display label
productmapDict = {0: 'Ad 1', 1: 'Ad 2', 2: 'Ad 3', 3: 'Ad 4', 4: 'Ad 5'}

ctrDF['AdCampaign'] = ctrDF['Winning Product'].map(productmapDict)

# Bar chart for win counts
df_count = pd.DataFrame({
    'Product': list(productmapDict.values()),
    'Wins': list(map(int, product_wins))
})

fig1 = px.bar(df_count, x='Product', y='Wins', title="Winning Counts (Higher CTR in Each Customer)", text='Wins')
fig1.update_layout(template="plotly_white")
fig1.show()

# Histogram of the winning CTR per draw, coloured by the winning ad.
# NOTE(review): the `labels` key 'Transaction Volume' does not match the
# plotted column ('Winning CTR'), so it presumably has no effect — confirm
# the intended axis label.
fig2 = px.histogram(ctrDF, x='Winning CTR', nbins=70, color='AdCampaign',
                    title="Predicted Winning CTR Distribution (Winning Click Through Rates)",
                    labels={'Transaction Volume': 'Predicted Buys'})
fig2.update_layout(template="plotly_white")
fig2.show()

In [46]:
# Shape check on the CTR draw matrix.
# NOTE(review): the simulation cell strips its placeholder column and keeps
# one column per ad, so a column count other than 5 here presumably reflects
# stale kernel state — confirm with Restart & Run All.
ctrMatrix.shape

(100000, 11)

In [5]:
# ---- Step 2: Generate T-distribution samples per ad ----
samples_dict = {}
n_samples = 100_000
# Seeded generator so the volume draws are reproducible when the cell is re-run.
volume_rng = np.random.default_rng(100)

for ad_id, ad_volumes in VolumeD.groupby('ad')['volume']:
    volumes = ad_volumes.to_numpy()
    mean = np.mean(volumes)
    std = np.std(volumes, ddof=1)  # sample standard deviation
    dfree = len(volumes) - 1

    # Sample from a location-scale T-distribution.
    # NOTE(review): scale=std makes the draws as spread out as individual
    # transactions; if the target is the posterior of each ad's *mean*
    # volume, the scale would be std / sqrt(len(volumes)) — confirm intent.
    t_samples = stats.t.rvs(
        dfree, loc=mean, scale=std, size=n_samples, random_state=volume_rng
    )
    samples_dict[ad_id] = t_samples

# ---- Step 3: Flatten for visualization ----
# Long format: n_samples consecutive rows per ad, in the order the ads were
# sampled. Built from arrays instead of one Python dict per draw.
AdVolData = pd.DataFrame({
    'ad': np.repeat(list(samples_dict), n_samples),
    'VolumeOfAds': np.ceil(np.concatenate(list(samples_dict.values()))),
})


In [6]:
# Number the draws within each ad (0, 1, 2, ...) so that the k-th draw of
# every ad can be lined up on the same row.
AdVolData['Ad_index'] = AdVolData.groupby('ad').cumcount()

# Wide layout: one row per draw index, one column per ad.
pivot_df = AdVolData.pivot(
    index='Ad_index',
    columns='ad',
    values='VolumeOfAds',
)

# Label every row with the ad whose drawn volume is the largest.
pivot_df['PrefAd'] = pivot_df.idxmax(axis=1)

# Tally how often each ad came out on top, ordered by ad id, and expose the
# tally as a two-column frame for plotting.
sold_counts = pivot_df['PrefAd'].value_counts().sort_index()
sold_counts_df = sold_counts.rename_axis('Ad').reset_index(name='PrefCstCount')

In [7]:
# Bar chart: number of draws in which each ad had the largest sampled volume.
fig1 = px.bar(
    sold_counts_df,
    x='Ad',
    y='PrefCstCount',
    text='PrefCstCount',
    title="Customer Count with Higher Volume in Each Ad",
)
fig1.update_layout(template="plotly_white")
fig1.show()

# Histogram of the sampled volumes, one colour per ad.
fig2 = px.histogram(
    AdVolData,
    x='VolumeOfAds',
    color='ad',
    nbins=200,
    title="Distribution Volume of Ads across 5 ads",
    labels={'Transaction Volume': 'Predicted Buys'},
)
fig2.update_layout(template="plotly_white")
fig2.show()

In [8]:
# Peek at the first ten draws: one column per ad plus the PrefAd label column.
pivot_df.head(10)

ad,1,2,3,4,5,PrefAd
Ad_index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
0,36.0,51.0,40.0,47.0,94.0,5
1,42.0,87.0,50.0,37.0,110.0,5
2,32.0,44.0,51.0,47.0,66.0,5
3,33.0,38.0,55.0,79.0,125.0,5
4,19.0,64.0,51.0,36.0,87.0,5
5,45.0,65.0,20.0,44.0,52.0,2
6,58.0,76.0,44.0,38.0,139.0,5
7,38.0,52.0,51.0,30.0,79.0,5
8,45.0,45.0,38.0,51.0,127.0,5
9,56.0,46.0,26.0,36.0,105.0,5


In [9]:
# Volume draws as a JAX array; the trailing PrefAd column is dropped so only
# the per-ad columns remain.
jnp.array(pivot_df.iloc[:,:-1])

Array([[ 36.,  51.,  40.,  47.,  94.],
       [ 42.,  87.,  50.,  37., 110.],
       [ 32.,  44.,  51.,  47.,  66.],
       ...,
       [ 46.,  58.,  52.,  54., 111.],
       [ 46.,  24.,  48.,  52.,  48.],
       [ 39.,  69.,  27.,  55., 104.]], dtype=float32)

In [17]:
# Inspect the posterior CTR draws (rows are draws, columns are ads).
ctrMatrix

Array([[0.04370388, 0.03995913, 0.04895058, 0.0456165 , 0.03344988],
       [0.05496478, 0.04341011, 0.0463552 , 0.05018673, 0.02383765],
       [0.04487404, 0.03763629, 0.05204305, 0.05106136, 0.02428964],
       ...,
       [0.05774256, 0.03865443, 0.05328989, 0.03522538, 0.02233337],
       [0.05646257, 0.04433277, 0.04103401, 0.04929304, 0.01482342],
       [0.04651435, 0.03373861, 0.05960698, 0.04188326, 0.02363315]],      dtype=float32)

In [16]:
# Element-wise product of each CTR draw with the volume draw at the same
# row/ad position (PrefAd column excluded). Relies on both matrices having
# the same shape and the same ad ordering across columns.
# NOTE(review): presumably read as profit per exposure for each ad — confirm.
ctrMatrix*jnp.array(pivot_df.iloc[:,:-1])

Array([[1.5733396, 2.0379155, 1.9580231, 2.1439757, 3.1442893],
       [2.3085208, 3.7766798, 2.3177598, 1.8569089, 2.6221414],
       [1.4359694, 1.6559967, 2.6541955, 2.3998837, 1.6031165],
       ...,
       [2.656158 , 2.2419567, 2.7710743, 1.9021703, 2.4790037],
       [2.597278 , 1.0639865, 1.9696324, 2.5632381, 0.7115243],
       [1.8140595, 2.3279638, 1.6093884, 2.303579 , 2.4578476]],      dtype=float32)