# Marketplace Paid Feature Pricing Optimization


## Scenario Summary
- The product we're pushing is boosts on the first 10 listings; we'll assume in the following that sellers upload or activate their listings in arbitrary order (i.e., regardless of which of the listings for that month would benefit the most from a boost)
- We assume that the business is static, and doesn't change over time; no seasonality either 

## Imports

In [1]:
import numpy as np
import polars as pl
import matplotlib.pyplot as plt
import seaborn as sns
import pymc as pm
import arviz as az



In [2]:
# Enable the underscore notation for polars columns, similar to ibis and dplyr (i.e., `pl.col("column_name")` -> `_.column_name`)
class PolarsColumnNamespace:
    def __getattr__(self, name):
        return pl.col(name)

# Enable _ as a shorthand for that class
_ = PolarsColumnNamespace()

In [3]:
# Samples from a beta distribution parameterized with mean and sd
def sample_beta_mean_sd(mean, sd, size=None):
    assert(0 < mean < 1)
    var = sd ** 2
    common = mean * (1 - mean) / (sd ** 2) - 1
    alpha = mean * common
    beta = (1 - mean) * common
    if alpha <= 0 or beta <= 0:
        raise ValueError("Invalid combination of mean and sd resulting in non-positive alpha/beta")
    return np.random.beta(alpha, beta, size=size)


In [4]:
def sample_lognormal_mean_sd(mean, sd, size=None):
    sigma = np.sqrt(np.log(1 + (sd / mean) ** 2))
    mu = np.log(mean) - 0.5 * sigma ** 2
    return np.random.lognormal(mean=mu, sigma=sigma, size=size)

def sample_lognormal_quantiles(q5, q95, size=None):
    z5, z95 = -1.64485, 1.64485
    sigma = (np.log(q95) - np.log(q5)) / (z95 - z5)
    mu = np.log(q5) - sigma * z5
    return np.random.lognormal(mean=mu, sigma=sigma, size=size)

In [5]:
from scipy.special import expit, logit
from scipy.stats import zscore

def correlated_var(x, r):
    """
    Generate a new variable correlated with x at level r while preserving
    x's original mean and standard deviation.
    """
    n = len(x)
    noise = np.random.normal(0, 1, n)

    # Standardize x
    z_x = zscore(x)

    # Generate correlated variable in z-score space
    y = r * z_x + np.sqrt(1 - r ** 2) * noise
    
    # Rescale z_y to match the original mean and std of x
    return y * x.std() + x.mean()

def correlated_rate(rate, corr_logit_space):
    """
    """
    # Convert actual boost effects to logit space
    rate_logit = logit(rate)

    # Create correlated perceived boost effect in logit space
    rate_noisy_logit = correlated_var(rate_logit, r = corr_logit_space)

    # Convert perceived boost effect back to probability space
    rate_noisy = expit(rate_noisy_logit)

    return rate_noisy

## Synthetic Data Generation

In [6]:
# Imports
import numpy as np
import polars as pl

from scipy.special import expit, logit
from scipy.stats import zscore


In [7]:
def perceived_boost(actual_boost):
    """
    """
    # Convert actual boost effects to logit space
    actual_boost_logit = logit(actual_boost)

    # Create noisy perceived boost effect in logit space
    actual_boost_noisy_logit = 1.25 * zscore(actual_boost_logit) * actual_boost_logit.std() + actual_boost_logit.mean() + np.random.normal(0, .2, len(actual_boost))

    # Convert perceived boost effect back to probability space
    return expit(actual_boost_noisy_logit)

- `par_` stands for parameter
- `obs_` stands for observed 

In [None]:
def generate_sellers(obs_months_active, min_months_active, max_months_active, avg_margin_rate, avg_boost_effect):
    """
    """

    seller_id = np.arange(n_sellers)
    obs_months_active = np.random.randint(min_months_active, max_months_active + 1, size=n_sellers)

    # Listings per month: 90% between 5-50
    par_mean_active_listings_per_month = sample_lognormal_quantiles(q5 = 5, q95 = 50, size=n_sellers)

    # Quantity sold per active listing per month: 90% between 3-30
    par_mean_volume_per_listing_per_month = sample_lognormal_quantiles(q5 = 3, q95 = 30, size=n_sellers)

    # Revenue: mean per unit sold, 90% between 20-500, sd as 30% of mean
    par_mean_revenue_per_unit = sample_lognormal_quantiles(q5 = 20, q95 = 500, size=n_sellers)

    # Margin by seller
    par_margin_rate = sample_beta_mean_sd(avg_margin_rate, 0.15, size=n_sellers)

    # Volume, revenue, and profit per month
    par_mean_volume_per_month = par_mean_active_listings_per_month * par_mean_volume_per_listing_per_month
    par_mean_revenue_per_month = par_mean_volume_per_month * par_mean_revenue_per_unit
    par_mean_profit_per_month = par_mean_revenue_per_month * par_margin_rate
    
    # Profit per month for the first n listings
    par_mean_profit_per_month_first_n_listings = par_margin_rate * par_mean_revenue_per_unit * par_mean_volume_per_month.clip(max=10)

    # Boost effects rates: actual and perceived; assumed to be unrelated [to-do: make them correlated]
    par_actual_boost_effect = sample_beta_mean_sd(mean = avg_boost_effect, sd = 0.075, size=n_sellers)
    # Assume that people underestimate or overestimate the magnitude of the boost as it affects the business by various amounts
    par_perceived_boost_effect = perceived_boost(par_actual_boost_effect)

    # Boost effect: amounts, actual and perceived on the first up to 10 listings per month (that's equivalent to an arbitrary 10 listings under the current assumptions) 
    par_mean_actual_boost_amount_per_month = par_actual_boost_effect * par_mean_profit_per_month_first_n_listings
    par_mean_perceived_boost_amount_per_month = par_perceived_boost_effect * par_mean_profit_per_month_first_n_listings


    seller_data = pl.DataFrame({
        'seller_id': seller_id,
        'obs_months_active': obs_months_active,
        
        # Listings per month [→ Poisson λ]
        'par_mean_active_listings_per_month': par_mean_active_listings_per_month,
        
        # Quantity sold per active listing per month [→ Poisson]
        'par_mean_volume_per_listing_per_month': par_mean_volume_per_listing_per_month,

        # Quantity sold per month [→ Poisson]
        'par_mean_volume_per_month': par_mean_volume_per_month,

        # Revenue per unit sold [→ Gaussian, or maybe Lognormal]
        'par_mean_revenue_per_unit': par_mean_revenue_per_unit,
        'par_sd_revenue_per_unit': par_mean_revenue_per_unit * 0.3,

        # Boost effect: rates, actual and perceived; assumed to be unrelated
        'par_actual_boost_effect': par_actual_boost_effect,
        'par_perceived_boost_effect': par_perceived_boost_effect,
        
        # Revenue per month
        'par_mean_revenue_per_month': par_mean_revenue_per_month,
        
        # Margin by seller
        'par_margin_rate': par_margin_rate,
        
        # Profit per month
        'par_mean_profit_per_month': par_mean_profit_per_month,
        'par_mean_profit_per_month_first_n_listings': par_mean_profit_per_month,
        
        # Boost effect: amounts, actual and perceived
        'par_mean_actual_boost_amount_per_month': par_mean_actual_boost_amount_per_month,
        'par_mean_perceived_boost_amount_per_month': par_mean_perceived_boost_amount_per_month

    })
    
    return seller_data


def approximate_profit_per_month_distribution_first_n(n_inter, max_listings, par_mean_active_listings_per_month, par_mean_volume_per_listing_per_month, par_mean_revenue_per_unit, par_sd_revenue_per_unit, par_margin_rate, par_actual_boost_effect):
    """
    """
    
    sim_active_listings_per_month = np.random.poisson(par_mean_active_listings_per_month, size=n_inter)
 
    for i in range(n_inter):
        n_relevant_listings_per_month = sim_active_listings_per_month[i].clip(max=max_listings)
        sim_volume_per_listing_per_month = [np.random.poisson(rate, size=n_months).sum() for rate, n_months in par_active_listings_per_month]
        
        np.random.poisson(par_mean_volume_per_listing_per_month, size=n_relevant_listings_per_month)
        
        par_mean_volume_per_listing_per_month
        par_mean_revenue_per_unit, par_sd_revenue_per_unit,
        par_margin_rate,
        par_actual_boost_effect


def sample_seller(obs_months_active, par_mean_active_listings_per_month, par_mean_volume_per_listing_per_month, par_mean_revenue_per_unit, par_sd_revenue_per_unit):
    """
    """
    
    par_active_listings_per_month = zip(par_mean_active_listings_per_month, obs_months_active)
    obs_active_listing_month_pairs = [np.random.poisson(rate, size=n_months).sum() for rate, n_months in par_active_listings_per_month]
    obs_active_listings_per_month = pl.Series(obs_active_listing_month_pairs) / obs_months_active

    par_volume_per_listing_per_month = zip(par_mean_volume_per_listing_per_month, obs_active_listing_month_pairs)
    obs_volume_total = [np.random.poisson(rate, size=n_listing_month_pairs).sum() for rate, n_listing_month_pairs in par_volume_per_listing_per_month]
    obs_volume_per_month = pl.Series(obs_volume_total) / obs_months_active

    par_revenue = zip(par_mean_revenue_per_unit, par_sd_revenue_per_unit, obs_volume_total)
    obs_revenue_total = [sample_lognormal_mean_sd(mean, sd, size=n_volume_total).sum() for mean, sd, n_volume_total in par_revenue]
    obs_revenue_per_month = pl.Series(obs_revenue_total) / obs_months_active

    return pl.DataFrame({
        'obs_listings_per_month': obs_active_listings_per_month,
        'obs_volume_per_month': obs_volume_per_month,
        'obs_revenue_per_month': obs_revenue_per_month
    })
    
def sample_sellers(seller_data):
    """
    """
    obs_samples = seller_data.pipe(lambda s: sample_seller(
        s["obs_months_active"],
        s["par_mean_active_listings_per_month"],
        s["par_mean_volume_per_listing_per_month"],
        s["par_mean_revenue_per_unit"],
        s["par_sd_revenue_per_unit"]
    ))
    return pl.concat([seller_data, obs_samples], how="horizontal")

In [11]:
# Synthetic Data Generation
np.random.seed(42)
n_sellers = 10000

# User months active parameters
min_months_active = 3
max_months_active = 4 * 12

# Assume that the average margin is 10%; of course, it varies by seller - somewhat more realistically, we could specify one by listing category
avg_margin_rate = 0.15

# Assume that the average boost effect on revenue is +15%; of course here, too there must be lots of variance by category and individually
avg_boost_effect = 0.1

# Generate the seller parameters
seller_data = generate_sellers(n_sellers, min_months_active, max_months_active, avg_margin_rate, avg_boost_effect)

# Sample the seller observations
seller_data = sample_sellers(seller_data)

# Round to 1 decimal place for printing
seller_data.with_columns([
    pl.col(col).round(2).alias(col) 
    for col, dtype in zip(seller_data.columns, seller_data.dtypes)
    if dtype == pl.Float64
])

seller_id,obs_months_active,par_mean_active_listings_per_month,par_mean_volume_per_listing_per_month,par_mean_volume_per_month,par_mean_revenue_per_unit,par_sd_revenue_per_unit,par_actual_boost_effect,par_perceived_boost_effect,par_mean_revenue_per_month,par_margin_rate,par_mean_profit_per_month,par_mean_actual_boost_amount_per_month,par_mean_perceived_boost_amount_per_month,obs_listings_per_month,obs_volume_per_month,obs_revenue_per_month
i64,i64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64
0,41,8.67,22.06,191.25,94.1,28.23,0.03,0.02,17997.82,0.01,127.99,0.19,0.12,9.44,204.59,19257.25
1,31,22.27,5.27,117.27,105.7,31.71,0.14,0.2,12395.35,0.05,651.71,7.75,11.26,21.61,114.35,12052.79
2,17,13.63,12.87,175.42,139.22,41.77,0.04,0.04,24421.76,0.01,262.22,0.67,0.6,12.41,157.0,21603.87
3,45,29.25,7.84,229.27,23.19,6.96,0.12,0.12,5316.09,0.04,188.18,0.95,0.95,29.6,235.78,5477.44
4,10,56.46,26.35,1487.66,87.87,26.36,0.12,0.13,130716.21,0.66,86700.53,70.77,77.71,58.0,1544.9,135857.17
…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…
9995,18,12.91,4.41,56.9,33.68,10.1,0.22,0.28,1916.44,0.03,56.6,2.22,2.75,12.56,52.83,1792.79
9996,47,7.59,22.87,173.65,128.18,38.46,0.02,0.01,22258.85,0.08,1750.8,2.07,1.42,7.72,176.66,22570.05
9997,17,10.63,5.58,59.31,28.02,8.41,0.19,0.23,1661.68,0.06,105.01,3.38,4.1,10.59,57.88,1617.01
9998,3,7.99,8.91,71.25,164.45,49.34,0.07,0.1,11716.68,0.1,1133.11,11.8,15.79,6.67,66.0,10592.61


In [None]:
# determine the objective expected value of the package to the seller
objective_expected_value = seller_data['θ_actual_boost_amount_per_month']

# determine the subjective expected value of the package to the seller
subjective_expected_value = seller_data['θ_perceived_boost_amount_per_month']

# determine the willingness to pay based on the subjective expected value
seller_data['willingness_to_pay'] = subjective_expected_value

In [12]:
86700.53 * 0.12

10404.0636

In [None]:


## Assume sellers with more listings and higher item price gain more value
#seller_data['potential_value'] = (
#    0.5 * seller_data['listings'] + 0.05 * seller_data['avg_item_price'] + 2 * seller_data['past_promotions']
#)


In [None]:

## Assume sellers with more listings and higher item price gain more value
#seller_data['potential_value'] = (
#    0.5 * seller_data['listings'] + 0.05 * seller_data['avg_item_price'] + 2 * seller_data['past_promotions']
#)

## True WTP: linear in potential value + noise
#seller_data['true_wtp'] = seller_data['potential_value'] + np.random.normal(0, 2, n_sellers)

## ## Experiment Simulation
#price_points = np.array([5, 10, 15, 20, 25, 30])
#assigned_prices = np.random.choice(price_points, n_sellers)
#seller_data['price_offer'] = assigned_prices

## Purchase probability drops sigmoidally with price vs. WTP
#def purchase_prob(wtp, price):
#    return 1 / (1 + np.exp(price - wtp))

#seller_data['purchase_prob'] = purchase_prob(seller_data['true_wtp'], seller_data['price_offer'])
#seller_data['purchase'] = np.random.binomial(1, seller_data['purchase_prob'])

## Model Willingness to Pay (WTP) using Bayesian Logistic Regression


In [None]:
with pm.Model() as model:
    alpha = pm.Normal('alpha', 0, 5)
    beta_price = pm.Normal('beta_price', 0, 5)
    beta_listings = pm.Normal('beta_listings', 0, 1)
    beta_item_price = pm.Normal('beta_item_price', 0, 0.1)
    beta_past_promos = pm.Normal('beta_past_promos', 0, 1)

    mu = (
        alpha
        + beta_price * seller_data['price_offer']
        + beta_listings * seller_data['listings']
        + beta_item_price * seller_data['avg_item_price']
        + beta_past_promos * seller_data['past_promotions']
    )

    p = pm.math.sigmoid(mu)
    purchase_obs = pm.Bernoulli('purchase_obs', p=p, observed=seller_data['purchase'])

    trace = pm.sample(1000, tune=1000, target_accept=0.9, cores=2)


## Posterior Analysis

In [None]:
az.plot_trace(trace)
plt.show()

In [None]:
az.summary(trace, hdi_prob=0.95)

## Predict Optimal Prices

In [None]:
posterior_means = trace.posterior.mean(dim=("chain", "draw"))

alpha_est = posterior_means['alpha'].values.item()
beta_price_est = posterior_means['beta_price'].values.item()
beta_listings_est = posterior_means['beta_listings'].values.item()
beta_item_price_est = posterior_means['beta_item_price'].values.item()
beta_past_promos_est = posterior_means['beta_past_promos'].values.item()

def predict_prob(price, listings, avg_item_price, past_promotions):
    mu = (
        alpha_est
        + beta_price_est * price
        + beta_listings_est * listings
        + beta_item_price_est * avg_item_price
        + beta_past_promos_est * past_promotions
    )
    return 1 / (1 + np.exp(-mu))

# Predict take rate and revenue at each price point for a sample seller
sample_seller = seller_data.iloc[0]
price_grid = np.linspace(5, 30, 50)
probs = [
    predict_prob(price, sample_seller['listings'], sample_seller['avg_item_price'], sample_seller['past_promotions'])
    for price in price_grid
]
revenue = price_grid * np.array(probs)

In [None]:

plt.plot(price_grid, revenue)
plt.xlabel('Price')
plt.ylabel('Expected Revenue')
plt.title('Optimal Pricing Curve for Sample Seller')
plt.show()

In [None]:
# ## Appendix / TODO
# - Explore heterogeneous treatment effects: model interactions with seller features
# - Segment sellers by value and compute personalized price points
# - Add auction dynamics for limited promoted slots
# - Simulate market competition effects
# - Implement doubly robust causal estimators to correct for potential selection bias
# - Build a dashboard/report with MyST or Voila for stakeholder presentation
