I plan on targeting all segments except segments 3, 12, and 27, because their E[p|x,m] falls below the threshold of 0.002. The m_s column covers only 1% of the segment it belongs to, so I multiplied the m_s values by 100 to account for the full size of each segment; the total number of mailers to be sent out is 1,270,800. It will cost $254,160 to run the campaign.

In [None]:
# Import block
import os
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import math
from scipy.optimize import minimize
from scipy.optimize import Bounds
from scipy.stats import binom
from math import comb
import scipy.special as sc

In [None]:
def nbetabinomLL(par, data):
    """Return the negative beta-binomial log-likelihood of ``data``.

    Args:
        par: Sequence ``(a, b)`` holding the two beta shape parameters.
        data: Table-like object whose ``'x_s'`` and ``'m_s'`` entries are
            equal-length sequences of integer success and trial counts.

    Returns:
        The negated sum over all rows of
        ``log C(m, x) + log B(a + x, b + m - x) - log B(a, b)``.
    """
    a, b = par[0], par[1]
    x = list(data['x_s'])
    m = list(data['m_s'])
    # log B(a, b) is the same for every row, so evaluate it once.
    logbeta_ab = math.lgamma(a) + math.lgamma(b) - math.lgamma(a + b)
    ll_sum = 0
    # Walk the rows of ``data`` itself; the row count must not come from
    # any other (global) table.
    for x_i, m_i in zip(x, m):
        ll_sum += (
            math.lgamma(a + x_i)
            + math.lgamma(b + m_i - x_i)
            - math.lgamma(a + b + m_i)
            + math.log(math.comb(m_i, x_i))
            - logbeta_ab
        )
    return -ll_sum

def optimizeBB(data):
    """Fit the beta-binomial shape parameters to ``data`` by maximum likelihood.

    Minimizes ``nbetabinomLL`` with Nelder-Mead from the starting point
    ``(1, 1)``, keeping both parameters at or above 0.0001. Prints the
    estimate together with its log-likelihood and returns the optimizer
    result object.
    """
    lower_bounded = ((0.0001, None), (0.0001, None))
    solver_options = {'xatol': 1e-08, 'disp': True}
    fit = minimize(
        nbetabinomLL,
        (1, 1),
        args=(data,),
        method="nelder-mead",
        bounds=lower_bounded,
        options=solver_options,
    )
    print('MLE for a,b is', fit.x, 'and LL is',-nbetabinomLL(fit.x,data))
    return fit

def betabinomial(row):
    """Return the beta-binomial probability of ``row['x_s']`` out of ``row['m_s']``.

    The shape parameters are read from the module-level globals ``a`` and
    ``b``.

    Args:
        row: Mapping-like object with numeric ``'x_s'`` and ``'m_s'`` entries.

    Returns:
        ``C(m, x) * B(a + x, b + m - x) / B(a, b)`` as a float.
    """
    x = row["x_s"]
    m = row["m_s"]
    # Everything is accumulated in log space with scalar lgamma calls and
    # exponentiated once: the binomial coefficient can be far too large to
    # convert to a float, and the beta ratios can underflow on their own.
    log_choose = math.lgamma(m + 1) - math.lgamma(x + 1) - math.lgamma(m - x + 1)
    log_beta_updated = (math.lgamma(a + x) + math.lgamma(b + m - x)
                        - math.lgamma(a + b + m))
    log_beta_ab = math.lgamma(a) + math.lgamma(b) - math.lgamma(a + b)
    return math.exp(log_choose + log_beta_updated - log_beta_ab)

def expected(row):
    """Return ``(a + x_s) / (a + b + m_s)`` for one row.

    ``a`` and ``b`` are read from the module-level globals.
    """
    numerator = a + row['x_s']
    denominator = a + b + row['m_s']
    return numerator / denominator

def full_seg(row):
    """Return the row's ``m_s`` value scaled up by a factor of 100."""
    sampled = row['m_s']
    return sampled * 100

In [None]:
#First read in the dataset and assign its contents to a pandas dataframe
df = pd.read_csv('Targeting.csv')

#Takes x over m, outputs it to Test RR
df['Test RR'] = df['x_s']/df['m_s']

#Variables for our specific problem
mailing_cost = 2000/10000
margin = 100
#Response rate at which one mailer's cost equals the margin on one sale
threshold = mailing_cost/margin
total_mailers = sum(df['m_s'])
total_cost = mailing_cost * total_mailers
total_sales = sum(df['x_s'])
#NOTE(review): this is sales times margin; total_cost is not subtracted
profit = total_sales * margin
probability = .1

#Initializing a and b as 1 so that they can be global variables for our program
#and I'll update them after optimization
a = 1
b = 1
#Beta function B(a, b). gammaln is called on scalars (not one-element
#lists) so math.exp receives a scalar rather than a 1-element array
b_ab = math.exp(sc.gammaln(a) + sc.gammaln(b) - sc.gammaln(a + b))

In [None]:
#Echo the campaign parameters and the test-sample totals, one per line
_summary = (
    ('Threshold: ', threshold),
    ('Probability: ', probability),
    ('Mailing Cost: ', mailing_cost),
    ('Total Cost:', total_cost),
    ('Total Sales: ', total_sales),
    ('Profit: ', profit),
)
for _label, _value in _summary:
    print(_label, _value)

In [None]:
#Flag the segments whose raw test response rate exceeds the threshold
df['Initial Rollout'] = df['Test RR'].gt(threshold)

In [None]:
#Fit the beta-binomial parameters to the test data
res = optimizeBB(data=df)

In [None]:
#Replace the placeholder a and b with the fitted values so that the
#row-wise helpers reading these globals use the estimates
a, b = res.x[0], res.x[1]
a, b

In [None]:
#Beta-binomial probability of each segment's observed x_s given its m_s,
#evaluated at the fitted a and b
_row_likelihood = df.apply(betabinomial, axis=1)
df["P(X=x_s|m_s)"] = _row_likelihood

In [None]:
#Per-segment log likelihood: natural log of the probability column
_segment_probability = df['P(X=x_s|m_s)']
df['LL'] = np.log(_segment_probability)

In [None]:
#Maximized log likelihood: add up the per-segment log likelihoods
total_LL = sum(df['LL'].tolist())
total_LL

In [None]:
#Mean response probability a / (a + b) for the dataset as a whole;
#it does not account for heterogeneity between segments
_shape_total = a + b
avg_r_prob = a / _shape_total
avg_r_prob

In [None]:
#Beta function B(a, b) evaluated at the fitted a and b. gammaln is called
#on scalars (not one-element lists) so math.exp receives a scalar rather
#than a 1-element array
b_ab = math.exp(sc.gammaln(a) + sc.gammaln(b) - sc.gammaln(a + b))
b_ab

In [None]:
#Expected response probability per segment given its own x_s and m_s;
#this is where heterogeneity between segments enters
_expected_rate = df.apply(expected, axis=1)
df['E[p|x,m]'] = _expected_rate

In [None]:
#Mark the segments to roll out to: those whose expected response
#probability exceeds the threshold
df['New Rollout'] = df['E[p|x,m]'].gt(threshold)

In [None]:
#New dataframe containing only the segments that we plan to roll
#out to. Taking an explicit copy means later column assignments
#on rslt_df neither touch df nor trigger SettingWithCopyWarning
rslt_df = df.loc[df['New Rollout']].copy()

In [None]:
#We only had 1% of the segment's population, so to
#calculate how many mailers we'll be sending we need
#to multiply that value times 100.
#full_seg only indexes 'm_s' and multiplies, so it is applied to the
#whole rollout frame at once; assign() returns a new frame rather than
#writing into a selection of df
rslt_df = rslt_df.assign(mailers=full_seg(rslt_df))

In [None]:
#Display the rollout segments
rslt_df

In [None]:
#Total number of mailers across the rollout segments
total_mailers = sum(rslt_df['mailers'].tolist())
total_mailers

In [None]:
#Campaign cost: per-mailer cost times the number of mailers sent
total_cost = mailing_cost * total_mailers
total_cost