In [1]:
import pandas as pd
import numpy as np
import pulp

## Parameters

- Pa = Profit earned when a customer accepts the campaign
- Pi = Probability that customer i accepts the campaign
- Cc = Contact cost
- Bu = Budget

## Decision Variable

- Xi = binary variable indicating whether customer i should be contacted (1) or not (0).

## Objective Function 

Estimated Profit = $\sum$(Xi $\times$ Pa $\times$ Pi) - $\sum$(Xi $\times$ Cc)

## Constraint

Bu $\geqslant$ $\sum$(Xi $\times$ Cc)

In [2]:
## Configs

# paths
# Input CSV: customer table that includes the `acceptance_prob` column used by the optimizer.
data_path = '../02_data/processed/02_customer_acceptance_prob.csv'
# Output CSV: where the customers selected by the optimizer are written.
save_path = '../02_data/output/customer_selection.csv'

In [3]:
# Importing Data
data = pd.read_csv(data_path)

# Fail fast if the columns the optimizer relies on are missing or malformed,
# instead of letting bad values flow silently into the objective function.
required_columns = {'ID', 'acceptance_prob'}
missing_columns = required_columns - set(data.columns)
assert not missing_columns, 'missing expected columns: %s' % sorted(missing_columns)
# `between` is False for NaN, so this also rejects missing probabilities.
assert data['acceptance_prob'].between(0, 1).all(), 'acceptance_prob must lie in [0, 1]'

data.head()

Unnamed: 0,ID,Year_Birth,Education,Marital_Status,Income,Kidhome,Teenhome,Dt_Customer,Recency,MntWines,...,AcceptedCmp4,AcceptedCmp5,AcceptedCmp1,AcceptedCmp2,Complain,Z_CostContact,Z_Revenue,Response,Cluster,acceptance_prob
0,5524,1957,Graduation,Single,58138.0,0,0,2012-09-04,58,635,...,0,0,0,0,0,3,11,1,2,0.416709
1,2174,1954,Graduation,Single,46344.0,1,1,2014-03-08,38,11,...,0,0,0,0,0,3,11,0,5,0.010446
2,4141,1965,Graduation,Together,71613.0,0,0,2013-08-21,26,426,...,0,0,0,0,0,3,11,0,1,0.019209
3,6182,1984,Graduation,Together,26646.0,1,0,2014-02-10,26,11,...,0,0,0,0,0,3,11,0,5,0.01035
4,5324,1981,PhD,Married,58293.0,1,0,2014-01-19,94,173,...,0,0,0,0,0,3,11,0,3,0.002892


In [4]:
# Optimizer object that decides which customers to contact

class CustomerSelection:
    """Select the customers to contact so that expected campaign profit is maximal.

    The model is a 0/1 linear program with one binary decision variable per
    row of ``dataframe``:

        maximize   sum_i x_i * (unit_profit * acceptance_prob_i - unit_cost)
        subject to sum_i x_i * unit_cost <= budget

    Intended call order: ``definie_problem`` -> ``create_decision_variables``
    -> ``create_optimization_function`` -> ``create_constrain`` -> ``solve``
    -> ``filter_dataframe``.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        One row per customer; must contain the columns ``ID`` and
        ``acceptance_prob``.
    unit_cost : number
        Cost of contacting a single customer.
    unit_profit : number
        Profit obtained when a contacted customer accepts the campaign.
    budget : number
        Upper bound on the total contact cost.
    """

    def __init__(self, dataframe, unit_cost, unit_profit, budget):
        self.dataframe = dataframe
        self.unit_cost = unit_cost
        self.unit_profit = unit_profit
        self.budget = budget
        self.problem = None              # pulp.LpProblem, set by definie_problem()
        self.decision_variables = []     # one binary variable per dataframe row, in row order
        self.total_customers = None      # objective expression, set by create_optimization_function()
        self.total_cost = None           # total contact cost expression, set by create_constrain()

    def definie_problem(self):
        """Create the (empty) maximization problem."""
        self.problem = pulp.LpProblem('SelectingBestCustomers', pulp.LpMaximize)

    # Correctly spelled alias; the original (misspelled) name is kept for existing callers.
    define_problem = definie_problem

    def _aligned_variables(self):
        """Return the decision variables after checking there is exactly one per dataframe row."""
        if len(self.decision_variables) != len(self.dataframe):
            raise ValueError(
                'expected one decision variable per dataframe row (%d rows, %d variables); '
                'call create_decision_variables() first'
                % (len(self.dataframe), len(self.decision_variables))
            )
        return self.decision_variables

    def create_decision_variables(self):
        """Create one binary decision variable per customer (row).

        The list is rebuilt on every call, so re-running the cell does not
        accumulate duplicate variables. Variables are named by row position,
        which keeps the names unique whatever the dataframe index looks like.
        """
        self.decision_variables = [
            pulp.LpVariable('x' + str(position), cat='Binary')
            for position in range(len(self.dataframe))
        ]

        print ("Total number of decision_variables: " + str(len(self.decision_variables)))

    def create_optimization_function(self):
        """Build the objective (expected profit minus contact cost) and attach it to the problem."""
        variables = self._aligned_variables()
        # .tolist() yields plain Python floats, pairing each row with its variable by position.
        probabilities = self.dataframe['acceptance_prob'].tolist()

        self.total_customers = pulp.lpSum(
            (self.unit_profit * probability - self.unit_cost) * variable
            for probability, variable in zip(probabilities, variables)
        )
        self.problem += self.total_customers

        print ("Optimization function created")

    def create_constrain(self):
        """Add the budget constraint: total contact cost must not exceed the budget."""
        self.total_cost = pulp.lpSum(
            self.unit_cost * variable for variable in self.decision_variables
        )
        self.problem += (self.total_cost <= self.budget)

        print('Constrain Created')

    def solve(self):
        """Run the solver and report the status and the optimal objective value.

        Raises
        ------
        AssertionError
            If the solver does not reach an optimal solution. Raised
            explicitly so the check still runs under ``python -O``.
        """
        optimization_result = self.problem.solve()
        if optimization_result != pulp.LpStatusOptimal:
            raise AssertionError(
                'solver did not reach an optimal solution (status: %s)'
                % pulp.LpStatus[self.problem.status]
            )
        print("Status:", pulp.LpStatus[self.problem.status])
        print("Optimal Solution to the problem: ", pulp.value(self.problem.objective))

    def filter_dataframe(self):
        """Return the customers selected by the solver.

        The result is a new DataFrame with the columns ``ID``,
        ``acceptance_prob`` and ``selected_customer`` (always 1), keeping the
        original index labels. Neither ``self.dataframe`` nor the caller's
        frame is modified, so the method can safely be called repeatedly.

        Raises
        ------
        RuntimeError
            If the decision variables have no value yet (``solve`` not run).
        ValueError
            If the number of decision variables does not match the number of rows.
        """
        values = [variable.varValue for variable in self._aligned_variables()]
        if any(value is None for value in values):
            raise RuntimeError('decision variables have no value; call solve() first')

        # Round before comparing: the solver returns floats for binary variables.
        flags = [int(round(value)) for value in values]

        selection = self.dataframe[['ID', 'acceptance_prob']].assign(selected_customer=flags)
        selection = selection[selection['selected_customer'] == 1]

        # The investment is the number of selected customers times the configured unit cost.
        print('investing %i in the customers generated in the output dataframe, the estimated profit is %.2f' \
              %(selection.shape[0]*self.unit_cost, self.problem.objective.value()))

        return selection

In [12]:
# Campaign economics, named so they can be tuned in one place.
UNIT_COST = 3      # cost per contact; matches the Z_CostContact value shown in the data preview
UNIT_PROFIT = 11   # profit per accepted offer; matches the Z_Revenue value shown in the data preview
BUDGET = 6720      # NOTE(review): equals 3 * 2240 rows, so every customer is affordable - confirm this is intended

# Build and solve the selection problem step by step.
optmizer = CustomerSelection(data, UNIT_COST, UNIT_PROFIT, BUDGET)
optmizer.definie_problem()
optmizer.create_decision_variables()
optmizer.create_optimization_function()
optmizer.create_constrain()
optmizer.solve()

Total number of decision_variables: 2240
Optimization function created
Constrain Created
Status: Optimal
Optimal Solution to the problem:  1469.299935956628


In [13]:
# Keep only the customers the solver selected; this call also prints the implied spend and estimated profit.
final_df = optmizer.filter_dataframe()

investing 1008 in the customers generated in the output dataframe, the estimated profit is 1469.30


In [14]:
# Show the selected customers (ID, acceptance_prob, selected_customer).
final_df

Unnamed: 0,ID,acceptance_prob,selected_customer
0,5524,0.416709,1
9,5899,0.552721,1
15,2114,0.905036,1
21,5376,0.954095,1
29,1966,0.531422,1
...,...,...,...
2221,7366,0.917414,1
2224,4286,0.388461,1
2228,8720,0.744309,1
2230,7004,0.295380,1


In [9]:
# Write the selected customers to save_path; the DataFrame index is not written.
final_df.to_csv(save_path, index=False)