In [1]:
import pandas as pd
import numpy as np
import pulp

## Parameters

- Pa = Profit when a customer accepts the campaign
- Pi = Probability that customer i accepts the campaign
- Cc = Contact cost
- Bu = Budget

## Decision Variable

- Xi = binary variable indicating whether customer i should be contacted (1) or not (0).

## Objective Function 

Estimated Profit = $\sum$(Xi $\times$ Pa $\times$ Pi) - $\sum$(Xi $\times$ Cc)

## Constraint

Bu $\geqslant$ $\sum$(Xi $\times$ Cc)

In [2]:
## Configs

# paths
data_path = '../02_data/processed/02_customer_acceptance_prob.csv'   # input: customer table read by the load cell
save_path = '../02_data/output/customer_selection.csv'               # output: selected customers
# The original line was `save_model = ` with no value, which is a SyntaxError and
# stops the cell on a fresh kernel. Nothing in the visible cells reads it, so it is
# bound to None as a placeholder. TODO: set the intended model path or remove it.
save_model = None

In [3]:
# Importing Data
# Read the CSV at data_path into a DataFrame and preview its first rows.
data = pd.read_csv(data_path)
data.head()

Unnamed: 0,ID,Year_Birth,Education,Marital_Status,Income,Kidhome,Teenhome,Dt_Customer,Recency,MntWines,...,AcceptedCmp4,AcceptedCmp5,AcceptedCmp1,AcceptedCmp2,Complain,Z_CostContact,Z_Revenue,Response,Cluster,acceptance_prob
0,5524,1957,Graduation,Single,58138.0,0,0,2012-09-04,58,635,...,0,0,0,0,0,3,11,1,1,0.422973
1,2174,1954,Graduation,Single,46344.0,1,1,2014-03-08,38,11,...,0,0,0,0,0,3,11,0,2,0.004031
2,4141,1965,Graduation,Together,71613.0,0,0,2013-08-21,26,426,...,0,0,0,0,0,3,11,0,4,0.02672
3,6182,1984,Graduation,Together,26646.0,1,0,2014-02-10,26,11,...,0,0,0,0,0,3,11,0,2,0.009109
4,5324,1981,PhD,Married,58293.0,1,0,2014-01-19,94,173,...,0,0,0,0,0,3,11,0,5,0.01491


In [4]:
# Creating a Optimizing object:

class CustomerSelection:
    """Choose which customers to contact so that expected campaign profit is maximal.

    Formulates a 0/1 linear program with PuLP:

        maximise    sum_i x_i * (unit_profit * acceptance_prob_i - unit_cost)
        subject to  sum_i x_i * unit_cost <= budget

    Expected call order: ``definie_problem`` -> ``create_decision_variables`` ->
    ``create_optimization_function`` -> ``create_constrain`` -> ``solve`` ->
    ``filter_dataframe``.

    Every step assigns its result instead of accumulating, so re-running a
    notebook cell does not double the objective, the cost expression or the
    variable list.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        One row per candidate customer; the columns ``'ID'`` and
        ``'acceptance_prob'`` are read. The frame is never modified.
    unit_cost : number
        Cost of contacting one customer.
    unit_profit : number
        Profit earned when a contacted customer accepts.
    budget : number
        Upper bound on the total contact cost.
    """

    def __init__(self, dataframe, unit_cost, unit_profit, budget):
        self.dataframe = dataframe
        self.unit_cost = unit_cost
        self.unit_profit = unit_profit
        self.budget = budget
        self.problem = None            # pulp.LpProblem, set by definie_problem()
        self.decision_variables = []   # one binary variable per dataframe row, in row order
        self.total_customers = None    # objective expression, set by create_optimization_function()
        self.total_cost = None         # total contact-cost expression, set by create_constrain()

    def definie_problem(self):
        """Create an empty maximisation problem and store it in ``self.problem``."""
        self.problem = pulp.LpProblem('SelectingBestCustomers', pulp.LpMaximize)

    def create_decision_variables(self):
        """Create one binary variable per row, named ``'x' + <index label>``.

        The list is rebuilt from scratch on every call so that re-running the
        step does not append duplicates.
        """
        self.decision_variables = [
            pulp.LpVariable('x' + str(label), lowBound=0, upBound=1, cat='Binary')
            for label in self.dataframe.index
        ]

        print("Total number of decision_variables: " + str(len(self.decision_variables)))

    def create_optimization_function(self):
        """Build the expected-profit objective and attach it to the problem.

        Rows and variables are paired by position, so the objective is
        complete whatever the dataframe index looks like.

        Raises
        ------
        ValueError
            If the number of decision variables differs from the number of rows
            (e.g. ``create_decision_variables`` was not called).
        """
        if len(self.decision_variables) != len(self.dataframe):
            raise ValueError(
                "Expected one decision variable per dataframe row; "
                "call create_decision_variables() first."
            )

        # Expected net gain of contacting each customer; .tolist() yields plain
        # Python floats so the multiplication dispatches to the PuLP variable.
        gains = (self.unit_profit * self.dataframe['acceptance_prob'] - self.unit_cost).tolist()

        self.total_customers = pulp.lpSum(
            [gain * customer for gain, customer in zip(gains, self.decision_variables)]
        )
        self.problem += self.total_customers

        print ("Optimization function: "+str(self.total_customers))

    def create_constrain(self):
        """Add the budget constraint: total contact cost must not exceed the budget."""
        self.total_cost = pulp.lpSum(
            [self.unit_cost * customer for customer in self.decision_variables]
        )
        self.problem += (self.total_cost <= self.budget)

        print('Constrain Created')

    def solve(self):
        """Run the solver and print the status and the optimal objective value.

        Raises
        ------
        AssertionError
            If the solver does not report an optimal solution.
        """
        optimization_result = self.problem.solve()
        assert optimization_result == pulp.LpStatusOptimal, (
            "Solver did not reach an optimal solution (status code %s)" % optimization_result
        )
        print("Status:", pulp.LpStatus[self.problem.status])
        print("Optimal Solution to the problem: ", pulp.value(self.problem.objective))

    def filter_dataframe(self):
        """Return the customers the solver selected.

        Reads the solved values straight from ``self.decision_variables``
        (paired with rows by position). Solver values are rounded before the
        comparison so that e.g. 0.9999999 counts as selected. Neither the
        input dataframe nor ``self.dataframe`` is modified, so the method can
        be called repeatedly.

        Returns
        -------
        pandas.DataFrame
            Columns ``'ID'``, ``'acceptance_prob'`` and ``'selected_customer'``
            (always 1), keeping the original index labels of the selected rows.
        """
        chosen = [
            customer.varValue is not None and round(customer.varValue) == 1
            for customer in self.decision_variables
        ]

        selected = self.dataframe.loc[chosen, ['ID', 'acceptance_prob']].copy()
        selected['selected_customer'] = 1

        # Investment is number of selected customers times the configured unit
        # cost (previously a hard-coded 3).
        print('investing %i in the customers generated in the output dataframe, the estimated profit is %.2f' \
              %(selected.shape[0]*self.unit_cost, pulp.value(self.problem.objective)))

        return selected

In [5]:
# Campaign economics passed by keyword so each number is self-describing.
# 3 and 11 equal the Z_CostContact / Z_Revenue values shown in the data preview;
# 6720 = 3 * 2240, i.e. enough budget to contact every customer in this dataset.
optmizer = CustomerSelection(
    dataframe=data,
    unit_cost=3,
    unit_profit=11,
    budget=6720,
)
optmizer.definie_problem()
optmizer.create_decision_variables()

Total number of decision_variables: 2240


In [6]:
# Build the expected-profit objective and attach it to the problem (prints the full expression).
optmizer.create_optimization_function()

Optimization function: 1.6527029567762037*x0 - 2.9556585434312117*x1 - 2.3014088013180443*x10 - 2.6928937839164098*x100 + 7.345317977161283*x1000 + 7.381326708539817*x1001 + 7.100588178956594*x1002 - 2.586116623213067*x1003 - 2.631178605503602*x1004 + 2.5355595281673846*x1005 - 2.8893073717739948*x1006 - 0.49056845582271924*x1007 - 1.2042398441873177*x1008 - 2.881176547062271*x1009 - 2.944443588313192*x101 + 7.324251286987124*x1010 - 2.9808589610180065*x1011 - 2.2298365290180664*x1012 - 1.8709590316768068*x1013 - 2.8798539525603775*x1014 - 2.9249541032744224*x1015 + 0.039463430253088294*x1016 - 2.950134664701407*x1017 - 1.9424494475664489*x1018 - 2.6666979067520686*x1019 - 2.8277431498215133*x102 - 2.9133618121068134*x1020 - 2.8057655663990966*x1021 - 2.8066310272533634*x1022 - 2.976236189983385*x1023 - 2.4776580773855477*x1024 - 2.4404560335912584*x1025 + 1.126570630861445*x1026 - 2.8924199852698242*x1027 - 2.5146125280317233*x1028 - 2.725390361110017*x1029 - 2.617239714875245*x103 + 

In [7]:
# Add the budget constraint, then run the solver; solve() prints the status and the objective value.
optmizer.create_constrain()
optmizer.solve()

Constrain Created
Status: Optimal
Optimal Solution to the problem:  1492.1256252381909


In [8]:
# Keep only the customers the solver selected (selected_customer == 1).
final_df = optmizer.filter_dataframe()

investing 1059 in the customers generated in the output dataframe, the estimated profit is 1492.13


In [9]:
# Display the selected customers.
final_df

Unnamed: 0,ID,acceptance_prob,selected_customer
0,5524,0.422973,1
9,5899,0.347743,1
15,2114,0.943818,1
21,5376,0.940550,1
27,5255,0.468359,1
...,...,...,...
2221,7366,0.878084,1
2224,4286,0.382029,1
2228,8720,0.447184,1
2230,7004,0.383014,1


In [10]:
# Write the selection to save_path; index=False leaves the DataFrame index out of the CSV.
final_df.to_csv(save_path, index=False)