In [1]:
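# One-time setup: uncomment and run these only if the packages are missing
# from the kernel's environment.
# !pip install docplex
# !pip install pulp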

In [2]:
import pandas as pd
import scipy
import sklearn
import numpy as np
import pickle
import setuptools
import wheel
import matplotlib.pyplot as plt
import docplex.mp.model as cpx
import pulp as plp
import random

In [3]:
# Load the raw ranking table (missing entries become 0) and derive a scoring
# table in which each rank value is swapped for its score.
mt_rank = pd.read_csv('PAIR_2018_INPUT_IDENTITY_ADJUSTED.csv').fillna(0)

# Lookup table: rank 1..8 -> score 6, 3, 2, 1, 0, 0, 0, 0.
scoring_transform = pd.DataFrame({
    'rank': [1, 2, 3, 4, 5, 6, 7, 8],
    'score': [6, 3, 2, 1, 0, 0, 0, 0],
})
rank_to_score = scoring_transform.set_index('rank')['score'].to_dict()

# NOTE(review): the replacement is applied to every column of the frame, not
# only the PAIR* columns -- confirm no other column can hold values 1..8.
mt_scoring = mt_rank.replace(rank_to_score)

In [4]:
# Per-room total for every candidate: add up the candidate's scores over the
# PAIR columns grouped under that room.
room_pairs = {
    'RoomA': ['PAIR1', 'PAIR2', 'PAIR4', 'PAIR6', 'PAIR12'],
    'RoomB': ['PAIR7', 'PAIR10', 'PAIR13', 'PAIR18'],
    'RoomC': ['PAIR8', 'PAIR9', 'PAIR15', 'PAIR17'],
    'RoomD': ['PAIR3', 'PAIR5', 'PAIR11', 'PAIR14', 'PAIR16'],
}
for room_name, pair_columns in room_pairs.items():
    # Row-wise sum; the frame was NaN-filled with 0 when it was loaded, so
    # this matches adding the columns one by one.
    mt_scoring[room_name] = mt_scoring[pair_columns].sum(axis=1)

In [5]:
#mt_scoring

In [6]:
# Keep only what the optimisation needs: the candidate identifier (column 0)
# followed by the four per-room totals (columns 1..4).
model_columns = ['CANDIDATE_ID', 'RoomA', 'RoomB', 'RoomC', 'RoomD']
dset = mt_scoring[model_columns]

In [7]:
# Preview the first rows: one row per candidate, with CANDIDATE_ID followed by
# the RoomA..RoomD totals.
dset.head()

Unnamed: 0,CANDIDATE_ID,RoomA,RoomB,RoomC,RoomD
0,1001,6.0,0.0,6.0,0.0
1,1002,2.0,0.0,9.0,1.0
2,1003,7.0,2.0,0.0,3.0
3,1004,5.0,0.0,7.0,0.0
4,1005,0.0,4.0,0.0,8.0


In [8]:
# Problem dimensions and index sets.
n = dset.shape[0]        # number of candidates (one row of dset each)
m = 4                    # number of rooms
set_I = range(n)         # candidate indices: row positions in dset
set_J = range(1, m + 1)  # room indices 1..m: column positions in dset (column 0 is CANDIDATE_ID)

In [9]:
# Every (candidate, room) index pair, candidate-major.
pairs = [(i, j) for i in set_I for j in set_J]

# rank[i, j]: value at row i, column j of dset, i.e. candidate i's total for room j.
rank = {(i, j): dset.iloc[i, j] for (i, j) in pairs}
# l / u: lower and upper bound of each decision variable (0 and 1).
l = dict.fromkeys(pairs, 0)
u = dict.fromkeys(pairs, 1)
# a: coefficient of each decision variable in the counting constraints.
a = dict.fromkeys(pairs, 1)

# b: maximum number of candidates per room; b2: minimum number per room.
b = {1: 30, 2: 24, 3: 24, 4: 30}
b2 = {1: 25, 2: 20, 3: 20, 4: 25}

# b3: every candidate joins exactly one room.
b3 = dict.fromkeys(set_I, 1)

# b4: minimum satisfaction score per candidate, by tier of candidate position
# (original note: candidates are sorted by application score).
# NOTE(review): the tiers stop at index 104, so a candidate at index >= 105
# gets no entry, and any lookup of b4[i] over all of set_I would then fail.
score_tiers = ((0, 25, 6), (25, 50, 5), (50, 75, 4), (75, 105, 3))
b4 = {}
for first, last, min_score in score_tiers:
    b4.update(dict.fromkeys(set_I[first:last], min_score))

In [10]:
# Coefficients used by the model: c is a plain copy of rank, so the per-room
# totals are used as scores unchanged (no further transformation happens here).
c = dict(rank)

In [11]:
# Empty PuLP problem; the optimisation sense and objective are set in a later cell.
opt_model = plp.LpProblem("MIP_Model")

In [12]:
# Decision variables: x_vars[cand, room] is an integer bounded by l and u
# (0 and 1), named "x_<candidate>_<room>".
x_vars = {
    (cand, room): plp.LpVariable(
        name="x_{0}_{1}".format(cand, room),
        lowBound=l[cand, room],
        upBound=u[cand, room],
        cat=plp.LpInteger,
    )
    for cand in set_I
    for room in set_J
}

In [13]:
# Room occupancy constraints: for every room j the number of assigned
# candidates must be at most b[j] and at least b2[j].
#
# LpProblem.addConstraint does not return the constraint, so storing its
# result only yields a dict of None values. Build the LpConstraint objects
# first, keep them in named dicts, then register them with the model.
room_max_constraints = {
    j: plp.LpConstraint(
        e=plp.lpSum(a[i, j] * x_vars[i, j] for i in set_I),
        sense=plp.LpConstraintLE,
        rhs=b[j],
        name="constraint_{0}".format(j),
    )
    for j in set_J
}
room_min_constraints = {
    j: plp.LpConstraint(
        e=plp.lpSum(a[i, j] * x_vars[i, j] for i in set_I),
        sense=plp.LpConstraintGE,
        rhs=b2[j],
        name="constraint2_{0}".format(j),
    )
    for j in set_J
}

# Same registration order as before: all maxima first, then all minima.
for room_constraint in list(room_max_constraints.values()) + list(room_min_constraints.values()):
    opt_model.addConstraint(room_constraint)

# Keep the legacy name bound (it previously ended up holding the result of the
# minimum-occupancy comprehension); it now refers to real constraint objects.
constraints = room_min_constraints

In [14]:
# Assignment constraints: every candidate i is placed in exactly b3[i] (= 1) room.
#
# LpProblem.addConstraint does not return the constraint, so the objects are
# built and kept in a dict first, then registered with the model.
one_room_constraints = {
    i: plp.LpConstraint(
        e=plp.lpSum(a[i, j] * x_vars[i, j] for j in set_J),
        sense=plp.LpConstraintEQ,
        rhs=b3[i],
        name="constraint3_{0}".format(i),
    )
    for i in set_I
}
for one_room_constraint in one_room_constraints.values():
    opt_model.addConstraint(one_room_constraint)

# Keep the legacy name bound; it now refers to real constraint objects.
constraints = one_room_constraints

In [15]:
# Minimum-satisfaction constraints: the score candidate i obtains from the
# assigned room must be at least that candidate's threshold b4[i].
#
# LpProblem.addConstraint does not return the constraint, so the objects are
# built and kept in a dict first, then registered with the model.
min_score_constraints = {
    i: plp.LpConstraint(
        e=plp.lpSum(c[i, j] * x_vars[i, j] for j in set_J),
        sense=plp.LpConstraintGE,
        rhs=b4[i],
        name="constraint4_{0}".format(i),
    )
    for i in set_I
}
for min_score_constraint in min_score_constraints.values():
    opt_model.addConstraint(min_score_constraint)

# Keep the legacy name bound; it now refers to real constraint objects.
constraints = min_score_constraints

In [16]:
# Maximise the total score over all (candidate, room) assignment variables.
opt_model.sense = plp.LpMaximize
objective = plp.lpSum(var * c[key] for key, var in x_vars.items())
opt_model.setObjective(objective)

In [17]:
# Solve the model; no solver is passed, so PuLP picks its default. The value
# shown is the integer status code (decoded with plp.LpStatus in a later cell).
opt_model.solve()

1

In [18]:
# Total satisfaction score: evaluate score * assignment for every
# (candidate, room) pair at the solved values and add the terms up.
summary = [
    plp.value(x_vars[i, j] * c[i, j])
    for i in set_I
    for j in set_J
]
sum(summary)

809.0

In [19]:
# Human-readable name of the status code stored on the solved model.
plp.LpStatus[opt_model.status]

'Optimal'

In [20]:
# Long-format solution table: one row per decision variable, keyed by
# column_i (candidate index) and column_j (room index).
variables_frame = pd.DataFrame.from_dict(
    x_vars, orient="index", columns=["variable_object"]
)
variables_frame.index = pd.MultiIndex.from_tuples(
    variables_frame.index, names=["column_i", "column_j"]
)
opt_df = variables_frame.reset_index()

# PuLP exposes the solved value of each variable as `varValue`.
opt_df["solution_value"] = opt_df["variable_object"].apply(lambda var: var.varValue)

# Optional export, left disabled:
#opt_df.drop(columns=["variable_object"], inplace=True)
#opt_df.to_csv("./optimization_solution.csv")

In [21]:
# Wide view of the solution: one row per candidate index (column_i), one
# column per room index (column_j), each cell holding that variable's solved value.
# Uncomment the next line to display more rows of the table.
#pd.set_option('display.max_rows', 150)
opt_df.pivot(index='column_i',columns='column_j',values='solution_value')

column_j,1,2,3,4
column_i,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0,0.0,0.0,1.0,0.0
1,0.0,0.0,1.0,0.0
2,1.0,0.0,0.0,0.0
3,0.0,0.0,1.0,0.0
4,0.0,0.0,0.0,1.0
5,0.0,0.0,1.0,0.0
6,1.0,0.0,0.0,0.0
7,0.0,1.0,0.0,0.0
8,0.0,1.0,0.0,0.0
9,0.0,0.0,1.0,0.0


In [22]:
# Display the model's text representation for inspection (the output is long).
opt_model

MIP_Model:
MAXIMIZE
6.0*x_0_1 + 6.0*x_0_3 + 1.0*x_100_1 + 2.0*x_100_2 + 9.0*x_100_4 + 8.0*x_101_1 + 1.0*x_101_2 + 3.0*x_101_3 + 1.0*x_102_1 + 6.0*x_102_2 + 5.0*x_102_4 + 6.0*x_103_2 + 4.0*x_103_3 + 2.0*x_103_4 + 8.0*x_104_1 + 3.0*x_104_2 + 1.0*x_104_3 + 9.0*x_10_1 + 1.0*x_10_2 + 2.0*x_10_4 + 4.0*x_11_1 + 2.0*x_11_2 + 6.0*x_11_3 + 5.0*x_12_1 + 7.0*x_12_2 + 2.0*x_13_1 + 1.0*x_13_3 + 9.0*x_13_4 + 3.0*x_14_2 + 9.0*x_14_3 + 6.0*x_15_1 + 4.0*x_15_2 + 2.0*x_15_3 + 2.0*x_16_1 + 9.0*x_16_2 + 1.0*x_16_4 + 4.0*x_17_1 + 8.0*x_17_2 + 9.0*x_18_1 + 2.0*x_18_2 + 1.0*x_18_3 + 3.0*x_19_1 + 9.0*x_19_3 + 2.0*x_1_1 + 9.0*x_1_3 + 1.0*x_1_4 + 7.0*x_20_1 + 5.0*x_20_3 + 6.0*x_21_1 + 2.0*x_21_2 + 4.0*x_21_3 + 5.0*x_22_1 + 6.0*x_22_2 + 1.0*x_22_4 + 9.0*x_23_2 + 3.0*x_23_3 + 8.0*x_24_1 + 3.0*x_24_3 + 1.0*x_24_4 + 1.0*x_25_1 + 6.0*x_25_3 + 5.0*x_25_4 + 2.0*x_26_2 + 10.0*x_26_4 + 4.0*x_27_2 + 8.0*x_27_4 + 7.0*x_28_2 + 5.0*x_28_4 + 11.0*x_29_1 + 1.0*x_29_3 + 7.0*x_2_1 + 2.0*x_2_2 + 3.0*x_2_4 + 8.0*x_30_2 + 4.0*x_30_

In [25]:
# Re-display the total satisfaction score accumulated in `summary`.
sum(summary)

809.0