In [1]:
from pulp import *
import pandas as pd
import numpy as np
import itertools

In [9]:
human = pd.read_csv("datasets/simple_hr/hr_data - human.csv")
section = pd.read_csv("datasets/simple_hr/hr_data - section.csv")

In [10]:
human.head()

Unnamed: 0,id,cost,skill_a,skill_b,skill_c,skill_d
0,1,5,1,1,0,0
1,2,3,0,0,1,0
2,3,4,0,1,0,1
3,4,2,1,0,0,0
4,5,5,0,0,1,1


In [11]:
section.head()

Unnamed: 0,id,num,skill_a,skill_b,skill_c,skill_d,dist
0,1,5,1,0,0,0,1
1,2,5,0,1,0,0,-1
2,3,4,0,0,1,0,1
3,4,3,0,0,0,1,-1
4,5,3,0,1,0,0,-1


In [12]:
def to_uni_vec( x ):
    dist = np.sqrt(x.skill_a + x.skill_b + x.skill_c + x.skill_d)
    vec = {
        "skill_a_uni" : x.skill_a/dist, 
        "skill_b_uni" : x.skill_b/dist, 
        "skill_c_uni" : x.skill_c/dist, 
        "skill_d_uni" : x.skill_d/dist, 
    }
    return pd.Series(vec)

human = pd.concat( [human, human.apply( to_uni_vec, axis=1 )], axis=1 )
human.head()

Unnamed: 0,id,cost,skill_a,skill_b,skill_c,skill_d,skill_a_uni,skill_b_uni,skill_c_uni,skill_d_uni
0,1,5,1,1,0,0,0.707107,0.707107,0.0,0.0
1,2,3,0,0,1,0,0.0,0.0,1.0,0.0
2,3,4,0,1,0,1,0.0,0.707107,0.0,0.707107
3,4,2,1,0,0,0,1.0,0.0,0.0,0.0
4,5,5,0,0,1,1,0.0,0.0,0.707107,0.707107


In [13]:
section = pd.concat( [section, section.apply( to_uni_vec, axis=1 )], axis=1 )
section.head()

Unnamed: 0,id,num,skill_a,skill_b,skill_c,skill_d,dist,skill_a_uni,skill_b_uni,skill_c_uni,skill_d_uni
0,1,5,1,0,0,0,1,1.0,0.0,0.0,0.0
1,2,5,0,1,0,0,-1,0.0,1.0,0.0,0.0
2,3,4,0,0,1,0,1,0.0,0.0,1.0,0.0
3,4,3,0,0,0,1,-1,0.0,0.0,0.0,1.0
4,5,3,0,1,0,0,-1,0.0,1.0,0.0,0.0


In [14]:
def calc_cos_sim(x, section):
    vec = {}
    for index, row in section.iterrows():
        cos = 1 - (x.skill_a_uni * row.skill_a_uni + x.skill_b_uni * row.skill_b_uni + x.skill_c_uni * row.skill_c_uni +x.skill_d_uni * row.skill_d_uni)
        vec["cos_%d"%row.id] = cos
    return  pd.Series(vec)

In [15]:
human = pd.concat( [human, human.apply(lambda x: calc_cos_sim(x, section), axis=1 )], axis=1 )
human.head()

Unnamed: 0,id,cost,skill_a,skill_b,skill_c,skill_d,skill_a_uni,skill_b_uni,skill_c_uni,skill_d_uni,cos_1,cos_2,cos_3,cos_4,cos_5
0,1,5,1,1,0,0,0.707107,0.707107,0.0,0.0,0.292893,0.292893,1.0,1.0,0.292893
1,2,3,0,0,1,0,0.0,0.0,1.0,0.0,1.0,1.0,0.0,1.0,1.0
2,3,4,0,1,0,1,0.0,0.707107,0.0,0.707107,1.0,0.292893,1.0,0.292893,0.292893
3,4,2,1,0,0,0,1.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0
4,5,5,0,0,1,1,0.0,0.0,0.707107,0.707107,1.0,1.0,0.292893,0.292893,1.0


In [16]:
m = LpProblem() # 部署ごとのコストを最小化する
for i, row in section.iterrows():
    human["section_%d"%row.id] = [LpVariable("v%d_%d"%(row.id,j), cat="Integer", upBound=1, lowBound=0) for j in human.index]

In [20]:
# 最小化
cost = None
for i, row in section.iterrows():
    cost += lpDot(human.cost, human["section_%d"%row.id])
    
#     スキルの中心点Vecを求める
    for skill in ["a", "b", "c", "d"]:
        skill_center = lpDot(human["skill_"+skill+"_uni"], human["section_%d"%row.id]) / row.num
        # 中心点が近い
        cost +=  -skill_center * row["skill_"+skill+"_uni"]
    cost += 1 # cos類似度Max
    # 分散を最小化 1=収束、 -1=発散
    cost += row.dist * lpDot(human["cos_%d"%row.id], human["section_%d"%row.id])
    
m+= cost

In [22]:
for index, row in section.iterrows():
    m += lpSum(human["section_%d"%row.id])==row.num

In [23]:
for index, row in human.iterrows():
    m += (row.section_1+row.section_2+row.section_3+row.section_4+row.section_5)==1

In [24]:
pulp.LpStatus[m.solve()]

'Optimal'

In [25]:
print(m)

NoName:
MINIMIZE
5.15147186258*v1_0 + 4.0*v1_1 + 5.0*v1_10 + 4.0*v1_11 + 3.0*v1_12 + 5.0*v1_13 + 7.30717967697*v1_14 + 2.8*v1_15 + 3.0*v1_16 + 5.15147186258*v1_17 + 7.0*v1_18 + 4.15147186258*v1_19 + 5.0*v1_2 + 1.8*v1_3 + 6.0*v1_4 + 0.8*v1_5 + 3.0*v1_6 + 4.0*v1_7 + 7.0*v1_8 + 1.8*v1_9 + 4.56568542495*v2_0 + 2.0*v2_1 + 3.8*v2_10 + 2.0*v2_11 + 1.0*v2_12 + 3.0*v2_13 + 6.46188021535*v2_14 + 2.0*v2_15 + 1.0*v2_16 + 4.0*v2_17 + 5.56568542495*v2_18 + 3.0*v2_19 + 3.56568542495*v2_2 + 1.0*v2_3 + 4.0*v2_4 + 0.0*v2_5 + 1.0*v2_6 + 2.0*v2_7 + 5.46188021535*v2_8 + 1.0*v2_9 + 6.0*v3_0 + 2.75*v3_1 + 5.0*v3_10 + 4.0*v3_11 + 3.0*v3_12 + 3.75*v3_13 + 8.0*v3_14 + 4.0*v3_15 + 1.75*v3_16 + 5.11611652352*v3_17 + 7.0*v3_18 + 5.0*v3_19 + 5.0*v3_2 + 3.0*v3_3 + 5.11611652352*v3_4 + 2.0*v3_5 + 1.75*v3_6 + 4.0*v3_7 + 6.27831216351*v3_8 + 3.0*v3_9 + 4.0*v4_0 + 2.0*v4_1 + 3.0*v4_10 + 2.66666666667*v4_11 + 1.66666666667*v4_12 + 3.0*v4_13 + 6.38490017946*v4_14 + 2.0*v4_15 + 1.0*v4_16 + 4.0*v4_17 + 5.47140452079*v4_18 +

In [27]:
human['section_1_value'] = human.section_1.apply(value)
human['section_2_value'] = human.section_2.apply(value)
human['section_3_value'] = human.section_3.apply(value)
human['section_4_value'] = human.section_4.apply(value)
human['section_5_value'] = human.section_5.apply(value)

In [28]:
section

Unnamed: 0,id,num,skill_a,skill_b,skill_c,skill_d,dist,skill_a_uni,skill_b_uni,skill_c_uni,skill_d_uni
0,1,5,1,0,0,0,1,1.0,0.0,0.0,0.0
1,2,5,0,1,0,0,-1,0.0,1.0,0.0,0.0
2,3,4,0,0,1,0,1,0.0,0.0,1.0,0.0
3,4,3,0,0,0,1,-1,0.0,0.0,0.0,1.0
4,5,3,0,1,0,0,-1,0.0,1.0,0.0,0.0


In [36]:
def calc_center(section_num):
    a,b,c,d = 0,0,0,0
    for index, row in human[human["section_%d_value"%section_num]==1].iterrows():
        a += row.skill_a_uni
        b += row.skill_b_uni
        c += row.skill_c_uni
        d += row.skill_d_uni
    a,b,c,d = a/5, b/5, c/5, d/5
    print([a,b,c,d])

In [37]:
calc_center(1)
human[human.section_1_value==1]

[0.9154700538379252, 0.11547005383792516, 0.0, 0.11547005383792516]


Unnamed: 0,id,cost,skill_a,skill_b,skill_c,skill_d,skill_a_uni,skill_b_uni,skill_c_uni,skill_d_uni,...,section_1,section_2,section_3,section_4,section_5,section_1_value,section_2_value,section_3_value,section_4_value,section_5_value
3,4,2,1,0,0,0,1.0,0.0,0.0,0.0,...,v1_3,v2_3,v3_3,v4_3,v5_3,1.0,0.0,0.0,0.0,0.0
5,6,1,1,0,0,0,1.0,0.0,0.0,0.0,...,v1_5,v2_5,v3_5,v4_5,v5_5,1.0,0.0,0.0,0.0,0.0
9,10,2,1,0,0,0,1.0,0.0,0.0,0.0,...,v1_9,v2_9,v3_9,v4_9,v5_9,1.0,0.0,0.0,0.0,0.0
14,15,7,1,1,0,1,0.57735,0.57735,0.0,0.57735,...,v1_14,v2_14,v3_14,v4_14,v5_14,1.0,0.0,0.0,0.0,0.0
15,16,3,1,0,0,0,1.0,0.0,0.0,0.0,...,v1_15,v2_15,v3_15,v4_15,v5_15,1.0,0.0,0.0,0.0,0.0


In [38]:
calc_center(2)
human[human.section_2_value==1]

[0.282842712474619, 0.0, 0.282842712474619, 0.682842712474619]


Unnamed: 0,id,cost,skill_a,skill_b,skill_c,skill_d,skill_a_uni,skill_b_uni,skill_c_uni,skill_d_uni,...,section_1,section_2,section_3,section_4,section_5,section_1_value,section_2_value,section_3_value,section_4_value,section_5_value
4,5,5,0,0,1,1,0.0,0.0,0.707107,0.707107,...,v1_4,v2_4,v3_4,v4_4,v5_4,0.0,1.0,0.0,0.0,0.0
7,8,3,0,0,0,1,0.0,0.0,0.0,1.0,...,v1_7,v2_7,v3_7,v4_7,v5_7,0.0,1.0,0.0,0.0,0.0
11,12,3,0,0,0,1,0.0,0.0,0.0,1.0,...,v1_11,v2_11,v3_11,v4_11,v5_11,0.0,1.0,0.0,0.0,0.0
17,18,5,1,0,1,0,0.707107,0.0,0.707107,0.0,...,v1_17,v2_17,v3_17,v4_17,v5_17,0.0,1.0,0.0,0.0,0.0
19,20,4,1,0,0,1,0.707107,0.0,0.0,0.707107,...,v1_19,v2_19,v3_19,v4_19,v5_19,0.0,1.0,0.0,0.0,0.0


In [39]:
calc_center(3)
human[human.section_3_value==1]

[0.0, 0.0, 0.8, 0.0]


Unnamed: 0,id,cost,skill_a,skill_b,skill_c,skill_d,skill_a_uni,skill_b_uni,skill_c_uni,skill_d_uni,...,section_1,section_2,section_3,section_4,section_5,section_1_value,section_2_value,section_3_value,section_4_value,section_5_value
1,2,3,0,0,1,0,0.0,0.0,1.0,0.0,...,v1_1,v2_1,v3_1,v4_1,v5_1,0.0,0.0,1.0,0.0,0.0
6,7,2,0,0,1,0,0.0,0.0,1.0,0.0,...,v1_6,v2_6,v3_6,v4_6,v5_6,0.0,0.0,1.0,0.0,0.0
13,14,4,0,0,1,0,0.0,0.0,1.0,0.0,...,v1_13,v2_13,v3_13,v4_13,v5_13,0.0,0.0,1.0,0.0,0.0
16,17,2,0,0,1,0,0.0,0.0,1.0,0.0,...,v1_16,v2_16,v3_16,v4_16,v5_16,0.0,0.0,1.0,0.0,0.0


In [40]:
calc_center(4)
human[human.section_4_value==1]

[0.1414213562373095, 0.45689141007523465, 0.11547005383792516, 0.11547005383792516]


Unnamed: 0,id,cost,skill_a,skill_b,skill_c,skill_d,skill_a_uni,skill_b_uni,skill_c_uni,skill_d_uni,...,section_1,section_2,section_3,section_4,section_5,section_1_value,section_2_value,section_3_value,section_4_value,section_5_value
0,1,5,1,1,0,0,0.707107,0.707107,0.0,0.0,...,v1_0,v2_0,v3_0,v4_0,v5_0,0.0,0.0,0.0,1.0,0.0
8,9,6,0,1,1,1,0.0,0.57735,0.57735,0.57735,...,v1_8,v2_8,v3_8,v4_8,v5_8,0.0,0.0,0.0,1.0,0.0
10,11,4,0,1,0,0,0.0,1.0,0.0,0.0,...,v1_10,v2_10,v3_10,v4_10,v5_10,0.0,0.0,0.0,1.0,0.0


In [41]:
calc_center(5)
human[human.section_5_value==1]

[0.0, 0.282842712474619, 0.0, 0.48284271247461896]


Unnamed: 0,id,cost,skill_a,skill_b,skill_c,skill_d,skill_a_uni,skill_b_uni,skill_c_uni,skill_d_uni,...,section_1,section_2,section_3,section_4,section_5,section_1_value,section_2_value,section_3_value,section_4_value,section_5_value
2,3,4,0,1,0,1,0.0,0.707107,0.0,0.707107,...,v1_2,v2_2,v3_2,v4_2,v5_2,0.0,0.0,0.0,0.0,1.0
12,13,2,0,0,0,1,0.0,0.0,0.0,1.0,...,v1_12,v2_12,v3_12,v4_12,v5_12,0.0,0.0,0.0,0.0,1.0
18,19,6,0,1,0,1,0.0,0.707107,0.0,0.707107,...,v1_18,v2_18,v3_18,v4_18,v5_18,0.0,0.0,0.0,0.0,1.0
