In [51]:
import pandas as pd
import numpy as np
import networkx as nx
from networkx.algorithms import bipartite

In [2]:
DEBUG = True

MAIN_JOB_CSV_FILE = '../datasets/result3-1.csv'
MAIN_HUNTER_CSV_FILE = '../datasets/result3-2.csv'

job_frame = pd.read_csv(MAIN_JOB_CSV_FILE, encoding='GBK')
hunter_frame = pd.read_csv(MAIN_HUNTER_CSV_FILE, encoding='GBK')

In [3]:
job_frame.head(1)

Unnamed: 0,招聘信息 ID,求职者 ID,岗位匹配度
0,1561971141940215809,1468079045689344000,0.56


In [4]:
hunter_frame.head(1)

Unnamed: 0,求职者 ID,招聘信息 ID,公司名称,求职者满意度
0,1461534785997504512,1565240659105939456,彩讯科技股份有限公司,0.72


In [5]:
job_ids = job_frame['招聘信息 ID'].values.tolist() + hunter_frame['招聘信息 ID'].values.tolist()
job_ids = np.unique(job_ids)
job_ids_map = {job_id: index for index, job_id in enumerate(job_ids)}
len(job_ids_map)

76

In [6]:
hunter_ids = job_frame['求职者 ID'].values.tolist() + hunter_frame['求职者 ID'].values.tolist()
hunter_ids = np.unique(hunter_ids)
hunter_ids_map = {hutner_id: index for index, hutner_id in enumerate(hunter_ids)}
len(hunter_ids_map)

58

In [7]:
job_frame['招聘信息 ID'] = job_frame['招聘信息 ID'].map(job_ids_map)
job_frame['求职者 ID'] = job_frame['求职者 ID'].map(hunter_ids_map)

In [8]:
job_frame.head(1)

Unnamed: 0,招聘信息 ID,求职者 ID,岗位匹配度
0,29,16,0.56


In [9]:
hunter_frame['求职者 ID'] = hunter_frame['求职者 ID'].map(hunter_ids_map)
hunter_frame['招聘信息 ID'] = hunter_frame['招聘信息 ID'].map(job_ids_map)

In [10]:
hunter_frame.head(1)

Unnamed: 0,求职者 ID,招聘信息 ID,公司名称,求职者满意度
0,2,34,彩讯科技股份有限公司,0.72


In [52]:
JOB_RECORD_CSV = '../datasets/result1-1.csv'

job_record_frame = pd.read_csv(JOB_RECORD_CSV, encoding='GBK')
# job_record_frame['招聘信息 ID'] = job_record_frame['招聘信息 ID'].map(job_ids_map)
job_rec_count = {} # job_record_frame.loc[..., ['招聘信息 ID', '招募人数']]
for idx, row in job_record_frame.iterrows():
    job_rec_count[int(row['招聘信息 ID'])] = row['招募人数']
len(job_rec_count)

1568

In [13]:
job_matrix = np.zeros((len(job_ids_map), len(hunter_ids_map)))

for idx, row in job_frame.iterrows():
    job_matrix[int(row['招聘信息 ID'])][int(row['求职者 ID'])] = row['岗位匹配度']

job_matrix.shape

(76, 58)

In [14]:
hunter_matrix = np.zeros((len(hunter_ids_map), len(job_ids_map)))

for idx, row in hunter_frame.iterrows():
    hunter_matrix[int(row['求职者 ID'])][int(row['招聘信息 ID'])] = row['求职者满意度']

hunter_matrix.shape

(58, 76)

In [15]:
data_matrix = job_matrix * hunter_matrix.T

data_matrix.shape

(76, 58)

In [31]:
if DEBUG:
    for i in range(len(job_ids_map)):
        for j in range(len(hunter_ids_map)):
            if data_matrix[i][j] < 1e-8: continue
            job_id = job_ids[i]
            hunter_id = hunter_ids[j]
            # print(job_id, hunter_id, data_matrix[i][j])

In [82]:
edges = []

# for i in range(len(job_ids)):
#     capacity = job_rec_count[int(job_ids[i])]
#     if capacity == 0: continue
#     edges.append(('s', str(job_ids[i]), {'capacity': capacity, 'weight': 0}))

for i in range(len(job_ids)):
    capacity = job_rec_count[int(job_ids[i])]
    for _ in range(capacity):
        for j in range(len(hunter_ids)):
            if data_matrix[i][j] < 1e-8:
                edges.append((job_ids[i], hunter_ids[j], {'weight': 0}))    
            else:
                edges.append((job_ids[i], hunter_ids[j], {'weight': -data_matrix[i][j]}))
                    #  {'capacity': 1, 'weight': data_matrix[i][j]}))

# for i in range(len(hunter_ids)):
#     capacity = job_rec_count[int(job_ids[i])]
#     if capacity == 0: continue
#     edges.append((str(hunter_ids[i]), 't', {'capacity': 1, 'weight': 0}))
            
binet = nx.Graph()
binet.add_nodes_from(job_ids, bipartite=0)
binet.add_nodes_from(hunter_ids, bipartite=1)
binet.add_edges_from(edges)

nx.is_connected(binet)

True

In [86]:
edges

[(1542733237841952768, 1461512488951611392, {'weight': 0}),
 (1542733237841952768, 1461530285551255552, {'weight': 0}),
 (1542733237841952768, 1461534785997504512, {'weight': 0}),
 (1542733237841952768, 1461601893229920256, {'weight': 0}),
 (1542733237841952768, 1461614384609624064, {'weight': 0}),
 (1542733237841952768, 1461648125990141952, {'weight': 0}),
 (1542733237841952768, 1461659304984707072, {'weight': -0.0648}),
 (1542733237841952768, 1461663453243637760, {'weight': 0}),
 (1542733237841952768, 1461666278635864064, {'weight': 0}),
 (1542733237841952768, 1462718849626537984, {'weight': -0.2646}),
 (1542733237841952768, 1463685809768103936, {'weight': 0}),
 (1542733237841952768, 1466233482740105216, {'weight': -0.0825}),
 (1542733237841952768, 1467789372454731776, {'weight': 0}),
 (1542733237841952768, 1467792600214929408, {'weight': -0.096}),
 (1542733237841952768, 1467797054184095744, {'weight': 0}),
 (1542733237841952768, 1467802081468481536, {'weight': 0}),
 (154273323784195

In [83]:
# nx.draw_networkx_edge_labels(binet, nx.spring_layout(binet))
matched_result = nx.bipartite.minimum_weight_full_matching(binet)

In [85]:
for job_id in job_ids:
    if job_id in matched_result:
        print(job_id, matched_result[job_id], 
              data_matrix[job_ids_map[job_id]][hunter_ids_map[hunter_id]])

1542733237841952768 1462718849626537984 0.1188
1547141682217222144 1463685809768103936 0.0
1547141682221416448 1479721504424984576 0.0
1549231524551983104 1472772112841310208 0.0
1549231524551983105 1582598236647063552 0.0
1551507309048496128 1487091440004759552 0.0
1551793389706543105 1471049670871613440 0.0
1551808344770084864 1467789372454731776 0.0
1551808344774279168 1632947816114552832 0.0
1552975408276832256 1541793867911790592 0.0
1552975408281026560 1506565365747023872 0.0
1554023803393277952 1507237125567938560 0.0
1555095262983225344 1470307844552261632 0.0
1555095262987419648 1480029519657172992 0.0
1555102215444758528 1461512488951611392 0.0
1555102215444758529 1473849820266496000 0.0
1556465022669029376 1530030540420415488 0.0
1556465022673223680 1461530285551255552 0.0
1556545875319324672 1487239888775544832 0.0
1557673013896413185 1466233482740105216 0.0
1558010601735847936 1500714639468658688 0.0
1558010601740042240 1472476777166274560 0.0
1559484302188740608 147032985