In [1]:
import pandas as pd
import numpy as np
import networkx as nx
from networkx.algorithms import bipartite

In [2]:
# Switch for the optional inspection loop later in the notebook.
DEBUG = True

# GBK-encoded inputs: the job-side table (岗位匹配度) and the hunter-side table (求职者满意度).
MAIN_JOB_CSV_FILE = '../datasets/result3-1.csv'
MAIN_HUNTER_CSV_FILE = '../datasets/result3-2.csv'

job_frame, hunter_frame = (
    pd.read_csv(csv_path, encoding='GBK')
    for csv_path in (MAIN_JOB_CSV_FILE, MAIN_HUNTER_CSV_FILE)
)

In [3]:
# Show the first job-side row; the ID columns still hold the raw values at this point.
job_frame.head(1)

Unnamed: 0,招聘信息 ID,求职者 ID,岗位匹配度
0,1561971141940215809,1468079045689344000,0.56


In [4]:
# Show the first hunter-side row; the ID columns still hold the raw values at this point.
hunter_frame.head(1)

Unnamed: 0,求职者 ID,招聘信息 ID,公司名称,求职者满意度
0,1461534785997504512,1565240659105939456,彩讯科技股份有限公司,0.72


In [5]:
# Every distinct posting ID seen in either table, sorted by np.unique.
job_ids = np.unique(
    job_frame['招聘信息 ID'].values.tolist() + hunter_frame['招聘信息 ID'].values.tolist()
)
# Raw posting ID -> dense 0-based index (its position in job_ids).
job_ids_map = dict(zip(job_ids, range(len(job_ids))))
len(job_ids_map)

76

In [6]:
# Every distinct job-seeker ID seen in either table, sorted by np.unique.
hunter_ids = np.unique(
    job_frame['求职者 ID'].values.tolist() + hunter_frame['求职者 ID'].values.tolist()
)
# Raw job-seeker ID -> dense 0-based index (its position in hunter_ids).
hunter_ids_map = dict(zip(hunter_ids, range(len(hunter_ids))))
len(hunter_ids_map)

58

In [7]:
# Swap the raw IDs in the job-side table for their dense indices.
# NOTE: not idempotent - the column names are reused, so run this cell once per fresh load.
job_frame = job_frame.assign(**{
    '招聘信息 ID': job_frame['招聘信息 ID'].map(job_ids_map),
    '求职者 ID': job_frame['求职者 ID'].map(hunter_ids_map),
})

In [8]:
# Same first row again: the two ID columns now hold the dense indices.
job_frame.head(1)

Unnamed: 0,招聘信息 ID,求职者 ID,岗位匹配度
0,29,16,0.56


In [9]:
# Swap the raw IDs in the hunter-side table for their dense indices.
# NOTE: not idempotent - the column names are reused, so run this cell once per fresh load.
hunter_frame = hunter_frame.assign(**{
    '求职者 ID': hunter_frame['求职者 ID'].map(hunter_ids_map),
    '招聘信息 ID': hunter_frame['招聘信息 ID'].map(job_ids_map),
})

In [10]:
# Same first row again: the two ID columns now hold the dense indices.
hunter_frame.head(1)

Unnamed: 0,求职者 ID,招聘信息 ID,公司名称,求职者满意度
0,2,34,彩讯科技股份有限公司,0.72


In [11]:
JOB_RECORD_CSV = '../datasets/result1-1.csv'

job_record_frame = pd.read_csv(JOB_RECORD_CSV, encoding='GBK')

# Raw posting ID -> number of openings ('招募人数'); a later row for the same ID overwrites
# an earlier one.
# The columns are read directly instead of going through DataFrame.iterrows(): iterrows()
# packs each row into a single-dtype Series, so in an all-numeric frame containing a float
# column the 19-digit IDs would be coerced to float64 (exact only up to 2**53) and int(...)
# would return a different key, while the head counts would come back as floats.
job_rec_count = dict(zip(
    map(int, job_record_frame['招聘信息 ID'].tolist()),
    job_record_frame['招募人数'].tolist(),
))
len(job_rec_count)

1568

In [12]:
# job_matrix[job index, hunter index] = 岗位匹配度; pairs without a record stay 0.
job_matrix = np.zeros((len(job_ids_map), len(hunter_ids_map)))

for job_idx, hunter_idx, match_score in zip(
    job_frame['招聘信息 ID'], job_frame['求职者 ID'], job_frame['岗位匹配度']
):
    job_matrix[int(job_idx), int(hunter_idx)] = match_score

job_matrix.shape

(76, 58)

In [13]:
# hunter_matrix[hunter index, job index] = 求职者满意度; pairs without a record stay 0.
hunter_matrix = np.zeros((len(hunter_ids_map), len(job_ids_map)))

for hunter_idx, job_idx, satisfaction in zip(
    hunter_frame['求职者 ID'], hunter_frame['招聘信息 ID'], hunter_frame['求职者满意度']
):
    hunter_matrix[int(hunter_idx), int(job_idx)] = satisfaction

hunter_matrix.shape

(58, 76)

In [14]:
# Combined score per (job, hunter) pair: element-wise product of the two one-sided scores,
# so a pair scores 0 unless both tables contain it.
data_matrix = np.multiply(job_matrix, hunter_matrix.T)

data_matrix.shape

(76, 58)

In [15]:
if DEBUG:
    # Visit every pair whose combined score is not (numerically) zero and resolve the raw
    # IDs for it; the print that consumed them is currently switched off.
    for i in range(len(job_ids_map)):
        for j in range(len(hunter_ids_map)):
            if not (data_matrix[i, j] < 1e-8):
                job_id = job_ids[i]
                hunter_id = hunter_ids[j]
                # print(job_id, hunter_id, data_matrix[i][j])

In [16]:
# Build a complete bipartite graph between "job slots" and hunters.
# A posting with k openings becomes k slot nodes named "<raw job id>_<slot index>", so a
# one-to-one matching can still fill the same posting several times.
edges = []
mutil_job_ids = []

for raw_job_id, pair_scores in zip(job_ids, data_matrix):
    capacity = job_rec_count[int(raw_job_id)]
    for slot in range(capacity):
        slot_node = "{}_{}".format(raw_job_id, slot)
        mutil_job_ids.append(slot_node)
        for hunter_node, score in zip(hunter_ids, pair_scores):
            # Scores are negated because the solver minimises weight; pairs with no usable
            # score still get a zero-weight edge so every slot is connected to every hunter.
            weight = 0 if score < 1e-8 else -score
            edges.append((slot_node, hunter_node, {'weight': weight}))

binet = nx.Graph()
binet.add_nodes_from(mutil_job_ids, bipartite=0)
binet.add_nodes_from(hunter_ids, bipartite=1)
binet.add_edges_from(edges)

nx.is_connected(binet)

True

In [17]:
# Optional visual check of the graph:
# nx.draw_networkx_edge_labels(binet, nx.spring_layout(binet))

# Edge weights are negated scores, so the minimum-weight full matching is the assignment
# with the largest total combined score.
matched_result = bipartite.minimum_weight_full_matching(binet)

In [18]:
# Group the matched hunters under the raw posting ID.
# Only entries keyed by a slot node (a "<raw job id>_<slot index>" string) are used; any
# entry keyed by something else is skipped.
bidirections = {}
for slot_node, hunter_id in matched_result.items():
    if isinstance(slot_node, str):
        raw_job_id = int(slot_node.split('_')[0])
        bidirections.setdefault(raw_job_id, []).append(hunter_id)

In [19]:
# Column-oriented records for the final table: one entry per matched (job, hunter) pair that
# has a score in BOTH input tables.
bidirection_json = {"招聘信息ID": [], "求职者ID": [], "岗位匹配度": [], "求职者满意度": []}

# The loop variable is deliberately NOT called `hunter_ids`: that name is the notebook-level
# array of all hunter IDs used when building the graph, and rebinding it here would break
# any re-run of the earlier cells.
for job_id, matched_hunter_ids in bidirections.items():
    job_degree = job_ids_map[job_id]
    # Narrow both tables to this posting once, instead of once per matched hunter.
    job_rows = job_frame[job_frame['招聘信息 ID'] == job_degree]
    hunter_rows = hunter_frame[hunter_frame['招聘信息 ID'] == job_degree]
    for hunter_id in matched_hunter_ids:
        hunter_degree = hunter_ids_map[hunter_id]
        job_info = job_rows[job_rows['求职者 ID'] == hunter_degree]['岗位匹配度'].values.tolist()
        hunter_info = hunter_rows[hunter_rows['求职者 ID'] == hunter_degree]['求职者满意度'].values.tolist()
        # Pairs matched only through a zero-weight filler edge have no score on at least
        # one side and are left out of the result.
        if len(job_info) == 0 or len(hunter_info) == 0:
            continue
        # IDs are written as tab-prefixed text - presumably so spreadsheet tools do not
        # reformat the 19-digit numbers (TODO confirm).
        bidirection_json['招聘信息ID'].append('\t' + str(job_id))
        bidirection_json['求职者ID'].append('\t' + str(hunter_id))
        bidirection_json['岗位匹配度'].append(job_info[0])
        bidirection_json['求职者满意度'].append(hunter_info[0])
        

In [21]:
# Order by posting ID (ascending), then by 岗位匹配度 (descending) within each posting.
bidirection_frame = pd.DataFrame(bidirection_json).sort_values(
    by=["招聘信息ID", "岗位匹配度"],
    ascending=[True, False],
)
# errors='ignore' silently drops any character that GBK cannot encode.
bidirection_frame.to_csv(
    '../datasets/result4.csv',
    index=False,
    encoding="GBK",
    errors='ignore',
)