In [None]:
import pandas as pd
import numpy as np
import ast
import networkx as nx
from tqdm import tqdm
import seaborn as sns
import matplotlib.pyplot as plt
from network_scorer import create_count_df, create_network, resume_jd_matching_score

In [None]:
# Multiplicative factor applied per degree keyword when scoring resumes.
# Every listed degree currently maps to the same factor of 1.
degree_dict = dict.fromkeys(
    ("bachelor", "bs", "master", "ms", "phd", "doctor"),
    1,
)

In [None]:
# Load the resume table and derive the three multiplicative factors that are
# later applied to each resume's summed matching score.
resume_df = pd.read_csv('dataset/resume_final_data.csv', index_col = 0)


def _org_rank_factor(rank):
    """Return 1 + 1/rank for a positive rank, otherwise the neutral factor 1."""
    if rank > 0:
        return 1+1/rank
    return 1


def _degrees_factor(degree):
    """Look the degree up in degree_dict, falling back to 1 when it is not listed."""
    return degree_dict.get(degree, 1)


def _year_experience_factor(years):
    """Return 1.2 for more than 3 years of experience, otherwise 0.8."""
    if years > 3:
        return 1.2
    return 0.8


resume_df['org_rank_factor'] = resume_df['org_rank'].apply(_org_rank_factor)
resume_df['degrees_factor'] = resume_df['degrees'].apply(_degrees_factor)
resume_df['year_experience_factor'] = resume_df['year_experience'].apply(_year_experience_factor)
resume_df.head()

In [None]:
# Load the job-description table; the first CSV column is used as the index.
jd_csv_path = 'dataset/job_description_final_data.csv'
jd_df = pd.read_csv(jd_csv_path, index_col=0)
jd_df.head()

In [None]:
# Hard skills: build the count frame / value list from the job descriptions,
# then the network derived from that count frame.
col_name = 'hard_skills'
hs_count_result = create_count_df(jd_df, col_name)
hs_count_df, hs_all_value_list = hs_count_result
hs_network = create_network(hs_count_df)

In [None]:
# Soft skills: build the count frame / value list from the job descriptions,
# then the network derived from that count frame.
col_name = 'soft_skills'
ss_count_result = create_count_df(jd_df, col_name)
ss_count_df, ss_all_value_list = ss_count_result
ss_network = create_network(ss_count_df)

In [None]:
# Majors: build the count frame / value list from the job descriptions,
# then the network derived from that count frame.
col_name = 'majors'
mj_count_result = create_count_df(jd_df, col_name)
mj_count_df, mj_all_value_list = mj_count_result
mj_network = create_network(mj_count_df)

In [None]:
# Hard skill graph visualization
# Keep the 20 entries with the largest diagonal value of hs_count_df.
top_skill_idx = np.argsort(np.diag(hs_count_df))[::-1][:20]
hs_count_df_temp = hs_count_df.iloc[top_skill_idx,top_skill_idx]
top_skill_list = [hs_count_df.columns[i] for i in top_skill_idx]

# Exponentiate the selected sub-matrix (plus a small constant offset); the
# diagonal drives node size and the reciprocal is used as the edge weight.
count_matrix = 10**np.array(hs_count_df_temp) + 0.00001
node_sizes = np.diag(count_matrix)

graph = nx.from_numpy_array(1/count_matrix, create_using=nx.DiGraph)
# Fixed seed so the layout (and therefore the figure) is reproducible on re-run.
pos = nx.spring_layout(graph, weight='weight', k=0.005, seed=42)
labeldict = {i: node for i, node in enumerate(top_skill_list)}

# Drop self-loops BEFORE reading the edge weights: the width list passed to
# nx.draw must have exactly one entry per remaining edge, in edge order.
# Reading the weights first leaves the diagonal entries in the list and
# shifts every width onto the wrong edge.
graph.remove_edges_from(nx.selfloop_edges(graph))
widths = nx.get_edge_attributes(graph, 'weight')

nx.draw(graph, pos, labels=labeldict, with_labels = True,
        node_size=[size * 0.00010 for size in node_sizes], node_color="#79adbb", 
        font_size=6, font_color="black", font_weight="bold", edge_color="grey", edgecolors='#79adbb',
        linewidths=0.3, arrowsize=1, width=[1/w/3000000 for w in widths.values()])

In [None]:
# Soft skill graph visualization
# Keep the 30 entries with the largest diagonal value of ss_count_df.
top_skill_idx = np.argsort(np.diag(ss_count_df))[::-1][:30]
ss_count_df_temp = ss_count_df.iloc[top_skill_idx,top_skill_idx]
top_skill_list = [ss_count_df.columns[i] for i in top_skill_idx]

# Exponentiate the selected sub-matrix (plus a small constant offset); the
# diagonal drives node size and the reciprocal is used as the edge weight.
count_matrix = 10**np.array(ss_count_df_temp) + 0.00001
node_sizes = np.diag(count_matrix)

graph = nx.from_numpy_array(1/count_matrix, create_using=nx.DiGraph)
# Fixed seed so the layout (and therefore the figure) is reproducible on re-run.
pos = nx.spring_layout(graph, weight='weight', k=0.1, seed=42)
labeldict = {i: node for i, node in enumerate(top_skill_list)}

# Drop self-loops BEFORE reading the edge weights: the width list passed to
# nx.draw must have exactly one entry per remaining edge, in edge order.
# Reading the weights first leaves the diagonal entries in the list and
# shifts every width onto the wrong edge.
graph.remove_edges_from(nx.selfloop_edges(graph))
widths = nx.get_edge_attributes(graph, 'weight')

nx.draw(graph, pos, labels=labeldict, with_labels = True,
        node_size=[size * 0.0008 for size in node_sizes], node_color="#79adbb", 
        font_size=6, font_color="black", font_weight="bold", edge_color="grey", edgecolors='#79adbb',
        linewidths=0.3, arrowsize=1, width=[1/w/600000 for w in widths.values()])

In [None]:
# Majors graph visualization
# Keep the 30 entries with the largest diagonal value of mj_count_df.
top_skill_idx = np.argsort(np.diag(mj_count_df))[::-1][:30]
mj_count_df_temp = mj_count_df.iloc[top_skill_idx,top_skill_idx]
top_skill_list = [mj_count_df.columns[i] for i in top_skill_idx]

# Exponentiate the selected sub-matrix (base 2.71 here, plus a small constant
# offset); the diagonal drives node size and the reciprocal is the edge weight.
count_matrix = 2.71**np.array(mj_count_df_temp) + 0.00001
node_sizes = np.diag(count_matrix)

graph = nx.from_numpy_array(1/count_matrix, create_using=nx.DiGraph)
# Fixed seed so the layout (and therefore the figure) is reproducible on re-run.
pos = nx.spring_layout(graph, weight='weight', k=1, seed=42)
labeldict = {i: node for i, node in enumerate(top_skill_list)}

# Drop self-loops BEFORE reading the edge weights: the width list passed to
# nx.draw must have exactly one entry per remaining edge, in edge order.
# Reading the weights first leaves the diagonal entries in the list and
# shifts every width onto the wrong edge.
graph.remove_edges_from(nx.selfloop_edges(graph))
widths = nx.get_edge_attributes(graph, 'weight')

plt.clf()
nx.draw(graph, pos, labels=labeldict, with_labels = True,
        node_size=[size * 3 for size in node_sizes], node_color="#79adbb", 
        font_size=10, font_color="black", font_weight="bold", edge_color="grey", edgecolors='#79adbb',
        linewidths=0.3, arrowsize=1, width=[1/w/300 for w in widths.values()])

In [None]:
# Score one resume (row i) against one job description (row j) on each of
# the three columns, printing one labelled score per column.
i = 502
j = 502
resume_row = resume_df.iloc[i]
jd_row = jd_df.iloc[j]

# (printed label, column, value list, network, count frame, show_sub_score)
score_specs = [
    ("Hard skill score:", 'hard_skills', hs_all_value_list, hs_network, hs_count_df, True),
    ("Soft skill score:", 'soft_skills', ss_all_value_list, ss_network, ss_count_df, False),
    ("Major score:", 'majors', mj_all_value_list, mj_network, mj_count_df, False),
]

for label, col, all_value_list, network, count_df, show_sub in score_specs:
    # Each cell holds a stringified list; parse it and de-duplicate.
    r_val_list = list(set(ast.literal_eval(resume_row[col])))
    jd_val_list = list(set(ast.literal_eval(jd_row[col])))
    print(label, resume_jd_matching_score(
        r_val_list, jd_val_list, all_value_list, network, count_df, show_sub_score=show_sub))

In [None]:
# Score a second resume (row i) against the same job description (row j) on
# each of the three columns, printing one labelled score per column.
i = 50
j = 502
resume_row = resume_df.iloc[i]
jd_row = jd_df.iloc[j]

# (printed label, column, value list, network, count frame, show_sub_score)
score_specs = [
    ("Hard skill score:", 'hard_skills', hs_all_value_list, hs_network, hs_count_df, True),
    ("Soft skill score:", 'soft_skills', ss_all_value_list, ss_network, ss_count_df, False),
    ("Major score:", 'majors', mj_all_value_list, mj_network, mj_count_df, False),
]

for label, col, all_value_list, network, count_df, show_sub in score_specs:
    # Each cell holds a stringified list; parse it and de-duplicate.
    r_val_list = list(set(ast.literal_eval(resume_row[col])))
    jd_val_list = list(set(ast.literal_eval(jd_row[col])))
    print(label, resume_jd_matching_score(
        r_val_list, jd_val_list, all_value_list, network, count_df, show_sub_score=show_sub))

In [None]:
# Grouped bar chart comparing two resumes on each sub-score and their total.
# NOTE(review): the numbers are hard-coded; presumably pasted from the outputs
# of the two single-resume scoring cells — confirm they are still current.
r1_hard, r1_soft, r1_major = 135.90610916674015, 21.223488706513322, 21.00757281141345
r2_hard, r2_soft, r2_major = 26.737758649753303, 22.48819290582201, 7.282761179605593

Scores = [
    r1_hard, r1_soft, r1_major, r1_hard+r1_soft+r1_major,
    r2_hard, r2_soft, r2_major, r2_hard+r2_soft+r2_major,
]
Methods = ['Hard Skills', 'Soft Skills', 'Majors', 'All Scores'] * 2
# One hue label per bar: four bars for the first resume, then four for the second.
Resume_flags = [flag for flag in ('Resume 1', 'Resume 2') for _ in range(4)]

sns.barplot(y=Scores, x=Methods, hue=Resume_flags, palette="ch:s=-2.5,r=1")
plt.title('Resume Performance')

In [None]:
# Score every resume against job description row j on each column.
# `scores` is filled resume by resume, in method_list order, so it lines up
# element-for-element with `methods`.
method_list = ['hard_skills', 'soft_skills', 'majors']
methods = method_list * resume_df.shape[0]

# Per-column arguments for resume_jd_matching_score. An unknown column now
# raises KeyError instead of being skipped silently (which would leave
# `scores` shorter than `methods`).
scoring_args = {
    'hard_skills': (hs_all_value_list, hs_network, hs_count_df),
    'soft_skills': (ss_all_value_list, ss_network, ss_count_df),
    'majors': (mj_all_value_list, mj_network, mj_count_df),
}

# The job description is the same for every resume, so parse and
# de-duplicate its stringified lists once instead of once per resume.
j = 502
jd_row = jd_df.iloc[j]
jd_values_by_col = {
    col: list(set(ast.literal_eval(jd_row[col]))) for col in method_list
}

scores = []
for i in tqdm(range(resume_df.shape[0])):
    resume_row = resume_df.iloc[i]
    for col in method_list:
        r_val_list = list(set(ast.literal_eval(resume_row[col])))
        # Hand the scorer a fresh copy so it cannot affect later iterations.
        jd_val_list = list(jd_values_by_col[col])
        all_value_list, network, count_df = scoring_args[col]
        scores.append(resume_jd_matching_score(
            r_val_list, jd_val_list, all_value_list, network, count_df)
        )

In [None]:
# Total score per resume: the sum of its three sub-scores multiplied by that
# SAME resume's org-rank, degree and experience factors.
#
# `scores` is laid out as consecutive triples (hard_skills, soft_skills,
# majors), one triple per resume. Flushing the running sum when the next
# triple starts would emit a spurious leading 0, drop the last resume's
# total, and scale every total by the following resume's factors, so each
# triple is summed and scaled in one step instead.
scores_per_resume = 3
agg_scores = []
for resume_pos in range(len(scores) // scores_per_resume):
    resume_row = resume_df.iloc[resume_pos]
    start = resume_pos * scores_per_resume
    raw_total = sum(scores[start:start + scores_per_resume])
    agg_scores.append(
        raw_total
        * resume_row['org_rank_factor']
        * resume_row['degrees_factor']
        * resume_row['year_experience_factor']
    )

In [None]:
# Box plot of the score distributions: one box per scoring column plus one
# for the aggregated totals, which are appended under the 'Total Score' label.
total_labels = ['Total Score'] * len(agg_scores)
all_scores = [*scores, *agg_scores]
all_methods = [*methods, *total_labels]
sns.boxplot(y=all_scores, x=all_methods, color='purple')