# Importing needed libraries

**if there are errors importing these libraries, just type in terminal : <code> pip install library_name </code>** <br> 
Sample: pip install pandas <br>
libraries used: pandas, nltk, sklearn, scipy, numpy <br>
To Do: push a requirements.txt for easy install

In [1]:
import pandas as pd
from nltk.corpus import stopwords
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.metrics import jaccard_score
from sklearn.metrics.pairwise import pairwise_distances
from scipy.spatial import distance
import numpy as np
import timeit
import seaborn as sns

In [2]:
# configure pandas for the datasets
pd.set_option('display.max_colwidth', 0)
pd.set_option('display.max_columns', 0)

In [3]:
# prepare stop words 
stop_words = stopwords.words('english')

# Getting Data Sources

In [4]:
raw_cv_data = pd.read_csv('UpdatedResumeDataSet.csv')
raw_job_data = pd.read_csv('dice_com-job_us_sample.csv')

# Cleaning and Preprocessing Data Sources 

**removing resume & job duplicates**

In [5]:
resume_no_dup = raw_cv_data.drop_duplicates('Resume',keep='first')

#preprocess and dropping job duplicates
jobs_data_from_raw =  raw_job_data[["jobdescription", "skills", "uniq_id", "jobtitle"]].copy()
no_duplicates_job = jobs_data_from_raw.drop_duplicates('jobdescription',keep='first')
no_duplicates_job = no_duplicates_job.drop_duplicates('skills',keep=False)

* remove the job data that contains irrelevant skill content 

In [6]:
# remove see job description data
no_duplicates_job = no_duplicates_job[no_duplicates_job["skills"].str.contains("job description", case=False)==False]
no_duplicates_job = no_duplicates_job[no_duplicates_job["skills"].str.contains("full time", case=False)==False]
no_duplicates_job = no_duplicates_job[no_duplicates_job["skills"].str.contains("see below", case=False)==False]
no_duplicates_job = no_duplicates_job[no_duplicates_job["skills"].str.contains("null", case=False)==False]

 * after manual checking, there are unneeded job data that should be removed <br> reason: I forgot 

In [7]:
no_duplicates_job = no_duplicates_job.drop([no_duplicates_job.index[11670],no_duplicates_job.index[16427],no_duplicates_job.index[18] ])

* clean data

In [8]:
# clean/preprocess and lowercase the skills column
no_duplicates_job['skills'] = no_duplicates_job['skills'].str.lower()
no_duplicates_job["skills"] = no_duplicates_job["skills"].str.replace(","," ")
no_duplicates_job["skills"] = no_duplicates_job["skills"].str.replace('"', " ")
no_duplicates_job["skills"] = no_duplicates_job["skills"].str.replace(r"[^a-zA-Z0-9+#]"," ", regex=True)

#clean jobs
no_duplicates_job['jobdescription'] = no_duplicates_job['jobdescription'].str.lower()
no_duplicates_job["jobdescription"] = no_duplicates_job["jobdescription"].str.replace(r"[^a-zA-Z0-9+#]"," ", regex=True)

# change job index
no_duplicates_job.index = no_duplicates_job["uniq_id"]

# clean resumes
resume_no_dup_cleaned = resume_no_dup.applymap(lambda s:s.lower() if type(s) == str else s)
resume_no_dup_cleaned = resume_no_dup_cleaned.replace(r"[^a-zA-Z0-9+#]",' ',regex=True)

*For double checking if duplicates were dropped* <br>
raw_cv_data.info() : 900+ <br>
resume_no_dup.info(): 166 resume <br> <br>
raw_job_data: 22000 <br>
no_duplicates_job: 18339

# Extracting Features (One hot encoding)

We need to convert the datasets into vector representations through one hot encoding <br> 
* Before that, we need the set of job skills to act as a column in our matrix <br> <br>

| Resume | Skill 1 | Skill 2 | Skill 3 | ... | Skill n |
| --- | --- | --- | --- | --- | --- |
| Resume id 1 | 1 | 0 | 1 | ... | 1 |
| Resume id 2 | 0 | 0 | 1 | ... | 1 |

<br> 

| Job | Skill 1 | Skill 2 | Skill 3 | ... | Skill n |
| --- | --- | --- | --- | --- | --- |
| Job id 1 | 1 | 0 | 1 | ... | 1 |
| Job id 2 | 0 | 0 | 1 | ... | 1 |

In [9]:
# get skills from job dataset through the skills column 
raw_skills = []

for i in no_duplicates_job['skills']:
    raw_skills.extend(i.split(' '))
    
# get the set of skills 
skills_set = set(raw_skills)
skills_set.remove('')

# remove stop words
for i in stop_words:
    skills_set.discard(i)

In [10]:
# let's validate that these set of skills are the same as what we have used in the TF-IDF program

skill_file = open('a_file.txt', 'r')
skills_from_file = []

for i in skill_file:
    skills_from_file.append(str.rstrip(i))
error = 0
for i in skills_set:
    if i not in skills_from_file:
        print('Error: Can not find this word: {word}'.format(word=i))
print('No errors all good')

No errors all good


In [11]:
# let's just use the skills from the TF-IDF program because I don't know if the order matters, let's just play safe
skills_set = skills_from_file.copy()

**Let's extract features** <br>
* prepare a dataframe for resume and jobs

In [12]:
resume_vectors = pd.DataFrame(index=resume_no_dup_cleaned.index, columns=skills_set)
job_vectors = pd.DataFrame(index=no_duplicates_job.index, columns=skills_set)

* vectorize resume

In [13]:
for i in resume_vectors.index:
    current_resume_content = resume_no_dup_cleaned.loc[i]['Resume']
    resume_vectors.loc[i] = [1 if skill in current_resume_content else 0 for skill in skills_set]

In [14]:
resume_vectors

Unnamed: 0,bam,jqueryjava,gravity,prediscovery,quickbooks,veteran,skils,petabytes,actionscript,variable,schneider,pp,implementations,middle,trizetto,play,password,raleigh,mgt,iiq,symbol,domumentum,kmv,custom,bond,dynamics,itil,19103,flask,redseal,unable,idd,ccsk,es2015,novell,gold,wire,jabber,conf,tao,...,hcissp,capture,crx,abg,hockey,activities,quote,tranfer,ncpdp,documents,parametric,paralegal,dashboarding,solidworks,electrical,lot,v10,vin,military,gaming,fireeye,microelectronics,electron,13c,bonus,safetrace,com,consultant,maintaining,vrealise,udm,delta,asme,4+years,exper,z,acceptable,thorough,seven,physical
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,1,0,0,0,1,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
894,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0
895,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0
896,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0
897,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0


* vectorize jobs

In [15]:
for i in job_vectors.index:
    current_jobdescription = no_duplicates_job.loc[i]['jobdescription']
    job_vectors.loc[i] = [1 if skill in current_jobdescription else 0 for skill in skills_set]

In [16]:
job_vectors

Unnamed: 0_level_0,bam,jqueryjava,gravity,prediscovery,quickbooks,veteran,skils,petabytes,actionscript,variable,schneider,pp,implementations,middle,trizetto,play,password,raleigh,mgt,iiq,symbol,domumentum,kmv,custom,bond,dynamics,itil,19103,flask,redseal,unable,idd,ccsk,es2015,novell,gold,wire,jabber,conf,tao,...,hcissp,capture,crx,abg,hockey,activities,quote,tranfer,ncpdp,documents,parametric,paralegal,dashboarding,solidworks,electrical,lot,v10,vin,military,gaming,fireeye,microelectronics,electron,13c,bonus,safetrace,com,consultant,maintaining,vrealise,udm,delta,asme,4+years,exper,z,acceptable,thorough,seven,physical
uniq_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1
8aec88cba08d53da65ab99cf20f6f9d9,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0
46baa1f69ac07779274bcd90b85d9a72,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,1,0,0
45efa1f6bc65acc32bbbb953a1ed13b7,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0
e0ac9d926dda5e95162ef05adea7318c,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,1,0,0,0,0,0
e7e326053c586bd94e59f1fd74de4a1b,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,1,0,1,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1acc1a7a845f0b9c233ad42941f0462d,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0
86e27ce6b7e631e55d69d142c7d43df2,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,1,0,0,0,0
4287c7ee3317ccf1edd76e238cf8e584,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0
d7512f0181d69f83f96db38cd77a4d08,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,1,0,0,0,1


**RESUME AND JOB VECTORS FOR NN**

In [17]:
#resume_vectors_NN = pd.DataFrame(index=range(0, 166), columns=skills_set).fillna(0)

In [18]:
#resume_vectors_NN

# Generating List of Recommendations for all resumes

**WE-UCF (cosine)**

In [19]:
# user-item matrix (WE-UCF cosine)
resume_job_cos_sim = cosine_similarity(resume_vectors, job_vectors)
resume_job_matrix = pd.DataFrame(resume_job_cos_sim, index=resume_vectors.index ,columns = job_vectors.index)

In [20]:
resume_job_matrix

uniq_id,8aec88cba08d53da65ab99cf20f6f9d9,46baa1f69ac07779274bcd90b85d9a72,45efa1f6bc65acc32bbbb953a1ed13b7,e0ac9d926dda5e95162ef05adea7318c,e7e326053c586bd94e59f1fd74de4a1b,b0dadecf4c3c2beecb9c773ca11ecda4,28f5e0c1cc3314813e674f0c32b04d1b,5e0ff38f5eaf44726f4e3d1dd257a244,e4a1ff1b6c0fda5f345e57cf1acb40dd,d0c81a2e3e5d666f3d730f1048c49132,51279d060da242e3baea98b26ddd641e,48b7ca0c2f6191fb48e7ecf19fbd3322,7ad2eadde69e07fee0e38c1a251dd81f,f3af6886ca0d133abda2ddf9b84633e4,d3073d47b79938269b22bdea4dc0b9b8,b7fab2d3de5e129310b382d8f51508f6,4868383e3f99535778f354fcb734d57c,81f8732626888727fcc1093b6c084839,1bef3cf6259e80d18222407858ad9052,72a3f0f0f24a0a78c5fac3c2376f5979,4a3f8742fae151757eb290a774968371,b1db84e23f424b1481b29212749abadc,51a0050d469e787dba2102383797536d,f2ade5d053b992593e2bdacd83a18a7f,2831bbdba025f87efca38b9761f26b95,2b2322459de27834b0c3c86737fb16a8,b923d29624fb4dc9e193aa30f8253ee6,5be5533c9ed448d1ef92016a56573869,ebdaa795cb443c6b7881b7a15c4c25b1,d92f267244249065890ee6bde5275f49,8cbcc9ef3be25e4409fcb4ab55d6503d,e2f1a046c937631529aec11a56f8590a,a2ef697606d460795dda16de672a5d5d,d1130a38c05cd6f6df40a2636a76ea5b,8cb4914f42c5b805d19be0952b3c88e9,8c50099ba0e24e089c12c6e68bb2bdc6,d6920e2979999da6d892774e2be97b75,065002d37b05ad5fa6478b709b94a3a4,b2db6c4923a59708ff6c562b08c23c73,d997f70cf7082ec18ffbb3e7c382001c,...,263e10b8aa545c6406211bb06136e787,a2f612a294080ff97b26ea18ee9ef62c,b24a51456f6575effeec94fb8c34f318,14945b616ed0d0be7a90cd0aa11d5614,e2c8714be39ad48a4f899fc327d99325,993e98d89cc6657df46f6bf6c84f2ca7,3184538e7963c8d5b5f87c34a84fb023,2f2e09bd5cab7c513cf982d68426ad2f,7b94bee8c0b73184ec83e0325900e5e6,17a9c1590e0f878e7ac25e0156287dfc,4574705b4e9054c48f01aacc0efa3964,dc9018c1e30468233e0220e4ab7fa22f,0cce919bcf70c6015ff1c71bea138438,03d79c5f3413b424fefb0a69fbba6ea0,5a47b4ab7731f842c60e5405455b9842,eb93c8d237a8aacc2b741517f40ee190,e501c02a4038dd49d420411fc2632ac8,3a0534473ab9c5cc03899a77703bb9e2,83d1393d14ae7edbefb719b5f37ecd6d,f030dd810d86411a93cb5682c0ac35aa,fd1e05e481af4a76fcc5a7c49914825b,788d893cd6b11dfd5ea921666ee1e009,b6b3d382ba66dfe7f80abaaa89a92e3d,ab42980a1be610f35939d37527fb72c2,8c4e3cfa8afc91c1d002e881b6493b29,c5fc49888e42880339d7753fd7559990,fc7085f0ab79099a6d87490d27def0c8,6cae8f5b52c79e884168d6f9b4962025,74262c496c6c04e38a7fbbb4e2e500d4,95bebff86da8f10232e75d24a94297d3,9a415ba24303aa16475f85438fc78921,76f85e4cf8b60279811613c7cbe72663,ba32786b50e8b4d468cb9d813066fea5,541b0f16ecd86fdbd7ee2b04523dc65e,ee46660dd5318edb27b229f51341fcf8,1acc1a7a845f0b9c233ad42941f0462d,86e27ce6b7e631e55d69d142c7d43df2,4287c7ee3317ccf1edd76e238cf8e584,d7512f0181d69f83f96db38cd77a4d08,ec375268b494b3bcbed1635d64226112
0,0.483155,0.515483,0.422187,0.337680,0.457980,0.482045,0.484705,0.518270,0.542398,0.457705,0.480854,0.438580,0.503884,0.469288,0.514115,0.511118,0.442318,0.495947,0.487173,0.387824,0.500502,0.504497,0.468418,0.467683,0.478625,0.508189,0.379517,0.517972,0.472568,0.460806,0.472949,0.437806,0.495810,0.443383,0.477730,0.505560,0.407868,0.436610,0.446973,0.519465,...,0.447867,0.488958,0.310930,0.506509,0.463354,0.451813,0.363039,0.470306,0.368123,0.446697,0.494966,0.485994,0.382931,0.416921,0.419649,0.495818,0.462872,0.385486,0.412484,0.526199,0.460537,0.449248,0.457854,0.469153,0.455743,0.441778,0.302848,0.419607,0.495800,0.411116,0.245944,0.458117,0.354734,0.381339,0.452127,0.403994,0.519176,0.473767,0.520379,0.501252
1,0.443722,0.411592,0.409967,0.365997,0.393545,0.396598,0.441472,0.437451,0.420469,0.434214,0.428290,0.388896,0.388674,0.423091,0.437462,0.397334,0.423411,0.439835,0.409248,0.400820,0.417866,0.399382,0.389952,0.393103,0.446057,0.448841,0.373557,0.415744,0.409894,0.446658,0.397191,0.429102,0.391693,0.439474,0.417251,0.415424,0.482252,0.406212,0.434221,0.415855,...,0.397732,0.445003,0.334207,0.408412,0.438233,0.426908,0.384206,0.425822,0.369168,0.419305,0.409615,0.454984,0.380030,0.422780,0.435887,0.422256,0.406212,0.393272,0.416354,0.422496,0.412648,0.424646,0.430126,0.375201,0.414937,0.430517,0.397229,0.470923,0.423106,0.385980,0.246755,0.395791,0.402607,0.423380,0.448288,0.383380,0.402441,0.428071,0.402449,0.429373
2,0.488391,0.452487,0.445605,0.388922,0.466352,0.495132,0.492997,0.512941,0.433319,0.461412,0.490195,0.405177,0.448051,0.468770,0.469409,0.475327,0.449425,0.505043,0.460194,0.396373,0.471151,0.473586,0.488427,0.437536,0.480450,0.462914,0.424309,0.482283,0.458314,0.480695,0.468017,0.478780,0.467929,0.449750,0.499367,0.486456,0.453801,0.479139,0.463997,0.457500,...,0.470228,0.472434,0.346479,0.437333,0.444339,0.474778,0.389429,0.501516,0.377665,0.423711,0.501600,0.497297,0.436578,0.423103,0.441038,0.459862,0.483455,0.439476,0.483813,0.495995,0.493859,0.430279,0.494927,0.429373,0.479009,0.476635,0.430076,0.449096,0.490483,0.434354,0.275322,0.443195,0.430098,0.452207,0.483446,0.420867,0.469238,0.479043,0.465403,0.503409
3,0.513972,0.517252,0.438167,0.349406,0.486257,0.494525,0.535821,0.544122,0.526764,0.485520,0.554882,0.426584,0.537930,0.530060,0.555176,0.527366,0.504096,0.514648,0.519749,0.435191,0.501848,0.535027,0.527956,0.524276,0.505236,0.508531,0.410995,0.525148,0.540059,0.497696,0.487427,0.508581,0.503833,0.443277,0.465052,0.548470,0.433032,0.471690,0.482454,0.565463,...,0.479517,0.517109,0.302382,0.527618,0.498057,0.442991,0.391550,0.535113,0.433015,0.463649,0.513796,0.477471,0.418368,0.478792,0.500683,0.510864,0.463775,0.399421,0.436990,0.562167,0.492979,0.514791,0.529560,0.535965,0.483586,0.508730,0.345784,0.438520,0.497602,0.453585,0.282043,0.521379,0.379845,0.385787,0.501483,0.430991,0.551302,0.455362,0.580039,0.518899
4,0.368788,0.365975,0.410074,0.363496,0.373089,0.364628,0.435860,0.391658,0.366382,0.362106,0.357927,0.392046,0.339534,0.370413,0.359164,0.378346,0.363498,0.341597,0.377419,0.389957,0.353851,0.350885,0.360933,0.335074,0.407058,0.407307,0.383475,0.393548,0.363978,0.378717,0.378248,0.369569,0.359329,0.417011,0.368098,0.348878,0.451846,0.393350,0.357772,0.305555,...,0.400246,0.364152,0.352163,0.351018,0.370633,0.466099,0.445789,0.360136,0.396164,0.400725,0.432242,0.463101,0.413136,0.402617,0.388206,0.363832,0.404697,0.429959,0.446233,0.362390,0.405250,0.357844,0.346280,0.347044,0.361760,0.398987,0.396376,0.431728,0.393952,0.388858,0.344628,0.345536,0.461781,0.436659,0.377805,0.395751,0.343771,0.421543,0.334970,0.375913
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
894,0.461373,0.423688,0.462260,0.363979,0.462182,0.430584,0.451873,0.478686,0.451819,0.461748,0.466063,0.431176,0.442414,0.490908,0.450270,0.464638,0.463545,0.453194,0.476903,0.440735,0.473988,0.468509,0.486383,0.423297,0.472411,0.430730,0.427288,0.452341,0.454257,0.467121,0.432476,0.460537,0.457941,0.441875,0.409832,0.448504,0.478787,0.434370,0.444844,0.462457,...,0.455631,0.475937,0.365994,0.432454,0.453077,0.439468,0.387617,0.484270,0.419933,0.453683,0.458636,0.459460,0.414242,0.452751,0.483680,0.453077,0.443969,0.392728,0.453020,0.456419,0.448893,0.466272,0.474342,0.442351,0.454620,0.466098,0.403821,0.442289,0.461298,0.485591,0.276980,0.482740,0.437820,0.393361,0.481860,0.442289,0.446720,0.441213,0.458964,0.448185
895,0.521754,0.483184,0.431412,0.354898,0.517422,0.465957,0.547665,0.526793,0.529800,0.485198,0.500582,0.437877,0.528758,0.516153,0.531954,0.500680,0.449238,0.524768,0.487270,0.441430,0.500253,0.532180,0.500563,0.431379,0.487727,0.529308,0.429434,0.515314,0.508319,0.471176,0.461921,0.468553,0.518982,0.484894,0.488701,0.469793,0.468932,0.427460,0.469305,0.522448,...,0.462672,0.495508,0.346410,0.500663,0.465590,0.459180,0.373801,0.493696,0.433050,0.461090,0.483540,0.420908,0.437876,0.424535,0.485150,0.498847,0.468562,0.377389,0.443710,0.512255,0.481445,0.475677,0.508192,0.500626,0.471375,0.490317,0.421648,0.461835,0.455357,0.456295,0.299610,0.501240,0.438375,0.439375,0.508024,0.437399,0.478185,0.452225,0.526243,0.487577
896,0.465836,0.440589,0.391842,0.364391,0.446023,0.401196,0.453761,0.492528,0.476388,0.435415,0.493136,0.396126,0.458338,0.454966,0.489626,0.452794,0.429445,0.471239,0.415721,0.441908,0.453315,0.459603,0.419984,0.454708,0.440603,0.480978,0.360042,0.457113,0.449754,0.436389,0.392793,0.429078,0.445482,0.432724,0.402969,0.445125,0.426019,0.426234,0.445568,0.476995,...,0.400617,0.460144,0.335352,0.461834,0.426826,0.398556,0.350026,0.442387,0.387430,0.433973,0.445814,0.403983,0.364225,0.435891,0.435124,0.459075,0.426234,0.355853,0.400810,0.466674,0.442592,0.450970,0.445956,0.502024,0.419323,0.438944,0.370636,0.416484,0.421584,0.406188,0.264899,0.456286,0.381995,0.381955,0.440711,0.421502,0.467962,0.383920,0.468548,0.448190
897,0.422319,0.394684,0.424317,0.388909,0.475552,0.428213,0.511755,0.419664,0.381651,0.369299,0.423820,0.386407,0.382143,0.436987,0.401178,0.446346,0.388218,0.432534,0.401610,0.395095,0.415557,0.401236,0.438660,0.366427,0.408784,0.425218,0.403917,0.386549,0.383866,0.432390,0.375916,0.383425,0.401478,0.428319,0.402614,0.377695,0.439487,0.403192,0.444297,0.412259,...,0.424412,0.394758,0.321270,0.373283,0.386292,0.457588,0.406831,0.406203,0.377168,0.403521,0.412647,0.435693,0.436563,0.400135,0.392691,0.436563,0.456167,0.352941,0.440972,0.413433,0.462309,0.360487,0.418021,0.370411,0.407207,0.417815,0.451781,0.398923,0.440304,0.440991,0.309875,0.421802,0.474963,0.419473,0.445420,0.409421,0.376633,0.389688,0.375860,0.415168


* Figures


In [21]:
#ax = sns.heatmap(resume_job_cos_sim, cmap = 'coolwarm', linecolor = 'white', xticklabels=job_vectors.index, yticklabels=resume_vectors.index)
#ax.tick_params(length=0)
#ax.xaxis.tick_top()
#figure = ax.get_figure()
#figure.savefig('Cosine similarity of each applicant’s resume and employer’s job description.png',dpi=400)

In [22]:
# user - user matrix (WE-UCF cosine)
resume_resume_cos_sim = cosine_similarity(resume_job_matrix, resume_job_matrix)
resume_resume_matrix = pd.DataFrame(resume_resume_cos_sim, index=resume_job_matrix.index, columns=resume_job_matrix.index)

In [23]:
resume_resume_matrix

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,40,41,42,43,45,46,47,48,49,50,84,85,86,87,88,89,90,91,92,93,104,105,106,107,108,109,140,141,143,144,...,711,712,713,714,715,716,717,718,719,742,743,744,745,746,747,748,784,785,786,787,788,824,825,826,827,828,829,830,852,853,854,855,856,892,893,894,895,896,897,898
0,1.000000,0.994460,0.996487,0.998962,0.985815,0.990603,0.996244,0.998931,0.995690,0.998887,0.986509,0.997653,0.979593,0.981161,0.979112,0.981493,0.993256,0.990082,0.993955,0.994206,0.995427,0.985899,0.977976,0.988381,0.997764,0.992349,0.988577,0.993334,0.988999,0.983562,0.995717,0.989154,0.998062,0.985963,0.989925,0.992807,0.997778,0.997855,0.989777,0.998732,...,0.997941,0.998124,0.995338,0.998031,0.998127,0.997755,0.998230,0.998116,0.997603,0.998740,0.997105,0.998514,0.998420,0.999120,0.995211,0.998996,0.997980,0.998820,0.997601,0.998818,0.991942,0.991683,0.987166,0.997197,0.985100,0.998067,0.999046,0.995454,0.996394,0.998368,0.994759,0.996940,0.997159,0.993007,0.995540,0.995761,0.997759,0.997425,0.991848,0.995798
1,0.994460,1.000000,0.998089,0.993814,0.995818,0.997950,0.998341,0.995587,0.998495,0.995794,0.996771,0.997060,0.993107,0.993595,0.992694,0.993213,0.998224,0.997787,0.997841,0.997696,0.998401,0.996447,0.992494,0.997458,0.995839,0.998332,0.997179,0.998562,0.997489,0.995344,0.998137,0.997505,0.991720,0.996362,0.998016,0.998598,0.996919,0.996574,0.997562,0.995859,...,0.994709,0.995535,0.997586,0.995863,0.994771,0.995622,0.994807,0.995194,0.996654,0.995883,0.997853,0.996511,0.996061,0.995208,0.997171,0.995466,0.996487,0.994743,0.997611,0.994423,0.998193,0.996833,0.995692,0.997745,0.995422,0.996111,0.994890,0.997492,0.998190,0.996458,0.997891,0.996773,0.997805,0.997613,0.998161,0.998197,0.996389,0.996708,0.997448,0.997917
2,0.996487,0.998089,1.000000,0.995841,0.993702,0.996699,0.998205,0.997704,0.998623,0.996976,0.994640,0.997608,0.989469,0.990240,0.988852,0.990624,0.997617,0.995561,0.997885,0.998417,0.997917,0.993469,0.988420,0.995343,0.996775,0.996773,0.995445,0.997085,0.995277,0.992376,0.998481,0.995748,0.993805,0.993955,0.995665,0.997515,0.998417,0.998474,0.996125,0.997850,...,0.996341,0.996830,0.997485,0.997374,0.996396,0.996927,0.996358,0.996640,0.997494,0.997338,0.998556,0.997870,0.998004,0.997016,0.998371,0.997209,0.997517,0.996279,0.998196,0.996298,0.997273,0.996877,0.994485,0.998464,0.993941,0.997952,0.997152,0.998275,0.998220,0.998113,0.998705,0.998639,0.998869,0.997143,0.998768,0.998033,0.997275,0.996866,0.997503,0.998394
3,0.998962,0.993814,0.995841,1.000000,0.984571,0.989686,0.995983,0.998719,0.995104,0.998890,0.985638,0.997863,0.978457,0.980061,0.978095,0.980223,0.992723,0.989600,0.993395,0.993555,0.995327,0.984966,0.976790,0.987584,0.998033,0.991773,0.987569,0.992945,0.988472,0.982566,0.995674,0.988720,0.998900,0.985041,0.989264,0.992437,0.997186,0.997061,0.988858,0.998606,...,0.998038,0.998329,0.995216,0.998012,0.998284,0.997937,0.998339,0.998294,0.997598,0.998640,0.996752,0.998324,0.998057,0.999196,0.994155,0.998890,0.998188,0.999206,0.997665,0.999040,0.991367,0.990245,0.985470,0.996657,0.984150,0.997588,0.998838,0.994343,0.996142,0.997976,0.993752,0.996246,0.996417,0.992887,0.994843,0.995687,0.998043,0.997812,0.991183,0.995455
4,0.985815,0.995818,0.993702,0.984571,1.000000,0.997228,0.993902,0.987690,0.994607,0.987632,0.998050,0.990449,0.997315,0.995979,0.996926,0.996316,0.996662,0.996207,0.994811,0.995700,0.993996,0.997890,0.997216,0.997511,0.988384,0.996247,0.997415,0.995736,0.997247,0.996978,0.994026,0.996940,0.981142,0.997272,0.996470,0.995945,0.991018,0.990679,0.996963,0.988609,...,0.987454,0.988410,0.994059,0.989172,0.987215,0.988927,0.986771,0.987856,0.990580,0.988670,0.992959,0.989422,0.989064,0.986936,0.993750,0.987471,0.990013,0.986193,0.991692,0.986218,0.996572,0.995495,0.996675,0.992721,0.996850,0.989724,0.986774,0.994211,0.993322,0.989874,0.995207,0.991633,0.992503,0.995101,0.994365,0.993866,0.989578,0.989611,0.995972,0.994169
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
894,0.995761,0.998197,0.998033,0.995687,0.993866,0.996709,0.998441,0.996986,0.998211,0.997025,0.994907,0.998141,0.990443,0.990971,0.990085,0.991333,0.998010,0.996756,0.998241,0.998028,0.998777,0.994359,0.989385,0.996155,0.997744,0.997914,0.996228,0.998012,0.996422,0.992926,0.998869,0.996383,0.994321,0.994363,0.996486,0.997978,0.997443,0.997204,0.995864,0.996879,...,0.996567,0.997142,0.997980,0.997204,0.996864,0.997316,0.996339,0.996873,0.997767,0.996903,0.998161,0.997544,0.996892,0.996598,0.996972,0.996499,0.997713,0.996232,0.998203,0.996119,0.997300,0.995120,0.993353,0.997886,0.993162,0.996906,0.996372,0.997228,0.998435,0.997823,0.997438,0.997442,0.998206,0.997903,0.998432,1.000000,0.998149,0.998066,0.997400,0.998020
895,0.997759,0.996389,0.997275,0.998043,0.989578,0.993570,0.997560,0.998354,0.996892,0.998222,0.990869,0.998591,0.985218,0.985989,0.984768,0.986485,0.995947,0.993666,0.996160,0.996344,0.997563,0.990324,0.983741,0.992264,0.998562,0.995301,0.992388,0.995787,0.992954,0.988200,0.997812,0.993354,0.997192,0.990171,0.993369,0.995497,0.997743,0.997442,0.992752,0.998422,...,0.998078,0.998645,0.997407,0.998319,0.998328,0.998555,0.997943,0.998471,0.998443,0.998333,0.997791,0.998276,0.997878,0.998443,0.995949,0.998073,0.998661,0.998160,0.998745,0.998207,0.994890,0.992792,0.989489,0.997596,0.989003,0.997622,0.997923,0.995885,0.998017,0.998192,0.995838,0.997244,0.997513,0.996262,0.997247,0.998149,1.000000,0.998823,0.995016,0.997181
896,0.997425,0.996708,0.996866,0.997812,0.989611,0.993669,0.997554,0.997726,0.996658,0.998410,0.991187,0.998626,0.985473,0.986807,0.985092,0.987109,0.995815,0.994610,0.996136,0.995716,0.997846,0.990840,0.984145,0.992655,0.998574,0.995807,0.992721,0.996596,0.993414,0.988969,0.997487,0.993404,0.997012,0.990700,0.994308,0.995928,0.997355,0.996902,0.992681,0.997743,...,0.997497,0.998193,0.997071,0.997755,0.997870,0.997887,0.997527,0.997916,0.998015,0.997842,0.997681,0.998184,0.997258,0.998035,0.995232,0.997834,0.998191,0.997910,0.998421,0.997584,0.994546,0.992496,0.989266,0.997295,0.988774,0.997142,0.997472,0.995393,0.997704,0.997839,0.995241,0.996660,0.997399,0.996162,0.996908,0.998066,0.998823,1.000000,0.994433,0.997022
897,0.991848,0.997448,0.997503,0.991183,0.995972,0.998486,0.996839,0.994128,0.997565,0.993267,0.997238,0.994907,0.993737,0.994082,0.993272,0.994291,0.997723,0.996942,0.998641,0.998249,0.996963,0.996214,0.993172,0.997575,0.994340,0.997451,0.997562,0.997240,0.997235,0.995369,0.997468,0.997807,0.988887,0.996257,0.996771,0.997702,0.995700,0.995677,0.997371,0.994224,...,0.993501,0.993824,0.996576,0.994484,0.993315,0.994418,0.992879,0.993586,0.995210,0.993676,0.996328,0.994626,0.994400,0.992976,0.997086,0.992926,0.995108,0.992019,0.995759,0.992408,0.997874,0.996783,0.996049,0.996333,0.996853,0.994700,0.993097,0.997044,0.996885,0.995419,0.997702,0.996253,0.996905,0.997674,0.998221,0.997400,0.995016,0.994433,1.000000,0.997198


**WE-UCF (jaccard)**

In [24]:
# user-item matrix (WE-UCF jaccard)
resume_job_jacc = 1 - pairwise_distances(resume_vectors.values, job_vectors.values, metric="jaccard", n_jobs=-1)
resume_job_jacc_matrix = pd.DataFrame(resume_job_jacc, index=resume_vectors.index, columns=job_vectors.index)



In [25]:
resume_job_jacc_matrix

uniq_id,8aec88cba08d53da65ab99cf20f6f9d9,46baa1f69ac07779274bcd90b85d9a72,45efa1f6bc65acc32bbbb953a1ed13b7,e0ac9d926dda5e95162ef05adea7318c,e7e326053c586bd94e59f1fd74de4a1b,b0dadecf4c3c2beecb9c773ca11ecda4,28f5e0c1cc3314813e674f0c32b04d1b,5e0ff38f5eaf44726f4e3d1dd257a244,e4a1ff1b6c0fda5f345e57cf1acb40dd,d0c81a2e3e5d666f3d730f1048c49132,51279d060da242e3baea98b26ddd641e,48b7ca0c2f6191fb48e7ecf19fbd3322,7ad2eadde69e07fee0e38c1a251dd81f,f3af6886ca0d133abda2ddf9b84633e4,d3073d47b79938269b22bdea4dc0b9b8,b7fab2d3de5e129310b382d8f51508f6,4868383e3f99535778f354fcb734d57c,81f8732626888727fcc1093b6c084839,1bef3cf6259e80d18222407858ad9052,72a3f0f0f24a0a78c5fac3c2376f5979,4a3f8742fae151757eb290a774968371,b1db84e23f424b1481b29212749abadc,51a0050d469e787dba2102383797536d,f2ade5d053b992593e2bdacd83a18a7f,2831bbdba025f87efca38b9761f26b95,2b2322459de27834b0c3c86737fb16a8,b923d29624fb4dc9e193aa30f8253ee6,5be5533c9ed448d1ef92016a56573869,ebdaa795cb443c6b7881b7a15c4c25b1,d92f267244249065890ee6bde5275f49,8cbcc9ef3be25e4409fcb4ab55d6503d,e2f1a046c937631529aec11a56f8590a,a2ef697606d460795dda16de672a5d5d,d1130a38c05cd6f6df40a2636a76ea5b,8cb4914f42c5b805d19be0952b3c88e9,8c50099ba0e24e089c12c6e68bb2bdc6,d6920e2979999da6d892774e2be97b75,065002d37b05ad5fa6478b709b94a3a4,b2db6c4923a59708ff6c562b08c23c73,d997f70cf7082ec18ffbb3e7c382001c,...,263e10b8aa545c6406211bb06136e787,a2f612a294080ff97b26ea18ee9ef62c,b24a51456f6575effeec94fb8c34f318,14945b616ed0d0be7a90cd0aa11d5614,e2c8714be39ad48a4f899fc327d99325,993e98d89cc6657df46f6bf6c84f2ca7,3184538e7963c8d5b5f87c34a84fb023,2f2e09bd5cab7c513cf982d68426ad2f,7b94bee8c0b73184ec83e0325900e5e6,17a9c1590e0f878e7ac25e0156287dfc,4574705b4e9054c48f01aacc0efa3964,dc9018c1e30468233e0220e4ab7fa22f,0cce919bcf70c6015ff1c71bea138438,03d79c5f3413b424fefb0a69fbba6ea0,5a47b4ab7731f842c60e5405455b9842,eb93c8d237a8aacc2b741517f40ee190,e501c02a4038dd49d420411fc2632ac8,3a0534473ab9c5cc03899a77703bb9e2,83d1393d14ae7edbefb719b5f37ecd6d,f030dd810d86411a93cb5682c0ac35aa,fd1e05e481af4a76fcc5a7c49914825b,788d893cd6b11dfd5ea921666ee1e009,b6b3d382ba66dfe7f80abaaa89a92e3d,ab42980a1be610f35939d37527fb72c2,8c4e3cfa8afc91c1d002e881b6493b29,c5fc49888e42880339d7753fd7559990,fc7085f0ab79099a6d87490d27def0c8,6cae8f5b52c79e884168d6f9b4962025,74262c496c6c04e38a7fbbb4e2e500d4,95bebff86da8f10232e75d24a94297d3,9a415ba24303aa16475f85438fc78921,76f85e4cf8b60279811613c7cbe72663,ba32786b50e8b4d468cb9d813066fea5,541b0f16ecd86fdbd7ee2b04523dc65e,ee46660dd5318edb27b229f51341fcf8,1acc1a7a845f0b9c233ad42941f0462d,86e27ce6b7e631e55d69d142c7d43df2,4287c7ee3317ccf1edd76e238cf8e584,d7512f0181d69f83f96db38cd77a4d08,ec375268b494b3bcbed1635d64226112
0,0.308880,0.342910,0.237487,0.154628,0.286403,0.310377,0.298337,0.345404,0.371378,0.287081,0.314210,0.249194,0.335377,0.305743,0.345731,0.341593,0.273684,0.327402,0.317808,0.221328,0.328358,0.335683,0.297630,0.304089,0.305101,0.332370,0.200859,0.344828,0.308348,0.289017,0.293347,0.274955,0.327753,0.258166,0.304515,0.336842,0.223416,0.263889,0.276163,0.350863,...,0.268641,0.321207,0.134091,0.338710,0.294283,0.258658,0.171946,0.302026,0.194030,0.267620,0.314629,0.288817,0.192650,0.243927,0.256795,0.321531,0.284274,0.203463,0.228051,0.356856,0.291235,0.286472,0.292902,0.304762,0.285578,0.269231,0.138122,0.231931,0.316783,0.235839,0.086147,0.294170,0.171492,0.184807,0.274924,0.221390,0.350524,0.297935,0.346910,0.333042
1,0.276565,0.245186,0.257827,0.214286,0.239130,0.238095,0.281198,0.264668,0.243398,0.269795,0.254658,0.241379,0.223392,0.246495,0.253378,0.230130,0.262222,0.263092,0.243871,0.250000,0.251656,0.231591,0.234397,0.226107,0.278426,0.278642,0.228846,0.248705,0.237705,0.280179,0.244582,0.261580,0.226730,0.281362,0.255714,0.242568,0.317365,0.251969,0.271624,0.234110,...,0.246732,0.267252,0.187643,0.233708,0.270423,0.271375,0.229581,0.258412,0.225621,0.263682,0.252632,0.294333,0.230769,0.266892,0.271386,0.258020,0.251969,0.243615,0.262760,0.235234,0.251404,0.254499,0.259843,0.215130,0.254703,0.270440,0.238202,0.307692,0.262295,0.238832,0.118227,0.232500,0.245652,0.260579,0.285948,0.236940,0.222676,0.266566,0.207215,0.252695
2,0.322412,0.288889,0.281447,0.215827,0.303831,0.327389,0.326471,0.340633,0.268267,0.299479,0.318707,0.249617,0.281828,0.297619,0.295525,0.304688,0.289646,0.331395,0.295508,0.245690,0.305455,0.303233,0.322122,0.273118,0.315515,0.299875,0.259804,0.314149,0.289588,0.316069,0.305439,0.312655,0.298992,0.287234,0.332016,0.313616,0.285714,0.314894,0.302013,0.282888,...,0.306569,0.303653,0.181488,0.270661,0.284472,0.305600,0.219469,0.332085,0.225040,0.268085,0.334716,0.327014,0.261324,0.267241,0.282581,0.297365,0.318634,0.270764,0.311475,0.310916,0.326848,0.270642,0.325243,0.267760,0.314474,0.312849,0.246350,0.282258,0.324763,0.275037,0.120459,0.280641,0.252220,0.266788,0.318508,0.259905,0.290291,0.314865,0.272065,0.328072
3,0.331455,0.341918,0.243902,0.156716,0.305893,0.317893,0.335031,0.366184,0.355593,0.306407,0.378667,0.236125,0.364574,0.358108,0.382943,0.354506,0.320379,0.341969,0.343750,0.250980,0.326786,0.361519,0.344925,0.352292,0.323996,0.329347,0.216495,0.348566,0.366980,0.315789,0.300774,0.331216,0.333050,0.253493,0.290909,0.374350,0.235597,0.287091,0.300943,0.393791,...,0.288538,0.344021,0.126461,0.356788,0.320220,0.247454,0.183190,0.355372,0.231250,0.276200,0.326255,0.277096,0.209178,0.285857,0.318609,0.331181,0.280920,0.207819,0.240000,0.390960,0.314968,0.340106,0.352252,0.361979,0.304912,0.318671,0.156780,0.239959,0.314394,0.262000,0.097023,0.346561,0.181336,0.182692,0.309665,0.234872,0.380461,0.279630,0.405252,0.347199
4,0.182131,0.170915,0.235000,0.219931,0.187166,0.176183,0.238806,0.185129,0.161039,0.179443,0.162890,0.222222,0.150336,0.164456,0.155000,0.171271,0.181818,0.154062,0.177812,0.213333,0.166667,0.156889,0.176174,0.147606,0.204904,0.201681,0.222826,0.186830,0.161981,0.190053,0.196498,0.177316,0.162050,0.234742,0.181507,0.155196,0.269542,0.207171,0.180000,0.125721,...,0.216102,0.166193,0.213235,0.152091,0.180464,0.275773,0.282230,0.171115,0.231608,0.216561,0.226667,0.268949,0.251553,0.219565,0.195382,0.176568,0.214429,0.257790,0.263852,0.149270,0.202055,0.165926,0.161145,0.155647,0.179130,0.208984,0.243986,0.253968,0.200730,0.214612,0.204545,0.158430,0.292929,0.273649,0.198397,0.227979,0.141563,0.218692,0.124101,0.169168
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
894,0.294781,0.259029,0.299820,0.208678,0.296671,0.268000,0.291401,0.302835,0.272933,0.295714,0.289571,0.274165,0.268378,0.305065,0.269527,0.286396,0.297965,0.279126,0.301813,0.282645,0.300792,0.289474,0.314972,0.253440,0.303977,0.268456,0.268657,0.281888,0.276744,0.300578,0.274510,0.291055,0.281775,0.283531,0.253406,0.273154,0.312734,0.276336,0.283212,0.274841,...,0.294686,0.297531,0.204301,0.257174,0.286494,0.280919,0.227926,0.310067,0.262963,0.293076,0.294815,0.298094,0.253465,0.292484,0.314410,0.286494,0.284178,0.241252,0.291439,0.266667,0.283747,0.292249,0.300000,0.269596,0.289773,0.302147,0.238494,0.282332,0.296350,0.320624,0.130137,0.304956,0.268595,0.233740,0.316375,0.282332,0.260692,0.280116,0.252115,0.272834
895,0.352941,0.316934,0.266862,0.188525,0.348958,0.303465,0.374277,0.355713,0.353319,0.320303,0.330726,0.272059,0.354305,0.341935,0.353556,0.329694,0.289638,0.352208,0.320646,0.279330,0.332544,0.357860,0.333748,0.271134,0.322500,0.359649,0.259601,0.345476,0.335491,0.308176,0.299472,0.305425,0.346325,0.313783,0.323346,0.302869,0.293846,0.270833,0.306410,0.341974,...,0.298487,0.326281,0.176471,0.326763,0.303249,0.289121,0.203915,0.326992,0.262673,0.297111,0.318596,0.260993,0.256911,0.266667,0.320253,0.332103,0.304813,0.220390,0.274775,0.330153,0.316953,0.310306,0.339202,0.330033,0.308365,0.323963,0.234401,0.288550,0.294710,0.290415,0.127886,0.332192,0.252492,0.251256,0.338866,0.269173,0.302457,0.292035,0.326781,0.317938
896,0.303413,0.280182,0.238166,0.197256,0.286990,0.250294,0.292308,0.323910,0.304852,0.278195,0.322727,0.241481,0.291712,0.287975,0.314315,0.287568,0.273418,0.303911,0.260428,0.280980,0.291422,0.293160,0.265455,0.288462,0.282338,0.315920,0.210687,0.293981,0.284199,0.279040,0.244156,0.272085,0.281996,0.272328,0.252121,0.280983,0.262016,0.270415,0.286641,0.300789,...,0.249319,0.294772,0.172474,0.292054,0.270758,0.243323,0.190955,0.282686,0.230650,0.275766,0.286832,0.249275,0.207120,0.276836,0.277987,0.297297,0.270415,0.207055,0.243570,0.289623,0.283784,0.288483,0.284890,0.329584,0.265180,0.280959,0.203072,0.254992,0.267089,0.251788,0.113553,0.292517,0.215719,0.214165,0.282016,0.258860,0.291587,0.237562,0.277466,0.283422
897,0.246445,0.218232,0.265957,0.239130,0.289076,0.247344,0.329435,0.235294,0.200241,0.209953,0.234270,0.236515,0.203518,0.237674,0.210153,0.247043,0.224359,0.239946,0.223464,0.239464,0.234870,0.216667,0.257053,0.193069,0.236677,0.245802,0.252273,0.213296,0.203727,0.255700,0.219761,0.214493,0.217617,0.266667,0.232087,0.200753,0.279642,0.240351,0.266892,0.212838,...,0.259328,0.214756,0.187328,0.193853,0.218563,0.292842,0.254011,0.229318,0.231626,0.243542,0.243740,0.273101,0.279188,0.242026,0.226624,0.254237,0.280797,0.213808,0.280353,0.209677,0.274603,0.195155,0.235127,0.197452,0.236508,0.250000,0.289694,0.247289,0.262895,0.276210,0.165605,0.235213,0.310811,0.264550,0.273224,0.255459,0.188639,0.226384,0.172743,0.225064


In [26]:
# user - user matrix (WE-UCF jaccard)
resume_resume_jacc = 1 - pairwise_distances(resume_job_jacc_matrix.values, metric="jaccard", n_jobs=-1)
resume_resume_matrix_jacc = pd.DataFrame(resume_resume_jacc, index=resume_job_jacc_matrix.index, columns=resume_job_jacc_matrix.index)



In [27]:
resume_resume_matrix_jacc #hmmm apay one amin

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,40,41,42,43,45,46,47,48,49,50,84,85,86,87,88,89,90,91,92,93,104,105,106,107,108,109,140,141,143,144,...,711,712,713,714,715,716,717,718,719,742,743,744,745,746,747,748,784,785,786,787,788,824,825,826,827,828,829,830,852,853,854,855,856,892,893,894,895,896,897,898
0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
1,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
2,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
3,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
4,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
894,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
895,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
896,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
897,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0


In [28]:
def generate_recommendations_for(resume, matching_func):
    match_func = {
        'cosine': [resume_job_matrix, resume_resume_matrix],
        'jaccard': [resume_job_jacc_matrix, resume_resume_matrix_jacc]
    }
    
    nearest_neighbor = match_func[matching_func][1].loc[resume].sort_values(ascending=False).drop(index=resume).head(1).index[0]
    concat_target_and_neighbor = pd.concat([match_func[matching_func][0].loc[resume],match_func[matching_func][0].loc[nearest_neighbor]])
    recommendations = concat_target_and_neighbor.sort_values(ascending=False).index.drop_duplicates(keep='first') # sort then keep the first item of duplicated jobs
    return recommendations

In [29]:
def nn_generate_recommendations_for(resume):
    
    nearest_neighbor = resume_resume_matrix.loc[resume].sort_values(ascending=False).drop(index=resume).head(1).index[0]
    #concat_target_and_neighbor = pd.concat([resume_job_matrix.loc[resume],resume_job_matrix.loc[nearest_neighbor]])
    #collected_top_n_from_neighbor = resume_job_matrix.loc[nearest_neighbor].sort_values(ascending=False)
    concat_target_and_neighbor = pd.concat([resume_job_matrix.loc[resume],resume_job_matrix.loc[nearest_neighbor]])
    recommendations = concat_target_and_neighbor.groupby('uniq_id').sum().sort_values(ascending=False)
    return concat_target_and_neighbor
    

In [30]:
nn_generate_recommendations_for(894)

Unnamed: 0,0,00069cee44688d1bb1940fb0316ceba0,00073fc9ef7e7cd3a09894fc287103ed,0009c034a443fccd0758c7add365615c,0010716804e013f363f291ace1acbff0,0012bb53511dc9144cf78e4b1ad6ca4e,001a21d7e4ccd088dabb994469039cc9,001f806d32dd936e82ebf3f3c428215d,0020d14f28f84e5bfb81a1b55527cc04,00221f996cc8b17340157f32c37329fa,00256e1f6a82d9dc26a24b44f92b0812,00265cbedbe72acdc68e9eaeb6f2f4c3,002bb85517c53686da6d5a908ffd14fe,002ca6276cf0a779a7235f81158e4940,003278fcd3a39bef1f4b3ed27c17bac6,003d9322e122a5062b7d85add07f6768,003ff19533f70734bc7667e33060246d,0049c50f720c6a8feb967b4d7f9e837b,004bd080bf26782e1297ddb100e1aff4,005415ce57a49d1cc8732435431b4907,0054cfe57ac53952453ed451632b2bff,0058e24157b64f3290a2b0131ef874e8,005a14aecf84c2dfed79e6a182be37d9,005b23cb133310ea023aa8b0decee116,0063cc1655022ba94fac6e39c32796a7,00656cf3629a582d66fa714a761e59ca,0067cc21740ced44e43af3519464e0bc,00683ac175d97b71da614eca767c3d75,007518cd08737cae0f92b796b0ad7c83,0075c441f3ec52e0227becfb750f8924,007d4edff270f66c725e1808e5a067a5,008348275ffbd16607a15bf55bae54ac,0086d52417397029e558dee2373ffeea,0089ab7dde424ea7008144244f877eed,008e0d7d6b47fba718597c52f7a6e24a,0092fc9702805959328d86e028f0ef7f,0093dd398be4bc86da069e335b4dfed7,0098dad5e33307018cdff9485f625570,009b9b809bddd2a2ba31a5086f46e6a7,00a7522c892514ef5e323446a603c310,...,ff81c6c009c0dd630968c7005d51a9ec,ff88ec6aaa59c26d8c4017de05c05a08,ff8a71abf0691d163a5108889b609dca,ff8fc80e157ba81d8bc30c3793d13e85,ff92a73a6a85e5caeac35f00f97ff787,ff937b384cf9926189b13d9bce117712,ff9693a025ec5bd222c6687722c9285c,ff989699cf1c7cafc39f81d39241ef6b,ff9992f878f62d47d6386f1e08bf407f,ff9afb6d4f30c605d31489c494c3abbd,ff9b66aee8b6c5beb3117e34a4f7d07b,ff9b9ebbf12506a34b08eaf81d88683b,ff9e1ae14ce8a12c2fc9575cb3396320,ff9f5fa7ce35e6f4a959b9cf2bf31cb5,ffa23ac33ec8c41d5736d7f424959c4d,ffa2ff8d1b97d51295050a6e388d21c3,ffa9db8825c85c5118ab75b28c2262c4,ffaa74f20f27bd48dbb4922037f43984,ffaab94d7d17eb7ad9eb8838ddde49af,ffaedc4bd4ebb8be14f586c6ac822487,ffbb37beb05dd7cd49e42f943de6485f,ffbded17a4c21bad25947a7e6e8d4219,ffc0aa45e55eeb5d42e20e7c371dec7c,ffc644bc4e0430afd22de60bfb720bc7,ffcd2b1b9afbe73b8a41337fe044b914,ffd387ee590911ae866d0aa3f33bba55,ffd6e6eea0c46653b674ea13aa5c27da,ffdb76a48d66ee3517852c87ebc61cbf,ffde524d1ff1ddcf6d1c9ec34f0cd5ce,ffe15974d709ed7a6ce56cb69d43bc4b,ffe35c50b831191466fac84fd154bb6f,ffe589de20a385dbc264c03f4d69f9f9,ffe82abeee50723e001a7d509f10e2ae,ffe8b8f8ebf7d08257925da4aef3a944,ffeb3a189aa893967ad3a1f30def6078,fffb75617eac6ae15aabc73eb7b6eb4e,fffc0c8ae125e5733e4f034e2a457b3c,fffc6f328cef53e76f5e5bb78ed23ba9,fffcf89ea054a29e92204955ca846d63,fffe1767dbc1713944851a0a4f02ec5b
8aec88cba08d53da65ab99cf20f6f9d9,0.461373,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
46baa1f69ac07779274bcd90b85d9a72,0.423688,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
45efa1f6bc65acc32bbbb953a1ed13b7,0.462260,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
e0ac9d926dda5e95162ef05adea7318c,0.363979,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
e7e326053c586bd94e59f1fd74de4a1b,0.462182,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4287c7ee3317ccf1edd76e238cf8e584,0.441213,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
d7512f0181d69f83f96db38cd77a4d08,0.458964,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
ec375268b494b3bcbed1635d64226112,0.448185,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
104,,0.428785,0.473519,0.493043,0.436194,0.482443,0.404383,0.401708,0.438138,0.459765,0.427049,0.444914,0.460841,0.473954,0.477381,0.472601,0.479291,0.461926,0.488259,0.484628,0.437296,0.463246,0.466802,0.464158,0.441560,0.436561,0.455801,0.432929,0.424336,0.460759,0.466383,0.425967,0.486829,0.410685,0.462031,0.448714,0.443935,0.481849,0.471264,0.416622,...,0.475269,0.429264,0.456128,0.452676,0.472413,0.421891,0.439429,0.453580,0.423832,0.461967,0.474024,0.439983,0.456433,0.448427,0.447385,0.443590,0.432614,0.485092,0.479291,0.417111,0.456854,0.467904,0.459908,0.466845,0.461635,0.468579,0.479693,0.460648,0.461285,0.484753,0.513374,0.421171,0.395263,0.440800,0.493453,0.336477,0.495507,0.409270,0.380934,0.482414


# Evaluating Result 

## Our metric is MAP@K & Catalog Coverage
* getting the relevant jobs for each resume

In [31]:
hit_list = pd.read_csv('hits_list.csv', names=['resume', 'job_uniq_id'], index_col=0)
grouped_hit_list =  hit_list.groupby('resume').apply(lambda x : x.to_numpy().flatten().tolist()).to_dict()

In [32]:
# def precision_at_k(resume, k = 1):
#     recommended_jobs = generate_recommendations_for(resume,'temp')[0:k]
#     resume_hit_list = [1 if x in grouped_hit_list[resume] else 0 for x in recommended_jobs]
#     return resume_hit_list.count(1)/k

* needed functions for the calculation for our metrics

In [33]:
# note that the recommended jobs in this function is a product of cosine only, jaccard next time
def average_precision_at_k(resume, k = 1, algo='WE-UCF', matching_func = 'cosine'):
    recommendation_func = {
        'WE-UCF': generate_recommendations_for(resume,matching_func),
        'NN': nn_generate_recommendations_for(resume),
    }
    
    recommended_jobs = recommendation_func[algo][0:k]
    resume_hit_list = [1 if x in grouped_hit_list[resume] else 0 for x in recommended_jobs]
    
    total = 0
    rank = 1
    one_counter = 1
    
    for i in resume_hit_list:
        if i == 1:
            value = one_counter/rank
            total+=value
            one_counter += 1
        rank += 1
    
    try:
        return total / resume_hit_list.count(1)
        # return total / len(grouped_hit_list[resume])
    except ZeroDivisionError:
        return 0

In [34]:
def map_at_k(k=1, algo='WE-UCF', matching_func = 'cosine'):
    total = 0
    for resume_index in resume_no_dup.index:
        total += average_precision_at_k(resume_index, k = k, algo=algo, matching_func = matching_func)
    return total / len(resume_no_dup)

* sample MAP@K calculation , where k =  1

In [35]:
map_at_k(k = 1, algo='WE-UCF', matching_func = 'cosine')

KeyboardInterrupt: 

In [None]:
map_at_k(k = 1, algo='WE-UCF', matching_func = 'jaccard') 

In [None]:
map_at_k(k = 1, algo='NN', matching_func = 'cosine') 

In [None]:
def catalog_coverage(recommendations, all_recommendations):
    return len(set(recommendations))/len(all_recommendations)
    

* sample coverage calculation , where k =  1

In [None]:
recommendation_list = []
for i in resume_no_dup.index:
    # note  k = 2 here
    recommendation_list.extend(generate_recommendations_for(i, 'cosine')[0:2])
catalog_coverage(recommendation_list, no_duplicates_job)

In [None]:
recommendation_list = []
for i in resume_no_dup.index:
    # note  k = 2 here
    recommendation_list.extend(generate_recommendations_for(i, 'jaccard')[0:2])
catalog_coverage(recommendation_list, no_duplicates_job)