In [2]:
import pandas as pd
import numpy as np

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.feature_extraction import text
from sklearn.metrics.pairwise import cosine_similarity

# Skills and Work Experience 
skills = pd.read_csv('skills.csv').dropna()
work_exp = pd.read_csv('work_exp.csv').dropna()

# Grouping Skills per User_ID
skills['skills'] = [(' ' + i) for i in skills['value']]
skills_grpd = skills[['resume_id', 'skills']].groupby('resume_id').sum()

# Users Grouped on work Exp descriptions and latest Job Title
titles = work_exp[['resume_id','title']].groupby('resume_id').first()
user_grpd = work_exp[['resume_id','description']].groupby('resume_id').sum()
user_grpd['title'] = titles

# Merging Users grouped table and grouped skills table. There are some users without SKILLS
merged = user_grpd.merge(skills_grpd, how='left', left_index=True, right_index=True).fillna('')
# Concatenating Work Exp description and skills for every user
merged['exp_skills'] = merged['description'] + merged['skills']

# Create User ID/Resume ID Table
users = merged[['exp_skills']]

# Create Jobs Table. 2000 most occuring job titles and their description+skills
title_skills = merged[merged['skills'] != ''].copy()
title_skills['title'] = title_skills.title.str.lower().str.strip()
jobs = title_skills[['title','exp_skills']].groupby('title').sum()
jobs = jobs[jobs.index.isin(pd.Series(title_skills.title.value_counts().head(1000).index))]

#  Vectorization of Job descriptions and SKILLS
my_stopwords = ['nbsp','aaa','aba','abc','abandonment','abatement','abaqus']
vec = TfidfVectorizer(ngram_range=(1,1), token_pattern='[a-zA-z]{3,50}',  decode_error='ignore',
                      max_df=0.5, min_df=10, stop_words=text.ENGLISH_STOP_WORDS.union(my_stopwords) )

jobs_matrix = vec.fit_transform(jobs.exp_skills)

In [3]:
def recommend_user(resume_id):
    user_mat = vec.transform(users.exp_skills.loc[[resume_id]])
    cos_sim = cosine_similarity(user_mat,jobs_matrix)
    df = pd.DataFrame(cos_sim, columns = jobs.index)
    scr = df.iloc[[0]].values
    top = jobs.index[np.argsort(-scr)]
    
    print "Current Job Title :"
    print "   " , merged.loc[resume_id].title
    print
    print "Top 10 Recommended Jobs :"
    for i in top[0,0:10]:
        print "   " ,i

def recommend_newuser(text_resume):
    user_mat = (vec.transform([text_resume]))
    cos_sim = cosine_similarity(user_mat,jobs_matrix)
    df = pd.DataFrame(cos_sim, columns = jobs.index)
    scr = df.iloc[[0]].values
    top = jobs.index[np.argsort(-scr)]
    
    print "Top 10 Recommended Jobs :"
    for i in top[0,0:10]:
        print "   " ,i

In [4]:
recommend_user(626)

Current Job Title :
    Digital Advertising Sales Executive

Top 10 Recommended Jobs :
    sales representative
    account executive
    business development manager
    marketing manager
    business development representative
    advertising sales representative
    senior account manager
    marketing consultant
    marketing coordinator
    advertising sales executive


In [5]:
# For new user. Copy paste resume as a string
resume = '''

'''

In [6]:
recommend_newuser(resume)

Top 10 Recommended Jobs :
    account coordinator
    project manager/product manager/business analyst
    project manager/scrum master
    project program manager, senior analyst- global packaging
    project scheduler
    project specialist
    project superintendent
    project/product manager
    promoter
    property manager
