In [2]:
import numpy as np
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from pylab import rcParams
# Default matplotlib figure size for the notebook (width, height).
rcParams['figure.figsize'] = 50, 20
import nltk
import time
# Wall-clock timestamp taken when this cell runs; not read by any cell shown here.
start=time.time()
# Fetch the NLTK stopwords corpus (no-op when it is already up to date).
nltk.download('stopwords')
%matplotlib inline
# NOTE: this silences *every* warning for the rest of the session, including
# deprecation and numerical warnings raised by pandas / scikit-learn / numpy.
import warnings; warnings.simplefilter('ignore')


[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\pvali\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [3]:
from joblib import load

# Load the pre-fitted model components and the job table from the working directory.
# NOTE(security): joblib.load unpickles its input, which can execute arbitrary
# code -- only load artefacts that come from a trusted source.
vec = load('vec.joblib')      # used by give_suggestions via .transform / .get_feature_names_out
vec2 = load('vec2.joblib')    # second vectorizer, used the same way as `vec`
pca = load('pca.joblib')      # used via .transform on the concatenated vectorizer output
lr = load('lr.joblib')        # used via .predict to obtain a cluster number
comps = load('comps.joblib')  # filtered by `comps.index == cluster` in give_suggestions
df = pd.read_json('df.json')  # job table; give_suggestions reads 'cluster_no', 'title' and 'uid'

In [5]:
import os
import json

def update_suggestions_json(user_id, new_suggestions_list):
    """Replace the stored suggestions of one user in ``suggestions_user.json``.

    The file (in the current working directory) holds a flat JSON list of
    suggestion records. Every stored record whose ``'user_id'`` equals
    ``user_id`` is dropped, ``new_suggestions_list`` is appended, and the
    complete list is written back. A missing file is treated as an empty list.

    Args:
        user_id: Identifier whose previously stored records are removed.
        new_suggestions_list: Iterable of JSON-serialisable records to append.
            They are appended as given; their own ``'user_id'`` is not checked.

    Raises:
        TypeError, ValueError: If the resulting data cannot be serialised to
            JSON. The file on disk is left untouched in that case.
        json.JSONDecodeError: If the existing file does not contain valid JSON.
        KeyError: If a stored record has no ``'user_id'`` key.
    """
    # Path to the suggestions JSON file
    suggestions_file_path = 'suggestions_user.json'

    # Start from the stored records when the file exists, otherwise from scratch
    if os.path.isfile(suggestions_file_path):
        with open(suggestions_file_path, 'r') as file:
            suggestions_data = json.load(file)
    else:
        suggestions_data = []

    # Remove any existing suggestions for this user, then add the new ones
    suggestions_data = [entry for entry in suggestions_data if entry['user_id'] != user_id]
    suggestions_data.extend(new_suggestions_list)

    # Serialise BEFORE opening the file for writing: open(..., 'w') truncates
    # immediately, so a serialisation error raised half-way through json.dump
    # would otherwise wipe out the suggestions of every user.
    serialized = json.dumps(suggestions_data, indent=4)
    with open(suggestions_file_path, 'w') as file:
        file.write(serialized)

def give_suggestions(user_id, resume_text):
    """Rank the jobs of the user's predicted cluster and persist the top ten.

    Relies on the module-level objects ``vec``, ``vec2``, ``pca``, ``lr``,
    ``comps`` and ``df`` and on ``update_suggestions_json``.

    Steps:
        1. Transform ``resume_text`` with ``vec2`` and ``vec`` and concatenate
           the two feature frames column-wise.
        2. Project the result with ``pca`` and predict a cluster with ``lr``.
        3. Compute the cosine similarity between the projected resume and the
           rows of ``comps`` whose index equals the predicted cluster.
        4. Label the scores with the ``'title'`` of the ``df`` rows whose
           ``'cluster_no'`` equals the cluster, sort descending, keep ten.
        5. Store one record per ranked job through ``update_suggestions_json``.

    Args:
        user_id: Identifier written into every stored suggestion record.
        resume_text: Text passed to both vectorizers.

    Returns:
        DataFrame indexed by job ``'title'`` with a single ``'score'`` column
        holding the (at most) ten highest scores, highest first.
    """
    # Vectorize user's skills and job descriptions
    desc = pd.DataFrame(vec.transform([resume_text]).todense())
    desc.columns = vec.get_feature_names_out()
    skillz = pd.DataFrame(vec2.transform([resume_text]).todense())
    skillz.columns = vec2.get_feature_names_out()
    mat = pd.concat([skillz, desc], axis=1)
    # Transform feature matrix with pca
    user_comps = pd.DataFrame(pca.transform(mat))

    # Predict cluster for user and print cluster number
    cluster = lr.predict(user_comps)[0]
    print('CLUSTER NUMBER', cluster, '\n\n')

    # Calculate cosine similarity
    cos_sim = pd.DataFrame(cosine_similarity(user_comps, comps[comps.index == cluster]))

    # Get job titles from df to associate cosine similarity scores with jobs
    samp_for_cluster = df[df['cluster_no'] == cluster]
    cos_sim = cos_sim.T.set_index(samp_for_cluster['title'])
    cos_sim.columns = ['score']
    # Carry each posting's uid next to its score, row for row. Titles are not
    # unique (several postings can share one title), so looking the uid up by
    # title afterwards would always return the first posting with that title.
    # Assumes the rows of the filtered `comps` and of `samp_for_cluster` are in
    # the same order -- the title labelling above relies on the same assumption.
    cos_sim['job_id'] = samp_for_cluster['uid'].to_numpy()

    # Print the top ten suggested jobs for the user's cluster
    ranked = cos_sim.sort_values('score', ascending=False)[:10]
    top_cos_sim = ranked[['score']]
    print('Top ten suggested for your cluster', '\n', top_cos_sim, '\n\n')

    # Build one record per ranked posting. Iterating rows (instead of a
    # title -> score dict) keeps postings that share a title; .tolist() yields
    # plain Python values that json can serialise.
    new_suggestions_list = []
    seen_job_ids = set()
    ranked_rows = zip(
        ranked.index.tolist(),
        ranked['job_id'].tolist(),
        ranked['score'].tolist(),
    )
    for job_title, job_id, score in ranked_rows:
        # Feedback is later addressed by (user_id, job_id), so a posting is
        # stored once even if it appears several times in the ranking.
        if job_id in seen_job_ids:
            continue
        seen_job_ids.add(job_id)
        new_suggestions_list.append({
            "user_id": user_id,
            "job_id": job_id,
            "suggestions": job_title,
            "score": score,
            "feedback": 0
        })

    # Call the function to update the JSON file with new suggestions
    update_suggestions_json(user_id, new_suggestions_list)

    return top_cos_sim

In [8]:
import pandas as pd
# Load the user table; it is expected to provide 'user_id' and 'user_data' columns.
user_data = pd.read_json("../hybrid/user_data.json")
# User whose suggestions are generated in the following cells.
sel_user_id = 1
# All 'user_data' values stored for that user (a Series, possibly empty).
resume_text_row = user_data.loc[user_data['user_id'] == sel_user_id, 'user_data']
# Take the first match; raises IndexError when the user id is not present.
resume_text = resume_text_row.iloc[0]
print(resume_text)


typescript, s, css, express, r, node.js, javascript, html, react


In [9]:
# Rank jobs for the selected user; as a side effect the suggestions are written
# to suggestions_user.json. The returned frame holds the top-ten scores by title.
cos_sim_result = give_suggestions(sel_user_id, resume_text)

CLUSTER NUMBER 1 


Top ten suggested for your cluster 
                                             score
title                                            
Sr. Fullstack (Node.js/React) Developer  0.668390
Sr. Fullstack (Node.js/React) Developer  0.668390
Sr. Fullstack (Node.js/React) Developer  0.668390
Software Developer - Front End           0.589798
7+ Yrs Sr. Front End Developer           0.547151
Senior Backend Developer                 0.428196
Cloud Engineer                           0.413607
Web Developer, Promotions                0.398589
Web Applications Developer               0.369323
Front-End Developer (Remote)             0.327625 




In [10]:
def update_user_feedback(user_id, job_id, feedback):
    """Set the ``'feedback'`` value of one stored suggestion.

    Reads ``suggestions_user.json`` from the current working directory, finds
    the first record whose ``'user_id'`` and ``'job_id'`` both match, replaces
    its ``'feedback'`` and writes the list back.

    Args:
        user_id: User the suggestion belongs to.
        job_id: Job the suggestion refers to.
        feedback: JSON-serialisable value stored under ``'feedback'``.

    Returns:
        None. A message is printed and the file is left untouched when the file
        does not exist or when no record matches.

    Raises:
        TypeError, ValueError: If ``feedback`` cannot be serialised to JSON.
            The file on disk is left untouched in that case.
        json.JSONDecodeError: If the file does not contain valid JSON.
        KeyError: If a stored record lacks ``'user_id'`` or ``'job_id'``.
    """
    # Path to the suggestions JSON file
    suggestions_file_path = 'suggestions_user.json'

    if not os.path.isfile(suggestions_file_path):
        print("File not found!")
        return

    with open(suggestions_file_path, 'r') as file:
        suggestions_data = json.load(file)

    # Update the feedback for the specific user_id and job_id
    for entry in suggestions_data:
        if entry['user_id'] == user_id and entry['job_id'] == job_id:
            entry['feedback'] = feedback
            break
    else:
        # Nothing matched: report it instead of silently rewriting the file.
        print("No suggestion found for user_id={0!r}, job_id={1!r}".format(user_id, job_id))
        return

    # Serialise BEFORE opening the file for writing: open(..., 'w') truncates
    # immediately, so a non-serialisable feedback value would otherwise leave
    # an empty or half-written file behind.
    serialized = json.dumps(suggestions_data, indent=4)
    with open(suggestions_file_path, 'w') as file:
        file.write(serialized)

In [11]:
# Store feedback=1 for one (user, job) pair in suggestions_user.json.
# NOTE(review): the suggestions generated above are for sel_user_id = 1, while
# this call targets user_id=2 -- it only changes anything if the file already
# holds a record for user 2 with this job_id. Confirm this is intended.
update_user_feedback(user_id=2, job_id='222d8da33f324a7b836368cdada0a053', feedback=1)

In [None]:
import scipy.sparse as sp
from typing import List

def _single_list_similarity(predicted: list, feature_df: pd.DataFrame, u: int) -> float:
    """Average pairwise cosine similarity between the items of one list.

    Args:
        predicted: Job titles recommended to one user. Each title is looked up
            in the ``'jobtitle'`` column of ``feature_df``.
        feature_df: Frame with a ``'jobtitle'`` column. All remaining columns
            are used as the feature vector of a row, so they have to be numeric
            for the sparse conversion and the cosine similarity to work.
        u: Position of this list among all users' lists; only used in the
            error message.

    Returns:
        Mean of the strict upper triangle of the item-by-item cosine similarity
        matrix, i.e. the mean over all unordered pairs of selected rows. With
        fewer than two usable rows there is no pair to average; that case is
        not handled specially.

    Raises:
        Exception: If ``predicted`` is empty.
    """
    # exception predicted list empty
    if not predicted:
        raise Exception('Predicted list is empty, index: {0}'.format(u))

    # get features for all recommended items; .loc selects every row whose
    # title is in `predicted` (a title shared by several rows selects them all)
    recs_content = feature_df.set_index('jobtitle').loc[predicted]
    recs_content = recs_content.dropna()
    recs_content = sp.csr_matrix(recs_content.values)

    # calculate similarity scores for all items in list
    similarity = cosine_similarity(X=recs_content, dense_output=False)

    # get indices for upper right triangle w/o diagonal
    upper_right = np.triu_indices(similarity.shape[0], k=1)

    # calculate average similarity score of all recommended items in list
    ils_single_user = np.mean(similarity[upper_right])
    return ils_single_user

def intra_list_similarity(predicted: List[list], feature_df: pd.DataFrame) -> float:
    """Mean intra-list similarity over the recommendation lists of all users.

    Args:
        predicted: One recommendation list per user (a list of lists). Passing
            a single flat list would make every element be treated as a
            separate user's list.
        feature_df: Item features, forwarded to ``_single_list_similarity``
            after missing values are replaced by 0.

    Returns:
        The mean of the per-user values returned by ``_single_list_similarity``.
    """
    feature_df = feature_df.fillna(0)
    ils = [
        _single_list_similarity(user_items, feature_df, u)
        for u, user_items in enumerate(predicted)
    ]
    return np.mean(ils)

# Titles of the ten best-scoring suggestions for the selected user.
top_10_recommendations=cos_sim_result.sort_values('score', ascending=False)[:10]
top_10_list = top_10_recommendations.reset_index().to_records(index=False).tolist()
first_elements = [item[0] for item in top_10_list]
# NOTE(review): _single_list_similarity turns every column except 'jobtitle'
# into a numeric matrix. 'jobdescription' looks like raw text -- if so it has to
# be replaced by numeric item features first. Also confirm that df really has a
# 'jobtitle' column; give_suggestions reads the titles from df['title'].
feature_df = df[['jobtitle','jobdescription']]
# intra_list_similarity expects one list PER USER; wrap the single user's list
# so the titles are not each treated as a separate user's recommendation list.
intra_list_similarity([first_elements], feature_df)