### IMPORT LIBRARY

In [1]:
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import CountVectorizer
from nltk.tokenize import word_tokenize
import string
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from gensim.models import Word2Vec
from transformers import BertTokenizer, BertModel
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_score, recall_score, f1_score
from sklearn.cluster import KMeans
import string
import time
import re
import nltk
import torch
from tqdm import tqdm
import warnings 
warnings.filterwarnings("ignore", category=UserWarning)

### READ DATASET

In [2]:
test_cases_file = '/kaggle/input/job-recommendation/Test_Case_Job.csv'
test_cases_df = pd.read_csv(test_cases_file)
job_data_file = '/kaggle/input/job-recommendation/Tahap1_LinkedIn.csv'

# TF IDF

In [4]:
def preprocess_text_simple(text):
    if pd.isna(text):
        return ""
    text = text.lower()
    text = text.translate(str.maketrans('', '', string.punctuation))
    text = re.sub(r'\*+', '', text)
    return text.strip()

def remove_asterisks(text):
    if pd.isna(text):
        return text
    return re.sub(r'\*+', '', text)

def load_and_preprocess_job_data(file_path):
    df = pd.read_csv(file_path)
    df['title'] = df['title'].apply(remove_asterisks)
    df['Combined'] = df['title'].fillna('') + ' ' + df['description_x'].fillna('') + ' ' + df['skills_desc'].fillna('')
    df['Combined'] = df['Combined'].apply(preprocess_text_simple)
    df = df.fillna("Unknown")
    return df.reset_index(drop=True)

def vectorize_text(df):
    vectorizer = TfidfVectorizer(stop_words='english')
    tfidf_matrix = vectorizer.fit_transform(df['Combined'])
    return vectorizer, tfidf_matrix

def recommend_job(user_input, df, vectorizer, tfidf_matrix, experience_levels=None, work_types=None, name=None):
    filtered_df = df.copy()
    if experience_levels:
        filtered_df = filtered_df[filtered_df['formatted_experience_level'].isin(experience_levels)]
    if work_types:
        filtered_df = filtered_df[filtered_df['formatted_work_type'].isin(work_types)]
    if name and name != 'All':
        filtered_df = filtered_df[filtered_df['name'] == name]
    
    if filtered_df.empty:
        return None

    user_input_processed = preprocess_text_simple(user_input)
    user_tfidf = vectorizer.transform([user_input_processed])
    
    cosine_similarities = cosine_similarity(user_tfidf, tfidf_matrix[filtered_df.index]).flatten()
    
    above_zero = cosine_similarities > 0
    if not any(above_zero):
        return None

    threshold = np.percentile(cosine_similarities[above_zero], 95)
    
    above_threshold = cosine_similarities >= threshold
    top_job_indices = np.where(above_threshold)[0]
    
    top_job_indices = top_job_indices[np.argsort(cosine_similarities[top_job_indices])[::-1]]
    
    top_jobs = filtered_df.iloc[top_job_indices].copy()
    top_jobs.reset_index(drop=True, inplace=True)
    
    top_jobs['cosine_similarity'] = cosine_similarities[top_job_indices]
    
    return top_jobs

def run_recommendation_for_test_cases(job_data_file, test_cases_df, num_cases):
    df = load_and_preprocess_job_data(job_data_file)
    vectorizer, tfidf_matrix = vectorize_text(df)

    total_duration = 0

    for i in range(num_cases):
        user_input = test_cases_df['Case'].iloc[i]
        start_time = time.time()

        recommendations = recommend_job(user_input, df, vectorizer, tfidf_matrix)

        end_time = time.time()
        duration = end_time - start_time
        total_duration += duration

        print(f"\nRecommendations for Test Case {test_cases_df['No'].iloc[i]}:")
        if recommendations is not None and not recommendations.empty:
            for idx, row in recommendations.head(10).iterrows():
                print(f"Title: {row['title']} | Score: {row['cosine_similarity']:.4f}")
        else:
            print("No relevant jobs found.")
        
        print(f"Execution Time: {duration:.4f} seconds")
    
    print(f"\nTotal Duration for {num_cases} User Preferences: {total_duration:.4f} seconds")

In [5]:
print("Running recommendation for 1 user preference:")
run_recommendation_for_test_cases(job_data_file, test_cases_df, 1)

Running recommendation for 1 user preference:

Recommendations for Test Case 1:
Title: Software Engineer | Score: 0.2540
Title: Full Stack Engineer | Score: 0.2474
Title: Software Engineer | Score: 0.2438
Title: Software Engineer (PERN Stack) | Score: 0.2422
Title: Cloud DevOps Engineer | Score: 0.2342
Title: Cloud Infrastructure Engineer | Score: 0.2323
Title: Lead Engineer | Score: 0.2308
Title: UI/UX Developer | Score: 0.2282
Title: Senior Director of Software Engineering - People APIs and Experiences (People Technology) | Score: 0.2230
Title: Senior Director of Software Engineering - People APIs and Experiences (People Technology) | Score: 0.2230
Execution Time: 0.1278 seconds

Total Duration for 1 User Preferences: 0.1278 seconds


In [6]:
print("\nRunning recommendation for 5 user preferences:")
run_recommendation_for_test_cases(job_data_file, test_cases_df, 5)


Running recommendation for 5 user preferences:

Recommendations for Test Case 1:
Title: Software Engineer | Score: 0.2540
Title: Full Stack Engineer | Score: 0.2474
Title: Software Engineer | Score: 0.2438
Title: Software Engineer (PERN Stack) | Score: 0.2422
Title: Cloud DevOps Engineer | Score: 0.2342
Title: Cloud Infrastructure Engineer | Score: 0.2323
Title: Lead Engineer | Score: 0.2308
Title: UI/UX Developer | Score: 0.2282
Title: Senior Director of Software Engineering - People APIs and Experiences (People Technology) | Score: 0.2230
Title: Senior Director of Software Engineering - People APIs and Experiences (People Technology) | Score: 0.2230
Execution Time: 0.1273 seconds

Recommendations for Test Case 2:
Title: Digital Marketing Specialist | Score: 0.4941
Title: Strategic Digital Marketing Account Manager | Score: 0.4431
Title: Digital Marketing Specialist | Score: 0.4309
Title: Digital Marketing Coordinator | Score: 0.4148
Title: Marketing Manager | Score: 0.4056
Title: Ma

In [7]:
print("\nRunning recommendation for 10 user preferences:")
run_recommendation_for_test_cases(job_data_file, test_cases_df, 10)


Running recommendation for 10 user preferences:

Recommendations for Test Case 1:
Title: Software Engineer | Score: 0.2540
Title: Full Stack Engineer | Score: 0.2474
Title: Software Engineer | Score: 0.2438
Title: Software Engineer (PERN Stack) | Score: 0.2422
Title: Cloud DevOps Engineer | Score: 0.2342
Title: Cloud Infrastructure Engineer | Score: 0.2323
Title: Lead Engineer | Score: 0.2308
Title: UI/UX Developer | Score: 0.2282
Title: Senior Director of Software Engineering - People APIs and Experiences (People Technology) | Score: 0.2230
Title: Senior Director of Software Engineering - People APIs and Experiences (People Technology) | Score: 0.2230
Execution Time: 0.1263 seconds

Recommendations for Test Case 2:
Title: Digital Marketing Specialist | Score: 0.4941
Title: Strategic Digital Marketing Account Manager | Score: 0.4431
Title: Digital Marketing Specialist | Score: 0.4309
Title: Digital Marketing Coordinator | Score: 0.4148
Title: Marketing Manager | Score: 0.4056
Title: M

In [8]:
print("\nRunning recommendation for 20 user preferences:")
run_recommendation_for_test_cases(job_data_file, test_cases_df, 20)


Running recommendation for 20 user preferences:

Recommendations for Test Case 1:
Title: Software Engineer | Score: 0.2540
Title: Full Stack Engineer | Score: 0.2474
Title: Software Engineer | Score: 0.2438
Title: Software Engineer (PERN Stack) | Score: 0.2422
Title: Cloud DevOps Engineer | Score: 0.2342
Title: Cloud Infrastructure Engineer | Score: 0.2323
Title: Lead Engineer | Score: 0.2308
Title: UI/UX Developer | Score: 0.2282
Title: Senior Director of Software Engineering - People APIs and Experiences (People Technology) | Score: 0.2230
Title: Senior Director of Software Engineering - People APIs and Experiences (People Technology) | Score: 0.2230
Execution Time: 0.1226 seconds

Recommendations for Test Case 2:
Title: Digital Marketing Specialist | Score: 0.4941
Title: Strategic Digital Marketing Account Manager | Score: 0.4431
Title: Digital Marketing Specialist | Score: 0.4309
Title: Digital Marketing Coordinator | Score: 0.4148
Title: Marketing Manager | Score: 0.4056
Title: M

In [9]:
print("\nRunning recommendation for 50 user preferences:")
run_recommendation_for_test_cases(job_data_file, test_cases_df, 50)


Running recommendation for 50 user preferences:

Recommendations for Test Case 1:
Title: Software Engineer | Score: 0.2540
Title: Full Stack Engineer | Score: 0.2474
Title: Software Engineer | Score: 0.2438
Title: Software Engineer (PERN Stack) | Score: 0.2422
Title: Cloud DevOps Engineer | Score: 0.2342
Title: Cloud Infrastructure Engineer | Score: 0.2323
Title: Lead Engineer | Score: 0.2308
Title: UI/UX Developer | Score: 0.2282
Title: Senior Director of Software Engineering - People APIs and Experiences (People Technology) | Score: 0.2230
Title: Senior Director of Software Engineering - People APIs and Experiences (People Technology) | Score: 0.2230
Execution Time: 0.1228 seconds

Recommendations for Test Case 2:
Title: Digital Marketing Specialist | Score: 0.4941
Title: Strategic Digital Marketing Account Manager | Score: 0.4431
Title: Digital Marketing Specialist | Score: 0.4309
Title: Digital Marketing Coordinator | Score: 0.4148
Title: Marketing Manager | Score: 0.4056
Title: M

# Word2Vec

In [12]:
nltk.download('punkt')

def preprocess_text_simple(text):
    if pd.isna(text):
        return ""
    text = text.lower()
    text = text.translate(str.maketrans('', '', string.punctuation))
    text = re.sub(r'\*+', '', text)  
    return text.strip()

def remove_asterisks(text):
    if pd.isna(text):
        return text
    return re.sub(r'\*+', '', text)

def load_and_preprocess_job_data(file_path):
    df = pd.read_csv(file_path)
    df = df.drop_duplicates(subset=['company_id', 'title', 'description_x', 'location', 'url'])
    df['title'] = df['title'].apply(remove_asterisks)
    df['Combined'] = df['title'].fillna('') + ' ' + df['description_x'].fillna('') + ' ' + df['skills_desc'].fillna('')
    df['Combined'] = df['Combined'].apply(preprocess_text_simple)
    df['Tokenized'] = df['Combined'].apply(word_tokenize)
    df = df.fillna("Unknown")
    return df.reset_index(drop=True)

def train_word2vec(df):
    model = Word2Vec(sentences=df['Tokenized'], vector_size=100, window=5, min_count=1, workers=4)
    return model

def get_document_vector(doc, model):
    words = word_tokenize(doc)
    word_vectors = [model.wv[word] for word in words if word in model.wv]
    if len(word_vectors) == 0:
        return np.zeros(model.vector_size)
    return np.mean(word_vectors, axis=0)

def vectorize_text(df, model):
    doc_vectors = np.array([get_document_vector(doc, model) for doc in df['Combined']])
    return doc_vectors

def recommend_job(user_input, df, model, doc_vectors, experience_levels=None, work_types=None, name=None):
    filtered_df = df.copy()
    if experience_levels:
        filtered_df = filtered_df[filtered_df['formatted_experience_level'].isin(experience_levels)]
    if work_types:
        filtered_df = filtered_df[filtered_df['formatted_work_type'].isin(work_types)]
    if name and name != 'All':
        filtered_df = filtered_df[filtered_df['name'] == name]

    if filtered_df.empty:
        return None

    user_input_processed = preprocess_text_simple(user_input)
    user_vector = get_document_vector(user_input_processed, model)

    cosine_similarities = cosine_similarity([user_vector], doc_vectors[filtered_df.index]).flatten()

    above_zero = cosine_similarities > 0
    if not any(above_zero):
        return None

    threshold = np.percentile(cosine_similarities[above_zero], 95)
    above_threshold = cosine_similarities >= threshold
    top_job_indices = np.where(above_threshold)[0]

    top_job_indices = top_job_indices[np.argsort(cosine_similarities[top_job_indices])[::-1]]

    top_jobs = filtered_df.iloc[top_job_indices].copy()
    top_jobs.reset_index(drop=True, inplace=True)

    top_jobs['cosine_similarity'] = cosine_similarities[top_job_indices]

    return top_jobs

def run_recommendation_for_test_cases(job_data_file, test_cases_df, model, doc_vectors, num_cases):
    df = load_and_preprocess_job_data(job_data_file)

    total_duration = 0

    for i in range(num_cases):
        user_input = test_cases_df['Case'].iloc[i]
        start_time = time.time()

        recommendations = recommend_job(user_input, df, model, doc_vectors)

        end_time = time.time()
        duration = end_time - start_time
        total_duration += duration

        print(f"\nRecommendations for Test Case {test_cases_df['No'].iloc[i]}:")
        if recommendations is not None and not recommendations.empty:
            for idx, row in recommendations.head(10).iterrows():
                print(f"Title: {row['title']} | Score: {row['cosine_similarity']:.4f}")
        else:
            print("No relevant jobs found.")
        
        print(f"Execution Time: {duration:.4f} seconds")
    
    print(f"\nTotal Duration for {num_cases} User Preferences: {total_duration:.4f} seconds")

df = load_and_preprocess_job_data(job_data_file)
word2vec_model = train_word2vec(df)
doc_vectors = vectorize_text(df, word2vec_model)

[nltk_data] Downloading package punkt to /usr/share/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


In [13]:
print("Running recommendation for 1 user preference:")
run_recommendation_for_test_cases(job_data_file, test_cases_df, word2vec_model, doc_vectors, 1)

Running recommendation for 1 user preference:

Recommendations for Test Case 1:
Title: Salesforce Consultant | Score: 0.9369
Title: Power Platform Engineer with Dataverse | Score: 0.9243
Title: Frontend Web Developer (Machine Learning) | Score: 0.9174
Title: Java Full Stack Developer - McLean, VA (Only On W2) | Score: 0.9139
Title: Android Developer | Score: 0.9138
Title: Java Software Engineer | Score: 0.9112
Title: Java Developer - W2 ONLY | Score: 0.9100
Title: Javascript Developer | Score: 0.9099
Title: Cloud DevOps Engineer | Score: 0.9093
Title: AWS Cloud Automation Engineer | Score: 0.9055
Execution Time: 0.0158 seconds

Total Duration for 1 User Preferences: 0.0158 seconds


In [14]:
print("\nRunning recommendation for 5 user preferences:")
run_recommendation_for_test_cases(job_data_file, test_cases_df, word2vec_model, doc_vectors, 5)


Running recommendation for 5 user preferences:

Recommendations for Test Case 1:
Title: Salesforce Consultant | Score: 0.9369
Title: Power Platform Engineer with Dataverse | Score: 0.9243
Title: Frontend Web Developer (Machine Learning) | Score: 0.9174
Title: Java Full Stack Developer - McLean, VA (Only On W2) | Score: 0.9139
Title: Android Developer | Score: 0.9138
Title: Java Software Engineer | Score: 0.9112
Title: Java Developer - W2 ONLY | Score: 0.9100
Title: Javascript Developer | Score: 0.9099
Title: Cloud DevOps Engineer | Score: 0.9093
Title: AWS Cloud Automation Engineer | Score: 0.9055
Execution Time: 0.0148 seconds

Recommendations for Test Case 2:
Title: Social Media and Email Marketing Manager | Score: 0.9264
Title: Digital Marketing Lead | Score: 0.9239
Title: Digital Marketing Specialist | Score: 0.9163
Title: Digital Marketing Lead Generation Specialist | Score: 0.9129
Title: Head of Demand Generation | Score: 0.9126
Title: Marketing Communications Specialist - 75484

In [15]:
print("\nRunning recommendation for 10 user preferences:")
run_recommendation_for_test_cases(job_data_file, test_cases_df, word2vec_model, doc_vectors, 10)


Running recommendation for 10 user preferences:

Recommendations for Test Case 1:
Title: Salesforce Consultant | Score: 0.9369
Title: Power Platform Engineer with Dataverse | Score: 0.9243
Title: Frontend Web Developer (Machine Learning) | Score: 0.9174
Title: Java Full Stack Developer - McLean, VA (Only On W2) | Score: 0.9139
Title: Android Developer | Score: 0.9138
Title: Java Software Engineer | Score: 0.9112
Title: Java Developer - W2 ONLY | Score: 0.9100
Title: Javascript Developer | Score: 0.9099
Title: Cloud DevOps Engineer | Score: 0.9093
Title: AWS Cloud Automation Engineer | Score: 0.9055
Execution Time: 0.0157 seconds

Recommendations for Test Case 2:
Title: Social Media and Email Marketing Manager | Score: 0.9264
Title: Digital Marketing Lead | Score: 0.9239
Title: Digital Marketing Specialist | Score: 0.9163
Title: Digital Marketing Lead Generation Specialist | Score: 0.9129
Title: Head of Demand Generation | Score: 0.9126
Title: Marketing Communications Specialist - 7548

In [16]:
print("\nRunning recommendation for 20 user preferences:")
run_recommendation_for_test_cases(job_data_file, test_cases_df, word2vec_model, doc_vectors, 20)


Running recommendation for 20 user preferences:

Recommendations for Test Case 1:
Title: Salesforce Consultant | Score: 0.9369
Title: Power Platform Engineer with Dataverse | Score: 0.9243
Title: Frontend Web Developer (Machine Learning) | Score: 0.9174
Title: Java Full Stack Developer - McLean, VA (Only On W2) | Score: 0.9139
Title: Android Developer | Score: 0.9138
Title: Java Software Engineer | Score: 0.9112
Title: Java Developer - W2 ONLY | Score: 0.9100
Title: Javascript Developer | Score: 0.9099
Title: Cloud DevOps Engineer | Score: 0.9093
Title: AWS Cloud Automation Engineer | Score: 0.9055
Execution Time: 0.0159 seconds

Recommendations for Test Case 2:
Title: Social Media and Email Marketing Manager | Score: 0.9264
Title: Digital Marketing Lead | Score: 0.9239
Title: Digital Marketing Specialist | Score: 0.9163
Title: Digital Marketing Lead Generation Specialist | Score: 0.9129
Title: Head of Demand Generation | Score: 0.9126
Title: Marketing Communications Specialist - 7548

In [17]:
print("\nRunning recommendation for 50 user preferences:")
run_recommendation_for_test_cases(job_data_file, test_cases_df, word2vec_model, doc_vectors, 50)


Running recommendation for 50 user preferences:

Recommendations for Test Case 1:
Title: Salesforce Consultant | Score: 0.9369
Title: Power Platform Engineer with Dataverse | Score: 0.9243
Title: Frontend Web Developer (Machine Learning) | Score: 0.9174
Title: Java Full Stack Developer - McLean, VA (Only On W2) | Score: 0.9139
Title: Android Developer | Score: 0.9138
Title: Java Software Engineer | Score: 0.9112
Title: Java Developer - W2 ONLY | Score: 0.9100
Title: Javascript Developer | Score: 0.9099
Title: Cloud DevOps Engineer | Score: 0.9093
Title: AWS Cloud Automation Engineer | Score: 0.9055
Execution Time: 0.0158 seconds

Recommendations for Test Case 2:
Title: Social Media and Email Marketing Manager | Score: 0.9264
Title: Digital Marketing Lead | Score: 0.9239
Title: Digital Marketing Specialist | Score: 0.9163
Title: Digital Marketing Lead Generation Specialist | Score: 0.9129
Title: Head of Demand Generation | Score: 0.9126
Title: Marketing Communications Specialist - 7548

# BERT

In [3]:
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
model = BertModel.from_pretrained('bert-base-uncased')

def preprocess_text_simple(text):
    if pd.isna(text):
        return ""
    text = text.lower()
    text = text.translate(str.maketrans('', '', string.punctuation))
    text = re.sub(r'\*+', '', text)  
    return text.strip()

def remove_asterisks(text):
    if pd.isna(text):
        return text
    return re.sub(r'\*+', '', text)

def load_and_preprocess_job_data(file_path):
    df = pd.read_csv(file_path)
    df = df.drop_duplicates(subset=['company_id', 'title', 'description_x', 'location', 'url'])
    df['title'] = df['title'].apply(remove_asterisks)
    df['Combined'] = df['title'].fillna('') + ' ' + df['description_x'].fillna('') + ' ' + df['skills_desc'].fillna('')
    df['Combined'] = df['Combined'].apply(preprocess_text_simple)
    df = df.fillna("Unknown")
    return df.reset_index(drop=True)

def get_bert_embedding(text):
    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True, max_length=512)
    with torch.no_grad():
        outputs = model(**inputs)
    return outputs.last_hidden_state.mean(dim=1).squeeze().numpy()

def vectorize_text(df):
    doc_vectors = []
    for doc in tqdm(df['Combined'], desc="Vectorizing documents"):
        doc_vectors.append(get_bert_embedding(doc))
    return np.array(doc_vectors)

def recommend_job(user_input, df, doc_vectors, experience_levels=None, work_types=None, name=None):
    filtered_df = df.copy()
    if experience_levels:
        filtered_df = filtered_df[filtered_df['formatted_experience_level'].isin(experience_levels)]
    if work_types:
        filtered_df = filtered_df[filtered_df['formatted_work_type'].isin(work_types)]
    if name and name != 'All':
        filtered_df = filtered_df[filtered_df['name'] == name]
    
    if filtered_df.empty:
        return None

    user_input_processed = preprocess_text_simple(user_input)
    user_vector = get_bert_embedding(user_input_processed)
    
    cosine_similarities = cosine_similarity([user_vector], doc_vectors[filtered_df.index]).flatten()
    
    above_zero = cosine_similarities > 0
    if not any(above_zero):
        return None

    threshold = np.percentile(cosine_similarities[above_zero], 95)
    above_threshold = cosine_similarities >= threshold
    top_job_indices = np.where(above_threshold)[0]
    
    top_job_indices = top_job_indices[np.argsort(cosine_similarities[top_job_indices])[::-1]]
    
    top_jobs = filtered_df.iloc[top_job_indices].copy()
    top_jobs.reset_index(drop=True, inplace=True)
    
    top_jobs['cosine_similarity'] = cosine_similarities[top_job_indices]
    
    return top_jobs

def run_recommendation_for_test_cases(job_data_file, test_cases_df, doc_vectors, num_cases):
    df = load_and_preprocess_job_data(job_data_file)
    
    total_duration = 0

    for i in range(num_cases):
        user_input = test_cases_df['Case'].iloc[i]
        start_time = time.time()

        recommendations = recommend_job(user_input, df, doc_vectors)

        end_time = time.time()
        duration = end_time - start_time
        total_duration += duration

        print(f"\nRecommendations for Test Case {test_cases_df['No'].iloc[i]}:")
        if recommendations is not None and not recommendations.empty:
            for idx, row in recommendations.head(10).iterrows():
                print(f"Title: {row['title']} | Score: {row['cosine_similarity']:.4f}")
        else:
            print("No relevant jobs found.")
        
        print(f"Execution Time: {duration:.4f} seconds")
    
    print(f"\nTotal Duration for {num_cases} User Preferences: {total_duration:.4f} seconds")

df = load_and_preprocess_job_data(job_data_file)
doc_vectors = vectorize_text(df)

tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]



model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

Vectorizing documents: 100%|██████████| 15309/15309 [1:51:27<00:00,  2.29it/s] 


In [4]:
print("Running recommendation for 1 user preference:")
run_recommendation_for_test_cases(job_data_file, test_cases_df, doc_vectors, 1)

Running recommendation for 1 user preference:

Recommendations for Test Case 1:
Title: JUNIOR ANDROID DEVELOPER | Score: 0.9266
Title: Senior Frontend Developer | Score: 0.9199
Title: Web Developer | Score: 0.9192
Title: Senior Dotnet Developer | Score: 0.9192
Title: Web Developer | Score: 0.9185
Title: Senior .net Developer | Score: 0.9155
Title: Java Developer - W2 ONLY | Score: 0.9146
Title: Dotnet Developer | Score: 0.9137
Title: Software Engineer 3 | Score: 0.9130
Title: Azure Architect | Score: 0.9129
Execution Time: 0.1767 seconds

Total Duration for 1 User Preferences: 0.1767 seconds


In [5]:
print("\nRunning recommendation for 5 user preferences:")
run_recommendation_for_test_cases(job_data_file, test_cases_df, doc_vectors, 5)


Running recommendation for 5 user preferences:

Recommendations for Test Case 1:
Title: JUNIOR ANDROID DEVELOPER | Score: 0.9266
Title: Senior Frontend Developer | Score: 0.9199
Title: Web Developer | Score: 0.9192
Title: Senior Dotnet Developer | Score: 0.9192
Title: Web Developer | Score: 0.9185
Title: Senior .net Developer | Score: 0.9155
Title: Java Developer - W2 ONLY | Score: 0.9146
Title: Dotnet Developer | Score: 0.9137
Title: Software Engineer 3 | Score: 0.9130
Title: Azure Architect | Score: 0.9129
Execution Time: 0.2007 seconds

Recommendations for Test Case 2:
Title: Digital Marketing Specialist | Score: 0.9031
Title: VP, Channel Marketing | Score: 0.8941
Title: Digital Marketing Specialist | Score: 0.8931
Title: Social Media Graphic Designer | Score: 0.8912
Title: Head of Strategic Growth  | Score: 0.8902
Title: Vice President Marketing | Score: 0.8873
Title: Personal Finance Writers | Score: 0.8869
Title: Graphic Designer | Score: 0.8840
Title: Graphic Designer | Score: 

In [6]:
print("\nRunning recommendation for 10 user preferences:")
run_recommendation_for_test_cases(job_data_file, test_cases_df, doc_vectors, 10)


Running recommendation for 10 user preferences:

Recommendations for Test Case 1:
Title: JUNIOR ANDROID DEVELOPER | Score: 0.9266
Title: Senior Frontend Developer | Score: 0.9199
Title: Web Developer | Score: 0.9192
Title: Senior Dotnet Developer | Score: 0.9192
Title: Web Developer | Score: 0.9185
Title: Senior .net Developer | Score: 0.9155
Title: Java Developer - W2 ONLY | Score: 0.9146
Title: Dotnet Developer | Score: 0.9137
Title: Software Engineer 3 | Score: 0.9130
Title: Azure Architect | Score: 0.9129
Execution Time: 0.1957 seconds

Recommendations for Test Case 2:
Title: Digital Marketing Specialist | Score: 0.9031
Title: VP, Channel Marketing | Score: 0.8941
Title: Digital Marketing Specialist | Score: 0.8931
Title: Social Media Graphic Designer | Score: 0.8912
Title: Head of Strategic Growth  | Score: 0.8902
Title: Vice President Marketing | Score: 0.8873
Title: Personal Finance Writers | Score: 0.8869
Title: Graphic Designer | Score: 0.8840
Title: Graphic Designer | Score:

In [7]:
print("\nRunning recommendation for 20 user preferences:")
run_recommendation_for_test_cases(job_data_file, test_cases_df, doc_vectors, 20)


Running recommendation for 20 user preferences:

Recommendations for Test Case 1:
Title: JUNIOR ANDROID DEVELOPER | Score: 0.9266
Title: Senior Frontend Developer | Score: 0.9199
Title: Web Developer | Score: 0.9192
Title: Senior Dotnet Developer | Score: 0.9192
Title: Web Developer | Score: 0.9185
Title: Senior .net Developer | Score: 0.9155
Title: Java Developer - W2 ONLY | Score: 0.9146
Title: Dotnet Developer | Score: 0.9137
Title: Software Engineer 3 | Score: 0.9130
Title: Azure Architect | Score: 0.9129
Execution Time: 0.1833 seconds

Recommendations for Test Case 2:
Title: Digital Marketing Specialist | Score: 0.9031
Title: VP, Channel Marketing | Score: 0.8941
Title: Digital Marketing Specialist | Score: 0.8931
Title: Social Media Graphic Designer | Score: 0.8912
Title: Head of Strategic Growth  | Score: 0.8902
Title: Vice President Marketing | Score: 0.8873
Title: Personal Finance Writers | Score: 0.8869
Title: Graphic Designer | Score: 0.8840
Title: Graphic Designer | Score:

In [8]:
print("\nRunning recommendation for 50 user preferences:")
run_recommendation_for_test_cases(job_data_file, test_cases_df, doc_vectors, 50)


Running recommendation for 50 user preferences:

Recommendations for Test Case 1:
Title: JUNIOR ANDROID DEVELOPER | Score: 0.9266
Title: Senior Frontend Developer | Score: 0.9199
Title: Web Developer | Score: 0.9192
Title: Senior Dotnet Developer | Score: 0.9192
Title: Web Developer | Score: 0.9185
Title: Senior .net Developer | Score: 0.9155
Title: Java Developer - W2 ONLY | Score: 0.9146
Title: Dotnet Developer | Score: 0.9137
Title: Software Engineer 3 | Score: 0.9130
Title: Azure Architect | Score: 0.9129
Execution Time: 0.2003 seconds

Recommendations for Test Case 2:
Title: Digital Marketing Specialist | Score: 0.9031
Title: VP, Channel Marketing | Score: 0.8941
Title: Digital Marketing Specialist | Score: 0.8931
Title: Social Media Graphic Designer | Score: 0.8912
Title: Head of Strategic Growth  | Score: 0.8902
Title: Vice President Marketing | Score: 0.8873
Title: Personal Finance Writers | Score: 0.8869
Title: Graphic Designer | Score: 0.8840
Title: Graphic Designer | Score: