In [1]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

def cosine_similarity_tfidf(target, job_titles):
    """Score each job title against ``target`` using TF-IDF cosine similarity.

    The target and all job titles are vectorized together, so they share one
    vocabulary and one set of IDF weights. The target's vector is then
    compared with every job-title vector.

    Args:
        target: Reference job title (string).
        job_titles: Iterable of job-title strings to compare with ``target``.

    Returns:
        A list of floats, one per entry of ``job_titles`` and in the same
        order. An empty ``job_titles`` yields an empty list.
    """
    # Materialize once so tuples, generators, arrays, etc. concatenate safely below.
    titles = list(job_titles)
    if not titles:
        # cosine_similarity rejects a matrix with zero rows, so short-circuit.
        return []

    # Fit on target + titles together so every document shares the vocabulary.
    vectorizer = TfidfVectorizer()
    vectors = vectorizer.fit_transform([target] + titles)

    # Row 0 is the target; the remaining rows are the job titles.
    similarity_scores = cosine_similarity(vectors[0], vectors[1:])[0]

    return similarity_scores.tolist()

# Demo: score a handful of candidate titles against one reference title
target_job = "Data Scientist"
job_titles_list = [
    "Data Analyst",
    "Machine Learning Engineer",
    "Data Engineer",
    "Software Developer",
]
tfidf_scores = cosine_similarity_tfidf(target=target_job, job_titles=job_titles_list)
print("TF-IDF Cosine Similarity Scores:", tfidf_scores)


TF-IDF Cosine Similarity Scores: [0.3096371820080698, 0.0, 0.35541083347483243, 0.0]


In [2]:
from sklearn.feature_extraction.text import CountVectorizer

def cosine_similarity_bow(target, job_titles):
    """Score each job title against ``target`` using bag-of-words cosine similarity.

    The target and all job titles are turned into raw term-count vectors over
    a shared vocabulary. The target's vector is then compared with every
    job-title vector.

    Args:
        target: Reference job title (string).
        job_titles: Iterable of job-title strings to compare with ``target``.

    Returns:
        A list of floats, one per entry of ``job_titles`` and in the same
        order. An empty ``job_titles`` yields an empty list.
    """
    # Materialize once so tuples, generators, arrays, etc. concatenate safely below.
    titles = list(job_titles)
    if not titles:
        # cosine_similarity rejects a matrix with zero rows, so short-circuit.
        return []

    # Fit on target + titles together so every document shares the vocabulary.
    vectorizer = CountVectorizer()
    vectors = vectorizer.fit_transform([target] + titles)

    # Row 0 is the target; the remaining rows are the job titles.
    similarity_scores = cosine_similarity(vectors[0], vectors[1:])[0]

    return similarity_scores.tolist()

# Demo: reuse target_job / job_titles_list, this time scored with raw term counts
bow_scores = cosine_similarity_bow(target=target_job, job_titles=job_titles_list)
print("BoW Cosine Similarity Scores:", bow_scores)


BoW Cosine Similarity Scores: [0.4999999999999999, 0.0, 0.4999999999999999, 0.0]


In [None]:
import numpy as np
from gensim.models import KeyedVectors
from sklearn.metrics.pairwise import cosine_similarity

# Load pre-trained word embeddings stored in word2vec *text* format.
# Point EMBEDDINGS_PATH at your local copy of the vectors before running this cell.
# Raw GloVe downloads (e.g. glove.6B.100d.txt) have no "<vocab_size> <dim>" header
# line, so no_header=True (gensim >= 4.0) is needed to read them. Remove that flag
# for files that do carry the header (native word2vec text files, or GloVe files
# converted with gensim's glove2word2vec script).
EMBEDDINGS_PATH = "path/to/glove.6B.100d.txt"
word_vectors = KeyedVectors.load_word2vec_format(EMBEDDINGS_PATH, binary=False, no_header=True)

def average_word_embeddings(text, model, vector_size=100):
    """Return the mean embedding of the words in ``text`` that ``model`` knows.

    The text is lower-cased and split on whitespace; words that are not in
    ``model`` are ignored.

    Args:
        text: Input string.
        model: Object supporting ``word in model`` and ``model[word]``, where
            the lookup returns that word's vector.
        vector_size: Length of the zero vector returned when no word of
            ``text`` is in ``model``. It is only used if ``model`` has no
            ``vector_size`` attribute of its own.

    Returns:
        A 1-D numpy array: the element-wise mean of the known word vectors,
        or an all-zero vector when none of the words are known.
    """
    tokens = text.lower().split()
    known_vectors = [model[token] for token in tokens if token in model]
    if known_vectors:
        return np.mean(known_vectors, axis=0)

    # Prefer the model's own dimensionality so the fallback always has the same
    # length as real embeddings, even if the caller's vector_size is out of sync.
    fallback_size = getattr(model, "vector_size", vector_size)
    return np.zeros(fallback_size)

def cosine_similarity_word_embeddings(target, job_titles, model, vector_size=100):
    """Score each job title against ``target`` using averaged word embeddings.

    Every title (and the target) is reduced to one vector with
    ``average_word_embeddings``; the target vector is then compared with all
    title vectors in a single ``cosine_similarity`` call.

    Args:
        target: Reference job title (string).
        job_titles: Iterable of job-title strings to compare with ``target``.
        model: Word-vector lookup passed through to ``average_word_embeddings``.
        vector_size: Fallback embedding length passed through to
            ``average_word_embeddings``.

    Returns:
        A list of plain Python floats, one per entry of ``job_titles`` and in
        the same order. An empty ``job_titles`` yields an empty list.
    """
    titles = list(job_titles)
    if not titles:
        # Nothing to compare against; also avoids stacking an empty matrix.
        return []

    # cosine_similarity expects 2-D inputs, so the target becomes a 1 x d row.
    target_vector = average_word_embeddings(target, model, vector_size).reshape(1, -1)

    # One row per title, so all similarities come from one pairwise call.
    title_matrix = np.vstack(
        [average_word_embeddings(title, model, vector_size) for title in titles]
    )

    # .tolist() yields built-in floats rather than numpy scalars.
    return cosine_similarity(target_vector, title_matrix)[0].tolist()

# Demo: score the example titles using averaged pre-trained word vectors
embedding_scores = cosine_similarity_word_embeddings(
    target=target_job,
    job_titles=job_titles_list,
    model=word_vectors,
    vector_size=100,
)
print("Word Embeddings Cosine Similarity Scores:", embedding_scores)
