In [1]:
import json
import logging
import os

import yaml
from qdrant_client import QdrantClient
from scripts.utils.logger import init_logging_config




In [2]:
# Initialise logging through the project helper, passing INFO as the basic log level.
# NOTE(review): the helper's exact effect (handlers, format) is defined in
# scripts.utils.logger and is not visible here.
init_logging_config(basic_log_level=logging.INFO)
# Logger named after this module's namespace; used by the helper functions below.
logger = logging.getLogger(__name__)

# Set this logger's own threshold to INFO so the "Started"/"Finished" messages are emitted.
logger.setLevel(logging.INFO)

In [None]:
def find_path(folder_name):
    """
    Locate a folder by name, looking in the current working directory first and
    then in each of its ancestors, up to and including the filesystem root.

    Args:
    folder_name (str): The name of the folder to find the path for.

    Returns:
    str: The full path of the first match, i.e. the one closest to the
    current working directory.

    Raises:
    ValueError: If no directory between the current working directory and the
    filesystem root contains a folder with that name.
    """
    curr_dir = os.getcwd()
    while True:
        candidate = os.path.join(curr_dir, folder_name)
        # Require an actual directory: a regular file that happens to share the
        # name is not a valid match for a folder lookup.
        if folder_name in os.listdir(curr_dir) and os.path.isdir(candidate):
            return candidate
        parent_dir = os.path.dirname(curr_dir)
        # dirname() of a filesystem root is the root itself ("/" on POSIX,
        # "C:\\" on Windows). Comparing against curr_dir instead of a literal
        # "/" makes sure the root is searched too and the loop ends on Windows.
        if parent_dir == curr_dir:
            break
        curr_dir = parent_dir
    raise ValueError(f"Folder '{folder_name}' not found.")


def read_config(filepath):
    """
    Read a YAML configuration file.

    Errors are logged instead of raised, so callers must check the result
    for None before using it.

    Args:
    filepath (str): The path to the configuration file

    Returns:
    The parsed YAML content (a dict for a mapping document) if the file is
    read successfully, None otherwise
    """
    try:
        # Decode explicitly as UTF-8 instead of relying on the platform's
        # locale default, which differs between machines.
        with open(filepath, encoding="utf-8") as f:
            return yaml.safe_load(f)
    except FileNotFoundError as e:  # the file does not exist
        logger.error(f"Configuration file {filepath} not found: {e}")
    except yaml.YAMLError as e:  # the file is not valid YAML
        logger.error(
            f"Error parsing YAML in configuration file {filepath}: {e}", exc_info=True
        )
    except Exception as e:  # anything else (permissions, decoding, ...)
        logger.error(f"Error reading configuration file {filepath}: {e}")
    return None


def read_doc(path):
    """
    Read a JSON document.

    Args:
    path (str): The path to the JSON document

    Returns:
    The parsed JSON content (a dict for a JSON object) if the file is read
    successfully; an empty dict if reading or parsing fails after the file
    has been opened (the error is logged).

    Raises:
    OSError: If the file cannot be opened (for example FileNotFoundError).
    Opening happens outside the try block, so these errors reach the caller.
    """
    # JSON text is UTF-8; do not depend on the platform's locale default.
    with open(path, encoding="utf-8") as f:
        try:
            data = json.load(f)
        except Exception as e:
            logger.error(f"Error reading JSON file: {e}")
            data = {}
    return data


In [None]:
def get_score(resume_string, job_description_string):
    """
    Score a resume against a job description with an in-memory Qdrant search.

    The resume text is stored as the only document of a temporary collection
    and the job description text is used as the query against it.

    Args:
    resume_string (str): The resume string to be analyzed.
    job_description_string (str): The job description string to be analyzed.

    Returns:
    The value returned by the client's ``query`` call. The notebook iterates
    over it and reads ``.score`` from each item, so it is a collection of
    results rather than a bare float.
    """
    logger.info("Started getting similarity score")

    collection = "demo_collection"

    # In-memory client: nothing is persisted between calls.
    qdrant = QdrantClient(":memory:")
    # The embedding model must be selected before documents are added.
    qdrant.set_model("BAAI/bge-base-en")
    qdrant.add(collection_name=collection, documents=[resume_string])

    matches = qdrant.query(
        collection_name=collection,
        query_text=job_description_string,
    )

    logger.info("Finished getting similarity score")
    return matches

In [5]:
# Locate the 'Resume-Matcher' folder by searching upward from the current working directory.
# NOTE: despite its name, `cwd` holds the path of that folder, not the working directory itself.
cwd = find_path('Resume-Matcher')
# Directory holding the processed resume files: <cwd>/Data/Processed/Resumes
READ_RESUME_FROM = os.path.join(cwd, 'Data', 'Processed', 'Resumes')
# Directory holding the processed job description files: <cwd>/Data/Processed/JobDescription
READ_JOB_DESCRIPTION_FROM = os.path.join(cwd, 'Data', 'Processed', 'JobDescription')
# <cwd>/scripts/similarity -- presumably where the similarity scripts/configuration live;
# this value is not used by the cells shown here.
config_path = os.path.join(cwd, "scripts", "similarity") 

In [6]:
# To score your own documents, point these two names at your processed files.
RESUME_FILE = "Resume-alfred_pennyworth_pm.pdf83632b66-5cce-4322-a3c6-895ff7e3dd96.json"
JOB_DESCRIPTION_FILE = (
    "JobDescription-job_desc_product_manager.pdf6763dc68-12ff-4b32-b652-ccee195de071.json"
)

# The processed documents are JSON, so load them with the JSON reader rather than
# the YAML one, and build the paths with os.path.join instead of hard-coding "/".
resume_dict = read_doc(os.path.join(READ_RESUME_FROM, RESUME_FILE))
job_dict = read_doc(os.path.join(READ_JOB_DESCRIPTION_FROM, JOB_DESCRIPTION_FILE))

# Extract the keywords from the resume and the job description.
# read_doc returns an empty dict when parsing fails, so stop with a clear message
# here instead of failing later with an opaque KeyError/TypeError.
resume_keywords = resume_dict.get("extracted_keywords")
job_description_keywords = job_dict.get("extracted_keywords")
if resume_keywords is None or job_description_keywords is None:
    raise ValueError(
        "Could not load 'extracted_keywords' from the resume and/or job description file; "
        "check the file names above and the logged errors."
    )

# Join each keyword list into a single space-separated string.
resume_string = " ".join(resume_keywords)
jd_string = " ".join(job_description_keywords)

# Compute the similarity between the resume and the job description and print each score.
final_result = get_score(resume_string, jd_string)
for r in final_result:
    print(r.score)

[32;10m2024-05-14 15:32:28,825 (1835371807.py:2) - INFO: [0mStarted getting similarity score
[32;10m2024-05-14 15:32:29,664 (1835371807.py:16) - INFO: [0mFinished getting similarity score


0.8572231574198962
