# Minimum Viable Product - ATS system

**Instructions:** Create your own Groq and/or Gemini API key. Add either one, or both, to the secrets of the Colab notebook as GROQ_API_KEY and GEMINI_API_KEY respectively. Then run all cells via Runtime -> Run all.

LAUNCH_MODE=True generates the frontend required for analysis. With LAUNCH_MODE=False no frontend is generated; the debug sections are primarily written for the purpose of packaging the system in the future.

In [None]:
# Install necessary requirements
!pip install -q google-generativeai groq gradio PyPDF2 torch transformers

In [None]:
# Make valid imports
import google.generativeai as genai
import gradio as gr
import os
import PyPDF2
import re
import torch
from datetime import datetime
from google.colab import auth, userdata
from groq import Groq
from transformers import AutoTokenizer, AutoModelForSequenceClassification

# Run-mode switch.
# True:  API keys are read from Colab secrets and the Gradio frontend is launched.
# False: debug path - API keys and the sample documents are read from local files
#        and the results are printed instead of shown in a frontend.
LAUNCH_MODE = True

In [None]:
# Function to read API key file: Supports local development of code
def get_api_key(key_path):
  """Read an API key from a text file.

  Args:
    key_path: Path of the file that holds the key.

  Returns:
    The file content with surrounding whitespace removed.

  Raises:
    ValueError: If no file exists at ``key_path``.
  """
  try:
    # The path must come from the ``key_path`` parameter; any other name is
    # undefined here and would fail with NameError before the file is opened.
    with open(key_path, 'r') as file:
      return file.read().strip()
  except FileNotFoundError as err:
    raise ValueError(f"API key file not found in pre-determined location: {key_path}. Please provide a valid file") from err

# Support
technologies = ["Gemini", "Groq"]

# Fetch API keys and create communicators
if LAUNCH_MODE:
  auth.authenticate_user()
  api_keys = {}
  for tech in technologies:
    try:
      api_keys[tech] = userdata.get(f"{tech.upper()}_API_KEY")
    except (userdata.SecretNotFoundError, userdata.NotebookAccessError):
      # Colab raises (rather than returning None) when a secret does not exist
      # or the notebook has no access to it. Record the key as missing so that
      # providing only one of the two keys still works.
      api_keys[tech] = None
else:
  key_paths = {
    "Gemini": ".secrets/gemini_api_key.txt",
    "Groq": ".secrets/groq_api_key.txt"
  }
  api_keys = {tech: get_api_key(key_paths[tech]) for tech in technologies}

# Identify if any keys are missing
# (the list is built first so `technologies` is not mutated while being scanned)
missing_techs = [tech for tech, key in api_keys.items() if not key]
for tech in missing_techs:
  technologies.remove(tech)
  print(f"Warning: {tech.capitalize()} API key not found. Removing {tech} from available technologies.")

# Without a single usable key nothing can run, so stop here with a mode-specific hint
if not technologies:
  if LAUNCH_MODE:
    raise SystemExit("Cannot initiate the instance as no API keys are found. Set them using Secrets in colab notebook or userdata.save('...')")
  else:
    raise SystemExit("Cannot initiate the instance as no API keys are found. Verify if the files are properly structured.")

In [None]:
""" Text processor functions """
def read_from_pdf(pdf_file_path):
    """Extract the text of every page of a PDF file.

    Pages are joined with a newline so the last word of one page is not fused
    with the first word of the next. A page that yields no text contributes an
    empty string.

    Args:
        pdf_file_path: Path of the PDF file to read.

    Returns:
        The extracted text of all pages as a single string.
    """
    with open(pdf_file_path, 'rb') as pdf_file:
        pdf_reader = PyPDF2.PdfReader(pdf_file)
        # Extraction must happen while the file is still open
        page_texts = [page.extract_text() or '' for page in pdf_reader.pages]
    return '\n'.join(page_texts)

def read_from_text(text_file_path):
    """Return the complete content of a UTF-8 encoded text file."""
    with open(text_file_path, mode='r', encoding='utf_8') as source:
        contents = source.read()
    return contents

def text_to_comma_seperated(text):
    """Normalise summary text into a lower-case, comma-delimited string.

    Steps, in order: lower-case the text, drop asterisks, drop the
    "skills:", "experience:" and "education:" subheadings, turn '.', ',' and
    ':' into commas, collapse whitespace runs to a single space (trimming the
    ends), and finally remove whitespace that follows a comma.
    """
    normalised = text.lower()

    # Substitutions that must run before whitespace is collapsed
    early_rules = (
        (r"\*", ""),
        # Remove subheadings (Skills, Experience, Education)
        (r"(skills|experience|education):", ""),
        (r"[.,:]", ","),
    )
    for pattern, replacement in early_rules:
        normalised = re.sub(pattern, replacement, normalised)

    normalised = re.sub(r"\s+", " ", normalised)
    normalised = normalised.strip()
    return re.sub(r",\s*", ",", normalised)

""" Text extractor functions """
def genai_text_extractor(text, tech):
    """Summarise free text into Skills / Experience / Education with an LLM.

    Args:
        text: Raw resume or job-description text to summarise.
        tech: Provider to use, either "Gemini" or "Groq".

    Returns:
        The text of the model's reply.

    Raises:
        ValueError: If ``tech`` is not one of the supported providers.
    """
    if tech == "Gemini":
        prompt = f"""Act as a efficient ATS system. Summarize the text into data suitable for input to a RoBERTa model and in following format
        Skills: List skills here (Technical and Soft-skills), \n
        Experience: List number of years of experience here. Mention only 1 most relevant title. \n
        Education: List degrees here. Mention highest degree if candidate has Dual degree. Convert abbreviations to only Bachelors, Masters etc. Discard abbreviations in generation.
        Ensure the result is concise. Text: {text}"""
        # Use the key loaded at start-up (module-level `api_keys`) so the
        # file-based debug mode works as well as the Colab-secrets mode.
        genai.configure(api_key=api_keys[tech])
        # Selecting a gemini model depending on the plan (Free in this instance)
        model = genai.GenerativeModel("gemini-pro")
        response = model.generate_content(
            prompt,
            generation_config=genai.types.GenerationConfig(
                candidate_count = 1,
                max_output_tokens = 512,
                temperature = 0.1
            )
        )
        return response.text

    if tech == "Groq":
        prompt = f"""Act as an efficient ATS system. Directly summarize the provided text into data suitable for input to a RoBERTa model.
            Output only in the exact format specified below, with no introductory phrases, explanations, or additional context.
            Skills: List all relevant skills here (technical and soft skills).
            Experience: Provide the number of years of experience and mention only the most relevant title.
            Education: Provide the highest degree achieved. If the candidate has a dual degree, mention only the highest.
            Do not include abbreviations like B.Tech or M.Tech—use 'Bachelors,' 'Masters,' etc., instead.
            Ensure the response is concise and strictly follows the specified format. Text: {text}"""
        # Same start-up key lookup as the Gemini branch
        client = Groq(api_key=api_keys[tech])
        response = client.chat.completions.create(
            messages=[
                {"role": "user",
                "content": prompt}],
            # Selecting a Llama model depending on the plan (Free in this instance)
            model="llama3-70b-8192"
        )
        return response.choices[0].message.content

    # Fail loudly instead of returning None, which would only surface later as
    # an unrelated error when the caller tries to process the summary.
    raise ValueError(f"Unsupported technology: {tech!r}. Expected 'Gemini' or 'Groq'.")

""" Text similarity functions """
# Loaded (tokenizer, model) pairs keyed by model name, so repeated analyses
# reuse them instead of loading the weights again on every call.
_SIMILARITY_MODEL_CACHE = {}


def _load_similarity_model(model_name):
    """Return the (tokenizer, model) pair for ``model_name``, loading it on first use."""
    if model_name not in _SIMILARITY_MODEL_CACHE:
        tokenizer = AutoTokenizer.from_pretrained(model_name)
        model = AutoModelForSequenceClassification.from_pretrained(model_name)
        _SIMILARITY_MODEL_CACHE[model_name] = (tokenizer, model)
    return _SIMILARITY_MODEL_CACHE[model_name]


def calculate_resume_similarity(resume_text, job_description_text):
    """Calculates similarity score between resume and job description.

    Args:
        resume_text: Text describing the candidate.
        job_description_text: Text describing the job.

    Returns:
        The sigmoid of the cross-encoder's logit as a Python float, i.e. a
        value between 0 and 1.
    """
    model_name = "cross-encoder/stsb-roberta-base"
    tokenizer, model = _load_similarity_model(model_name)

    # Both texts are encoded together as one pair; over-long input is truncated
    inputs = tokenizer(resume_text, job_description_text, return_tensors="pt", padding=True, truncation=True)
    with torch.no_grad():
        outputs = model(**inputs)
        similarity_score = torch.sigmoid(outputs.logits).item()
    return similarity_score

# --- Fit Categorization ---
def categorize_fit(similarity_score):
    """Map a similarity score to a fit label and its percentage.

    The score is scaled by 100; the first band whose lower bound it reaches
    decides the label. Returns ``(label, fit_percentage)``.
    """
    fit_percentage = similarity_score * 100
    # Bands are ordered from the highest lower bound to the lowest
    bands = ((75, "Good Fit"), (50, "Moderate Fit"))
    for lower_bound, label in bands:
        if fit_percentage >= lower_bound:
            return label, fit_percentage
    return "Not a Good Fit", fit_percentage

# --- Communication Generation ---
def communication_generator(message, matching_skills, fit_category):
    """Compose the response text: the message, the matching skills, and the fit verdict."""
    skills_listing = ', '.join(matching_skills)
    sentences = [
        f"{message} The matching skills are: {skills_listing}. ",
        f"This candidate is considered a {fit_category}.",
    ]
    return "".join(sentences)

In [None]:
def _analysis_error(message):
    """Build the five-field result for a failed analysis: the message, then empty fields."""
    return message, "", "", "", ""


def _skill_tokens(summary):
    """Split an LLM summary into trimmed, non-empty comma-separated tokens."""
    tokens = (token.strip() for token in text_to_comma_seperated(summary).split(','))
    return [token for token in tokens if token]


def analyse_document(resume_path, job_description_path, tech):
    """Analyzes the resume and job description.

    Args:
        resume_path: Path of the resume file (.pdf or .txt).
        job_description_path: Path of the job description file (.txt).
        tech: Name of the LLM provider to use, or None if none was selected.

    Returns:
        Always a 5-tuple of strings
        ``(status, similarity, response, resume_skills, job_skills)``.
        ``status`` is "Success" on success; otherwise it carries the error
        message and the remaining four fields are empty. Every outcome has the
        same shape because callers bind the result to five outputs.
    """
    if tech is None:
        return _analysis_error("Error: Select a technology")

    # A missing upload arrives as None/empty and cannot be passed to os.path
    if not resume_path or not job_description_path:
        return _analysis_error("Error: Upload both a resume and a job description")

    # Validate both file types up front; compare case-insensitively so that
    # e.g. ".PDF" is accepted as well.
    resume_extension = os.path.splitext(resume_path)[1].lower()
    if resume_extension not in ('.pdf', '.txt'):
        return _analysis_error("Invalid file type. Please upload a PDF or TXT file for the resume.")
    if os.path.splitext(job_description_path)[1].lower() != '.txt':
        return _analysis_error("Invalid file type. Please upload a TXT file for the job description.")

    # Extract resume text based on the file type
    if resume_extension == '.pdf':
        resume_text = read_from_pdf(resume_path)
    else:
        resume_text = read_from_text(resume_path)

    # Extract job description text
    job_description_text = read_from_text(job_description_path)

    analysed_resume = genai_text_extractor(resume_text, tech)
    analysed_job_description = genai_text_extractor(job_description_text, tech)

    # Identify matching skills (empty tokens are dropped so that "" never
    # counts as a shared skill; sorting keeps the output order stable)
    resume_skill_set = _skill_tokens(analysed_resume)
    job_description_skill_set = _skill_tokens(analysed_job_description)
    matching_skills = sorted(set(resume_skill_set) & set(job_description_skill_set))

    # Calculate similarity score USING PROCESSED RESUME TEXT AND JOB DESCRIPTION TEXT THAN JUST PDF EXTRACTED TEXT
    similarity_score = calculate_resume_similarity(analysed_resume, analysed_job_description)
    fit_category, fit_percentage = categorize_fit(similarity_score)

    # Generate communication response
    communication_response = communication_generator(
        f"The candidate has the following skills: {', '.join(resume_skill_set)}.",
        matching_skills,
        fit_category
    )

    return (
        "Success",
        f"Similarity score: {similarity_score*100:.2f}%",
        communication_response,
        ", ".join(resume_skill_set),
        ", ".join(job_description_skill_set),
    )

In [None]:
"""Frontend manager for the ATS system"""
def __set_technology(tech):
    """Return a Gradio update that shows the chosen technology, or hides the banner if none is chosen."""
    if not tech:
        return gr.update(visible=False)
    banner = f"Running analysis using technology: {tech.upper()}"
    return gr.update(value=banner, visible=True)

def interface(technologies):
    """Build the Gradio frontend of the ATS system.

    Args:
        technologies: Names of the LLM providers offered in the dropdown.

    Returns:
        The assembled ``gr.Blocks`` application (not yet launched).
    """
    with gr.Blocks() as demo:
        with gr.Tab("Resume Analysis"):
            # Technology selection dropdown
            gr.Markdown("### Technology Selection")
            tech_dropdown = gr.Dropdown(
                label="Choose a model from the available API keys",
                choices=technologies,
                interactive=True,
                value=None,
            )
            # Hidden until a technology is picked; filled by __set_technology
            selected_tech_display = gr.Markdown(visible=False)
            tech_dropdown.change(fn=__set_technology, inputs=tech_dropdown, outputs=selected_tech_display)

            with gr.Row():
                # Inputs on the left
                with gr.Column(scale=1):  # Adjust scale for layout proportions
                    gr.Markdown("### Upload Files")
                    resume_input = gr.File(label="Upload Resume (.PDF or .TXT)")
                    job_input = gr.File(label="Upload Job Description (.TXT)")
                    analyse_button = gr.Button("Submit")

                # Outputs on the right
                with gr.Column(scale=2):  # Adjust scale for layout proportions
                    gr.Markdown("### Results")
                    status = gr.Textbox(label="Status", interactive=False)
                    similarity_score = gr.Textbox(label="Similarity Score", interactive=False)
                    tool_response = gr.Textbox(label="Tool Response", interactive=False)
                    resume_skills = gr.Textbox(label="Identified Skills in Resume", interactive=False)
                    job_skills = gr.Textbox(label="Defined Skills in Job Description", interactive=False)

            # The five outputs map, in order, onto the values analyse_document returns
            analyse_button.click(
                fn=analyse_document,
                inputs=[resume_input, job_input, tech_dropdown],
                outputs=[status, similarity_score, tool_response, resume_skills, job_skills],
            )

    return demo

In [None]:
""" Main script to combine everything"""

if LAUNCH_MODE:
  # Frontend mode: serve the Gradio app with a public share link
  demo = interface(technologies)
  demo.launch(debug=True, share=True)
else:
  # Debug mode: analyse the bundled sample documents once per available technology
  resume_path = "documents/resume.pdf"
  job_description_path = "documents/job_description.txt"
  for tech in technologies:
    print(f"\nRunning analysis using technology: {tech.capitalize()}")
    result = analyse_document(resume_path, job_description_path, tech)
    if isinstance(result, str):
      # A failure may be reported as a bare message rather than five fields
      print(result)
      continue
    status, similarity_score, communication_response, _, _ = result
    if status != "Success":
      print(status)
      continue
    # similarity_score already carries its "Similarity score: " label
    print(similarity_score)
    print(communication_response)

Colab notebook detected. This cell will run indefinitely so that you can see errors and logs. To turn off, set debug=False in launch().
* Running on public URL: https://7ea26f2f967bde49e3.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


Traceback (most recent call last):
  File "/usr/local/lib/python3.11/dist-packages/gradio/queueing.py", line 625, in process_events
    response = await route_utils.call_process_api(
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/gradio/route_utils.py", line 322, in call_process_api
    output = await app.get_blocks().process_api(
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/gradio/blocks.py", line 2108, in process_api
    result = await self.call_function(
             ^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/gradio/blocks.py", line 1655, in call_function
    prediction = await anyio.to_thread.run_sync(  # type: ignore
                 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/anyio/to_thread.py", line 33, in run_sync
    return await get_asynclib().run_sync_in_worker_thread(
           ^^^^^^^^^^

Keyboard interruption in main thread... closing server.
Killing tunnel 127.0.0.1:7860 <> https://7ea26f2f967bde49e3.gradio.live
