In [1]:
!pip install PyMuPDF

Collecting PyMuPDF
  Downloading pymupdf-1.26.4-cp39-abi3-win_amd64.whl.metadata (3.4 kB)
Downloading pymupdf-1.26.4-cp39-abi3-win_amd64.whl (18.7 MB)
   ---------------------------------------- 0.0/18.7 MB ? eta -:--:--
   ------------ --------------------------- 6.0/18.7 MB 33.5 MB/s eta 0:00:01
   --------------------------- ------------ 12.8/18.7 MB 32.2 MB/s eta 0:00:01
   ---------------------------------------- 18.7/18.7 MB 32.8 MB/s  0:00:00
Installing collected packages: PyMuPDF
Successfully installed PyMuPDF-1.26.4


In [1]:
import fitz  # Import the PyMuPDF library, aliased as fitz
import re    # Import the regular expression module

def extract_text_from_pdf(pdf_path):
    """
    Extracts all plain text from a given PDF file.

    Args:
        pdf_path (str): The file path to the PDF document.

    Returns:
        str: The text of every page concatenated in page order, with runs of
             consecutive newlines collapsed into a single newline.
             If opening or reading the PDF raises, a message of the form
             "Error reading PDF file: <exception>" is returned instead of
             the exception propagating.
    """
    try:
        # Open the document as a context manager so it is closed even when
        # extraction fails part-way through (the handle was previously leaked).
        with fitz.open(pdf_path) as doc:
            # page.get_text("text") requests plain-text extraction.
            # Collect per-page text and join once instead of repeated `+=`.
            page_texts = [page.get_text("text") for page in doc]

        # Replace multiple consecutive newlines with a single one for easier processing
        return re.sub(r'\n+', '\n', "".join(page_texts))
    except Exception as e:
        # Callers rely on this exact prefix to detect a failed read.
        return f"Error reading PDF file: {e}"

def find_specific_experience(resume_text, keyword):
    """
    Finds the sentences in the resume text that contain a keyword.

    Args:
        resume_text (str): The full text of the resume.
        keyword (str): The skill or experience to search for. Leading and
            trailing whitespace is ignored.

    Returns:
        list: The matching sentences, in document order, each with newlines
              replaced by spaces and surrounding whitespace stripped.
              Empty when the keyword is blank or nothing matches.
    """
    keyword = keyword.strip()
    # A blank keyword would otherwise compile to a pattern that matches at
    # every word boundary, i.e. almost every sentence.
    if not keyword:
        return []

    # Match the keyword as a whole word, case-insensitively. '\b' only works
    # next to a word character, so it is applied per edge: "Java" still does not
    # match inside "JavaScript", while keywords such as "C++", "C#" or ".NET"
    # (whose edge is punctuation) can match in ordinary text like "C++ and".
    prefix = r'\b' if re.match(r'\w', keyword[0]) else ''
    suffix = r'\b' if re.match(r'\w', keyword[-1]) else ''
    keyword_regex = re.compile(prefix + re.escape(keyword) + suffix, re.IGNORECASE)

    # Split after '.', '?' or '!' when followed by whitespace; the lookbehind
    # keeps the terminating punctuation attached to its sentence.
    sentences = re.split(r'(?<=[.?!])\s+', resume_text)

    found_sentences = []
    for sentence in sentences:
        # PDF text keeps hard line breaks inside sentences; flatten them.
        cleaned_sentence = sentence.replace('\n', ' ').strip()
        if cleaned_sentence and keyword_regex.search(cleaned_sentence):
            found_sentences.append(cleaned_sentence)

    return found_sentences

def main():
    """
    Runs the interactive resume analyzer.

    Prompts for a PDF path, extracts its text with extract_text_from_pdf, and
    then repeatedly prompts for a keyword, printing every sentence returned by
    find_specific_experience, until the user types 'quit'. If the PDF cannot
    be read, the error message is printed and the function returns.
    """
    # Prompt the user to enter the name of their resume PDF file
    pdf_file_path = input("Please enter the PDF filename of your resume (e.g., my_resume.pdf): ").strip()

    # Extract the text content from the PDF
    resume_text = extract_text_from_pdf(pdf_file_path)

    # extract_text_from_pdf reports failure by returning a message with this
    # exact prefix. Testing for the prefix (rather than the bare word "Error"
    # anywhere in the text) keeps resumes that merely mention "Error" working.
    if resume_text.startswith("Error reading PDF file:"):
        print(resume_text)
        return

    print("\nResume content has been successfully read!")

    # Start a loop to allow the user to perform multiple searches
    while True:
        # Surrounding whitespace is stripped so " quit " still exits.
        search_keyword = input("\nPlease enter the skill or experience you want to search for (type 'quit' to exit): ").strip()

        # Allow the user to exit the loop
        if search_keyword.lower() == 'quit':
            break

        # Nothing to search for; prompt again.
        if not search_keyword:
            continue

        # Call the function to find all sentences containing the keyword
        experiences = find_specific_experience(resume_text, search_keyword)

        # Check if any matching sentences were found
        if experiences:
            print(f"\n--- Found {len(experiences)} relevant experiences for '{search_keyword}' ---")
            # Number the results starting from 1 for display
            for i, exp in enumerate(experiences, 1):
                print(f"\nExperience {i}: {exp}")
            print("\n-----------------------------------------")
        else:
            # Inform the user if the keyword was not found
            print(f"\nNo experiences related to '{search_keyword}' were found in your resume.")


In [2]:
# Launch the interactive analyzer: prompts for a resume PDF path, then for
# search keywords until 'quit' is entered.
main()


Resume content has been successfully read!

--- Found 1 relevant experiences for 'SQL' ---

Experience 1: in Information and Computing Science, Ningbo University of Technology Key Courses: Data Structures and Algorithms, Database in SQL, Object-Oriented Programming, Data Visualization with Python, Mathematical Modeling, Big Data in Spark Projects Apr 2025 National Survey Design of Undergraduate Students ○Designed and executed a dual-frame survey strategy (USPS ABS + online panel) to be representative to U.S.

-----------------------------------------

--- Found 4 relevant experiences for 'Python' ---

Experience 1: Expertise in R (tidyverse, data.table), Python (pandas, scikit-learn), Machine Learning, Statistical Modeling, Causal Inference, and Data Visualization.

Experience 2: Experience May 2025 - Present Research Assistant, Department of Epidemiology and Biostatistics at UMD ○Web scraping All of US datasets by searching, extracting, and filtering biomarkers in Python.

Experience

In [4]:
!pip install spacy

Collecting spacy
  Downloading spacy-3.8.7-cp310-cp310-win_amd64.whl.metadata (28 kB)
Collecting spacy-legacy<3.1.0,>=3.0.11 (from spacy)
  Downloading spacy_legacy-3.0.12-py2.py3-none-any.whl.metadata (2.8 kB)
Collecting spacy-loggers<2.0.0,>=1.0.0 (from spacy)
  Downloading spacy_loggers-1.0.5-py3-none-any.whl.metadata (23 kB)
Collecting murmurhash<1.1.0,>=0.28.0 (from spacy)
  Downloading murmurhash-1.0.13-cp310-cp310-win_amd64.whl.metadata (2.2 kB)
Collecting cymem<2.1.0,>=2.0.2 (from spacy)
  Downloading cymem-2.0.11-cp310-cp310-win_amd64.whl.metadata (8.8 kB)
Collecting preshed<3.1.0,>=3.0.2 (from spacy)
  Downloading preshed-3.0.10-cp310-cp310-win_amd64.whl.metadata (2.5 kB)
Collecting thinc<8.4.0,>=8.3.4 (from spacy)
  Downloading thinc-8.3.6-cp310-cp310-win_amd64.whl.metadata (15 kB)
Collecting wasabi<1.2.0,>=0.9.1 (from spacy)
  Downloading wasabi-1.1.3-py3-none-any.whl.metadata (28 kB)
Collecting srsly<3.0.0,>=2.4.3 (from spacy)
  Downloading srsly-2.5.1-cp310-cp310-win_amd6