In [None]:
#ATS for Path.ai
#drop the files into the content folder so the path is: "/content/{file_name}"

In [None]:
!pip install python-docx
import docx

#converts docx to a string
def docx_to_string(docx_path):
    """Return the paragraph text of a Word document as a single string.

    Only ``doc.paragraphs`` is read, so text that python-docx does not expose
    through that collection is not included.

    Args:
        docx_path: Path of the .docx file to load with ``docx.Document``.

    Returns:
        The text of each paragraph, in document order, one paragraph per line.
    """
    # Load the Word document using python-docx
    doc = docx.Document(docx_path)

    # Join with a newline instead of '': with no separator the last word of
    # one paragraph fuses with the first word of the next ("skillsExperience"),
    # which corrupts any word-level tokenization done on the result.
    return '\n'.join(para.text for para in doc.paragraphs)

In [None]:
!pip install pypdf2
from PyPDF2 import PdfReader

#converts pdf to a string (NOT RECOMMENDED)
def extract_text_from_pdf(pdf_path):
    """Return the text extracted from every page of a PDF file.

    Args:
        pdf_path: Path of the PDF file to read.

    Returns:
        The extracted text of all pages in page order, separated by newlines.
        Pages that yield no text contribute an empty string.
    """
    with open(pdf_path, 'rb') as file:
        reader = PdfReader(file)
        # Extract while the file is still open; the reader pulls page data
        # from the underlying stream. ``or ""`` guards against a page that
        # yields nothing, so the join below cannot fail on a non-string.
        page_texts = [page.extract_text() or "" for page in reader.pages]

    # A newline between pages keeps the last word of one page from fusing
    # with the first word of the next, and avoids repeated string `+=`.
    return "\n".join(page_texts)

In [None]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import string

#converts resume_string to all lowercase & eliminates punctuation
def preprocess_text(text):
    """Normalize text by lowercasing it and deleting ASCII punctuation.

    Punctuation characters (those in ``string.punctuation``) are removed
    outright, not replaced by spaces, so "day-to-day" becomes "daytoday".

    Args:
        text: The string to normalize.

    Returns:
        The lowercased string with every punctuation character removed.
    """
    # Mapping a code point to None tells str.translate to delete it.
    drop_punctuation = dict.fromkeys(map(ord, string.punctuation))
    return text.lower().translate(drop_punctuation)

#ranks resume_string against job_description
def rank_resume(resume, job_description):
    """Score how similar a resume is to a job description.

    Both texts are normalized with ``preprocess_text``, vectorized together
    with TF-IDF (English stop words removed), and compared by cosine
    similarity.

    Args:
        resume: The resume text.
        job_description: The job description text.

    Returns:
        The cosine similarity of the two TF-IDF vectors multiplied by 100.
    """
    # Normalize both documents the same way; resume first, then the posting.
    documents = [preprocess_text(doc) for doc in (resume, job_description)]

    # The vectorizer is fitted on just these two documents:
    # row 0 is the resume, row 1 is the job description.
    tfidf = TfidfVectorizer(stop_words='english').fit_transform(documents)
    resume_vector, job_vector = tfidf[0:1], tfidf[1:2]

    # Comparing one row against one row gives a 1x1 result; take its only cell.
    score = cosine_similarity(resume_vector, job_vector)[0][0]

    # Rescale the similarity to the notebook's 0-100 range.
    return score * 100

In [None]:
from sklearn.feature_extraction.text import TfidfVectorizer

#suggests improvements for a users resume based on keywords found in job_description
def suggest_improvements(resume, job_description, threshold=0.1):
    """Suggest job-description keywords that are missing from a resume.

    Both texts are vectorized together with TF-IDF. A term is reported when
    its score in the job description exceeds ``threshold`` and its score in
    the resume is zero.

    Args:
        resume: The resume text.
        job_description: The job description text.
        threshold: Minimum TF-IDF score a job-description term must exceed
            to be suggested. Defaults to 0.1.

    Returns:
        A list with a single suggestion sentence naming the missing keywords,
        or an empty list when there is nothing to suggest.
    """
    # With only two documents, max_df=0.85 drops every term that occurs in
    # both of them, leaving terms unique to one side.
    vectorizer = TfidfVectorizer(stop_words='english', max_df=0.85)
    try:
        matrix = vectorizer.fit_transform([resume, job_description])
    except ValueError:
        # scikit-learn raises ValueError when no terms survive (empty input,
        # only stop words, or every term shared by both texts). In all of
        # those cases there is no keyword to recommend, so report nothing
        # instead of crashing on a resume that already covers the posting.
        return []

    feature_names = vectorizer.get_feature_names_out()
    # Row 0 holds the resume scores, row 1 the job-description scores.
    resume_scores, job_scores = matrix.toarray()

    # Important job-description terms that never appear in the resume.
    missing_keywords = [
        term
        for term, resume_score, job_score in zip(feature_names, resume_scores, job_scores)
        if job_score > threshold and resume_score == 0
    ]

    if not missing_keywords:
        return []

    return [
        f"Consider adding these keywords from the job description to your resume: {', '.join(missing_keywords)}."
    ]

In [None]:
# Sample CTO job description used as the comparison target for every resume
# below. Obtained from chat using the prompt: "give me a job description for a cto".
# Bracketed placeholders such as [Company Name] are left unfilled.
job_description = """
Job Title: Chief Technology Officer (CTO)

Company: [Company Name]

Location: [Company Location or "Remote"]

Description:
[Company Name], an industry-leading [specific industry, e.g., 'fintech', 'healthtech', 'e-commerce'] company, is seeking an experienced Chief Technology Officer (CTO) to lead our technology department. The ideal candidate will be responsible for overseeing all technical aspects of the company, guiding the company's strategic direction, development, and future growth.

Key Responsibilities:

1. Lead the technology department in day-to-day operations, ensuring optimal performance and output.
2. Develop and implement new technologies that yield competitive advantage.
3. Collaborate with departments to align the company's technology resources with its short-term and long-term goals.
4. Work with stakeholders to define business and system requirements.
5. Monitor system infrastructure to ensure functionality and efficiency.
6. Build quality assurance and data protection processes.
7. Create and implement technology strategies.
8. Track, analyze, and monitor technology performance metrics.
9. Oversee IT budgets to ensure cost-effectiveness.
10. Identify and leverage opportunities for technological partnerships and outsourcing.

Qualifications:

1. Bachelor's degree in Computer Science, Engineering, or a related field; Master's degree preferred.
2. Proven experience in a CTO or similar leadership role.
3. Knowledge of technological trends to build a strategy.
4. Experience in software development and platform implementation.
5. Strong leadership qualities and organizational skills.
6. Exceptional project management skills.
7. Ability to conduct technology analysis and research.
8. Strong problem-solving capabilities and the ability to think strategically.
9. Excellent verbal and written communication skills.

To Apply:
Interested candidates are invited to send their resume, along with a cover letter detailing their relevant experience and explaining why they are the best fit for this role, to [HR email address]. We thank all applicants for their interest, but only those selected for an interview will be contacted.

"""

Running a PDF resume of a software engineer that WOULD NOT be qualified for a CTO position. The score is low. Scores range from 0-100 in this ranking system.

In [None]:
# Load the sample software-engineer resume (PDF) and extract its text into
# resume_text, which the scoring and suggestion cells read.
resume_path = "/content/sample_software_engineer_resume.pdf"
resume_text = extract_text_from_pdf(resume_path)
# Uncomment to inspect the extracted text:
#print(resume_text)

In [None]:
# Score the currently loaded resume against the job description and print
# the result rounded to two decimals.
similarity_score = rank_resume(resume_text, job_description)
print(f"Similarity Score: {similarity_score:.2f}")

In [None]:
# Print each keyword suggestion for the currently loaded resume on its own line.
tips = suggest_improvements(resume_text, job_description)
for tip in tips:
    print(tip)

Running a PDF resume of a CTO that SHOULD be qualified for a CTO position. The ranking is higher.

In [None]:
# Load the sample CTO resume (PDF) and extract its text into resume_text,
# replacing the previously loaded resume.
resume_path = "/content/sample_cto_resume.pdf"
resume_text = extract_text_from_pdf(resume_path)
# Uncomment to inspect the extracted text:
#print(resume_text)

In [None]:
# Score the currently loaded resume against the job description and print
# the result rounded to two decimals.
similarity_score = rank_resume(resume_text, job_description)
print(f"Similarity Score: {similarity_score:.2f}")

In [None]:
# Print each keyword suggestion for the currently loaded resume on its own line.
tips = suggest_improvements(resume_text, job_description)
for tip in tips:
    print(tip)

Running a PDF resume of a CTO that SHOULD be qualified for a CTO position w/ the suggested keywords. The ranking is even higher.

In [None]:
# Load the sample CTO resume with the suggested keywords added (PDF) and
# extract its text into resume_text, replacing the previously loaded resume.
resume_path = "/content/sample_cto_resume_w_keywords.pdf"
resume_text = extract_text_from_pdf(resume_path)
# Uncomment to inspect the extracted text:
#print(resume_text)

In [None]:
# Score the currently loaded resume against the job description and print
# the result rounded to two decimals.
similarity_score = rank_resume(resume_text, job_description)
print(f"Similarity Score: {similarity_score:.2f}")

In [None]:
# Print each keyword suggestion for the currently loaded resume on its own line.
tips = suggest_improvements(resume_text, job_description)
for tip in tips:
    print(tip)

Running a DOCX resume of a CTO that SHOULD be qualified for a CTO position (ORIGINAL -> converted the PDF to a DOCX). The ranking is similar to that of the sample_cto_resume PDF.

In [None]:
# Load the sample CTO resume without the added keywords (DOCX) and read its
# text into resume_text via docx_to_string, replacing the previously loaded resume.
resume_path = "/content/sample_cto_resume_no_keywords.docx"
resume_text = docx_to_string(resume_path)
# Uncomment to inspect the extracted text:
#print(resume_text)

In [None]:
# Score the currently loaded resume against the job description and print
# the result rounded to two decimals.
similarity_score = rank_resume(resume_text, job_description)
print(f"Similarity Score: {similarity_score:.2f}")

In [None]:
# Print each keyword suggestion for the currently loaded resume on its own line.
tips = suggest_improvements(resume_text, job_description)
for tip in tips:
    print(tip)

Questions of interest:
1. Would this be cheaper than the current implementation? **We could use the job description from the scraped LinkedIn data. This should be cheaper.**
2. Is this implementation as accurate as, or more accurate than, the current implementation? **No idea. We need to test.**
3. Once we get a working model, what should the threshold (the score) of a GOOD resume be?

Word documents are the preferred resume file type for applicant tracking systems (ATSs) because they are easier to parse than a PDF.

There are other tips/tricks that we could implement to help the job applicant better pass ATSs (https://www.linkedin.com/pulse/6-resume-hacks-pass-ats-test-amelia-walker).