In [None]:
import spacy

# Load spaCy model
nlp = spacy.load("en_core_web_sm")

# Load skills from file: one skill per line, trimmed and lower-cased.
# Blank lines are skipped so the vocabulary never contains an empty-string "skill".
with open("skills.txt") as f:
    skill_list = [skill for skill in (line.strip().lower() for line in f) if skill]

# Set of known skills for fast exact-match membership tests
skill_set = set(skill_list)

def extract_skills(text):
    doc = nlp(text.lower())
    extracted_skills = set()

    # Extract noun chunks for multi-word skills
    for chunk in doc.noun_chunks:
        chunk_text = chunk.text.strip().lower()
        if chunk_text in skill_set:
            extracted_skills.add(chunk_text)

    # Also check individual tokens (for single word skills)
    for token in doc:
        if token.text in skill_set:
            extracted_skills.add(token.text)
    
    return extracted_skills

# Example usage (for testing): run the extractor on a one-sentence sample resume
resume_text = (
    "Experienced Python developer with Flask, React, MongoDB, "
    "and NLP tools like spaCy and machine learning."
)
skills_found = extract_skills(resume_text)
print("Extracted Skills:", skills_found)



In [16]:
!pip install PyMuPDF




[notice] A new release of pip is available: 23.0.1 -> 25.1.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [None]:
import fitz  # PyMuPDF
import os
import spacy

# Load spaCy model
nlp = spacy.load("en_core_web_sm")

# Load skills from file: one skill per line, trimmed and lower-cased.
# Blank lines are skipped so the vocabulary never contains an empty-string "skill".
with open("skills.txt") as f:
    skill_list = [skill for skill in (line.strip().lower() for line in f) if skill]
skill_set = set(skill_list)

# Skill extraction function
def extract_skills(text):
    doc = nlp(text.lower())
    extracted_skills = set()

    for chunk in doc.noun_chunks:
        chunk_text = chunk.text.strip().lower()
        if chunk_text in skill_set:
            extracted_skills.add(chunk_text)

    for token in doc:
        if token.text in skill_set:
            extracted_skills.add(token.text)
    
    return extracted_skills

# PDF resume text extraction
def extract_text_from_pdf(pdf_path):
    """Return the text of every page of the PDF at ``pdf_path``, concatenated in page order.

    The document is opened with PyMuPDF (``fitz``) inside a ``with`` block so
    the underlying file handle is released even if a page fails to extract.

    Args:
        pdf_path: Path of the PDF file to read.

    Returns:
        str: The text of all pages joined without a separator.
    """
    with fitz.open(pdf_path) as doc:
        return "".join(page.get_text() for page in doc)

# Main script
# Paths pasted from a file manager often carry surrounding whitespace or quotes,
# so strip both before checking the path.
resume_path = input("Enter path to resume PDF: ").strip().strip("\"'")

# isfile (not exists) so a directory path is reported as missing instead of
# failing later inside the PDF reader.
if os.path.isfile(resume_path):
    resume_text = extract_text_from_pdf(resume_path)
    print("\n--- Resume Text Preview ---\n")
    print(resume_text[:1000])  # Preview first 1000 characters

    # Extract and print skills
    skills_found = extract_skills(resume_text)
    print("\n--- Extracted Skills ---\n")
    print(skills_found)
else:
    # NOTE: resume_text keeps whatever value an earlier cell assigned to it.
    print("File not found.")


In [None]:
# Parse the resume and collect the text of every noun / proper noun that is not a stop word
doc = nlp(resume_text)
skills = list(
    tok.text
    for tok in doc
    if not tok.is_stop and tok.pos_ in ("NOUN", "PROPN")
)
print(set(skills))  # Using set to remove duplicates


In [None]:
# List each named entity spaCy found in the parsed resume together with its label
for ent in doc.ents:
    entity_text, entity_label = ent.text, ent.label_
    print(entity_text, "->", entity_label)


In [None]:
from spacy.lang.en.stop_words import STOP_WORDS
from string import punctuation
from heapq import nlargest

def summarize_text(text, n=3):
    """Return an extractive summary made of the ``n`` highest-scoring sentences.

    Each word is scored by how often it occurs in ``text`` (case-insensitive),
    ignoring stop words, punctuation and whitespace tokens.  A sentence's
    score is the sum of the scores of its words.  The top ``n`` sentences are
    returned in descending score order, joined by single spaces.

    Args:
        text: Text to summarise.  ``None``, empty or whitespace-only text
            yields ``""``.
        n: Maximum number of sentences to keep; values <= 0 yield ``""``.

    Returns:
        str: The selected sentences joined with spaces.
    """
    if not text or not text.strip() or n <= 0:
        return ""

    doc = nlp(text)

    word_freq = {}
    for token in doc:
        # Whitespace tokens (e.g. the many newlines in PDF text) and
        # multi-character punctuation such as "..." are not words; counting
        # them would let layout noise dominate the sentence ranking.
        if token.is_space or token.is_punct:
            continue
        key = token.text.lower()
        if key in STOP_WORDS or key in punctuation:
            continue
        word_freq[key] = word_freq.get(key, 0) + 1

    sentence_strength = {}
    for sent in doc.sents:
        strength = sum(word_freq.get(token.text.lower(), 0) for token in sent)
        if strength:
            # Sentences with no scored word are left out entirely.
            sentence_strength[sent] = strength

    summary = nlargest(n, sentence_strength, key=sentence_strength.get)
    return ' '.join(str(s) for s in summary)

# Show a three-sentence summary of the resume
print(summarize_text(resume_text, n=3))



In [25]:
# Sample job description used by the matching and scoring cells
job_description = (
    "\n"
    "Looking for a Python developer with knowledge in Flask, React, MongoDB, and NLP tools like spaCy.\n"
)


In [None]:
# Function to extract keywords from text
def extract_keywords(text):
    """Return the lower-cased nouns and proper nouns of ``text`` as a set.

    ``text`` is parsed with the module-level pipeline ``nlp``; tokens that
    are stop words or not purely alphabetic are dropped.
    """
    keywords = set()
    for tok in nlp(text):
        if tok.pos_ not in ("NOUN", "PROPN"):
            continue
        if tok.is_stop or not tok.is_alpha:
            continue
        keywords.add(tok.text.lower())
    return keywords

# Extract keywords from both job description and resume


In [None]:
# Function to extract keywords from text
def extract_keywords(text):
    """Collect the distinct lower-cased nouns / proper nouns found in ``text``.

    Parsing is done by the module-level pipeline ``nlp``.  Only alphabetic
    tokens that are not stop words are kept.
    """
    def _is_keyword(tok):
        return tok.is_alpha and not tok.is_stop and tok.pos_ in ("NOUN", "PROPN")

    return {tok.text.lower() for tok in filter(_is_keyword, nlp(text))}

# Extract the known skills mentioned in the job description and in the resume
jd_skills = extract_skills(job_description)
resume_skills = extract_skills(resume_text)

# Skills requested by the job that also appear in the resume
matched_skills = jd_skills.intersection(resume_skills)

# Share of the job's skills covered by the resume; 0 when the job lists none
if jd_skills:
    skill_score = len(matched_skills) / len(jd_skills)
else:
    skill_score = 0

# Display
print("Job Description Skills:", jd_skills)
print("Resume Skills:", resume_skills)
print("Matched Skills:", matched_skills)
print(f"Skill Match Score: {skill_score:.2f}")

In [None]:
# calculate_resume_score is defined in a later cell of this notebook, so on a
# fresh kernel this cell used to fail with a NameError.  Guard the call and
# tell the reader what to run first.
if "calculate_resume_score" in globals():
    final_score = calculate_resume_score(resume_text, job_description)
    print(f"\nFinal Resume Score: {final_score:.2f}")
else:
    print("calculate_resume_score is not defined yet; run the cell that defines it first.")


In [None]:
!pip install scikit-learn


In [None]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Extract top N keywords using TF-IDF
def extract_top_n_keywords(text, n=10):
    """Return the ``n`` highest-weighted TF-IDF terms of ``text`` as a set.

    The vectorizer is fitted on ``text`` alone, with English stop words
    removed and at most 1000 features.

    Args:
        text: Document to analyse.  ``None``, empty or whitespace-only text
            yields an empty set.
        n: Number of top-scoring terms to keep.

    Returns:
        set[str]: The selected terms; empty when the text has no usable terms.
    """
    if not text or not text.strip():
        return set()

    vectorizer = TfidfVectorizer(stop_words='english', max_features=1000)
    try:
        tfidf_matrix = vectorizer.fit_transform([text])
    except ValueError:
        # Raised when nothing is left after stop-word removal
        # (e.g. text made only of stop words or punctuation).
        return set()

    feature_names = vectorizer.get_feature_names_out()
    scores = tfidf_matrix.toarray().flatten()
    # Stable sort: ties keep the vectorizer's feature order.
    keywords = sorted(zip(feature_names, scores), key=lambda x: -x[1])[:n]
    return set(k for k, _ in keywords)

# Compute cosine similarity between two texts
def get_similarity_score(text1, text2):
    """Return the cosine similarity of the TF-IDF vectors of two texts.

    Both texts are vectorised together with English stop words removed.

    Args:
        text1: First document.
        text2: Second document.

    Returns:
        float: The similarity score; ``0.0`` when either text is empty or
        when no usable term remains in the two texts.
    """
    if not text1 or not text2 or not text1.strip() or not text2.strip():
        # An empty document has no terms, so it shares nothing with the other.
        return 0.0

    vectorizer = TfidfVectorizer(stop_words='english')
    try:
        vectors = vectorizer.fit_transform([text1, text2])
    except ValueError:
        # Raised when both texts contain nothing but stop words / punctuation.
        return 0.0
    return cosine_similarity(vectors[0], vectors[1])[0][0]

# Main function to compute final score
def calculate_resume_score(resume_text, job_text, weights=(0.5, 0.3, 0.2)):
    """Score a resume against a job description.

    Three component scores are combined into a weighted sum:

    * skill: share of the job's top keywords (from
      ``extract_top_n_keywords``) that are also top keywords of the resume;
      ``0.0`` when the job yields no keywords.
    * experience: ``1.0`` if the resume contains "2 years", "3 years" or
      "experience" (case-insensitive), otherwise ``0.5``.
    * education: ``1.0`` if the resume contains "b.tech", "bachelor" or
      "graduation" (case-insensitive), otherwise ``0.0``.

    Args:
        resume_text: Full text of the resume.
        job_text: Full text of the job description.
        weights: ``(skill, experience, education)`` weights; the default
            reproduces the original fixed 0.5 / 0.3 / 0.2 weighting.

    Returns:
        float: The weighted score.
    """
    skill_weight, experience_weight, education_weight = weights

    # Skill score based on keyword overlap
    resume_keywords = extract_top_n_keywords(resume_text)
    job_keywords = extract_top_n_keywords(job_text)
    if job_keywords:
        skill_score = len(resume_keywords & job_keywords) / len(job_keywords)
    else:
        skill_score = 0.0

    # Lower-case once instead of once per candidate phrase.
    resume_lower = resume_text.lower()

    # Experience score: simple rule-based logic
    experience_markers = ("2 years", "3 years", "experience")
    experience_score = 1.0 if any(marker in resume_lower for marker in experience_markers) else 0.5

    # Education score: basic keyword check
    education_markers = ("b.tech", "bachelor", "graduation")
    education_score = 1.0 if any(marker in resume_lower for marker in education_markers) else 0.0

    # Weighted final score
    final_score = (
        (skill_weight * skill_score)
        + (experience_weight * experience_score)
        + (education_weight * education_score)
    )
    return final_score

# Usage: score the loaded resume against the sample job description and report it
score = calculate_resume_score(resume_text, job_description)
print("\n Final Resume Score: {:.2f}".format(score))
