In [None]:
import spacy

# Load spaCy model
nlp = spacy.load("en_core_web_sm")

# Load skills from file: one skill per line, stripped and lowercased.
# The encoding is pinned so the file is decoded the same way on every
# platform (assumes skills.txt is UTF-8 -- TODO confirm), and blank lines are
# skipped so the empty string never becomes a "skill".
with open("skills.txt", encoding="utf-8") as f:
    skill_list = [
        skill
        for skill in (line.strip().lower() for line in f)
        if skill
    ]

# Set form of the skill list, used for membership tests during matching
skill_set = set(skill_list)


# ──────────────────────────────────────────────────────────────────────────────
# NO print(...) calls below. Instead, wrap in a function that returns data.
# ──────────────────────────────────────────────────────────────────────────────

def get_skills_from_resume_text(resume_text):
    """
    Return the skills detected in ``resume_text`` as a plain Python list.

    The matching itself is delegated to ``extract_skills``; this wrapper
    only converts the result to a list so a Flask route can emit it as JSON.
    """
    return list(extract_skills(resume_text))




In [None]:
import fitz  # PyMuPDF
import os
import spacy

# Load spaCy model
nlp = spacy.load("en_core_web_sm")

# Load skills from file: one skill per line, stripped and lowercased.
# The encoding is pinned so the file is decoded the same way on every
# platform (assumes skills.txt is UTF-8 -- TODO confirm), and blank lines are
# skipped so the empty string never becomes a "skill".
with open("skills.txt", encoding="utf-8") as f:
    skill_list = [
        skill
        for skill in (line.strip().lower() for line in f)
        if skill
    ]
skill_set = set(skill_list)

# Skill extraction function
def extract_skills(text):
    """
    Return the set of known skills mentioned in ``text``.

    The text is lowercased and run through the module-level ``nlp``
    pipeline. A skill is reported when a whole noun chunk, or a single
    token, is exactly equal to an entry of the module-level ``skill_set``.
    """
    parsed = nlp(text.lower())
    found = set()

    # Pass 1: whole noun chunks (the only way a multi-word entry can match).
    found.update(
        phrase
        for phrase in (span.text.strip().lower() for span in parsed.noun_chunks)
        if phrase in skill_set
    )

    # Pass 2: individual tokens.
    found.update(tok.text for tok in parsed if tok.text in skill_set)

    return found

# PDF resume text extraction
def extract_text_from_pdf(pdf_path):
    """
    Return the text of every page of the PDF at ``pdf_path``, concatenated
    in page order.

    The document is opened as a context manager so it is closed again even
    if text extraction fails part-way through. Any error raised while
    opening or reading the document propagates to the caller.
    """
    with fitz.open(pdf_path) as doc:
        # join() avoids rebuilding the string once per page
        return "".join(page.get_text() for page in doc)

# ─── Modified process_resume for Flask ────────────────────────────────────────
def process_resume(resume_path, job_description):
    """
    Extract the text and the matched skills from a PDF resume.

    Args:
        resume_path: filesystem path to the PDF resume.
        job_description: job description string. Kept for interface
            compatibility; this function does not use it.

    Returns:
        A JSON-serializable dict. On success it contains
          - 'resume_text_preview': first 1000 characters of the resume text
          - 'skills':   list of matched skills (from skills.txt)
        If ``resume_path`` is not an existing regular file it contains only
          - 'error':    "File not found."
    """
    # isfile (rather than exists) also rejects directories, which would
    # otherwise pass the check and then fail inside the PDF reader.
    if not os.path.isfile(resume_path):
        return {"error": "File not found."}

    # 1) Extract full resume text
    resume_text = extract_text_from_pdf(resume_path)

    # 2) Extract skills (using skills.txt match)
    skills_found = extract_skills(resume_text)

    # Return as a JSON-serializable dict
    return {
        "resume_text_preview": resume_text[:1000],
        "skills": list(skills_found),
    }


In [None]:
def extract_noun_skills(resume_text):
    """
    Collect the distinct NOUN/PROPN tokens of ``resume_text``.

    Stop words are skipped and token text is kept exactly as written (no
    case folding). The result is a list with duplicates removed, in no
    particular order. Nothing is printed.
    """
    wanted_pos = ("NOUN", "PROPN")
    unique_terms = set()
    for tok in nlp(resume_text):
        if tok.pos_ not in wanted_pos:
            continue
        if tok.is_stop:
            continue
        unique_terms.add(tok.text)
    return list(unique_terms)



In [None]:
def extract_entities(resume_text):
    """
    Run the ``nlp`` pipeline on ``resume_text`` and return its named
    entities as a list of (entity_text, entity_label) tuples.
    """
    entities = []
    for span in nlp(resume_text).ents:
        entities.append((span.text, span.label_))
    return entities


In [None]:
from spacy.lang.en.stop_words import STOP_WORDS
from string import punctuation
from heapq import nlargest

def summarize_text(text, n=3):
    """
    Return an extractive summary of ``text`` built from its ``n``
    highest-scoring sentences.

    Every word is weighted by how often it occurs in the text
    (case-insensitive; stop words, punctuation and whitespace tokens are
    ignored). A sentence's score is the sum of the weights of its words.

    Args:
        text: the text to summarize.
        n: maximum number of sentences to keep (default 3).

    Returns:
        The selected sentences joined by single spaces, highest score
        first. Sentences that contain no counted word are never selected,
        so the result may be an empty string.
    """
    doc = nlp(text)
    punctuation_chars = frozenset(punctuation)

    # `word in punctuation` is a substring test against one long string, so
    # runs such as "..." or "--", non-ASCII bullets/dashes and whitespace
    # tokens were counted as words. Check the token flags and test the text
    # character by character instead.
    word_freq = {}
    for token in doc:
        word = token.text.lower()
        if token.is_space or token.is_punct or word in STOP_WORDS:
            continue
        if all(ch in punctuation_chars for ch in word):
            continue
        word_freq[word] = word_freq.get(word, 0) + 1

    sentence_strength = {}
    for sent in doc.sents:
        score = sum(word_freq.get(token.text.lower(), 0) for token in sent)
        # Only sentences with at least one counted word are candidates.
        if score:
            sentence_strength[sent] = score

    top_sentences = nlargest(n, sentence_strength, key=sentence_strength.get)
    return ' '.join(str(s) for s in top_sentences)

# Example usage inside a Flask route (no print statements):
# summary = summarize_text(resume_text, 3)
# return jsonify({"summary": summary})




In [None]:
def extract_keywords(text):
    """
    Return the set of lowercased NOUN/PROPN tokens found in ``text``.

    Only purely alphabetic tokens that are not stop words are kept.
    """
    keywords = set()
    for tok in nlp(text):
        is_nominal = tok.pos_ in ("NOUN", "PROPN")
        if is_nominal and not tok.is_stop and tok.is_alpha:
            keywords.add(tok.text.lower())
    return keywords

def get_keywords_from_text(text):
    """
    List form of ``extract_keywords(text)``, so a Flask route can return
    the keywords as JSON.
    """
    keyword_set = extract_keywords(text)
    return [*keyword_set]



In [None]:
# Function to extract keywords from text (you can keep this if you still need it elsewhere)
def extract_keywords(text):
    """
    Return the set of lowercased keywords in ``text``: alphabetic NOUN/PROPN
    tokens that are not stop words.
    """
    def _is_keyword(tok):
        # Same three conditions, in the same order, as the inline filter.
        return tok.pos_ in ("NOUN", "PROPN") and not tok.is_stop and tok.is_alpha

    return set(tok.text.lower() for tok in nlp(text) if _is_keyword(tok))

# Existing skill‐extraction function (no changes needed here)
def extract_skills(text):
    """
    Return the set of entries of ``skill_set`` that occur in ``text``.

    The lowercased text is parsed with ``nlp``; every noun chunk and every
    single token is a candidate, and a candidate is kept when it is exactly
    equal to a known skill.
    """
    parsed = nlp(text.lower())

    # Candidates in the original visiting order: noun chunks, then tokens.
    candidates = [span.text.strip().lower() for span in parsed.noun_chunks]
    candidates.extend(tok.text for tok in parsed)

    return {candidate for candidate in candidates if candidate in skill_set}

# ─── Wrapped into a single function ─────────────────────────────────────────────

def calculate_skill_match(job_description, resume_text):
    """
    Compare the skills found in a job description with those in a resume.

    Both texts are passed through ``extract_skills``. Returns a dict:
      - 'job_skills':     list of skills extracted from job_description
      - 'resume_skills':  list of skills extracted from resume_text
      - 'matched_skills': list of skills present in both
      - 'skill_score':    matched / job skills, rounded to two decimals
                          (0.0 when the job description yields no skills)
    """
    jd_skills = extract_skills(job_description)
    resume_skills = extract_skills(resume_text)
    overlap = jd_skills & resume_skills

    # Guard against dividing by zero when no job skills were found.
    if jd_skills:
        coverage = len(overlap) / len(jd_skills)
    else:
        coverage = 0.0

    result = {}
    result["job_skills"] = list(jd_skills)
    result["resume_skills"] = list(resume_skills)
    result["matched_skills"] = list(overlap)
    result["skill_score"] = round(coverage, 2)
    return result


In [None]:
def get_resume_score(resume_text, job_description):
    """
    Return the score from ``calculate_resume_score`` rounded to two
    decimals. The value is returned, not printed.
    """
    return round(calculate_resume_score(resume_text, job_description), 2)



In [None]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

def extract_top_n_keywords(text, n=10):
    """
    Return a set of the top-n keywords (by TF-IDF score) from the given text.

    The vectorizer is fitted on this single text only, with English stop
    words removed and at most 1000 features.

    Args:
        text: the text to analyse.
        n: number of highest-scoring terms to keep (default 10).

    Returns:
        A set of keyword strings. The set is empty when the text yields no
        vocabulary at all (empty text, or nothing but stop words).
    """
    vectorizer = TfidfVectorizer(stop_words='english', max_features=1000)
    try:
        tfidf_matrix = vectorizer.fit_transform([text])
    except ValueError:
        # scikit-learn raises ValueError ("empty vocabulary") when no term
        # survives tokenization and stop-word removal; report "no keywords"
        # instead of crashing the caller.
        return set()

    feature_names = vectorizer.get_feature_names_out()
    scores = tfidf_matrix.toarray().flatten()
    ranked = sorted(zip(feature_names, scores), key=lambda pair: -pair[1])
    return {term for term, _ in ranked[:n]}

def get_similarity_score(text1, text2):
    """
    Return the cosine similarity between text1 and text2 based on TF-IDF
    vectors.

    The vectorizer is fitted on the two texts together, with English stop
    words removed. When neither text contributes any vocabulary (both are
    empty or consist only of stop words) the similarity is reported as 0.0.
    """
    vectorizer = TfidfVectorizer(stop_words='english')
    try:
        vectors = vectorizer.fit_transform([text1, text2])
    except ValueError:
        # scikit-learn raises ValueError ("empty vocabulary") when there is
        # nothing to vectorize; treat that as "no similarity".
        return 0.0
    return cosine_similarity(vectors[0], vectors[1])[0][0]

def calculate_resume_score(resume_text, job_text):
    """
    Combine three heuristics into one unrounded resume score:
      - 50%: share of the job's top TF-IDF keywords that also appear among
             the resume's top TF-IDF keywords
      - 30%: experience (1.0 if an experience marker is present, else 0.5)
      - 20%: education (1.0 if an education marker is present, else 0.0)
    """
    # Keyword component
    resume_keywords = extract_top_n_keywords(resume_text)
    job_keywords = extract_top_n_keywords(job_text)
    if job_keywords:
        skill_score = len(resume_keywords & job_keywords) / len(job_keywords)
    else:
        skill_score = 0.0

    # Both marker checks are case-insensitive substring searches.
    lowered_resume = resume_text.lower()

    # Experience component: any marker present -> 1.0, otherwise 0.5
    experience_score = 0.5
    for marker in ["2 years", "3 years", "experience"]:
        if marker in lowered_resume:
            experience_score = 1.0
            break

    # Education component: any marker present -> 1.0, otherwise 0.0
    education_score = 0.0
    for marker in ["b.tech", "bachelor", "graduation"]:
        if marker in lowered_resume:
            education_score = 1.0
            break

    return (0.5 * skill_score) + (0.3 * experience_score) + (0.2 * education_score)

def get_resume_score(resume_text, job_description):
    """
    Flask-facing wrapper: compute the resume score with
    ``calculate_resume_score`` and return it rounded to two decimals.
    """
    return round(calculate_resume_score(resume_text, job_description), 2)
