In [26]:
import json

def extract_resume_details(resume):
    """Extract annotations from a single resume."""
    # Check if 'annotations' exists and is a list, default to an empty list otherwise
    annotations = resume.get("annotation", None)
    if annotations is None:
        annotations = []
    
    extracted_data = {}

    for annotation in annotations:
        # Check if 'label' exists and is non-empty
        if annotation.get('label'):
            label = annotation['label'][0]  # Assuming one label per annotation
            for point in annotation.get('points', []):  # Safely get 'points', default to empty list
                # Check if 'text' exists for each point
                if 'text' in point:
                    extracted_data.setdefault(label, []).append(point['text'])
        else:
            print(f"Warning: Missing label in annotation: {annotation}")

    return extracted_data

def process_resumes(file_path):
    """Process all resumes in the file."""
    with open(file_path, "r", encoding="utf-8") as file:
        resumes = json.load(file)
    
    all_resumes_data = []

    for idx, resume in enumerate(resumes, start=1):
        print(f"Processing Resume {idx}...")
        extracted_data = extract_resume_details(resume)
        all_resumes_data.append(extracted_data)

    return all_resumes_data

# Specify the path to your file
file_path = "data.json"

# Process the resumes
parsed_resumes = process_resumes(file_path)

# Display the parsed data for each resume
for idx, resume_data in enumerate(parsed_resumes, start=1):
    print(f"\nResume {idx}:")
    for field, values in resume_data.items():
        print(f"{field}: {', '.join(values)}")


Processing Resume 1...
Processing Resume 2...
Processing Resume 3...
Processing Resume 4...
Processing Resume 5...
Processing Resume 6...
Processing Resume 7...
Processing Resume 8...
Processing Resume 9...
Processing Resume 10...
Processing Resume 11...
Processing Resume 12...
Processing Resume 13...
Processing Resume 14...
Processing Resume 15...
Processing Resume 16...
Processing Resume 17...
Processing Resume 18...
Processing Resume 19...
Processing Resume 20...
Processing Resume 21...
Processing Resume 22...
Processing Resume 23...
Processing Resume 24...
Processing Resume 25...
Processing Resume 26...
Processing Resume 27...
Processing Resume 28...
Processing Resume 29...
Processing Resume 30...
Processing Resume 31...
Processing Resume 32...
Processing Resume 33...
Processing Resume 34...
Processing Resume 35...
Processing Resume 36...
Processing Resume 37...
Processing Resume 38...
Processing Resume 39...
Processing Resume 40...
Processing Resume 41...
Processing Resume 42...
P

In [28]:
output_file = "parsed_resumes.json"

with open(output_file, "w", encoding="utf-8") as file:
    json.dump(parsed_resumes, file, indent=4)

print(f"Parsed data saved to {output_file}")


Parsed data saved to parsed_resumes.json


start

In [35]:
import spacy
from sentence_transformers import SentenceTransformer, util
from transformers import pipeline
import json

# Load the NLP model for Named Entity Recognition (NER)
nlp = spacy.load("en_core_web_sm")

# Load the Sentence Transformer model for semantic matching
sentence_model = SentenceTransformer('paraphrase-MiniLM-L6-v2')

# Load the summarization model from Hugging Face
summarizer = pipeline("summarization", model="t5-small")

modules.json:   0%|          | 0.00/229 [00:00<?, ?B/s]

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


config_sentence_transformers.json:   0%|          | 0.00/122 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/3.73k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/629 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/314 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/190 [00:02<?, ?B/s]

config.json:   0%|          | 0.00/1.21k [00:00<?, ?B/s]

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


model.safetensors:   0%|          | 0.00/242M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/2.32k [00:00<?, ?B/s]

spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.39M [00:00<?, ?B/s]

Device set to use cpu


1. Load Resume Data from JSON File

In [36]:
input_file = "parsed_resumes.json"

# Function to read the resume data from the JSON file
def load_resume_data(file_path):
    with open(file_path, "r") as file:
        return json.load(file)

# Load the resume data from the input JSON file
resume_data = load_resume_data(input_file)

2. Data Extraction and Preprocessing

In [43]:
# Function to extract and preprocess resume text from multiple resumes
def preprocess_resume(resume):
   # Get the resume details safely
    name = resume.get('Name', [''])[0]
    companies_worked_at = ', '.join(resume.get('Companies worked at', []))
    skills = ', '.join(resume.get('Skills', []))
    college_name = ', '.join(resume.get('College Name', []))
    degree = ', '.join(resume.get('Degree', []))  # Use a default empty string if key doesn't exist

    # Prepare the resume text
    resume_text = f"{name} has worked at {companies_worked_at}. " \
                  f"She has skills in {skills}. " \
                  f"Graduated from {college_name} with a degree in {degree}."
    return resume_text

3. Resume Summarization (Using LLM)

In [41]:
# Function to summarize the resume
def summarize_resume(resume_text):
    summary = summarizer(resume_text, max_length=150, min_length=50, do_sample=False)
    return summary[0]['summary_text']

4. Job Fit Score Calculation (Semantic Matching) 

In [38]:
# Example job description for matching (optional)
job_description = """
We are looking for a Network Engineer with experience in Database technologies, Linux, C++, and Java.
The candidate should have a strong understanding of networking protocols and be able to work in a fast-paced environment.
"""

# Function to calculate the job fit score
def calculate_job_fit(resume_summary, job_description):
    # Encode both the resume summary and job description
    resume_embedding = sentence_model.encode(resume_summary, convert_to_tensor=True)
    job_description_embedding = sentence_model.encode(job_description, convert_to_tensor=True)
    
    # Compute cosine similarity
    cosine_similarity = util.pytorch_cos_sim(resume_embedding, job_description_embedding)
    return cosine_similarity.item()

5. Feedback Generation

In [45]:
# Function to generate feedback based on the resume
def generate_feedback(resume, job_description):
    feedback = []

     # Safely get the 'Skills' key
    skills = resume.get("Skills", [])
    
    # Example feedback logic
    if "Java" not in skills:
        feedback.append("Consider adding Java-related skills to the resume.")
    if "Database" not in skills:
        feedback.append("Consider adding Database-related skills to the resume.")
    
    # Add more feedback logic here...

    
    return feedback

6. Processing Multiple Resumes

In [46]:
# Process all resumes
output_data = []

for resume in resume_data:
    resume_text = preprocess_resume(resume)
    resume_summary = summarize_resume(resume_text)
    job_fit_score = calculate_job_fit(resume_summary, job_description)
    feedback = generate_feedback(resume, job_description)
    
    output_data.append({
        "Name": resume['Name'][0],
        "Resume Summary": resume_summary,
        "Job Fit Score": job_fit_score,
        "Feedback": feedback
    })

Your max_length is set to 150, but your input_length is only 127. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=63)
Your max_length is set to 150, but your input_length is only 128. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=64)
Your max_length is set to 150, but your input_length is only 106. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=53)
Your max_length is set to 150, but your input_length is only 99. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=49)
Y

KeyError: 'Name'

7. Output 

In [47]:
# Print the final output for all resumes
print(json.dumps(output_data, indent=4))

[
    {
        "Name": "Afreen Jamadar",
        "Resume Summary": "MICROSOFT ACCESS (Less than 1 year), Linux . Java, Java, .net, php. ADDITIONAL INFORMATION TECHNICAL SKILLS . graduated from Shivaji University Kolhapur with degree in Bachelor of Engg in Information Technology .",
        "Job Fit Score": 0.41518211364746094,
        "Feedback": [
            "Consider adding Java-related skills to the resume.",
            "Consider adding Database-related skills to the resume."
        ]
    },
    {
        "Name": "Alok Khandai",
        "Resume Summary": "alok Khandai has worked at Microsoft Corporation, HCL Technologies, Microsoft Corporation and UNISYS . she has skills in  operating environment: [...] Windows95/98/XP/NT  Database Tool: SQL Management Studio (MSSQL), Business Development Studio, Visual studio 2005 .",
        "Job Fit Score": 0.41903814673423767,
        "Feedback": [
            "Consider adding Java-related skills to the resume.",
            "Consider adding