In [1]:
import os
os.environ['HF_ENDPOINT'] = 'https://hf-mirror.com'

In [6]:
from datasets import load_dataset
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer, GenerationConfig, TrainingArguments, Trainer, AutoModelForCausalLM
import torch
import time
import evaluate
import pandas as pd
import numpy as np
import time

In [2]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [173]:
from huggingface_hub import login
login(token = 'Your token')

# Load model directly
from transformers import AutoTokenizer, AutoModelForCausalLM

tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-3.1-8B")
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token
model = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-3.1-8B")
model = model.to(device)

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

In [147]:
def print_number_of_trainable_model_parameters(model):
    trainable_model_params = 0
    all_model_params = 0
    for _, param in model.named_parameters():
        all_model_params += param.numel()
        if param.requires_grad:
            trainable_model_params += param.numel()
    return f"trainable model parameters: {trainable_model_params}\nall model parameters: {all_model_params}\npercentage of trainable model parameters: {100 * trainable_model_params / all_model_params:.2f}%"
print_number_of_trainable_model_parameters(model)

'trainable model parameters: 83886080\nall model parameters: 8114147328\npercentage of trainable model parameters: 1.03%'

In [148]:
from peft import LoraConfig, get_peft_model, TaskType

lora_config = LoraConfig(
    r=32, # Rank
    lora_alpha=32,
    target_modules=["q_proj", "k_proj", "v_proj", "up_proj", "down_proj", "o_proj", "gate_proj"],
    lora_dropout=0.05,
    bias="none"
    #task_type=TaskType.SEQ_2_SEQ_LM # FLAN-T5
)


In [149]:
peft_model = get_peft_model(model,
                            lora_config)
peft_model = peft_model.to(device) 
# Print number of trainable model parameters
# Your Code Here
# Function to print the number of trainable parameters
def print_trainable_parameters(model):
    trainable_params = 0
    all_params = 0
    for param in model.parameters():
        all_params += param.numel()
        if param.requires_grad:
            trainable_params += param.numel()
    print(f"Trainable parameters: {trainable_params} ({100 * trainable_params / all_params:.2f}%) of total parameters")

# Print the number of trainable parameters
print_trainable_parameters(peft_model)

Trainable parameters: 83886080 (1.03%) of total parameters


### Demo data change later, neeed to load the real data into a similar format later

In [132]:
import json
import pandas as pd

# Replace with your actual file paths
CSV_FILE = "DATA_chunk.csv"        # CSV containing columns: ID, extracted_job_details, summarized_sections
IMPROVED_JSON_FILE = "combined_results_v2.json"  # JSON with {ID: improved_resume_dict}
OUTPUT_JSONL = "training_data_v2.jsonl"

In [36]:
# Master prompt: replace with your actual MASTER prompt
MASTER_PROMPT = """I am a highly experienced career advisor and resume writing expert with 15 years of specialized experience.

Primary role: Craft exceptional resumes tailored to specific job descriptions, optimized for both ATS systems and human readers.

# Instructions for creating optimized resumes and cover letters
1. Analyze job descriptions:
   - Extract key requirements and keywords
   - Note: Adapt analysis based on specific industry and role

2. Create compelling resumes:
   - Highlight quantifiable achievements (e.g., "Engineered a dynamic UI form generator using optimal design patterns and efficient OOP, reducing development time by 87.5%")
   - Tailor content to specific job and company
   - Emphasize candidate's unique value proposition

3. Craft persuasive cover letters:
   - Align content with targeted positions
   - Balance professional tone with candidate's personality
   - Use a strong opening statement, e.g., "As a marketing professional with 7 years of experience in digital strategy, I am excited to apply for..."
   - Identify and emphasize soft skills valued in the target role/industry. Provide specific examples demonstrating these skills

4. Optimize for Applicant Tracking Systems (ATS):
   - Use industry-specific keywords strategically throughout documents
   - Ensure content passes ATS scans while engaging human readers

5. Provide industry-specific guidance:
   - Incorporate current hiring trends
   - Prioritize relevant information (apply "6-second rule" for quick scanning)
   - Use clear, consistent formatting

6. Apply best practices:
   - Quantify achievements where possible
   - Use specific, impactful statements instead of generic ones
   - Update content based on latest industry standards
   - Use active voice and strong action verbs

Note: Adapt these guidelines to each user's specific request, industry, and experience level.

Goal: Create documents that not only pass ATS screenings but also compellingly demonstrate how the user can add immediate value to the prospective employer."""

# Section-specific prompts (Use your actual prompts)
ACHIEVEMENTS_PROMPT = """ACHIEVEMENTS = """ + """You are going to write a JSON resume section of "Achievements" for an applicant applying for job posts.

Step to follow:
1. Analyze my achievements details to match job requirements.
2. Create a JSON resume section that highlights strongest matches
3. Optimize JSON section for clarity and relevance to the job description.

Instructions:
1. Focus: Craft relevant achievements aligned with the job description.
2. Honesty: Prioritize truthfulness and objective language.
3. Specificity: Prioritize relevance to the specific job over general achievements.
4. Style:
  4.1. Voice: Use active voice whenever possible.
  4.2. Proofreading: Ensure impeccable spelling and grammar.

<achievements>
{section_data}
</achievements>

<job_description>
{job_description}
</job_description>

<example>
  "achievements": [
    "Won E-yantra Robotics Competition 2018 - IITB.",
    "1st prize in “Prompt Engineering Hackathon 2023 for Humanities”",
    "Received the 'Extra Miller - 2021' award at Winjit Technologies for outstanding performance.",
    [and So on ...]
  ]
</example>

{format_instructions}
"""
CERTIFICATIONS_PROMPT = """CERTIFICATIONS = """ + """You are going to write a JSON resume section of "Certifications" for an applicant applying for job posts.

Step to follow:
1. Analyze my certification details to match job requirements.
2. Create a JSON resume section that highlights strongest matches
3. Optimize JSON section for clarity and relevance to the job description.

Instructions:
1. Focus: Include relevant certifications aligned with the job description.
2. Proofreading: Ensure impeccable spelling and grammar.

<CERTIFICATIONS>
{section_data}
</CERTIFICATIONS>

<job_description>
{job_description}
</job_description>

<example>
  "certifications": [
    {{
      "name": "Deep Learning Specialization",
      "by": "DeepLearning.AI, Coursera Inc.",
      "link": "https://www.coursera.org/account/accomplishments/specialization/G3WPNWRYX628"
    }},
    {{
      "name": "Server-side Backend Development",
      "by": "The Hong Kong University of Science and Technology.",
      "link": "https://www.coursera.org/account/accomplishments/verify/TYMQX23D4HRQ"
    }}
    ...
  ],
</example>

{format_instructions}
"""
EDUCATION_PROMPT = """EDUCATIONS = """ + """You are going to write a JSON resume section of "Education" for an applicant applying for job posts.

Step to follow:
1. Analyze my education details to match job requirements.
2. Create a JSON resume section that highlights strongest matches
3. Optimize JSON section for clarity and relevance to the job description.

Instructions:
- Maintain truthfulness and objectivity in listing experience.
- Prioritize specificity - with respect to job - over generality.
- Proofread and Correct spelling and grammar errors.
- Aim for clear expression over impressiveness.
- Prefer active voice over passive voice.

<Education>
{section_data}
</Education>

<job_description>
{job_description}
</job_description>

<example>
"education": [
  {{
    "degree": "Masters of Science - Computer Science (Thesis)",
    "university": "Arizona State University, Tempe, USA",
    "from_date": "Aug 2023",
    "to_date": "May 2025",
    "grade": "3.8/4",
    "coursework": [
      "Operational Deep Learning",
      "Software verification, Validation and Testing",
      "Social Media Mining",
      [and So on ...]
    ]
  }}
  [and So on ...]
],
</example>

{format_instructions}
"""
PROJECTS_PROMPT = """PROJECTS = """ + """You are going to write a JSON resume section of "Project Experience" for an applicant applying for job posts.

Step to follow:
1. Analyze my project details to match job requirements.
2. Create a JSON resume section that highlights strongest matches
3. Optimize JSON section for clarity and relevance to the job description.

Instructions:
1. Focus: Craft three highly relevant project experiences aligned with the job description.
2. Content:
  2.1. Bullet points: 3 per experience, closely mirroring job requirements.
  2.2. Impact: Quantify each bullet point for measurable results.
  2.3. Storytelling: Utilize STAR methodology (Situation, Task, Action, Result) implicitly within each bullet point.
  2.4. Action Verbs: Showcase soft skills with strong, active verbs.
  2.5. Honesty: Prioritize truthfulness and objective language.
  2.6. Structure: Each bullet point follows "Did X by doing Y, achieved Z" format.
  2.7. Specificity: Prioritize relevance to the specific job over general achievements.
3. Style:
  3.1. Clarity: Clear expression trumps impressiveness.
  3.2. Voice: Use active voice whenever possible.
  3.3. Proofreading: Ensure impeccable spelling and grammar.

<PROJECTS>
{section_data}
</PROJECTS>

<job_description>
{job_description}
</job_description>

<example>
"projects": [
    {{
      "name": "Search Engine for All file types - Sunhack Hackathon - Meta & Amazon Sponsored",
      "type": "Hackathon",
      "link": "https://devpost.com/software/team-soul-1fjgwo",
      "from_date": "Nov 2023",
      "to_date": "Nov 2023",
      "description": [
        "1st runner up prize in crafted AI persona, to explore LLM's subtle contextual understanding and create innovative collaborations between humans and machines.",
        "Devised a TabNet Classifier Model having 98.7% accuracy in detecting forest fire through IoT sensor data, deployed on AWS and edge devices 'Silvanet Wildfire Sensors' using technologies TinyML, Docker, Redis, and celery.",
        [and So on ...]
      ]
    }}
    [and So on ...]
  ]
  </example>
  
  {format_instructions}
  """
SKILLS_PROMPT = """SKILLS = """ + """You are going to write a JSON resume section of "Skills" for an applicant applying for job posts.

Step to follow:
1. Analyze my Skills details to match job requirements.
2. Create a JSON resume section that highlights strongest matches.
3. Optimize JSON section for clarity and relevance to the job description.

Instructions:
- Specificity: Prioritize relevance to the specific job over general achievements.
- Proofreading: Ensure impeccable spelling and grammar.

<SKILL_SECTION>
{section_data}
</SKILL_SECTION>

<job_description>
{job_description}
</job_description>

<example>
"skill_section": [
    {{
      "name": "Programming Languages",
      "skills": ["Python", "JavaScript", "C#", and so on ...]
    }},
    {{
      "name": "Cloud and DevOps",
      "skills": [ "Azure", "AWS", and so on ... ]
    }},
    and so on ...
  ]
</example>
  
  {format_instructions}
  """
EXPERIENCE_PROMPT = """EXPERIENCE = """ + """You are going to write a JSON resume section of "Work Experience" for an applicant applying for job posts.

Step to follow:
1. Analyze my Work details to match job requirements.
2. Create a JSON resume section that highlights strongest matches
3. Optimize JSON section for clarity and relevance to the job description.

Instructions:
1. Focus: Craft three highly relevant work experiences aligned with the job description.
2. Content:
  2.1. Bullet points: 3 per experience, closely mirroring job requirements.
  2.2. Impact: Quantify each bullet point for measurable results.
  2.3. Storytelling: Utilize STAR methodology (Situation, Task, Action, Result) implicitly within each bullet point.
  2.4. Action Verbs: Showcase soft skills with strong, active verbs.
  2.5. Honesty: Prioritize truthfulness and objective language.
  2.6. Structure: Each bullet point follows "Did X by doing Y, achieved Z" format.
  2.7. Specificity: Prioritize relevance to the specific job over general achievements.
3. Style:
  3.1. Clarity: Clear expression trumps impressiveness.
  3.2. Voice: Use active voice whenever possible.
  3.3. Proofreading: Ensure impeccable spelling and grammar.

<work_experience>
{section_data}
</work_experience>

<job_description>
{job_description}
</job_description>

<example>
"work_experience": [
    {{
      "role": "Software Engineer",
      "company": "Winjit Technologies",
      "location": "Pune, India"
      "from_date": "Jan 2020",
      "to_date": "Jun 2022",
      "description": [
        "Engineered 10+ RESTful APIs Architecture and Distributed services; Designed 30+ low-latency responsive UI/UX application features with high-quality web architecture; Managed and optimized large-scale Databases. (Systems Design)",  
        "Initiated and Designed a standardized solution for dynamic forms generation, with customizable CSS capabilities feature, which reduces development time by 8x; Led and collaborated with a 12 member cross-functional team. (Idea Generation)"  
        and so on ...
      ]
    }},
    {{
      "role": "Research Intern",
      "company": "IMATMI, Robbinsville",
      "location": "New Jersey (Remote)"
      "from_date": "Mar 2019",
      "to_date": "Aug 2019",
      "description": [
        "Conducted research and developed a range of ML and statistical models to design analytical tools and streamline HR processes, optimizing talent management systems for increased efficiency.",
        "Created 'goals and action plan generation' tool for employees, considering their weaknesses to facilitate professional growth.",
        and so on ...
      ]
    }}
  ],
</example>

{format_instructions}
"""
PERSONAL_INFO_PROMPT = """PERSONAL_INFO = """ + """"Include a 'personal_info' key in the single output JSON object. This key must contain the candidate's personal information exactly as provided. "
        "Do not alter any personal details. No extra commentary or explanation.\n\n"
        "<personal_info>{section_data}</personal_info>\n"
        "<job_description>{job_description}</job_description>"
"""

# Updated SECTION_CONFIG to include personal info
SECTION_CONFIG = {
    "personal_info": {
        "original_key": "personal_information",
        "improved_key": "personal_info",
        "prompt_template": PERSONAL_INFO_PROMPT
    },
    "achievements": {
        "original_key": "achievements",
        "improved_key": "achievements",
        "prompt_template": ACHIEVEMENTS_PROMPT
    },
    "certifications": {
        "original_key": "certificate",  # original called it 'certificate'
        "improved_key": "certifications",
        "prompt_template": CERTIFICATIONS_PROMPT
    },
    "education": {
        "original_key": "education",
        "improved_key": "education",
        "prompt_template": EDUCATION_PROMPT
    },
    "projects": {
        "original_key": "projects",
        "improved_key": "projects",
        "prompt_template": PROJECTS_PROMPT
    },
    "skill_section": {
        "original_key": "skills",
        "improved_key": "skill_section",
        "prompt_template": SKILLS_PROMPT
    },
    "work_experience": {
        "original_key": "experience",
        "improved_key": "work_experience",
        "prompt_template": EXPERIENCE_PROMPT
    }
}

### Prompt Version 2

In [177]:
MASTER_PROMPT = """"I am an experienced career advisor specializing in crafting exceptional resumes and cover letters tailored to specific job descriptions, optimized for ATS systems and human readers.

Instructions for creating optimized resumes and cover letters:

Analyze Job Descriptions: Extract key requirements and industry-specific keywords.
Create Compelling Resumes: Highlight quantifiable achievements, tailor content to the role, and emphasize the candidate's value proposition.
Craft Persuasive Cover Letters: Align with the job, balance professionalism with personality, and demonstrate soft skills through specific examples.
Optimize for ATS: Strategically integrate keywords and ensure documents pass ATS scans while remaining engaging for human readers.
Provide Industry-Specific Guidance: Reflect current trends, use clear formatting, and prioritize relevance with concise, impactful statements.
Goal: Deliver tailored documents that highlight the candidate's value, pass ATS screenings, and capture the employer's attention."
Apply best practices: Quantify achievements where possible. Use specific, impactful statements instead of generic ones, Use active voice and strong action verbs

Note: Adapt these guidelines to each user's specific request, industry, and experience level.

Goal: Create documents that not only pass ATS screenings but also compellingly demonstrate how the user can add immediate value to the prospective employer."""

ACHIEVEMENTS_PROMPT = """ACHIEVEMENTS = """ + """You are going to write a JSON resume section of "Achievements" for an applicant applying for job posts.

Step to follow:
1. Analyze my achievements details to match job requirements.
2. Create a JSON resume section that highlights strongest matches
3. Optimize JSON section for clarity and relevance to the job description.

Instructions: Modify the achievements section to make it more aligned with the job descriptions and more professional.
1. Focus: Craft relevant achievements aligned with the job description.
2. Honesty: Prioritize truthfulness and objective language.
3. Specificity: Prioritize relevance to the specific job over general achievements.
4. Style:
  4.1. Voice: Use active voice whenever possible.
  4.2. Proofreading: Ensure impeccable spelling and grammar.
  

<achievements>
{section_data}
</achievements>

<job_description>
{job_description}
</job_description>
 
"""
CERTIFICATIONS_PROMPT = """CERTIFICATIONS = """ + """You are going to write a JSON resume section of "Certifications" for an applicant applying for job posts.

Step to follow:
1. Analyze my certification details to match job requirements.
2. Create a JSON resume section that highlights strongest matches
3. Optimize JSON section for clarity and relevance to the job description.

Instructions: Modify the CERTIFICATIONS section to make it more aligned with the job descriptions and more professional.
1. Focus: Include relevant certifications aligned with the job description.
2. Proofreading: Ensure impeccable spelling and grammar.

<CERTIFICATIONS>
{section_data}
</CERTIFICATIONS>

<job_description>
{job_description}
</job_description>

"""
EDUCATION_PROMPT = """EDUCATIONS = """ + """You are going to write a JSON resume section of "Education" for an applicant applying for job posts.

Step to follow:
1. Analyze my education details to match job requirements.
2. Create a JSON resume section that highlights strongest matches
3. Optimize JSON section for clarity and relevance to the job description.

Instructions: Modify the Education section to make it more aligned with the job descriptions and more professional.
- Maintain truthfulness and objectivity in listing experience.
- Prioritize specificity - with respect to job - over generality.
- Proofread and Correct spelling and grammar errors.
- Aim for clear expression over impressiveness.
- Prefer active voice over passive voice.

<Education>
{section_data}
</Education>

<job_description>
{job_description}
</job_description>

"""
PROJECTS_PROMPT = """PROJECTS = """ + """You are going to write a JSON resume section of "Project Experience" for an applicant applying for job posts.

Step to follow:
1. Analyze my project details to match job requirements.
2. Create a JSON resume section that highlights strongest matches
3. Optimize JSON section for clarity and relevance to the job description.

Instructions: Modify the PROJECTS section to make it more aligned with the job descriptions and more professional.
1. Focus: Craft three highly relevant project experiences aligned with the job description.
2. Content:
  2.1. Bullet points: 3 per experience, closely mirroring job requirements.
  2.2. Impact: Quantify each bullet point for measurable results.
  2.3. Storytelling: Utilize STAR methodology (Situation, Task, Action, Result) implicitly within each bullet point.
  2.4. Action Verbs: Showcase soft skills with strong, active verbs.
  2.5. Honesty: Prioritize truthfulness and objective language.
  2.6. Structure: Each bullet point follows "Did X by doing Y, achieved Z" format.
  2.7. Specificity: Prioritize relevance to the specific job over general achievements.
3. Style:
  3.1. Clarity: Clear expression trumps impressiveness.
  3.2. Voice: Use active voice whenever possible.
  3.3. Proofreading: Ensure impeccable spelling and grammar.
4. Modify the PROJECTS sectio to make it more aligned with the job descriptions and more professional.

<PROJECTS>
{section_data}
</PROJECTS>

<job_description>
{job_description}
</job_description>

  """
SKILLS_PROMPT = """SKILLS = """ + """You are going to write a JSON resume section of "Skills" for an applicant applying for job posts.

Step to follow:
1. Analyze my Skills details to match job requirements.
2. Create a JSON resume section that highlights strongest matches.
3. Optimize JSON section for clarity and relevance to the job description.

Instructions:Modify the SKILL_SECTION sectio to make it more aligned with the job descriptions and more professional. 
- Specificity: Prioritize relevance to the specific job over general achievements.
- Proofreading: Ensure impeccable spelling and grammar.

<SKILL_SECTION>
{section_data}
</SKILL_SECTION>

<job_description>
{job_description}
</job_description>

  """
EXPERIENCE_PROMPT = """EXPERIENCE = """ + """You are going to write a JSON resume section of "Work Experience" for an applicant applying for job posts .

Step to follow:
1. Analyze my Work details to match job requirements.
2. Create a JSON resume section that highlights strongest matches
3. Optimize JSON section for clarity and relevance to the job description.

Instructions:
1. Focus: Craft three highly relevant work experiences aligned with the job description.
2. Content:
  2.1. Bullet points: 3 per experience, closely mirroring job requirements.
  2.2. Impact: Quantify each bullet point for measurable results.
  2.3. Storytelling: Utilize STAR methodology (Situation, Task, Action, Result) implicitly within each bullet point.
  2.4. Action Verbs: Showcase soft skills with strong, active verbs.
  2.5. Honesty: Prioritize truthfulness and objective language.
  2.6. Structure: Each bullet point follows "Did X by doing Y, achieved Z" format.
  2.7. Specificity: Prioritize relevance to the specific job over general achievements.
3. Style:
  3.1. Clarity: Clear expression trumps impressiveness.
  3.2. Voice: Use active voice whenever possible.
  3.3. Proofreading: Ensure impeccable spelling and grammar.
4.Modify the work experience sectio to make it more aligned with the job descriptions and more professional.

<work_experience>
{section_data}
</work_experience>

<job_description>
{job_description}
</job_description>
"""
PERSONAL_INFO_PROMPT = """PERSONAL_INFO = """ + """"Include a 'personal_info' key in the single output JSON object. This key must contain the candidate's personal information exactly as provided. "
        "Do not alter any personal details. No extra commentary or explanation.\n\n"
        "<personal_info>{section_data}</personal_info>\n"
"""

In [140]:
df = pd.read_csv(CSV_FILE)

with open(IMPROVED_JSON_FILE, "r", encoding="utf-8") as f:
    improved_data = json.load(f)

with open(OUTPUT_JSONL, "w", encoding="utf-8") as outfile:
    for _, row in df.iterrows():
        resume_id = str(row["ID"])
        job_details = row["extracted_job_details"]
        original_sections = row["summarized_sections"]

        # Parse fields that are JSON strings
        if isinstance(original_sections, str):
            original_sections = json.loads(original_sections)

        # Retrieve improved resume by ID
        if resume_id not in improved_data:
            continue

        improved_resume_str = improved_data[resume_id]
        # improved_resume_str might be a JSON string, parse it
        if isinstance(improved_resume_str, str):
            improved_resume = json.loads(improved_resume_str)
        else:
            improved_resume = improved_resume_str

        # Convert job details to a plain string (no JSON)
        job_description = job_details if isinstance(job_details, str) else json.dumps(job_details)

        # For each section, build prompt and completion
        for section_name, config in SECTION_CONFIG.items():
            original_section_data = original_sections.get(config["original_key"], None)
            
            improved_section_data = improved_resume.get(config["improved_key"], None)

            # Fallback: Handle missing personal information explicitly
            if section_name == "personal_info" and not original_section_data:
                print(f"Warning: Missing personal_information for ID {resume_id}")
                original_section_data = original_sections.get("personal_information", "")
            
            # If either original or improved is missing, skip
            if original_section_data is None or improved_section_data is None:
                continue

            # Convert original_section_data to JSON string if needed
            original_section_json = (
                original_section_data 
                if isinstance(original_section_data, str) 
                else json.dumps(original_section_data, ensure_ascii=False)
            )
            #print(original_section_json)
            # Ensure improved_section_data is JSON string for completion
            improved_section_json = json.dumps(improved_section_data, ensure_ascii=False)
            #print(config["prompt_template"]
            # Construct the prompt
            prompt = (
                MASTER_PROMPT + "\n\n" +
                config["prompt_template"]
                .replace("{section_data}", original_section_json)
                .replace("{job_description}", job_description)  # Use plain string here
            )

            # Construct the training entry
            data_entry = {
                "prompt": prompt,
                "completion": "<|output starts|>" + improved_section_json+"\n<|out ends|>"
            }
            outfile.write(json.dumps(data_entry, ensure_ascii=False) + "\n")


In [8]:
import json

training_data = []

with open("training_data_v2.jsonl", "r", encoding="utf-8") as file:
    for line in file:
        data = json.loads(line)
        training_data.append(data)

In [11]:
from datasets import load_dataset, Dataset
# Convert this to a Hugging Face Dataset
dataset = Dataset.from_list(training_data)

In [12]:
def preprocess_function(examples):
    model_inputs = {
        "input_ids": [],
        "attention_mask": [],
        "labels": []
    }

    # Here, we assume that `examples` is a batch of data containing lists of prompts and completions.
    for p, c in zip(examples["prompt"], examples["completion"]):
        # Combine the prompt and completion for the model input
        # You can customize how you concatenate them if needed
        full_text = p + c

        # Tokenize the combined text
        tokenized = tokenizer(full_text, max_length=512, truncation=True)

        # For causal LM training, labels are typically the same as input_ids
        tokenized["labels"] = tokenized["input_ids"].copy()

        model_inputs["input_ids"].append(tokenized["input_ids"])
        model_inputs["attention_mask"].append(tokenized["attention_mask"])
        model_inputs["labels"].append(tokenized["labels"])

    return model_inputs


In [13]:
tokenized_dataset = dataset.map(preprocess_function, batched=True, remove_columns=dataset.column_names)
# Create a small evaluation set (20 samples)
eval_dataset = tokenized_dataset.select(range(20))
train_dataset = tokenized_dataset.select(range(20, len(tokenized_dataset)))
#train_dataset = tokenized_dataset

Map:   0%|          | 0/3695 [00:00<?, ? examples/s]

In [146]:
from transformers import DataCollatorForLanguageModeling
data_collator = DataCollatorForLanguageModeling(
    tokenizer=tokenizer,
    mlm=False
)

In [150]:
training_args = TrainingArguments(
    output_dir="./finetuned-llama-lora",
    per_device_train_batch_size=1,
    per_device_eval_batch_size=1,
    learning_rate=2e-4,
    num_train_epochs=2,
    logging_steps=300,
    save_steps=3000,
    #save_total_limit=1,
    gradient_accumulation_steps=1,
    fp16=True if torch.cuda.is_available() else False,
    eval_strategy="no",
    #eval_steps=300,
    logging_dir="./logs"
)

In [151]:
trainer = Trainer(
    model=peft_model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=None,
    data_collator=data_collator
)

  self.scaler = torch.cuda.amp.GradScaler(**kwargs)
Detected kernel version 4.19.90, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.


In [152]:
trainer.train()

# Save the LoRA adapter
trainer.save_model("./finetuned-llama-lora")

Step,Training Loss
300,0.4788
600,0.3948
900,0.395
1200,0.3885
1500,0.385
1800,0.3433
2100,0.3338
2400,0.3184
2700,0.2633
3000,0.2826


In [184]:
tokenizer.save_pretrained("./finetuned-llama-lora")

('./finetuned-llama-lora/tokenizer_config.json',
 './finetuned-llama-lora/special_tokens_map.json',
 './finetuned-llama-lora/tokenizer.json')

In [153]:
job_details = """{
  "job_title": "iOS Engineer",
  "company_name": "Duolingo",
  "location": "Not Specified",
  "salary_range": "$148,800—$274,600 USD",
  "job_areas": [
    "Longform Content Team (Learning R&D Area)",
    "Monetization Area",
    "Music Team"
  ],
  "keywords": [
    "iOS Engineer",
    "Swift",
    "Cocoa Touch",
    "iOS SDK",
    "Mobile Application Development",
    "Multithreaded Programming",
    "Unit Tests",
    "Testable Code",
    "Performance Optimization",
    "Data Structures",
    "Algorithms",
    "Software Design",
    "Product Design Collaboration",
    "Mentorship",
    "API Design"
  ],
  "job_duties_and_responsibilities": [
    "Collaborate on software projects with product design and backend aspects.",
    "Develop, release, and maintain native iOS applications, primarily in Swift.",
    "Mentor and set technical direction for junior engineers across the company.",
    "Build engaging and immersive learning experiences in specific teams:",
    "Longform Content Team: Enable learners to practice language skills through immersive experiences like stories and duoradio.",
    "Monetization Area: Improve premium subscriptions, pricing, packaging, and monetization game mechanics (Hearts, Gems).",
    "Music Team: Develop features to teach core music skills like sight reading and ear training."
  ],
  "required_qualifications": [
    "Bachelor’s degree in Computer Science or a related technical field.",
    "Strong foundation in computer science, including data structures, algorithms, and software design.",
    "Programming experience in Swift."
  ],
  "preferred_qualifications": [
    "Minimum of 5 years of programming experience in Swift.",
    "Strong technical knowledge of iOS mobile application development for iPhone/iPad using Swift with Cocoa Touch.",
    "Experience designing clean and maintainable APIs.",
    "Experience with multithreaded programming.",
    "Experience writing unit tests and testable code.",
    "Knowledge of iOS SDK performance tools and optimization techniques."
  ],
  "benefits": [
    "Equity compensation.",
    "World-class benefits.",
    "Inclusive salary range for all US locations.",
    "Assistance or accommodation during interviews as needed."
  ],
  "company_overview": {
    "company_name": "Duolingo",
    "mission": "To make education freely available and fun for everyone.",
    "values": [
      "Diversity, equity, and inclusion.",
      "Innovation in education technology.",
      "Building engaging and immersive learning experiences."
    ],
    "equal_employment_statement": "Duolingo is proud to be an Equal Employment Opportunity employer. We do not discriminate based on race, gender, disability, or any legally protected characteristics."
  }
}
"""
original_sections = {
    "personal_information": [{
        "name": "Tomisin Adeyemi",
        "email": "tomisinadeyemi7@gmail.com",
        "linkedin": "https://linkedin.com/in/ota231",
        "github": "https://github.com/ota231"
    }],
    "achievements": [
        "Enhanced AWS Aurora GlobalDB API architecture to incorporate recovery of misconfigured encrypted clusters into existing workflows, benefiting 30,000+ customers.",
        "Achieved stretch goal by developing an automated recovery feature to seamlessly recover clusters without customer intervention.",
        "Led a 4-member team to develop a movie recommendation system, placing 4th in NYC Kaggle competition with a Root Mean Squared Error of 17.97%."
    ],
    "certificate": [
        {
            "name": "Presidential Honors Scholar",
            "organization": "New York University",
            "details": "Top 10% in academics."
        },
        {
            "name": "Davis Scholar",
            "organization": "Davis Foundation",
            "details": "$35,000 scholarship."
        }
    ],
    "education": [
        {
            "degree": "BA in Computer & Data Science (Honors)",
            "minor": "Mathematics",
            "university": "New York University",
            "location": "New York, NY",
            "from_date": "Sep. 2021",
            "to_date": "May 2025",
            "gpa": "3.7",
            "coursework": [
                "Data Structures",
                "Basic Algorithms",
                "Operating Systems",
                "Machine Learning",
                "Deep Learning",
                "NLP",
                "Data Management & Analysis",
                "Causal Inference",
                "Predictive Analytics",
                "Responsible Data Science"
            ]
        }
    ],
    "projects": [
        {
            "name": "Linguistic Features & Multi-label Emotion Classification",
            "dates": "Apr. 2023 – May 2023",
            "description": [
                "Led a 4-member team to write the code and paper for an NLP research project assessing the effectiveness of linguistic features for emotion classification.",
                "Preprocessed 50k-row dataset, leveraging TFIDF with unigrams for feature extraction; engineered custom textual features for model training.",
                "Achieved 89.03% Multilabel Accuracy and 58.91% Micro-F1 score, outperforming baseline BERT model by 8%."
            ]
        },
        {
            "name": "Movie Recommendation System",
            "dates": "Jan. 2023 – Apr. 2023",
            "description": [
                "Built a movie recommendation system using content-based filtering, collaborative filtering, and matrix factorization techniques.",
                "Collaborated to clean, preprocess, and merge 3 datasets totaling 60k rows, utilizing duplicate identification and data imputation techniques.",
                "Placed 4th out of 15 teams in NYC Kaggle competition with a Root Mean Squared Error of 17.97%."
            ]
        }
    ],
    "skills": {
        "Languages": ["Python", "Java", "C", "C++ (familiar)", "R (familiar)"],
        "Developer Tools": ["Git", "Docker", "UNIX", "Regex"],
        "Databases": ["SQL", "MongoDB", "PostgreSQL", "pgAdmin"]
    },
    "experience": [
        {
            "role": "Software Engineering Intern",
            "company": "Amazon",
            "location": "Seattle, WA",
            "from_date": "Jun. 2023",
            "to_date": "Aug. 2023",
            "responsibilities": [
                "Enhanced AWS Aurora GlobalDB API architecture to incorporate recovery of misconfigured encrypted clusters into existing workflows, benefiting 30,000+ customers.",
                "Innovated a cross-functional solution to resolve multi-team issues, surpassing initial engineering recommendations.",
                "Developed an automated recovery feature to seamlessly recover clusters without customer intervention."
            ]
        },
        {
            "role": "Undergraduate Teaching Assistant",
            "company": "New York University",
            "location": "New York, NY",
            "from_date": "Sep. 2022",
            "to_date": "Present",
            "responsibilities": [
                "Provided in-class tutoring for 80+ students and held office hours open to 600+ students for the course Introduction to Computer Programming (Python).",
                "Created supplemental lecture materials using Google Colab notebooks, providing additional opportunities for students to reinforce programming skills.",
                "Helped students build problem-solving skills by breaking down complex programming problems into manageable steps."
            ]
        }
    ]
}

In [154]:
peft_model.eval()

PeftModel(
  (base_model): LoraModel(
    (model): LlamaForCausalLM(
      (model): LlamaModel(
        (embed_tokens): Embedding(128256, 4096)
        (layers): ModuleList(
          (0-31): 32 x LlamaDecoderLayer(
            (self_attn): LlamaSdpaAttention(
              (q_proj): lora.Linear(
                (base_layer): Linear(in_features=4096, out_features=4096, bias=False)
                (lora_dropout): ModuleDict(
                  (default): Dropout(p=0.05, inplace=False)
                )
                (lora_A): ModuleDict(
                  (default): Linear(in_features=4096, out_features=32, bias=False)
                )
                (lora_B): ModuleDict(
                  (default): Linear(in_features=32, out_features=4096, bias=False)
                )
                (lora_embedding_A): ParameterDict()
                (lora_embedding_B): ParameterDict()
                (lora_magnitude_vector): ModuleDict()
              )
              (k_proj): lora.Linear(
     

In [None]:
 +"Please provide a modified version of the " + section_name + " section below.\n\n" +
    "<|output|>"

In [179]:
import re

def generate_section(original_data, section_name):
    # Convert job details to a string
    job_details_str = json.dumps(job_details, ensure_ascii=False)
    # Convert original_data to JSON if not a string
    original_data_str = json.dumps(original_data, ensure_ascii=False) if not isinstance(original_data, str) else original_data

    prompt = (
        MASTER_PROMPT + "\n\n" +
        SECTION_CONFIG[section_name]["prompt_template"]
        .replace("{section_data}", original_data_str)
        .replace("{job_description}", job_details_str)
       
    )

    # Tokenize the input
    inputs = tokenizer(prompt, return_tensors="pt").to(peft_model.device)

    # Generate output
    with torch.no_grad():
        outputs = peft_model.generate(
            **inputs,
            max_new_tokens=700,
            temperature=0.8,
            top_p=0.95,
            do_sample=True,
            eos_token_id=tokenizer.eos_token_id
        )

    # Decode the generated text
    generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)

    # The model output may contain the prompt and completion mixed, 
    # depending on how the model was finetuned. You may need to parse out 
    # the completion carefully. For demonstration, we'll just return the entire output:
    # Ideally, your finetuned model should produce clearly delimited JSON or structured text.
    return generated_text

# Perform inference section by section
improved_resume = {}
full_improved = {}
for section_name, config in SECTION_CONFIG.items():
    print(section_name)
    original_section_data = original_sections.get(config["original_key"], None)
    if original_section_data is None:
        print("Not Found")
        continue
    improved_section = generate_section(original_section_data, section_name)
    full_improved[config["improved_key"]] = improved_section
    if section_name == "personal_info":
        pattern = r"<personal_info>(.*?)</personal_info>"
        match = re.search(pattern, improved_section, re.DOTALL)

        if match:
            # Extract the content between the tags
            personal_info_content = match.group(1).strip()
            print(personal_info_content)
            improved_resume[config["improved_key"]] = personal_info_content
        else:
            improved_resume[config["improved_key"]] = None

    else:
        pattern = r"<\|output starts\|>(.*?)<\|out ends\|>"
        #print(improved_section)
        match = re.search(pattern, improved_section, re.DOTALL)
        extracted_content = None
        if match:
            extracted_content = match.group(1) # Extract the JSON content
            #achievements = json.loads(json_content).get("achievements", [])  # Parse JSON and get achievements list
            print("Extracted:")
            print(extracted_content)
        else:
            # Retry with extra prompt
            '''
            print("Retrying for", section_name)
            retry_prompt = (
                f"Please provide the content for the {section_name} section, "
                "clearly enclosed between <|output starts|> and <|out ends|> tags."
            )
            improved_section = generate_section(original_section_data + retry_prompt, section_name)
            match = re.search(pattern, improved_section, re.DOTALL)
            '''
            pattern_two = r"<out>(.*?)<\out>"
            extracted_content = match.group(1) if match else None
        if "{format_instructions}" in str(extracted_content):
            extracted_content = extracted_content.replace("{format_instructions}", "").strip()
        improved_resume[config["improved_key"]] = extracted_content

# Now improved_resume will hold the generated improved sections
# You can post-process them if needed
#print(json.dumps(improved_resume, ensure_ascii=False, indent=2))

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


personal_info


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


[{"name": "Tomisin Adeyemi", "email": "tomisinadeyemi7@gmail.com", "linkedin": "https://linkedin.com/in/ota231", "github": "https://github.com/ota231"}]
achievements


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Extracted:
{"Enhanced AWS Aurora GlobalDB API architecture to incorporate recovery of misconfigured encrypted clusters into existing workflows, benefiting 30,000+ customers.", "Achieved stretch goal by developing an automated recovery feature to seamlessly recover clusters without customer intervention.", "Led a 4-member team to develop a movie recommendation system, placing 4th in NYC Kaggle competition with a Root Mean Squared Error of 17.97%"}
{format_instructions}

certifications


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Extracted:
{"Presidential Honors Scholar": "Top 10% in academics.", "Davis Scholar": "$35,000 scholarship."}
{format_instructions}

education


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Extracted:
{"degree": "BA in Computer & Data Science (Honors)", "minor": "Mathematics", "university": "New York University", "location": "New York, NY", "from_date": "Sep 2021", "to_date": "May 2025", "gpa": "3.7", "coursework": ["Data Structures", "Basic Algorithms", "Operating Systems", "Machine Learning", "Deep Learning", "NLP", "Data Management & Analysis", "Causal Inference", "Predictive Analytics", "Responsible Data Science"]}

projects


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


skill_section


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


work_experience
Extracted:
{"education": {"degree": "Bachelor of Science in Computer Science", "university": "University of California, Berkeley", "from_date": "2013-09", "to_date": "2017-05", "grade": "3.8/4"}, "work_experience": [{"role": "Software Engineering Intern", "company": "Amazon", "location": "Seattle, WA", "from_date": "Jun. 2023", "to_date": "Aug. 2023", "responsibilities": ["Enhanced AWS Aurora GlobalDB API architecture to incorporate recovery of misconfigured encrypted clusters into existing workflows, benefiting 30,000+ customers.", "Innovated a cross-functional solution to resolve multi-team issues, surpassing initial engineering recommendations.", "Developed an automated recovery feature to seamlessly recover clusters without customer intervention."]}, {"role": "Undergraduate Teaching Assistant", "company": "New York University", "location": "New York, NY", "from_date": "Sep. 2022", "to_date": "Present", "responsibilities": ["Provided in-class tutoring for 80+ student

In [180]:
improved_resume

{'personal_info': '[{"name": "Tomisin Adeyemi", "email": "tomisinadeyemi7@gmail.com", "linkedin": "https://linkedin.com/in/ota231", "github": "https://github.com/ota231"}]',
 'achievements': '{"Enhanced AWS Aurora GlobalDB API architecture to incorporate recovery of misconfigured encrypted clusters into existing workflows, benefiting 30,000+ customers.", "Achieved stretch goal by developing an automated recovery feature to seamlessly recover clusters without customer intervention.", "Led a 4-member team to develop a movie recommendation system, placing 4th in NYC Kaggle competition with a Root Mean Squared Error of 17.97%"}',
 'certifications': '{"Presidential Honors Scholar": "Top 10% in academics.", "Davis Scholar": "$35,000 scholarship."}',
 'education': '{"degree": "BA in Computer & Data Science (Honors)", "minor": "Mathematics", "university": "New York University", "location": "New York, NY", "from_date": "Sep 2021", "to_date": "May 2025", "gpa": "3.7", "coursework": ["Data Struct

In [178]:
def generate_section(original_data, section_name):
    # Convert job details to a string
    job_details_str = json.dumps(job_details, ensure_ascii=False)
    # Convert original_data to JSON if not a string
    original_data_str = json.dumps(original_data, ensure_ascii=False) if not isinstance(original_data, str) else original_data

    prompt = (
        MASTER_PROMPT + "\n\n" +
        SECTION_CONFIG[section_name]["prompt_template"]
        .replace("{section_data}", original_data_str)
        .replace("{job_description}", job_details_str)
        +f"Please provide the content for the {section_name} section, clearly enclosed the output between <|output starts|> and <|out ends|> tags."
    )

    # Tokenize the input
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

    # Generate output
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=700,
            temperature=0.7,
            top_p=0.9,
            do_sample=True,
            eos_token_id=tokenizer.eos_token_id
        )

    # Decode the generated text
    generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)

    # The model output may contain the prompt and completion mixed, 
    # depending on how the model was finetuned. You may need to parse out 
    # the completion carefully. For demonstration, we'll just return the entire output:
    # Ideally, your finetuned model should produce clearly delimited JSON or structured text.
    return generated_text

# Perform inference section by section
origin_model_extract = {}
full_origin_model_extract = {}
for section_name, config in SECTION_CONFIG.items():
    print(section_name)
    original_section_data = original_sections.get(config["original_key"], None)
    if original_section_data is None:
        print("Not Found")
        continue
    improved_section = generate_section(original_section_data, section_name)
    origin_model_extract[config["improved_key"]] = improved_section
    print(improved_section)
    if section_name == "personal_info":
        pattern = r"<personal_info>(.*?)</personal_info>"
        match = re.search(pattern, improved_section, re.DOTALL)

        if match:
            # Extract the content between the tags
            personal_info_content = match.group(1).strip()
            print(personal_info_content)
            origin_model_extract[config["improved_key"]] = personal_info_content
        else:
            origin_model_extract[config["improved_key"]] = None

    else:
        pattern = r"<\|output starts\|>(.*?)<\|out ends\|>"
        #print(improved_section)
        match = re.search(pattern, improved_section, re.DOTALL)
        extracted_content = None
        if match:
            extracted_content = match.group(1) # Extract the JSON content
            #achievements = json.loads(json_content).get("achievements", [])  # Parse JSON and get achievements list
            print("Extracted:")
            print(extracted_content)
        else:
            # Retry with extra prompt
            '''
            print("Retrying for", section_name)
            retry_prompt = (
                f"Please provide the content for the {section_name} section, "
                "clearly enclosed between <|output starts|> and <|out ends|> tags."
            )
            improved_section = generate_section(original_section_data + retry_prompt, section_name)
            match = re.search(pattern, improved_section, re.DOTALL)
            '''
            pattern_two = r"<out>(.*?)<\out>"
            extracted_content = match.group(1) if match else None
        if "{format_instructions}" in str(extracted_content):
            extracted_content = extracted_content.replace("{format_instructions}", "").strip()
        if extracted_content == "and":
            extracted_content == "None"
        origin_model_extract[config["improved_key"]] = extracted_content

# Now improved_resume will hold the generated improved sections
# You can post-process them if needed
#print(json.dumps(improved_resume, ensure_ascii=False, indent=2))
improved_resume

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


personal_info


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


"I am an experienced career advisor specializing in crafting exceptional resumes and cover letters tailored to specific job descriptions, optimized for ATS systems and human readers.

Instructions for creating optimized resumes and cover letters:

Analyze Job Descriptions: Extract key requirements and industry-specific keywords.
Create Compelling Resumes: Highlight quantifiable achievements, tailor content to the role, and emphasize the candidate's value proposition.
Craft Persuasive Cover Letters: Align with the job, balance professionalism with personality, and demonstrate soft skills through specific examples.
Optimize for ATS: Strategically integrate keywords and ensure documents pass ATS scans while remaining engaging for human readers.
Provide Industry-Specific Guidance: Reflect current trends, use clear formatting, and prioritize relevance with concise, impactful statements.
Goal: Deliver tailored documents that highlight the candidate's value, pass ATS screenings, and capture t

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


"I am an experienced career advisor specializing in crafting exceptional resumes and cover letters tailored to specific job descriptions, optimized for ATS systems and human readers.

Instructions for creating optimized resumes and cover letters:

Analyze Job Descriptions: Extract key requirements and industry-specific keywords.
Create Compelling Resumes: Highlight quantifiable achievements, tailor content to the role, and emphasize the candidate's value proposition.
Craft Persuasive Cover Letters: Align with the job, balance professionalism with personality, and demonstrate soft skills through specific examples.
Optimize for ATS: Strategically integrate keywords and ensure documents pass ATS scans while remaining engaging for human readers.
Provide Industry-Specific Guidance: Reflect current trends, use clear formatting, and prioritize relevance with concise, impactful statements.
Goal: Deliver tailored documents that highlight the candidate's value, pass ATS screenings, and capture t

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


"I am an experienced career advisor specializing in crafting exceptional resumes and cover letters tailored to specific job descriptions, optimized for ATS systems and human readers.

Instructions for creating optimized resumes and cover letters:

Analyze Job Descriptions: Extract key requirements and industry-specific keywords.
Create Compelling Resumes: Highlight quantifiable achievements, tailor content to the role, and emphasize the candidate's value proposition.
Craft Persuasive Cover Letters: Align with the job, balance professionalism with personality, and demonstrate soft skills through specific examples.
Optimize for ATS: Strategically integrate keywords and ensure documents pass ATS scans while remaining engaging for human readers.
Provide Industry-Specific Guidance: Reflect current trends, use clear formatting, and prioritize relevance with concise, impactful statements.
Goal: Deliver tailored documents that highlight the candidate's value, pass ATS screenings, and capture t

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


"I am an experienced career advisor specializing in crafting exceptional resumes and cover letters tailored to specific job descriptions, optimized for ATS systems and human readers.

Instructions for creating optimized resumes and cover letters:

Analyze Job Descriptions: Extract key requirements and industry-specific keywords.
Create Compelling Resumes: Highlight quantifiable achievements, tailor content to the role, and emphasize the candidate's value proposition.
Craft Persuasive Cover Letters: Align with the job, balance professionalism with personality, and demonstrate soft skills through specific examples.
Optimize for ATS: Strategically integrate keywords and ensure documents pass ATS scans while remaining engaging for human readers.
Provide Industry-Specific Guidance: Reflect current trends, use clear formatting, and prioritize relevance with concise, impactful statements.
Goal: Deliver tailored documents that highlight the candidate's value, pass ATS screenings, and capture t

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


"I am an experienced career advisor specializing in crafting exceptional resumes and cover letters tailored to specific job descriptions, optimized for ATS systems and human readers.

Instructions for creating optimized resumes and cover letters:

Analyze Job Descriptions: Extract key requirements and industry-specific keywords.
Create Compelling Resumes: Highlight quantifiable achievements, tailor content to the role, and emphasize the candidate's value proposition.
Craft Persuasive Cover Letters: Align with the job, balance professionalism with personality, and demonstrate soft skills through specific examples.
Optimize for ATS: Strategically integrate keywords and ensure documents pass ATS scans while remaining engaging for human readers.
Provide Industry-Specific Guidance: Reflect current trends, use clear formatting, and prioritize relevance with concise, impactful statements.
Goal: Deliver tailored documents that highlight the candidate's value, pass ATS screenings, and capture t

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


"I am an experienced career advisor specializing in crafting exceptional resumes and cover letters tailored to specific job descriptions, optimized for ATS systems and human readers.

Instructions for creating optimized resumes and cover letters:

Analyze Job Descriptions: Extract key requirements and industry-specific keywords.
Create Compelling Resumes: Highlight quantifiable achievements, tailor content to the role, and emphasize the candidate's value proposition.
Craft Persuasive Cover Letters: Align with the job, balance professionalism with personality, and demonstrate soft skills through specific examples.
Optimize for ATS: Strategically integrate keywords and ensure documents pass ATS scans while remaining engaging for human readers.
Provide Industry-Specific Guidance: Reflect current trends, use clear formatting, and prioritize relevance with concise, impactful statements.
Goal: Deliver tailored documents that highlight the candidate's value, pass ATS screenings, and capture t

{'personal_info': '[{"name": "Tomisin Adeyemi", "email": "tomisinadeyemi7@gmail.com", "linkedin": "https://linkedin.com/in/ota231", "github": "https://github.com/ota231"}]',
 'achievements': ' and ',
 'certifications': ' and ',
 'education': ' and ',
 'projects': ' and ',
 'skill_section': ' and ',
 'work_experience': ' and '}

### Load the finetuned model

In [7]:
# Path to your fine-tuned model directory
model_path = "finetuned-llama-lora"

# Load the tokenizer and model
tokenizer = AutoTokenizer.from_pretrained(model_path)
finetune_model = AutoModelForCausalLM.from_pretrained(model_path)

# Move the model to GPU if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
finetune_model = finetune_model.to(device)

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]