In [None]:
%%load

In [None]:
import logging
import random
import re
from typing import Any, Dict, List, Tuple

from langchain_core.prompts import PromptTemplate
from langchain_core.messages import SystemMessage, HumanMessage
from langchain_core.runnables import Runnable
from langchain_community.llms import VLLM
import numpy as np


# Configure logging: INFO level on the root logger, with timestamp, logger
# name, level and message in every record.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
# Module-level logger. NOTE(review): the functions visible in this file call
# `logging.info(...)` / `logging.debug(...)` directly (i.e. the root logger),
# so this `logger` object appears unused here — confirm whether that is intended.
logger = logging.getLogger(__name__)


from langchain_core.prompts import ChatPromptTemplate


def generate_applicant_data(model, n: int, p: float, shuffle: bool = False):
    """
    Generates a dataset of data science applicant summaries and their labels.

    Two fixed prompts (one "strong", one "typical") are each piped into
    ``model`` and run with ``batch``; the prompts take no input variables, so
    every batch item is an empty dict.

    Args:
        model: The model each prompt is piped into (``prompt | model``). Its
            batch outputs are stored in X exactly as returned.
        n (int): The total number of applicant summaries to generate.
        p (float): The fraction (0.0 to 1.0) of the ``n`` summaries generated
            as strong applicants. The split is deterministic:
            ``round(n * p)`` strong, the remainder typical.
        shuffle (bool): Whether to shuffle the generated data (X and y) together.

    Returns:
        tuple: A tuple containing two lists:
            - X (list): A list of applicant summaries.
            - y (list): A list of labels (1 for strong, 0 for typical).

    Raises:
        ValueError: If ``p`` is outside the range [0.0, 1.0].
    """
    # Without this check p > 1 would yield more than n summaries.
    if not 0.0 <= p <= 1.0:
        raise ValueError(f"p must be between 0.0 and 1.0, got {p}.")

    # Define the prompts
    strong_applicant_prompt = ChatPromptTemplate([
        (
            "system",
            "You are a data science professional articulating your suitability for a challenging data science role in 100 words. "
        ),
        (
            "human",
            "Articulate your key contributions to past data-driven projects, focusing on how your "
            "analytical approach led to measurable improvements or novel insights. "
            "Feel free to mention tools and methodologies you used, your ability to solve complex problems, "
            "or your capacity to communicate complex findings to non-technical stakeholders. "
            # Trailing space keeps the next sentence from fusing into "hired.ONLY".
            "Begin with 'As'. Do not exceed the 100-word limit, else you will not be hired. "
            "ONLY OUTPUT YOUR RESPONSE, DO NOT OUTPUT ANY OTHER TEXT!"
        )
    ])
    typical_applicant_prompt = ChatPromptTemplate([
        (
            "system",
            "You are an aspiring data scientist articulating your qualifications for an entry-level or junior role in 100 words. "
        ),
        (
            "human",
            "Articulate your key qualifications, including relevant coursework, projects, or prior experience. "
            "Highlight your enthusiasm for learning and applying data analysis techniques to real-world problems. "
            "Mention your comfort with standard data tools and a desire to grow your skills within a collaborative team environment. "
            # Trailing space keeps the next sentence from fusing into "hired.ONLY".
            "Begin with 'As'. Do not exceed the 100-word limit, else you will not be hired. "
            "ONLY OUTPUT YOUR RESPONSE, DO NOT OUTPUT ANY OTHER TEXT!"
        )
    ])

    # Determine the number of strong and typical applicants
    num_strong = round(n * p)
    num_typical = n - num_strong

    X = []  # Applicant summaries
    y = []  # Labels (1 for strong, 0 for typical)

    # (display name, lowercase name, label, prompt, count) for each class;
    # strong applicants are generated first, then typical ones.
    groups = (
        ("Strong", "strong", 1, strong_applicant_prompt, num_strong),
        ("Typical", "typical", 0, typical_applicant_prompt, num_typical),
    )
    for display_name, lower_name, label, prompt, count in groups:
        if count <= 0:
            continue

        logging.info("Generating %s %s applicants...", count, lower_name)
        chain = prompt | model
        # Empty dictionaries as input since prompts are self-contained
        results = chain.batch([{} for _ in range(count)])
        X.extend(results)
        y.extend([label] * count)

        # Log the first generated summary of this class as a sample.
        logging.info("=" * 40)
        logging.info("generate_applicant_data (Label: '%s')", display_name)
        logging.info("=" * 40)
        logging.info("%s", results[0])
        logging.info("=" * 40)

    # Shuffle X and y together. Skipped when nothing was generated, because
    # unpacking zip(*[]) into two names raises ValueError.
    if shuffle and X:
        paired = list(zip(X, y))
        random.shuffle(paired)
        X, y = (list(column) for column in zip(*paired))

    logging.info("Generation complete!")
    return X, y


def create_ats_scorer(scorer_model) -> Runnable:
    """
    Build the ATS scoring chain.

    The chain is a chat prompt consisting of a single system message (the
    hiring-manager rubric, the ``{applicant_summary}`` placeholder and the
    required ``Score:`` / ``Justification:`` output format) piped into
    ``scorer_model``.

    Args:
        scorer_model: The model the prompt is piped into.

    Returns:
        Runnable: ``prompt | scorer_model``; expects an ``applicant_summary`` input.
    """
    # The whole rubric lives in one system message; the literals below are
    # concatenated by the parser into a single template string.
    rubric_and_format = (
        "You are a seasoned Hiring Manager for a Data Scientist role. Rate the applicant's summary (0-100).\n"
        "Indicators of a strong candidate: advanced ML (deep learning, NLP, CV), MLOps/deployment, leadership, quantifiable impact, specialized tools (TF, PyTorch, Spark, cloud), PhD/research.\n"
        "Indicators of a typical candidate: foundational skills (data cleaning, basic EDA), common libraries (Pandas, Scikit-learn), general project experience, academic focus.\n"
        "Score: 100 (top-tier), 80-99 (very strong), 60-79 (good/typical), 40-59 (decent), <40 (needs experience).\n"
        "\n"
        "Applicant Summary:\n"
        "{applicant_summary}\n"
        "Please output the following. If you don't, you will get fired!\n"
        "Score: [0-100]\n"
        "Justification: [One-sentence justification. Be super concise!]"
    )
    scoring_messages = [("system", rubric_and_format)]
    scoring_prompt = ChatPromptTemplate(scoring_messages)
    return scoring_prompt | scorer_model


def run_ats_on_batch(
    scorer_model, # Pass the LLM model instance directly
    applicant_summaries: List[str],
    labels: List[int], # For comparison, not used by ATS for scoring
) -> List[Tuple[int, str]]:
    """
    Runs the ATS system on a batch of applicant summaries using true LLM batching.

    Each raw response is parsed for a ``Score: <digits>`` field (clamped to
    0-100) and a ``Justification: ...`` field (everything after the marker,
    stripped). When a field cannot be found, the score defaults to 0 and the
    justification to "Error parsing score or justification."; a missing score
    is additionally reported with a warning so it is not mistaken for a real 0.

    Args:
        scorer_model: The LLM model instance to use for scoring.
        applicant_summaries (List[str]): A list of applicant summary strings.
        labels (List[int]): The true labels (1 for strong, 0 for typical). Only
            used for log output, never for scoring.

    Returns:
        List[Tuple[int, str]]: A list of tuples, each containing (score, justification).
    """
    ats_scorer_chain = create_ats_scorer(scorer_model) # Create the chain with the model

    # Prepare inputs for batch processing
    batch_inputs = [{"applicant_summary": summary} for summary in applicant_summaries]

    logging.info("Running ATS on %s applicants...", len(applicant_summaries))
    raw_ats_responses = ats_scorer_chain.batch(batch_inputs) # This is the true batch call

    # Compile once; both patterns are reused for every response.
    score_pattern = re.compile(r"Score:\s*(\d+)")
    justification_pattern = re.compile(r"Justification:\s*(.*)", re.DOTALL)

    results = []
    for i, response_text in enumerate(raw_ats_responses):
        score_match = score_pattern.search(response_text)
        if score_match:
            # A \d+ match always converts with int(); clamp to the 0-100 scale.
            score = max(0, min(100, int(score_match.group(1))))
        else:
            score = 0
            logging.warning(
                "Could not parse an ATS score for applicant %s; defaulting to 0.", i + 1
            )

        justification_match = justification_pattern.search(response_text)
        if justification_match:
            justification = justification_match.group(1).strip()
        else:
            justification = "Error parsing score or justification."

        results.append((score, justification))

        # labels are informational only, so a short labels list must not
        # abort scoring.
        if i < len(labels):
            true_label = "Strong" if labels[i] == 1 else "Typical"
        else:
            true_label = "Unknown"
        # Lazy %-style arguments: nothing is formatted unless DEBUG is enabled.
        logging.debug("\nProcessing Applicant %s (True Label: %s):", i + 1, true_label)
        logging.debug("  Summary:\n%s", applicant_summaries[i]) # Original summary for context
        logging.debug("  ATS Score: %s", score)
        logging.debug("  Justification: %s", justification)
        logging.debug("-" * 40)

    # Log the first strong and the first typical applicant as worked examples.
    for label_value, kind in ((1, "Strong"), (0, "Typical")):
        if label_value not in labels:
            continue
        idx = labels.index(label_value)
        if idx >= len(results):
            # labels is longer than the scored batch; no example to show.
            continue
        logging.info("=" * 40)
        logging.info("%s applicant example: %s (Label: '%s')", kind, applicant_summaries[idx], kind)
        logging.info("=" * 40)
        logging.info("%s applicant ATS score: %s", kind, results[idx][0])
        logging.info("Justification: %s", results[idx][1])
        logging.info("=" * 40)

    return results


def create_summary_modifier(model) -> Runnable:
    """
    Build the summary-rewriting chain.

    The chain is a chat prompt with one system message (career-coach
    instructions plus the original summary, ATS score, ATS justification and
    modification intensity) piped into ``model``.

    Args:
        model: The model the prompt is piped into.

    Returns:
        Runnable: ``prompt | model``; expects the inputs ``original_summary``,
        ``ats_score``, ``ats_justification`` and ``modification_intensity``
        (the latter is rendered with two decimals by the template).
    """
    # Adjacent literals are joined by the parser into one template string.
    coaching_instructions = (
        "You are an AI career coach helping a data scientist improve their resume summary.\n"
        "You have analyzed their current summary and received feedback from an ATS system.\n"
        "\n"
        "Original Summary:\n"
        "{original_summary}\n"
        "\n"
        "ATS Score: {ats_score}\n"
        "ATS Justification: {ats_justification}\n"
        "\n"
        "Modification Intensity (0 = no change, 1 = maximum change): {modification_intensity:.2f}\n"
        "\n"
        "Based on the ATS feedback and the modification intensity:\n"
        "- If intensity is 0, return the original summary verbatim.\n"
        "- If intensity is high (e.g., near 1), make significant changes to address the ATS justification,\n"
        "  emphasizing business impact, advanced techniques, leadership, and quantifiable results.\n"
        "  Ensure the new summary sounds like a very strong candidate.\n"
        "- If intensity is moderate, make thoughtful, subtle improvements focusing on clarity and incorporating\n"
        "  stronger phrasing suggested by the ATS justification.\n"
        "\n"
        "Begin with 'As'. Only output the 100-word revised summary. Else you will get fired!\n"
    )
    rewrite_messages = [("system", coaching_instructions)]
    rewrite_prompt = ChatPromptTemplate.from_messages(rewrite_messages)
    return rewrite_prompt | model


def modify_applicant_summaries_batch(
    original_summaries: List[str],
    labels: List[int], # For logging only, not used for modification
    ats_feedback: List[Tuple[int, str]], # List of (score, justification)
    lambda_param: float, # lambda between 0 and 1
    summary_modifier_chain: Runnable,
) -> List[str]:
    """
    Modifies a list of applicant summaries based on ATS feedback and a lambda parameter.

    Args:
        original_summaries (List[str]): A list of the applicant's original summaries.
        labels (List[int]): The true labels (1 for strong, 0 for typical). Only
                            used to pick one example of each class for logging.
        ats_feedback (List[Tuple[int, str]]): A list of tuples, where each tuple
                                               contains (ats_score, ats_justification)
                                               for the corresponding summary.
        lambda_param (float): Controls modification extent, in [0, 1]
                              (0 = max change, 1 = no change). The prompt receives
                              ``modification_intensity = 1 - lambda_param``.
        summary_modifier_chain (Runnable): The LangChain Runnable for summary modification.

    Returns:
        List[str]: A new list of the modified applicant summaries (a copy of the
        originals when ``lambda_param`` is 1).

    Raises:
        ValueError: If ``lambda_param`` is outside [0, 1], or if ``ats_feedback``
            has fewer entries than ``original_summaries``.
    """
    if not (0 <= lambda_param <= 1):
        raise ValueError("lambda_param must be between 0 and 1.")

    # Convert lambda to modification intensity (0 = no change, 1 = max change)
    modification_intensity = 1.0 - lambda_param

    if modification_intensity == 0:
        # No modification requested: skip the LLM call. Return a copy so the
        # caller never receives an alias of its own input list.
        return list(original_summaries)

    if len(ats_feedback) < len(original_summaries):
        raise ValueError(
            "ats_feedback must contain one (score, justification) entry per summary."
        )

    # Prepare inputs for batch processing (zip stops at the last summary, so
    # surplus feedback entries are ignored).
    batch_inputs: List[Dict[str, Any]] = [
        {
            "original_summary": summary,
            "ats_score": ats_score,
            "ats_justification": ats_justification,
            "modification_intensity": modification_intensity,
        }
        for summary, (ats_score, ats_justification) in zip(original_summaries, ats_feedback)
    ]

    logging.info(
        "Modifying %s summaries with Lambda = %s...", len(original_summaries), lambda_param
    )
    modified_responses = summary_modifier_chain.batch(batch_inputs)

    # Strip whitespace from each modified summary
    modified_summaries = [res.strip() for res in modified_responses]

    logging.info("Modification complete!")

    # Log the first strong and the first typical applicant before/after.
    for label_value, kind in ((1, "Strong"), (0, "Typical")):
        if label_value not in labels:
            continue
        idx = labels.index(label_value)
        if idx >= len(modified_summaries):
            # labels is longer than the batch; no example to show.
            continue
        logging.info("=" * 40)
        logging.info("%s applicant example: %s (Label: '%s')", kind, original_summaries[idx], kind)
        logging.info("=" * 40)
        logging.info("%s applicant modified summary: %s", kind, modified_summaries[idx])
        logging.info("=" * 40)

    return modified_summaries




In [2]:
# Load the instruct model used by every later cell in this notebook.
# NOTE(review): the constructor passes `max_tokens` while the override below
# sets `max_new_tokens` — confirm which of the two names the VLLM wrapper
# actually honors for the generation length.
vllm = VLLM(model="meta-llama/Llama-3.1-8B-Instruct", temperature=0.8, top_p=0.95, max_tokens=512)

# Override the sampling settings before generating; the temperature passed to
# the constructor (0.8) is replaced by 0.4 here.
vllm.max_new_tokens = 250
vllm.temperature = 0.4
# n=20 with p=0.5 requests round(20 * 0.5) = 10 strong and 10 typical
# summaries, shuffled together with their labels.
applicant_summaries, labels = generate_applicant_data(vllm, n=20, p=0.5, shuffle=True)
# Spot-check the first two generated summaries.
print(applicant_summaries[0])
print(applicant_summaries[1])

  from .autonotebook import tqdm as notebook_tqdm


INFO 05-21 13:45:13 [__init__.py:239] Automatically detected platform cuda.


2025-05-21 13:45:18,422	INFO util.py:154 -- Missing packages: ['ipywidgets']. Run `pip install -U ipywidgets`, then restart the notebook server for rich notebook output.


INFO 05-21 13:45:30 [config.py:717] This model supports multiple tasks: {'classify', 'score', 'generate', 'embed', 'reward'}. Defaulting to 'generate'.
INFO 05-21 13:45:30 [config.py:2003] Chunked prefill is enabled with max_num_batched_tokens=16384.
INFO 05-21 13:45:34 [core.py:58] Initializing a V1 LLM engine (v0.8.5.post1) with config: model='meta-llama/Llama-3.1-8B-Instruct', speculative_config=None, tokenizer='meta-llama/Llama-3.1-8B-Instruct', skip_tokenizer_init=False, tokenizer_mode=auto, revision=None, override_neuron_config=None, tokenizer_revision=None, trust_remote_code=False, dtype=torch.bfloat16, max_seq_len=131072, download_dir=None, load_format=LoadFormat.AUTO, tensor_parallel_size=1, pipeline_parallel_size=1, disable_custom_all_reduce=False, quantization=None, enforce_eager=False, kv_cache_dtype=auto,  device_config=cuda, decoding_config=DecodingConfig(guided_decoding_backend='auto', reasoning_backend=None), observability_config=ObservabilityConfig(show_hidden_metrics=

Loading safetensors checkpoint shards:   0% Completed | 0/4 [00:00<?, ?it/s]
Loading safetensors checkpoint shards:  25% Completed | 1/4 [00:04<00:13,  4.37s/it]
Loading safetensors checkpoint shards:  50% Completed | 2/4 [00:08<00:08,  4.48s/it]
Loading safetensors checkpoint shards:  75% Completed | 3/4 [00:10<00:02,  2.97s/it]
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:14<00:00,  3.54s/it]
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:14<00:00,  3.63s/it]



INFO 05-21 13:45:53 [loader.py:458] Loading weights took 14.58 seconds
INFO 05-21 13:45:53 [gpu_model_runner.py:1347] Model loading took 14.9889 GiB and 15.080214 seconds
INFO 05-21 13:46:14 [backends.py:420] Using cache directory: /gpfs/home/vhl2022/.cache/vllm/torch_compile_cache/1cfb3cc26e/rank_0_0 for vLLM's torch.compile
INFO 05-21 13:46:14 [backends.py:430] Dynamo bytecode transform time: 20.98 s
INFO 05-21 13:46:18 [backends.py:118] Directly load the compiled graph(s) for shape None from the cache, took 4.357 s
INFO 05-21 13:46:20 [monitor.py:33] torch.compile takes 20.98 s in total
INFO 05-21 13:46:21 [kv_cache_utils.py:634] GPU KV cache size: 417,808 tokens
INFO 05-21 13:46:21 [kv_cache_utils.py:637] Maximum concurrency for 131,072 tokens per request: 3.19x
INFO 05-21 13:46:39 [gpu_model_runner.py:1686] Graph capturing finished in 18 secs, took 0.52 GiB
INFO 05-21 13:46:39 [core.py:159] init engine (profile, create kv cache, warmup model) took 46.39 seconds
INFO 05-21 13:46:39

2025-05-21 13:46:39,661 - root - INFO - Generating 10 strong applicants...
Processed prompts: 100%|██████████| 10/10 [00:07<00:00,  1.39it/s, est. speed input: 139.39 toks/s, output: 713.68 toks/s]
2025-05-21 13:46:46,938 - root - INFO - generate_applicant_data (Label: 'Strong')
2025-05-21 13:46:46,938 - root - INFO -  
As a seasoned data science professional, I have consistently delivered high-impact projects that drive business growth and strategic decision-making. My analytical approach has led to significant improvements in customer retention, revenue increase, and operational efficiency. Notably, I developed a predictive model that forecasted customer churn, enabling proactive measures that reduced churn by 25%. I am well-versed in a range of methodologies, including machine learning, statistical modeling, and data visualization. I excel in communicating complex findings to stakeholders, ensuring data-driven insights inform business decisions. My expertise has been sought after by

In [27]:
# Reuse the same model object as the ATS scorer, with temperature set to 0.0
# (presumably so scoring is as deterministic as possible — confirm).
vllm.temperature = 0.0
# ats_feedback is a list of (score, justification) tuples, one per summary;
# labels are passed only for log output.
ats_feedback = run_ats_on_batch(
    scorer_model=vllm,
    applicant_summaries=applicant_summaries,
    labels=labels
)

2025-05-21 14:06:00,600 - root - INFO - Running ATS on 20 applicants...
Processed prompts: 100%|██████████| 20/20 [00:01<00:00, 13.44it/s, est. speed input: 5874.19 toks/s, output: 1344.51 toks/s]
2025-05-21 14:06:02,116 - root - INFO - Strong applicant example:  
As a seasoned data scientist, I've consistently delivered high-impact projects that drive business growth. In my previous role, I developed a predictive model that increased sales by 25% through targeted marketing campaigns. I utilized machine learning algorithms, such as decision trees and random forests, to identify high-value customer segments. I also created data visualizations to communicate insights to non-technical stakeholders, ensuring seamless collaboration. My analytical approach led to a 30% reduction in customer churn, demonstrating my ability to drive business outcomes through data-driven decision-making. I'm excited to leverage my expertise to tackle complex challenges in this new role.  I'm excited to leverage

In [36]:
# Rewrite every summary using its ATS feedback. lambda_param=0.0 maps to
# modification_intensity = 1.0 (maximum change) inside the function.
# The return value is not assigned; it is only shown as this cell's output.
modify_applicant_summaries_batch(
    original_summaries=applicant_summaries,
    labels=labels,
    ats_feedback=ats_feedback,
    lambda_param=0.0,  # Example lambda value
    summary_modifier_chain=create_summary_modifier(vllm)
)

2025-05-21 14:11:48,447 - root - INFO - Modifying 20 summaries with Lambda = 0.0...
Processed prompts: 100%|██████████| 20/20 [00:01<00:00, 11.95it/s, est. speed input: 6307.91 toks/s, output: 1195.35 toks/s]
2025-05-21 14:11:50,155 - root - INFO - Modification complete!
2025-05-21 14:11:50,156 - root - INFO - Strong applicant example:  
As a seasoned data scientist, I've consistently delivered high-impact projects that drive business growth. In my previous role, I developed a predictive model that increased sales by 25% through targeted marketing campaigns. I utilized machine learning algorithms, such as decision trees and random forests, to identify high-value customer segments. I also created data visualizations to communicate insights to non-technical stakeholders, ensuring seamless collaboration. My analytical approach led to a 30% reduction in customer churn, demonstrating my ability to drive business outcomes through data-driven decision-making. I'm excited to leverage my expert

["As a seasoned data scientist with a proven track record of driving business growth through data-driven insights, I've developed and deployed predictive models that have yielded significant revenue increases for clients. Leveraging expertise in machine learning, data visualization, and statistical modeling, I've led cross-functional teams to inform strategic decisions and optimize business outcomes. Proficient in Python, R, and SQL, I excel in collaborative environments, driving innovation and results through data analysis. With a strong focus on staying up-to-date with industry trends",
 "As a seasoned data scientist with a proven track record of driving business growth through data-driven insights, I've developed expertise in advanced machine learning techniques, including deep learning and natural language processing. I've successfully led data science projects in healthcare and finance, leveraging tools like TensorFlow and PyTorch to deliver predictive models with significant ROI.