In [1]:
from AnthropicWrapper import ClaudeChat
from AnthropicWrapper import Utilities
from joblib import load
import os
import fitz
import numpy as np
from hume_analysis import HumeSentimentAnalyzer
from dotenv import load_dotenv

In [2]:
# --- LLM-2 Configuration ---
# Model identifier handed to ClaudeChat when the evaluation chat is created.
chat_model_name = "claude-3-5-sonnet-20240620"
# System-prompt template for the hiring-manager evaluation. Bracketed tokens
# such as [Job Title] are placeholders; analyze_candidate fills them in with
# str.replace before the prompt is sent.
llm2_system_prompt = """
You are a seasoned hiring manager evaluating a candidate for a [Job Title] position at [Company Name]. The candidate has [Experience Level] experience in the field. You have access to the candidate's CV, the transcript of their interview, and sentiment analysis data from Hume.ai.

**Task:**

1. **Understand the Job Requirements:** Carefully analyze the [Job Description] provided and identify the key skills, experience, and qualifications required for success in this role.

2. **Evaluate Candidate Alignment:** Assess how well the candidate's qualifications and experience align with the identified job requirements. 
    * **Technical Skills:**  Does the candidate demonstrate the necessary technical skills, including specific software, tools, and frameworks mentioned in the job description? 
    * **Project Experience:**  Are the candidate's projects relevant to the role and demonstrate the required level of complexity and problem-solving abilities?
    * **Soft Skills:**  Does the candidate possess the necessary communication, collaboration, and problem-solving skills?
    * **Cultural Fit:**  Does the candidate seem to align with the company's values and work environment as described in the job description?

3. **Analyze Sentiment Data:**  Use the sentiment analysis data from Hume.ai to understand the candidate's emotional state during the interview.  Does their emotional response indicate confidence, enthusiasm, or any potential issues?

4. **Analyze Toxicity Scores:**  Does the candidate's communication exhibit any signs of toxicity, unprofessionalism, or potentially problematic behavior?

5. **Provide a Recommendation:** 
    * **Strong Candidate:** The candidate demonstrates strong technical skills, relevant experience, and a positive attitude. Recommend moving them forward in the interview process.
    * **Consider for Further Evaluation:** The candidate has potential, but their technical skills or project experience need further investigation. Recommend additional interviews or assessments. 
    * **Not a Fit:** The candidate lacks the necessary technical skills or experience, or their communication/attitude was not compelling. Recommend not moving forward with this candidate.

**Explain your reasoning:** Provide specific evidence from the CV, transcript, and sentiment analysis data to support your recommendation. 
"""


In [3]:
# Job posting text for the role under evaluation; a later cell passes it to
# analyze_candidate as job_description.
role_description = """
Do you want to tackle the biggest questions in finance with near infinite compute power at your fingertips?

G-Research is a leading quantitative research and technology firm, with offices in London and Dallas. We are proud to employ some of the best people in their field and to nurture their talent in a dynamic, flexible and highly stimulating culture where world-beating ideas are cultivated and rewarded.

This is a role based in our new Soho Place office - opened in 2023 - in the heart of Central London and home to our Research Lab.

The role

We are looking for exceptional machine learning engineers to work alongside our quantitative researchers on cutting-edge machine learning problems.

As a member of the Core Technical Machine Learning team, you will be engaged in a mixture of individual and collaborative work to tackle some of the toughest research questions.

In this role, you will use a combination of off-the-shelf tools and custom solutions written from scratch to drive the latest advances in quantitative research.

Past projects have included:

Implementing ideas from a recently published research paper
Writing custom libraries for efficiently training on petabytes of data
Reducing model training times by hand optimising machine learning operations
Profiling custom ML architectures to identify performance bottlenecks
Evaluating the latest hardware and software in the machine learning ecosystem
Who are we looking for?

Candidates will be comfortable working both independently and in small teams on a variety of engineering challenges, with a particular focus on machine learning and scientific computing.

The ideal candidate will have the following skills and experience:

Either a post-graduate degree in machine learning or a related discipline, or commercial experience working on machine learning models at scale. We will also consider exceptional candidates with a proven record of success in online data science competitions, such as Kaggle
Strong object-oriented programming skills and experience working with Python, PyTorch and NumPy are desirable
Experience in one or more advanced optimisation methods, modern ML techniques, HPC, profiling, model inference; you dont need to have all of the above
Excellent ML reasoning and communication skills are crucial: off-the-shelf methods dont always work on our data so you will need to understand how to develop your own models in a collaborative environment working in a team with complementary skills
Finance experience is not necessary for this role and candidates from non-financial backgrounds are encouraged to apply.

Why should you apply?

Highly competitive compensation plus annual discretionary bonus
Lunch provided (via Just Eat for Business) and dedicated barista bar
35 days annual leave
9 percent company pension contributions
Informal dress code and excellent work/life balance
Comprehensive healthcare and life assurance
Cycle-to-work scheme
Monthly company events
"""

In [17]:
# --- Function for LLM-2 analysis ---
def analyze_candidate(job_title, experience_level, job_description, interview_transcript, cv, analyzer_output, company_name=None):
    """
    Ask LLM-2 for a hiring recommendation based on the candidate's data.

    The system-prompt template is filled in, the analyzer output is condensed
    into a few summary scores, and everything is sent to a new ClaudeChat
    session.

    Args:
        job_title (str): The title of the job.
        experience_level (str): The candidate's experience level.
        job_description (str): The text of the job description.
        interview_transcript (str): The transcript of the interview.
        cv (str): The candidate's CV.
        analyzer_output (dict): Analyzer result containing the keys
            "emotions" (emotion name -> score), "toxicity" (category -> score)
            and "sentiments" (rating -> weight; both must be convertible with
            float(); presumably ratings 1-9). The dict is not modified.
        company_name (str, optional): Text substituted for the
            "[Company Name]" placeholder. When omitted, the generic wording
            "the company" is used so the raw placeholder never reaches the
            model.

    Returns:
        The reply of ClaudeChat.chat, i.e. the recommendation and reasoning
        from LLM-2.

    Raises:
        KeyError: If analyzer_output lacks "emotions", "toxicity" or
            "sentiments".

    Side effects:
        Rebinds the module-level name `claude` to the new ClaudeChat session
        so that later cells can continue the same conversation.
    """
    global claude

    # Fill in the placeholders of the system-prompt template
    llm2_prompt = (
        llm2_system_prompt
        .replace("[Job Title]", job_title)
        .replace("[Company Name]", company_name or "the company")
        .replace("[Experience Level]", experience_level)
        .replace("[Job Description]", job_description)
    )

    # --- Preprocess Sentiment Data ---
    positive_emotions = ["Joy", "Enthusiasm", "Excitement", "Pride", "Satisfaction", "Triumph", "Surprise (positive)", "Contentment", "Ecstasy", "Gratitude", "Love", "Relief"]
    negative_emotions = ["Anger", "Annoyance", "Anxiety", "Boredom", "Contempt", "Disappointment", "Disapproval", "Disgust", "Distress", "Fear", "Guilt", "Horror", "Pain", "Sadness", "Shame", "Surprise (negative)", "Sympathy", "Tiredness"]

    # Emotions missing from the analyzer output count as 0. The results are
    # cast to plain floats because the dict is stringified into the prompt and
    # NumPy scalars would otherwise show up as "np.float64(...)" on NumPy 2.
    emotions = analyzer_output["emotions"]
    positive_sentiment_score = float(np.mean([emotions.get(emotion, 0) for emotion in positive_emotions]))
    negative_sentiment_score = float(np.mean([emotions.get(emotion, 0) for emotion in negative_emotions]))

    # --- Preprocess Toxicity Data ---
    # Weighted sum over the toxicity categories; missing categories count as 0.
    toxicity_weights = {"severe_toxic": 0.3, "threat": 0.2, "toxic": 0.15, "identity_hate": 0.1, "insult": 0.1, "obscene": 0.1}
    toxicity = analyzer_output["toxicity"]
    overall_toxicity_score = float(np.sum([toxicity.get(category, 0) * weight for category, weight in toxicity_weights.items()]))
    if overall_toxicity_score < 0.01:
        toxicity_level = "Low"
    elif overall_toxicity_score < 0.05:
        toxicity_level = "Moderate"
    else:
        toxicity_level = "High"

    # ---  Preprocess Sentiment Scores (1-9) ---
    # Weighted sum of rating * weight over all sentiment buckets.
    sentiment_scores = analyzer_output["sentiments"]
    overall_sentiment_score = sum(float(key) * float(value) for key, value in sentiment_scores.items())
    if overall_sentiment_score < 3:
        overall_sentiment_level = "Highly Negative"
    elif overall_sentiment_score < 6:
        overall_sentiment_level = "Negative"
    elif overall_sentiment_score < 7:
        overall_sentiment_level = "Neutral"
    elif overall_sentiment_score < 8:
        overall_sentiment_level = "Positive"
    else:
        overall_sentiment_level = "Highly Positive"

    # --- Add Preprocessed Data to a copy of the Analyzer Output ---
    # A shallow copy keeps the caller's dict untouched; only new top-level
    # keys are added, so the nested dicts can safely be shared.
    enriched_output = dict(analyzer_output)
    enriched_output["Preprocessed Sentiment"] = {
        "Positive Sentiment": positive_sentiment_score,
        "Negative Sentiment": negative_sentiment_score,
        "Overall Sentiment Score (1-9)": overall_sentiment_score,
        "Overall Sentiment Level": overall_sentiment_level
    }
    enriched_output["Preprocessed Toxicity"] = {
        "Overall Toxicity Score": overall_toxicity_score,
        "Toxicity Level": toxicity_level
    }

    # --- Prepare the data for LLM-2 ---
    analysis_data = {
        "Interview Transcript": interview_transcript,
        "CV": cv,
        "Analyzer Output": enriched_output  # Raw analyzer data plus the summaries above
    }

    llm2_prompt += "\n\nHere is the data for your analysis:\n"
    llm2_prompt += "\n".join(f"{key}: {value}" for key, value in analysis_data.items())

    # The session is stored globally so follow-up cells can keep chatting with it
    claude = ClaudeChat(chat_model_name, llm2_prompt)
    return claude.chat("Please Execute the system prompt")


In [6]:
# --- Run configuration and input documents ---
model = "claude-3-5-sonnet-20240620"
job_role = "Machine Learning Engineer"
candidate_skill = "Entry-Level"
utilities = Utilities()
# Interview transcript: relative path, resolved against the working directory
transcript_path = "conversation.pdf"
transcript = utilities.process_pdf(transcript_path)
# Candidate CV. NOTE: machine-specific absolute path; adjust it to wherever the CV lives.
pdf_path = r"D:\Hidden Desktop\OneDrive\Cross Device\Jobs Applications\Graduating\CV.pdf"
# Fix: read the CV from pdf_path. Previously transcript_path was passed here,
# so `cv` was just a second copy of the interview transcript.
cv = utilities.process_pdf(pdf_path)

In [7]:
# Disabled helper: the function below is wrapped in a string literal, so this
# cell defines nothing. As the cell's final expression, the string is simply
# echoed back as the cell output.
"""
def extract_transcript_from_pdf(pdf_path):

    doc = fitz.open(pdf_path)
    extracted_transcript = ""

    for page_num in range(doc.page_count):
        page = doc[page_num]
        blocks = page.get_text("blocks")

        for block in blocks:
            text = block[4].strip()
            if text:
                if block[0] < 200 and block[3] > 12:  # Check for headings or dialogue lines
                    extracted_transcript += f"\n\n{text}\n"
                else:
                    extracted_transcript += f"{text} "

    return extracted_transcript
"""

'\ndef extract_transcript_from_pdf(pdf_path):\n\n    doc = fitz.open(pdf_path)\n    extracted_transcript = ""\n\n    for page_num in range(doc.page_count):\n        page = doc[page_num]\n        blocks = page.get_text("blocks")\n\n        for block in blocks:\n            text = block[4].strip()\n            if text:\n                if block[0] < 200 and block[3] > 12:  # Check for headings or dialogue lines\n                    extracted_transcript += f"\n\n{text}\n"\n                else:\n                    extracted_transcript += f"{text} "\n\n    return extracted_transcript\n'

In [12]:
# The notebook's current working directory (not necessarily the file's own folder)
script_directory = os.getcwd()
print(f"Script Directory : {script_directory}")
# The .env file lives one level above the working directory
one_folder_up = os.path.dirname(script_directory)
parent_folder_path = one_folder_up
dotenv_path = os.path.join(parent_folder_path, ".env")
# Pull the variables from that .env file into the process environment
load_dotenv(dotenv_path)
# Analyse the interview recording located in the working directory
analyzer = HumeSentimentAnalyzer(os.getenv("HUME_API_KEY"))
audio_path = os.path.join(script_directory, "interview_audio.wav")
result = analyzer.analyze_audio(audio_path)


Script Directory : d:\Kent\University Of Kent UK\Projects\Disso\Screening-LLM\LLM2-Prompting
Analyzing audio...


In [14]:
# Dump the raw value returned by analyzer.analyze_audio for inspection
print(result)

{'emotions': {'Admiration': 0.013154713436961174, 'Adoration': 0.005036264657974243, 'Aesthetic Appreciation': 0.02096581645309925, 'Amusement': 0.038279544562101364, 'Anger': 0.00032193015795201063, 'Annoyance': 0.00515311723574996, 'Anxiety': 0.015116434544324875, 'Awe': 0.004606128670275211, 'Awkwardness': 0.0266299806535244, 'Boredom': 0.032327186316251755, 'Calmness': 0.3473110795021057, 'Concentration': 0.2725161612033844, 'Confusion': 0.02289200946688652, 'Contemplation': 0.1169968917965889, 'Contempt': 0.01220963429659605, 'Contentment': 0.09862833470106125, 'Craving': 0.005860886070877314, 'Desire': 0.002857029438018799, 'Determination': 0.13869863748550415, 'Disappointment': 0.001249863300472498, 'Disapproval': 0.00198471755720675, 'Disgust': 0.0005089972401037812, 'Distress': 0.0017954296199604869, 'Doubt': 0.02944629080593586, 'Ecstasy': 0.003593969624489546, 'Embarrassment': 0.0014260212192311883, 'Empathic Pain': 0.004238705150783062, 'Enthusiasm': 0.3247373402118683, 'En

In [18]:
# --- Run the LLM-2 evaluation for this candidate and show its verdict ---
print("Analyzing candidate...")
llm2_recommendation = analyze_candidate(
    job_title=job_role, experience_level=candidate_skill,
    job_description=role_description,
    interview_transcript=transcript, cv=cv,
    analyzer_output=result,
)

print(f"LLM-2 Recommendation: {llm2_recommendation}")

Analyzing candidate...
LLM-2 Recommendation: Based on the provided information, here's my evaluation of the candidate for the Entry-Level Machine Learning Engineer position at G-Research:

1. Understanding the Job Requirements:
The role requires strong machine learning skills, experience with Python, PyTorch, and NumPy, and the ability to work on complex ML problems. The ideal candidate should have a post-graduate degree in ML or related field, or commercial experience with ML at scale. Strong object-oriented programming skills and experience with advanced optimization methods are desirable.

2. Evaluating Candidate Alignment:

Technical Skills: 
The candidate demonstrates strong experience with NumPy (3 years) and has worked on complex ML projects, including developing a fine-tuned LLM BERT model at NVIDIA. This aligns well with the required technical skills.

Project Experience: 
The candidate's work on the BERT model at NVIDIA, including handling challenges like image-to-text conver

In [None]:
# Interactive follow-up: relies on the global `claude` session that
# analyze_candidate creates, so that function must have been run first.
follow_up_question = input()
follow_up_reply = claude.chat_with_history_doc(follow_up_question)
print(follow_up_reply)