In [22]:
# Install Required Packages if you don't have them:
# !pip install python-dotenv joblib anthropic hume-api

# Import Statements
import os
from dotenv import load_dotenv
from joblib import load
from pprint import pprint
import time
import traceback

# Custom Modules (Assuming these are in your project structure)
from src.utils.anthropicwrapper import ClaudeChat, ClaudeChatAssess
from src.utils.humewrapper import HumeSentimentAnalyzer
from src.modules import ConversationVerifier

import json
import os

In [23]:

# --- Load Environment Variables ---
# The .env file is read from the parent of the current working directory.
# NOTE(review): the data paths below are relative to the working directory
# itself, so confirm the .env really lives one level above it.
parent_of_cwd = os.path.dirname(os.getcwd())
load_dotenv(os.path.join(parent_of_cwd, ".env"))

# --- Timestamp Input ---
# Selects which recorded interview folder is processed; replace with your own value.
timestamp = 1724532643

# --- Load Data ---
directory = f'data/interviews/{timestamp}/'
conversation_file = os.path.join(directory, "joblib/conversation.joblib")
# NOTE(review): joblib.load unpickles the file, so only point this at
# conversation dumps that come from a trusted source.
chatlog = load(conversation_file)



In [24]:
# --- Model Setup ---
# Single source of truth for the Claude model used by both helpers below.
CLAUDE_MODEL = "claude-3-5-sonnet-20240620"
# CV handed to the evaluator alongside the transcript.
CANDIDATE_CV_PATH = "data/cvs/cv-deb.pdf"

# Builds three module-level objects used by later cells:
#   sentiment_analyser   - Hume wrapper that scores one audio file
#   sentiment_summariser - Claude chat that turns raw scores into one sentence
#   candidate_evaluator  - Claude chat that produces the final pass/fail review
# Any failure is reported and re-raised so the notebook stops here instead of
# failing later with an undefined name.
try:
    sentiment_analyser = HumeSentimentAnalyzer(api_key=os.getenv("HUME_API_KEY"))

    # The example phrase at the end is closed with an escaped quote: a bare quote
    # there would be swallowed by the triple-quote terminator and leave the
    # quotation unterminated inside the prompt.
    sentiment_system_prompt = """You are a skilled emotions analyst provided with a detailed breakdown of sentiment analysis scores from Hume.ai, for a single response in an interview to a question from the interviewer. The scores are split into 3 sections. All numbers are from 0 to 1, linearly scaling, with 1 being a very strong representation of the indicator in question.

    First, Emotions. This contains several human emotions with a numerical value indicating the strength of the corresponding emotion.
    Second, Sentiments. This contains a scale from 1 to 9, each containing a numerical value indicating the magnitude of the sentiment of the topic of the conversation. A negative topic such as murder will have a high value lower in the scale, such as 1 or 2, and a positive topic will have a high value from 0 to 1 higher in the scale such as 8 or 9.
    Third, Toxicity. This contains several toxic representations such as hate, insult, etc, with a value from 0 to 1 for each representation identified in the audio.

    Your job is to provide a concise detailed one sentence breakdown of how the individual was feeling for the particular scores provided. You must be highly objective as your job is to discern whether or not a candidate was exhibiting traits which would or would not be fitting for a successful interview. 
    Model your answer beginning with something along the lines of "For this particular response, the candidate...\""""

    sentiment_summariser = ClaudeChat(CLAUDE_MODEL, sentiment_system_prompt)

    # The breakdown instruction covers both outcomes ("is or is not chosen") so the
    # wording does not presuppose a pass before the model gives its verdict.
    evaluation_system_prompt = """You are a highly skilled interviewer currently tasked with reviewing a phone screening interview candidate to decide whether they are to pass on to the next stage of the interview process. There is a high volume of candidates and as such only 47% of candidates should be allowed to pass.
    You will be provided with the following information to aid your decision:
    1. A copy of the job description.
    2. Transcript of the phone interview between the interviewer and candidate.
    3. Sentiment analysis summary to provide you with insight into the tone of the candidate in the call
    4. A summary of any factual inaccuracies the candidate may have made during discussions in the call
    5. A copy of the candidate's CV, which will be provided next.

    You are to evaluate the candidate, primarily on the transcript, and use the additional information provided to identify any potential red-flags. Your response should include a detailed breakdown of why the candidate is or is not chosen to continue onwards to further interviewing. You must end the breakdown with a simple one word response on a new line, "pass" or "fail"."""

    candidate_evaluator = ClaudeChatAssess(CLAUDE_MODEL, evaluation_system_prompt, CANDIDATE_CV_PATH)

except Exception as e:
    print(f"An error occurred during model setup: {e}")
    raise



In [25]:
# --- Reformat Chatlog ---
def reformat_chatlog(chatlog):
    """Pair up the messages of a raw chat log into question/answer exchanges.

    The first three entries are discarded (presumably the initial context
    messages; verify against the code that records the conversation). The
    remaining entries are read as alternating interviewer / candidate
    messages.

    Args:
        chatlog: Sequence of dicts, each with a 'content' key.

    Returns:
        A list of dicts with the keys 'interviewer' and 'candidate'. If the
        last interviewer message has no reply, the candidate text is filled
        in with 'Thank you, goodbye'.
    """
    messages = chatlog[3:]
    questions = messages[0::2]
    answers = messages[1::2]

    exchanges = []
    for position, question in enumerate(questions):
        asked = question['content']
        if position < len(answers):
            replied = answers[position]['content']
        else:
            # Trailing interviewer message without a candidate reply.
            replied = 'Thank you, goodbye'
        exchanges.append({'interviewer': asked, 'candidate': replied})

    return exchanges

# --- Process Sentiments ---


In [26]:
def _natural_sort_key(filename):
    """Return a sort key that orders digit runs in a file name numerically.

    With this key "audio_2_x.wav" sorts before "audio_10_x.wav", which plain
    string comparison gets wrong.
    """
    import re  # local import: `re` is not among the notebook's top-level imports

    # Splitting on a capturing group alternates text and digit-run tokens, so
    # odd positions are always the digit runs and keys never compare str to int.
    parts = re.split(r'(\d+)', filename)
    tokens = [int(part) if index % 2 else part for index, part in enumerate(parts)]
    # The raw name breaks ties between names such as "01" and "1".
    return (tokens, filename)


def process_sentiments(chatlog_chat, timestamp):
    """Attach a sentiment summary to each exchange from its audio recording.

    Files in data/interviews/<timestamp>/audio are sorted in natural order and
    matched to the exchanges by position: first file to first exchange, and so
    on. Each file is analysed with the module-level `sentiment_analyser`, and
    the result is summarised by the module-level `sentiment_summariser`.

    Args:
        chatlog_chat: List of exchange dicts; modified in place.
        timestamp: Interview identifier used to build the audio directory path.

    Returns:
        The same `chatlog_chat` list, with a 'sentiment' key added to every
        exchange whose recording was processed successfully.

    Raises:
        OSError: If the audio directory cannot be listed. Failures on an
            individual file are printed with a traceback and skipped.
    """
    filepath = os.path.join('data', 'interviews', str(timestamp), 'audio')
    print(f"Current working directory: {os.getcwd()}")
    print(f"Full audio directory path: {os.path.abspath(filepath)}")

    # os.listdir returns names in arbitrary order, but the files are paired with
    # exchanges by position, so the order has to be made explicit.
    files = sorted(
        (f for f in os.listdir(filepath) if os.path.isfile(os.path.join(filepath, f))),
        key=_natural_sort_key,
    )

    # Never process more recordings than there are exchanges to annotate.
    files = files[:len(chatlog_chat)]

    for f in files:
        print(f"File found: {f}")

    for index, file in enumerate(files):
        full_file_path = os.path.join(filepath, file)
        print(f"Processing file: {full_file_path}")

        # Add a small delay and re-check file existence
        time.sleep(0.1)
        if not os.path.exists(full_file_path):
            print(f"File not found (after delay): {full_file_path}")
            continue

        try:
            result = sentiment_analyser.analyze_audio(full_file_path)
            sentiment_summary = sentiment_summariser.chat(str(result))
            chatlog_chat[index]['sentiment'] = sentiment_summary
        except Exception as e:
            # Best effort: one failed recording must not abort the whole run.
            print(f"Error processing file {full_file_path}: {str(e)}")
            traceback.print_exc()

    return chatlog_chat



In [27]:
# --- Evaluate Candidate --- 
def evaluate_candidate(chatlog_chat):
    """Run the conversation verifier, show the chat log, and return the review.

    Relies on the module-level `candidate_evaluator` and on
    `ConversationVerifier` being importable.

    Args:
        chatlog_chat: List of exchange dicts to verify and evaluate.

    Returns:
        Whatever `candidate_evaluator.chat` returns for the stringified log.
    """
    # The verifier's return value is ignored; presumably it annotates
    # chatlog_chat in place -- TODO confirm against ConversationVerifier.
    ConversationVerifier.process_qa_pair(chatlog_chat)

    print("The Feedback JSON from the sentiment analyser and accuracy verifier: \n")
    pprint(chatlog_chat)

    return candidate_evaluator.chat(str(chatlog_chat))



In [28]:
# --- Main Execution Flow ---
# Step 1: drop the first three chat-log entries and pair the remaining
# messages into {'interviewer', 'candidate'} exchanges.
chatlog_chat = reformat_chatlog(chatlog)  # Process the loaded chatlog


In [29]:
# Step 2: add a 'sentiment' summary to each exchange from its audio recording.
# process_sentiments updates the list in place and returns the same list, so
# re-running this cell repeats the analysis on the already annotated data.
chatlog_chat = process_sentiments(chatlog_chat, timestamp) # Analyze sentiments


Current working directory: d:\Kent\University Of Kent UK\Projects\Disso\Screening-LLM
Full audio directory path: d:\Kent\University Of Kent UK\Projects\Disso\Screening-LLM\data\interviews\1724532643\audio
File found: audio_1_1724532643.wav
File found: audio_2_1724532643.wav
File found: audio_3_1724532643.wav
File found: audio_4_1724532643.wav
File found: audio_5_1724532643.wav
Processing file: data\interviews\1724532643\audio\audio_1_1724532643.wav
Analyzing audio file: data\interviews\1724532643\audio\audio_1_1724532643.wav
File exists: True
Job submitted successfully
Job completed
[{'source': {'type': 'file', 'filename': 'audio_1_1724532643.wav', 'content_type': 'audio/wav', 'md5sum': '1cb874ab676ba11b5c7b62032fa6e446'}, 'results': {'predictions': [{'file': 'audio_1_1724532643.wav', 'file_type': 'audio', 'models': {'language': {'metadata': {'confidence': 0.5488281, 'detected_language': 'hi'}, 'grouped_predictions': [{'id': 'unknown', 'predictions': [{'text': 'Hello.', 'position': {'b

In [30]:
# Step 3: verify the answers and ask the evaluator for the final review.
# NOTE(review): the recorded run of this cell ended with a FileNotFoundError for
# data/interviews/<timestamp>/outcome/chatlog.json -- presumably something called
# during evaluation expects the outcome folder (or that file) to exist already;
# confirm and create it before this step if so.
evaluation = evaluate_candidate(chatlog_chat) # Get the final evaluation





attempting to generate searc queries from answers
search queries generated for answer to question: Hello! Thank you for taking the time to speak with me today about the Entry-Level Morning Receptionist position at our research institute. I'd like to ask you a few questions to learn more about your qualifications and interest in the role. Could you start by telling me what attracted you to this position?

queries are: ['1. "Entry-Level Morning Receptionist position quick money experience University of Kent"', '2. "Qualifications for Entry-Level Morning Receptionist position research institute"']


search queries generated for answer to question: I see. Thank you for sharing that. Could you tell me more about your experience as a receptionist at the University of Kent? What were some of your main responsibilities in that role?

queries are: ['1. "Receptionist responsibilities University of Kent complaints handling"', '2. "University of Kent receptionist duties keychain fobs guest access 

FileNotFoundError: [Errno 2] No such file or directory: 'data/interviews/1724532643/outcome/chatlog.json'

In [31]:
# --- Save the annotated chat log ---
# chatlog_file_path names the outcome *directory*; it stays a directory path
# because a later cell reuses this variable for its own existence check.
chatlog_file_path = f"data/interviews/{timestamp}/outcome/"

# exist_ok=True makes the cell safe to re-run and avoids a check-then-create race.
os.makedirs(chatlog_file_path, exist_ok=True)

# Write to a file inside the directory: opening the directory itself fails, and
# the file name belongs in the path rather than being appended to the data.
chatlog_json_path = os.path.join(chatlog_file_path, "chatlog.json")
with open(chatlog_json_path, "w", encoding="utf-8") as file:
    json.dump(chatlog_chat, file, indent=4)

PermissionError: [Errno 13] Permission denied: 'data/interviews/1724532643/outcome/chatlog.json'

In [None]:
# --- Show and save the final evaluation ---
pprint(evaluation) # Print the evaluation result

evaluation_file_path = f"data/interviews/{timestamp}/outcome/evaluation.txt"

# Create the folder of the file actually being written, so this cell does not
# depend on a variable defined in another cell.
os.makedirs(os.path.dirname(evaluation_file_path), exist_ok=True)

# An explicit UTF-8 encoding keeps the write from failing on characters that the
# platform default encoding cannot represent.
with open(evaluation_file_path, "w", encoding="utf-8") as file:
    # Turn literal backslash-n sequences into real line breaks for readability.
    file.write(str(evaluation).replace("\\n", "\n"))



('Based on the provided transcript and additional information, I will evaluate '
 "the candidate's performance and provide a detailed breakdown of their "
 'suitability for the Entry-Level RAG AI Engineer role.\n'
 '\n'
 '1. Technical Knowledge and Experience:\n'
 'The candidate demonstrates a foundational understanding of Python and '
 'machine learning, having transitioned from Java to Python during their time '
 'at the University of Kent. They completed a machine learning engineering '
 'course and developed a dissertation project on an evaluation screening '
 'system. This shows relevant academic experience, but their practical '
 'experience seems limited to about a year.\n'
 '\n'
 'Their knowledge of RAG pipelines appears basic. While they mention using RAG '
 "to retrieve updated data from the internet, they don't provide specifics "
 'about implementation or optimization strategies, which are crucial for this '
 'role.\n'
 '\n'
 'The candidate shows familiarity with various la