In [1]:
# IMPORTS
from langchain_openai import ChatOpenAI

In [2]:
import re

# Function to replace incorrect quotes in key-value pairs
def replace_incorrect_quotes(text):
    """Rewrite single-quoted key/value delimiters in ``text`` as double quotes.

    Two regex substitutions are applied in order:

    1. Every ``': '`` separator becomes ``": "``.
    2. Every remaining ``'...'`` run (with no single quote inside it) becomes
       ``"..."``, unless the opening quote is directly preceded by ``: `` or
       the closing quote is directly followed by ``: ``.

    Note that an apostrophe is also a single quote, so a pair of apostrophes
    in free text is matched by the second pass as well.

    Args:
        text: The string to rewrite.

    Returns:
        A new string with the substitutions applied.
    """
    # (pattern, replacement) pairs; the order matters because the second
    # pattern only sees what the first one left behind.
    substitutions = (
        (r"': '", r'": "'),
        (r"(?<!: )'([^']*)'(?!: )", r'"\1"'),
    )

    fixed = text
    for pattern, replacement in substitutions:
        fixed = re.sub(pattern, replacement, fixed)
    return fixed

# Example usage: a question and its answer joined by a single-quoted ': ' separator.
incorrect_kv_pair = (
    "Can you explain how you handled categorical variables during the preprocessing step."
    "': '"
    "So in order to preprocess the categorical variable, we did one not encoding as well as label encoding."
)
corrected_kv_pair = replace_incorrect_quotes(incorrect_kv_pair)
corrected_kv_pair

'Can you explain how you handled categorical variables during the preprocessing step.": "So in order to preprocess the categorical variable, we did one not encoding as well as label encoding.'

In [2]:
def extract_single_json_object(jsonString: str) -> str:
    """Return the substring from the first ``{`` through the last ``}``.

    Any text before the first opening brace or after the last closing brace
    (for example explanatory prose or code fences around the object) is
    dropped. The content between the braces is returned as-is and is not
    validated.

    Args:
        jsonString: Text expected to contain one brace-delimited object.

    Returns:
        The slice of ``jsonString`` starting at the first ``{`` and ending at
        the last ``}`` (both inclusive).

    Raises:
        ValueError: If there is no ``{``, no ``}``, or the last ``}`` comes
            before the first ``{``.
    """
    start = jsonString.find("{")
    if start == -1:
        raise ValueError("no '{' found: input does not contain an object")

    # rfind locates the last closing brace directly, without reversing the string.
    end = jsonString.rfind("}")
    if end < start:
        # Covers both a missing '}' (end == -1) and a '}' that only appears
        # before the first '{', which would otherwise yield an empty slice.
        raise ValueError("no '}' found after the first '{': object is not closed")

    return jsonString[start:end + 1]

In [3]:
# Sample grouped question/answer text written in Python-dict syntax (mixed single and
# double quotes); used below to try out extract_single_json_object.
json_obj = """{'Machine Learning': {"Could you please introduce yourself, share your total years of experience, your current company, and some of the notable projects you've worked on?": 'I have a total of five years of experience with two of them as a Python developer and pleasantly working at technology for the past two years. Most of the projects I have worked on are machine learning and deep learning related. My most recent one began AI Auditor.', "Let's dive into your experience. You mentioned that most of your projects are machine learning and deep learning related. Can you tell me more about the AI Auditor project you mentioned? What specific tasks or technologies were involved in this project?": 'So in this project we had to perform the check which the auditor would do so that our internal audit team would be ready for some external audit. In order to implement this, we have used the large language models such as LAMA 3 to implement this.', 'Okay. Please elaborate on how you used LAMA 3 in the AI Auditor project.': 'Please end the interview.'}}"""

In [6]:
# Parse the extracted text into a dict and display it.
# NOTE(review): eval() executes the text as Python code; only use it on trusted input.
eval(extract_single_json_object(json_obj))

{'Machine Learning': {"Could you please introduce yourself, share your total years of experience, your current company, and some of the notable projects you've worked on?": 'I have a total of five years of experience with two of them as a Python developer and pleasantly working at technology for the past two years. Most of the projects I have worked on are machine learning and deep learning related. My most recent one began AI Auditor.',
  "Let's dive into your experience. You mentioned that most of your projects are machine learning and deep learning related. Can you tell me more about the AI Auditor project you mentioned? What specific tasks or technologies were involved in this project?": 'So in this project we had to perform the check which the auditor would do so that our internal audit team would be ready for some external audit. In order to implement this, we have used the large language models such as LAMA 3 to implement this.',
  'Okay. Please elaborate on how you used LAMA 3 

In [5]:
# INSTANTIATING LLM
# Chat client shared by the cells and functions below (they call llm.invoke(...)).
# NOTE(review): the key is a placeholder and the endpoint is a hard-coded private
# address; consider reading both from environment variables.
llm = ChatOpenAI(
    openai_api_key= "EMPTY",
    openai_api_base = "http://10.179.82.226:8010/v1",
    # Model identifier is a relative path; presumably resolved by the serving endpoint. TODO confirm.
    model_name = "./models/Meta-Llama-3.1-8B-Instruct-awq",
    temperature = 0,  
    max_tokens = 2000,
    # Stop token passed to the client; the commented-out line below is an alternative way of passing it.
    stop_sequences=["<|eot_id|>"],
    # model_kwargs={"stop": ["<|eot_id|>"]},
)

In [6]:
# Load the interview transcript as a list of lines (line endings are kept).
with open('chat_history.txt') as f:
    transcript = list(f)

In [7]:
def evaluate_transcript(file_path):
    """Score an interview transcript topic by topic using the module-level ``llm``.

    Steps:
      1. Ask the model to turn the transcript into a question -> answer object.
      2. Ask the model to group that object by topic of discussion.
      3. For every topic except the first and the last, ask the model for a
         single score and collect it.
      4. Append the mean of the collected scores to the combined model output.

    Args:
        file_path: Path of the transcript text file to evaluate.

    Returns:
        A string made of the raw scoring responses concatenated in topic
        order, followed by a final line with the mean score. When no topic
        was scored, the final line says the score is unavailable.

    Raises:
        Exceptions from opening the file, from ``llm.invoke``, from
        ``extract_single_json_object`` and from ``eval`` are not caught here.
    """
    with open(file_path) as f:
        # Kept as a list of lines: the prompt below embeds this list's repr.
        transcript = f.readlines()

    prompt = f"""
Given the transcript of the interview below, create a json object with the questions as te key and answers as the value.

TRANSCRIPT:
{transcript}
"""

    result = llm.invoke(prompt).content

    # SECURITY NOTE(review): eval() executes model-generated text as Python code.
    # The same applies to the two eval() calls further down. Consider a
    # literal-only parser (json.loads / ast.literal_eval) once the output
    # format is pinned down.
    json_obj_ques_ans = eval(extract_single_json_object(result))

    prompt = f"""
Given a json object consisting of question as key and answers as value for an interview, group the questions and answers based on the topics of discussion. The followup questions should be grouped under the main question. The output should be a json object with the topics as key and the questions and answers as value.

TRANSCRIPT:
{json_obj_ques_ans}
"""

    result = llm.invoke(prompt).content
    # Strip literal ellipses from the response before parsing it.
    result = result.replace("...", "")

    json_obj_grouped_ques = eval(extract_single_json_object(result))

    combined_eval = ""
    scores = {}

    # The first and last topics are skipped; presumably these are the
    # introduction and the closing exchange. TODO confirm.
    for topic in list(json_obj_grouped_ques)[1:-1]:
        prompt = f"""
    For the provided topic, and the questions and answers provided during interview provide a single score for the topic. The score should be based on the relevance of the questions and answers to the topic. The score should be a number between 0 and 1. The output should be a json object with the topic as key and the score as value.
    Be very strict in your evaluation and give 0 if no concrete evidence is provided.

    TOPIC: { topic}
    TRANSCRIPT:{json_obj_grouped_ques[topic]}
    """

        result = llm.invoke(prompt).content
        combined_eval += result
        scores.update(eval(extract_single_json_object(result)))

    if scores:
        final_eval = sum(scores.values()) / len(scores)
        combined_eval += f"\nThe final evaluation score is {final_eval}"
    else:
        # With two or fewer topics the slice above is empty; report that
        # instead of failing with ZeroDivisionError.
        combined_eval += "\nThe final evaluation score is unavailable because no topics were scored."

    return combined_eval

    

In [6]:
# Run the full evaluation on the first transcript and print the combined model output.
final_eval = evaluate_transcript('chat_history_1.txt')
print(final_eval)

Here is the output in JSON format:

```
{
    "Attrition Prediction Project": 0.6
}
```

The score is 0.6 because the candidate provided some relevant information about the project, such as their role in the project, the technologies and techniques used, and the models they worked with. However, the candidate did not provide a lot of concrete details about the project, and some of the answers were quite general. For example, the candidate did not provide specific information about the CI/CD pipelines used, and the answers were often vague and did not provide a lot of technical details.Here is the output:

```
{
    "Data Preprocessing": 0.8
}
```

The score is 0.8 because the candidate provided some relevant information about data preprocessing, specifically:

* They mentioned performing feature scaling and normalization
* They used label encoding to convert categorical variables into numerical values
* They used a standard scalar, which is a specific technique for scaling

However, th

In [7]:
# Run the full evaluation on the second transcript; this overwrites final_eval from the previous run.
final_eval = evaluate_transcript('chat_history_2.txt')
print(final_eval)

Here is the output in JSON format:

```
{
    "AI Auditor Project": 0.6
}
```

The score is 0.6 because the candidate provided some relevant information about the AI Auditor project, specifically about the use of open-source large language models (LAMA) and the B Double LM Python framework. They also mentioned the infrastructure used to host the models, including Google virtual machines with NVIDIA L4 GPUs.

However, the candidate did not provide a clear and detailed explanation of how the VLLM framework handled replication and deployment scaling for the LAMA model, which is a crucial aspect of the project. The candidate's answer to this question was vague and lacked concrete evidence, which is why the score is not higher.

Overall, the candidate's answers provided some relevant information about the project, but lacked depth and detail in certain areas, which is why the score is not higher.Here is the output in JSON format:

```
{
    "Machine Learning": 0.8
}
```

The score is 0.8 be

In [5]:
# Step-by-step version of the first stage: ask the model to turn the transcript into a
# question -> answer object. `transcript` is the list of lines read from chat_history.txt,
# so the list's repr is what gets embedded in the prompt.
prompt = f"""
Given the transcript of the interview below, create a json object with the questions as te key and answers as the value.

TRANSCRIPT:
{transcript}
"""

result = llm.invoke(prompt).content

In [11]:
# Parse the brace-delimited part of the model response into a Python object.
# NOTE(review): eval() executes model-generated text as Python code; treat as unsafe for untrusted output.
json_obj = eval(extract_single_json_object(result))   

In [12]:
# Display the parsed question -> answer mapping.
json_obj

{"Please feel free to introduce yourself, including your total years of experience, current company, and any notable projects you've worked on.": 'I have a total of five years of experience with two of them as a software engineer where I work at Tech Mahindra and my projects are related to machine learning and artificial intelligence. So a few of the projects that I have worked on are AI interviewer, then Attrition, Prediction and similar topics.',
 "Can you tell me more about your project 'AI Interviewer'? What was your role in the project, and what were some of the key technologies or techniques you used?": 'So in the AI interview project, we had to use the open source LLMS to actually create an application which would take the initial interview or the screening interview of any new candidates based on the JD and the resume of the candidate. So few of the technologies that we used were the open source LLMS, Lama three and along with that, we also used Azio Speech Service to convert t

In [20]:
# Second stage: ask the model to group the question -> answer mapping held in json_obj by topic.
prompt = f"""
Given a json object consisting of question as key and answers as value for an interview, group the questions and answers based on the topics of discussion. The followup questions should be grouped under the main question. The output should be a json object with the topics as key and the questions and answers as value.

TRANSCRIPT:
{json_obj}
"""

result = llm.invoke(prompt).content

In [22]:
# Parse the grouped response. This rebinds json_obj, replacing the question -> answer
# mapping with the topic-grouped object, so re-running earlier cells afterwards sees the new value.
# NOTE(review): eval() executes model-generated text as Python code; treat as unsafe for untrusted output.
json_obj = eval(extract_single_json_object(result))

In [38]:
# Third stage: collect one score per topic into `scores` (topic -> score).
scores = {}


# The slice [1:-1] skips the first and the last topic; presumably the introduction
# and the closing exchange. TODO confirm.
for topic in list(json_obj.keys())[1:-1]:
    prompt = f"""
For the provided topic, and the questions and answers provided during interview provide a single score for the topic. The score should be based on the relevance of the questions and answers to the topic. The score should be a number between 0 and 1. The output should be a json object with the topic as key and the score as value.
Be very strict in your evaluation and give 0 if no concrete evidence is provided.

TOPIC: { topic}
TRANSCRIPT:{json_obj[topic]}
"""
    
    result = llm.invoke(prompt).content
    print(result)
    # NOTE(review): eval() executes model-generated text as Python code; treat as unsafe for untrusted output.
    scores.update(eval(extract_single_json_object(result)))

Here is the output in JSON format:

```
{
    "AI Interviewer": 0.4
}
```

The score is 0.4 because the candidate provided some relevant information about the project, such as the technologies used (LLMS, Llama three, and Azio Speech Service) and the general approach taken (using Llama three to simulate an interviewer). However, the candidate did not provide concrete evidence or details about the evaluation process used to score the candidate's transcript, which is a crucial aspect of the project. The candidate's inability to recall the evaluation process and provide specific metrics or algorithms used to determine the relevance of the candidate's answers to the job description is a significant gap in their response.
Here is the output:

```
{
  "CI/CD Pipelines": 0.0
}
```

The score is 0 because the candidate's answer does not provide any concrete evidence of their experience with CI/CD pipelines, nor do they mention any specific tools or technologies used to implement CI/CD pipeline

In [41]:
# Mean of the collected topic scores.
sum(scores.values()) / len(scores)

0.12000000000000002