## Import Utilities

In [None]:
import re
import os
import csv
import json
import pandas as pd
from datetime import datetime
from openai import OpenAI
import sys

sys.path.append("..")  # Add the parent directory of LLM_Evaluations to the Python path
from llm_evaluation_utils import load_responses_df, \
                        check_and_store_response,   \
                        build_question_prompt,      \
                        QUESTIONS

# Create the OpenAI client from the key stored in the OPENAI_API_KEY
# environment variable (the lookup yields None when the variable is unset).
api_key = os.getenv("OPENAI_API_KEY")
client = OpenAI(api_key=api_key)

# Model name and sampling settings defined for the notebook.
model_name, max_tokens, temperature = "gpt-4o", 250, 1

## Defined Functions

In [None]:
def convert_timestamp(timestamp):
    """Return the datetime for a Unix timestamp, or None when given None.

    The conversion goes through ``datetime.fromtimestamp`` without a timezone
    argument, so the result is a naive datetime.
    """
    if timestamp is None:
        return None
    return datetime.fromtimestamp(timestamp)

def retrieve_file_name(file_id, client: OpenAI):
    """Look up the filename recorded for ``file_id``.

    Returns the ``filename`` attribute of the object returned by
    ``client.files.retrieve``. If anything raises, the error is printed
    together with the file ID and an empty string is returned instead.
    """
    try:
        file_info = client.files.retrieve(file_id)
        return file_info.filename
    except Exception as err:
        print(f"Error retrieving file ID {file_id}:", err)
        return ""

def upload_file(file_path, client: OpenAI):
    """Upload the file at ``file_path`` with purpose ``"batch"``.

    The file is opened in binary mode and passed to ``client.files.create``.

    Returns the object returned by ``client.files.create`` (the whole upload
    response, not just an ID), or None after printing the exception when
    opening or uploading the file fails.
    """
    try:
        with open(file_path, "rb") as handle:
            uploaded = client.files.create(file=handle, purpose="batch")
    except Exception as err:
        print(err)
        return None
    return uploaded

def create_batch(input_file_id, client: OpenAI):
    """Submit a batch job for a previously uploaded input file.

    The batch targets the ``/v1/chat/completions`` endpoint with a ``24h``
    completion window.

    Returns the object returned by ``client.batches.create``, or None after
    printing the exception when the call raises.
    """
    batch_options = {
        "input_file_id": input_file_id,
        "endpoint": "/v1/chat/completions",
        "completion_window": "24h",
    }
    try:
        return client.batches.create(**batch_options)
    except Exception as err:
        print(err)
        return None
    
def check_batch(batch_id, client: OpenAI):
    """Fetch the batch identified by ``batch_id``.

    Returns the object returned by ``client.batches.retrieve``, or None after
    printing the exception when the call raises.
    """
    try:
        batch_info = client.batches.retrieve(batch_id)
    except Exception as err:
        print(err)
        return None
    return batch_info

def cancel_batch(batch_id, client: OpenAI):
    """Request cancellation of the batch identified by ``batch_id``.

    Returns the object returned by ``client.batches.cancel``, or None after
    printing the exception when the call raises.
    """
    try:
        cancellation = client.batches.cancel(batch_id)
    except Exception as err:
        print(err)
        return None
    return cancellation
    
def extract_batch_num(filename):
    """
    Extract the batch number from a filename.

    The batch number is the run of digits that sits immediately before the
    final extension of the file's base name.
    Example: `prompts-batch_3.jsonl` -> 3

    Directory components and digits earlier in the name (for instance a
    version such as `v1.2-`) are ignored, so only a trailing integer counts.

    Args:
        filename: File name or path to inspect. Empty or None yields None.

    Returns:
        The batch number as an int, or None when the base name does not end
        with digits followed by an extension.
    """
    if not filename:
        return None
    # Only the last path component can carry the batch number.
    base_name = os.path.basename(filename)
    # Anchor on the final extension: digits, a dot, then no further dots.
    match = re.search(r'(\d+)\.[^.]*$', base_name)
    return int(match.group(1)) if match else None

def prepare_response_file(input_file_name, response_files_dir, responses_file_name="responses.jsonl"):
    """Build the path and name of the responses file for an input file.

    When ``extract_batch_num`` finds a batch number in ``input_file_name``,
    the responses name becomes ``<stem>-batch_<num>.jsonl``, where ``<stem>``
    is the part of ``responses_file_name`` before its first dot. Otherwise
    ``responses_file_name`` is used unchanged.

    Returns:
        A ``(responses_file_path, responses_file_name)`` tuple, the path being
        the name joined onto ``response_files_dir``.
    """
    batch_num = extract_batch_num(input_file_name)
    if batch_num is not None:
        stem = responses_file_name.partition('.')[0]
        responses_file_name = f"{stem}-batch_{batch_num}.jsonl"
    return os.path.join(response_files_dir, responses_file_name), responses_file_name

## Test

In [None]:
import openai
import os

# Single-request smoke test: send one question plus one transcript to the chat
# completions endpoint and print the reply.
api_key = os.getenv("OPENAI_API_KEY")
client = openai.OpenAI(api_key=api_key)

model_name = "gpt-4o"

# Sample transcript used as the input for this one request.
transcript = "Cluster headaches are treated in a multi-pronged approach. One, we give medications to try to stop the cluster headaches when the patient has them. These include medications like triptans, which are similarly used in migraine. Then we do a mini-preventive or short-term prophylaxis treatment. This is to try to give some quicker short-term relief in trying to decrease the number of the headaches. Then we use a long-term medication to give a more long-term prevention of the headaches."

# Earlier single-message variant, kept for reference:
# instruction = " ".join([QUESTION_HEAD, QUESTIONS[3], question_tail])
# message = [{"role": "user", "content": f"Instruction {instruction}\nTranscript: {transcript}\nScore:"}]

# NOTE(review): QUESTION_HEAD and QUESTION_TAIL are not among the names imported
# from llm_evaluation_utils in the visible import cell — confirm they are
# defined before this cell runs, otherwise the next line raises NameError.
system = " ".join([QUESTION_HEAD, QUESTION_TAIL])
# The question at list position 3 of QUESTIONS is paired with the transcript.
user = f"Question: {QUESTIONS[3]}\nTranscript: {transcript}"
message = [
            {"role": "system", "content": system},
            {"role": "user", "content": user}
        ]

try:
    response = client.chat.completions.create(
        model=model_name,
        messages=message,
        temperature=1,      # Between 0 and 2
        max_tokens=10,      # Reply is capped at 10 tokens
        # top_p=0.1,        # alter this or temperature, but not both.
    )
    # Print the text of the first returned choice.
    print(response.choices[0].message.content)

# API errors are printed rather than raised.
except openai.APIError as e:
    print(e)

In [None]:
# Prepare the transcript under test and the responses table used by the
# temperature / prompt sweep in the following cells.
# NOTE(review): video_id is not referenced again in the visible cells.
video_id = "sjH6PE9-MTI"
transcript = "The trigger finger is a condition that involves the tendons of the hand and the fingers. The tendons of the fingers pass through a series of pulleys, much like a fishing line passes through a series of guide wires on a fishing pole. These pulleys give us a mechanical advantage and give us a very strong grip. However, oftentimes the tendons can become inflamed and the pulleys can become thickened. This is what is referred to as a trigger finger. The symptoms of trigger finger consist of pain at the base of the fingers that is often associated with a nodule or a small bump that can be felt and it can oftentimes be associated with catching or locking of the finger. In very severe cases, this does require manual assistance to obtain full extension of the fingers. Trigger finger is often caused by such conditions as rheumatoid arthritis, gout, or diabetes, but oftentimes the cause is undetermined. If you suspect that you may be suffering from trigger finger, the first thing to do is begin taking anti-inflammatory medications such as Aleve or ibuprofen. This will assist with the pain and will also decrease the inflammation around the tendon. You may also try gentle stretching at home, which would consist of locking the affected finger together and slowly stretching it out to try to stretch the tendon and prevent it from locking or triggering. If this does not work, it is advised to see your local hand surgeon, at which point you may receive a corticosteroid injection into the area of the pain at the base of the palm. A corticosteroid injection is extremely effective at decreasing the pain and oftentimes eliminating a trigger finger. If a corticosteroid injection does not work, a small surgery is recommended. For more information, please visit your local hand surgeon or visit www.orlandohandsurgery.com."

# Locations handed to load_responses_df.
transcripts_dir = "../../Getting_Transcripts"
transcripts_file_name = "merged_filtered_videos_transcripts.csv"
responses_dir = "."

# NOTE(review): load_responses_df is a project helper; presumably it returns a
# DataFrame with a "Transcript" column, a "Video ID" column and one column per
# question — confirm against llm_evaluation_utils.
responses_df = load_responses_df(transcripts_dir, transcripts_file_name, responses_dir, model_name)
# The transcript text is not needed in the table, so drop it in place.
responses_df.drop(columns=["Transcript"], inplace=True)
# Overwrite "Video ID" with a running row number (0 .. len - 1).
responses_df["Video ID"] = range(0, len(responses_df))

# Row 0, column positions 1-15, receive the 15 reference scores that the later
# cells compare against.
responses_df.iloc[0, 1:16] = [5, 5, 5, 1, 1, 4, 3, 1, 2, 3, 1, 2, 1, 5, 5]     # Medical Expert 1 scores
# responses_df.iloc[0, 1:16] = [5, 5, 5, 3, 2, 3, 3, 2, 2, 4, 1, 2, 4, 5, 4]     # Medical Expert 2 scores
# Show the first two rows as the cell output.
responses_df.head(2)

In [None]:
# Spacing between requests, derived from the per-minute request limit.
requests_limit_per_minute = 500
base_delay = 60.0 / requests_limit_per_minute

# Sampling temperatures to try, from highest to lowest.
temperatures = [1.8, 1.6, 1.4, 1.2, 1, 0.8, 0.6, 0.4, 0.2]

# Prompt templates keyed by an integer id. Each template is filled in with
# QUESTION_HEAD, question, QUESTION_TAIL and transcript via str.format.
#
# Alternative templates, currently disabled:
#     0: """You are a medical expert. Rate the following Transcripts according to the given Question.
# Question: {question}
# {QUESTION_TAIL}
# Transcript: {transcript}""",
#
#     1: """{QUESTION_HEAD} {question} {QUESTION_TAIL}
# Transcript: {transcript}"""
prompt_templates = {
    0: (
        "{QUESTION_HEAD}\n"
        "Question: {question}\n"
        "{QUESTION_TAIL}\n"
        "Transcript: {transcript}"
    ),
}

In [None]:
import time

# Sweep every temperature x prompt template x question: send one chat
# completion per combination and hand each reply to check_and_store_response.
# NOTE(review): QUESTION_HEAD and QUESTION_TAIL are not among the names imported
# from llm_evaluation_utils in the visible import cell — confirm they are
# defined before this cell runs.
# `index` starts at 1 and is passed to check_and_store_response together with
# the question number; presumably it selects the responses_df row to fill
# (row 0 holds the reference scores) — confirm against the helper.
index = 1
# The loop variable rebinds the module-level `temperature` set in the first cell.
for temperature in temperatures:

    for template_key, prompt_template in prompt_templates.items():
        print("Template", template_key, "| Temperature:", temperature)
        # Questions are numbered from 1.
        for question_num, question in enumerate(QUESTIONS, start=1):
            prompt = prompt_template.format(QUESTION_TAIL=QUESTION_TAIL,
                                             QUESTION_HEAD=QUESTION_HEAD,
                                             question=question,
                                             transcript=transcript)
            
            # The whole prompt goes out as a single user message.
            message = [
                # {"role": "system", "content": system},
                {"role": "user", "content": prompt}
            ]
            response = client.chat.completions.create(
                    model=model_name,
                    messages=message,
                    temperature=temperature,      # Between 0 and 2
                    max_tokens=250,
                    # top_p=0.1,        # alter this or temperature, but not both.
                )            
            check_and_store_response(response.choices[0].message.content, responses_df, index, question_num)
            # Pause between requests to respect the per-minute request limit.
            time.sleep(base_delay)
        # Advance once per template ...
        index += 1
    # ... and once more per temperature, so with a single template `index`
    # moves by 2 per temperature. NOTE(review): presumably this leaves an
    # unused separator row between temperatures — confirm it is intended.
    index += 1

# Final value of `index` after the sweep, used as the number of rows to show.
number_of_rows = index
# number_of_rows = len(temp_topk_pairs) * (len(prompt_templates.keys()) + 1)
# Display the score columns (positions 1-15) for the first number_of_rows rows.
responses_df.iloc[:,1:16].head(number_of_rows)

In [None]:
# Compare the reference scores in row 0 with the first `number_of_rows` rows of
# responses_df and summarise each row with three totals.
number_of_rows = 16

# Score columns sit at positions 1-15 of responses_df. The subtraction aligns
# the row-0 Series with the frame's columns, so diff_df holds exactly those 15
# score columns (reference minus row value).
diff_df = responses_df.iloc[0, 1:16] - responses_df.iloc[0:number_of_rows, 1:16]
diff_df_abs = abs(diff_df)

# Copy before adding columns so the summary columns never write into a slice
# of diff_df.
selected_data = diff_df.iloc[0:number_of_rows, 0:16].copy()

sum_column = responses_df.iloc[0:number_of_rows, 1:16].sum(axis=1)
# diff_df / diff_df_abs contain only the score columns, so sum across all of
# them. Slicing 1:16 here would silently drop the first score column from the
# totals while "Sum" above still includes it.
sum_column_diff = diff_df.sum(axis=1)
sum_column_abs_diff = diff_df_abs.sum(axis=1)

selected_data["Sum"] = sum_column
selected_data["Difference Sum"] = sum_column_diff
selected_data["Absolute Difference Sum"] = sum_column_abs_diff

# Show the comparison table as the cell output.
selected_data.head(number_of_rows)

In [None]:
# Write the comparison table to a CSV file in the working directory, leaving
# out the DataFrame index.
selected_data.to_csv(
    path_or_buf='Different prompts and temperature responses with expert 1.csv',
    index=False,
)

## Count Tokens

In [None]:
import tiktoken


def count_tokens_from_messages(messages, model="gpt-3.5-turbo-0613"):
    """Return the number of tokens used by a list of chat messages.

    Args:
        messages: Iterable of message dicts (e.g. ``{"role": ..., "content": ...}``).
            Every value of every dict is encoded, so all values must be strings.
        model: Model name used to select the tokenizer and the per-message
            overhead.

    Returns:
        The total token count: encoded values, the per-message overhead, the
        per-name adjustment, and the 3 tokens that prime the reply.

    Raises:
        NotImplementedError: If ``model`` does not belong to a handled family.
    """
    try:
        encoding = tiktoken.encoding_for_model(model)
    except KeyError:
        print("Warning: model not found. Using cl100k_base encoding.")
        encoding = tiktoken.get_encoding("cl100k_base")
    if model in {
        "gpt-3.5-turbo-0613",
        "gpt-3.5-turbo-16k-0613",
        "gpt-4-0314",
        "gpt-4-32k-0314",
        "gpt-4-0613",
        "gpt-4-32k-0613",
        }:
        tokens_per_message = 3
        tokens_per_name = 1
    elif model == "gpt-3.5-turbo-0301":
        tokens_per_message = 4  # every message follows <|start|>{role/name}\n{content}<|end|>\n
        tokens_per_name = -1  # if there's a name, the role is omitted
    elif "gpt-3.5-turbo" in model:
        print("Warning: gpt-3.5-turbo may update over time. Returning num tokens assuming gpt-3.5-turbo-0613.")
        return count_tokens_from_messages(messages, model="gpt-3.5-turbo-0613")
    elif "gpt-4o" in model:
        # Must be checked before the generic "gpt-4" branch: delegating to
        # gpt-4-0613 would re-resolve the tokenizer for that snapshot instead
        # of keeping the one resolved above for the gpt-4o model itself.
        # The per-message overhead follows the OpenAI cookbook values.
        print("Warning: gpt-4o may update over time. Returning num tokens assuming gpt-4o-2024-08-06.")
        tokens_per_message = 3
        tokens_per_name = 1
    elif "gpt-4" in model:
        print("Warning: gpt-4 may update over time. Returning num tokens assuming gpt-4-0613.")
        return count_tokens_from_messages(messages, model="gpt-4-0613")
    else:
        raise NotImplementedError(
            f"""count_tokens_from_messages() is not implemented for model {model}. See https://github.com/openai/openai-python/blob/main/chatml.md for information on how messages are converted to tokens."""
        )
    num_tokens = 0
    for message in messages:
        num_tokens += tokens_per_message
        for key, value in message.items():
            num_tokens += len(encoding.encode(value))
            if key == "name":
                num_tokens += tokens_per_name
    num_tokens += 3  # every reply is primed with <|start|>assistant<|message|>
    return num_tokens

# count_tokens_from_messages(message, model_name)