## Import Utilities

In [1]:
import google.generativeai as genai
import pandas as pd
import json
import time
import os
import sys
sys.path.append("..")  # Add the parent directory of LLM_Evaluations to the Python path
from llm_evaluation_utils import load_responses_df, \
                        check_and_store_response,    \
                        build_question_prompt,      \
                        QUESTIONS, QUESTION_HEAD, QUESTION_TAIL

# Pull the Gemini API key from the environment so no secret lives in the notebook,
# then hand it to the google.generativeai client.
api_key = os.getenv("GOOGLE_API_KEY")
genai.configure(api_key=api_key)

# Safety filters handed to the GenerativeModel instances created in this notebook.
# Every listed harm category is given the "BLOCK_NONE" threshold.
# NOTE(review): both "HARM_CATEGORY_DANGEROUS" and "HARM_CATEGORY_DANGEROUS_CONTENT"
# are listed — confirm against the API docs whether both entries are needed.
_harm_categories = (
    "HARM_CATEGORY_DANGEROUS",
    "HARM_CATEGORY_HARASSMENT",
    "HARM_CATEGORY_HATE_SPEECH",
    "HARM_CATEGORY_SEXUALLY_EXPLICIT",
    "HARM_CATEGORY_DANGEROUS_CONTENT",
)
safety_settings = [
    {"category": category_name, "threshold": "BLOCK_NONE"}
    for category_name in _harm_categories
]

# Sampling parameters for the baseline model constructed right after this.
generation_config = dict(
    temperature=0.8,
    top_p=1,
    top_k=8,
    max_output_tokens=250,
)

# Baseline model: combines the model name with the notebook-level safety settings
# and generation config defined above.
model_name = "gemini-1.5-pro-latest"
model = genai.GenerativeModel(
    model_name=model_name,
    safety_settings=safety_settings,
    generation_config=generation_config,
    # system_instruction=system,  # left disabled; original note: only in 1.5-pro
)

## Test

### Prepare DataFrame

In [None]:
# Single reference video for the parameter/prompt test: its id and the transcript
# text that is substituted into every prompt template's {transcript} field.
video_id = "sjH6PE9-MTI"
transcript = "The trigger finger is a condition that involves the tendons of the hand and the fingers. The tendons of the fingers pass through a series of pulleys, much like a fishing line passes through a series of guide wires on a fishing pole. These pulleys give us a mechanical advantage and give us a very strong grip. However, oftentimes the tendons can become inflamed and the pulleys can become thickened. This is what is referred to as a trigger finger. The symptoms of trigger finger consist of pain at the base of the fingers that is often associated with a nodule or a small bump that can be felt and it can oftentimes be associated with catching or locking of the finger. In very severe cases, this does require manual assistance to obtain full extension of the fingers. Trigger finger is often caused by such conditions as rheumatoid arthritis, gout, or diabetes, but oftentimes the cause is undetermined. If you suspect that you may be suffering from trigger finger, the first thing to do is begin taking anti-inflammatory medications such as Aleve or ibuprofen. This will assist with the pain and will also decrease the inflammation around the tendon. You may also try gentle stretching at home, which would consist of locking the affected finger together and slowly stretching it out to try to stretch the tendon and prevent it from locking or triggering. If this does not work, it is advised to see your local hand surgeon, at which point you may receive a corticosteroid injection into the area of the pain at the base of the palm. A corticosteroid injection is extremely effective at decreasing the pain and oftentimes eliminating a trigger finger. If a corticosteroid injection does not work, a small surgery is recommended. For more information, please visit your local hand surgeon or visit www.orlandohandsurgery.com."

# Locations handed to load_responses_df (its exact loading behavior lives in
# llm_evaluation_utils and is not visible here).
transcripts_dir = "../../Getting_Transcripts"
transcripts_file_name = "merged_filtered_videos_transcripts.csv"
responses_dir = "."

# Build the working frame without in-place mutation: drop() returns a new frame,
# so whatever object the loader returned is left untouched and re-running this
# cell always starts from a fresh load.
responses_df = load_responses_df(
    transcripts_dir, transcripts_file_name, responses_dir, model_name
).drop(columns=["Transcript"])

# Replace the video ids with plain row numbers 0..n-1; the test loop below passes
# such integers to check_and_store_response.
responses_df["Video ID"] = range(len(responses_df))

# Row 0 carries the human reference scores, written into columns 1..15.
# responses_df.iloc[0, 1:16] = [5, 5, 5, 1, 1, 4, 3, 1, 2, 3, 1, 2, 1, 5, 5]     # Medical Expert 1 scores
responses_df.iloc[0, 1:16] = [5, 5, 5, 3, 2, 3, 3, 2, 2, 4, 1, 2, 4, 5, 4]     # Medical Expert 2 scores
responses_df.head(2)

### Prepare different model parameters

In [None]:
def configure_model(temperature, top_k, top_p=1, max_output_tokens=250):
    """Build a GenerativeModel for one sampling configuration.

    Args:
        temperature: Sampling temperature placed in the generation config.
        top_k: Top-k value placed in the generation config.
        top_p: Top-p value placed in the generation config. Defaults to 1,
            the value that used to be hard-coded here.
        max_output_tokens: Output-token cap placed in the generation config.
            Defaults to 250, the value that used to be hard-coded here.

    Returns:
        A ``genai.GenerativeModel`` created with the notebook-level
        ``model_name`` and ``safety_settings`` plus the generation config
        assembled from the arguments.
    """
    generation_config = {
        "temperature": temperature,
        "top_p": top_p,
        "top_k": top_k,
        "max_output_tokens": max_output_tokens,
    }

    return genai.GenerativeModel(
        model_name=model_name,
        safety_settings=safety_settings,
        generation_config=generation_config,
    )

# Pause between requests, derived from the per-minute request budget.
requests_limit_per_minute = 15
base_delay = 60.0 / requests_limit_per_minute

# Sweep grid: every temperature combined with every top_k, temperature-major,
# i.e. (0.8, 10), (0.8, 8), (0.7, 10), ... (0.5, 8).
_temperatures = (0.8, 0.7, 0.6, 0.5)
_top_k_values = (10, 8)
temp_topk_pairs = [
    (temp_value, k_value)
    for temp_value in _temperatures
    for k_value in _top_k_values
]

# Prompt variants under comparison; the placeholders are filled with str.format
# in the test loop.
prompt_templates = {
    0: (
        "You are a medical expert. Rate the following Transcripts according to the given Question. {QUESTION_TAIL}\n"
        "Question: {question}\n"
        "Transcript: {transcript}"
    ),
    1: (
        "{QUESTION_HEAD} {question} {QUESTION_TAIL}\n"
        "Transcript: {transcript}"
    ),
}

### Testing two prompt templates

In [None]:
# Run every (temperature, top_k) pair against every prompt template and store the
# answers through check_and_store_response, one responses_df row per combination.
# The extra increment after each pair leaves one unused row between pairs.

# Fields that are identical for every prompt; only {question} changes per call.
fixed_fields = {
    "QUESTION_TAIL": QUESTION_TAIL,
    "QUESTION_HEAD": QUESTION_HEAD,
    "transcript": transcript,
}

index = 1  # starts after row 0, which the DataFrame-preparation cell fills with expert scores
for temperature, top_k in temp_topk_pairs:
    model = configure_model(temperature, top_k)

    for template_key, prompt_template in prompt_templates.items():
        print("Template", template_key, "| Temperature:", temperature, "| Top_k:", top_k)
        for question_num, question in enumerate(QUESTIONS, start=1):
            prompt = prompt_template.format(question=question, **fixed_fields)
            response = model.generate_content(prompt)
            check_and_store_response(response.text, responses_df, index, question_num)
            time.sleep(base_delay)  # stay inside the per-minute request budget
        index += 1  # next template -> next row
    index += 1  # spacer row between parameter pairs

number_of_rows = index
# number_of_rows = len(temp_topk_pairs) * (len(prompt_templates.keys()) + 1)
responses_df.iloc[:, 1:16].head(number_of_rows)

### Comparing Scores

In [None]:
# Compare every tested row with the expert reference in row 0.
# Columns 1..15 of responses_df are the score columns (the expert scores were
# written to exactly that slice of row 0).
expert_scores = responses_df.iloc[0, 1:16]
tested_scores = responses_df.iloc[0:number_of_rows, 1:16]

diff_df = expert_scores - tested_scores
diff_df_abs = abs(diff_df)

# Work on an explicit copy so adding the summary columns below neither touches
# diff_df nor triggers pandas' SettingWithCopyWarning.
selected_data = diff_df.copy()

sum_column = tested_scores.sum(axis=1)
# diff_df / diff_df_abs contain ONLY the 15 score columns (positions 0..14).
# Slicing them with 1:16 again, as the earlier version did, silently dropped the
# first question from both totals; sum across all of their columns instead.
sum_column_diff = diff_df.sum(axis=1)
sum_column_abs_diff = diff_df_abs.sum(axis=1)

selected_data["Sum"] = sum_column
selected_data["Difference Sum"] = sum_column_diff
selected_data["Absolute Difference Sum"] = sum_column_abs_diff

selected_data.head(number_of_rows)

In [None]:
# Persist the comparison table; the row index is not written to the file.
comparison_csv_path = 'Different prompts and temperature responses with expert 2.csv'
selected_data.to_csv(comparison_csv_path, index=False)

## Gemini Evaluation

In [None]:
# NOTE(review): the Test section above replaced "Video ID" with integers and
# dropped "Transcript", while the recorded output of the next cell shows YouTube
# ids — this cell appears to expect a freshly loaded responses_df. Confirm, and
# reload the frame before this point if so.
experts_file = "../../../Videos_and_DISCERN_data/filtered_experts_scores.csv"
experts_df = pd.read_csv(experts_file)

# Attach each video's topic as the third column; values are aligned on the row
# index. insert() raises ValueError if "Topic" is already present, so this cell
# is meant to run once per freshly prepared frame.
responses_df.insert(2, "Topic", experts_df["Topic"])

# Keep only the two topics under evaluation and only questions 1-5.
# Taking an explicit copy and using a non-inplace drop means later writes into
# responses_df act on an independent frame rather than on a slice of the
# unfiltered one (avoids SettingWithCopyWarning / lost updates).
col_to_drop = [f"Q{i}" for i in range(6, 16)] + [f"Response_{i}" for i in range(6, 16)]
topic_mask = responses_df["Topic"].isin(["Nocturnal Enuresis", "Delayed Ejaculation"])
responses_df = responses_df[topic_mask].copy().drop(columns=col_to_drop)

print("responses_df shape:", responses_df.shape)
responses_df.head(2)

In [None]:
# Walk every remaining video and, for questions 1-5, pass each stored raw
# response back through check_and_store_response with rating_scale=1.
question_numbers = range(1, 6)
for idx, row in responses_df.iterrows():
    video_id = row["Video ID"]
    for question_num in question_numbers:
        response = row[f"Response_{question_num}"]
        if pd.isna(response):
            continue  # no stored response for this question
        print(video_id, question_num)
        check_and_store_response(response, responses_df, video_id, question_num, rating_scale=1)

OWtlKJbg_Pc 1
OWtlKJbg_Pc 2
OWtlKJbg_Pc 3
OWtlKJbg_Pc 4
OWtlKJbg_Pc 5
95T3ABnnhOs 1
95T3ABnnhOs 2
95T3ABnnhOs 3
95T3ABnnhOs 4
95T3ABnnhOs 5
n8kmsN0em90 1
n8kmsN0em90 2
n8kmsN0em90 3
n8kmsN0em90 4
n8kmsN0em90 5
XFitRQ9S68Q 1
XFitRQ9S68Q 2
XFitRQ9S68Q 3
XFitRQ9S68Q 4
XFitRQ9S68Q 5
qhNV0YhvaNQ 1
qhNV0YhvaNQ 2
qhNV0YhvaNQ 3
qhNV0YhvaNQ 4
qhNV0YhvaNQ 5
McNoYWn4RVs 1
McNoYWn4RVs 2
McNoYWn4RVs 3
McNoYWn4RVs 4
McNoYWn4RVs 5
AGQUSK7YTx4 1
AGQUSK7YTx4 2
AGQUSK7YTx4 3
AGQUSK7YTx4 4
AGQUSK7YTx4 5
PY5nb3RYDWg 1
PY5nb3RYDWg 2
PY5nb3RYDWg 3
PY5nb3RYDWg 4
PY5nb3RYDWg 5
S-hGcEjpcJ8 1
S-hGcEjpcJ8 2
S-hGcEjpcJ8 3
S-hGcEjpcJ8 4
S-hGcEjpcJ8 5
ouUdrm-LlUc 1
ouUdrm-LlUc 2
ouUdrm-LlUc 3
ouUdrm-LlUc 4
ouUdrm-LlUc 5
A-aSE0mMsRk 1
A-aSE0mMsRk 2
A-aSE0mMsRk 3
A-aSE0mMsRk 4
A-aSE0mMsRk 5
UAuZte7iYM8 1
UAuZte7iYM8 2
UAuZte7iYM8 3
UAuZte7iYM8 4
UAuZte7iYM8 5
gQBt-EP8gNs 1
gQBt-EP8gNs 2
gQBt-EP8gNs 3
gQBt-EP8gNs 4
gQBt-EP8gNs 5
363ObJffZQc 1
363ObJffZQc 2
363ObJffZQc 3
363ObJffZQc 4
363ObJffZQc 5
hS_CdE_yJDk 1
hS_CdE