### Import Modules and Set up Keys

In [2]:
from google import genai
from openai import OpenAI
import os, requests, warnings, glob, re, sys
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
warnings.filterwarnings('ignore') 

# --- Google Gemini configuration ---
# Keys are read from environment variables; a missing variable raises KeyError right here.
my_gemini_api_key = os.environ["GOOGLE_API_KEY"]
my_gemini_model = "gemini-1.5-flash"

# --- OpenAI configuration ---
# NOTE(review): the key is stored as an attribute on the OpenAI *class* (not on an instance)
# and is read back from there on the next two statements — confirm this is intentional.
OpenAI.api_key = os.environ["OPENAI_API_KEY"]
openai_client = OpenAI(api_key=OpenAI.api_key)
# HTTP headers carrying the bearer token for JSON requests.
headers = {
    "Content-Type": "application/json",
    "Authorization": f"Bearer {OpenAI.api_key}",
}
my_gpt_model = "gpt-4o-mini"

def extract_first_integer(text):
  """Return the first run of decimal digits in `text` as an int.

  Args:
    text: An int (returned unchanged) or a string to search.

  Returns:
    The int itself, the first digit sequence of a string converted to int,
    or None when `text` is neither an int nor a string containing digits.
  """
  # Already-numeric input is passed straight through.
  if isinstance(text, int):
    return text
  # Anything that is not a string cannot be searched.
  if not isinstance(text, str):
    return None
  digits = re.search(r'\d+', text)
  return int(digits.group(0)) if digits else None

### 1 Define Slices, Paths, and Instruction Prompts 

In [3]:
# Fractions of each transcript (measured from its start) that are submitted for rating.
slices = [1.0, 0.75, 0.5, 0.4, 0.3, 0.2, 0.1, 0.05, 0.01]

# Input transcripts are located at <folder><speech name><appendix>; ratings go to the output folder.
speech_transcript_folder = '../data/02_transcripts_corrected/'
speech_appendix = '_cut_corrected.txt'
output_folder = '../data/05_ratings/'

# Instruction prompts: five phrasings of the same rating task.
# The transcript text is appended directly after each prompt.
instruct_prompt_list = [
    # Prompt 1: Direct and Minimalist
    "Here is a transcript from a public presentation on a science/research topic. Please rate the speech quality on a scale from 1 (worst) to 10 (best). Consider factors such as clarity, engagement, and how easy it is to follow. Return only the single rating number as a plain integer, with no other text or characters. Here is the speech text: ",
    # Prompt 2: Emphasizing Holistic Quality
    "You will receive a transcript of a science/research presentation. Rate the overall rhetorical quality on a scale from 1 (worst) to 10 (best), considering clarity, engagement, structure, and delivery. Return only the single rating number as a plain integer, with no other text or characters. Here is the speech text: ",
    # Prompt 3: Neutral and Instructional
    "Given the following transcript of a science/research presentation, assess its overall speech quality. Focus on aspects such as clarity, engagement, and coherence. Provide only a single numerical rating from 1 (worst) to 10 (best), without any additional text. Here is the speech text: ",
    # Prompt 4: Framing as an Expert Evaluation
    "Imagine you are an expert in public speaking evaluation. Below is a transcript from a science/research presentation. Please rate the effectiveness of the speech on a scale of 1 (worst) to 10 (best) based on clarity, engagement, and ease of understanding. Return only the single rating number as a plain integer, with no other text or characters. Here is the speech text: ",
    # Prompt 5: Avoiding Explicit Criteria While Keeping the Task Clear
    "Please evaluate the following transcript of a public science/research presentation. Assign a quality rating from 1 (worst) to 10 (best) based on your assessment. Return only a single rating number as a plain integer, with no other text or characters. Here is the speech text: ",
]
num_instruction_prompts = len(instruct_prompt_list)

### 2 Subjects

In [10]:
# Load the subject list; its `name` column holds the speech identifiers
# that are later combined with the transcript folder and file appendix.
df = pd.read_csv('../data/list_of_subjects.csv')
speech_names = list(df["name"].values)
num_speeches = len(speech_names)

# Show how many speeches were found, then preview the first three identifiers.
print(num_speeches)
speech_names[:3]

128


['SUB001_SPEECH001', 'SUB002_SPEECH001', 'SUB003_SPEECH001']

### 3 Build the Full Prompts and Run the Ratings

In [6]:
# Rate every speech with both LLMs, for every slice length and every instruction prompt.
# One CSV (columns: name, rating) is written per (model, slice, prompt) combination.

REQUEST_TIMEOUT_SECONDS = 120  # upper bound for one OpenAI HTTP request, so a stalled call cannot hang the run


def _read_speech_slice(speech_name, slice_fraction):
    """Return the leading `slice_fraction` (by character count) of one speech transcript."""
    input_text_file = speech_transcript_folder + speech_name + speech_appendix
    with open(input_text_file, "r", encoding="utf-8") as file:
        content = file.read()
    # This is where the text is limited to the part defined by the slice length.
    return content[:int(len(content) * slice_fraction)]


def _rate_with_gemini(client, prompt):
    """Submit `prompt` to Gemini; return the rating as an int, or None if the reply holds no integer."""
    response = client.models.generate_content(model=my_gemini_model, contents=prompt)
    # Parse the whole reply: reading only its first character would turn a "10" into 1.
    return extract_first_integer(response.text)


def _rate_with_openai(prompt):
    """Submit `prompt` to the OpenAI chat completions endpoint; return the rating as an int or None.

    Raises:
        requests.HTTPError: if the API answers with an error status.
    """
    payload = {
        "model": my_gpt_model,
        "messages": [{"role": "user", "content": [{"type": "text", "text": prompt}]}],
    }
    response = requests.post(
        "https://api.openai.com/v1/chat/completions",
        headers=headers,
        json=payload,
        timeout=REQUEST_TIMEOUT_SECONDS,
    )
    # Fail with the HTTP error itself instead of a KeyError on a missing 'choices' field.
    response.raise_for_status()
    return extract_first_integer(response.json()['choices'][0]['message']['content'])


def _save_ratings(names, ratings, model_tag, slice_fraction, prompt_number):
    """Write one name/rating table to the output folder as CSV."""
    # The nullable Int64 dtype keeps ratings as integers in the CSV even when some are missing.
    ratings_df = pd.DataFrame({"name": names, "rating": pd.array(ratings, dtype="Int64")})
    out_name = (output_folder + "corrected_transcripts_" + model_tag + "_slice#" + str(slice_fraction)
                + "_prompt#" + str(prompt_number) + "_rating.csv")
    ratings_df.to_csv(out_name, index=False)


# Make sure results can be written before any API call is made.
os.makedirs(output_folder, exist_ok=True)

# The Gemini client does not depend on the loop variables, so it is created once.
gemini_client = genai.Client(api_key=my_gemini_api_key)

# loop over slices
for curr_slice in slices:
    print('Current Slice Length:')
    print(curr_slice)

    # loop over different prompting strategies (prompt numbers in file names are 1-based)
    for prompt_number, curr_instruct_prompt_text in enumerate(instruct_prompt_list, start=1):

        # loop over the individual speeches
        rated_names, gemini_ratings, openai_ratings = [], [], []
        for curr_speech_name in speech_names:
            # create the full prompt to be submitted: instruction text followed by the transcript slice
            part_of_speech = _read_speech_slice(curr_speech_name, curr_slice)
            full_prompt = curr_instruct_prompt_text + part_of_speech

            gemini_rating = _rate_with_gemini(gemini_client, full_prompt)
            openai_rating = _rate_with_openai(full_prompt)

            # A reply without any integer is recorded as missing rather than aborting the whole run.
            for model_label, rating in (("Gemini", gemini_rating), ("OpenAI", openai_rating)):
                if rating is None:
                    print(f"Warning: no integer rating from {model_label} for {curr_speech_name} "
                          f"(slice {curr_slice}, prompt {prompt_number}); stored as missing.")

            rated_names.append(str(curr_speech_name))
            gemini_ratings.append(gemini_rating)
            openai_ratings.append(openai_rating)

        # convert results to tables and save, one file per model
        _save_ratings(rated_names, gemini_ratings, "gemini1.5flash", curr_slice, prompt_number)
        _save_ratings(rated_names, openai_ratings, "gpt4omini", curr_slice, prompt_number)

Current Slice Length:
1.0
Current Slice Length:
0.75
Current Slice Length:
0.5
Current Slice Length:
0.4
Current Slice Length:
0.3
Current Slice Length:
0.2
Current Slice Length:
0.1
Current Slice Length:
0.05
Current Slice Length:
0.01
