# LLM Loading

In [None]:
# pip install openai

# Benchmarking

In [1]:
import torch
import pandas as pd

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

Using device: cuda


In [2]:
DATASET = "FoCus"
SET = "train"           #train,   valid       


COT_SETUP = False

# Determine COT_ value based on COT_SETUP
COT_ = "-COT" if COT_SETUP else ""

LLM = "gpt-4-turbo"  # gpt-3.5-turbo, gpt-4-turbo, gpt-4o-mini

MAX_NEW_TOKEN = 220 if COT_SETUP else 110

## Response Generation

In [3]:
torch.cuda.empty_cache()

# Loading the prompt
df = pd.read_csv(f'./Prompts/{DATASET}-{SET}.csv')

print(df.shape)
df.head(3)

(1500, 3)


Unnamed: 0,personas,context,act_response
0,I want to learn more about the Royal Navy.I ha...,"User1: I know this place, but I dont remember ...",User2: The shit was controversial he scrapped ...
1,I collect lighthouses.Ive never been to to Eng...,"User1: Wow, this is amazing! What is this?\nUs...",User2: Yes! An episode from ChuckleVision and ...
2,I am interested in region.I would like to visi...,User1: Where is this place?\nUser2: This is Ac...,User2: Waterways are essential to the commerci...


In [4]:
# Function to generate a prompt
def create_benchmarking_prompt(personas, context, include_cot=False):

    prompt = (
        "I will provide you with a conversation context and the personas of the participants, that can be annotated with speaker information.\n"
        "As a participant in this conversation, your task is to generate a personalized response, considering the conversation context and personas.\n\n"
        "Participant Personas:\n"
        f"{personas}\n\n"
        "Conversation Context:\n"
        f"{context}\n\n"
        "Task Instruction:\n"
        "* Provide an unannotated response.\n"
        "* If only one persona is available, personalize the response accordingly.\n"
        "* If the conversation context is a single query, respond appropriately to the query.\n"
    )
    
    if include_cot:
        prompt += (
            "* Apply Chain of Thought reasoning to reflect on the alignment of your response with the personas.\n"
        )

    prompt += (
        "\nOutput Format: only give a JSON of the following format:\n"
        "{\n"
    )
    
    if include_cot:
        prompt += (
            '  "reasoning": "briefly describe your personalization process (in {max_word_count} words or less)."\n'
        )
        
    prompt += (
        '  "response": "provide the personalized natural language response here (in {max_word_count} words or less)."\n'
        "}\n"
    )

    return prompt


In [5]:
first_row = df.iloc[20]
personas = first_row['personas']
context = first_row['context']

# Example usage
prompt = create_benchmarking_prompt(personas, context, include_cot=True)
print(prompt)

I will provide you with a conversation context and the personas of the participants, that can be annotated with speaker information.
As a participant in this conversation, your task is to generate a personalized response, considering the conversation context and personas.

Participant Personas:
I am going on a vacation trip to Boston, Massachusetts.I love visiting aquariums.I love whales.I like exhibitions about marine animals.I like to see penguins.

Conversation Context:
User1: Where is this place?
User2: This is the New England Aquarium, a public aquarium located in Boston, Massachusetts. As you told me that you are going on vacation there and that you love visiting aquariums, you should visit this place.
User1: Did you say something about a whale-related program outside the park.. What is it about?
User2: The aquarium has a whale watching cruise program; on the tour, observers interact with naturalists and educators at a location 30 miles east of Boston called the Stellwagen Bank N

In [6]:
from dotenv import load_dotenv

load_dotenv()                   # Loading API key from .env file

True

In [7]:
import openai
import os

openai.api_key = os.getenv("OPENAI_API_KEY")

client = openai.OpenAI()

# Generate a response from the model using the updated API
chat_completion = client.chat.completions.create(
    model= LLM, 
    messages=[
        # https://platform.openai.com/docs/guides/json-mode
        {"role": "system", "content": "You are a helpful response generator to output JSON."},
        {"role": "user", "content": prompt}
    ],
    temperature = 0,      # Based on FELM paper (Greedy Setup)
    max_tokens = MAX_NEW_TOKEN      # Based on max response length + reasoning
)

## Retrieve and return the response text
response_text = chat_completion.choices[0].message.content
response_text

'{\n  "reasoning": "Considering the user\'s interest in penguins, the response highlights the penguin exhibit at the New England Aquarium, aligning with their preferences.",\n  "response": "Yes, the New England Aquarium has a wonderful penguin exhibit! You\'ll be able to see different species of penguins, including the playful African penguins and the majestic rockhopper penguins. It\'s a delightful experience for anyone who enjoys watching these charming birds."\n}'

In [8]:
import openai
import time
import os
import json
import pandas as pd
from tqdm import tqdm

# Initialize OpenAI client once
client = openai.OpenAI()

# Result containers
gen_responses = []
reasoning_responses = []
response_times = []

# Ensure debugging log directory exists
log_dir = "./debugging logs"
os.makedirs(log_dir, exist_ok=True)

# Prepare the log file path
log_file_path = os.path.join(log_dir, f'{DATASET}_{LLM}_{COT_}.txt')

# Open the log file for writing
with open(log_file_path, 'w') as log_file:

    for index, row in tqdm(df.iterrows(), total=len(df), desc="Generating responses"):
        personas = row['personas']
        context = row['context']

        prompt = create_benchmarking_prompt(personas, context, COT_SETUP)

        # Measure the start time
        start_time = time.time()

        try:
            # Generate a response from the model
            chat_completion = client.chat.completions.create(
                model=LLM,
                messages=[
                    {"role": "system", "content": "You are a helpful response generator to output JSON."},
                    {"role": "user", "content": prompt}
                ],
                temperature=0,
                max_tokens=MAX_NEW_TOKEN
            )
            end_time = time.time()
            response_time = end_time - start_time

            # Retrieve raw response text
            response_text = chat_completion.choices[0].message.content
            gen_responses.append(response_text)

            # Try parsing just to extract reasoning (optional)
            try:
                response_data = json.loads(response_text)
                if COT_SETUP:
                    reasoning_responses.append(response_data.get('reasoning', ''))
                else:
                    reasoning_responses.append('')
            except json.JSONDecodeError as e:
                error_message = (f"Failed to parse JSON for response at index {index}:\n"
                                 f"{response_text}\n"
                                 f"Error: {str(e)}\n")
                print(error_message)
                log_file.write(error_message + "\n")
                reasoning_responses.append(None if COT_SETUP else '')

            response_times.append(response_time)

        except Exception as e:
            error_message = f"OpenAI API error at index {index}: {str(e)}\n"
            print(error_message)
            log_file.write(error_message + "\n")
            gen_responses.append(None)
            reasoning_responses.append(None if COT_SETUP else '')
            response_times.append(None)

# Create the response DataFrame
if COT_SETUP:
    response_df = pd.DataFrame({
        'gen_response': gen_responses,
        'reasoning': reasoning_responses,
        'response_time': response_times
    })
else:
    response_df = pd.DataFrame({
        'gen_response': gen_responses,
        'response_time': response_times
    })


Generating responses:   7%|▋         | 111/1500 [04:48<1:00:11,  2.60s/it]

Failed to parse JSON for response at index 110:
{
  "response": "Certainly! Valpolicella is not only renowned for its rich winemaking history but also for its beautiful landscapes and cultural heritage. The area is dotted with charming villages and historic estates where you can explore ancient cellars and taste some of the finest wines, including the famous Amarone. The proximity to Lake Garda offers a scenic backdrop and opportunities for leisurely activities like boating and hiking. Visiting Valpolicella will give you a wonderful blend of natural beauty, history, and exquisite wines, making your
Error: Unterminated string starting at: line 2 column 15 (char 16)



Generating responses:  25%|██▌       | 376/1500 [17:47<1:20:38,  4.30s/it]

Failed to parse JSON for response at index 375:
{
  "response": "Absolutely! The Strawberry Line Trail is a delightful path that follows part of the old railway line. It's perfect for walking and enjoying nature, stretching about 10 miles from Yatton to Cheddar. You'll love the scenic views and the lush greenery, making it a great spot for nature lovers like yourself. Don't forget your insect repellent, though, as the trail can be quite wooded in places. After your walk, you can refresh yourself with some fresh water, just like you enjoy after a mountain
Error: Unterminated string starting at: line 2 column 15 (char 16)



Generating responses:  32%|███▏      | 481/1500 [23:20<1:01:17,  3.61s/it]

Failed to parse JSON for response at index 480:
{
  "response": "Absolutely! Bryce Canyon National Park is renowned for its stunning red rock formations and expansive vistas. The park offers numerous hiking trails that vary in difficulty, providing opportunities for both casual walks and more challenging hikes. The natural amphitheaters and hoodoos create a breathtaking landscape that changes beautifully with the sunlight, making it a photographer's paradise. Additionally, the park's high elevation results in cooler temperatures, which can be a pleasant escape during the hot summer months. It's a place where you can truly appreciate the beauty and diversity
Error: Unterminated string starting at: line 2 column 15 (char 16)



Generating responses:  37%|███▋      | 550/1500 [26:39<54:36,  3.45s/it]  

Failed to parse JSON for response at index 549:
{
  "response": "Certainly! Rottnest Island, or Wadjemup, is not only a beautiful destination with its clear waters and scenic views but also rich in history and culture. It's a car-free zone, which makes it a peaceful retreat from the bustling city life. You can explore the island by bike, which is a popular way to see the sights. The island also offers a chance to see the adorable quokkas, small marsupials that are native to the island. With your interests in swimming and the No
Error: Unterminated string starting at: line 2 column 15 (char 16)



Generating responses:  45%|████▍     | 674/1500 [32:35<54:40,  3.97s/it]  

Failed to parse JSON for response at index 673:
{
  "response": "Yes, one remarkable aspect of the Mosque of Sultan al-Muayyad is its location. It was built over the ruins of a prison, next to the famous Bab Zuweila gate, which is one of the gates in the old city walls of Cairo. The mosque's minarets offer a panoramic view of the city, which is quite unique. Additionally, the mosque is noted for its intricate Mamluk architecture, featuring beautiful domes and minarets that reflect the construction styles of the 15
Error: Unterminated string starting at: line 2 column 15 (char 16)



Generating responses:  65%|██████▍   | 969/1500 [44:57<25:30,  2.88s/it]  

Failed to parse JSON for response at index 968:
{
  "response": "Another interesting fact about the Memorial to the Murdered Jews of Europe is that it consists of 2,711 concrete slabs arranged in a grid pattern on a sloping field, which aims to create a confusing and uneasy atmosphere as you walk through it, reflecting the disorienting and isolating effects of the Holocaust. Additionally, the architect, Peter Eisenman, intended for the design to be abstract to allow visitors to interpret it in their own ways, which ties into the broader goal of encouraging reflection and remembrance."

Error: Expecting ',' delimiter: line 3 column 1 (char 544)



Generating responses:  68%|██████▊   | 1021/1500 [46:54<19:27,  2.44s/it]

Failed to parse JSON for response at index 1020:
{
  "response": "To reach the Sarala Temple, you can use various means of transport depending on where you're starting from in India. If you're flying in, the nearest airport is Bhubaneswar Airport. From there, you can hire a taxi or take a bus to Jagatsinghpur, and then local transport to the temple. It's an excellent way to immerse yourself in the local culture and observe the daily life of the people, which aligns well with your interest in getting to know the local culture in
Error: Unterminated string starting at: line 2 column 15 (char 16)



Generating responses:  81%|████████  | 1213/1500 [55:17<12:26,  2.60s/it]

Failed to parse JSON for response at index 1212:
{
  "response": "The Jesus Boat, also known as the Ancient Galilee Boat, is an ancient fishing boat from the 1st century AD, discovered in 1986 on the northwest shore of the Sea of Galilee. It's believed to be the type of boat that Jesus and his disciples might have used. The boat is now preserved and displayed at the Yigal Allon Museum in Kibbutz Ginosar, and it offers a fascinating glimpse into the maritime life of the time. Visiting it could be a
Error: Unterminated string starting at: line 2 column 15 (char 16)



Generating responses:  88%|████████▊ | 1324/1500 [1:00:27<12:18,  4.20s/it]

Failed to parse JSON for response at index 1323:
{
  "response": "Certainly! Pulau Ubin is a treasure trove of natural beauty and history. It's a great place for nature enthusiasts like yourself to explore. The island's rustic charm is preserved through its traditional kampongs and untouched forests. Besides its rich biodiversity, Ubin is also culturally significant, with historical sites like the restored Tudor-style House No. 1 and the Chek Jawa wetlands, which are a hotspot for biodiversity. It's a perfect spot for someone interested in both the natural environment and the
Error: Unterminated string starting at: line 2 column 15 (char 16)



Generating responses:  93%|█████████▎| 1395/1500 [1:04:22<05:23,  3.08s/it]

Failed to parse JSON for response at index 1394:
{
  "response": "The architecture of Angkor Wat is a stunning example of classical Khmer architecture, known for its grand scale and intricate detail. The temple's design reflects the traditional Khmer idea of the temple mountain, where the central tower symbolizes Mount Meru, the center of the universe in Hindu and Buddhist cosmology. The five towers are arranged in a quincunx pattern, which is a distinctive feature. The bas-reliefs and carvings throughout the temple complex depict various mythological scenes and figures, adding to its
Error: Unterminated string starting at: line 2 column 15 (char 16)



Generating responses: 100%|██████████| 1500/1500 [1:09:40<00:00,  2.79s/it]


In [9]:
print("\nMissing Values:\n", response_df.isnull().sum())


Missing Values:
 gen_response     0
response_time    0
dtype: int64


In [11]:
## Save the response DataFrame to a CSV and Excel file
response_df.to_csv(f'./Raw Responses/{DATASET}/{LLM}-train{COT_}.csv', index = False)

# Post Processing

In [None]:
# import re
# import os
# import pandas as pd

# # Construct the path to your log file
# log_file_path = os.path.join('./debugging logs', f'{DATASET}_{LLM}_{COT_}.txt')

# # Read the log file
# with open(log_file_path, 'r') as f:
#     log_content = f.read()
    
# log_content

'Failed to parse JSON for response at index 74: {\n  "response": "Malappuram has a diverse demographic profile. The population primarily consists of Malayalis, with a significant presence of various religious communities, including Hindus, Muslims, and Christians. The literacy rate is quite high, reflecting the emphasis on education in the region. The city is known for its rich cultural heritage, influenced by its historical significance and the coexistence of different communities. This diversity adds to the unique character of Malappuram, making it an interesting place to explore, especially for someone like you who enjoys researching historic figures\nError: Unterminated string starting at: line 2 column 15 (char 16)\n\nFailed to parse JSON for response at index 95: {\n  "response": "Fort Corcoran played a significant role during the Civil War, serving as a strategic point for the Union Army. It was part of a series of fortifications built to protect Washington D.C. from Confederate

In [None]:
# # Updated pattern based on your actual file content
# pattern = r"Failed to parse JSON for response at index (\d+):\s*\{(.*?)\nError:"

# # Find all matches
# matches = re.findall(pattern, log_content, re.DOTALL)

# # Create DataFrame
# recovered_df = pd.DataFrame([
#     {"index": int(index), "gen_response": response.strip()}
#     for index, response in matches
# ])

# # Sort and preview
# recovered_df.sort_values("index", inplace=True)
# recovered_df.reset_index(drop=True, inplace=True)

# print(recovered_df.shape)
# recovered_df.head()

(14, 2)


Unnamed: 0,index,gen_response
0,74,"""response"": ""Malappuram has a diverse demograp..."
1,95,"""response"": ""Fort Corcoran played a significan..."
2,462,"""response"": ""In addition to the stunning biodi..."
3,480,"""response"": ""Bryce Canyon National Park is tru..."
4,600,"""response"": ""One of the most notable landmarks..."


In [5]:
import pandas as pd

response_df = pd.read_csv(f'Raw Responses/{DATASET}/{LLM}-train{COT_}.csv')

print("\nMissing Values:\n", response_df.isnull().sum())


Missing Values:
 gen_response     14
response_time     0
dtype: int64


In [None]:
# # Ensure index column is the same type
# recovered_df['index'] = recovered_df['index'].astype(int)

# # Iterate through recovered responses and update response_df
# for _, row in recovered_df.iterrows():
#     idx = row['index']
#     new_response = row['gen_response']
    
#     # Only update if response is missing or empty
#     if idx < len(response_df):
#         current_response = response_df.loc[idx, 'gen_response']
#         if pd.isna(current_response) or current_response == '':
#             response_df.loc[idx, 'gen_response'] = new_response


In [7]:
ds = response_df
print("Shape:", ds.shape)

print("\nMissing Values:\n", ds.isnull().sum())

Shape: (1500, 2)

Missing Values:
 gen_response     0
response_time    0
dtype: int64


In [8]:
import re
import json
import pandas as pd

# Define functions
def find_first_valid_json(text):
    if not isinstance(text, str) or text.strip() == "":
        return None  # skip if not a string or empty string
    
    json_objects = re.findall(r'\{.*?\}', text, re.DOTALL)
    for obj in json_objects:
        try:
            json_obj = json.loads(obj)
            if "response" in json_obj:  # Only check for "response"
                return json_obj
        except json.JSONDecodeError:
            continue
    return None

def get_response(text):
    if text is not None:  # Check if text is not None
        try:
            return text['response']
        except (ValueError, SyntaxError, KeyError):
            return None
    return None


# Replace empty strings in 'gen_response' with None
ds.loc[ds['gen_response'] == '', 'gen_response'] = None

# Apply the find_first_valid_json function
ds['gen_response'] = ds['gen_response'].apply(lambda x: find_first_valid_json(x))

# Convert gen_response to None if it's not a valid string
ds['gen_response'] = ds['gen_response'].apply(lambda x: None if pd.isna(x) or x == 'nan' or isinstance(x, float) else x)

# Extract 'response' from the JSON objects
ds['gen_response'] = ds['gen_response'].apply(lambda x: get_response(x))

# Keep the 'response_time' column unchanged
ds['response_time'] = ds['response_time']

# Define the new column order
new_column_order = ['gen_response', 'response_time']

# Reorder the columns
ds = ds[new_column_order]

print("\nMissing Values:\n", ds.isnull().sum())
print(ds.shape)
ds



Missing Values:
 gen_response     1500
response_time       0
dtype: int64
(1500, 2)


Unnamed: 0,gen_response,response_time
0,,1.257862
1,,1.235367
2,,1.664207
3,,1.642170
4,,2.650969
...,...,...
1495,,1.039717
1496,,1.548410
1497,,1.639071
1498,,1.485940


In [9]:
ds.to_csv(f'Responses/{DATASET}/{LLM}-train{COT_}.csv', index=False)