In [63]:
from datasets import load_dataset
from openai import OpenAI
import json

import csv
import pandas as pd
import seaborn as sns
from tqdm import tqdm
import matplotlib.pyplot as plt


In [64]:
# Do not store credentials in the notebook: when no api_key argument is given,
# the OpenAI client reads the key from the OPENAI_API_KEY environment variable.
# Export that variable in the shell (or kernel environment) before running.
client = OpenAI()

def generate_cluster(selected_texts, instruction):
    """Ask the chat model to cluster the given sentences and return its reply.

    Args:
        selected_texts: Iterable of sentences; it is converted to a list and
            its string representation is sent as the user message.
        instruction: Prompt text sent as the system message.

    Returns:
        The message content of the first choice in the completion response.
    """
    chat_messages = [
        {"role": "system", "content": instruction},
        {"role": "user", "content": str(list(selected_texts))},
    ]
    # Uses the module-level `client` created earlier in the notebook.
    completion = client.chat.completions.create(
        model="gpt-4o",
        messages=chat_messages,
    )
    return completion.choices[0].message.content

In [65]:
def extract_final_answer(answer: str) -> str:
    """Return the text enclosed by the last answer-marker pair in a model reply.

    The reply is lower-cased first, so the markers match case-insensitively
    and the returned text is lower-case as well. The text after the last
    ``<ans_start>`` and before the following ``<ans_end>`` is returned with
    surrounding whitespace removed; a missing marker simply does not cut the
    text on that side. ``"<INVALID>"`` is returned when the reply is empty or
    the extracted text is empty.
    """
    if not answer:
        return "<INVALID>"

    lowered = answer.lower()
    # Everything after the final start marker (the whole reply if absent).
    _, _, after_start = lowered.rpartition("<ans_start>")
    # Everything before the first end marker that follows it.
    candidate, _, _ = after_start.partition("<ans_end>")
    candidate = candidate.strip()

    return candidate if candidate else "<INVALID>"

In [66]:
# Load the "small" configuration of the clinc_oos dataset and use its test split.
dataset = load_dataset("clinc_oos", "small")
test_split = dataset["test"]

texts = test_split["text"]
intents = test_split["intent"]

# Drop every example whose intent label is 42
# (presumably the out-of-scope class — TODO confirm against the dataset's label names).
filtered_pairs = [pair for pair in zip(texts, intents) if pair[1] != 42]
filtered_texts, filtered_intents = zip(*filtered_pairs)

In [67]:
# Restrict the filtered data to the examples whose intent label lies in 0..7.
selected_pairs = [
    (sentence, label)
    for sentence, label in filtered_pairs
    if label >= 0 and label <= 7
]
selected_texts, selected_intents = zip(*selected_pairs)

# k: number of distinct intents that remain; n: number of selected sentences.
k = len(set(selected_intents))
n = len(selected_texts)

In [68]:
# Read the prompt templates (a JSON object mapping prompt name -> template text).
with open('prompt_template.json', 'r', encoding="utf-8") as file:
    prompt_template = json.load(file)

# Substitute the "{n}" and "{k}" placeholders with the sentence and cluster counts.
# Only values are reassigned, so updating the dict while iterating is safe.
for prompt, template in prompt_template.items():
    with_n = template.replace("{n}", str(n))
    prompt_template[prompt] = with_n.replace("{k}", str(k))

# Display the filled-in templates as the cell output.
prompt_template

{'fewshot': "You are given a dataset of 240 sentences which you need to cluster into one of the 8 clusters. Output exactly 240 cluster labels.\nFor each sentence, assignment it to one of the 8 cluster label and output the cluster number. Your output should ONLY contain a list of 240 integers in the format <ANS_START>[cluster asignments]<ANS_END>. Do not include any other texts.\n  \nExample:\nInput Sentences: ['sentence1', 'sentence2', 'sentence3']\nOutput Labels: [1, 0, 2]\n \n\n\n[Question] ['create a playlist of my favorite songs', 'find the nearest Thai restaurant', 'which team won the world series in 2020', 'how many calories are burnt in a 30-minute run', 'can you show me my calendar for today', 'what is the current exchange rate for usd to eur', 'write a report on the latest tech trends', 'locate a public library near me', 'turn on the AC to 70 degrees', 'what ingredients are needed for chocolate cake', 'display my latest photos', 'how to perform CPR', 'suggest a workout plan fo

In [69]:
# Number of repeated model calls per prompt variant.
NUM_RUNS = 50

# For every prompt variant: query the model NUM_RUNS times on the same
# sentences, log each extracted answer to a per-prompt CSV, and store the
# number of labels returned per run as a column of the shared summary CSV.
for prompt in prompt_template.keys():
    results = []
    instruction = prompt_template[prompt]
    print(f"#### Running with prompt - {prompt}\n")
    # Append mode: rows written by earlier executions of this cell are kept.
    with open(f'clustering_result/clinc_prompt/prompting_results_{prompt}.csv', 'a', newline='') as csvfile:
        writer = csv.writer(csvfile)
        for i in tqdm(range(NUM_RUNS)):
            try:
                result = generate_cluster(selected_texts, instruction)
            except Exception as exc:
                # Catch Exception rather than using a bare except so that
                # KeyboardInterrupt still stops the run, and report the cause.
                print(f"GPT Error: {exc!r}")
                break
            try:
                processed_result = extract_final_answer(result)         # Extract the final answer from the result
            except Exception as exc:
                print(f"INVALID OUTPUT: {exc!r}")
                print(result)
                break
            # Count the entries between the first and last character (assumed
            # to be the list brackets). Splitting on "," and ignoring blank
            # tokens gives the same count for "[1, 0]" and "[1,0]", whereas
            # splitting on ", " would report a single label for the latter.
            label_tokens = processed_result[1:-1].split(",")
            label_count = sum(1 for token in label_tokens if token.strip())
            writer.writerow([i, label_count, processed_result])
            results.append({'Iteration': i, 'Label Count': label_count, 'Processed Result': processed_result})

    # Convert the results to a DataFrame
    df_results = pd.DataFrame(results)
    if df_results.empty:
        # No run succeeded, so there is no "Label Count" column to copy.
        print(f"No results collected for prompt - {prompt}; label counts not updated")
        continue

    try:
        df_label_counts = pd.read_csv('prompting_label_counts.csv')
    except FileNotFoundError:
        # First run: start a new summary table instead of failing.
        df_label_counts = pd.DataFrame()
    # NOTE(review): column assignment aligns on the row index, so if the
    # existing file has fewer rows than this run the extra runs are dropped.
    df_label_counts[f"{prompt}"] = df_results["Label Count"]

    df_label_counts.to_csv('prompting_label_counts.csv', index=False)

#### Running with prompt - fewshot



100%|██████████| 50/50 [08:24<00:00, 10.09s/it]


#### Running with prompt - cot



100%|██████████| 50/50 [07:58<00:00,  9.56s/it]


#### Running with prompt - pw_w_reasoning



100%|██████████| 50/50 [09:10<00:00, 11.00s/it]
