In [1]:
from datasets import load_dataset
from openai import OpenAI
import httpx
import json
import os
import csv
import pandas as pd
import seaborn as sns
from tqdm import tqdm
import matplotlib.pyplot as plt


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
client = OpenAI(
    api_key="sk-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx",
)

def generate_cluster(selected_texts, instruction):
  response = client.chat.completions.create(
    model="gpt-4o",
    messages=[
      {"role": "system", "content": instruction},
      {"role": "user", "content": f"{list(selected_texts)}"}
    ]
  )

  result = response.choices[0].message.content
  return result

In [3]:
def extract_final_answer(answer: str):       
    if not answer:
        return "<INVALID>"

    model_pred = answer.lower()
    preds = model_pred.split("<ans_start>")

    pred = preds[-1].split("<ans_end>")[0].strip()

    if len(pred) == 0:
        return "<INVALID>"

    return pred

In [36]:
# Load data from banking77_sent2label.json
with open('dataset/hwu64_sent2label.json', 'r', encoding='utf-8') as file:
    data = json.load(file)

# Extract texts and cluster labels
texts = list(data.keys())
cluster_labels = list(data.values())

# Print the extracted texts and cluster labels
print(texts)
print(cluster_labels)

['Siri, shuffle playlist', 'repeat song', 'Repeat the music', 'Please, put radio in shuffle.', 'repeat music', 'Please turn off shuffle setting', 'repeat this song', 'Lower battery consumption to save setting.', 'repeat jazz', 'repeat same song for 10 times', 'play only particular singer songs', 'Show current track in music player.', 'Save settings', 'shuffle music by aaron carter', 'repeat song no 10 from main list', "Please lower the shuffle rate of hips don't lie.", 'please go to settings of the music player and make the equalizer in the flat sound mode.', 'Please check my playlists for jazz music', "Please program volume settings for 16 when I'm in the room at 14 for when I've gone to bed.", 'REPLAY THE MUSICS', 'open music player settings', 'Please proceed to the next available rock song', 'Music change', 'please would you say again.', "please could you try what you've said once more.", 'could you say that one more time please.', 'could you tell me it once more please.', 'please s

In [37]:
n = len(texts)
k = len(set(cluster_labels))

In [7]:
# Load prompt
with open('prompt_template.json', 'r', encoding="utf-8") as file:
    prompt_template = json.load(file)

for prompt in prompt_template.keys():
    prompt_template[prompt] = prompt_template[prompt].replace("{n}", str(n)).replace("{k}", str(k))

prompt_template

{'vanilla': "You are given a dataset of 240 sentences which you need to cluster into one of the 8 clusters. Output exactly 240 cluster labels.\nFor each sentence, assignment it to one of the 8 cluster label and output the cluster number. Your output should ONLY contain a list of 240 integers in the format <ANS_START>[cluster asignments]<ANS_END>. Do not include any other texts.\n  \nExample:\nInput Sentences: ['sentence1', 'sentence2', 'sentence3']\nOutput Labels: [1, 0, 2]\n",
 'fewshot': "You are given a dataset of 240 sentences which you need to cluster into one of the 8 clusters. Output exactly 240 cluster labels.\nFor each sentence, assignment it to one of the 8 cluster label and output the cluster number. Your output should ONLY contain a list of 240 integers in the format <ANS_START>[cluster asignments]<ANS_END>. Do not include any other texts.\n  \nExample:\nInput Sentences: ['sentence1', 'sentence2', 'sentence3']\nOutput Labels: [1, 0, 2]\n \n\n\n[Question] ['create a playlist

In [None]:
for prompt in prompt_template.keys():
    results = []
    instruction = prompt_template[prompt]
    print(f"#### Running with prompt - {prompt}\n")
    with open(f'clustering_result/hwu64_prompt/prompting_results_{prompt}_hwu64.csv', 'a', newline='') as csvfile:
        writer = csv.writer(csvfile)
        for i in tqdm(range(0, 50)):
            try:
                result = generate_cluster(texts, instruction)
            except Exception as e:
                print(f"GPT Error: {e}")
                break
            try:
                processed_result = extract_final_answer(result)         # Extract the final answer from the result
            except:
                print("INVALID OUTPUT")
                print(result)
                break
            label_count = len(processed_result[1:-1].split(", "))         # Count the number of labels in the processed result
            writer.writerow([i, label_count, processed_result])
            results.append({'Iteration': i, 'Label Count': label_count, 'Processed Result': processed_result})
            
    # Convert the results to a DataFrame
    # df_results = pd.DataFrame(results)
    # df_label_counts = pd.read_csv('prompting_label_counts_hwu64.csv')
    # df_label_counts[f"{prompt}"] = df_results["Label Count"]

    # df_label_counts.to_csv('prompting_label_counts_hwu64.csv', index=False)

In [30]:
hwu64_count = pd.read_csv("clustering_result/count_statistics/prompting_label_counts_hwu64.csv")

for prompt in prompt_template.keys():
    df = pd.read_csv(f'clustering_result/hwu64_prompt/prompting_results_{prompt}_hwu64.csv')
    df.columns = ['Index', 'Label Count', 'Cluster Assignment']
    for i, row in df.iterrows():
        df.at[i, 'Index'] = int(i)+1
        cluster = [x.strip() for x in row[2][1:-1].split(",")]
        df.at[i, 'Label Count'] = int(len(cluster))
    df['Label Count'] = df['Label Count'].astype(int)
    df.to_csv(f'clustering_result/hwu64_prompt/prompting_results_{prompt}_hwu64.csv', index=None)
    hwu64_count[f"{prompt}"] = df['Label Count']



  cluster = [x.strip() for x in row[2][1:-1].split(",")]
  cluster = [x.strip() for x in row[2][1:-1].split(",")]
  cluster = [x.strip() for x in row[2][1:-1].split(",")]
  cluster = [x.strip() for x in row[2][1:-1].split(",")]
  cluster = [x.strip() for x in row[2][1:-1].split(",")]


In [31]:
threshold = 240

counts = {}

for col in hwu64_count.columns:
    counts[col] = {
        'less than': (hwu64_count[col] < threshold).sum(),
        'equal to': (hwu64_count[col] == threshold).sum(),
        'greater than': (hwu64_count[col] > threshold).sum()
    }

counts_df = pd.DataFrame(counts).T
counts_df

Unnamed: 0,less than,equal to,greater than
vanilla,12,1,37
cot,7,0,43
fewshot,19,2,29
pw_wo_reasoning,0,1,49
pw_w_reasoning,6,0,44


In [32]:
for prompt in prompt_template.keys():
    df = pd.read_csv(f'clustering_result/hwu64_prompt/prompting_results_{prompt}_hwu64.csv')
    df.columns = ['Index', 'Label Count', 'Cluster Assignment']
    counter = 0
    for i, row in df.iterrows():
        try:
            cluster = [x.strip() for x in row[2][1:-1].split(",")]
            labels = list(map(int, cluster))
        except:
            print(i)
        if len(set(labels)) < 8:
            counter += 1
    print(f"{prompt}: {counter}")

vanilla: 20
fewshot: 16
cot: 12
pw_wo_reasoning: 17
pw_w_reasoning: 27


  cluster = [x.strip() for x in row[2][1:-1].split(",")]
  cluster = [x.strip() for x in row[2][1:-1].split(",")]
  cluster = [x.strip() for x in row[2][1:-1].split(",")]
  cluster = [x.strip() for x in row[2][1:-1].split(",")]
  cluster = [x.strip() for x in row[2][1:-1].split(",")]


In [38]:
from sklearn.metrics.cluster import normalized_mutual_info_score

for prompt in prompt_template.keys():
    df = pd.read_csv(f'clustering_result/hwu64_prompt/prompting_results_{prompt}_hwu64.csv')
    df.columns = ['Index', 'Label Count', 'Cluster Assignment']
    for i, row in df.iterrows():
        if row["Label Count"] == n:
            labels = list(map(int, row[2][1:-1].split(", ")))
            nmi = normalized_mutual_info_score(labels, cluster_labels)
            print(f"{prompt}: {nmi}")

vanilla: 0.7893023483840297
fewshot: 0.7939332460566361
fewshot: 0.7686341924216898
pw_wo_reasoning: 0.8233843028539108


  labels = list(map(int, row[2][1:-1].split(", ")))
  labels = list(map(int, row[2][1:-1].split(", ")))
  labels = list(map(int, row[2][1:-1].split(", ")))
  labels = list(map(int, row[2][1:-1].split(", ")))
