- All codes are written by Jaehyung Kim (jaehyungkim@kaist.ac.kr)

In [1]:
import pprint
import json
import copy
import numpy as np
from tqdm import tqdm
import time
pp = pprint.PrettyPrinter(indent=4)

In [2]:
from datetime import timedelta, datetime

# Dataset Load 
- From https://github.com/MikeWangWZHL/Solo-Performance-Prompting

In [3]:
dataset = './datasets/logic_grid_puzzle_200.jsonl'

In [4]:
import jsonlines

temp = 0
raw_data = []
all_data = []
with jsonlines.open(dataset) as f:
    for line in f.iter():
        if temp == 0:
            print(line.keys())
            for item in line.keys():
                print(item)
                print(line[item])
        temp += 1
        raw_data.append(line['inputs'])
        all_data.append(line)
print(f"Number of examples: {temp}")

dict_keys(['idx', 'inputs', 'targets', 'multiple_choice_targets', 'multiple_choice_scores'])
idx
0
inputs
Q: There are 4 houses in a row, numbered 1 on the left to 4 on the right. There is one person living in each house. The people in these houses have different characteristics:
 - Each person has different flowers in their foyer: one has a carnations arrangement, one has a bouquet of daffodils, one has a vase of tulips, and one has a bouquet of lilies
 - Each person plays a different musical instrument: one is a guitarist, one is a pianist, one is a percussionist, and one is a flutist

Clue(s):
1. The flutist lives in the second house.
2. The person who has a vase of tulips lives directly left of the guitarist.
3. The person who has a bouquet of lilies lives directly left of the person who has a carnations arrangement.
4. There is one house between where the flutist lives and where the pianist lives.

What is the number of the house where the person who has a vase of tulips lives?
  

In [5]:
log = './datasets/logic_grid_puzzle_200.jsonl__method-spp_engine-devgpt4-32k_temp-0.0_topp-1.0_start0-end200__with_sys_mes.jsonl'

In [6]:
raw_responses = []

with jsonlines.open(log) as f:
    for line in f.iter():
        try:
            raw_responses.append(line['raw_response'][0]['choices'][0]['message']['content'])
        except:
            raw_responses.append('N/A')

# Detecting Digression

In [8]:
import openai

[TODO] You need to insert your own api_key of openai

In [9]:
openai.api_key = "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"

In [10]:
def prompt_digression(example, response):
    
    text = f"Please analyze the following conversation for any digressions and answer 'yes' or 'no'.\n"
    text += f"Then, highlight any statements that do not directly contribute to solving the puzzle or are unrelated to the clues provided.\n"
    text += f"### Input\n\n {example} \n\n"
    text += f"### Conversation\n\n {response} \n\n"
    text += f"### Answer: "
    
    return text

In [11]:
def prompt_digression_reason(example, response, label):
    
    text = f"Please analyze the following conversation for any digressions and answer 'yes' or 'no'.\n"
    text += f"Then, highlight any statements that do not directly contribute to solving the puzzle or are unrelated to the clues provided.\n"
    text += f"### Input\n\n {example} \n\n"
    text += f"### Conversation\n\n {response} \n\n"
    text += f"### Answer: {label}.\nThen, explicitly highlight any statements that do not directly contribute to solving the puzzle or are unrelated to the clues provided. "
    text += f"Also, providing the rationale why such statements are considered as digression. Or you can change the asnwer, but please also provide the reason for such change and provide the changed answer at the end, e.g., '... Therefore, the answer is xx.'. "
    
    return text

In [12]:
def call_api_gpt4(query):
    model = "gpt-4-1106-preview"
    waiting_time = 0.5
    
    response = None
    while response is None:
        try:
            messages = [
                    {"role": "system", "content": query},
            ]
            
                # ChatGPT API 호출하기
            response = openai.ChatCompletion.create(
                model=model,
                messages=messages,
                temperature=0.0,
                max_tokens=256
            )
        except:
            time.sleep(waiting_time)
            if waiting_time < 5:
                waiting_time += 0.5
            else:
                break
    if response is not None:
        try:
            answer = response['choices'][0]['message']['content']
        except:
            answer = 'N/A'
        n_input_tokens = response['usage']['prompt_tokens']
        n_output_tokens = response['usage']['completion_tokens']
    else:
        answer = 'N/A'
        n_input_tokens = 0
        n_output_tokens = 0
        
    return answer, n_input_tokens, n_output_tokens

In [17]:
def detect_digression(examples, responses, method='gpt3'):
    n_examples = len(examples)
    all_input_tokens = 0
    all_output_tokens = 0
    res = []
    
    for i in tqdm(range(n_examples)):
        if responses[i] != 'N/A':
            query = prompt_digression(examples[i], responses[i])
            if method == 'gpt3':
                ans, n_input_token, n_output_token = call_api(query)
            elif method == 'gpt4':
                ans, n_input_token, n_output_token = call_api_gpt4(query)
            res.append(ans)
            all_input_tokens += n_input_token
            all_output_tokens += n_output_token
        else:
            res.append('N/A')
        break
    print("Method: {}".format(method))
    print("All input tokens: {}, All output tokens: {}".format(all_input_tokens, all_output_tokens))
    return res

In [18]:
def detect_digression_reason(examples, responses, labels, method='gpt3'):
    n_examples = len(examples)
    all_input_tokens = 0
    all_output_tokens = 0
    res = []
    
    for i in tqdm(range(n_examples)):
        if labels[i] != 'N/A':
            query = prompt_digression_reason(examples[i], responses[i], labels[i])
            if method == 'gpt3':
                ans, n_input_token, n_output_token = call_api(query)
            elif method == 'gpt4':
                ans, n_input_token, n_output_token = call_api_gpt4(query)
            res.append(ans)
            all_input_tokens += n_input_token
            all_output_tokens += n_output_token
        else:
            res.append('N/A')
        break
    print("Method: {}".format(method))
    print("All input tokens: {}, All output tokens: {}".format(all_input_tokens, all_output_tokens))
    return res

In [16]:
def postprocess_label(labels):
    res = []
    
    for item in labels:
        if 'yes' in item.lower():
            res.append('Yes')
        elif 'no' in item.lower():
            res.append('No')
        elif 'n/a' in item.lower():
            res.append('N/A')
        else:
            raise ValueError(f'{item}')
    return res

In [19]:
res_gpt4 = detect_digression(raw_data, raw_responses, 'gpt4')

  0%|                                                                                                                                                                               | 0/200 [00:05<?, ?it/s]

Method: gpt4
All input tokens: 681, All output tokens: 54





In [20]:
res_gpt4_post = postprocess_label(res_gpt4)

In [21]:
res_gpt4_reason = detect_digression_reason(raw_data, raw_responses, res_gpt4_post, 'gpt4')

  0%|                                                                                                                                                                               | 0/200 [00:04<?, ?it/s]

Method: gpt4
All input tokens: 758, All output tokens: 59





In [24]:
no_counts = 0

yes_indices, no_indices = [], []

for i, item in enumerate(res_gpt4_reason): 
    if 'not contain any digression' in item.lower():
        no_indices.append(i)
    elif 'no statements in the conversation that digress' in item.lower():
        no_indices.append(i)    
    elif 'no digressions' in item.lower():
        no_indices.append(i)
    elif 'do not directly contribute' in item.lower():
        yes_indices.append(i)
    elif '### digressions' in item.lower() or 'following statements are digressions' in item.lower():
        yes_indices.append(i)    
    elif i in [8, 18, 21, 34, 42, 48, 119]:
        yes_indices.append(i)
    elif i in [38, 40, 46, 110, 126, 167, 194, 196]:
        no_indices.append(i)    
    else:
        print(i)
        print(item)
        break

print(len(yes_indices))
print(len(no_indices))

0
1


## Measuring Task Accuracy Change with Digression  

In [25]:
preds = []
labels = []

for i, item in enumerate(raw_responses):
    try:
        preds.append(int(item[-2]))
    except:
        try:
            preds.append(int(item[-1]))
        except:
            preds.append(0)
    labels.append(int(all_data[i]['targets'][0]))

preds = np.array(preds)
labels = np.array(labels)

In [20]:
print("Accuracy: {}".format(100 * (preds == labels).sum() / 200))

Accuracy: 66.0


In [21]:
yes_indices = np.array(yes_indices).astype(np.int64)
print("Accuracy (yes_digression): {}".format(100 * (preds == labels)[yes_indices].sum() / len(yes_indices)))

Accuracy (yes_digression): 53.03030303030303


In [22]:
no_indices = np.array(no_indices).astype(np.int64)
print("Accuracy (no_digression): {}".format(100 * (preds == labels)[no_indices].sum() / len(no_indices)))

Accuracy (no_digression): 72.38805970149254


# Classification of Digression

In [26]:
def prompt_classify(response, reason):
    
    text = f"Please classify the given reasoning into the one of the following categories: [1] Procedural or Introductory Statements, [2] Repetitive or Confirmatory Statements, [3] Transitional Statements, [4] Irrelevant or Off-Topic Contributions, [5] Misleading or Incorrect Reasoning, [6] Closing or Summarizing Remarks, and [7] Constructive Contributions. You response should be in the form of [], e.g., [1] or [3].\n"
    text += f"### Conversation\n\n {response} \n\n"
    text += f"### Reasonings\n\n {reason} \n\n"
    text += f"### Answer (You response should be in the simple form, e.g., [1] or [3]): "
    
    return text

In [27]:
def classify_reason(responses, reasons, method='gpt3'):
    n_examples = len(responses)
    all_input_tokens = 0
    all_output_tokens = 0
    res = []
    
    for i in tqdm(range(n_examples)):
        if responses[i] != 'N/A':
            query = prompt_classify(responses[i], reasons[i])
            if method == 'gpt3':
                ans, n_input_token, n_output_token = call_api(query)
            elif method == 'gpt4':
                ans, n_input_token, n_output_token = call_api_gpt4(query)
            res.append(ans)
            all_input_tokens += n_input_token
            all_output_tokens += n_output_token
        else:
            res.append('N/A')
    print("Method: {}".format(method))
    print("All input tokens: {}, All output tokens: {}".format(all_input_tokens, all_output_tokens))
    return res

In [25]:
dig_responses = [res_gpt4_reason[idx] for idx in yes_indices]

In [26]:
classification_gpt4 = classify_reason([raw_responses[idx] for idx in yes_indices], dig_responses, 'gpt4')

100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 66/66 [02:55<00:00,  2.66s/it]

Method: gpt4
All input tokens: 61844, All output tokens: 383





# Human-like Conversation

In [46]:
def prompt_human2(example, response):
    
    text = f"Please analyze the provided conversation and assess whether it resembles a dialogue between human participants or appears to be an artificial conversation created by multiple AI agents. Please answer 'yes' if it is closed to human conversation or 'no' otherwise.\n"
    text += f"Consider factors such as the flow of the conversation, the naturalness of responses, the presence of emotional or personal interjections, and the complexity or variability of language used.\n"
    #text += f"Highlight any characteristics that particularly suggest a human or artificial origin of the conversation.\n"
    #text += f"### Input\n\n {example} \n\n"
    text += f"### Conversation\n\n {response} \n\n"
    text += f"### Answer: "
    
    return text

In [47]:
def prompt_human_reason(example, response, label):
    
    text = f"Please analyze the provided conversation and assess whether it resembles a dialogue between human participants or appears to be an artificial conversation created by multiple AI agents. Please answer 'yes' if it is closed to human conversation or 'no' otherwise.\n"
    text += f"Consider factors such as the flow of the conversation, the naturalness of responses, the presence of emotional or personal interjections, and the complexity or variability of language used.\n"
    text += f"### Conversation\n\n {response} \n\n"
    text += f"### Answer: {label}\n\n"
    text += f"Then, providing the explicit rationale for the answer by highlight any characteristics that particularly suggest a human or artificial origin of the conversation. Or you can change the asnwer, but please also provide the reason for such change and provide the changed answer at the end, e.g., '... Therefore, the answer is xx.'. "
    
    return text

In [48]:
def detect_human(examples, responses, method='gpt3'):
    n_examples = len(examples)
    all_input_tokens = 0
    all_output_tokens = 0
    res = []
    
    for i in tqdm(range(n_examples)):
        if responses[i] != 'N/A':
            query = prompt_human2(examples[i], responses[i])
            if method == 'gpt3':
                ans, n_input_token, n_output_token = call_api(query)
            elif method == 'gpt4':
                ans, n_input_token, n_output_token = call_api_gpt4(query)
            res.append(ans)
            all_input_tokens += n_input_token
            all_output_tokens += n_output_token
        else:
            res.append('N/A')
    print("Method: {}".format(method))
    print("All input tokens: {}, All output tokens: {}".format(all_input_tokens, all_output_tokens))
    return res

In [49]:
def detect_human_reason(examples, responses, labels, method='gpt3'):
    n_examples = len(examples)
    all_input_tokens = 0
    all_output_tokens = 0
    res = []
    
    for i in tqdm(range(n_examples)):
        if labels[i] != 'N/A':
            query = prompt_human_reason(examples[i], responses[i], labels[i])
            if method == 'gpt3':
                ans, n_input_token, n_output_token = call_api(query)
            elif method == 'gpt4':
                ans, n_input_token, n_output_token = call_api_gpt4(query)
            res.append(ans)
            all_input_tokens += n_input_token
            all_output_tokens += n_output_token
        else:
            res.append('N/A')
    print("Method: {}".format(method))
    print("All input tokens: {}, All output tokens: {}".format(all_input_tokens, all_output_tokens))
    return res

In [None]:
res_gpt4_human = detect_human(raw_data, raw_responses, 'gpt4')

In [None]:
res_gpt4_human_post = postprocess_label(res_gpt4_human)

In [None]:
res_gpt4_human_reason = detect_human_reason(raw_data, raw_responses, res_gpt4_human_post, 'gpt4')

In [None]:
human_indices = []
ai_indices = []

for i, item in enumerate(res_gpt4_human_reason):
    if 'yes' in item.lower() and 'human' in item.lower():
        human_indices.append(i)
    elif 'does resemble a dialogue between human participants' in item.lower() or 'closer to a human conversation' in item.lower():
        human_indices.append(i)
    elif 'closer to a dialogue between human participants' in item.lower() or i == 162:
        human_indices.append(i)
    elif 'possibly created by ai agents' in item.lower() or i == 167:
        ai_indices.append(i)
    elif 'not closely resemble a dialogue between human participants' in item.lower():
        ai_indices.append(i)
    elif 'created by multiple ai agents' in item.lower():
        ai_indices.append(i)
    elif 'not resemble a natural dialogue between human participants' in item.lower():
        ai_indices.append(i)
    elif 'no' in item.split('\n\n')[0].lower():
        ai_indices.append(i)
    else:
        print(i)
        print(item)
        break
        
print(len(human_indices))
print(len(ai_indices))

## Comparison of Digression Patterns 

In [65]:
human_indices_dig = []
ai_indices_dig = []

for i, item in enumerate(yes_indices):
    if onehot_human[item] == 1:
        human_indices_dig.append(i)
    else:
        ai_indices_dig.append(i)
human_indices_dig = np.array(human_indices_dig).astype(np.int64)
ai_indices_dig = np.array(ai_indices_dig).astype(np.int64)

In [66]:
classification_gpt4_post2 = np.array(classification_gpt4_post).astype(np.int64)[human_indices_dig]
n_classes = np.zeros(7)

for i in range(1, 8):
    n_classes[i-1] = (classification_gpt4_post2 == i).sum()
print(n_classes)

[ 5.  1.  0. 14.  2.  4.  0.]


In [67]:
classification_gpt4_post3 = np.array(classification_gpt4_post).astype(np.int64)[ai_indices_dig]
n_classes = np.zeros(7)

for i in range(1, 8):
    n_classes[i-1] = (classification_gpt4_post3 == i).sum()
print(n_classes)

[ 6.  1.  0. 27.  5.  1.  0.]


In [37]:
human_indices = []
ai_indices = []

for i, item in enumerate(res_gpt4_human_all):
    if 'yes' in item.lower() and 'human' in item.lower():
        human_indices.append(i)
    elif 'does resemble a dialogue between human participants' in item.lower() or 'closer to a human conversation' in item.lower():
        human_indices.append(i)
    elif 'closer to a dialogue between human participants' in item.lower() or i == 162:
        human_indices.append(i)
    elif 'possibly created by ai agents' in item.lower() or i == 167:
        ai_indices.append(i)
    elif 'not closely resemble a dialogue between human participants' in item.lower():
        ai_indices.append(i)
    elif 'created by multiple ai agents' in item.lower():
        ai_indices.append(i)
    elif 'not resemble a natural dialogue between human participants' in item.lower():
        ai_indices.append(i)
    elif 'no' in item.split('\n\n')[0].lower():
        ai_indices.append(i)
    else:
        print(i)
        print(item)
        break
        
print(len(human_indices))
print(len(ai_indices))

82
118


In [38]:
human_indices = np.array(human_indices).astype(np.int64)
ai_indices = np.array(ai_indices).astype(np.int64)

In [39]:
onehot_human = np.zeros(200)

for item in human_indices:
    onehot_human[item] = 1
print(onehot_human.sum())

82.0


In [69]:
onehot_dig = np.zeros(200)

for item in yes_indices:
    onehot_dig[item] = 1

In [None]:
dig_indices = (onehot_dig == 1).nonzero()[0]
ai_dig_indices = [dig_indices[idx] for idx in ai_indices_dig]
ai_dig_indices_irre = []

for i, item in enumerate(ai_dig_indices):
    if classification_gpt4_post3[i] == 4:
        ai_dig_indices_irre.append(item)

In [None]:
onehot_ai_irrelevant = np.zeros(200)

for item in ai_dig_indices_irre:
    onehot_ai_irrelevant[item] = 1

In [None]:
dig_indices = (onehot_dig == 1).nonzero()[0]
human_dig_indices = [dig_indices[idx] for idx in human_indices_dig]
human_dig_indices_irre = []

for i, item in enumerate(human_dig_indices):
    if classification_gpt4_post2[i] == 4:
        human_dig_indices_irre.append(item)

In [None]:
onehot_human_irrelevant = np.zeros(200)

for item in human_dig_indices_irre:
    onehot_human_irrelevant[item] = 1

## Measuring Accuracy Change 

In [70]:
temp = np.array(((onehot_dig == 1) * (onehot_human == 1)).nonzero()[0]).astype(np.int64)
print("Accuracy (human & digression): {}".format(100 * (preds == labels)[temp].sum() / len(temp)))

Accuracy (human & digression): 53.84615384615385


In [71]:
temp = np.array(((onehot_dig == 0) * (onehot_human == 1)).nonzero()[0]).astype(np.int64)
print("Accuracy (human & no digression): {}".format(100 * (preds == labels)[temp].sum() / len(temp)))

Accuracy (human & no digression): 67.85714285714286


In [72]:
temp = np.array(((onehot_dig == 1) * (onehot_human == 0)).nonzero()[0]).astype(np.int64)
print("Accuracy (human & digression): {}".format(100 * (preds == labels)[temp].sum() / len(temp)))

Accuracy (human & digression): 52.5


In [73]:
temp = np.array(((onehot_dig == 0) * (onehot_human == 0)).nonzero()[0]).astype(np.int64)
print("Accuracy (human & digression): {}".format(100 * (preds == labels)[temp].sum() / len(temp)))

Accuracy (human & digression): 75.64102564102564


In [90]:
temp = np.array(((onehot_dig == 1) * (onehot_human == 0) * (onehot_ai_irrelevant == 1)).nonzero()[0]).astype(np.int64)
print("Accuracy (human & digression): {}".format(100 * (preds == labels)[temp].sum() / len(temp)))

Accuracy (human & digression): 51.851851851851855


In [88]:
temp = np.array(((onehot_dig == 1) * (onehot_human == 1) * (onehot_human_irrelevant == 1)).nonzero()[0]).astype(np.int64)
print("Accuracy (human & digression): {}".format(100 * (preds == labels)[temp].sum() / len(temp)))

Accuracy (human & digression): 57.142857142857146


# Collaborative Effect

In [37]:
def prompt_collaborative(example, response):
    
    text = f"Please analyze the provided conversation to determine the collaborative dynamics among the participants which means that evaluate whether the final conclusion is a result of a combined effort, with each participant contributing significantly, or if it is predominantly led by a single participant. Please answer 'yes' if there is combined effort or 'no' otherwise.\n"
    #text += f"Then, highlight any statements that do not directly contribute to solving the puzzle or are unrelated to the clues provided.\n"
    text += f"### Input\n\n {example} \n\n"
    text += f"### Conversation\n\n {response} \n\n"
    text += f"### Answer: "
    
    return text

In [38]:
def prompt_collaborative_reason(example, response, label):
    
    text = f"Please analyze the provided conversation to determine the collaborative dynamics among the participants which means that evaluate whether the final conclusion is a result of a combined effort, with each participant contributing significantly, or if it is predominantly led by a single participant. Please answer 'yes' if there is combined effort or 'no' otherwise.\n"
    text += f"### Input\n\n {example} \n\n"
    text += f"### Conversation\n\n {response} \n\n"
    text += f"### Answer: {label}\n\n"
    text += f"Then, providing the explicit rationale for the answer. Or you can change the asnwer, but please also provide the reason for such change and provide the changed answer at the end, e.g., '... Therefore, the answer is xx.'. "
    
    return text

In [41]:
def detect_collaborative(examples, responses, method='gpt3'):
    n_examples = len(examples)
    all_input_tokens = 0
    all_output_tokens = 0
    res = []
    
    for i in tqdm(range(n_examples)):
        if responses[i] != 'N/A':
            query = prompt_collaborative(examples[i], responses[i])
            if method == 'gpt3':
                ans, n_input_token, n_output_token = call_api(query)
            elif method == 'gpt4':
                ans, n_input_token, n_output_token = call_api_gpt4(query)
            res.append(ans)
            all_input_tokens += n_input_token
            all_output_tokens += n_output_token
        else:
            res.append('N/A')
    print("Method: {}".format(method))
    print("All input tokens: {}, All output tokens: {}".format(all_input_tokens, all_output_tokens))
    return res

In [42]:
def detect_collaborative_reason(examples, responses, labels, method='gpt3'):
    n_examples = len(examples)
    all_input_tokens = 0
    all_output_tokens = 0
    res = []
    
    for i in tqdm(range(n_examples)):
        if labels[i] != 'N/A':
            query = prompt_collaborative_reason(examples[i], responses[i], labels[i])
            if method == 'gpt3':
                ans, n_input_token, n_output_token = call_api(query)
            elif method == 'gpt4':
                ans, n_input_token, n_output_token = call_api_gpt4(query)
            res.append(ans)
            all_input_tokens += n_input_token
            all_output_tokens += n_output_token
        else:
            res.append('N/A')
    print("Method: {}".format(method))
    print("All input tokens: {}, All output tokens: {}".format(all_input_tokens, all_output_tokens))
    return res

In [43]:
res_gpt4_collabo = detect_collaborative(raw_data, raw_responses, 'gpt4')

  0%|                                                                                                                                                                               | 0/200 [00:00<?, ?it/s]

Method: gpt4
All input tokens: 702, All output tokens: 1





In [44]:
res_gpt4_collabo_post = postprocess_label(res_gpt4_collabo)

In [45]:
res_gpt4_collabo_reason = detect_collaborative_reason(raw_data, raw_responses, res_gpt4_collabo_post, 'gpt4')

  0%|                                                                                                                                                                               | 0/200 [00:08<?, ?it/s]

Method: gpt4
All input tokens: 752, All output tokens: 196



