# Compare Step by Step data sets

Compare resurrected items across the data generated with k=5, k=10 and k=20 samples for GPT3-turbo and with k=5 and k=10 samples for GPT4o.

## Load sbs data

In [1]:
import csv
import numpy as np
import scipy as sp

from utils import group_entropies_by_dialogue_id, has_resurrected_items, group_sbs_data_by_dialogue_id

In [2]:
# Per-run entropy CSVs (GPT3 with k=5/10/20 samples, GPT4o with k=5/10 samples).
five_samples_data_path = "./data/generation/8_mcrae/sbs_entropy_k_five.csv"
ten_samples_data_path = "./data/generation/8_mcrae/sbs_entropy_k_ten.csv"
twenty_samples_data_path = "./data/generation/8_mcrae/sbs_entropy_k_twenty.csv"
five_gpt4o_samples_data_path = "./data/generation/8_mcrae/sbs_entropy_k_five_gpt4o.csv"
ten_gpt4o_samples_data_path = "./data/generation/8_mcrae/sbs_entropy_k_ten_gpt4o.csv"

# Each file is read inside its own `with` block so the handle is closed even
# if the grouping helper raises. Only the first element returned by
# group_entropies_by_dialogue_id is kept; the second one is discarded.
with open(five_samples_data_path, 'r', newline='') as five_rf:
    five_reader = csv.DictReader(five_rf, delimiter=",")
    five_entropies, _ = group_entropies_by_dialogue_id(five_reader)

with open(ten_samples_data_path, 'r', newline='') as ten_rf:
    ten_reader = csv.DictReader(ten_rf, delimiter=",")
    ten_entropies, _ = group_entropies_by_dialogue_id(ten_reader)

with open(twenty_samples_data_path, 'r', newline='') as twenty_rf:
    twenty_reader = csv.DictReader(twenty_rf, delimiter=",")
    twenty_entropies, _ = group_entropies_by_dialogue_id(twenty_reader)

with open(five_gpt4o_samples_data_path, 'r', newline='') as five_gpt4o_rf:
    five_gpt4o_reader = csv.DictReader(five_gpt4o_rf, delimiter=",")
    five_gpt4o_entropies, _ = group_entropies_by_dialogue_id(five_gpt4o_reader)

with open(ten_gpt4o_samples_data_path, 'r', newline='') as ten_gpt4o_rf:
    ten_gpt4o_reader = csv.DictReader(ten_gpt4o_rf, delimiter=",")
    ten_gpt4o_entropies, _ = group_entropies_by_dialogue_id(ten_gpt4o_reader)

## Count dialogues with resurrected items

In [7]:
# For every run, count the dialogues for which has_resurrected_items() is
# truthy. Each *_entropies object is indexed by the ids it yields on iteration.
five_samples_resurrected_items_count = sum(
    1 for dialogue_id in five_entropies
    if has_resurrected_items(five_entropies[dialogue_id])
)
ten_samples_resurrected_items_count = sum(
    1 for dialogue_id in ten_entropies
    if has_resurrected_items(ten_entropies[dialogue_id])
)
twenty_samples_resurrected_items_count = sum(
    1 for dialogue_id in twenty_entropies
    if has_resurrected_items(twenty_entropies[dialogue_id])
)
five_gpt4o_samples_resurrected_items_count = sum(
    1 for dialogue_id in five_gpt4o_entropies
    if has_resurrected_items(five_gpt4o_entropies[dialogue_id])
)
ten_gpt4o_samples_resurrected_items_count = sum(
    1 for dialogue_id in ten_gpt4o_entropies
    if has_resurrected_items(ten_gpt4o_entropies[dialogue_id])
)

print(
    "Resurrected items GPT3 (k=5, k=10, k=20): ",
    five_samples_resurrected_items_count,
    ten_samples_resurrected_items_count,
    twenty_samples_resurrected_items_count,
)
print(
    "Resurrected items GPT4o (k=5, k=10): ",
    five_gpt4o_samples_resurrected_items_count,
    ten_gpt4o_samples_resurrected_items_count,
)

Resurrected items GPT3 (k=5, k=10, k=20):  78 77 82
Resurrected items GPT4o (k=5, k=10):  33 40


## Compute resurrected items percentage

In [8]:
# (label, number of dialogues with resurrected items, grouped entropies) per run.
resurrection_counts = (
    ("five_samples", five_samples_resurrected_items_count, five_entropies),
    ("ten_samples", ten_samples_resurrected_items_count, ten_entropies),
    ("twenty_samples", twenty_samples_resurrected_items_count, twenty_entropies),
    ("five_samples_gpt4o", five_gpt4o_samples_resurrected_items_count, five_gpt4o_entropies),
    ("ten_samples_gpt4o", ten_gpt4o_samples_resurrected_items_count, ten_gpt4o_entropies),
)

# "percentage" is stored as a fraction in [0, 1]: the count divided by the
# number of dialogues in that run.
output = {
    label: {
        "resurrected_items_count": count,
        "percentage": count / len(entropies),
    }
    for label, count, entropies in resurrection_counts
}

print(output)

{'five_samples': {'resurrected_items_count': 78, 'percentage': 0.8863636363636364}, 'ten_samples': {'resurrected_items_count': 77, 'percentage': 0.875}, 'twenty_samples': {'resurrected_items_count': 82, 'percentage': 0.9318181818181818}, 'five_samples_gpt4o': {'resurrected_items_count': 33, 'percentage': 0.375}, 'ten_samples_gpt4o': {'resurrected_items_count': 40, 'percentage': 0.45454545454545453}}


## Compute probability of resurrected items

### Grouping sbs data by dialogue id

In [20]:
# Step-by-step dialogue CSVs for each run.
five_samples_sbs_data_path = "./data/generation/8_mcrae/dialogues_sbs_k_five_distr_ver2.csv"
ten_samples_sbs_data_path = "./data/generation/8_mcrae/dialogues_sbs_k_ten_distr_ver2.csv"
twenty_samples_sbs_data_path = "./data/generation/8_mcrae/dialogues_sbs_k_twenty_distr.csv"
five_gpt4o_samples_sbs_data_path = "./data/generation/8_mcrae/dialogues_sbs_k_five_gpt4o.csv"
ten_gpt4o_samples_sbs_data_path = "./data/generation/8_mcrae/dialogues_sbs_k_ten_gpt4o.csv"

# Group the rows of every file by dialogue id. Each file is read inside a
# `with` block so its handle is always closed (previously the files were
# opened and never closed).
with open(five_samples_sbs_data_path, 'r', newline='') as five_rf:
    five_reader = csv.DictReader(five_rf, delimiter=",")
    five_data = group_sbs_data_by_dialogue_id(five_reader)

with open(ten_samples_sbs_data_path, 'r', newline='') as ten_rf:
    ten_reader = csv.DictReader(ten_rf, delimiter=",")
    ten_data = group_sbs_data_by_dialogue_id(ten_reader)

with open(twenty_samples_sbs_data_path, 'r', newline='') as twenty_rf:
    twenty_reader = csv.DictReader(twenty_rf, delimiter=",")
    twenty_data = group_sbs_data_by_dialogue_id(twenty_reader)

with open(five_gpt4o_samples_sbs_data_path, 'r', newline='') as five_gpt4o_rf:
    five_gpt4o_reader = csv.DictReader(five_gpt4o_rf, delimiter=",")
    five_gpt4o_data = group_sbs_data_by_dialogue_id(five_gpt4o_reader)

with open(ten_gpt4o_samples_sbs_data_path, 'r', newline='') as ten_gpt4o_rf:
    ten_gpt4o_reader = csv.DictReader(ten_gpt4o_rf, delimiter=",")
    ten_gpt4o_data = group_sbs_data_by_dialogue_id(ten_gpt4o_reader)

### For each dialogue step, find resurrected items and compute the sum of their probabilities

In [18]:
def compute_resurrected_items_p_sum(grouped_data):
  """Annotate every dialogue step with its resurrected items and their probability mass.

  An item is "resurrected" at step i (i > 0) when its value in the previous
  step's "p_distribuition" is exactly 0 and its value in the current step is
  greater than 0. The first step of a dialogue has no predecessor, so it always
  gets an empty list and a mass of 0.

  Args:
    grouped_data: iterable of dialogue dicts. Each dialogue has an
      "intra_dialogues" sequence of step dicts, and each step has a
      "p_distribuition" dict mapping an item to a number.

  Returns:
    A tuple (grouped_data, resurrected_p_average). grouped_data is the SAME
    object that was passed in: every step is annotated in place with the keys
    "resurrected_items" (list) and "resurrected_items_p" (sum of the current
    values of those items). resurrected_p_average is the mean of
    "resurrected_items_p" over all steps that have a predecessor, or 0.0 when
    there is no such step (empty input or only single-step dialogues).

  Raises:
    KeyError: if an item of the current distribution is missing from the
      previous step's distribution.
  """
  p_summatory = 0
  counter = 0
  for dialogue in grouped_data:
    previous_distr = None
    for step in dialogue["intra_dialogues"]:
      current_distr = step["p_distribuition"]
      resurrected_items = []
      resurrected_p = 0

      if previous_distr is not None:
        # Zero probability at the previous step, non-zero probability now.
        resurrected_items = [
          item for item, p in current_distr.items()
          if previous_distr[item] == 0 and p > 0
        ]
        resurrected_p = sum(current_distr[item] for item in resurrected_items)

        p_summatory += resurrected_p
        counter += 1

      # In-place annotation: callers read these keys from the original data.
      step["resurrected_items"] = resurrected_items
      step["resurrected_items_p"] = resurrected_p
      previous_distr = current_distr

  # Without any step transition the average is undefined; report 0.0 instead
  # of raising ZeroDivisionError.
  resurrected_p_average = p_summatory / counter if counter else 0.0

  return grouped_data, resurrected_p_average

# GPT3 runs (k=5, k=10, k=20): annotated data plus average resurrected mass.
(
    (five_analyzed_data, five_r_p_average),
    (ten_analyzed_data, ten_r_p_average),
    (twenty_analyzed_data, twenty_r_p_average),
) = [
    compute_resurrected_items_p_sum(samples)
    for samples in (five_data, ten_data, twenty_data)
]

# GPT4o runs (k=5, k=10).
(
    (five_gpt4o_analyzed_data, five_gpt4o_r_p_average),
    (ten_gpt4o_analyzed_data, ten_gpt4o_r_p_average),
) = [
    compute_resurrected_items_p_sum(samples)
    for samples in (five_gpt4o_data, ten_gpt4o_data)
]

print(
    "Resurrected avg GPT3 (k=5, k=10, k=20): ",
    five_r_p_average,
    ten_r_p_average,
    twenty_r_p_average,
)
print(
    "Resurrected avg GPT4o (k=5, k=10): ",
    five_gpt4o_r_p_average,
    ten_gpt4o_r_p_average,
)

Resurrected avg GPT3 (k=5, k=10, k=20):  0.11246965888689413 0.09104739676840211 0.07166732495511678
Resurrected avg GPT4o (k=5, k=10):  0.046403590664272895 0.05322728904847397


In [15]:
def dump(path, grouped_data):
  """Write one CSV row per dialogue step, with resurrected items folded into one entry.

  For every step the written distribution is the step's "p_distribuition"
  without the keys listed in "resurrected_items" and with an extra
  "resurrected_items_p" entry holding the step's "resurrected_items_p" value.

  The input is NOT modified: the distribution is copied before it is edited,
  so the same data can be dumped again or analysed afterwards.

  Args:
    path: destination CSV path (overwritten if it exists).
    grouped_data: iterable of dialogue dicts with "dialogue_id" and
      "intra_dialogues"; each step needs "intra_dialogue_id",
      "p_distribuition", "resurrected_items" and "resurrected_items_p".

  Raises:
    KeyError: if a listed resurrected item is not a key of the step's
      distribution, or a required key is missing.
  """
  # newline="" is what the csv module expects for files it writes to.
  with open(path, "w", newline="") as f:
    writer = csv.writer(f)
    for dialogue in grouped_data:
      dialogue_id = dialogue["dialogue_id"]
      for step in dialogue["intra_dialogues"]:
        # Shallow copy so the caller's distribution keeps all its items.
        collapsed_distr = dict(step["p_distribuition"])
        for r_item in step["resurrected_items"]:
          collapsed_distr.pop(r_item)
        collapsed_distr["resurrected_items_p"] = step["resurrected_items_p"]

        writer.writerow([
          dialogue_id,
          step["intra_dialogue_id"],
          collapsed_distr,
          step["resurrected_items"],
          step["resurrected_items_p"]
        ])
        
# Write one annotated CSV per run. Every call receives the *_analyzed_data
# returned by compute_resurrected_items_p_sum, so the dependency on the
# annotation step is explicit for the GPT4o runs as well.
dump("./data/generation/8_mcrae/dialogues_sbs_k_five_distr_w_resurr_p.csv", five_analyzed_data)
dump("./data/generation/8_mcrae/dialogues_sbs_k_ten_distr_w_resurr_p.csv", ten_analyzed_data)
dump("./data/generation/8_mcrae/dialogues_sbs_k_twenty_distr_w_resurr_p.csv", twenty_analyzed_data)
dump("./data/generation/8_mcrae/dialogues_sbs_k_five_gpt4o_distr_w_resurr_p.csv", five_gpt4o_analyzed_data)
dump("./data/generation/8_mcrae/dialogues_sbs_k_ten_gpt4o_distr_w_resurr_p.csv", ten_gpt4o_analyzed_data)

## Bayesian apocalypse (steps whose probability distribution is all zeros)

In [21]:
def count_zero_distr(data, n_candidates=8):
  """Count the dialogue steps whose probability distribution is all zeros.

  A step counts as "zero" when the values of its "p_distribuition" dict are
  exactly `n_candidates` zeros (compared with numpy.array_equal).

  Args:
    data: iterable of dialogue dicts, each with an "intra_dialogues" sequence
      of step dicts holding a "p_distribuition" dict.
    n_candidates: expected number of entries in each distribution. Defaults
      to 8, the value that used to be hard-coded.

  Returns:
    A tuple (total, zero_count): the number of steps inspected and the number
    of those steps whose distribution is all zeros.
  """
  zeros_list = np.zeros(n_candidates)
  total = 0
  zero_count = 0
  for dialogue in data:
    for step in dialogue["intra_dialogues"]:
      total += 1
      if np.array_equal(zeros_list, list(step["p_distribuition"].values())):
        zero_count += 1
  return total, zero_count

# Each printed value is a (total steps, all-zero steps) tuple for one run.
print(
    "Zero distribution GPT3 (k=5, k=10, k=20): ",
    *(count_zero_distr(samples) for samples in (five_data, ten_data, twenty_data)),
)
print(
    "Zero distribution GPT4o (k=5, k=10): ",
    *(count_zero_distr(samples) for samples in (five_gpt4o_data, ten_gpt4o_data)),
)

Zero distribution GPT3 (k=5, k=10, k=20):  (644, 0) (644, 0) (644, 0)
Zero distribution GPT4o (k=5, k=10):  (644, 53) (644, 46)


In [24]:
def dump_dialogue_error(path, grouped_data, n_candidates=8):
  """Write the problematic dialogue steps to a CSV file and count the wrong ones.

  A step is written when either:
    * its distribution is all zeros (exactly `n_candidates` zero values), or
    * its distribution has zero entropy and the highest-probability candidate
      is not the step's target.

  Only the second kind is counted in the returned value.

  Args:
    path: destination CSV path (overwritten if it exists).
    grouped_data: iterable of dialogue dicts with "dialogue_id" and
      "intra_dialogues"; each step needs "intra_dialogue_id", "question",
      "answer", "target" and "p_distribuition".
    n_candidates: expected number of entries in each distribution. Defaults
      to 8, the value that used to be hard-coded.

  Returns:
    The number of steps with a zero-entropy distribution whose top candidate
    differs from the target.
  """
  zero_dist = [0] * n_candidates
  count_err = 0
  # newline="" is what the csv module expects for files it writes to.
  with open(path, "w", newline="") as f:
    writer = csv.writer(f)
    writer.writerow([
      "dialogue_id",
      "intra_dialogue_id",
      "question",
      "answer",
      "target",
      "canditates",
      "dist"
    ])
    for dialogue in grouped_data:
      dialogue_id = dialogue["dialogue_id"]
      for step in dialogue["intra_dialogues"]:
        dist = list(step["p_distribuition"].values())
        candidates = list(step["p_distribuition"].keys())
        target = step["target"]

        is_zero_dist = dist == zero_dist
        # Entropy is only evaluated for non-zero distributions. Previously an
        # all-zero first step hit an unbound `entropy`, and later all-zero
        # steps silently reused the entropy of the preceding step.
        is_confident_miss = (
          not is_zero_dist
          and sp.stats.entropy(dist) == 0
          and target != candidates[np.argmax(dist)]
        )
        if is_confident_miss:
          count_err += 1

        if is_zero_dist or is_confident_miss:
          writer.writerow([
            dialogue_id,
            step["intra_dialogue_id"],
            step["question"],
            step["answer"],
            target,
            candidates,
            dist
          ])
  return count_err

# print(len(five_gpt4o_data))
# Dump the problematic GPT4o steps for each k and report the error count
# returned by dump_dialogue_error.
five_gpt4o_error_count = dump_dialogue_error(
    "./data/generation/8_mcrae/dialogues_error_target_gpt4o_k_five.csv",
    five_gpt4o_data,
)
print("Error k=5:", five_gpt4o_error_count)

ten_gpt4o_error_count = dump_dialogue_error(
    "./data/generation/8_mcrae/dialogues_error_target_gpt4o_k_ten.csv",
    ten_gpt4o_data,
)
print("Error k=10:", ten_gpt4o_error_count)

     

Error k=5: 21
Error k=10: 24
