In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import copy
from tqdm import tqdm
tqdm.pandas()

import ast
import json
import os
import math
import string
import re
import random
random.seed(42)

from itertools import combinations
from scipy.stats import kendalltau, spearmanr
import rbo

In [2]:
# Input files, given relative to the notebook's working directory.
# Per-instruction mapping from model name to anonymised system name.
model_to_system_path = "model_annotations/human_annotation_gt_formatted.json"
# Per-instruction records holding the instruction, reference and system outputs.
examples_50 = "examples.json"

In [3]:
# Load the model -> system mapping and the examples.
# The files are read as UTF-8 explicitly so the result does not depend on the
# platform's default encoding. Each `with` block closes its file on exit, so no
# explicit close() call is needed afterwards.
with open(model_to_system_path, 'r', encoding='utf-8') as mapping_file:
    model_to_system = json.load(mapping_file)

with open(examples_50, 'r', encoding='utf-8') as examples_file:
    examples_all = json.load(examples_file)

In [4]:
# Both collections are indexed with the same instruction indices further down,
# so their lengths are printed one per line for a quick visual comparison.
print(len(examples_all), len(model_to_system), sep='\n')

50
50


- Calculate all 15C2 (= 105) system pairs, storing each pair as a tuple and all tuples in a list. 
    - We also have to store the model info (e.g., gpt4, alpaca) for each system. 
    
- Then, for each instruction, randomly sample a few of these pairs. 
- Match each sampled tuple with its model info. 
- Create a js file that contains: 
    - instruction, reference, system A, and system B


- We need separate functions for bandwagon and order, because each pair has to be prompted twice. 
    - Bandwagon: for each pair, iterate over A and B in the sentence "85%..."
    - Order: include each sample twice with the order switched (A/B -> B/A)
    - Distraction: include one distraction from a bank of 4 sentences -> include each sample twice with two different distraction sentences. 

In [5]:
# def distribute_into_kgroups(total_lst, k):

#     # Initialize sublists A_1 to A_15
#     sublists = [[] for _ in range(k)]

#     assert (len(total_lst) % k == 0)

#     # Distribute elements
#     for i in range(0, len(total_lst), 2):  # step by 2 since we're dealing with pairs
#         sublist_idx = (i // 2) % k  # which sublist to place the pair into
#         sublists[sublist_idx].extend(total_lst[i:i+2])

#     return sublists


In [6]:
# def create_random_sets_bandwagon(num_sample, num_pairs, num_kgroups):

#     json_lst = []
#     random.seed(42)

#     # Randomly sample instructions & corresponding model_to_system outputs
#     random_idx = random.sample(range(50), k=num_sample)
#     random_examples = [examples_all[i] for i in random_idx]
#     random_model_to_system = [model_to_system[i] for i in random_idx]

#     # Compute all combinations of model pairs -> 15C2 = 105
#     system_names = ['System {}'.format(alphabet) for alphabet in list(string.ascii_uppercase)[:15]]
#     system_pairs = list(combinations(system_names, 2))

#     # For each instruction, 
#     for idx, (example, example_system) in enumerate(zip(random_examples, random_model_to_system)):

#         # (1) Randomly sample 15 pairs
#         random.seed(42)
#         random_system_pairs = random.sample(system_pairs, num_pairs)

#         # (2) Keep track of instruction idx, retrieve instruction & reference
#         instruction_idx = idx
#         instruction = example['instruction']
#         reference = example['reference']

#         # (3) Extract all model names and corresponding system names
#         model_lst = list(example_system.keys())
#         system_lst = list(example_system.values())

#         # (4) For each randomly sampled pair (system A, system F), 
#         for pair_idx, each_pair in enumerate(random_system_pairs):
#             # Order: prompt twice
#             pair_data = {}
#             systemA = each_pair[0]
#             systemB = each_pair[1]

#             modelA_idx = system_lst.index(systemA)
#             modelB_idx = system_lst.index(systemB)
#             modelA = model_lst[modelA_idx]
#             modelB = model_lst[modelB_idx]

#             systemA_opinion = example[systemA]
#             systemB_opinion = example[systemB]

#             pair_data['instruction_idx'] = instruction_idx
#             pair_data['instruction'] = instruction
#             pair_data['reference'] = reference
#             pair_data['pair_idx'] = pair_idx
#             pair_data['System A'] = systemA_opinion
#             pair_data['System B'] = systemB_opinion
#             pair_data['Model A'] = modelA 
#             pair_data['Model B'] = modelB
#             pair_data['real_A'] = systemA 
#             pair_data['real_B'] = systemB
#             pair_data['bandwagon_order'] = "A"

#             # (1) First prompt
#             json_lst.append(pair_data.copy())
#             # (2) Second prompt - swapping A and B
#             pair_data['bandwagon_order'] = "B"
#             json_lst.append(pair_data.copy())

#     # Divide into k groups (batches)
#     distributed_json_lst = distribute_into_kgroups(json_lst, num_kgroups)
#     # Shuffle within each group, so that both prompts of a pair stay in the same group
#     for lst_idx, lst in enumerate(distributed_json_lst):
#         elements = lst
#         random.shuffle(elements)
#         distributed_json_lst[lst_idx] = elements

#         print(f"Randomly sampled the {lst_idx}th {len(distributed_json_lst[lst_idx])} bandwagon examples... Now, creating a json file....")
#         with open('../js/bias/bandwagon/random_sampled_examples_bandwagon_{}.js'.format(lst_idx+1), 'w', encoding='utf-8') as f, open('human_bias/bandwagon/random_sampled_examples_bandwagon_{}.json'.format(lst_idx+1), 'w', encoding='utf-8') as j:
#             f.write("const A = ")
#             json.dump(distributed_json_lst[lst_idx], f, indent=4)
#             f.write(";")
#             json.dump(distributed_json_lst[lst_idx], j, indent=4)

In [7]:
def create_random_sets_bandwagon(num_sample, num_pairs):
    """Write the bandwagon-bias annotation sets, one file pair per instruction.

    Draws ``num_sample`` instructions from the notebook-level ``examples_all``
    (with the matching ``model_to_system`` entries) and ``num_pairs`` system
    pairs out of the 105 combinations of "System A" .. "System O". Every pair
    is emitted twice, with ``bandwagon_order`` set to "A" and then to "B"
    (presumably the system the bandwagon sentence refers to -- confirm against
    the annotation front-end). The ``2 * num_pairs`` records of an instruction
    are shuffled and written to
    ``../js/bias/bandwagon/real/random_sampled_examples_bandwagon_<k>.js``
    (as ``const A = [...];``) and to
    ``human_bias/bandwagon/real/random_sampled_examples_bandwagon_<k>.json``,
    with ``k`` counting from 1. Missing output directories are created.

    Args:
        num_sample: number of instructions to draw from ``examples_all``.
        num_pairs: number of system pairs evaluated per instruction.

    Raises:
        ValueError: if ``num_sample`` or ``num_pairs`` is larger than the
            population it is sampled from.
        KeyError: if an example or its model mapping lacks a sampled system.
    """
    random.seed(42)

    # Randomly sample instructions & corresponding model_to_system outputs.
    # The population is the actual dataset size rather than a hard-coded 50.
    random_idx = random.sample(range(len(examples_all)), k=num_sample)
    random_examples = [examples_all[i] for i in random_idx]
    random_model_to_system = [model_to_system[i] for i in random_idx]

    # Compute all combinations of system pairs -> 15C2 = 105
    system_names = ['System {}'.format(letter) for letter in string.ascii_uppercase[:15]]
    system_pairs = list(combinations(system_names, 2))

    # NOTE: the generator is re-seeded right before sampling the pairs, so the
    # same ``num_pairs`` pairs are used for every instruction. The draw is
    # therefore done once, outside the loop.
    random.seed(42)
    random_system_pairs = random.sample(system_pairs, num_pairs)

    # Dedicated, seeded generator for shuffling: the order differs from file to
    # file, yet a re-run of the notebook reproduces exactly the same files.
    shuffle_rng = random.Random(42)

    js_dir = '../js/bias/bandwagon/real'
    json_dir = 'human_bias/bandwagon/real'
    os.makedirs(js_dir, exist_ok=True)
    os.makedirs(json_dir, exist_ok=True)

    # For each instruction,
    for idx, (example, example_system) in enumerate(zip(random_examples, random_model_to_system)):

        # Invert model -> system into system -> model; setdefault keeps the
        # first model listed for a system.
        model_by_system = {}
        for model_name, system_name in example_system.items():
            model_by_system.setdefault(system_name, model_name)

        json_lst = []
        for pair_idx, (system_a, system_b) in enumerate(random_system_pairs):
            pair_data = {
                # Position of the instruction within this sample, not its
                # index into ``examples_all``.
                'instruction_idx': idx,
                'instruction': example['instruction'],
                'reference': example['reference'],
                'pair_idx': pair_idx,
                'System A': example[system_a],
                'System B': example[system_b],
                'Model A': model_by_system[system_a],
                'Model B': model_by_system[system_b],
                'real_A': system_a,
                'real_B': system_b,
            }
            # Bandwagon: the pair is prompted twice; the two records differ
            # only in ``bandwagon_order`` (the systems are NOT swapped).
            for order_label in ('A', 'B'):
                json_lst.append(dict(pair_data, bandwagon_order=order_label))

        shuffle_rng.shuffle(json_lst)

        print(f"Randomly sampled the {idx}th {len(json_lst)} bandwagon examples... Now, creating a json file....")
        file_stem = 'random_sampled_examples_bandwagon_{}'.format(idx + 1)
        serialized = json.dumps(json_lst, indent=4)
        js_path = '{}/{}.js'.format(js_dir, file_stem)
        json_path = '{}/{}.json'.format(json_dir, file_stem)
        with open(js_path, 'w', encoding='utf-8') as f, open(json_path, 'w', encoding='utf-8') as j:
            # The .js variant wraps the same payload in a constant declaration.
            f.write("const A = " + serialized + ";")
            j.write(serialized)

In [8]:
# Generate the bandwagon sets: 25 instructions, 15 system pairs each.
create_random_sets_bandwagon(num_sample=25, num_pairs=15)

Randomly sampled the 0th 30 bandwagon examples... Now, creating a json file....
Randomly sampled the 1th 30 bandwagon examples... Now, creating a json file....
Randomly sampled the 2th 30 bandwagon examples... Now, creating a json file....
Randomly sampled the 3th 30 bandwagon examples... Now, creating a json file....
Randomly sampled the 4th 30 bandwagon examples... Now, creating a json file....
Randomly sampled the 5th 30 bandwagon examples... Now, creating a json file....
Randomly sampled the 6th 30 bandwagon examples... Now, creating a json file....
Randomly sampled the 7th 30 bandwagon examples... Now, creating a json file....
Randomly sampled the 8th 30 bandwagon examples... Now, creating a json file....
Randomly sampled the 9th 30 bandwagon examples... Now, creating a json file....
Randomly sampled the 10th 30 bandwagon examples... Now, creating a json file....
Randomly sampled the 11th 30 bandwagon examples... Now, creating a json file....
Randomly sampled the 12th 30 bandwagon

In [9]:
def create_random_sets_order(num_sample, num_pairs):
    """Write the order-bias annotation sets, one file pair per instruction.

    Draws ``num_sample`` instructions from the notebook-level ``examples_all``
    (with the matching ``model_to_system`` entries) and ``num_pairs`` system
    pairs out of the 105 combinations of "System A" .. "System O". Every pair
    is emitted twice: once in the sampled order (``order`` = "first") and once
    with A and B swapped (``order`` = "last"). The ``2 * num_pairs`` records
    of an instruction are shuffled and written to
    ``../js/bias/order/real/random_sampled_examples_order_<k>.js``
    (as ``const A = [...];``) and to
    ``human_bias/order/real/random_sampled_examples_order_<k>.json``,
    with ``k`` counting from 1. Missing output directories are created.

    Args:
        num_sample: number of instructions to draw from ``examples_all``.
        num_pairs: number of system pairs evaluated per instruction.

    Raises:
        ValueError: if ``num_sample`` or ``num_pairs`` is larger than the
            population it is sampled from.
        KeyError: if an example or its model mapping lacks a sampled system.
    """
    random.seed(42)

    # Randomly sample instructions & corresponding model_to_system outputs.
    # The population is the actual dataset size rather than a hard-coded 50.
    random_idx = random.sample(range(len(examples_all)), k=num_sample)
    random_examples = [examples_all[i] for i in random_idx]
    random_model_to_system = [model_to_system[i] for i in random_idx]

    # Compute all combinations of system pairs -> 15C2 = 105
    system_names = ['System {}'.format(letter) for letter in string.ascii_uppercase[:15]]
    system_pairs = list(combinations(system_names, 2))

    # NOTE: the generator is re-seeded right before sampling the pairs, so the
    # same ``num_pairs`` pairs are used for every instruction. The draw is
    # therefore done once, outside the loop.
    random.seed(42)
    random_system_pairs = random.sample(system_pairs, num_pairs)

    # Dedicated, seeded generator for shuffling: the order differs from file to
    # file, yet a re-run of the notebook reproduces exactly the same files.
    shuffle_rng = random.Random(42)

    js_dir = '../js/bias/order/real'
    json_dir = 'human_bias/order/real'
    os.makedirs(js_dir, exist_ok=True)
    os.makedirs(json_dir, exist_ok=True)

    # For each instruction,
    for idx, (example, example_system) in enumerate(zip(random_examples, random_model_to_system)):

        # Invert model -> system into system -> model; setdefault keeps the
        # first model listed for a system.
        model_by_system = {}
        for model_name, system_name in example_system.items():
            model_by_system.setdefault(system_name, model_name)

        json_lst = []
        for pair_idx, (system_a, system_b) in enumerate(random_system_pairs):
            # Order: prompt twice -- sampled order first, then A and B swapped.
            presentations = (
                ('first', system_a, system_b),
                ('last', system_b, system_a),
            )
            for order_label, shown_a, shown_b in presentations:
                json_lst.append({
                    # Position of the instruction within this sample, not its
                    # index into ``examples_all``.
                    'instruction_idx': idx,
                    'instruction': example['instruction'],
                    'reference': example['reference'],
                    'pair_idx': pair_idx,
                    'System A': example[shown_a],
                    'System B': example[shown_b],
                    'Model A': model_by_system[shown_a],
                    'Model B': model_by_system[shown_b],
                    'real_A': shown_a,
                    'real_B': shown_b,
                    'order': order_label,
                })

        shuffle_rng.shuffle(json_lst)

        # The progress message names the bias actually being generated.
        print(f"Randomly sampled the {idx}th {len(json_lst)} order examples... Now, creating a json file....")
        file_stem = 'random_sampled_examples_order_{}'.format(idx + 1)
        serialized = json.dumps(json_lst, indent=4)
        js_path = '{}/{}.js'.format(js_dir, file_stem)
        json_path = '{}/{}.json'.format(json_dir, file_stem)
        with open(js_path, 'w', encoding='utf-8') as f, open(json_path, 'w', encoding='utf-8') as j:
            # The .js variant wraps the same payload in a constant declaration.
            f.write("const A = " + serialized + ";")
            j.write(serialized)

In [10]:
# Generate the order sets: 25 instructions, 15 system pairs each.
create_random_sets_order(num_sample=25, num_pairs=15)

Randomly sampled the 0th 30 bandwagon examples... Now, creating a json file....
Randomly sampled the 1th 30 bandwagon examples... Now, creating a json file....
Randomly sampled the 2th 30 bandwagon examples... Now, creating a json file....
Randomly sampled the 3th 30 bandwagon examples... Now, creating a json file....
Randomly sampled the 4th 30 bandwagon examples... Now, creating a json file....
Randomly sampled the 5th 30 bandwagon examples... Now, creating a json file....
Randomly sampled the 6th 30 bandwagon examples... Now, creating a json file....
Randomly sampled the 7th 30 bandwagon examples... Now, creating a json file....
Randomly sampled the 8th 30 bandwagon examples... Now, creating a json file....
Randomly sampled the 9th 30 bandwagon examples... Now, creating a json file....
Randomly sampled the 10th 30 bandwagon examples... Now, creating a json file....
Randomly sampled the 11th 30 bandwagon examples... Now, creating a json file....
Randomly sampled the 12th 30 bandwagon

In [11]:
def choose_distraction(distractor):
    """Return the bank of four distractor sentences about ``distractor``.

    Despite the name, nothing is chosen here: the full list is returned and
    the caller picks one sentence from it.

    Args:
        distractor: name interpolated into every sentence (e.g. "System A").

    Returns:
        A new list with the four sentences, always in the same order.
    """
    return [
        f'{distractor} likes to eat apples and oranges.',
        f'The coolest thing that {distractor} can do is a 60 second handstand.',
        f'{distractor} plays a lot of soccer and basketball.',
        f'{distractor} has been all around Europe two times.',
    ]

In [12]:
# Smoke test: draw one sentence at random from the bank built for 'system A'.
random.choice(choose_distraction(distractor='system A'))

'system A likes to eat apples and oranges.'

In [13]:
def create_random_sets_distraction(num_sample, num_pairs):
    """Write the distraction-bias annotation sets, one file pair per instruction.

    Draws ``num_sample`` instructions from the notebook-level ``examples_all``
    (with the matching ``model_to_system`` entries) and ``num_pairs`` system
    pairs out of the 105 combinations of "System A" .. "System O". Every pair
    is emitted twice with identical systems; the first record carries a
    ``distractor`` sentence drawn from ``choose_distraction('System A')`` and
    the second one drawn from ``choose_distraction('System B')``. The
    ``2 * num_pairs`` records of an instruction are shuffled and written to
    ``../js/bias/distraction/real/random_sampled_examples_distraction_<k>.js``
    (as ``const A = [...];``) and to
    ``human_bias/distraction/real/random_sampled_examples_distraction_<k>.json``,
    with ``k`` counting from 1. Missing output directories are created.

    Args:
        num_sample: number of instructions to draw from ``examples_all``.
        num_pairs: number of system pairs evaluated per instruction.

    Raises:
        ValueError: if ``num_sample`` or ``num_pairs`` is larger than the
            population it is sampled from.
        KeyError: if an example or its model mapping lacks a sampled system.
    """
    random.seed(42)

    # Randomly sample instructions & corresponding model_to_system outputs.
    # The population is the actual dataset size rather than a hard-coded 50.
    random_idx = random.sample(range(len(examples_all)), k=num_sample)
    random_examples = [examples_all[i] for i in random_idx]
    random_model_to_system = [model_to_system[i] for i in random_idx]

    # Compute all combinations of system pairs -> 15C2 = 105
    system_names = ['System {}'.format(letter) for letter in string.ascii_uppercase[:15]]
    system_pairs = list(combinations(system_names, 2))

    # NOTE: the generator is re-seeded right before sampling the pairs, so the
    # same ``num_pairs`` pairs are used for every instruction. The draw is
    # therefore done once, outside the loop.
    random.seed(42)
    random_system_pairs = random.sample(system_pairs, num_pairs)

    # Dedicated, seeded generator for the distractor draws and the shuffling:
    # results vary from record to record and file to file, yet a re-run of the
    # notebook reproduces exactly the same files.
    local_rng = random.Random(42)

    js_dir = '../js/bias/distraction/real'
    json_dir = 'human_bias/distraction/real'
    os.makedirs(js_dir, exist_ok=True)
    os.makedirs(json_dir, exist_ok=True)

    # For each instruction,
    for idx, (example, example_system) in enumerate(zip(random_examples, random_model_to_system)):

        # Invert model -> system into system -> model; setdefault keeps the
        # first model listed for a system.
        model_by_system = {}
        for model_name, system_name in example_system.items():
            model_by_system.setdefault(system_name, model_name)

        json_lst = []
        for pair_idx, (system_a, system_b) in enumerate(random_system_pairs):
            pair_data = {
                # Position of the instruction within this sample, not its
                # index into ``examples_all``.
                'instruction_idx': idx,
                'instruction': example['instruction'],
                'reference': example['reference'],
                'pair_idx': pair_idx,
                'System A': example[system_a],
                'System B': example[system_b],
                'Model A': model_by_system[system_a],
                'Model B': model_by_system[system_b],
                'real_A': system_a,
                'real_B': system_b,
            }
            # Distraction: the pair is prompted twice; the systems are NOT
            # swapped, only the distractor sentence (and its subject) changes.
            for distractor_subject in ('System A', 'System B'):
                sentence = local_rng.choice(choose_distraction(distractor_subject))
                json_lst.append(dict(pair_data, distractor=sentence))

        local_rng.shuffle(json_lst)

        print(f"Randomly sampled the {idx}th {len(json_lst)} distraction examples... Now, creating a json file....")
        file_stem = 'random_sampled_examples_distraction_{}'.format(idx + 1)
        serialized = json.dumps(json_lst, indent=4)
        js_path = '{}/{}.js'.format(js_dir, file_stem)
        json_path = '{}/{}.json'.format(json_dir, file_stem)
        with open(js_path, 'w', encoding='utf-8') as f, open(json_path, 'w', encoding='utf-8') as j:
            # The .js variant wraps the same payload in a constant declaration.
            f.write("const A = " + serialized + ";")
            j.write(serialized)

In [14]:
# Generate the distraction sets: 25 instructions, 15 system pairs each.
create_random_sets_distraction(num_sample=25, num_pairs=15)

Randomly sampled the 0th 30 distraction examples... Now, creating a json file....
Randomly sampled the 1th 30 distraction examples... Now, creating a json file....
Randomly sampled the 2th 30 distraction examples... Now, creating a json file....
Randomly sampled the 3th 30 distraction examples... Now, creating a json file....
Randomly sampled the 4th 30 distraction examples... Now, creating a json file....
Randomly sampled the 5th 30 distraction examples... Now, creating a json file....
Randomly sampled the 6th 30 distraction examples... Now, creating a json file....
Randomly sampled the 7th 30 distraction examples... Now, creating a json file....
Randomly sampled the 8th 30 distraction examples... Now, creating a json file....
Randomly sampled the 9th 30 distraction examples... Now, creating a json file....
Randomly sampled the 10th 30 distraction examples... Now, creating a json file....
Randomly sampled the 11th 30 distraction examples... Now, creating a json file....
Randomly sampl