### Experiment Code for IDR Indentification, Classification, and Recommendation

In [3]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [8]:
from tasks import TaskHandler
from evaluation_v2 import Evaluator
from utils import load_config


# if you want to change model params, logprob, output paths, etc. go to config.yml
config_path = './config.yml'
CONFIG = load_config(config_path)
task_config = CONFIG['task_config']

# user decide which exp to run
exp_tasks = ["identification", "classification", "recommendation", "generation", "output_evaluation"]
current_task = exp_tasks[1]
eval_type = None
provider, model_name = "gpt", "gpt-4o-mini-2024-07-18"

# map the args to actual task
if current_task == "output_evaluation":
    eval_type = eval_type
    evaluator = Evaluator(type=eval_type)
else:
    # task_handler = TaskHandler(provider=args.provider, model_name=args.model_name, lm_config_path=config_path, save_path=save_path, **task_config)
    task_handler = TaskHandler(provider=provider, model_name=model_name, lm_config_path=config_path, **task_config)
    task_func = task_handler[current_task]
    task_func()

100%|██████████| 5/5 [00:06<00:00,  1.20s/it]


### Swiss Tournament Sample

In [10]:
import random

class Player:
    def __init__(self, name):
        self.name = name
        self.score = 0
        self.opponents = []  # Keep track of player names this player has already faced
        self.bye = False     # Flag to check if this player already received a bye

    def __repr__(self):
        return f"{self.name} (Score: {self.score})"

def swiss_pairings(players):
    """
    Pair players based on current scores.
    If the number of players is odd, a bye is given to the lowest-ranked
    player who hasn't already received one.
    """
    # Sort players first by score (highest first), then alphabetically
    players_sorted = sorted(players, key=lambda x: (-x.score, x.name))
    pairings = []
    used = set()  # Keep track of players already paired this round

    # Handle odd number of players: give a bye to one player
    if len(players_sorted) % 2 == 1:
        # choose the lowest-ranked player (last in the sorted list)
        for player in reversed(players_sorted):
            if not player.bye:
                print(f"{player.name} receives a bye this round.")
                player.score += 1   # Award one point for the bye
                player.bye = True
                used.add(player)
                break

    # Pair remaining players
    # We loop through the sorted list and for each unpaired player, 
    # find the next unpaired opponent that they haven't faced before.
    for i, player in enumerate(players_sorted):
        if player in used:
            continue
        for j in range(i + 1, len(players_sorted)):
            opponent = players_sorted[j]
            if opponent in used:
                continue
            # Pair if the two players haven't met yet
            if opponent.name not in player.opponents:
                pairings.append((player, opponent))
                used.add(player)
                used.add(opponent)
                break

    return pairings

def simulate_match(player1, player2):
    """
    Simulate a match between two players by choosing a random winner.
    Update their scores and record the matchup.
    """
    winner = random.choice([player1, player2])
    if winner == player1:
        player1.score += 1
        print(f"{player1.name} wins against {player2.name}")
    else:
        player2.score += 1
        print(f"{player2.name} wins against {player1.name}")
    # Record that these players have met
    player1.opponents.append(player2.name)
    player2.opponents.append(player1.name)

def swiss_tournament(players, rounds=3):
    """
    Run a Swiss tournament for a specified number of rounds.
    Each round, pair players according to their scores and then simulate their matches.
    After each round, print the current standings.
    """
    for round_number in range(1, rounds + 1):
        print(f"\n--- Round {round_number} ---")
        pairings = swiss_pairings(players)
        # Simulate all the matches in this round
        for p1, p2 in pairings:
            simulate_match(p1, p2)
        
        # Print the standings after this round
        standings = sorted(players, key=lambda x: (-x.score, x.name))
        print("\nStandings after Round", round_number)
        for player in standings:
            print(player)

if __name__ == "__main__":
    # Create a list of players for the tournament
    player_names = ["Alice", "Bob", "Charlie", "David", "Eva", "May"]
    players = [Player(name) for name in player_names]
    
    # Run the Swiss tournament for 3 rounds
    swiss_tournament(players, rounds=3)


--- Round 1 ---
Alice wins against Bob
David wins against Charlie
May wins against Eva

Standings after Round 1
Alice (Score: 1)
David (Score: 1)
May (Score: 1)
Bob (Score: 0)
Charlie (Score: 0)
Eva (Score: 0)

--- Round 2 ---
David wins against Alice
Bob wins against May
Charlie wins against Eva

Standings after Round 2
David (Score: 2)
Alice (Score: 1)
Bob (Score: 1)
Charlie (Score: 1)
May (Score: 1)
Eva (Score: 0)

--- Round 3 ---
Bob wins against David
Alice wins against Charlie

Standings after Round 3
Alice (Score: 2)
Bob (Score: 2)
David (Score: 2)
Charlie (Score: 1)
May (Score: 1)
Eva (Score: 0)


### Regex Test

In [39]:
import re

data = None
text = "Your verdict:Yes.\nConfidence score:90.\nhhl Reason:The response is correct."
pattern = r"Your verdict:\s*(?P<verdict>Yes|No).?\s*Confidence score:\s*(?P<score>\d+).?"
match = re.search(pattern, text, re.IGNORECASE)
if match:
    data = match.groupdict()

if data:
    print(data)
else:
    print("No match found.")


{'verdict': 'Yes', 'score': '90'}


### ECE Plot

In [79]:
import os
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

def calculate_ece_and_plot(file_path, output_path, type=None):
    """
    Reads JSON files from the given file path, calculates ECE, and saves plots to the output path.

    Args:
        file_path (str): Path to the directory containing JSON files.
        output_path (str): Path to the directory where plots will be saved.
    """
    if not os.path.exists(output_path):
        os.makedirs(output_path)

    files = os.listdir(file_path)
    files = [f for f in files if f.endswith('.json') and (f.startswith('exp_1') or f.startswith('exp_2'))]

    for f in files:
        print(f"Reading file {f}...")
        # Read single file
        df = pd.read_json(os.path.join(file_path, f))
        output = df["verb_conf"]
        bins = np.linspace(0, 100, 10)
        bin_indices = np.digitize(output, bins=bins)
        df["adj_correct"] = df.apply(lambda row: row["y_true"] == row["y_pred"], axis=1)

        bin_acc = {}
        for i, b in enumerate(bin_indices):
            which_bin = 0.1 * np.average([b - 1, b])
            if which_bin in bin_acc:
                bin_acc[which_bin].append(df["adj_correct"].iloc[i])
            else:
                bin_acc[which_bin] = [df["adj_correct"].iloc[i]]

        # Calculate ECE for each bin
        ece = 0
        for key in bin_acc:
            if key > 0.05:
                ece += len(bin_acc[key]) / len(df) * np.abs(np.average(bin_acc[key]) - key)

        print(f"ECE for {f}: {format(ece, '.4f')}\n")

        # Prepare data for plotting
        plt_data = sorted(bin_acc.items())

        # Plot
        plt.figure(figsize=(10, 8))
        plt.title(f"ECE for {f}: {format(ece, '.4f')}\n", fontsize=25)
        plt.plot(np.linspace(0, 1, 20), np.linspace(0, 1, 20), linestyle='dashed')
        plt.scatter([d[0] for d in plt_data], [np.average(d[1]) for d in plt_data],
                    s=[len(d[1]) / len(df) * 10000 for d in plt_data], alpha=0.5, color='red', marker='.')

        plt.xlabel("Confidence Bin", fontsize=20)
        plt.ylabel("Accuracy", fontsize=20)
        plt.xticks(fontsize=20)
        plt.yticks(fontsize=20)
        plt.grid(True)

        # Save the plot
        if type:
            output_file_name = f"{os.path.splitext(f)[0]}_{type}.png"
        else:
            output_file_name = f"{os.path.splitext(f)[0]}.png"
        output_file = os.path.join(output_path, output_file_name)
        plt.savefig(output_file)
        plt.close()
        print(f"Plot saved to {output_file}")

input_path = './output'
output_path = './eval_output/figures'
calculate_ece_and_plot(input_path, output_path)

Reading file exp_2_2_deepseek-reasoner_critical_fewshot.json...
ECE for exp_2_2_deepseek-reasoner_critical_fewshot.json: 0.2392

Plot saved to ./eval_output/figures/exp_2_2_deepseek-reasoner_critical_fewshot.png
Reading file exp_1_gpt-4o-mini-2024-07-18_critical.json...
ECE for exp_1_gpt-4o-mini-2024-07-18_critical.json: 0.0322

Plot saved to ./eval_output/figures/exp_1_gpt-4o-mini-2024-07-18_critical.png
Reading file exp_1_llama-3.3-70b-instruct_critical.json...
ECE for exp_1_llama-3.3-70b-instruct_critical.json: 0.1912

Plot saved to ./eval_output/figures/exp_1_llama-3.3-70b-instruct_critical.png
Reading file exp_2_2_gpt-4o-mini-2024-07-18_critical_fewshot.json...
ECE for exp_2_2_gpt-4o-mini-2024-07-18_critical_fewshot.json: 0.1016

Plot saved to ./eval_output/figures/exp_2_2_gpt-4o-mini-2024-07-18_critical_fewshot.png
Reading file exp_2_1_o3-mini-2025-01-31_critical.json...
ECE for exp_2_1_o3-mini-2025-01-31_critical.json: 0.4126

Plot saved to ./eval_output/figures/exp_2_1_o3-mini-

In [59]:
from tqdm import tqdm

data = {
    "col_1": [1, 2, 3],
    "col_2": [4, 5, 6],
    "col_3": [2, 3, 4],
}

df = pd.DataFrame(data)
df_size = df.shape[0]

df = df[1:]

for i, each in tqdm(df.iterrows(), total=df_size-1):
    pass
    # print("IN!!")

100%|██████████| 2/2 [00:00<00:00, 8971.77it/s]


In [80]:
# check size of data files
data_files = os.listdir("./data")
data_files = sorted([f for f in data_files if f.endswith('.json')])
for f in data_files:
    # get the length of the file
    file_path = os.path.join("./data", f)
    df = pd.read_json(file_path)
    df_size = df.shape[0]
    print(f"File: {f}, Size: {df_size}")

# check size of result files
res_files = os.listdir("./output")
res_files = sorted([f for f in res_files if f.endswith('.json') and ("o1-mini" in f or "o3-mini" in f)])
for f in res_files:
    # get the length of the file
    file_path = os.path.join("./output", f)
    df = pd.read_json(file_path)
    df_size = df.shape[0]
    print(f"File: {f}, Size: {df_size}")



File: data_exp_1.json, Size: 1074
File: data_exp_2_1.json, Size: 1074
File: data_exp_2_2.json, Size: 1134
File: data_exp_2_3.json, Size: 1074
File: data_exp_3_1.json, Size: 154
File: data_exp_3_1_OLD.json, Size: 18
File: data_exp_3_2.json, Size: 111
File: data_exp_3_2_OLD.json, Size: 18
File: data_exp_4_1.json, Size: 182
File: exp_1_o1-mini-2024-09-12_critical.json, Size: 1075
File: exp_1_o1-mini-2024-09-12_critical_fewshot.json, Size: 1074
File: exp_1_o3-mini-2025-01-31_critical.json, Size: 1075
File: exp_1_o3-mini-2025-01-31_critical_fewshot.json, Size: 1075
File: exp_2_1_o1-mini-2024-09-12_critical.json, Size: 1075
File: exp_2_1_o1-mini-2024-09-12_critical_fewshot.json, Size: 1075
File: exp_2_1_o3-mini-2025-01-31_critical.json, Size: 1075
File: exp_2_1_o3-mini-2025-01-31_critical_fewshot.json, Size: 1075
File: exp_2_2_o1-mini-2024-09-12_critical.json, Size: 1116
File: exp_2_2_o1-mini-2024-09-12_critical_fewshot.json, Size: 1131
File: exp_2_2_o3-mini-2025-01-31_critical.json, Size: 1

In [None]:
import anthropic

client = anthropic.Anthropic(api_key="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")

message = client.messages.create(
    model="claude-3-7-sonnet-20250219",
    max_tokens=1000,
    temperature=1,
    system="You are a world-class poet. Respond only with short poems.",
    messages=[
        {
            "role": "user",
            "content": [
                {
                    "type": "text",
                    "text": "Why is the ocean salty?"
                }
            ]
        }
    ]
)


Message(id='msg_012pd7H23QATnZ3bSAV8c7Yt', content=[TextBlock(citations=None, text="# Salt Sea\n\nFrom eons of earth's tears and toil,\nAs rivers sweep the weathered soil,\nThey carry minerals to the deep,\nA briny harvest oceans keep.\n\nWhat lands release, the waves embrace—\nIn endless blue, find resting place.", type='text')], model='claude-3-7-sonnet-20250219', role='assistant', stop_reason='end_turn', stop_sequence=None, type='message', usage=Usage(cache_creation_input_tokens=0, cache_read_input_tokens=0, input_tokens=29, output_tokens=63))

In [92]:
# example code of slicing a dataframe into chunks and saving them into separate files
import pandas as pd
import os


path = "./data"

files = ["data_exp_3_1", "data_exp_3_2"]
# Example DataFrame
df = pd.read_json(os.path.join(path, files[1] + ".json"))

print(len(df))

# # Define the chunk size
chunk_size = 50

# # Output directory
# output_dir = "./chunks"
# os.makedirs(output_dir, exist_ok=True)

# Split the DataFrame into chunks and save each chunk
for i, chunk in enumerate(range(0, len(df), chunk_size)):
    chunk_df = df.iloc[chunk:chunk + chunk_size]
    output_file = os.path.join(path, f"{files[1]}_part_{i+1}.json")
    chunk_df.to_json(output_file, index=False, indent=2, orient="records")
    print(f"Saved {output_file}")



188
Saved ./data/data_exp_3_2_part_1.json
Saved ./data/data_exp_3_2_part_2.json
Saved ./data/data_exp_3_2_part_3.json
Saved ./data/data_exp_3_2_part_4.json
