In [None]:
try:
    import binutil
except ModuleNotFoundError:
    import bin.binutil

from dreamcoder.program import *
from dreamcoder.domains.relation import *
from dreamcoder.domains.relation.relation_primitives import *

# Call the primitive getters pulled in by the star imports above. Their return
# values are discarded, so the calls matter only for whatever side effects
# they have -- presumably registering the primitives that Program.parse needs
# in later cells; TODO confirm against relation_primitives.
get_baseline_primitives()
get_clevr_primitives()
get_clevr_primitives_unconfounded()

In [None]:
# Read the program listing written for one (domain, seed, mode) run and build
# `task_programs`, a dict mapping a task identifier to its program string.
# Also sets NUMBER_TEST_TASKS, which later cells use as the denominator for
# the share of tasks that have a program.
mode = "test"
domain = "kandinsky"
seed = 2
file = f"../consoleOutputs/{domain}/eval_json/{seed}/{mode}_programs.out"

with open(file, "r") as f:
    data = f.read()

# Drop blank lines in BOTH modes. Previously only the train branch did this,
# so in test mode the empty string left behind by a trailing newline was
# counted as a task and inflated NUMBER_TEST_TASKS.
data = [line for line in data.split("\n") if line != ""]

task_programs = {}
if mode == "train":
    # A line containing "task" names a task; the line that follows it carries
    # the program in its second tab-separated field.
    task_flag = False
    for line in data:
        if task_flag:
            program = line.split("\t")[1]
            task_programs[task_id] = program
            task_flag = False
        # Deliberately not an `elif`: a program line that itself contains
        # "task" re-arms the flag, exactly as before.
        if "task" in line:
            task_flag = True
            task_id = line
    NUMBER_TEST_TASKS = len(task_programs)
    print("Number of all tasks: ", len(task_programs))
else:
    # Every non-blank line counts as one task; only lines containing "HIT"
    # contribute a program.
    NUMBER_TEST_TASKS = len(data)
    print("Number of all tasks: ", len(data))
    for line in data:
        if "HIT" not in line:
            continue
        parts = line.split("w/")
        # Task name: text before "w/" with spaces and the "HIT" tag removed.
        name = parts[0].replace(" ", "").replace("HIT", "")
        # Program: text after "w/" up to the first ";", minus its first character.
        program = parts[1].split(";")[0][1:]
        task_programs[name] = program

In [None]:
# Report how many tasks have a program and blend a fixed score with a
# fallback of 50 for the tasks that have none.
num_solved = len(task_programs)
print(num_solved)
task_programs  # bare expression; not the cell's last line, so it is not displayed

# Hard-coded score on a 0-100 scale (the fallback weight below is 50);
# presumably copied from an earlier run -- TODO confirm its origin.
cba = 89.67
solved_fraction = num_solved / NUMBER_TEST_TASKS
cba_50 = cba * solved_fraction + ((1 - solved_fraction) * 50)
print("cba 50: ", cba_50)
print("ration solved tasks: ", num_solved, "/", NUMBER_TEST_TASKS)
print("percent solved tasks: ", solved_fraction)

In [None]:
import pandas as pd
import json
import os
from tqdm import tqdm


# Run every recovered program on the query examples of its task and tally a
# per-task confusion matrix (TP / FP / TN / FN) into `results_df`.
#
# NOTE(review): the directory is fixed to ".../train/query" regardless of
# `mode` -- confirm this is intended when mode == "test".
path = "../data/curi/confounded/train/query"

json_files = [f.path for f in os.scandir(path) if f.path.endswith(".json")]

RESULT_COLUMNS = ["task_name", "TP", "FP", "TN", "FN"]
task_results = []

# iterate over tasks
for task_name, program_source in tqdm(task_programs.items()):
    task_file = path + "/" + task_name + ".json"
    try:
        # Context manager so the handle is closed even if JSON parsing fails.
        with open(task_file) as task_fh:
            examples = json.load(task_fh)
    except (OSError, ValueError):
        # Missing/unreadable file or invalid JSON (JSONDecodeError is a
        # ValueError). Skip the task: falling through would either raise a
        # NameError or silently score the PREVIOUS task's examples.
        print("task not found: ", task_name)
        continue

    # Parse once per task; the program string does not change per example.
    program = Program.parse(program_source)

    true_positives = 0
    false_positives = 0
    true_negatives = 0
    false_negatives = 0

    for example in examples:
        example_input = example["input"]
        expected = example["output"]

        # execute program for input
        try:
            program_output = program.evaluate([])(example_input)
        except Exception:
            # Best effort: an example the program cannot be evaluated on is
            # left out of every count. `Exception` (not a bare except) keeps
            # Ctrl-C / kernel interrupts working.
            continue

        # categorize prediction
        if expected:
            if program_output:
                true_positives += 1
            else:
                false_negatives += 1
        else:
            if program_output:
                false_positives += 1
            else:
                true_negatives += 1

    # collect results
    task_results.append(
        {
            "task_name": task_name,
            "TP": true_positives,
            "FP": false_positives,
            "TN": true_negatives,
            "FN": false_negatives,
        }
    )


# Explicit columns keep the frame usable by later cells even when no task
# could be evaluated.
results_df = pd.DataFrame(task_results, columns=RESULT_COLUMNS)
results_df

In [None]:
# Derive per-task metrics from the confusion-matrix counts and add them to
# `results_df` as the columns "Accuracy", "CBA" and "Precision".
tp, fp = results_df["TP"], results_df["FP"]
tn, fn = results_df["TN"], results_df["FN"]

results_df["Accuracy"] = (tp + tn) / (tp + tn + fp + fn)

# CBA is the mean of the hit rate on positive and on negative examples.
positive_rate = tp / (tp + fn)
negative_rate = tn / (tn + fp)
results_df["CBA"] = (positive_rate + negative_rate) / 2

results_df["Precision"] = tp / (tp + fp)

# Number of tasks whose accuracy is defined (not NaN).
len(results_df[results_df["Accuracy"].notna()])

In [None]:
# Show the tasks ordered from lowest to highest CBA (ascending is the default).
results_df.sort_values("CBA")

In [None]:
# Print the mode, then the mean over tasks of accuracy, CBA and precision,
# one value per line in that order.
print(" ", mode)
for metric_name in ("Accuracy", "CBA", "Precision"):
    print(results_df[metric_name].mean())

In [None]:
# Report CBA over the tasks that have a program ("solved") and over all
# tasks, where each task without a program is credited with 0.50.
print("kandinsky ", seed)
cba = results_df["CBA"].mean()
print("Class balanced accuracy (solved): ", cba)

solved_fraction = len(task_programs) / NUMBER_TEST_TASKS
cba_50 = cba * solved_fraction + ((1 - solved_fraction) * 0.50)
print("Class balanced accuracy (all): ", cba_50)

In [None]:
# Mean CBA scaled by the number of evaluated tasks over a fixed 100.
# Printed explicitly: only the last expression of a cell is auto-displayed,
# so as a bare expression this value was computed and then thrown away.
# NOTE(review): 100 is hard-coded rather than NUMBER_TEST_TASKS -- confirm.
print(results_df["CBA"].mean() * (len(results_df) / 100))
len(results_df)

In [None]:
# Write the per-task metrics table to disk for the current mode and seed.
# NOTE(review): the aggregation cells further down read
# "../experimentOutputs/{domain}/kandinsky_image_{seed}.csv", which matches
# neither this directory nor this file name -- confirm which one is intended.
output_csv = f"../experimentOutputs/kandinsky/kandinsky_{mode}_image_{seed}.csv"
results_df.to_csv(output_csv)

In [None]:
# get @all accuracies
import pandas as pd
import numpy as np

# Aggregate the "@all" class-balanced accuracy over the saved per-seed result
# tables: each seed's mean CBA is weighted by the share of tasks present in
# its table, and every remaining task is credited with 0.5.
#
# NOTE(review): `domain` is "clevr" while the file name starts with
# "kandinsky_", and `mode` is set but not used in this cell -- confirm both.
# These three names intentionally stay notebook-level; the following cell
# reads `domain`.
domain = "clevr"
mode = "test"
NUMBER_TEST_TASKS = 100
SEEDS = range(3)

# Load with a comprehension so the notebook-level `results_df` and `seed`
# (used by the save cell) are no longer overwritten by this cell.
results = [
    pd.read_csv(f"../experimentOutputs/{domain}/kandinsky_image_{run_seed}.csv")
    for run_seed in SEEDS
]

# Mean CBA over the tasks present in each seed's table.
mean_accs = [df["CBA"].mean() for df in results]
print(mean_accs)

# Blend with 0.5 for the tasks that are missing from each table.
cba_all = [
    solved_cba * (len(seed_df) / NUMBER_TEST_TASKS)
    + 0.5 * (1 - (len(seed_df) / NUMBER_TEST_TASKS))
    for seed_df, solved_cba in zip(results, mean_accs)
]

# Separate name for the percentage values instead of reusing `mean_accs`.
cba_all_percent = np.array(cba_all) * 100
print(cba_all_percent)
mean_acc = np.mean(cba_all_percent)
std = np.std(cba_all_percent)  # NumPy default: population std (ddof=0)
print(round(mean_acc, 2), round(std, 2))

In [None]:
# get @solved accuracies
import pandas as pd
import numpy as np

# Aggregate the "@solved" class-balanced accuracy: mean and standard deviation
# (in percent) of each seed's mean CBA over the tasks present in its table.
#
# NOTE: this cell does not set `domain`; it uses whatever value an earlier
# cell left in the notebook namespace.

# Load with a comprehension so the notebook-level `results_df` and `seed`
# (used by the save cell) are no longer overwritten by this cell.
results = [
    pd.read_csv(f"../experimentOutputs/{domain}/kandinsky_image_{run_seed}.csv")
    for run_seed in range(3)
]

mean_accs = [df["CBA"].mean() for df in results]
print(mean_accs)

# Separate name for the percentage values instead of reusing `mean_accs`.
solved_percent = np.array(mean_accs) * 100
mean_acc = np.mean(solved_percent)
std = np.std(solved_percent)  # NumPy default: population std (ddof=0)
print(round(mean_acc, 2), round(std, 2))