In [1]:
import os
import numpy as np
import pandas as pd
from tqdm import tqdm, trange
import torch
import matplotlib.pyplot as plt
import pickle5 as pickle
import plotly.express as px
import itertools
import argparse
import scipy.stats
import scipy.special as special
from typing import Dict, List, Any, Tuple

from subset_utils import *

In [2]:
# Fix the random seed before any sampling done in the cells below.
# NOTE(review): seed_everything is not defined in this notebook; presumably it
# comes from `from subset_utils import *` — confirm which RNGs it seeds.
seed_everything(42)

### Randomly sample examples

In [None]:
# Print one randomly drawn row from every (metric, criterion) subset of each
# dataset, without writing the subset index files (write=False).
DATASET_NAMES = ["42/cogs"]
METRICS = ["Inverse PPL", "CHIA", "BLEU"]
CRITERIA = ["Easy to Learn", "Ambiguous", "Hard to Learn"]
# zip() below stops at the shorter list, so with a single dataset only the
# first epoch (10) is actually used.
CONVERGE_EPOCHS = [10, 20]
RATIO = 0.05

for DATASET_NAME, CONVERGE_EPOCH in zip(DATASET_NAMES, CONVERGE_EPOCHS):
    OUTPUTS_PATH = outputs_path(DATASET_NAME)
    idx_dict, i2s = get_scores(OUTPUTS_PATH, CONVERGE_EPOCH, STRING_TRUNCATE)
    df = calculate_statistics(CONVERGE_EPOCH, idx_dict, i2s)
    for METRIC in METRICS:
        for CRITERION in CRITERIA:
            print(DATASET_NAME.split("/")[-1], METRIC, CRITERION, "\n")
            idx_fname = create_ratio_fname(METRIC, CRITERION, CONVERGE_EPOCH, RATIO)
            subset_df = choose_subset(df, METRIC, CRITERION, DATASET_NAME, idx_fname, ratio=RATIO, write=False)
            samples = subset_df.sample(1, replace=False)
            for idx in range(len(samples)):
                sample = samples.iloc[idx]
                # `sample` is a Series labelled by column name, so an integer key
                # must go through .iloc; plain sample[7] relies on a deprecated
                # positional fallback.
                # NOTE(review): positions 7 and 8 are kept as in the original;
                # confirm which columns they are and consider selecting by name.
                print(sample.iloc[7])
                print(sample.iloc[8], "\n")

### Calculate subset statistics

In [None]:
# For every (metric, criterion) pair, average the describe() table of the chosen
# subset over all dataset variants in a group and print the mean input/output
# length and rarity.
# NOTE(review): the "0/", "42/", "123/" prefixes look like random seeds — confirm.
DATASET_NAMES_LIST = [["0/cogs", "42/cogs", "123/cogs"], ["0/cfq", "42/cfq", "123/cfq"]]
METRICS = ["Inverse PPL", "CHIA", "BLEU"]
CRITERIA = ["Easy to Learn", "Ambiguous", "Hard to Learn", "Random"]
CONVERGE_EPOCHS = [10, 20]
RATIO = 0.33

for DATASET_NAMES, CONVERGE_EPOCH in zip(DATASET_NAMES_LIST, CONVERGE_EPOCHS):
    # The score table depends only on (dataset, epoch), so build it once per
    # dataset instead of once per metric x criterion.
    # NOTE(review): assumes choose_subset does not mutate the frame it is given;
    # other cells in this notebook already reuse `df` across choose_subset calls.
    stats_by_dataset = {}
    for DATASET_NAME in DATASET_NAMES:
        OUTPUTS_PATH = outputs_path(DATASET_NAME)
        idx_dict, i2s = get_scores(OUTPUTS_PATH, CONVERGE_EPOCH, STRING_TRUNCATE)
        stats_by_dataset[DATASET_NAME] = calculate_statistics(CONVERGE_EPOCH, idx_dict, i2s)

    for METRIC in METRICS:
        for CRITERION in CRITERIA:
            # The file name does not depend on the dataset variant.
            idx_fname = create_ratio_fname(METRIC, CRITERION, CONVERGE_EPOCH, RATIO)
            DESCRIBE_SUM = None
            for DATASET_NAME in DATASET_NAMES:
                df = stats_by_dataset[DATASET_NAME]
                subset_df = choose_subset(df, METRIC, CRITERION, DATASET_NAME, idx_fname, ratio=RATIO)
                if DESCRIBE_SUM is None:
                    DESCRIBE_SUM = subset_df.describe()
                else:
                    DESCRIBE_SUM += subset_df.describe()
            DESCRIBE_SUM /= len(DATASET_NAMES)
            # DATASET_NAME is the last variant of the group; every variant in a
            # group shares the suffix after "/".
            print(DATASET_NAME.split("/")[-1], METRIC, CRITERION)
            # Select the "mean" row by label; the original [1] relied on the
            # deprecated positional fallback for integer keys on a labelled Series.
            print("Length", f'{DESCRIBE_SUM.loc["mean", "In Len"]:.2f} / {DESCRIBE_SUM.loc["mean", "Out Len"]:.2f}')
            print("Rarity", f'{DESCRIBE_SUM.loc["mean", "In Rarity"]:.2f} / {DESCRIBE_SUM.loc["mean", "Out Rarity"]:.2f}')

### Read subset lengths for validation

In [None]:
# Load every previously written subset pickle and print how many entries it
# holds, one line per (dataset, metric, criterion, ratio) combination.
DATASET_NAMES = ["0/cogs", "0/cfq"]
METRICS = ["Inverse PPL", "CHIA", "BLEU"]
CRITERIA = ["Easy to Learn", "Ambiguous", "Hard to Learn", "Random"]
CONVERGE_EPOCHS = [10, 20]
RATIOS = [0.33]

for DATASET_NAME, CONVERGE_EPOCH in zip(DATASET_NAMES, CONVERGE_EPOCHS):
    OUTPUTS_PATH = outputs_path(DATASET_NAME)
    # product() walks the combinations in the same order as the nested
    # metric -> criterion -> ratio loops.
    for METRIC, CRITERION, RATIO in itertools.product(METRICS, CRITERIA, RATIOS):
        idx_fname = create_ratio_fname(METRIC, CRITERION, CONVERGE_EPOCH, RATIO)
        subset_path = f"subsets/{DATASET_NAME}/{idx_fname}"
        subset_df = read_pickle(subset_path)
        print(len(subset_df))

### Combine subsets

In [None]:
# For "cogs": outer-merge the full statistics frame with each criterion's subset
# (recording membership in a per-criterion indicator column), then outer-merge
# those per-criterion frames together, printing the columns after each step.
DATASET_NAME = "cogs"
OUTPUTS_PATH = outputs_path(DATASET_NAME)

METRICS = ["Inverse PPL"]  # "CHIA" is currently switched off
CRITERIA = ["Hard to Learn", "Ambiguous", "Easy to Learn"]  # "Random" is currently switched off
COMBINED_CRITERIA = list(itertools.combinations(["Hard to Learn", "Ambiguous", "Easy to Learn"], 2))
RATIOS = [0.5]
CONVERGE_EPOCHS = [10]

# Columns shared by every frame; used as the join key for all merges below.
MERGE_KEYS = [
    "Index", "In", "Out", "In abbv.", "Out abbv.",
    "In Len", "Out Len", "In Rarity", "Out Rarity",
    'Confidence - Inverse PPL', 'Variability - Inverse PPL',
    'Confidence - CHIA', 'Variability - CHIA',
    'Confidence - BLEU', 'Variability - BLEU',
]

for RATIO, CONVERGE_EPOCH in itertools.product(RATIOS, CONVERGE_EPOCHS):
    idx_dict, i2s = get_scores(OUTPUTS_PATH, CONVERGE_EPOCH, STRING_TRUNCATE)
    df = calculate_statistics(CONVERGE_EPOCH, idx_dict, i2s)
    for METRIC in METRICS:
        merge_dfs = []
        for CRITERION in CRITERIA:
            idx_fname = create_ratio_fname(METRIC, CRITERION, CONVERGE_EPOCH, RATIO)
            subset_df = choose_subset(df, METRIC, CRITERION, DATASET_NAME, idx_fname, ratio=RATIO)
            indicator_column = f"merge_{crit2abv[CRITERION]}"
            merge_dfs.append(
                pd.merge(df, subset_df, on=MERGE_KEYS, indicator=indicator_column, how='outer')
            )

        # Fold the per-criterion frames into a single frame, left to right.
        merge_df = merge_dfs[0]
        for next_df in merge_dfs[1:]:
            merge_df = pd.merge(merge_df, next_df, on=MERGE_KEYS, how='outer')
            print(merge_df.columns)

In [None]:
# For "cfq": outer-merge the full statistics frame with each criterion's subset
# (recording membership in a per-criterion indicator column), fold the results
# into one frame, and plot it coloured by the concatenated indicator values.
DATASET_NAME = "cfq"
OUTPUTS_PATH = outputs_path(DATASET_NAME)

METRICS = ["Inverse PPL"]  # "CHIA" is currently switched off
CRITERIA = ["Hard to Learn", "Ambiguous", "Easy to Learn"]  # "Random" is currently switched off
COMBINED_CRITERIA = list(itertools.combinations(["Hard to Learn", "Ambiguous", "Easy to Learn"], 2))
RATIOS = [0.33, 0.5]
CONVERGE_EPOCHS = [20]

# Columns shared by every frame; used as the join key for all merges below.
MERGE_KEYS = [
    "Index", "In", "Out", "In abbv.", "Out abbv.",
    "In Len", "Out Len", "In Rarity", "Out Rarity",
    'Confidence - Inverse PPL', 'Variability - Inverse PPL',
    'Confidence - CHIA', 'Variability - CHIA',
    'Confidence - BLEU', 'Variability - BLEU',
]

for RATIO, CONVERGE_EPOCH in itertools.product(RATIOS, CONVERGE_EPOCHS):
    idx_dict, i2s = get_scores(OUTPUTS_PATH, CONVERGE_EPOCH, STRING_TRUNCATE)
    df = calculate_statistics(CONVERGE_EPOCH, idx_dict, i2s)
    for METRIC in METRICS:
        merge_dfs = []
        for CRITERION in CRITERIA:
            idx_fname = create_ratio_fname(METRIC, CRITERION, CONVERGE_EPOCH, RATIO)
            subset_df = choose_subset(df, METRIC, CRITERION, DATASET_NAME, idx_fname, ratio=RATIO)
            indicator_column = f"merge_{crit2abv[CRITERION]}"
            merge_dfs.append(
                pd.merge(df, subset_df, on=MERGE_KEYS, indicator=indicator_column, how='outer')
            )

        # Fold the per-criterion frames into a single frame, left to right.
        merge_df = merge_dfs[0]
        for next_df in merge_dfs[1:]:
            merge_df = pd.merge(merge_df, next_df, on=MERGE_KEYS, how='outer')
            print(merge_df.columns)

        # One label per row: the three indicator values concatenated as strings
        # in the order ambiguous, easy, hard.
        ambiguous_flag = merge_df["merge_ambiguous"].astype(str)
        easy_flag = merge_df["merge_easy_to_learn"].astype(str)
        hard_flag = merge_df["merge_hard_to_learn"].astype(str)
        merge_df["combined"] = ambiguous_flag + easy_flag + hard_flag
        plot(merge_df, plot_type="inv_ppl", color_column="combined")

In [None]:
# Run choose_subset for every (metric, criterion) on "cfq", using scores read
# with min_epoch=3. The returned frame is not used further in this cell.
# NOTE(review): choose_subset is called without write=False here, so it
# presumably writes the index file named by idx_fname — confirm.
DATASET_NAME = "cfq"
OUTPUTS_PATH = outputs_path(DATASET_NAME)
METRICS = ["BLEU", "Inverse PPL", "CHIA"]
CRITERIA = ["Hard to Learn", "Ambiguous", "Easy to Learn", "Random"]
RATIOS = [0.5]
CONVERGE_EPOCHS = [20]

for RATIO, CONVERGE_EPOCH in itertools.product(RATIOS, CONVERGE_EPOCHS):
    idx_dict, i2s = get_scores(OUTPUTS_PATH, CONVERGE_EPOCH, STRING_TRUNCATE, min_epoch=3)
    df = calculate_statistics(CONVERGE_EPOCH, idx_dict, i2s)
    # Same visiting order as a metric loop wrapped around a criterion loop.
    for METRIC, CRITERION in itertools.product(METRICS, CRITERIA):
        idx_fname = create_ratio_fname(METRIC, CRITERION, CONVERGE_EPOCH, RATIO)
        subset_df = choose_subset(df, METRIC, CRITERION, DATASET_NAME, idx_fname, ratio=RATIO)

In [None]:
# Show the distinct values of the "combined" column.
# Depends on kernel state: `merge_df` and its "combined" column are created by
# the "cfq" merge cell further up, not by the cell directly above this one.
merge_df["combined"].unique()

In [16]:
# For every metric and every pair of criteria, choose the two single-criterion
# subsets (without writing them), combine them via combine_subsets under a
# pair-specific file name, and print the combined size as a fraction of the
# full frame.
DATASET_NAME = "42/cogs"
OUTPUTS_PATH = outputs_path(DATASET_NAME)

METRICS = ["Inverse PPL", "CHIA", "BLEU"]
CRITERIA = ["Hard to Learn", "Easy to Learn", "Ambiguous"]
COMBINED_CRITERIA = list(itertools.combinations(["Hard to Learn", "Ambiguous", "Easy to Learn"], 2))
CONVERGE_EPOCH = 10

# The score table depends only on OUTPUTS_PATH and CONVERGE_EPOCH, both fixed
# for this cell, so build it once instead of once per criterion.
# NOTE(review): assumes choose_subset / combine_subsets do not mutate `df`.
idx_dict, i2s = get_scores(OUTPUTS_PATH, CONVERGE_EPOCH, STRING_TRUNCATE)
df = calculate_statistics(CONVERGE_EPOCH, idx_dict, i2s)

for METRIC in METRICS:
    # Use a distinct loop name: iterating as `CRITERIA` would overwrite the
    # CRITERIA list defined above and leave a tuple behind in the kernel.
    for CRITERIA_PAIR in COMBINED_CRITERIA:
        subset_dfs = []
        for CRITERION in CRITERIA_PAIR:
            idx_fname = create_fname(METRIC, CRITERION, CONVERGE_EPOCH)
            subset_df = choose_subset(df, METRIC, CRITERION, DATASET_NAME, idx_fname, write=False)
            subset_dfs.append(subset_df)
        idx_fname = create_comb_fname(METRIC, CRITERIA_PAIR[0], CRITERIA_PAIR[1], CONVERGE_EPOCH)
        combined_set_df = combine_subsets(df, subset_dfs, DATASET_NAME, idx_fname)

        print(len(combined_set_df) / len(df))
        # NOTE(review): this describes only the last single-criterion subset, not
        # combined_set_df, and the result is unused while the print below is
        # commented out — confirm the intent.
        desc_df = subset_df.describe()
        #print(METRIC, CRITERION, f'In Len Mean: {desc_df["In Len"][1]}', f'Out Len Mean: {desc_df["Out Len"][1]}', f'In Rar Mean: {desc_df["In Rarity"][1]}', f'Out Rar Mean: {desc_df["Out Rarity"][1]}')

In [18]:
# Read back each combined-pair pickle for "42/cogs" and print its file name
# together with the number of entries it contains.
DATASET_NAME = "42/cogs"
OUTPUTS_PATH = outputs_path(DATASET_NAME)

METRICS = ["Inverse PPL", "CHIA", "BLEU"]
CRITERIA = ["Hard to Learn", "Easy to Learn", "Ambiguous"]
COMBINED_CRITERIA = list(itertools.combinations(["Hard to Learn", "Ambiguous", "Easy to Learn"], 2))
CONVERGE_EPOCH = 10

for METRIC in METRICS:
    # Unpack the pair into its own names: iterating as `CRITERIA` would
    # overwrite the CRITERIA list defined above.
    for FIRST_CRITERION, SECOND_CRITERION in COMBINED_CRITERIA:
        idx_fname = create_comb_fname(METRIC, FIRST_CRITERION, SECOND_CRITERION, CONVERGE_EPOCH)
        print(idx_fname, len(list(read_pickle(os.path.join("subsets", DATASET_NAME, idx_fname)))))


# for METRIC in METRICS:
#    for CRITERION in CRITERIA:
#        for RATIO in RATIOS:
#            idx_fname = create_ratio_fname(METRIC, CRITERION, CONVERGE_EPOCH, RATIO)
#            print(idx_fname, len(list(read_pickle(os.path.join("subsets", DATASET_NAME, idx_fname)))))

inv_ppl_hard_to_learn_ambiguous_10.pickle 12078
inv_ppl_hard_to_learn_easy_to_learn_10.pickle 12077
inv_ppl_ambiguous_easy_to_learn_10.pickle 12077
chia_hard_to_learn_ambiguous_10.pickle 12078
chia_hard_to_learn_easy_to_learn_10.pickle 12077
chia_ambiguous_easy_to_learn_10.pickle 12077
bleu_hard_to_learn_ambiguous_10.pickle 12078
bleu_hard_to_learn_easy_to_learn_10.pickle 12077
bleu_ambiguous_easy_to_learn_10.pickle 12077


In [None]:
# For every metric and every pair of criteria, choose the two single-criterion
# subsets (without writing them), combine them via combine_subsets under a
# pair-specific file name, and print the combined size as a fraction of the
# full frame.
DATASET_NAME = "42/cfq"
OUTPUTS_PATH = outputs_path(DATASET_NAME)

METRICS = ["Inverse PPL", "CHIA", "BLEU"]
CRITERIA = ["Hard to Learn", "Easy to Learn", "Ambiguous"]
COMBINED_CRITERIA = list(itertools.combinations(["Hard to Learn", "Ambiguous", "Easy to Learn"], 2))
CONVERGE_EPOCH = 20

# The score table depends only on OUTPUTS_PATH and CONVERGE_EPOCH, both fixed
# for this cell, so build it once instead of once per criterion.
# NOTE(review): assumes choose_subset / combine_subsets do not mutate `df`.
idx_dict, i2s = get_scores(OUTPUTS_PATH, CONVERGE_EPOCH, STRING_TRUNCATE)
df = calculate_statistics(CONVERGE_EPOCH, idx_dict, i2s)

for METRIC in METRICS:
    # Use a distinct loop name: iterating as `CRITERIA` would overwrite the
    # CRITERIA list defined above and leave a tuple behind in the kernel.
    for CRITERIA_PAIR in COMBINED_CRITERIA:
        subset_dfs = []
        for CRITERION in CRITERIA_PAIR:
            idx_fname = create_fname(METRIC, CRITERION, CONVERGE_EPOCH)
            subset_df = choose_subset(df, METRIC, CRITERION, DATASET_NAME, idx_fname, write=False)
            subset_dfs.append(subset_df)
        idx_fname = create_comb_fname(METRIC, CRITERIA_PAIR[0], CRITERIA_PAIR[1], CONVERGE_EPOCH)
        combined_set_df = combine_subsets(df, subset_dfs, DATASET_NAME, idx_fname)

        print(len(combined_set_df) / len(df))
        # NOTE(review): this describes only the last single-criterion subset, not
        # combined_set_df, and the result is unused while the print below is
        # commented out — confirm the intent.
        desc_df = subset_df.describe()
        #print(METRIC, CRITERION, f'In Len Mean: {desc_df["In Len"][1]}', f'Out Len Mean: {desc_df["Out Len"][1]}', f'In Rar Mean: {desc_df["In Rarity"][1]}', f'Out Rar Mean: {desc_df["Out Rarity"][1]}')