In [11]:
import pandas as pd

In [12]:
# Load the pickled frame (presumably the response matrix, going by the file name).
# Its columns are a 3-level MultiIndex named ('input.text', 'scenario', 'benchmark').
# NOTE(review): unpickling can execute arbitrary code -- only load this file if it
# comes from a trusted source.
df = pd.read_pickle("data/resmat.pkl")

In [13]:
# Alias for the column index of the loaded frame.
results = df.columns

# Print the concrete index class to confirm the columns are a MultiIndex.
print(type(df.columns))

<class 'pandas.core.indexes.multi.MultiIndex'>


In [14]:
# Show the names of the column-index levels; later cells select levels by these names.
df.columns.names

FrozenList(['input.text', 'scenario', 'benchmark'])

In [15]:

# Distinct values of the 'scenario' and 'benchmark' column levels,
# in order of first appearance.
column_index = df.columns
scenario = column_index.get_level_values('scenario').unique().to_list()
benchmark = column_index.get_level_values('benchmark').unique().to_list()

# Distinct (scenario, benchmark) pairs, as a list of two-element lists.
level_frame = column_index.to_frame()
pair_frame = level_frame[['scenario', 'benchmark']].drop_duplicates()
scenario_benchmark_combinations = pair_frame.values.tolist()

In [16]:
# One row per distinct (scenario, benchmark) pair; the default integer row labels
# are later exposed as an explicit 'index' column and used as row ids.
tests = pd.DataFrame(scenario_benchmark_combinations, columns=['scenario', 'benchmark'])
tests

Unnamed: 0,scenario,benchmark
0,lsat_qa,classic
1,truthful_qa,classic
2,synthetic_reasoning,classic
3,babi_qa,classic
4,wikifact,classic
5,bbq,classic
6,thai_exam,thaiexam
7,dyck_language_np=3,classic
8,legal_support,classic
9,civil_comments,classic


In [17]:
# Display the distinct scenario names found in the column index.
scenario

['lsat_qa',
 'truthful_qa',
 'synthetic_reasoning',
 'babi_qa',
 'wikifact',
 'bbq',
 'thai_exam',
 'dyck_language_np=3',
 'legal_support',
 'civil_comments',
 'legalbench',
 'raft',
 'air_bench_2024',
 'math',
 'med_qa',
 'gsm',
 'boolq',
 'mmlu',
 'entity_matching',
 'entity_data_imputation',
 'commonsense',
 'imdb']

In [18]:
from sklearn.preprocessing import MultiLabelBinarizer
def add_ability_tags_to_df(df):
    """
    Tag each row with the ability levels exercised by its scenario.

    The frame is modified in place: an 'ability_tags' column is written onto
    ``df`` itself, and that same object is returned.

    Args:
        df (pd.DataFrame): Frame with a 'scenario' column of scenario names.

    Returns:
        pd.DataFrame: ``df`` with an 'ability_tags' column whose cells are
        lists of ability names. Scenarios missing from the mapping get
        ``['Unclassified']``.
    """
    foundational = 'Foundational Knowledge'
    analytical = 'Analytical Reasoning'
    higher_order = 'Higher-Order Skills'

    # Scenarios that map to exactly one ability level.
    knowledge_only = ('truthful_qa', 'wikifact')
    reasoning_only = (
        'lsat_qa',
        'synthetic_reasoning',
        'babi_qa',
        'bbq',
        'dyck_language_np=3',
        'civil_comments',
        'raft',
        'math',
        'gsm',
        'entity_matching',
        'entity_data_imputation',
        'commonsense',
        'imdb',
    )

    # Each scenario gets its own list object, so cells never share a list
    # across different scenarios.
    ability_mapping = {name: [foundational] for name in knowledge_only}
    ability_mapping.update({name: [analytical] for name in reasoning_only})

    # Scenarios with a bespoke tag list (order of tags is significant to callers
    # that display the list).
    ability_mapping.update({
        'legal_support': [higher_order, analytical],
        'legalbench': [higher_order, analytical],
        'air_bench_2024': [higher_order],
        'med_qa': [foundational, higher_order],
        'boolq': [foundational, analytical],
        'mmlu': [foundational, analytical, higher_order],
        'thai_exam': [foundational, analytical],
    })

    def lookup_tags(scenario_name):
        # Unknown scenarios fall back to a one-element list rather than raising.
        return ability_mapping.get(scenario_name, ['Unclassified'])

    df['ability_tags'] = df['scenario'].apply(lookup_tags)
    return df

def add_ability_tags_to_df_2d(df):
    """
    Tag each row with the two-dimensional ability labels of its scenario.

    The two dimensions are 'Knowledge Acquisition' and 'Applied Reasoning'.
    The frame is modified in place: an 'ability_tags' column is written onto
    ``df`` itself, and that same object is returned.

    Args:
        df (pd.DataFrame): Frame with a 'scenario' column of scenario names.

    Returns:
        pd.DataFrame: ``df`` with an 'ability_tags' column whose cells are
        lists of ability names. Scenarios missing from the mapping get
        ``['Unclassified']``.
    """
    knowledge = 'Knowledge Acquisition'
    reasoning = 'Applied Reasoning'

    # Primarily Knowledge Acquisition (KA).
    knowledge_only = ('truthful_qa', 'wikifact')

    # Primarily Applied Reasoning (AR).
    reasoning_only = (
        'lsat_qa',
        'synthetic_reasoning',
        'babi_qa',
        'bbq',
        'dyck_language_np=3',
        'civil_comments',
        'raft',
        'math',
        'gsm',
        'entity_matching',
        'entity_data_imputation',
        'commonsense',
        'imdb',
        'legal_support',    # merged 'Higher-Order' and 'Analytical'
        'legalbench',       # merged 'Higher-Order' and 'Analytical'
        'air_bench_2024',   # was 'Higher-Order Skills'
    )

    # Requiring both KA and AR.
    knowledge_and_reasoning = ('med_qa', 'boolq', 'mmlu', 'thai_exam')

    # Each scenario gets its own list object, so cells never share a list
    # across different scenarios.
    ability_mapping_2d = {name: [knowledge] for name in knowledge_only}
    ability_mapping_2d.update({name: [reasoning] for name in reasoning_only})
    ability_mapping_2d.update(
        {name: [knowledge, reasoning] for name in knowledge_and_reasoning}
    )

    def lookup_tags(scenario_name):
        # Unknown scenarios fall back to a one-element list rather than raising.
        return ability_mapping_2d.get(scenario_name, ['Unclassified'])

    df['ability_tags'] = df['scenario'].apply(lookup_tags)
    return df

# Tag every (scenario, benchmark) row with its 2D ability labels.
# Note: the helper writes the 'ability_tags' column onto `tests` itself, so
# `tests_with_tags` and `tests` are the same object afterwards.
tests_with_tags = add_ability_tags_to_df_2d(tests)

# Expand the list-valued 'ability_tags' column into one 0/1 indicator column per label.
mlb = MultiLabelBinarizer()
indicator_matrix = mlb.fit_transform(tests_with_tags['ability_tags'])
one_hot_encoded_df = pd.DataFrame(
    indicator_matrix,
    index=tests_with_tags.index,
    columns=mlb.classes_,
)

# Put tags and indicators side by side, then expose the row labels as an
# explicit 'index' column that later cells use as the row id.
combined_df = pd.concat([tests_with_tags, one_hot_encoded_df], axis=1)
final_df = combined_df.reset_index(names=['index'])

In [19]:
# Display the tagged table together with its per-label indicator columns.
final_df

Unnamed: 0,index,scenario,benchmark,ability_tags,Applied Reasoning,Knowledge Acquisition
0,0,lsat_qa,classic,[Applied Reasoning],1,0
1,1,truthful_qa,classic,[Knowledge Acquisition],0,1
2,2,synthetic_reasoning,classic,[Applied Reasoning],1,0
3,3,babi_qa,classic,[Applied Reasoning],1,0
4,4,wikifact,classic,[Knowledge Acquisition],0,1
5,5,bbq,classic,[Applied Reasoning],1,0
6,6,thai_exam,thaiexam,"[Knowledge Acquisition, Applied Reasoning]",1,1
7,7,dyck_language_np=3,classic,[Applied Reasoning],1,0
8,8,legal_support,classic,[Applied Reasoning],1,0
9,9,civil_comments,classic,[Applied Reasoning],1,0


In [20]:
# Indicator matrix with one row per row of `final_df`.
# Column 0 is 'Knowledge Acquisition' and column 1 is 'Applied Reasoning';
# this order is chosen here and differs from the column order in `final_df`.
qmat_columns = ['Knowledge Acquisition', 'Applied Reasoning']
abimap = final_df[qmat_columns].to_numpy()

# NOTE(review): ndarray.tofile writes the raw element bytes only -- no .npy header,
# so shape and dtype are not stored and np.load cannot read this file despite the
# extension. Readers must use np.fromfile with the matching dtype and reshape to
# (-1, 2). The dtype is whatever the binarizer produced -- TODO confirm what
# downstream readers assume.
abimap.tofile("data/qmat_2d.npy")
abimap


array([[0, 1],
       [1, 0],
       [0, 1],
       [0, 1],
       [1, 0],
       [0, 1],
       [1, 1],
       [0, 1],
       [0, 1],
       [0, 1],
       [0, 1],
       [0, 1],
       [0, 1],
       [0, 1],
       [1, 1],
       [0, 1],
       [1, 1],
       [1, 1],
       [0, 1],
       [0, 1],
       [0, 1],
       [0, 1]])

In [21]:
# For every column of `df`, find the row id (the 'index' value in `final_df`)
# of the scenario that column belongs to.
values = df.columns.values.tolist()

# Build the scenario -> row id lookup once, instead of filtering `final_df`
# with a boolean mask for every single column.
# setdefault keeps the first row seen for a scenario, which matches a
# first-match lookup should a scenario ever appear under several benchmarks.
scenario_to_row = {}
for row_id, scenario_name in zip(final_df['index'], final_df['scenario']):
    scenario_to_row.setdefault(scenario_name, row_id)

# Read the scenario by level name rather than by tuple position, so the lookup
# does not depend on the order of the column levels. A scenario that is missing
# from `final_df` raises KeyError here instead of being silently skipped.
sm = [scenario_to_row[name] for name in df.columns.get_level_values('scenario')]

In [22]:
import numpy as np

# Store the per-column row ids as 32-bit integers.
# NOTE(review): ndarray.tofile writes raw element bytes with no .npy header, so
# the file cannot be read with np.load despite its extension; the dtype must be
# supplied again when reading.
scenario_map_path = "data/scenario_map_2d.npy"
sm_n = np.array(sm, dtype=np.int32)
sm_n.tofile(scenario_map_path)

# Read the file back with the same dtype it was written with and display the
# result (only the last expression of a cell is shown).
loaded = np.fromfile(scenario_map_path, dtype=np.int32)
loaded

array([ 0,  0,  0, ..., 21, 21, 21], shape=(78712,), dtype=int32)