In [43]:
import pandas as pd

In [44]:
# Load the pickled DataFrame (presumably the results/response matrix, going by
# the file name). Its columns are a 3-level MultiIndex named
# ('input.text', 'scenario', 'benchmark').
# NOTE(review): unpickling can execute arbitrary code -- only load this file
# from a trusted source.
df = pd.read_pickle("data/resmat.pkl")

In [45]:
# Hold on to the column index and report which Index subclass it is.
results = df.columns
print(type(results))

<class 'pandas.core.indexes.multi.MultiIndex'>


In [46]:
# Show the names of the levels that make up the column MultiIndex.
df.columns.names

FrozenList(['input.text', 'scenario', 'benchmark'])

In [47]:

# Distinct labels of the 'scenario' and 'benchmark' column levels.
scenario, benchmark = [
    df.columns.get_level_values(level_name).unique().to_list()
    for level_name in ('scenario', 'benchmark')
]

# Distinct (scenario, benchmark) pairs that actually occur among the columns,
# as a list of two-element lists.
column_levels = df.columns.to_frame()
unique_pairs = column_levels[['scenario', 'benchmark']].drop_duplicates()
scenario_benchmark_combinations = unique_pairs.values.tolist()

In [48]:
# One row per distinct (scenario, benchmark) pair.
pair_columns = ['scenario', 'benchmark']
tests = pd.DataFrame(data=scenario_benchmark_combinations, columns=pair_columns)
tests

Unnamed: 0,scenario,benchmark
0,lsat_qa,classic
1,truthful_qa,classic
2,synthetic_reasoning,classic
3,babi_qa,classic
4,wikifact,classic
5,bbq,classic
6,thai_exam,thaiexam
7,dyck_language_np=3,classic
8,legal_support,classic
9,civil_comments,classic


In [49]:
# Display the unique scenario names collected from the 'scenario' column level.
scenario

['lsat_qa',
 'truthful_qa',
 'synthetic_reasoning',
 'babi_qa',
 'wikifact',
 'bbq',
 'thai_exam',
 'dyck_language_np=3',
 'legal_support',
 'civil_comments',
 'legalbench',
 'raft',
 'air_bench_2024',
 'math',
 'med_qa',
 'gsm',
 'boolq',
 'mmlu',
 'entity_matching',
 'entity_data_imputation',
 'commonsense',
 'imdb']

In [50]:
from sklearn.preprocessing import MultiLabelBinarizer
def add_ability_tags_to_df(df):
    """
    Return a copy of ``df`` with an 'ability_tags' column derived from 'scenario'.

    Each scenario name is looked up in a fixed scenario -> ability-level
    mapping; scenarios that are not in the mapping are tagged
    ``['Unclassified']``. The input DataFrame is not modified, so frames
    displayed by earlier cells never change behind the reader's back.

    Args:
        df (pd.DataFrame): The input DataFrame with a 'scenario' column.

    Returns:
        pd.DataFrame: A new DataFrame with the extra 'ability_tags' column.
        Every cell of that column is its own list of ability-level names.

    Raises:
        KeyError: If ``df`` has no 'scenario' column.
    """
    # Mapping of dataset names to one or more ability levels.
    ability_mapping = {
        'truthful_qa': ['Foundational Knowledge'],
        'wikifact': ['Foundational Knowledge'],
        'lsat_qa': ['Analytical Reasoning'],
        'synthetic_reasoning': ['Analytical Reasoning'],
        'babi_qa': ['Analytical Reasoning'],
        'bbq': ['Analytical Reasoning'],
        'dyck_language_np=3': ['Analytical Reasoning'],
        'civil_comments': ['Analytical Reasoning'],
        'raft': ['Analytical Reasoning'],
        'math': ['Analytical Reasoning'],
        'gsm': ['Analytical Reasoning'],
        'entity_matching': ['Analytical Reasoning'],
        'entity_data_imputation': ['Analytical Reasoning'],
        'commonsense': ['Analytical Reasoning'],
        'imdb': ['Analytical Reasoning'],
        'legal_support': ['Higher-Order Skills', 'Analytical Reasoning'],
        'legalbench': ['Higher-Order Skills', 'Analytical Reasoning'],
        'air_bench_2024': ['Higher-Order Skills'],
        'med_qa': ['Foundational Knowledge', 'Higher-Order Skills'],
        'boolq': ['Foundational Knowledge', 'Analytical Reasoning'],
        'mmlu': ['Foundational Knowledge', 'Analytical Reasoning', 'Higher-Order Skills'],
        'thai_exam': ['Foundational Knowledge', 'Analytical Reasoning'],
    }

    def _tags_for(scenario_name):
        # .get() falls back to ['Unclassified'] for scenarios outside the mapping.
        # A fresh list is handed out per row so that editing one row's tags
        # cannot leak into other rows or into the mapping itself.
        return list(ability_mapping.get(scenario_name, ['Unclassified']))

    # assign() builds a new frame instead of writing into the caller's object.
    return df.assign(ability_tags=df['scenario'].apply(_tags_for))

def add_ability_tags_to_df_2d(df):
    """
    Return a copy of ``df`` with a 2D 'ability_tags' column.

    Scenarios are tagged along a Linguistic vs. Scientific framework: each
    scenario name in the 'scenario' column is mapped to one or both of
    'Linguistic & Logical Reasoning' and 'Scientific & Quantitative Reasoning'.
    Scenarios that are not in the mapping are tagged ``['Unclassified']``.
    The input DataFrame is not modified.

    Args:
        df (pd.DataFrame): The input DataFrame with a 'scenario' column.

    Returns:
        pd.DataFrame: A new DataFrame with the extra 'ability_tags' column.
        Every cell of that column is its own list of domain names.

    Raises:
        KeyError: If ``df`` has no 'scenario' column.
    """
    # Mapping of scenarios to the two new domain dimensions.
    ability_mapping_2d_domain = {
        # Primarily Linguistic & Logical Reasoning (LLR)
        'truthful_qa': ['Linguistic & Logical Reasoning'],
        'wikifact': ['Linguistic & Logical Reasoning'],
        'lsat_qa': ['Linguistic & Logical Reasoning'],
        'synthetic_reasoning': ['Linguistic & Logical Reasoning'],
        'babi_qa': ['Linguistic & Logical Reasoning'],
        'bbq': ['Linguistic & Logical Reasoning'],
        'dyck_language_np=3': ['Linguistic & Logical Reasoning'],
        'civil_comments': ['Linguistic & Logical Reasoning'],
        'raft': ['Linguistic & Logical Reasoning'],
        'entity_matching': ['Linguistic & Logical Reasoning'],
        'entity_data_imputation': ['Linguistic & Logical Reasoning'],
        'commonsense': ['Linguistic & Logical Reasoning'],
        'imdb': ['Linguistic & Logical Reasoning'],
        'legal_support': ['Linguistic & Logical Reasoning'],
        'legalbench': ['Linguistic & Logical Reasoning'],
        'boolq': ['Linguistic & Logical Reasoning'],

        # Primarily Scientific & Quantitative Reasoning (SQR)
        'math': ['Scientific & Quantitative Reasoning'],
        'gsm': ['Scientific & Quantitative Reasoning'],
        'med_qa': ['Scientific & Quantitative Reasoning'],

        # Requiring both LLR and SQR
        'air_bench_2024': ['Linguistic & Logical Reasoning', 'Scientific & Quantitative Reasoning'],
        'mmlu': ['Linguistic & Logical Reasoning', 'Scientific & Quantitative Reasoning'],
        'thai_exam': ['Linguistic & Logical Reasoning', 'Scientific & Quantitative Reasoning'],
    }

    def _tags_for(scenario_name):
        # A fresh list per row keeps rows independent of each other and of the
        # mapping; unknown scenarios fall back to ['Unclassified'].
        return list(ability_mapping_2d_domain.get(scenario_name, ['Unclassified']))

    # assign() builds a new frame instead of writing into the caller's object.
    return df.assign(ability_tags=df['scenario'].apply(_tags_for))


# Tag every (scenario, benchmark) row with its 2D ability domains.
tests_with_tags = add_ability_tags_to_df_2d(tests)

# Turn the per-row tag lists into one 0/1 indicator column per distinct tag.
mlb = MultiLabelBinarizer()
tag_indicator_matrix = mlb.fit_transform(tests_with_tags['ability_tags'])
one_hot_encoded_df = pd.DataFrame(
    tag_indicator_matrix,
    index=tests_with_tags.index,
    columns=mlb.classes_,
)

# Put the indicators next to the tagged rows and expose the row number as an
# explicit 'index' column.
final_df = pd.concat([tests_with_tags, one_hot_encoded_df], axis=1)
final_df = final_df.reset_index(names=['index'])

In [51]:
final_df

Unnamed: 0,index,scenario,benchmark,ability_tags,Linguistic & Logical Reasoning,Scientific & Quantitative Reasoning
0,0,lsat_qa,classic,[Linguistic & Logical Reasoning],1,0
1,1,truthful_qa,classic,[Linguistic & Logical Reasoning],1,0
2,2,synthetic_reasoning,classic,[Linguistic & Logical Reasoning],1,0
3,3,babi_qa,classic,[Linguistic & Logical Reasoning],1,0
4,4,wikifact,classic,[Linguistic & Logical Reasoning],1,0
5,5,bbq,classic,[Linguistic & Logical Reasoning],1,0
6,6,thai_exam,thaiexam,"[Linguistic & Logical Reasoning, Scientific & ...",1,1
7,7,dyck_language_np=3,classic,[Linguistic & Logical Reasoning],1,0
8,8,legal_support,classic,[Linguistic & Logical Reasoning],1,0
9,9,civil_comments,classic,[Linguistic & Logical Reasoning],1,0


In [52]:
import numpy as np

# Ability dimensions (one-hot columns of `final_df`) that make up the Q-matrix,
# in the column order they are written to disk.
ability_tags = ['Linguistic & Logical Reasoning', 'Scientific & Quantitative Reasoning']
ability_tags_str = '_'.join(ability_tags).replace(' ', '_')
name_file = f"qmat_2d_{ability_tags_str}"

# Q-matrix: one row per row of `final_df`, one 0/1 column per ability tag.
abimap = final_df[ability_tags].to_numpy()

# NOTE: tofile() writes the raw values only (C order, no header), so despite
# the ".npy" suffix this file is NOT in NumPy's .npy format and cannot be read
# with np.load(). Readers must use np.fromfile() and supply dtype and shape.
# NOTE(review): the dtype comes from the one-hot frame and may differ between
# platforms -- confirm it matches what the consumer of this file expects.
abimap.tofile(f"data/{name_file}.npy")

# To load it back correctly it has to be reshaped. Derive the shape from the
# array that was just written instead of hard-coding the number of scenarios,
# so this cell keeps working when scenarios or tags are added.
loaded_abimap = np.fromfile(f"data/{name_file}.npy", dtype=abimap.dtype).reshape(abimap.shape)
assert np.array_equal(loaded_abimap, abimap), "Q-matrix did not survive the save/load round trip"
loaded_abimap


array([[1, 0],
       [1, 0],
       [1, 0],
       [1, 0],
       [1, 0],
       [1, 0],
       [1, 1],
       [1, 0],
       [1, 0],
       [1, 0],
       [1, 0],
       [1, 0],
       [1, 1],
       [0, 1],
       [0, 1],
       [0, 1],
       [1, 0],
       [1, 1],
       [1, 0],
       [1, 0],
       [1, 0],
       [1, 0]])

In [53]:
# Map every column of `df` to the row number ('index' column of `final_df`) of
# its scenario.
#
# The lookup table is built once, so each of the many columns costs a single
# dictionary access instead of a full scan of `final_df`. If a scenario were
# listed on more than one row, the first row wins.
scenario_to_row = {}
for scenario_name, row_number in zip(final_df['scenario'], final_df['index']):
    scenario_to_row.setdefault(scenario_name, row_number)

# Full column tuples; not needed for the mapping itself, kept in case later
# cells use `values`.
values = df.columns.values.tolist()

# Read the scenario of each column by level name rather than by tuple position.
column_scenarios = df.columns.get_level_values('scenario')

unmapped = sorted(set(column_scenarios) - set(scenario_to_row), key=str)
if unmapped:
    raise KeyError(f"Scenarios present in df.columns but missing from final_df: {unmapped}")

sm = [scenario_to_row[name] for name in column_scenarios]

In [54]:
import numpy as np

scenario_map_path = f"data/scenario_map_2d_{ability_tags_str}.npy"

# Per-column scenario row numbers as a fixed-width integer array.
sm_n = np.array(sm, dtype=np.int32)  # Ensure integer type

# NOTE: tofile() writes raw int32 values with no header, so despite the ".npy"
# suffix this file must be read with np.fromfile(), not np.load().
sm_n.tofile(scenario_map_path)

# When loading, specify the dtype to match what was saved
loaded = np.fromfile(scenario_map_path, dtype=np.int32)
assert np.array_equal(loaded, sm_n), "scenario map did not survive the save/load round trip"
loaded

array([ 0,  0,  0, ..., 21, 21, 21], shape=(78712,), dtype=int32)