In [1]:
import os
import pandas as pd

In [19]:
# @title Helper Functions


def convert_yes_df_to_wide(df, save_dir, save_fname, verified = True, matrix_completion = False):
    '''Pivot a long-format concept/feature table to wide format and save it as CSV.

    The result has one row per value of the 'Concept' column and one column per
    value of the 'Feature' column. Concept/feature pairs that do not occur in
    ``df`` are filled with 0. When a pair occurs more than once, the first
    occurrence wins (``aggfunc='first'``).

    Args:
        df: Long-format DataFrame with 'Concept' and 'Feature' columns. When
            ``verified`` is False it must also contain a 'Yes/No' column.
            ``df`` itself is not modified.
        save_dir: Directory the CSV file is written to.
        save_fname: Name of the CSV file inside ``save_dir``.
        verified: If True, every row present in ``df`` is encoded as 1.
            If False, the values of the 'Yes/No' column are used instead and
            'unverified' is printed.
        matrix_completion: Must be falsy; no conversion is implemented for the
            matrix-completion case.

    Returns:
        The wide-format DataFrame that was written to disk.

    Raises:
        NotImplementedError: If ``matrix_completion`` is truthy.
    '''
    if matrix_completion:
        # Fail with a clear message up front; previously this path fell
        # through to the return statement with the result never assigned.
        raise NotImplementedError(
            'convert_yes_df_to_wide does not support matrix_completion=True'
        )

    if verified:
        # Every listed concept/feature pair counts as a "yes". Use assign() so
        # the marker column lives on a copy and the caller's frame is untouched.
        long_df = df.assign(Response=1)
        value_column = 'Response'
    else:
        print('unverified')
        long_df = df
        value_column = 'Yes/No'

    df_wide = long_df.pivot_table(
        index='Concept', columns='Feature', values=value_column, aggfunc='first'
    ).fillna(0)
    df_wide.to_csv(os.path.join(save_dir, save_fname))
    return df_wide

def preprocess_flan_responses_on_leuven(df):
    '''Turn the textual 'response' column of the flan Leuven answers into integers.

    The column is rewritten in place on ``df`` through a fixed sequence of
    substring replacements and then cast to int. ``df`` is returned for
    convenience.
    '''
    # Applied in order. 'mask>' is first mapped to 'No' so that the final
    # 'No' -> '0' step also covers it.
    # NOTE(review): 'mask>' is presumably the remainder of a '<mask>' token in
    # the model output - confirm against the raw CSV.
    substitutions = (
        ('mask>', 'No'),
        ('True', '1'),
        ('Yes', '1'),
        ('False', '0'),
        ('No', '0'),
    )
    for old_text, new_text in substitutions:
        df['response'] = df['response'].str.replace(old_text, new_text)

    # Whatever is left must be parseable as an integer.
    df['response'] = df['response'].astype(int)
    return df

def convert_flan_df_to_wide(df):
    '''Pivot long-format flan responses into a concept-by-feature table.

    Rows come from the 'concept' column, columns from the 'feature' column and
    cell values from the 'response' column (first occurrence per pair).
    Pairs that do not occur in ``df`` are filled with 0.
    '''
    pivoted = df.pivot_table(
        index='concept',
        columns='feature',
        values='response',
        aggfunc='first',
    )
    df_wide = pivoted.fillna(0)
    return df_wide


# ICLR

In [56]:
# Build the wide concept-by-feature table from the flan prompt answers.
save_dir = '../data/leuven/flan'
flan_responses = preprocess_flan_responses_on_leuven(
    pd.read_csv(os.path.join(save_dir, 'flan_leuven_prompts_answers.csv'))
)
flan_responses_wide = convert_flan_df_to_wide(flan_responses)
# The '../' component puts the output one directory above save_dir.
flan_responses_wide.to_csv(
    os.path.join(save_dir, '../flan_leuven_norms.csv')
)

In [58]:
# Same pipeline as for the regular prompts, applied to the broad-prompt answers.
save_dir = '../data/leuven/flan'
flan_responses = preprocess_flan_responses_on_leuven(
    pd.read_csv(os.path.join(save_dir, 'flan_leuven_broad_prompt.csv'))
)
flan_responses_wide = convert_flan_df_to_wide(flan_responses)
# The '../' component puts the output one directory above save_dir.
flan_responses_wide.to_csv(
    os.path.join(save_dir, '../flan_leuven_norms_broad_prompt.csv')
)