In [None]:
import os
import swifter
import json
from functools import partial
import numpy as np
import pandas as pd
from scipy.stats import wasserstein_distance
import helpers as ph
from termcolor import colored
from helpers import PEW_SURVEY_LIST

In [None]:
# --- Configuration ---------------------------------------------------------
# Directory holding one sub-folder per survey (info.csv, metadata.csv, responses.csv).
DATASET_DIR = './data/human_resp/'
# Directory whose entries are the raw model runs, selected later by survey name and context.
RESULT_DIR = './data/runs'
# Prompting context to process.
CONTEXT = "default" # One of ["default", "steer-qa", "steer-bio", "steer-portray"]
# Directory where the per-survey human/model/combined/baseline CSVs are written.
OUTPUT_DIR = './data/distributions'
# exist_ok=True creates the directory only when missing, without the
# check-then-create race of a separate os.path.exists() test.
os.makedirs(OUTPUT_DIR, exist_ok=True)

In [None]:
# Choose which surveys to process for the configured CONTEXT.
if CONTEXT != "default":
    # Steered contexts only cover the disagreement subset, and additionally
    # need the tab-separated steering prompts for that context.
    SURVEY_LIST = ['Pew_American_Trends_Panel_disagreement_500']
    steer_df = pd.read_csv(f'./data/model_input/{CONTEXT}.csv', sep='\t')
else:
    # Default context: every individual wave first, then the disagreement subset.
    SURVEY_LIST = [
        *(f'American_Trends_Panel_W{wave}' for wave in PEW_SURVEY_LIST),
        'Pew_American_Trends_Panel_disagreement_500',
    ]

In [None]:
def _normalized_wd(ordinal, dist_a, dist_b):
    """Wasserstein distance between two weightings of `ordinal`, divided by `ph.get_max_wd(ordinal)`.

    `ordinal` is used as the support for both distributions; `dist_a` and
    `dist_b` are passed to scipy as the corresponding weights.
    """
    return wasserstein_distance(ordinal, ordinal, dist_a, dist_b) / ph.get_max_wd(ordinal)


# For every survey: build the model and human opinion distributions, save them,
# merge them, score each merged row with the normalized Wasserstein distance,
# and (default context only) compute the group-vs-overall human baseline.
for SURVEY_NAME in SURVEY_LIST:
    print(colored(SURVEY_NAME, "red"))
    # os.listdir returns entries in arbitrary order; sorting keeps the order of
    # processed runs (and therefore the saved CSVs) reproducible across machines.
    # NOTE(review): selection is by substring, so a survey name that is a prefix
    # of another would also match that other survey's runs.
    RESULT_FILES = sorted(f for f in os.listdir(RESULT_DIR)
                          if SURVEY_NAME in f and f'context={CONTEXT}' in f)

    ## Read human responses and survey info
    info_df = pd.read_csv(os.path.join(DATASET_DIR, SURVEY_NAME, 'info.csv'))
    # SECURITY NOTE: eval() executes whatever text is stored in these CSV cells.
    # Only run this on trusted data files; if the cells are plain Python
    # literals, ast.literal_eval would be the safe replacement (TODO confirm).
    info_df['option_ordinal'] = info_df.apply(lambda x: eval(x['option_ordinal']), axis=1)
    info_df['references'] = info_df.apply(lambda x: eval(x['references']), axis=1)

    md_df = pd.read_csv(os.path.join(DATASET_DIR, SURVEY_NAME, 'metadata.csv'))
    md_df['options'] = md_df.apply(lambda x: eval(x['options']), axis=1)  # same eval caveat as above
    # attribute -> {group -> position of that group in the attribute's option list}
    md_order = {'Overall': {'Overall': 0}}
    md_order.update({k: {o: oi for oi, o in enumerate(opts)} for k, opts in zip(md_df['key'], md_df['options'])})

    ## Get model opinion distribution
    print(colored('--Getting LM opinion distribution--', 'blue'))
    model_df = ph.get_model_opinions(RESULT_DIR, RESULT_FILES, info_df)

    ## Get human opinion distribution
    print(colored('--Getting human opinion distribution--', 'blue'))
    if SURVEY_NAME != "Pew_American_Trends_Panel_disagreement_500":
        # Individual wave: derive the human distributions from the raw
        # responses, once per demographic attribute. The wave number is the
        # integer that follows '_W' in the survey name.
        resp_df = pd.read_csv(os.path.join(DATASET_DIR, SURVEY_NAME, 'responses.csv'))
        survey_wave = int(SURVEY_NAME.split('_W')[1])
        human_df = pd.concat([ph.extract_human_opinions(resp_df,
                                                        model_df,
                                                        md_df,
                                                        demographic=demographic,
                                                        wave=survey_wave)
                              for demographic in ph.DEMOGRAPHIC_ATTRIBUTES])
    else:
        if CONTEXT != "default":
            # Annotate every model row with the attribute/group its context steers towards.
            contexts = np.unique(model_df['context'])
            steer_dict = ph.get_steering_group(CONTEXT, steer_df, contexts)
            model_df['steer_attribute'] = model_df.apply(lambda x: steer_dict[x['context']]['attribute'],
                                                         axis=1)
            model_df['steer_group'] = model_df.apply(lambda x: steer_dict[x['context']]['group'],
                                                     axis=1)
            steer_groups = list(set(model_df['steer_group'].values)) + ['Overall']

        # The disagreement subset is reassembled from the per-wave
        # '<wave>_default_human.csv' files in OUTPUT_DIR. Those files are
        # written by this loop for the individual waves under the default
        # context, so they must already exist when this branch runs.
        wave_human_dfs = []
        for wave in PEW_SURVEY_LIST:
            sn = f'American_Trends_Panel_W{wave}'
            hdf = pd.read_csv(os.path.join(OUTPUT_DIR, f'{sn}_default_human.csv'))
            # Keep only the questions of this wave that belong to the subset.
            idf = info_df[info_df['survey'] == f'Pew_{sn}']
            hdf = hdf[hdf['qkey'].isin(idf['key'].values)]
            wave_human_dfs.append(hdf)
        human_df = pd.concat(wave_human_dfs)
        if CONTEXT != "default":
            # Explicit copy: a column is assigned on this filtered frame just
            # below, which would otherwise be a write to a slice.
            human_df = human_df[human_df['group'].isin(steer_groups)].copy()
        # D_H is read back from CSV as text; presumably a bracketed,
        # whitespace-separated list of numbers such as '[0.1 0.2 0.7]'
        # (TODO confirm). split() with no argument tolerates repeated spaces
        # and line breaks between the numbers.
        human_df['D_H'] = human_df.apply(lambda x: [float(tok) for tok in x['D_H'][1:-1].split()], axis=1)

    human_df.to_csv(os.path.join(OUTPUT_DIR, f'{SURVEY_NAME}_{CONTEXT}_human.csv'))
    model_df.to_csv(os.path.join(OUTPUT_DIR, f'{SURVEY_NAME}_{CONTEXT}_model.csv'))

    ## Combine and save
    print(colored('--Comparing opinion distribution--', 'blue'))
    # No explicit keys: pandas joins on every column the two frames share.
    combined_df = pd.merge(model_df, human_df)
    combined_df['group_order'] = combined_df.apply(lambda x: md_order[x['attribute']][x['group']], axis=1)
    # Model (D_M) vs. human (D_H) distance for each merged row.
    combined_df['WD'] = combined_df.swifter.apply(lambda x: _normalized_wd(x['ordinal'], x['D_M'], x['D_H']),
                                                  axis=1)

    combined_df.to_csv(os.path.join(OUTPUT_DIR, f'{SURVEY_NAME}_{CONTEXT}_combined.csv'))

    if CONTEXT == 'default':
        # Human baseline: distance of every demographic group's distribution
        # (D_G) from the 'Overall' distribution (D_O) on the same question.
        print(colored('--Getting human baseline--', 'blue'))
        human_overall = human_df[human_df['group'] == 'Overall'].rename(columns={'D_H': 'D_O', 'R_H': 'R_O'})
        human_groups = human_df[human_df['group'] != 'Overall'].rename(columns={'D_H': 'D_G', 'R_H': 'R_G'})
        key_to_ordering = dict(zip(info_df['key'], info_df['option_ordinal']))
        human_groups['ordinal'] = human_groups.apply(lambda x: key_to_ordering[x['qkey']], axis=1)
        human_groups = pd.merge(human_groups, human_overall, on='qkey')
        human_groups['WD'] = human_groups.apply(lambda x: _normalized_wd(x['ordinal'], x['D_G'], x['D_O']),
                                                axis=1)
        human_groups.to_csv(os.path.join(OUTPUT_DIR, f'{SURVEY_NAME}_baseline.csv'))

[31mAmerican_Trends_Panel_W26[0m
[34m--Getting LM opinion distribution--[0m
opinions_qa_survey=Pew_American_Trends_Panel_W26,num_logprobs=10,context=default,num_train_trials=1,model=ai21_j1-grande,num_train_trials=1
ai21_j1-grande default
----------------------------------------------------------------------------------------------------
opinions_qa_survey=Pew_American_Trends_Panel_W26,num_logprobs=10,context=default,num_train_trials=1,model=ai21_j1-grande-v2-beta,num_train_trials=1
ai21_j1-grande-v2-beta default
----------------------------------------------------------------------------------------------------
opinions_qa_survey=Pew_American_Trends_Panel_W26,num_logprobs=10,context=default,num_train_trials=1,model=ai21_j1-jumbo,num_train_trials=1
ai21_j1-jumbo default
----------------------------------------------------------------------------------------------------
opinions_qa_survey=Pew_American_Trends_Panel_W26,num_logprobs=100,context=default,num_train_trials=1,model=openai_

  resp_df = pd.read_csv(os.path.join(DATASET_DIR, SURVEY_NAME, 'responses.csv'))


[34m--Comparing opinion distribution--[0m


Pandas Apply:   0%|          | 0/42732 [00:00<?, ?it/s]

  combined_df['WD'] = combined_df.swifter.apply(lambda x: wasserstein_distance(x['ordinal'],


[34m--Getting human baseline--[0m


  human_groups['WD'] = human_groups.apply(lambda x: wasserstein_distance(x['ordinal'],


[31mAmerican_Trends_Panel_W27[0m
[34m--Getting LM opinion distribution--[0m
opinions_qa_survey=Pew_American_Trends_Panel_W27,num_logprobs=10,context=default,num_train_trials=1,model=ai21_j1-grande,num_train_trials=1
ai21_j1-grande default
----------------------------------------------------------------------------------------------------
opinions_qa_survey=Pew_American_Trends_Panel_W27,num_logprobs=10,context=default,num_train_trials=1,model=ai21_j1-grande-v2-beta,num_train_trials=1
ai21_j1-grande-v2-beta default
----------------------------------------------------------------------------------------------------
opinions_qa_survey=Pew_American_Trends_Panel_W27,num_logprobs=10,context=default,num_train_trials=1,model=ai21_j1-jumbo,num_train_trials=1
ai21_j1-jumbo default
----------------------------------------------------------------------------------------------------
opinions_qa_survey=Pew_American_Trends_Panel_W27,num_logprobs=100,context=default,num_train_trials=1,model=openai_

  resp_df = pd.read_csv(os.path.join(DATASET_DIR, SURVEY_NAME, 'responses.csv'))


[34m--Comparing opinion distribution--[0m


Pandas Apply:   0%|          | 0/52704 [00:00<?, ?it/s]

  combined_df['WD'] = combined_df.swifter.apply(lambda x: wasserstein_distance(x['ordinal'],


[34m--Getting human baseline--[0m


  human_groups['WD'] = human_groups.apply(lambda x: wasserstein_distance(x['ordinal'],


[31mAmerican_Trends_Panel_W29[0m
[34m--Getting LM opinion distribution--[0m
opinions_qa_survey=Pew_American_Trends_Panel_W29,num_logprobs=10,context=default,num_train_trials=1,model=ai21_j1-grande,num_train_trials=1
ai21_j1-grande default
----------------------------------------------------------------------------------------------------
opinions_qa_survey=Pew_American_Trends_Panel_W29,num_logprobs=10,context=default,num_train_trials=1,model=ai21_j1-grande-v2-beta,num_train_trials=1
ai21_j1-grande-v2-beta default
----------------------------------------------------------------------------------------------------
opinions_qa_survey=Pew_American_Trends_Panel_W29,num_logprobs=10,context=default,num_train_trials=1,model=ai21_j1-jumbo,num_train_trials=1
ai21_j1-jumbo default
----------------------------------------------------------------------------------------------------
opinions_qa_survey=Pew_American_Trends_Panel_W29,num_logprobs=100,context=default,num_train_trials=1,model=openai_

  resp_df = pd.read_csv(os.path.join(DATASET_DIR, SURVEY_NAME, 'responses.csv'))


[34m--Comparing opinion distribution--[0m


Pandas Apply:   0%|          | 0/42201 [00:00<?, ?it/s]

[34m--Getting human baseline--[0m
[31mAmerican_Trends_Panel_W32[0m
[34m--Getting LM opinion distribution--[0m
opinions_qa_survey=Pew_American_Trends_Panel_W32,num_logprobs=10,context=default,num_train_trials=1,model=ai21_j1-grande,num_train_trials=1
ai21_j1-grande default
----------------------------------------------------------------------------------------------------
opinions_qa_survey=Pew_American_Trends_Panel_W32,num_logprobs=10,context=default,num_train_trials=1,model=ai21_j1-grande-v2-beta,num_train_trials=1
ai21_j1-grande-v2-beta default
----------------------------------------------------------------------------------------------------
opinions_qa_survey=Pew_American_Trends_Panel_W32,num_logprobs=10,context=default,num_train_trials=1,model=ai21_j1-jumbo,num_train_trials=1
ai21_j1-jumbo default
----------------------------------------------------------------------------------------------------
opinions_qa_survey=Pew_American_Trends_Panel_W32,num_logprobs=100,context=defa

Pandas Apply:   0%|          | 0/53793 [00:00<?, ?it/s]

[34m--Getting human baseline--[0m
[31mAmerican_Trends_Panel_W34[0m
[34m--Getting LM opinion distribution--[0m
opinions_qa_survey=Pew_American_Trends_Panel_W34,num_logprobs=10,context=default,num_train_trials=1,model=ai21_j1-grande,num_train_trials=1
ai21_j1-grande default
----------------------------------------------------------------------------------------------------
opinions_qa_survey=Pew_American_Trends_Panel_W34,num_logprobs=10,context=default,num_train_trials=1,model=ai21_j1-grande-v2-beta,num_train_trials=1
ai21_j1-grande-v2-beta default
----------------------------------------------------------------------------------------------------
opinions_qa_survey=Pew_American_Trends_Panel_W34,num_logprobs=10,context=default,num_train_trials=1,model=ai21_j1-jumbo,num_train_trials=1
ai21_j1-jumbo default
----------------------------------------------------------------------------------------------------
opinions_qa_survey=Pew_American_Trends_Panel_W34,num_logprobs=100,context=defa

Pandas Apply:   0%|          | 0/36468 [00:00<?, ?it/s]

[34m--Getting human baseline--[0m
[31mAmerican_Trends_Panel_W36[0m
[34m--Getting LM opinion distribution--[0m
opinions_qa_survey=Pew_American_Trends_Panel_W36,num_logprobs=10,context=default,num_train_trials=1,model=ai21_j1-grande,num_train_trials=1
ai21_j1-grande default
----------------------------------------------------------------------------------------------------
opinions_qa_survey=Pew_American_Trends_Panel_W36,num_logprobs=10,context=default,num_train_trials=1,model=ai21_j1-grande-v2-beta,num_train_trials=1
ai21_j1-grande-v2-beta default
----------------------------------------------------------------------------------------------------
opinions_qa_survey=Pew_American_Trends_Panel_W36,num_logprobs=10,context=default,num_train_trials=1,model=ai21_j1-jumbo,num_train_trials=1
ai21_j1-jumbo default
----------------------------------------------------------------------------------------------------
opinions_qa_survey=Pew_American_Trends_Panel_W36,num_logprobs=100,context=defa

Pandas Apply:   0%|          | 0/76239 [00:00<?, ?it/s]

[34m--Getting human baseline--[0m
[31mAmerican_Trends_Panel_W41[0m
[34m--Getting LM opinion distribution--[0m
opinions_qa_survey=Pew_American_Trends_Panel_W41,num_logprobs=10,context=default,num_train_trials=1,model=ai21_j1-grande,num_train_trials=1
ai21_j1-grande default
----------------------------------------------------------------------------------------------------
opinions_qa_survey=Pew_American_Trends_Panel_W41,num_logprobs=10,context=default,num_train_trials=1,model=ai21_j1-grande-v2-beta,num_train_trials=1
ai21_j1-grande-v2-beta default
----------------------------------------------------------------------------------------------------
opinions_qa_survey=Pew_American_Trends_Panel_W41,num_logprobs=10,context=default,num_train_trials=1,model=ai21_j1-jumbo,num_train_trials=1
ai21_j1-jumbo default
----------------------------------------------------------------------------------------------------
opinions_qa_survey=Pew_American_Trends_Panel_W41,num_logprobs=100,context=defa

Pandas Apply:   0%|          | 0/49410 [00:00<?, ?it/s]

  combined_df['WD'] = combined_df.swifter.apply(lambda x: wasserstein_distance(x['ordinal'],


[34m--Getting human baseline--[0m


  human_groups['WD'] = human_groups.apply(lambda x: wasserstein_distance(x['ordinal'],


[31mAmerican_Trends_Panel_W42[0m
[34m--Getting LM opinion distribution--[0m
opinions_qa_survey=Pew_American_Trends_Panel_W42,num_logprobs=10,context=default,num_train_trials=1,model=ai21_j1-grande,num_train_trials=1
ai21_j1-grande default
----------------------------------------------------------------------------------------------------
opinions_qa_survey=Pew_American_Trends_Panel_W42,num_logprobs=10,context=default,num_train_trials=1,model=ai21_j1-grande-v2-beta,num_train_trials=1
ai21_j1-grande-v2-beta default
----------------------------------------------------------------------------------------------------
opinions_qa_survey=Pew_American_Trends_Panel_W42,num_logprobs=10,context=default,num_train_trials=1,model=ai21_j1-jumbo,num_train_trials=1
ai21_j1-jumbo default
----------------------------------------------------------------------------------------------------
opinions_qa_survey=Pew_American_Trends_Panel_W42,num_logprobs=100,context=default,num_train_trials=1,model=openai_

Pandas Apply:   0%|          | 0/70821 [00:00<?, ?it/s]

[34m--Getting human baseline--[0m
[31mAmerican_Trends_Panel_W43[0m
[34m--Getting LM opinion distribution--[0m
opinions_qa_survey=Pew_American_Trends_Panel_W43,num_logprobs=10,context=default,num_train_trials=1,model=ai21_j1-grande,num_train_trials=1
ai21_j1-grande default
----------------------------------------------------------------------------------------------------
opinions_qa_survey=Pew_American_Trends_Panel_W43,num_logprobs=10,context=default,num_train_trials=1,model=ai21_j1-grande-v2-beta,num_train_trials=1
ai21_j1-grande-v2-beta default
----------------------------------------------------------------------------------------------------
opinions_qa_survey=Pew_American_Trends_Panel_W43,num_logprobs=10,context=default,num_train_trials=1,model=ai21_j1-jumbo,num_train_trials=1
ai21_j1-jumbo default
----------------------------------------------------------------------------------------------------
opinions_qa_survey=Pew_American_Trends_Panel_W43,num_logprobs=100,context=defa

Pandas Apply:   0%|          | 0/62586 [00:00<?, ?it/s]

[34m--Getting human baseline--[0m
[31mAmerican_Trends_Panel_W45[0m
[34m--Getting LM opinion distribution--[0m
opinions_qa_survey=Pew_American_Trends_Panel_W45,num_logprobs=10,context=default,num_train_trials=1,model=ai21_j1-grande,num_train_trials=1
ai21_j1-grande default
----------------------------------------------------------------------------------------------------
opinions_qa_survey=Pew_American_Trends_Panel_W45,num_logprobs=10,context=default,num_train_trials=1,model=ai21_j1-grande-v2-beta,num_train_trials=1
ai21_j1-grande-v2-beta default
----------------------------------------------------------------------------------------------------
opinions_qa_survey=Pew_American_Trends_Panel_W45,num_logprobs=10,context=default,num_train_trials=1,model=ai21_j1-jumbo,num_train_trials=1
ai21_j1-jumbo default
----------------------------------------------------------------------------------------------------
opinions_qa_survey=Pew_American_Trends_Panel_W45,num_logprobs=100,context=defa

Pandas Apply:   0%|          | 0/52155 [00:00<?, ?it/s]

  combined_df['WD'] = combined_df.swifter.apply(lambda x: wasserstein_distance(x['ordinal'],


[34m--Getting human baseline--[0m


  human_groups['WD'] = human_groups.apply(lambda x: wasserstein_distance(x['ordinal'],


[31mAmerican_Trends_Panel_W49[0m
[34m--Getting LM opinion distribution--[0m
opinions_qa_survey=Pew_American_Trends_Panel_W49,num_logprobs=10,context=default,num_train_trials=1,model=ai21_j1-grande,num_train_trials=1
ai21_j1-grande default
----------------------------------------------------------------------------------------------------
opinions_qa_survey=Pew_American_Trends_Panel_W49,num_logprobs=10,context=default,num_train_trials=1,model=ai21_j1-grande-v2-beta,num_train_trials=1
ai21_j1-grande-v2-beta default
----------------------------------------------------------------------------------------------------
opinions_qa_survey=Pew_American_Trends_Panel_W49,num_logprobs=10,context=default,num_train_trials=1,model=ai21_j1-jumbo,num_train_trials=1
ai21_j1-jumbo default
----------------------------------------------------------------------------------------------------
opinions_qa_survey=Pew_American_Trends_Panel_W49,num_logprobs=100,context=default,num_train_trials=1,model=openai_

Pandas Apply:   0%|          | 0/53802 [00:00<?, ?it/s]

[34m--Getting human baseline--[0m
[31mAmerican_Trends_Panel_W50[0m
[34m--Getting LM opinion distribution--[0m
opinions_qa_survey=Pew_American_Trends_Panel_W50,num_logprobs=10,context=default,num_train_trials=1,model=ai21_j1-grande,num_train_trials=1
ai21_j1-grande default
----------------------------------------------------------------------------------------------------
opinions_qa_survey=Pew_American_Trends_Panel_W50,num_logprobs=10,context=default,num_train_trials=1,model=ai21_j1-grande-v2-beta,num_train_trials=1
ai21_j1-grande-v2-beta default
----------------------------------------------------------------------------------------------------
opinions_qa_survey=Pew_American_Trends_Panel_W50,num_logprobs=10,context=default,num_train_trials=1,model=ai21_j1-jumbo,num_train_trials=1
ai21_j1-jumbo default
----------------------------------------------------------------------------------------------------
opinions_qa_survey=Pew_American_Trends_Panel_W50,num_logprobs=100,context=defa

  resp_df = pd.read_csv(os.path.join(DATASET_DIR, SURVEY_NAME, 'responses.csv'))


[34m--Comparing opinion distribution--[0m


Pandas Apply:   0%|          | 0/69156 [00:00<?, ?it/s]

  combined_df['WD'] = combined_df.swifter.apply(lambda x: wasserstein_distance(x['ordinal'],


[34m--Getting human baseline--[0m


  human_groups['WD'] = human_groups.apply(lambda x: wasserstein_distance(x['ordinal'],


[31mAmerican_Trends_Panel_W54[0m
[34m--Getting LM opinion distribution--[0m
opinions_qa_survey=Pew_American_Trends_Panel_W54,num_logprobs=10,context=default,num_train_trials=1,model=ai21_j1-grande,num_train_trials=1
ai21_j1-grande default
----------------------------------------------------------------------------------------------------
opinions_qa_survey=Pew_American_Trends_Panel_W54,num_logprobs=10,context=default,num_train_trials=1,model=ai21_j1-grande-v2-beta,num_train_trials=1
ai21_j1-grande-v2-beta default
----------------------------------------------------------------------------------------------------
opinions_qa_survey=Pew_American_Trends_Panel_W54,num_logprobs=10,context=default,num_train_trials=1,model=ai21_j1-jumbo,num_train_trials=1
ai21_j1-jumbo default
----------------------------------------------------------------------------------------------------
opinions_qa_survey=Pew_American_Trends_Panel_W54,num_logprobs=100,context=default,num_train_trials=1,model=openai_

Pandas Apply:   0%|          | 0/63684 [00:00<?, ?it/s]

[34m--Getting human baseline--[0m
[31mAmerican_Trends_Panel_W82[0m
[34m--Getting LM opinion distribution--[0m
opinions_qa_survey=Pew_American_Trends_Panel_W82,num_logprobs=10,context=default,num_train_trials=1,model=ai21_j1-grande,num_train_trials=1
ai21_j1-grande default
----------------------------------------------------------------------------------------------------
opinions_qa_survey=Pew_American_Trends_Panel_W82,num_logprobs=10,context=default,num_train_trials=1,model=ai21_j1-grande-v2-beta,num_train_trials=1
ai21_j1-grande-v2-beta default
----------------------------------------------------------------------------------------------------
opinions_qa_survey=Pew_American_Trends_Panel_W82,num_logprobs=10,context=default,num_train_trials=1,model=ai21_j1-jumbo,num_train_trials=1
ai21_j1-jumbo default
----------------------------------------------------------------------------------------------------
opinions_qa_survey=Pew_American_Trends_Panel_W82,num_logprobs=100,context=defa

Pandas Apply:   0%|          | 0/57096 [00:00<?, ?it/s]

[34m--Getting human baseline--[0m
[31mAmerican_Trends_Panel_W92[0m
[34m--Getting LM opinion distribution--[0m
opinions_qa_survey=Pew_American_Trends_Panel_W92,num_logprobs=10,context=default,num_train_trials=1,model=ai21_j1-grande,num_train_trials=1
ai21_j1-grande default
----------------------------------------------------------------------------------------------------
opinions_qa_survey=Pew_American_Trends_Panel_W92,num_logprobs=10,context=default,num_train_trials=1,model=ai21_j1-grande-v2-beta,num_train_trials=1
ai21_j1-grande-v2-beta default
----------------------------------------------------------------------------------------------------
opinions_qa_survey=Pew_American_Trends_Panel_W92,num_logprobs=10,context=default,num_train_trials=1,model=ai21_j1-jumbo,num_train_trials=1
ai21_j1-jumbo default
----------------------------------------------------------------------------------------------------
opinions_qa_survey=Pew_American_Trends_Panel_W92,num_logprobs=100,context=defa

  resp_df = pd.read_csv(os.path.join(DATASET_DIR, SURVEY_NAME, 'responses.csv'))


[34m--Comparing opinion distribution--[0m


Pandas Apply:   0%|          | 0/42273 [00:00<?, ?it/s]

[34m--Getting human baseline--[0m
[31mPew_American_Trends_Panel_disagreement_500[0m
[34m--Getting LM opinion distribution--[0m
opinions_qa_survey=Pew_American_Trends_Panel_disagreement_500,num_logprobs=10,context=default,num_train_trials=1,model=ai21_j1-grande,num_train_trials=1
ai21_j1-grande default
----------------------------------------------------------------------------------------------------
opinions_qa_survey=Pew_American_Trends_Panel_disagreement_500,num_logprobs=10,context=default,num_train_trials=1,model=ai21_j1-grande-v2-beta,num_train_trials=1
ai21_j1-grande-v2-beta default
----------------------------------------------------------------------------------------------------
opinions_qa_survey=Pew_American_Trends_Panel_disagreement_500,num_logprobs=10,context=default,num_train_trials=1,model=ai21_j1-jumbo,num_train_trials=1
ai21_j1-jumbo default
----------------------------------------------------------------------------------------------------
opinions_qa_survey=Pew

Pandas Apply:   0%|          | 0/272916 [00:00<?, ?it/s]

  combined_df['WD'] = combined_df.swifter.apply(lambda x: wasserstein_distance(x['ordinal'],


[34m--Getting human baseline--[0m


  human_groups['WD'] = human_groups.apply(lambda x: wasserstein_distance(x['ordinal'],
