In [35]:
%load_ext autoreload
%autoreload 2
%config InlineBackend.figure_format = 'retina' # Make visualizations look good
%matplotlib inline
#%config InlineBackend.figure_format = 'svg' 
import os
import matplotlib.pyplot as plt
import seaborn as sns
from os.path import join
from tqdm import tqdm
import pandas as pd
import pickle as pkl
import dtreeviz
import imodelsx.process_results
from collections import defaultdict
import sys
import numpy as np
import viz
import llm_tree.llm
import imodelsx
import sklearn.tree
from copy import deepcopy
sys.path.append('../experiments/')
results_dir = '../results/feb11/'

def get_exansion_dfs(r):
    """Build one keyword-expansion summary table per dataset.

    For each unique ``dataset_name`` in ``r``, the first matching row is used
    to locate ``model.pkl`` under ``row.save_dir_unique``. Every estimator's
    ``get_tree_dict_repr()`` values are read as keyword lists whose first entry
    is the keyword and whose remaining entries are its expansions.

    Parameters
    ----------
    r : pd.DataFrame
        Results table with ``dataset_name`` and ``save_dir_unique`` columns.

    Returns
    -------
    list of pd.DataFrame
        One frame per dataset, with columns ``Dataset``, ``Keyword``,
        ``Example expansion``, ``# Expansions``, ``Mean expansions`` and
        ``# Expansion candidates``.

    Notes
    -----
    Reads the module-level ``STOPWORDS`` and ``EXPANSION_DICT``; both must be
    defined before this function is called.
    """
    dfs = []
    for dataset_name in tqdm(list(r.dataset_name.unique())):
        row = r[r.dataset_name == dataset_name].iloc[0]
        # NOTE: unpickling can execute arbitrary code; only load trusted model files.
        with open(join(row.save_dir_unique, 'model.pkl'), 'rb') as f:
            model = pkl.load(f)

        # keyword -> list of expansion lists, one entry per occurrence of the keyword
        expansions_by_keyword = defaultdict(list)
        # one entry per occurrence of a keyword that has at least one expansion
        expanded_keywords = []
        for est in model.estimators_:
            for ks in est.get_tree_dict_repr().values():
                expansions_by_keyword[ks[0]].append(ks[1:])
                if len(ks) > 1:
                    expanded_keywords.append(ks[0])

        # Name the columns explicitly: the default names produced by
        # value_counts().reset_index() differ between pandas 1.x and 2.x.
        df = (
            pd.Series(expanded_keywords, dtype=object)
            .value_counts()
            .head(20)
            .rename_axis('Keyword')
            .reset_index(name='# Expansions')
        )
        # copy so the column insertions below act on an independent frame
        df = df[~df['Keyword'].isin(STOPWORDS)].copy()

        def select_expand_with_longest_length(k):
            # the expansion list with the most entries among all occurrences of k
            expands = expansions_by_keyword[k]
            lens = [len(e) for e in expands]
            return expands[np.argsort(lens)[-1]]

        df.insert(1, 'Example expansion',
                  df['Keyword'].apply(lambda k: ', '.join(select_expand_with_longest_length(k))))
        df['Mean expansions'] = df['Keyword'].apply(
            lambda k: np.mean([len(e) for e in expansions_by_keyword[k]]))
        df['# Expansion candidates'] = df['Keyword'].apply(lambda k: len(EXPANSION_DICT[k]))
        df.insert(0, 'Dataset', viz.DSETS_RENAME_DICT[dataset_name])

        dfs.append(df)
    return dfs

# Load the results table for every run found under results_dir.
r = imodelsx.process_results.get_results_df(results_dir, use_cached=True)

# Keywords that get_exansion_dfs drops from the per-dataset tables.
STOPWORDS = ['the', 'or', 'not', 'too', 'with', 'so', 'be',
             'nt', 'it', 'this', 'and', 'so', 'that', 'are',
             'said', 'from', 'per', 'the movie', 'movie']

# NOTE(review): these are absolute, machine-specific paths; consider deriving
# them from a configurable base directory so the notebook runs elsewhere.
# NOTE: unpickling can execute arbitrary code; only load trusted cache files.
with open('/home/chansingh/llm-tree/experiments/gpt3_cache/base.pkl', 'rb') as f:
    EXPANSION_DICT = pkl.load(f)
with open('/home/chansingh/llm-tree/experiments/gpt3_cache/raw_base.pkl', 'rb') as f:
    EXPANSION_DICT_RAW = pkl.load(f)

# Restrict to one configuration: 40 estimators, seed 1, the llm_tree model.
r = r[(r.n_estimators == 40) & (r.seed == 1) & (r.model_name == 'llm_tree')]

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


### Expansions metadata table

In [None]:
dataset_names = list(r.dataset_name.unique())
dfs = []
# d[column label][keyword] -> list of counts, one per occurrence of the keyword in a tree
d = defaultdict(lambda: defaultdict(list))

# Raw strings keep the LaTeX backslashes literal; sequences such as '\m' or
# '\e' in a normal string are invalid escapes and warn on recent Python versions.
col_candidates_raw = r'\makecell{# Expansion candidates\\(before deduplication)}'
col_candidates = '# Expansion candidates'
col_screened = r'\makecell{# Expansions\\(after screening)}'

for dataset_name in tqdm(dataset_names):
    row = r[r.dataset_name == dataset_name].iloc[0]
    # NOTE: unpickling can execute arbitrary code; only load trusted model files.
    with open(join(row.save_dir_unique, 'model.pkl'), 'rb') as f:
        model = pkl.load(f)

    for est in model.estimators_:
        for ks in est.get_tree_dict_repr().values():
            keyword = ks[0]
            expansion_candidates = llm_tree.llm.convert_response_to_keywords(
                EXPANSION_DICT_RAW[keyword], remove_duplicates=False)
            d[col_candidates_raw][keyword].append(len(expansion_candidates))
            d[col_candidates][keyword].append(len(set(expansion_candidates)))
            d[col_screened][keyword].append(len(ks[1:]))

# For each column: average the counts per keyword first, then report the mean
# and standard error of those per-keyword averages.
tab = {}
for k, v in d.items():
    keyword_means = [np.mean(lens) for lens in v.values()]
    mu = np.mean(keyword_means)
    sem = np.std(keyword_means) / np.sqrt(len(keyword_means))
    tab[k] = f'{mu:.1f}' + r'\err{' + f'{sem:.1f}' + '}'
tab = pd.Series(tab).to_frame().T
# NOTE(review): the two relevance entries are hard-coded rather than computed
# in this cell -- confirm they match the human-study results.
tab[r'\makecell{Expansion relevance\\(human)}'] = r'0.94\err{0.3}'
tab[r'\makecell{Expansion relevance\\(randomized, human)}'] = r'0.15\err{0.4}'
print(tab.round(2)
      .to_latex(index=False, escape=False)
      .replace('#', r'\#').replace('_', r'\_')
      .replace('lllll', 'rrr|rr'))

# Expansions example table

In [None]:
# Build the per-dataset expansion tables (unpickles one saved model per dataset).
dfs = get_exansion_dfs(r)

In [None]:
# Full example table: up to 6 keywords per dataset, with a rotated multirow
# dataset label on the first row of each dataset group.
with pd.option_context('display.max_colwidth', 100, 'display.max_rows', None):
    d_full = pd.concat(dfs).round(1)
    d_full = d_full[d_full['Example expansion'].apply(lambda x: len(x) > 0)]

    # keep the first row for each keyword, then apply the count thresholds
    d_full = d_full.drop_duplicates(subset=['Keyword'])
    d_full = d_full[d_full['# Expansions'] > 1]
    d_full = d_full[d_full['Mean expansions'] > 0.5]
    # copy so the 'Dataset' assignment below acts on an independent frame
    d_full = d_full.groupby('Dataset').head(6).copy()

    # Label only the first row of each dataset group; other rows get ''.
    # Groups are read from d_full itself (rows of one dataset are contiguous
    # because dfs holds one frame per dataset), so a dataset that lost all of
    # its rows to the filters above is simply skipped.
    dset_counts = d_full['Dataset'].value_counts().to_dict()
    dset = [''] * len(d_full)
    idx = 0
    for dname in d_full['Dataset'].unique():
        count = dset_counts[dname]
        dset[idx] = '\\parbox[c]{1mm}{\\multirow{' + str(count) + '}{*}{\\rotatebox[origin=c]{90} {' + dname + '}}}'
        idx += count
    d_full['Dataset'] = dset

    display(d_full)

    with open('expansions.tex', 'w') as f:
        # escape LaTeX specials, put a rule before each dataset group and drop
        # the 'Dataset' header text
        s = (
            d_full.to_latex(index=False, escape=False)
            .replace('_', r'\_')
            .replace('#', r'\#')
            .replace(r'\parbox', '\\midrule \n \\parbox')
            .replace("Dataset ", "")
        )
        f.write(s)

In [None]:
# Abbreviated example table: up to 2 keywords per dataset, with the
# abbreviated dataset name on the first row of each dataset group.
with pd.option_context('display.max_colwidth', 100, 'display.max_rows', None):
    d_full = pd.concat(dfs).round(1)
    d_full = d_full[d_full['Example expansion'].apply(lambda x: len(x) > 0)]

    # keep the first row for each keyword, then apply the count thresholds
    d_full = d_full.drop_duplicates(subset=['Keyword'])
    d_full = d_full[d_full['# Expansions'] > 1]
    d_full = d_full[d_full['Mean expansions'] > 0.5]
    # copy so the 'Dataset' assignment below acts on an independent frame
    d_full = d_full.groupby('Dataset').head(2).copy()

    # Label only the first row of each dataset group; other rows get ''.
    # Relies on rows being grouped by dataset in dataset_names order.
    dset_counts = d_full['Dataset'].value_counts().to_dict()
    dset = [''] * len(d_full)
    idx = 0
    for dataset_name in dataset_names:
        count = dset_counts.get(viz.DSETS_RENAME_DICT[dataset_name], 0)
        if count == 0:
            # every row of this dataset was removed by the filters above
            continue
        dset[idx] = viz.DSETS_RENAME_ABBREVIATE_DICT[dataset_name]
        idx += count
    d_full['Dataset'] = dset

    d_full = d_full.drop(columns=['# Expansions', 'Mean expansions', '# Expansion candidates'])
    display(d_full)

    # NOTE(review): the full-table cell writes to this same 'expansions.tex';
    # whichever cell runs last wins -- confirm that is intended.
    with open('expansions.tex', 'w') as f:
        s = (
            d_full.to_latex(index=False, escape=False)
            .replace('_', r'\_')
            .replace('#', r'\#')
            .replace(r'\parbox', '\\midrule \n \\parbox')
        )
        f.write(s)

# Export table for human scoring

In [None]:
# Rebuild the per-dataset expansion tables so the human-study export starts
# from the frames returned by get_exansion_dfs rather than a filtered copy.
dfs = get_exansion_dfs(r)

In [None]:
d_full = pd.concat(dfs).round(1)
rng = np.random.default_rng(seed=1)

# Build 15 deliberately mismatched (keyword, expansion) pairs as controls.
# Draw from the seeded generator: np.random.choice would use the unseeded
# global RNG and make the study table differ on every run.
# NOTE(review): a later cell aligns subj*.csv to d_study by row position, so
# scores collected against a table from an unseeded run will not match a
# regenerated table -- confirm before re-running.
n_pairs = 30 // 2
# sample row positions from the first 50 rows, or fewer if d_full is shorter
n_pool = min(50, len(d_full))
nums = rng.choice(n_pool, size=2 * n_pairs, replace=False)
wrongs = defaultdict(list)
for i in range(n_pairs):
    # the keyword comes from one sampled row, the expansion from a different one
    wrongs['Keyword'].append(d_full.iloc[nums[2 * i]]['Keyword'])
    wrongs['Example expansion'].append(d_full.iloc[nums[2 * i + 1]]['Example expansion'])
d_wrongs = pd.DataFrame(wrongs)
d_wrongs['Dataset'] = None

# Append the controls, flag them in 'Wrongs', then shuffle the rows with a fixed seed.
d_study = pd.concat([d_full, d_wrongs])
d_study['Wrongs'] = [0] * len(d_full) + [1] * len(d_wrongs)
d_study = d_study.sample(frac=1, random_state=1)
d_study = d_study[['Keyword', 'Example expansion', 'Wrongs', 'Dataset']]
# drop rows whose example expansion is empty
d_study = d_study[d_study['Example expansion'].apply(lambda x: len(x) > 0)]

In [None]:

# Show every study row with wide columns, then save the same table to CSV.
display_opts = ('display.max_colwidth', 100, 'display.max_rows', None)
with pd.option_context(*display_opts):
    display(d_study)
d_study.to_csv('human_study.csv', index=False)

### Compute results

In [36]:
# Read each subject's CSV and attach the dataset label of the study row at the
# same position; rows with no dataset are labelled 'Null'.
dataset_labels = d_study['Dataset'].fillna('Null').values
ds_list = []
for i in (1, 2, 3):
    ds = pd.read_csv(f'subj{i}.csv')
    # labels are matched by position, so the row counts must agree
    assert ds.shape[0] == d_study.shape[0]
    ds['Dataset'] = dataset_labels
    ds_list.append(ds.copy(deep=True))

In [38]:
# Stack the per-subject frames into one table; each frame keeps its own row
# index, so index values repeat across subjects.
ds = pd.concat(ds_list)

In [39]:
# Mean of the numeric columns per dataset label. numeric_only=True is set
# explicitly: without it, text columns trigger a FutureWarning on pandas 1.5
# and a TypeError on pandas >= 2.0.
ds.groupby('Dataset').mean(numeric_only=True)

  ds.groupby('Dataset').mean()


Unnamed: 0_level_0,Score (1 to 5)
Dataset,Unnamed: 1_level_1
Emotion,4.380952
Financial phrasebank,4.166667
Null,1.311111
Rotten tomatoes,4.433333
SST2,4.571429


In [40]:
# Standard error of the mean of the numeric columns per dataset label.
# numeric_only=True is set explicitly: without it, text columns trigger a
# FutureWarning on pandas 1.5 and a TypeError on pandas >= 2.0.
ds.groupby('Dataset').sem(numeric_only=True)

  ds.groupby('Dataset').sem()


Unnamed: 0_level_0,Score (1 to 5)
Dataset,Unnamed: 1_level_1
Emotion,0.159676
Financial phrasebank,0.161835
Null,0.099606
Rotten tomatoes,0.132902
SST2,0.103048
