In [126]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

import itertools

In [127]:
# Read the two tab-separated job tables and tag every row with the eval
# method it came from, so the sources stay distinguishable once stacked.
with open('eleuther_eval_jobs.tsv', 'r') as f:
    df1 = pd.read_csv(f, sep='\t', index_col=None).assign(eval_method='eleuther')

with open('composer_eval_jobs.tsv', 'r') as f:
    df2 = pd.read_csv(f, sep='\t', index_col=None).assign(eval_method='composer')

# Stack the two tables row-wise; each frame's original row index is kept as-is.
df = pd.concat([df1, df2])

In [128]:
# Drop every row that has a missing value in any column, so the partitioning
# and plotting below only see complete records.
df = df.dropna()

graph_partition_cols = ['eval_method', 'parameters-icl_tasks', ] # diff values will be on diff graphs
line_partition_cols = ['parameters-model'] # diff values will be diff lines

# Distinct values per partition column, in sorted order.
# A bare set() of strings iterates in an order that can change between
# interpreter runs (string hashes are randomized per process), which would
# reshuffle the fig{idx}.png numbering, the legend order and the line colours
# every time the kernel is restarted. Sorting makes the output reproducible.
graph_partition_vals = {
    k: sorted(set(df[k])) for k in graph_partition_cols
}
line_partition_vals = {
    k: sorted(set(df[k])) for k in line_partition_cols
}

# Columns plotted on the x and y axes of every figure.
x_col = 'gpu_num'
y_col = 'run_time'

In [129]:
def partition_graphs(df, graph_partition_vals):
    """Split ``df`` into one subset per combination of partition-column values.

    Every combination in the Cartesian product of the value collections is
    emitted, including combinations that match no rows (their subset is empty).

    Args:
        df: DataFrame containing every column named in ``graph_partition_vals``.
        graph_partition_vals: Mapping of column name -> iterable of values to
            partition on. An empty mapping yields a single partition holding
            all of ``df``.

    Returns:
        A list of dicts, ordered the way ``itertools.product`` orders the
        combinations. Each dict has two keys:
            'choices': {column name: value} identifying the combination.
            'df': the rows of ``df`` where every column equals its chosen value.
    """
    col_names = list(graph_partition_vals.keys())
    candidate_vals = list(graph_partition_vals.values())

    dataset_graph_partitions = []
    for combo in itertools.product(*candidate_vals):
        choices = dict(zip(col_names, combo))

        # Filter with a boolean mask instead of a hand-built query string:
        # values are compared as-is, so values containing quotes and
        # non-string values match correctly, and an empty set of choices
        # simply selects every row instead of producing an empty query.
        row_mask = pd.Series(True, index=df.index)
        for col_name, col_val in choices.items():
            row_mask = row_mask & (df[col_name] == col_val)

        dataset_graph_partitions.append({'choices': choices, 'df': df[row_mask]})

    return dataset_graph_partitions



# One entry per combination of graph-partition values; each entry becomes its own figure.
graph_partitions = partition_graphs(df=df, graph_partition_vals=graph_partition_vals)

In [130]:
def partition_lines(df, line_partition_vals):
    """Split ``df`` into one subset per combination of line-partition values.

    Every combination in the Cartesian product of the value collections is
    emitted, including combinations that match no rows (their subset is empty).

    Args:
        df: DataFrame containing every column named in ``line_partition_vals``.
        line_partition_vals: Mapping of column name -> iterable of values to
            partition on. An empty mapping yields a single partition holding
            all of ``df``.

    Returns:
        A list of dicts, ordered the way ``itertools.product`` orders the
        combinations. Each dict has two keys:
            'choices': {column name: value} identifying the combination.
            'df': the rows of ``df`` where every column equals its chosen value.
    """
    col_names = list(line_partition_vals.keys())
    candidate_vals = list(line_partition_vals.values())

    dataset_line_partitions = []
    for combo in itertools.product(*candidate_vals):
        choices = dict(zip(col_names, combo))

        # A boolean mask compares the raw values, so quotes inside a value and
        # non-string values are handled, and zero partition columns selects
        # all rows rather than building an empty (invalid) query string.
        row_mask = pd.Series(True, index=df.index)
        for col_name, col_val in choices.items():
            row_mask = row_mask & (df[col_name] == col_val)

        dataset_line_partitions.append({'choices': choices, 'df': df[row_mask]})
    return dataset_line_partitions

def make_title(choices):
    """Build a figure title from one graph partition's chosen values.

    Args:
        choices: Mapping that holds the 'eval_method' and
            'parameters-icl_tasks' entries for the graph.

    Returns:
        The title string: the eval method followed by the task name in
        upper case.
    """
    method = choices['eval_method']
    task = choices['parameters-icl_tasks'].upper()
    return f"{method}-based eval, {task}"


# Draw one figure per graph partition, with one line per line partition,
# and save each figure as fig{idx}.png in the working directory.
for idx, graph_info in enumerate(graph_partitions):
    choices = graph_info['choices']
    print(choices)
    line_partitions = partition_lines(graph_info['df'], line_partition_vals)

    # Give every graph its own figure instead of clearing a shared implicit
    # one; the figure is closed after saving so no empty leftover figure is
    # displayed and figures do not accumulate in memory.
    fig, ax = plt.subplots()
    for line in line_partitions:
        # str() keeps the label buildable when a partition value is not a string.
        label = ', '.join(str(val) for val in line['choices'].values())
        # Stable sort by x so the points are connected left to right rather
        # than in whatever order the rows appear in the input.
        line_df = line['df'].sort_values(x_col, kind='mergesort')
        # Empty subsets are still plotted so that a given line partition keeps
        # the same position in the colour cycle on every figure.
        ax.plot(line_df[x_col], line_df[y_col], label=label)

    ax.legend()
    ax.set_title(make_title(choices))
    ax.set_xlabel('Number of GPUs')
    ax.set_ylabel('Run time (seconds)')
    fig.savefig(f"fig{idx}.png")
    plt.close(fig)


{'eval_method': 'composer', 'parameters-icl_tasks': 'lambada'}
{'eval_method': 'composer', 'parameters-icl_tasks': 'piqa'}
{'eval_method': 'eleuther', 'parameters-icl_tasks': 'lambada'}
{'eval_method': 'eleuther', 'parameters-icl_tasks': 'piqa'}


<Figure size 640x480 with 0 Axes>