In [None]:
import spacy
from nltk import word_tokenize, sent_tokenize
from nltk.corpus import stopwords
from wordfreq import zipf_frequency, tokenize
from statistics import mean
# English stopword list from NLTK, stored as a set for fast membership tests;
# get_word_freqs uses it to drop stopwords before averaging frequencies.
stop_words = set(stopwords.words('english'))
# spaCy pipeline used by get_dep_lens to read each token's position and head.
nlp = spacy.load("en_core_web_sm")

def get_target_sent(text):
    """Return the part of a raw model output that follows the prompt.

    If the output contains the marker ``"Answer:\\n"``, the text between the
    first marker and the next one (or the end of the string) is returned.
    Otherwise the second line of the output is returned.

    Raises:
        IndexError: if the marker is absent and ``text`` has no newline.
    """
    marker = "Answer:\n"
    # Fall back to plain line splitting when the marker is missing.
    delimiter = marker if marker in text else "\n"
    return text.split(delimiter)[1]
    
def get_word_freqs(sents, exclude_stopwords=True):
    """Collect Zipf frequencies of the words in each generated continuation.

    For every raw output in ``sents`` the target text is extracted with
    ``get_target_sent``, its first four whitespace-separated words are dropped
    (presumably the prefix supplied in the prompt -- TODO confirm), and the
    remainder is tokenized with ``wordfreq.tokenize``.

    Args:
        sents: iterable of raw model output strings.
        exclude_stopwords: when True, tokens whose lowercase form is in
            ``stop_words`` are skipped.

    Returns:
        A flat list of non-zero Zipf frequencies, in input order.
    """
    freqs = []
    for sent in sents:
        continuation = " ".join(get_target_sent(sent).split()[4:])
        # Both modes share this call. The original non-stopword branch passed
        # "en" to str.join instead of tokenize and always raised TypeError.
        for tok in tokenize(continuation, "en"):
            if exclude_stopwords and tok.lower() in stop_words:
                continue
            freq = zipf_frequency(tok, "en", minimum=0.0)
            # A frequency of exactly 0 is left out of the result.
            if freq != 0:
                freqs.append(freq)
    return freqs

def get_word_lens(sents):
    """Return the character length of every token in the generated continuations.

    Each raw output is reduced to its target text via ``get_target_sent``, the
    first four whitespace-separated words are dropped, and the rest is split
    with NLTK's ``word_tokenize``. The result is one flat list over all inputs.
    """
    lengths = []
    for raw_output in sents:
        continuation = " ".join(get_target_sent(raw_output).split()[4:])
        lengths.extend(len(token) for token in word_tokenize(continuation))
    return lengths

def get_dep_lens(sents):
    """Return the dependency length of every token in each first sentence.

    For each raw output, the target text is extracted with ``get_target_sent``
    and only its first sentence (per NLTK ``sent_tokenize``) is parsed with the
    spaCy pipeline ``nlp``. The dependency length of a token is the absolute
    distance between its index and the index of its head.

    Outputs whose target text yields no sentence are skipped, matching how
    ``get_sent_lens`` treats them, instead of raising IndexError.

    Returns:
        A flat list of non-negative integers over all inputs.
    """
    dep_lens = []
    for text in sents:
        # Slice with [:1] rather than index with [0], so an empty generation
        # contributes nothing instead of crashing the whole run.
        for first_sent in sent_tokenize(get_target_sent(text))[:1]:
            for token in nlp(first_sent):
                # NOTE(review): spaCy's root token is its own head, so it adds
                # a 0 to the list -- confirm this is intended for the mean.
                dep_lens.append(abs(token.i - token.head.i))
    return dep_lens

def get_sent_lens(sents):
    """Return the token count of the first sentence of each generation.

    The target text of every raw output is obtained with ``get_target_sent``;
    its first sentence (if any) is tokenized with ``word_tokenize``. Only
    sentences with more than three tokens are kept.
    """
    kept_lengths = []
    for raw_output in sents:
        first_sentences = sent_tokenize(get_target_sent(raw_output))[:1]
        for sentence in first_sentences:
            n_tokens = len(word_tokenize(sentence))
            # Sentences with three tokens or fewer are discarded.
            if n_tokens > 3:
                kept_lengths.append(n_tokens)
    return kept_lengths

In [None]:
# Maps each prompt-condition key (the "prompt" column of the results frame)
# to the LaTeX label printed at the start of its table row. The insertion
# order of this dict is also the row order of the printed table.
prompt2id = {
    "simple_grammar": "Syn$\\downarrow$",
    "middle_grammar": "Syn$\\rightarrow$",
    "difficult_grammar": "Syn$\\uparrow$",
    "simple_vocab": "Lex$\\downarrow$",
    "middle_vocab": "Lex$\\rightarrow$",
    "difficult_vocab": "Lex$\\uparrow$",
    "human_like_1": "Task1",
    "human_like_2": "Task2",
    "baseline": "baseline",
}
# Model directory names under ../sampled/DC/; the list order is the column
# order used when the table is printed.
model_orders = ["Llama-2-7b-chat-hf", "Llama-2-13b-chat-hf", "Llama-2-70b-chat-hf", "falcon-7b-instruct", "falcon-40b-instruct"]

In [None]:
import json
import pandas as pd

# Load the sampled generations for every (model, prompt condition) pair into
# row_list, one dict per pair with keys "model", "prompt" and "sents".
#
# The Llama and Falcon runs used differently worded prompts, so the JSON file
# names differ between the two families. Dict order fixes the row order:
# baseline, vocabulary conditions, grammar conditions, then the two
# human-like tasks.
#
# The empty `df` placeholder that used to be created here was never read
# (it is rebuilt from row_list in the next cell), so it is no longer created.
llama_prompt_files = {
    "baseline": "Please_generate_a_sentence.--Answer:-.json",
    "simple_vocab": "Please_generate_a_sentence_using_the_simplest_vocabulary_possible.--Answer:-.json",
    "middle_vocab": "Please_generate_a_sentence_with_a_careful_focus_on_word_choice.--Answer:-.json",
    "difficult_vocab": "Please_generate_a_sentence_using_the_most_difficult_vocabulary_possible.--Answer:-.json",
    "simple_grammar": "Please_generate_a_grammatically_simple_sentence_as_much_as_possible.--Answer:-.json",
    "middle_grammar": "Please_generate_a_sentence_with_a_careful_focus_on_grammar.--Answer:-.json",
    "difficult_grammar": "Please_generate_a_grammatically_complex_sentence_as_much_as_possible.--Answer:-.json",
    "human_like_1": "Please_generate_a_sentence_in_a_human-like_manner._It_has_been_reported_that_human_ability_to_predic.json",
    "human_like_2": "Please_generate_a_sentence._We_are_trying_to_reproduce_human_reading_times_with_the_word_prediction_.json",
}
falcon_prompt_files = {
    "baseline": "Please_complete_the_following_sentence:-.json",
    "simple_vocab": "Please_complete_the_following_sentence_using_the_simplest_vocabulary_possible:-.json",
    "middle_vocab": "Please_complete_the_following_sentence_with_a_careful_focus_on_word_choice.-.json",
    "difficult_vocab": "Please_complete_the_following_sentence_using_the_most_difficult_vocabulary_possible:-.json",
    "simple_grammar": "Please_complete_the_following_sentence_to_make_it_as_grammatically_simple_as_possible:-.json",
    "middle_grammar": "Please_complete_the_following_sentence_with_a_careful_focus_on_grammar.-.json",
    "difficult_grammar": "Please_complete_the_following_sentence_to_make_it_as_grammatically_complex_as_possible:-.json",
    "human_like_1": "Please_complete_the_following_sentence_in_a_human-like_manner._It_has_been_reported_that_human_abili.json",
    "human_like_2": "Please_complete_the_following_sentence._We_are_trying_to_reproduce_human_reading_times_with_the_word.json",
}

row_list = []
for model_dir in model_orders:
    # Any directory whose name contains "Llama" uses the Llama file names;
    # everything else uses the Falcon ones.
    prompt_files = llama_prompt_files if "Llama" in model_dir else falcon_prompt_files
    for prompt_key, file_name in prompt_files.items():
        # Context manager so each file handle is closed right after parsing.
        with open(f"../sampled/DC/{model_dir}/{file_name}") as f:
            sents = json.load(f)
        row_list.append({"model": model_dir, "prompt": prompt_key, "sents": sents})

In [None]:
# Build the results frame: one row per (model, prompt) pair, then add one
# column per metric holding the mean of that metric over the row's samples.
df = pd.DataFrame(row_list)

# Column name -> function that turns a list of raw outputs into a flat list
# of measurements. Dict order is the order in which the columns are added.
metric_extractors = {
    "dep_len": get_dep_lens,
    "sent_len": get_sent_lens,
    "word_freq": lambda samples: get_word_freqs(samples, exclude_stopwords=True),
    "word_len": get_word_lens,
}
for column, extract in metric_extractors.items():
    # Bind `extract` as a default argument so each lambda keeps its own function.
    df[column] = df["sents"].apply(lambda samples, extract=extract: mean(extract(samples)))

# LaTeX row label for each prompt condition.
df["id"] = df["prompt"].apply(lambda key: prompt2id[key])

In [None]:
# Print the results as LaTeX table rows: one row per prompt condition, and
# within a row one cell per (metric, model) pair, metrics in the outer order.
metrics = ["dep_len", "sent_len", "word_freq", "word_len"]
for prompt, label in prompt2id.items():
    cells = []
    for metric in metrics:
        # Sentence length is shown with one decimal, everything else with two.
        fmt = '{:,.1f}' if metric == "sent_len" else '{:,.2f}'
        for model in model_orders:
            selected = df[(df["prompt"] == prompt) & (df["model"] == model)][metric]
            score = fmt.format(selected.to_list()[0])
            cells.append("& " + score + " ")
    print(label + " " + "".join(cells) + "\\\\")

In [None]:
import glob
# glob.glob returns matches in arbitrary, filesystem-dependent order; sort
# them so the printed examples come out in the same order on every run.
files = sorted(glob.glob("../sampled/DC/Llama-2-70b-chat-hf/*.json"))

In [None]:
# Print one example generation per prompt file as LaTeX table rows, with the
# generated part of the answer wrapped in \textcolor{red}{...}.
for file in files:
    # Context manager so the file handle is closed after parsing.
    with open(file) as f:
        data = json.load(f)
    # The sample at index 7 of each file is the one shown.
    output = data[7]
    # Only outputs containing the "Answer:\n" marker are printed.
    if "Answer:\n" in output:
        # Drop empty lines so they do not turn into empty LaTeX rows.
        output = "\n".join([line for line in output.split("\n") if line])
        parts = output.split("Answer:\n")
        answer = parts[1]
        # Black part: everything up to the marker plus the first five words
        # of the answer's first line.
        prompt = parts[0] + "Answer:\n" + " ".join(answer.split("\n")[0].split()[:5])
        # Red part: all words of the answer after the fifth.
        output = prompt + " \\textcolor{red}{" + " ".join(answer.split()[5:]) + "}\n"
        # Each newline becomes a LaTeX line break; the final newline is trimmed.
        output = output.replace("\n", "\\\\\n").strip("\n")
        print(output)
        # Escaped backslash: "\m" is an invalid escape sequence and triggers a
        # warning on recent Python versions. The printed text is unchanged.
        print("\\midrule")