# Exp013: Create prompts for grammar-controlled text generation tasks

In [1]:
import random
import os
import pandas as pd
import sys
sys.path.append('../source')
import models
import data
import api
import evaluation
import importlib
#importlib.reload(data)

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
[nltk_data] Downloading package punkt to
[nltk_data]     /cluster/scratch/dglandorf/cache...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data]

In [22]:
# helper functions
# CEFR proficiency levels in ascending order.
levels = ["A1", "A2", "B1", "B2", "C1", "C2"]
# Maps each level name to its position in `levels`.
level_idx = dict(zip(levels, range(len(levels))))

def skills(rules, statement=True):
    """Render grammar rules as the text shown in a prompt.

    `rules` is indexed by the keys 'SubCategory', 'guideword' and
    'Can-do statement'. The values are combined with `+`, so a mapping of
    strings yields one string and a pandas DataFrame yields one string per row.

    With `statement=True` the result is a bullet line
    "- <SubCategory> - <guideword>: <Can-do statement>"; otherwise it is the
    short form "<SubCategory> <guideword>".
    """
    subcategory = rules['SubCategory']
    guideword = rules['guideword']
    if not statement:
        return subcategory + " " + guideword
    return "- " + subcategory + " - " + guideword + ": " + rules['Can-do statement']
    
def get_prompt(rules, snippet, negative_rules=None, dialog=True, all=False, level=None, statement=True):
    """Build the instruction prompt for a grammar-controlled continuation task.

    Parameters:
        rules: grammar rules to demonstrate; formatted with `skills()` and
            joined, so it should be a table of rules (e.g. a DataFrame slice
            of the EGP table). Ignored when `level` is given.
        snippet: text or dialog context that the model should continue.
        negative_rules: optional rules that must NOT be applied, same format
            as `rules`. `None` or an empty table adds no exclusion section.
        dialog: True asks for one dialog turn, False for two sentences of text.
        all: True demands all listed skills, False at least one of them.
            (The name shadows the builtin but is kept for existing callers.)
        level: if truthy, the prompt only names this CEFR level and neither
            `rules` nor `negative_rules` is listed.
        statement: True lists one full can-do bullet per line; False lists
            the short skill names separated by "; ".

    Returns:
        The prompt string.
    """
    sep = os.linesep if statement else "; "
    if level:
        constraints = f"grammar skills on CEFR level {level}."
    else:
        # A DataFrame/Series has no unambiguous truth value (`if negative_rules`
        # raises ValueError), so test for None and emptiness explicitly.
        has_negatives = negative_rules is not None and len(negative_rules) > 0
        if has_negatives:
            # Listed flush-left, exactly like the positive skills below.
            exclude = (
                "Do NOT apply the following grammar skills:\n"
                + sep.join(skills(negative_rules, statement=statement))
                + "\n"
            )
        else:
            exclude = ""
        quantifier = "all" if all else "at least one"
        listed = sep.join(skills(rules, statement=statement))
        constraints = f"{quantifier} of these grammar skills:\n{listed}\n{exclude}"
    task = "dialog with one turn" if dialog else "text with two sentences"
    return f"Continue the {task}, proving knowledge of {constraints}\nSnippet:\n{snippet}"
    
def classify_sents(classifiers, rules, sentences, verbose=True):
    """Score how often each grammar rule is detected in the given sentences.

    Parameters:
        classifiers: mapping from rule number (the '#' column) to a classifier.
        rules: DataFrame of rules; the '#' column is read for every row, and
            'Level' and 'Can-do statement' are read when printing.
        sentences: list of sentence strings to classify.
        verbose: if true, print the matching sentences for every rule that
            has at least one hit.

    Returns:
        One float per rule, in row order: the fraction of `sentences` whose
        score for that rule is above 0.5. With no sentences every fraction
        is 0.0.
    """
    if not sentences:
        # Nothing to classify (e.g. an empty response); avoid dividing by zero.
        return [0.0] * len(rules)
    results = []
    for _, rule in rules.iterrows():
        # NOTE(review): assumes the first element of probe_model's result holds
        # one score per sentence, in input order; confirm against models.probe_model.
        scores = models.probe_model(classifiers[rule['#']], sentences)
        hits = [sentences[i] for i, score in enumerate(scores[0]) if score > 0.5]
        # Boolean `and`, not bitwise `&`: `True & 2` is 0 and would hide even hit counts.
        if verbose and hits:
            print(f'\n{rule["Level"]}-{rule["Can-do statement"]} ({rule["#"]})\n{os.linesep.join(["- "+h for h in hits])}')
        results.append(len(hits) / len(sentences))
    return results

def sample_adjacent(lst, n=3):
    """Return `n` consecutive items of `lst`, starting at a random position.

    Parameters:
        lst: sequence to draw from (must support `len()` and slicing).
        n: number of adjacent items to return.

    Returns:
        The slice `lst[start:start + n]` for a uniformly drawn `start`.

    Raises:
        ValueError: if `lst` has fewer than `n` items.
    """
    if len(lst) < n:
        # Report the actual requirement instead of a hard-coded "two".
        raise ValueError(f"List must contain at least {n} items")
    start = random.randint(0, len(lst) - n)
    return lst[start:start + n]
    
def sample_dialog_snippet(dialogs):
    """Render a random dialog excerpt that ends with an open turn.

    Picks one dialog from `dialogs`, takes adjacent utterances from it via
    `sample_adjacent` and appends an empty utterance. Lines are labelled
    alternately "A: " and "B: ", always starting with "A", and joined with
    the platform line separator.
    """
    (chosen,) = random.sample(dialogs, 1)
    turns = sample_adjacent(chosen) + [""]
    lines = []
    for position, utterance in enumerate(turns):
        speaker = "B" if position % 2 else "A"
        lines.append(speaker + ": " + utterance)
    return os.linesep.join(lines)

def prompt_score(rules, snippet, classifiers, dialog, all, n_responses=1, level=None, statement=True, verbose=False):
    """Prompt the chat model and score its responses with the grammar classifiers.

    Builds the prompt with `get_prompt`, requests `n_responses` completions
    through `api.get_openai_chat_completion`, and classifies the sentences of
    every response with `classify_sents`. For dialog tasks the result of
    `evaluation.evaluate_responses` is printed as well, regardless of `verbose`.

    Returns:
        A list with one entry per response; each entry is the list returned
        by `classify_sents` for that response.
    """
    prompt = get_prompt(rules, snippet, dialog=dialog, all=all, level=level, statement=statement)
    if verbose:
        print(prompt)

    responses = []
    for _ in range(n_responses):
        # The first element of the API result is kept as the response text.
        completion = api.get_openai_chat_completion([{ "role": "user", "content": prompt}])  # , model="gpt-4-0125-preview"
        responses.append(completion[0])

    if verbose:
        print("RESPONSE from GPT3.5")
        print(responses)
    if dialog:
        print(evaluation.evaluate_responses([snippet], [responses], [list(rules['#'])], [[]]))

    scores = []
    for response in responses:
        scores.append(classify_sents(classifiers, rules, data.sent_tokenize(response), verbose))
    return scores

Load EGP rules, topical prompts and dialogs

In [3]:
# Grammar rule table; later cells filter it by its '#', 'Level' and 'SubCategory' columns.
egp = data.get_egp()

# Text beginnings used as snippets for the text-continuation tasks.
cefr = data.CEFRTexts()
stories = list(cefr.get_beginnings(100))

# Dialogs used as snippets for the dialog-continuation tasks.
ds = data.DialogSum()
dialogs = ds.get_dialogues()

# One classifier per file in ../models/corpus_training, keyed by the rule number
# obtained by stripping ".pth" from the file name.
classifiers = dict(
    (nr, models.load_classifier(nr, 'corpus_training'))
    for nr in [int(fname.replace(".pth", "")) for fname in os.listdir("../models/corpus_training")]
)

## Task 1: Single constraints

In [24]:
def sample_single_constraints(level=None, n_per_subcat=1, subcategories = ["would", "negation", "superlatives"]):
    """Sample rules of one CEFR level, `n_per_subcat` per subcategory.

    Only rules that have a loaded classifier are considered. When `level` is
    None a level is drawn at random from `levels`. Subcategories without a
    matching rule on that level do not appear in the result.
    """
    if level is None:
        (level,) = random.sample(levels, 1)
    in_subcategory = egp['SubCategory'].isin(subcategories)
    on_level = egp['Level'] == level
    has_classifier = egp['#'].isin(classifiers.keys())
    candidates = egp[in_subcategory & on_level & has_classifier]
    return candidates.groupby("SubCategory").sample(n_per_subcat)

For Text

In [93]:
# Sample the rules here so the cell runs on a fresh kernel; no earlier cell
# (in notebook order) defines `rules`.
rules = sample_single_constraints()
snippet = random.sample(stories, 1)[0]
prompt_score(rules, snippet, classifiers, dialog=False, all=True, verbose=True)

Continue the text with two sentences, proving knowledge of all of these grammar skills:
- negation - FORM: MODAL VERB 'CAN': Can use negative form 'can't'.
- would - USE: WISHES AND PREFERENCES WITH 'LIKE': Can use 'would like' to talk about wishes and preferences.

Snippet:

The first chart illustrates the percentage of the population who owned a smartphone from 2011 to 2016, and the second breaks the percentages down by age for 2011 and 2016.
RESPONSE from GPT3.5
However, it is important to note that the data for 2016 is not entirely accurate, as some respondents refused to answer the survey. Therefore, we can't fully rely on the statistics presented in the second chart.

A1-Can use negative form 'can't'. (1177)
- Therefore, we can't fully rely on the statistics presented in the second chart.


[0.5, 0.0]

For Dialog

In [27]:
# Five independent trials: fresh single constraints and a fresh dialog snippet
# each time, three model responses per prompt.
for i in range(5):
    rules = sample_single_constraints()
    snippet = sample_dialog_snippet(dialogs)
    prompt_score(
        rules,
        snippet,
        classifiers,
        dialog=True,
        all=True,
        n_responses=3,
        verbose=True,
    )
    print(100 * "_")

Continue the dialog with one turn, proving knowledge of all of these grammar skills:
- negation - FORM/USE: 'NEITHER': Can use 'Neither' or 'Nor' + with 'do' or 'be' + inverted subject to add to a previous related negative clause, to focus on an additional negative factor.
- would - FORM: WITH ADVERBS: Can use a wide range of adverbs with 'would', including 'undoubtedly', 'possibly', 'normally', 'personally', 'eventually', 'obviously', 'significantly', 'inevitably'. ► adverbs

Snippet:
A: What is the difference between a general doctor and a specialist?
B: A general doctor treats a variety of illnesses and will treat your whole family. A specialist concentrates in one area, like the heart or the skin.
A: Then I would like to find a general doctor.
B: 
RESPONSE from GPT3.5
['I neither would seek out a specialist nor would I consider any other option.', 'Neither do I doubt your choice, as they can provide comprehensive care for your entire family.', 'Neither do I doubt that a general doc

Contexts:   0%|          | 0/1 [00:00<?, ?it/s]

Responses:   0%|          | 0/3 [00:00<?, ?it/s]

{'Distinctiveness': [0.8979591836734694], 'positive_constraints': [[[1.0, 0.0], [1.0, 0.0], [1.0, 1.0]]], 'negative_constraints': [[[], [], []]], 'Appropriateness': [[2.0, 4.0, 2.0]], 'Relevance': [[2.0, 2.0, 2.0]], 'Content Richness': [[2.0, 3.0, 2.0]], 'Grammatical Correctness': [[4.0, 4.0, 4.0]]}

C2-Can use 'Neither' or 'Nor' + with 'do' or 'be' + inverted subject to add to a previous related negative clause, to focus on an additional negative factor. (1204)
- I neither would seek out a specialist nor would I consider any other option.

C2-Can use 'Neither' or 'Nor' + with 'do' or 'be' + inverted subject to add to a previous related negative clause, to focus on an additional negative factor. (1204)
- Neither do I doubt your choice, as they can provide comprehensive care for your entire family.

C2-Can use 'Neither' or 'Nor' + with 'do' or 'be' + inverted subject to add to a previous related negative clause, to focus on an additional negative factor. (1204)
- Neither do I doubt that

Contexts:   0%|          | 0/1 [00:00<?, ?it/s]

Responses:   0%|          | 0/3 [00:00<?, ?it/s]

{'Distinctiveness': [0.7555555555555555], 'positive_constraints': [[[1.0, 0.0], [1.0, 0.0], [0.0, 0.0]]], 'negative_constraints': [[[], [], []]], 'Appropriateness': [[2.0, 2.0, 2.0]], 'Relevance': [[2.0, 2.0, 4.0]], 'Content Richness': [[2.0, 3.0, 4.0]], 'Grammatical Correctness': [[4.0, 5.0, 5.0]]}

B1-Can use the negative forms of mental process verbs ('I don't think', 'I don't believe') followed by a complement clause, where the negative form is in the mental process verb rather than the complement clause. (1186)
- I really don't think the flight will be delayed.

B1-Can use the negative forms of mental process verbs ('I don't think', 'I don't believe') followed by a complement clause, where the negative form is in the mental process verb rather than the complement clause. (1186)
- I don't think the flight will be delayed, but I wouldn't count on arriving exactly on time.
____________________________________________________________________________________________________
Continue th

Contexts:   0%|          | 0/1 [00:00<?, ?it/s]

Responses:   0%|          | 0/3 [00:00<?, ?it/s]

{'Distinctiveness': [0.7307692307692307], 'positive_constraints': [[[0.0, 0.0], [0.0, 0.0], [0.0, 0.0]]], 'negative_constraints': [[[], [], []]], 'Appropriateness': [[4.0, 4.0, 4.0]], 'Relevance': [[4.0, 4.0, 4.0]], 'Content Richness': [[3.0, 4.0, 4.0]], 'Grammatical Correctness': [[4.0, 5.0, 5.0]]}
____________________________________________________________________________________________________
Continue the dialog with one turn, proving knowledge of all of these grammar skills:
- negation - FORM: AUXILIARY VERBS 'BE', 'HAVE', PAST: Can form negative statements of main verbs in the past continuous and past perfect with auxiliary verbs 'be' and 'have' + 'not/n't'. ► past continuous ► past perfect
- would - USE: REPORTED SPEECH: Can use 'would' as the past form of 'will' in reported clauses.

Snippet:
A: Dear, can you drive me to the clinic?
B: I'm afraid I can't.
A: Why?
B: 
RESPONSE from GPT3.5
["Because I wouldn't be able to drive you due to my previous appointment.", "Because I wo

Contexts:   0%|          | 0/1 [00:00<?, ?it/s]

Responses:   0%|          | 0/3 [00:00<?, ?it/s]

{'Distinctiveness': [0.7647058823529411], 'positive_constraints': [[[0.0, 0.0], [0.0, 0.0], [0.0, 0.0]]], 'negative_constraints': [[[], [], []]], 'Appropriateness': [[4.0, 4.0, 4.0]], 'Relevance': [[4.0, 4.0, 4.0]], 'Content Richness': [[3.0, 4.0, 2.0]], 'Grammatical Correctness': [[4.0, 4.0, 4.0]]}
____________________________________________________________________________________________________
Continue the dialog with one turn, proving knowledge of all of these grammar skills:
- negation - FORM: MAIN VERB 'BE': Can form negative statements of main verb 'be', with contracted and uncontracted forms.
- would - FORM: AFFIRMATIVE WITH 'LIKE': Can use the affirmative form with 'like'. 

Snippet:
A: But it isn't all right. I made you drop your shopping bags. Let me pick these bags up.
B: You must have been in a great hurry. Never mind.
A: I was only going to catch the bus. I'm afraid I wasn't looking where I was going. I hope nothing was destroyed.
B: 
RESPONSE from GPT3.5
["I don't thin

Contexts:   0%|          | 0/1 [00:00<?, ?it/s]

Responses:   0%|          | 0/3 [00:00<?, ?it/s]

{'Distinctiveness': [0.9310344827586207], 'positive_constraints': [[[0.0, 0.0], [0.0, 0.0], [0.0, 0.3333333333333333]]], 'negative_constraints': [[[], [], []]], 'Appropriateness': [[4.0, 4.0, 4.0]], 'Relevance': [[4.0, 2.0, 4.0]], 'Content Richness': [[4.0, 3.0, 4.0]], 'Grammatical Correctness': [[4.0, 4.0, 4.0]]}

A1-Can use the affirmative form with 'like'.  (616)
- And besides, I would like to thank you for offering to help.
____________________________________________________________________________________________________


## Task 2: Combine subcategories

Choose subcategory and level skills

In [31]:
def sample_subcategory_rules(subcategories = ["would", "negation", "superlatives"], levels = ["B1", "B2", "B1"]):
    """Collect all rules for each (subcategory, level) pair.

    `subcategories` and `levels` are paired position by position. For every
    pair, all EGP rules of that subcategory and level that have a loaded
    classifier are selected. Despite the name, nothing is drawn at random.
    The `levels` parameter shadows the module-level list of the same name.

    Returns:
        The selected rules concatenated in pair order.
    """
    has_classifier = egp['#'].isin(classifiers.keys())
    selected = []
    for subcat, level in zip(subcategories, levels):
        matches = (egp['SubCategory'] == subcat) & (egp['Level'] == level) & has_classifier
        selected.append(egp[matches])
    return pd.concat(selected)

Assemble prompt for a random story beginning, test it with GPT3.5 and use classifiers to score the response

In [17]:
# Text task: any one of the listed skills is enough; skills are shown in short form.
rules = sample_subcategory_rules()
(snippet,) = random.sample(stories, 1)
prompt_score(
    rules,
    snippet,
    classifiers,
    dialog=False,
    all=False,
    statement=False,
    verbose=True,
)

Continue the text with two sentences, proving knowledge of at least one of these grammar skills:
would FORM/USE: AFTER 'IF' CLAUSES; would FORM: PAST AFFIRMATIVE; would FORM: PAST NEGATIVE; would FORM: QUESTIONS; would FORM: WITH ADVERBS; would USE: FUTURE IN THE PAST; would USE: IMAGINED SITUATIONS IN THE PAST; would USE: INDIRECTNESS; would USE: POLITE REQUESTS; would USE: REPORTED SPEECH; would USE: WILLINGNESS IN THE PAST; negation FORM/USE: 'NOT', EMPHASIS; negation FORM/USE: 'NEVER', INVERTED FRONT POSITION, FOCUS; negation FORM/USE: 'NEITHER ... NOR'

Snippet:
Nobody knows which came first: the economic crisis tearing Greece apart or shisha, the drug now known as the “cocaine of the poor”.
RESPONSE from GPT3.5
If the economic crisis had not brought such devastation to Greece, perhaps the popularity of shisha would not have reached such heights. Would you happen to know where one can find reliable statistics on the correlation between the two phenomena?

B1-Can use 'would' in the

[0.5, 0.5, 0.5, 0.5, 0.0, 0.0, 0.5, 0.5, 0.0, 0.0, 0.0, 0.5, 0.0, 0.0]

Do this with dialogs

In [34]:
# Five dialog trials with the combined subcategory rules, three responses each.
for i in range(5):
    rules = sample_subcategory_rules()
    snippet = sample_dialog_snippet(dialogs)
    prompt_score(
        rules,
        snippet,
        classifiers,
        dialog=True,
        all=False,
        statement=False,
        n_responses=3,
        verbose=True,
    )
    print(100 * "_")

Continue the dialog with one turn, proving knowledge of at least one of these grammar skills:
would FORM/USE: AFTER 'IF' CLAUSES; would FORM: PAST AFFIRMATIVE; would FORM: PAST NEGATIVE; would FORM: QUESTIONS; would FORM: WITH ADVERBS; would USE: FUTURE IN THE PAST; would USE: IMAGINED SITUATIONS IN THE PAST; would USE: INDIRECTNESS; would USE: POLITE REQUESTS; would USE: REPORTED SPEECH; would USE: WILLINGNESS IN THE PAST; negation FORM/USE: 'NOT', EMPHASIS; negation FORM/USE: 'NEVER', INVERTED FRONT POSITION, FOCUS; negation FORM/USE: 'NEITHER ... NOR'

Snippet:
A: I didn't sleep a wink last night.
B: Did you have something on your mind? You look so concerned! Maybe I can help you.
A: Well, I'm under a lot of pressure. My boss is very pushy. He assigned me two projects. Now the deadlines are near and I still haven't finish one of them.
B: 
RESPONSE from GPT3.5
['If I were you, I would talk to your boss and explain the situation. Maybe he could give you an extension.', 'If you would l

Contexts:   0%|          | 0/1 [00:00<?, ?it/s]

Responses:   0%|          | 0/3 [00:00<?, ?it/s]

{'Distinctiveness': [0.7164179104477612], 'positive_constraints': [[[0.5, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.5, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]]], 'negative_constraints': [[[], [], []]], 'Appropriateness': [[4.0, 4.0, 4.0]], 'Relevance': [[4.0, 4.0, 4.0]], 'Content Richness': [[4.0, 4.0, 4.0]], 'Grammatical Correctness': [[5.0, 5.0, 5.0]]}

B1-Can use 'would' in the main clause of a conditional sentence to talk about an imagined situation, often in the context of advice or opinion-giving. (625)
- If I were you, I would talk to your boss and explain the situation.

B1-Can use 'would' with verbs such as 'advise', 'imagine', 'recommend', 'say' to be less direct. (632)
- If I were you, I would talk to your boss and explain the situation.

B1-Can use 'would' in the main clause of a conditional sentence to talk about an imagined situation, often in the co

Contexts:   0%|          | 0/1 [00:00<?, ?it/s]

Responses:   0%|          | 0/3 [00:00<?, ?it/s]

{'Distinctiveness': [0.45161290322580644], 'positive_constraints': [[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]]], 'negative_constraints': [[[], [], []]], 'Appropriateness': [[4.0, 4.0, 4.0]], 'Relevance': [[4.0, 4.0, 4.0]], 'Content Richness': [[2.0, 3.0, 2.0]], 'Grammatical Correctness': [[5.0, 5.0, 5.0]]}
____________________________________________________________________________________________________
Continue the dialog with one turn, proving knowledge of at least one of these grammar skills:
would FORM/USE: AFTER 'IF' CLAUSES; would FORM: PAST AFFIRMATIVE; would FORM: PAST NEGATIVE; would FORM: QUESTIONS; would FORM: WITH ADVERBS; would USE: FUTURE IN THE PAST; would USE: IMAGINED SITUATIONS IN THE PAST; would USE: INDIRECTNESS; would USE: POLITE REQUESTS; would USE: REPORTED SPEECH; would USE: WILLINGNESS IN

Contexts:   0%|          | 0/1 [00:00<?, ?it/s]

Responses:   0%|          | 0/3 [00:00<?, ?it/s]

{'Distinctiveness': [0.948051948051948], 'positive_constraints': [[[0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]]], 'negative_constraints': [[[], [], []]], 'Appropriateness': [[4.0, 4.0, 4.0]], 'Relevance': [[4.0, 4.0, 4.0]], 'Content Richness': [[4.0, 4.0, 4.0]], 'Grammatical Correctness': [[5.0, 4.0, 4.0]]}

B1-Can use an limited range of adverbs with 'would', including 'really', 'probably', 'certainly', 'definitely'.► adverbs (629)
- Well, if I may say so, you would definitely excel in a leadership role with your innovative ideas and strategic thinking.

B1-Can use an limited range of adverbs with 'would', including 'really', 'probably', 'certainly', 'definitely'.► adverbs (629)
- If you don't mind me saying, I think your ideas would definitely take our marketing efforts to the next level.
___________________________

Contexts:   0%|          | 0/1 [00:00<?, ?it/s]

Responses:   0%|          | 0/3 [00:00<?, ?it/s]

{'Distinctiveness': [0.7777777777777778], 'positive_constraints': [[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]]], 'negative_constraints': [[[], [], []]], 'Appropriateness': [[4.0, 4.0, 4.0]], 'Relevance': [[4.0, 4.0, 4.0]], 'Content Richness': [[4.0, 3.0, 4.0]], 'Grammatical Correctness': [[4.0, 4.0, 4.0]]}
____________________________________________________________________________________________________
Continue the dialog with one turn, proving knowledge of at least one of these grammar skills:
would FORM/USE: AFTER 'IF' CLAUSES; would FORM: PAST AFFIRMATIVE; would FORM: PAST NEGATIVE; would FORM: QUESTIONS; would FORM: WITH ADVERBS; would USE: FUTURE IN THE PAST; would USE: IMAGINED SITUATIONS IN THE PAST; would USE: INDIRECTNESS; would USE: POLITE REQUESTS; would USE: REPORTED SPEECH; would USE: WILLINGNESS IN 

Contexts:   0%|          | 0/1 [00:00<?, ?it/s]

Responses:   0%|          | 0/3 [00:00<?, ?it/s]

{'Distinctiveness': [0.8636363636363636], 'positive_constraints': [[[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0]]], 'negative_constraints': [[[], [], []]], 'Appropriateness': [[4.0, 4.0, 4.0]], 'Relevance': [[4.0, 4.0, 4.0]], 'Content Richness': [[4.0, 3.0, 3.0]], 'Grammatical Correctness': [[5.0, 4.0, 5.0]]}

B1-Can use 'would' in the main clause of a conditional sentence to talk about an imagined situation, often in the context of advice or opinion-giving. (625)
- If you would consider budgeting wisely, you could save a lot of money in the long run.

B1-Can use 'would' in the main clause of a conditional sentence to talk about an imagined situation, often in the context of advice or opinion-giving. (625)
- If you start saving now, you would see the benefits in the future.

B1-Can use question forms. (628)
- Would you

## Task 3: All skills from one difficulty level

In [36]:
def sample_level_rules(level = None):
    """Return a CEFR level and all rules of that level that have a classifier.

    When `level` is None, one is drawn at random from `levels`.

    Returns:
        A tuple `(level, rules)` where `rules` is the filtered slice of `egp`.
    """
    if level is None:
        (level,) = random.sample(levels, 1)
    on_level = egp['Level'] == level
    has_classifier = egp['#'].isin(classifiers.keys())
    return level, egp[on_level & has_classifier]

In [37]:
# Text task: the prompt names only the CEFR level; the level's rules are used for scoring.
level, rules = sample_level_rules()
(snippet,) = random.sample(stories, 1)
prompt_score(
    rules,
    snippet,
    classifiers,
    dialog=False,
    all=False,
    level=level,
    verbose=True,
)

Continue the text with two sentences, proving knowledge of grammar skills on CEFR level C1.
Snippet:
On December 7, 1941, the United States of America was attacked by the Imperial Japanese Navy at Pearl Harbor, Hawaii.
RESPONSE from GPT3.5
["This event led to the United States' entry into World War II, marking a turning point in the conflict and drastically changing the course of history. The surprise attack resulted in significant damage to the US Pacific Fleet and the loss of thousands of lives."]


[[0.0, 0.0, 0.0, 0.0]]

In [39]:
# Five dialog trials, each with a freshly drawn CEFR level and three responses.
for i in range(5):
    level, rules = sample_level_rules()
    snippet = sample_dialog_snippet(dialogs)
    prompt_score(
        rules,
        snippet,
        classifiers,
        dialog=True,
        all=False,
        level=level,
        n_responses=3,
        verbose=True,
    )
    print(100 * "_")

Continue the dialog with one turn, proving knowledge of grammar skills on CEFR level B1.
Snippet:
A: That is what tornadoes sound like. They sound like trains. They're very loud.
B: But if you hear them coming, isn't it already too late?
A: Maybe. It depends on the tornado. Some can move across the ground at 200 miles an hour. That is very fast. Others aren't so fast.
B: 
RESPONSE from GPT3.5
['Yes, I heard that the speed of a tornado can vary greatly depending on its size and strength.', 'I heard that tornadoes can leave a path of destruction behind them.', "Yes, I've heard that tornadoes can vary in speed depending on their size and strength. I once read that the Enhanced Fujita Scale is used to measure their intensity."]


Contexts:   0%|          | 0/1 [00:00<?, ?it/s]

Responses:   0%|          | 0/3 [00:00<?, ?it/s]

{'Distinctiveness': [0.7936507936507936], 'positive_constraints': [[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]]], 'negative_constraints': [[[], [], []]], 'Appropriateness': [[4.0, 4.0, 4.0]], 'Relevance': [[4.0, 4.0, 4.0]], 'Content Richness': [[4.0, 3.0, 4.0]], 'Grammatical Correctness': [[5.0, 4.0, 5.0]]}
____________________________________________________________________________________________________
Continue the dialog with one turn, proving knowledge of grammar skills on CEFR level C2.
Snippet:
A: I appreciate it very much. You've been doing great. I expect to see you be promoted pretty soon.
B: that's very nice of you to say so. I'll try my best. When will you start at the new position?
A: next week.
B: 
RESPONSE from GPT3.5
["I'm happy to hear that. I will make s

Contexts:   0%|          | 0/1 [00:00<?, ?it/s]

Responses:   0%|          | 0/3 [00:00<?, ?it/s]

{'Distinctiveness': [0.9615384615384616], 'positive_constraints': [[[0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0]]], 'negative_constraints': [[[], [], []]], 'Appropriateness': [[4.0, 4.0, 4.0]], 'Relevance': [[4.0, 4.0, 4.0]], 'Content Richness': [[4.0, 4.0, 3.0]], 'Grammatical Correctness': [[4.0, 5.0, 5.0]]}
____________________________________________________________________________________________________
Continue the dialog with one turn, proving knowledge of grammar skills on CEFR level C2.
Snippet:
A: Hi, Peter. I'm afraid I can't make it to dinner tomorrow night.
B: Oh, why not?
A: It's just I'm not feeling well. I think I'm coming down with a cold or something.
B: 
RESPONSE from GPT3.5
['I can completely understand, health should always come first.', "I'm really sorry to hear that. Make sure to get plenty of rest and stay hydrated.", "That's too bad. I hope you feel better soon."]


Contexts:   0%|          | 0/1 [00:00<?, ?it/s]

Responses:   0%|          | 0/3 [00:00<?, ?it/s]

{'Distinctiveness': [1.0], 'positive_constraints': [[[0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0]]], 'negative_constraints': [[[], [], []]], 'Appropriateness': [[4.0, 4.0, 4.0]], 'Relevance': [[4.0, 4.0, 4.0]], 'Content Richness': [[4.0, 4.0, 2.0]], 'Grammatical Correctness': [[5.0, 5.0, 5.0]]}
____________________________________________________________________________________________________
Continue the dialog with one turn, proving knowledge of grammar skills on CEFR level C2.
Snippet:
A: I don't have a good camera. How am I going to take nice pictures of the butterflies? They are so colorful. I want to get in really close so everyone can see the details.
B: Our photography lab loans cameras to all students enrolled in my class. Don't worry. Just show your student ID and they will give you one.
A: What if I lose it? Or break it?
B: 
RESPONSE from GPT3.5
['If you happen to lose or break the camera, you will be responsible for covering the cost of repair or replac

Contexts:   0%|          | 0/1 [00:00<?, ?it/s]

Responses:   0%|          | 0/3 [00:00<?, ?it/s]

{'Distinctiveness': [0.8392857142857143], 'positive_constraints': [[[0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0]]], 'negative_constraints': [[[], [], []]], 'Appropriateness': [[4.0, 4.0, 4.0]], 'Relevance': [[4.0, 4.0, 4.0]], 'Content Richness': [[4.0, 4.0, 4.0]], 'Grammatical Correctness': [[5.0, 4.0, 4.0]]}
____________________________________________________________________________________________________
Continue the dialog with one turn, proving knowledge of grammar skills on CEFR level A1.
Snippet:
A: No, there isn't. They are all vacuumed.
B: I wonder how old these things are, thousands of years?
A: Yeah, they all come from a very ancient time.
B: 
RESPONSE from GPT3.5
['I think they might be relics from ancient civilizations.', 'How interesting! They must hold a lot of history.', 'So they must be very valuable.']


Contexts:   0%|          | 0/1 [00:00<?, ?it/s]

Responses:   0%|          | 0/3 [00:00<?, ?it/s]

{'Distinctiveness': [1.0], 'positive_constraints': [[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0, 0.0, 0.0]]], 'negative_constraints': [[[], [], []]], 'Appropriateness': [[4.0, 4.0, 4.0]], 'Relevance': [[4.0, 4.0, 4.0]], 'Content Richness': [[4.0, 4.0, 4.0]], 'Grammatical Correctness': [[5.0, 5.0, 5.0]]}
____________________________________________________________________________________________________
