# Exp013: Create constraints and prompts for grammar-controlled text generation tasks

In [8]:
import random
import os
from dotenv import load_dotenv
load_dotenv()
import pandas as pd
import sys
sys.path.append('../source')
import models
import data
import api
import evaluation
import importlib
#importlib.reload(data)

In [45]:
# helper functions
levels = ["A1", "A2", "B1", "B2", "C1", "C2"] 
level_idx = {level: i for i, level in enumerate(levels)}

def skills(rules, statement=True):
     if statement: return "- " + rules['SubCategory'] + " - " + rules['guideword'] + ": " + rules['Can-do statement']
     return rules['SubCategory'] + " " + rules['guideword']
    
def get_prompt(rules, snippet, negative_rules=None, dialog=True, all=False, level=None, statement=True):
    sep = os.linesep if statement else "; "
    if level:
        constraints = f"grammar skills on CEFR level {level}."
    else: 
        exclude = f"""Do NOT apply the following grammar skills:
    {sep.join(skills(negative_rules, statement=statement))}
""" if negative_rules else ""
        constraints = f"""{"all" if all else "at least one"} of these grammar skills:
{sep.join(skills(rules, statement=statement))}
{exclude}"""
    return f"""Continue the {"dialog with one turn" if dialog else "text with two sentences"}, proving knowledge of {constraints}
Snippet:
{snippet}"""
    
def classify_sents(classifiers, rules, sentences, verbose=True):
    results = []
    for idx, rule in rules.iterrows():
        scores = models.probe_model(classifiers[rule['#']], sentences)
        hits = [sentences[i] for i, score in enumerate(scores[0]) if score > 0.5]
        if verbose & len(hits): print(f'\n{rule["Level"]}-{rule["Can-do statement"]} ({rule["#"]})\n{os.linesep.join(["- "+h for h in hits])}')
        results.append(len(hits)/len(sentences))
    return results

def sample_adjacent(lst, n=4):
    if len(lst) < n+1: return False
    index = random.randint(0, len(lst) - n)
    return lst[index:index+n]
    
def sample_dialog_snippet(dialogs, n=4):
    dialog = random.sample(dialogs, 1)[0]
    utterances = sample_adjacent(dialog, n=n+1)
    while not utterances:
        dialog = random.sample(dialogs, 1)[0]
        utterances = sample_adjacent(dialog, n=n+1)
    return os.linesep.join([("A" if (i%2==0) else "B") + ": " + utt for i, utt in enumerate(utterances[:-1] + [""])]), utterances[-1]

def prompt_score(rules, snippet, classifiers, dialog, all, n_responses=1, level=None, statement=True, verbose=False):
    prompt = get_prompt(rules, snippet, dialog=dialog, all=all, level=level, statement=statement)
    if verbose: print(prompt)
    responses = [api.get_openai_chat_completion([{ "role": "user", "content": prompt}])[0] for _ in range(n_responses)] #, model="gpt-4-0125-preview"
    if verbose: print("RESPONSE from GPT3.5")
    if verbose: print(responses)
    if dialog: print(evaluation.evaluate_responses([snippet], [responses], [list(rules['#'])], [[]]))
    return [classify_sents(classifiers, rules, data.sent_tokenize(response), verbose) for response in responses]

Load EGP rules, topical prompts and dialogs

In [50]:
egp = data.get_egp()
cefr = data.CEFRTexts()
stories = list(cefr.get_beginnings(100))
ds = data.DialogSum()
dd = data.DailyDialog()
wow = data.WoW()
dialogs = ds.get_dialogues() + dd.get_dialogues() + wow.get_dialogues()
classifiers = {nr: models.load_classifier(nr, 'corpus_training') for nr in [int(name.replace(".pth","")) for name in os.listdir(f"../models/corpus_training")]}

General parameters for datasets
1. text vs dialog -> dialog
2. context length -> 4 turns (trade off between context length and informativeness)

Dev datasets: Wizard of Wikipedia, Daily Dialog, Dialog Sum
Test datasets: CMUDoG

- Should the dialog contexts be as diverse as possible, i.e. different for each datapoint or the same set of dialog contexts included in every combination of parameters?
- Is it reasonable to work with a fixed number of turns both in training and test data?
- Is it okay to include no or only an unconditioned ground truth answer in train and test set if all metrics are reference-free?

## Task 1: Single constraints

In [46]:
def sample_single_constraints(level=None, n_per_subcat=1, subcategories = ["would", "negation", "superlatives"]):
    if level is None: level = random.sample(levels, 1)[0]
    return egp[(egp['SubCategory'].isin(subcategories)) & (egp['Level']==level) & egp['#'].isin(classifiers.keys())].groupby("SubCategory").sample(n_per_subcat)

For Text

In [47]:
snippet = random.sample(stories, 1)[0]
rules = sample_single_constraints()
prompt_score(rules, snippet, classifiers, dialog=False, all=True, verbose=True)

Continue the text with two sentences, proving knowledge of all of these grammar skills:
- negation - FORM: AUXILIARY VERB 'DO', PAST: Can form negative statements of main verbs in the past simple with 'didn't' + main verb. ► past simple
- superlatives - FORM/USE: COMPLEX NOUN PHRASES: Can form a limited range of complex noun phrases with a superlative adjective  + prepositional phrase, to talk about something unique.► noun phrases ►  clauses: comparison
- would - USE: SUGGESTIONS WITH 'IT WOULD BE': Can use 'it would be' to make suggestions.

Snippet:
Andy Murray's first match since undergoing back surgery in September ended in a straight sets defeat to Jo-Wilfried Tsonga at an exhibition tournament in Abu Dhabi Thursday.
RESPONSE from GPT3.5
["However, despite the loss, Murray didn't disappoint his fans with his performance on the court. Some would argue that it was his strongest showing since his surgery."]

A2-Can form negative statements of main verbs in the past simple with 'didn'

[[0.5, 0.0, 0.0]]

For Dialog

In [51]:
for i in range(5):
    rules = sample_single_constraints()
    snippet, _ = sample_dialog_snippet(dialogs)
    prompt_score(rules, snippet, classifiers, dialog=True, all=True, verbose=True, n_responses=3)
    print("_" * 100)

Continue the dialog with one turn, proving knowledge of all of these grammar skills:
- negation - FORM: MODAL VERB 'CAN': Can use negative form 'can't'.
- superlatives - FORM: 'MY BEST FRIEND': Can use the irregular superlative adjective 'best' in the phrase 'my best friend'.
- would - USE: INVITATIONS WITH 'LIKE': Can use 'would like to invite' to make invitations.

Snippet:
A: Do you have any plans for tomorrow night?
B: Not really. I wasn thinking of maybe going to a movie. Do you want to go with me?
A: Sure. What movies were you thinking about going to?
B: Have you heard of Hot Fuzz?
A: 
RESPONSE from GPT3.5
["Yes, I have! I've heard it's one of the best comedies ever made. My best friend would like to invite you to see it with us tomorrow night.", "Yes, I have! I heard it's the best movie of all time. My best friend saw it and said it was amazing. Would you like to see it tomorrow night?", "Yes, I've heard of it. It's one of my best friend's favorite movies. I would like to invite

Contexts:   0%|          | 0/1 [00:00<?, ?it/s]

Responses:   0%|          | 0/3 [00:00<?, ?it/s]

{'Distinctiveness': [0.6952380952380952], 'positive_constraints': [[[0.0, 0.3333333333333333, 0.3333333333333333], [0.0, 0.25, 0.0], [0.0, 0.3333333333333333, 0.3333333333333333]]], 'negative_constraints': [[[], [], []]], 'Appropriateness': [[2.0, 4.0, 3.0]], 'Relevance': [[2.0, 4.0, 4.0]], 'Content Richness': [[3.0, 4.0, 4.0]], 'Grammatical Correctness': [[4.0, 5.0, 4.0]]}

A1-Can use the irregular superlative adjective 'best' in the phrase 'my best friend'. (57)
- My best friend would like to invite you to see it with us tomorrow night.

A1-Can use 'would like to invite' to make invitations. (617)
- My best friend would like to invite you to see it with us tomorrow night.

A1-Can use the irregular superlative adjective 'best' in the phrase 'my best friend'. (57)
- My best friend saw it and said it was amazing.

A1-Can use the irregular superlative adjective 'best' in the phrase 'my best friend'. (57)
- It's one of my best friend's favorite movies.

A1-Can use 'would like to invite' t

Contexts:   0%|          | 0/1 [00:00<?, ?it/s]

Responses:   0%|          | 0/3 [00:00<?, ?it/s]

{'Distinctiveness': [0.6481481481481481], 'positive_constraints': [[[0.0, 0.5], [0.0, 0.0], [0.0, 0.0]]], 'negative_constraints': [[[], [], []]], 'Appropriateness': [[4.0, 4.0, 4.0]], 'Relevance': [[4.0, 4.0, 4.0]], 'Content Richness': [[4.0, 4.0, 4.0]], 'Grammatical Correctness': [[4.0, 4.0, 5.0]]}

B1-Can use 'would' in the main clause of a conditional sentence to talk about an imagined situation, often in the context of advice or opinion-giving. (625)
- If it were a spinoff, the story would focus on one specific character or plotline from the original series.
____________________________________________________________________________________________________
Continue the dialog with one turn, proving knowledge of all of these grammar skills:
- negation - FORM/USE: 'NEVER', INVERTED FRONT POSITION, FOCUS: Can use 'never' in front position followed by an inverted subject (most commonly 'I') + main verb, to give focus. 
- would - USE: HABITUAL PAST: Can use 'would' to talk about habitu

Contexts:   0%|          | 0/1 [00:00<?, ?it/s]

Responses:   0%|          | 0/3 [00:00<?, ?it/s]

{'Distinctiveness': [0.9534883720930233], 'positive_constraints': [[[0.0, 0.0], [0.0, 0.0], [0.0, 0.0]]], 'negative_constraints': [[[], [], []]], 'Appropriateness': [[4.0, 4.0, 1.0]], 'Relevance': [[2.0, 2.0, 1.0]], 'Content Richness': [[2.0, 2.0, 1.0]], 'Grammatical Correctness': [[4.0, 4.0, 2.0]]}
____________________________________________________________________________________________________
Continue the dialog with one turn, proving knowledge of all of these grammar skills:
- negation - FORM/USE: 'NEITHER ... NOR': Can use 'neither ... nor' to connect two words, phrases or clauses, often to give emphasis or focus.
- would - USE: HABITUAL PAST: Can use 'would' to talk about habitual actions and events in the past.

Snippet:
A: NO!
B: I haven't even told you what it is yet!
A: Okay, okay, what do you want?
B: Do you think I could borrow the car? I'm going to a concert tonight.
A: 
RESPONSE from GPT3.5
["I'm sorry, but neither the car nor my motorcycle is available tonight. And, I

Contexts:   0%|          | 0/1 [00:00<?, ?it/s]

Responses:   0%|          | 0/3 [00:00<?, ?it/s]

{'Distinctiveness': [0.9], 'positive_constraints': [[[0.5, 0.0], [0.0, 0.0], [0.0, 0.0]]], 'negative_constraints': [[[], [], []]], 'Appropriateness': [[2.0, 2.0, 2.0]], 'Relevance': [[2.0, 2.0, 2.0]], 'Content Richness': [[4.0, 2.0, 4.0]], 'Grammatical Correctness': [[4.0, 4.0, 4.0]]}

B2-Can use 'neither ... nor' to connect two words, phrases or clauses, often to give emphasis or focus. (1194)
- I'm sorry, but neither the car nor my motorcycle is available tonight.
____________________________________________________________________________________________________
Continue the dialog with one turn, proving knowledge of all of these grammar skills:
- negation - FORM: 'NOT ALL', 'NOT EVERY': Can use 'not with indefinite pronouns 'everyone' and 'everything' and determiners 'every', 'all'.
- would - FORM: WITH ADVERBS: Can use an increasing range of adverbs with 'would', including 'strongly', 'easily', 'especially', 'actually', 'absolutely', 'gladly'  ► adverbs

Snippet:
A: I feel totally

Contexts:   0%|          | 0/1 [00:00<?, ?it/s]

Responses:   0%|          | 0/3 [00:00<?, ?it/s]

{'Distinctiveness': [0.9361702127659575], 'positive_constraints': [[[1.0, 0.0], [1.0, 0.0], [1.0, 1.0]]], 'negative_constraints': [[[], [], []]], 'Appropriateness': [[2.0, 2.0, 2.0]], 'Relevance': [[2.0, 2.0, 2.0]], 'Content Richness': [[2.0, 2.0, 2.0]], 'Grammatical Correctness': [[4.0, 4.0, 4.0]]}

C1-Can use 'not with indefinite pronouns 'everyone' and 'everything' and determiners 'every', 'all'. (1197)
- Not everyone has time to go ten times to the spa, so I'll pass on the coupon.

C1-Can use 'not with indefinite pronouns 'everyone' and 'everything' and determiners 'every', 'all'. (1197)
- Not everyone can use them though, not all students are eligible.

C1-Can use 'not with indefinite pronouns 'everyone' and 'everything' and determiners 'every', 'all'. (1197)
- Not all of us would actually be able to use them, so maybe next time.

C1-Can use an increasing range of adverbs with 'would', including 'strongly', 'easily', 'especially', 'actually', 'absolutely', 'gladly'  ► adverbs (637

Single constraints parameters
1. number of single EGP skill constraints -> 1-6 (unreasonable to have more than six constraints in one answer)
2. number of subcategories to choose constraints from -> 1-3 (cost of developing more classifiers)
3. CEFR levels -> A1-C2

dataset format

CTX_SOURCE_DS | CTX_SOURCE_ID | CONTEXT | RESPONSE | EGP_NRS
Daily Dialog | 023 | A: Hey B: Hey A: How are you? | B: I'm good | 616,636,1199

In [49]:
subcats = ["would", "negation", "superlatives"]
num_constraints = list(range(1,1+6))
num_subcats = list(range(1,1+3))
num_dialogs = 2
random.seed(os.getenv("RANDOM_SEED"))
#dd = data.DialogSum()
#dd = data.DailyDialog()
#dd = data.WoW()
dd = data.CMUDoG()
dialogs = dd.get_dialogues()
dialog_contexts = [sample_dialog_snippet(dialogs, n=4) for _ in range(num_dialogs)]

for context, response in dialog_contexts:
    print(f"Context: {context}")
    print(f"True response: {response}")
    for num_constraint in num_constraints:
        for num_subcat in num_subcats:
            for level in levels:
                pass
                # add to dataframe

Context: A: I do too! Bruce Almighty was hilarious
B: Yes it was, he doesn't disappointed, I love his movies, including Dick and Jane and Truman Show!
A: also has quite a few other big name actors and actresses like Freeman and Aniston!
B: Fun with Dick and Jane is one of his best in my opinion.
A: 
True response: he is talented for sure, like to get into character
Context: A: I wonder what other powers she has
B: yeah but she turns everything she touches to ice - thats scary :)
A: and she has no control over it!
B: haha that is scary
A: 
True response: I guess that means no sex


## Task 2: Combine subcategories

Choose subcategory and level skills

In [52]:
def sample_subcategory_rules(subcategories = ["would", "negation", "superlatives"], levels = ["B1", "B2", "B1"]):
    filter_clf = (egp['#'].isin(classifiers.keys()))
    return pd.concat([egp[(egp['SubCategory'] == subcat) & (egp['Level']==level) & filter_clf] for subcat, level in zip(subcategories, levels)])

Assemble prompt for a random story beginning, test it with GPT3.5 and use classifiers to score the response

In [53]:
rules = sample_subcategory_rules()
snippet = random.sample(stories, 1)[0]
prompt_score(rules, snippet, classifiers, dialog=False, all=False, verbose=True, statement=False)

Continue the text with two sentences, proving knowledge of at least one of these grammar skills:
would FORM/USE: AFTER 'IF' CLAUSES; would FORM: PAST AFFIRMATIVE; would FORM: PAST NEGATIVE; would FORM: QUESTIONS; would FORM: WITH ADVERBS; would USE: FUTURE IN THE PAST; would USE: IMAGINED SITUATIONS IN THE PAST; would USE: INDIRECTNESS; would USE: POLITE REQUESTS; would USE: REPORTED SPEECH; would USE: WILLINGNESS IN THE PAST; negation FORM/USE: 'NOT', EMPHASIS; negation FORM/USE: 'NEVER', INVERTED FRONT POSITION, FOCUS; negation FORM/USE: 'NEITHER ... NOR'

Snippet:
Some people stood on the small pier.
RESPONSE from GPT3.5
['If they had known about the storm, they would not have gone sailing. Instead, they would have stayed safely on land.']

B1-Can use 'would' in the main clause of a conditional sentence to talk about an imagined situation, often in the context of advice or opinion-giving. (625)
- If they had known about the storm, they would not have gone sailing.

B1-Can use 'would

[[0.5, 1.0, 0.5, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.5, 0.0, 0.5, 0.0, 0.0]]

Do this with dialogs

In [54]:
for i in range(5):
    rules = sample_subcategory_rules()
    snippet = sample_dialog_snippet(dialogs)
    prompt_score(rules, snippet, classifiers, dialog=True, all=False, verbose=True, statement=False, n_responses=3)
    print("_" * 100)

Continue the dialog with one turn, proving knowledge of at least one of these grammar skills:
would FORM/USE: AFTER 'IF' CLAUSES; would FORM: PAST AFFIRMATIVE; would FORM: PAST NEGATIVE; would FORM: QUESTIONS; would FORM: WITH ADVERBS; would USE: FUTURE IN THE PAST; would USE: IMAGINED SITUATIONS IN THE PAST; would USE: INDIRECTNESS; would USE: POLITE REQUESTS; would USE: REPORTED SPEECH; would USE: WILLINGNESS IN THE PAST; negation FORM/USE: 'NOT', EMPHASIS; negation FORM/USE: 'NEVER', INVERTED FRONT POSITION, FOCUS; negation FORM/USE: 'NEITHER ... NOR'

Snippet:
("A: Yes I believe so, because it is a type of walking and all forms have health benefits. I think this also contributes too the popularity of hiking.\nB: What kind of equipment do you need to go on a hike?\nA: I think you would need some food and some mountain traversal gear like a pick axe. There are also apps like hike messenger that help you communicate while hiking.\nB: Thanks for the tips i'll keep that in mind for my n

Contexts:   0%|          | 0/1 [00:00<?, ?it/s]

Responses:   0%|          | 0/3 [00:00<?, ?it/s]

{'Distinctiveness': [0.8947368421052632], 'positive_constraints': [[[0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0]]], 'negative_constraints': [[[], [], []]], 'Appropriateness': [[4.0, 4.0, 4.0]], 'Relevance': [[4.0, 3.0, 4.0]], 'Content Richness': [[4.0, 3.0, 3.0]], 'Grammatical Correctness': [[4.0, 4.0, 5.0]]}

B1-Can use question forms. (628)
- B: Would you also recommend bringing a first aid kit in case of any emergencies while hiking?

B1-Can use question forms. (628)
- B: Would you say it's better to hike with a partner rather than alone for safety reasons?

B1-Can use question forms. (628)
- B: Would you recommend that I bring a map and compass to ensure I stay on the right track?

B1-Can use 'would' to make polite requests, often in the fixed expression 'would you mind'. (633)
- B: Would you recommend that I bring

Contexts:   0%|          | 0/1 [00:00<?, ?it/s]

Responses:   0%|          | 0/3 [00:00<?, ?it/s]

{'Distinctiveness': [0.8809523809523809], 'positive_constraints': [[[0.5, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]]], 'negative_constraints': [[[], [], []]], 'Appropriateness': [[4.0, 4.0, 4.0]], 'Relevance': [[4.0, 4.0, 4.0]], 'Content Richness': [[4.0, 4.0, 4.0]], 'Grammatical Correctness': [[4.0, 4.0, 4.0]]}

B1-Can use 'would' in the main clause of a conditional sentence to talk about an imagined situation, often in the context of advice or opinion-giving. (625)
- If I were in their shoes, I would do everything in my power to ensure my child's well-being.

B1-Can use 'would' with verbs such as 'advise', 'imagine', 'recommend', 'say' to be less direct. (632)
- If I were in their shoes, I would do everything in my power to ensure my child's well-being.
___________________________________________________________________

Contexts:   0%|          | 0/1 [00:00<?, ?it/s]

Responses:   0%|          | 0/3 [00:00<?, ?it/s]

{'Distinctiveness': [0.8345323741007195], 'positive_constraints': [[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.5, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.3333333333333333, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.3333333333333333, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]]], 'negative_constraints': [[[], [], []]], 'Appropriateness': [[4.0, 3.0, 3.0]], 'Relevance': [[4.0, 3.0, 2.0]], 'Content Richness': [[4.0, 3.0, 3.0]], 'Grammatical Correctness': [[4.0, 4.0, 4.0]]}

B1-Can use 'would' in the main clause of a conditional sentence to talk about an imagined situation, often in the context of advice or opinion-giving. (625)
- B: If I were in your position, I would listen to my parents' advice and consider keeping your day job at Wal-Mart.

B1-Can use 'would' with verbs such as 'advise', 'imagine', 'recommend', 'say' to be less direct. (632)
- B: If I were in your position, I would listen to my parents' advice and consider keeping your day job at Wal

Contexts:   0%|          | 0/1 [00:00<?, ?it/s]

Responses:   0%|          | 0/3 [00:00<?, ?it/s]

{'Distinctiveness': [0.8833333333333333], 'positive_constraints': [[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]]], 'negative_constraints': [[[], [], []]], 'Appropriateness': [[4.0, 4.0, 4.0]], 'Relevance': [[4.0, 4.0, 4.0]], 'Content Richness': [[4.0, 4.0, 4.0]], 'Grammatical Correctness': [[4.0, 4.0, 4.0]]}
____________________________________________________________________________________________________
Continue the dialog with one turn, proving knowledge of at least one of these grammar skills:
would FORM/USE: AFTER 'IF' CLAUSES; would FORM: PAST AFFIRMATIVE; would FORM: PAST NEGATIVE; would FORM: QUESTIONS; would FORM: WITH ADVERBS; would USE: FUTURE IN THE PAST; would USE: IMAGINED SITUATIONS IN THE PAST; would USE: INDIRECTNESS; would USE: POLITE REQUESTS; would USE: REPORTED SPEECH; would USE: WILLINGNESS IN 

Contexts:   0%|          | 0/1 [00:00<?, ?it/s]

Responses:   0%|          | 0/3 [00:00<?, ?it/s]

{'Distinctiveness': [0.8717948717948718], 'positive_constraints': [[[0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0], [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]]], 'negative_constraints': [[[], [], []]], 'Appropriateness': [[4.0, 4.0, 3.0]], 'Relevance': [[2.0, 4.0, 2.0]], 'Content Richness': [[3.0, 4.0, 3.0]], 'Grammatical Correctness': [[4.0, 5.0, 4.0]]}

B1-Can use question forms. (628)
- B: Would you mind if I take the lead this time?

B1-Can use 'would' to make polite requests, often in the fixed expression 'would you mind'. (633)
- B: Would you mind if I take the lead this time?

B1-Can use question forms. (628)
- B: Would you mind showing me the steps again?

B1-Can use 'would' to make polite requests, often in the fixed expression 'would you mind'. (633)
- B: Would you mind showing me the steps again?

B1-Can use 'would' in the main clause of a conditional s

Subcategory constraints parameters
1. number of subcategories to choose constraints from -> 1-3 (cost of developing more classifiers)
2. CEFR levels -> A1-C2
3. maximum difference between CEFR levels per category

dataset format

CTX_SOURCE_DS | CTX_SOURCE_ID | CONTEXT | RESPONSE | SUBCATS x LEVELS
Daily Dialog | 023 | A: Hey B: Hey A: How are you? | B: I'm good | would_B2, negation_C1, superlatives_B1

## Task 3: All skills from one difficulty level

In [36]:
def sample_level_rules(level = None):
    if level is None: level = random.sample(levels, 1)[0]
    return level, egp[(egp['Level']==level) & (egp['#'].isin(classifiers.keys()))]

In [37]:
level, rules = sample_level_rules()
snippet = random.sample(stories, 1)[0]
prompt_score(rules, snippet, classifiers, dialog=False, all=False, verbose=True, level=level)

Continue the text with two sentences, proving knowledge of grammar skills on CEFR level C1.
Snippet:
On December 7, 1941, the United States of America was attacked by the Imperial Japanese Navy at Pearl Harbor, Hawaii.
RESPONSE from GPT3.5
["This event led to the United States' entry into World War II, marking a turning point in the conflict and drastically changing the course of history. The surprise attack resulted in significant damage to the US Pacific Fleet and the loss of thousands of lives."]


[[0.0, 0.0, 0.0, 0.0]]

In [39]:
for i in range(5):
    level, rules = sample_level_rules()
    snippet = sample_dialog_snippet(dialogs)
    prompt_score(rules, snippet, classifiers, dialog=True, all=False, verbose=True, level=level, n_responses=3)
    print("_" * 100)

Continue the dialog with one turn, proving knowledge of grammar skills on CEFR level B1.
Snippet:
A: That is what tornadoes sound like. They sound like trains. They're very loud.
B: But if you hear them coming, isn't it already too late?
A: Maybe. It depends on the tornado. Some can move across the ground at 200 miles an hour. That is very fast. Others aren't so fast.
B: 
RESPONSE from GPT3.5
['Yes, I heard that the speed of a tornado can vary greatly depending on its size and strength.', 'I heard that tornadoes can leave a path of destruction behind them.', "Yes, I've heard that tornadoes can vary in speed depending on their size and strength. I once read that the Enhanced Fujita Scale is used to measure their intensity."]


Contexts:   0%|          | 0/1 [00:00<?, ?it/s]

Responses:   0%|          | 0/3 [00:00<?, ?it/s]

{'Distinctiveness': [0.7936507936507936], 'positive_constraints': [[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]]], 'negative_constraints': [[[], [], []]], 'Appropriateness': [[4.0, 4.0, 4.0]], 'Relevance': [[4.0, 4.0, 4.0]], 'Content Richness': [[4.0, 3.0, 4.0]], 'Grammatical Correctness': [[5.0, 4.0, 5.0]]}
____________________________________________________________________________________________________
Continue the dialog with one turn, proving knowledge of grammar skills on CEFR level C2.
Snippet:
A: I appreciate it very much. You've been doing great. I expect to see you be promoted pretty soon.
B: that's very nice of you to say so. I'll try my best. When will you start at the new position?
A: next week.
B: 
RESPONSE from GPT3.5
["I'm happy to hear that. I will make s

Contexts:   0%|          | 0/1 [00:00<?, ?it/s]

Responses:   0%|          | 0/3 [00:00<?, ?it/s]

{'Distinctiveness': [0.9615384615384616], 'positive_constraints': [[[0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0]]], 'negative_constraints': [[[], [], []]], 'Appropriateness': [[4.0, 4.0, 4.0]], 'Relevance': [[4.0, 4.0, 4.0]], 'Content Richness': [[4.0, 4.0, 3.0]], 'Grammatical Correctness': [[4.0, 5.0, 5.0]]}
____________________________________________________________________________________________________
Continue the dialog with one turn, proving knowledge of grammar skills on CEFR level C2.
Snippet:
A: Hi, Peter. I'm afraid I can't make it to dinner tomorrow night.
B: Oh, why not?
A: It's just I'm not feeling well. I think I'm coming down with a cold or something.
B: 
RESPONSE from GPT3.5
['I can completely understand, health should always come first.', "I'm really sorry to hear that. Make sure to get plenty of rest and stay hydrated.", "That's too bad. I hope you feel better soon."]


Contexts:   0%|          | 0/1 [00:00<?, ?it/s]

Responses:   0%|          | 0/3 [00:00<?, ?it/s]

{'Distinctiveness': [1.0], 'positive_constraints': [[[0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0]]], 'negative_constraints': [[[], [], []]], 'Appropriateness': [[4.0, 4.0, 4.0]], 'Relevance': [[4.0, 4.0, 4.0]], 'Content Richness': [[4.0, 4.0, 2.0]], 'Grammatical Correctness': [[5.0, 5.0, 5.0]]}
____________________________________________________________________________________________________
Continue the dialog with one turn, proving knowledge of grammar skills on CEFR level C2.
Snippet:
A: I don't have a good camera. How am I going to take nice pictures of the butterflies? They are so colorful. I want to get in really close so everyone can see the details.
B: Our photography lab loans cameras to all students enrolled in my class. Don't worry. Just show your student ID and they will give you one.
A: What if I lose it? Or break it?
B: 
RESPONSE from GPT3.5
['If you happen to lose or break the camera, you will be responsible for covering the cost of repair or replac

Contexts:   0%|          | 0/1 [00:00<?, ?it/s]

Responses:   0%|          | 0/3 [00:00<?, ?it/s]

{'Distinctiveness': [0.8392857142857143], 'positive_constraints': [[[0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0]]], 'negative_constraints': [[[], [], []]], 'Appropriateness': [[4.0, 4.0, 4.0]], 'Relevance': [[4.0, 4.0, 4.0]], 'Content Richness': [[4.0, 4.0, 4.0]], 'Grammatical Correctness': [[5.0, 4.0, 4.0]]}
____________________________________________________________________________________________________
Continue the dialog with one turn, proving knowledge of grammar skills on CEFR level A1.
Snippet:
A: No, there isn't. They are all vacuumed.
B: I wonder how old these things are, thousands of years?
A: Yeah, they all come from a very ancient time.
B: 
RESPONSE from GPT3.5
['I think they might be relics from ancient civilizations.', 'How interesting! They must hold a lot of history.', 'So they must be very valuable.']


Contexts:   0%|          | 0/1 [00:00<?, ?it/s]

Responses:   0%|          | 0/3 [00:00<?, ?it/s]

{'Distinctiveness': [1.0], 'positive_constraints': [[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0, 0.0, 0.0]]], 'negative_constraints': [[[], [], []]], 'Appropriateness': [[4.0, 4.0, 4.0]], 'Relevance': [[4.0, 4.0, 4.0]], 'Content Richness': [[4.0, 4.0, 4.0]], 'Grammatical Correctness': [[5.0, 5.0, 5.0]]}
____________________________________________________________________________________________________


Difficulty level constraints parameters
1. CEFR levels -> A1-C2

dataset format

CTX_SOURCE_DS | CTX_SOURCE_ID | CONTEXT | RESPONSE | LEVEL
Daily Dialog | 023 | A: Hey B: Hey A: How are you? | B: I'm good | B1