# Exp013: Create prompts for generating grammar-controlled text

In [4]:
import random
import os
import pandas as pd
import sys
sys.path.append('../source')
import models
import data
import api
import importlib
#importlib.reload(data)

In [36]:
# helper functions
levels = ["A1", "A2", "B1", "B2", "C1", "C2"] 
level_idx = {level: i for i, level in enumerate(levels)}

def skills(rules):
    return "- " + rules['SubCategory'] + " - " + rules['guideword'] + ": " + rules['Can-do statement']
    
def get_prompt(rules, snippet, negative_rules=None, dialog=True, all=False):
    exclude = f"""Do NOT apply the following grammar skills:
{os.linesep.join(skills(negative_rules))}""" if negative_rules else ""
    return f"""Continue the {"dialog with one turn" if dialog else "text with two sentences"}, proving knowledge of {"all" if all else "at least one"} of these grammar skills:
{os.linesep.join(skills(rules))}
{exclude}
Snippet:
{snippet}"""

def get_sentences(prompt, response):
    init_sent = prompt[prompt.rfind(":")+1:].strip()
    preamble = (init_sent + " " if not init_sent[-1]=="." else "") if len(init_sent) else ""
    return [preamble + data.sent_tokenize(response)[0].strip()] + data.sent_tokenize(response)[1:]
    
def classify_sents(classifiers, rules, sentences):
    for level, skill, (nr, classifier) in zip(rules['Level'], rules['Can-do statement'], classifiers.items()):    
        scores = models.probe_model(classifiers[nr], sentences)
        hits = [sentences[i] for i, score in enumerate(scores[0]) if score > 0.5]
        if len(hits): print(f'\n{level}-{skill} ({nr})\n{os.linesep.join(["- "+h for h in hits])}')

def sample_adjacent(lst, n=3):
    if len(lst) < n: raise ValueError("List must contain at least two items")
    index = random.randint(0, len(lst) - n)
    return lst[index:index+n]
    
def get_dialog_snippet(utterances):
    return os.linesep.join([("A" if (i%2==0) else "B") + ": " + utt for i, utt in enumerate(utterances + [""])])

def prompt_score(snippet):
    prompt = get_prompt(rules, level, snippet)
    print(f"{snippet} (total words in prompt: {len(prompt.split(' '))})")
    response = api.get_openai_chat_completion([{ "role": "user", "content": prompt}])[0]
    print(response)
    sentences = get_sentences(prompt, response)
    classify_sents(classifiers, rules, sentences)
    

In [3]:
egp = data.get_egp()

In [6]:
egp['SubCategory'].nunique()

86

Load EGP rules, topical prompts and dialogs

In [30]:
egp = data.get_egp()
cefr = data.CEFRTexts()
stories = list(cefr.get_beginnings(100))
ds = data.DialogSum()
existing_classifiers = [int(name.replace(".pth","")) for name in os.listdir(f"../models/corpus_training")]
classifiers = {nr: models.load_classifier(nr, 'corpus_training') for nr in existing_classifiers}

## Task 1: Single constraints

In [24]:
subcategories = ["would", "negation", "superlatives"]
level = "A2"
n_per_subcat=1
rules = egp[(egp['SubCategory'].isin(subcategories)) & (egp['Level']==level)].groupby("SubCategory").sample(n_per_subcat)

In [25]:
snippet = random.sample(stories, 1)[0]
prompt = get_prompt(rules, snippet, dialog=False, all=True)
print(prompt)
response = api.get_openai_chat_completion([{ "role": "user", "content": prompt}])[0] #, model="gpt-4-0125-preview"
print("RESPONSE")
print(response)

Continue the text with two sentences, proving knowledge of all of these grammar skills:
- negation - FORM: AUXILIARY VERBS 'BE', 'HAVE', PRESENT: Can form negative statements of main verbs in the present continuous and present perfect with 'be' and 'have' + 'not/n't'.  ► present continuous ► present perfect
- superlatives - FORM: WITH '-EST' : Can form superlative adjectives using the '-est' suffix, with adjectives of one syllable, and with two-syllable adjectives ending in -y.
- would - FORM: QUESTIONS WITH 'LIKE': Can use the question form 'would you like'.

Snippet:
Sharon was five years old. Her best friend was Pam. They played games together. They played all kinds
RESPONSE
 of games. Sharon liked to play hide and seek the most. Pam was the fastest runner in their neighborhood. Sharon would always ask Pam to be the seeker because she knew Pam would find her quickly.


In [26]:
dialog = random.sample(ds.get_dialogues(), 1)[0]
utterances = sample_adjacent(dialog)
snippet = get_dialog_snippet(utterances)
prompt = get_prompt(rules, snippet, dialog=True, all=True)
print(prompt)
response = api.get_openai_chat_completion([{ "role": "user", "content": prompt}])[0] # , model="gpt-4-0125-preview"
print(response)

Continue the dialog with one turn, proving knowledge of all of these grammar skills:
- negation - FORM: AUXILIARY VERBS 'BE', 'HAVE', PRESENT: Can form negative statements of main verbs in the present continuous and present perfect with 'be' and 'have' + 'not/n't'.  ► present continuous ► present perfect
- superlatives - FORM: WITH '-EST' : Can form superlative adjectives using the '-est' suffix, with adjectives of one syllable, and with two-syllable adjectives ending in -y.
- would - FORM: QUESTIONS WITH 'LIKE': Can use the question form 'would you like'.

Snippet:
A: Yes, sometimes it happens, Ma'am. That's why it is always good to carry your confirmation number. I'm sorry about the inconvenience.
B: It's okay. It's probably my husband's fault.Is there a pool in this hotel?
A: Yes, of course.The pool is open until 10:00 p. m. So you still have about 45 minutes.
B: 
Great! Thanks for letting me know. Is it possible to have the superlative room available with the best view of the pool?

## Task 2: Combine subcategories

Choose subcategory and load necessary classifiers

In [27]:
subcategories = ["would", "negation", "superlatives"]
levels = ["A2", "B1", "B1"]
rules = pd.concat([egp[(egp['SubCategory'] == subcat) & (egp['Level']==level)] for subcat, level in zip(subcategories, levels)])

Assemble prompt for a random story beginning, test it with GPT3.5 and use classifiers to score the response

In [39]:
snippet = random.sample(stories, 1)[0]
prompt = get_prompt(rules, snippet)
print(prompt)
response = api.get_openai_chat_completion([{ "role": "user", "content": prompt}])[0]
print(response)
sentences = get_sentences(prompt, response)
classify_sents(classifiers, rules, sentences)
print("X" * 75)

Continue the dialog with one turn, proving knowledge of at least one of these grammar skills:
- would - FORM: AFFIRMATIVE: Can use the affirmative form.
- would - FORM: NEGATIVE: Can use the negative forms.
- would - FORM: QUESTIONS WITH 'LIKE': Can use the question form 'would you like'.
- would - USE: IMAGINED SITUATIONS: Can use 'would' to talk about imagined situations.
- would - USE: SUGGESTIONS WITH 'IT WOULD BE': Can use 'it would be' to make suggestions.
- would - USE: WISHES AND PREFERENCES : Can use 'would' with a wide range of verbs to talk about wishes and preferences.
- negation - FORM: AUXILIARY VERBS 'BE', 'HAVE', PAST: Can form negative statements of main verbs in the past continuous and past perfect with auxiliary verbs 'be' and 'have' + 'not/n't'. ► past continuous ► past perfect
- negation - FORM: MENTAL PROCESS VERBS + CLAUSE: Can use the negative forms of mental process verbs ('I don't think', 'I don't believe') followed by a complement clause, where the negative f

Do this with dialogs

In [40]:
dialog = random.sample(ds.get_dialogues(), 1)[0]
utterances = sample_adjacent(dialog)
snippet = get_dialog_snippet(utterances)
prompt = get_prompt(rules, snippet)
print(prompt)
response = api.get_openai_chat_completion([{ "role": "user", "content": prompt}])[0]
print(response)
sentences = get_sentences(prompt, response)
classify_sents(classifiers, rules, sentences)
print("X" * 75)

Continue the dialog with one turn, proving knowledge of at least one of these grammar skills:
- would - FORM: AFFIRMATIVE: Can use the affirmative form.
- would - FORM: NEGATIVE: Can use the negative forms.
- would - FORM: QUESTIONS WITH 'LIKE': Can use the question form 'would you like'.
- would - USE: IMAGINED SITUATIONS: Can use 'would' to talk about imagined situations.
- would - USE: SUGGESTIONS WITH 'IT WOULD BE': Can use 'it would be' to make suggestions.
- would - USE: WISHES AND PREFERENCES : Can use 'would' with a wide range of verbs to talk about wishes and preferences.
- negation - FORM: AUXILIARY VERBS 'BE', 'HAVE', PAST: Can form negative statements of main verbs in the past continuous and past perfect with auxiliary verbs 'be' and 'have' + 'not/n't'. ► past continuous ► past perfect
- negation - FORM: MENTAL PROCESS VERBS + CLAUSE: Can use the negative forms of mental process verbs ('I don't think', 'I don't believe') followed by a complement clause, where the negative f

## Task 3: Difficulty

In [42]:
def get_level_prompt(level, snippet, dialog=True, all=False):
    return f"""Continue the {"dialog with one turn" if dialog else "text with two sentences"}, proving knowledge of grammar skills on CEFR level {level}.

Snippet:
{snippet}"""

In [44]:
snippet = random.sample(stories, 1)[0]
prompt = get_level_prompt("A2", snippet)
print(prompt)
response = api.get_openai_chat_completion([{ "role": "user", "content": prompt}])[0]
print(response)
sentences = get_sentences(prompt, response)
classify_sents(classifiers, rules, sentences)
print("X" * 75)

Continue the dialog with one turn, proving knowledge of grammar skills on CEFR level A2

Snippet:
A great American tradition dating back to the birth of the country is the board game night. This is where
 friends and family gather to play games like Monopoly, Scrabble, and Clue. It's a fun way to spend time together and build relationships.
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX


In [45]:
dialog = random.sample(ds.get_dialogues(), 1)[0]
utterances = sample_adjacent(dialog)
snippet = get_dialog_snippet(utterances)
prompt = get_level_prompt("A2", snippet)
print(prompt)
response = api.get_openai_chat_completion([{ "role": "user", "content": prompt}])[0]
print(response)
sentences = get_sentences(prompt, response)
classify_sents(classifiers, rules, sentences)
print("X" * 75)

Continue the dialog with one turn, proving knowledge of grammar skills on CEFR level A2

Snippet:
A: Oh, no. I'm sure I made a reservation for a first-class ticket.
B: Let me check. Now, your ticket is in order. Thank you for calling to reconfirm.
A: That's what I should do. And I don't want to lose it.
B: 
Don't worry, everything is in order. Just make sure to have your ticket with you when you board the train.
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
