# Dataset Generation with new PROMPT


Prompt: Context + Instruction + Question (+ Intent) (+ Examples) + Response Format

prompt_template = f"### CONTEXT\n{context}\n\n### QUESTION\n{question}\n\n### ANSWER\n{answer}</s>"

Task 1: context = You will receive a problem in first-order predicate logic to solve. This problem contains statements about the world (let us call it a “world model”) and a mapping from things in this world to variables representing these things (we call that mapping the “keys”). You are then asked to provide a formula in first-order predicate logic that is either satisfied or unsatisfied. Please return only the formula, written in the nltk python library’s format. Question = World model: … . \n Keys: … . Please provide a formula in first-order predicate logic that is {satisfied}. Answer = {formula}

Task 2: context = You will receive a problem in first-order predicate logic to solve. This problem contains a formula in first-order predicate logic, written in the format of the nltk python library for which you are asked to create “world model” and “keys” that either satisfy or do not satisfy the formula. Here, a world model is a set of statements about a simple world that only talks about persons and adjectives that are either true or not true of these persons, e.g. “George is blunt”. Keys are a mapping from things in this world to variables representing these things, e.g. “a: George”. Please return the world model with “World model:”, and after a new line the keys with “Keys:”. Always map persons to lower case characters of the alphabet and adjectives to upper case characters of the alphabet. Question = Consider the following formula: all x.F(x). Provide a world model and keys that {satisfy} the formula. Answer = {World model: … . \n Keys: … .}

Task 3: context = You will receive a problem in first-order predicate logic to solve. This problem contains a formula in first-order predicate logic, written in the nltk python library’s format, a set of statements about a world (“world model”) and a mapping from things in this world to variables representing these things (“keys”). Please return in one word whether the formula is “satisfied” or “unsatisfied” given the world model and keys. Question = Consider the following formula in first-order predicate logic: {formula}. World Model: … . Keys: … . Is the provided formula “satisfied” or “unsatisfied” given the world model and keys? Answer = {satisfied}

Also:

Previously:

Here is a world model:  Violet is Conservative. George is Conservative. Waverly is not Conservative. Maximiliano is Conservative. Deacon is Conservative. Ashlynn is Conservative. Cheyenne is not Conservative. Jackson is Conservative. Let us interpret predicates and names as follows: F: Conservative. q: Violet. l: George. n: Waverly. j: Maximiliano. s: Deacon. d: Ashlynn. m: Cheyenne. p: Jackson.. Provide a formula in first-order predicate logic that is unsatisfied given the above situation and interpretation (keys).

New:

Context:

You will receive a problem in first-order predicate logic to solve. This problem contains statements about the world (let us call it a “world model”) and a mapping from things in this world to variables representing these things (we call that mapping the “keys”). You are then asked to provide a formula in first-order predicate logic that is either satisfied or unsatisfied. Please return only the formula, written in the nltk python library’s format. 

Question: 

Here is a world model:  

Violet is Conservative. George is Conservative. Waverly is not Conservative. Maximiliano is Conservative. Deacon is Conservative. Ashlynn is Conservative. Cheyenne is not Conservative. Jackson is Conservative.

Here are the keys: 

F: Conservative. q: Violet. l: George. n: Waverly. j: Maximiliano. s: Deacon. d: Ashlynn. m: Cheyenne. p: Jackson.

Please write down only one formula in first-order predicate logic that is unsatisfied given the above world model and keys.

Answer:

In [None]:
import pandas as pd
import json

In [None]:
# Load the base predicate-logic dataset. The cells below read its 'Formula',
# 'Keys', 'World Model', 'Satisfied' and 'Valuation' columns.
df = pd.read_json('base_pred_logic_data.json')

In [None]:
def make_task1_inputs(df):
    """Build the Task 1 prompts (world model + keys -> formula).

    For every row of ``df`` one prompt is assembled that shows the world
    model and the keys and asks for a single formula whose satisfaction
    status is the row's ``'Satisfied'`` value.

    Args:
        df: Table with the columns ``'Keys'``, ``'World Model'`` and
            ``'Satisfied'``. All values must be strings, because they are
            concatenated into the prompt text.

    Returns:
        list[str]: One prompt per row, in row order, each ending with
        ``"Answer:"``.
    """
    keys_list = df['Keys'].tolist()
    world_models = df['World Model'].tolist()
    sat_list = df['Satisfied'].tolist()

    # Adjacent string literals are used instead of backslash continuations
    # inside a single literal: with the latter, the indentation of the
    # continuation lines becomes part of the prompt as long runs of spaces.
    context = (
        "Context:\n\n"
        "You will receive a problem in first-order predicate logic to solve. "
        "This problem contains a set of statements about the world (let us call it a 'world model') "
        "and a mapping from things in this world to variables representing these things "
        "(we call that mapping the 'keys'). You are then asked to provide a formula in first-order predicate logic "
        "that is either satisfied or unsatisfied given the world model and keys. "
        "Please return only the formula, written in the format of the python library nltk.\n\n"
    )
    instruction_p1 = "Question:\n\nHere is a world model:\n\n"
    instruction_p2 = "\n\nHere are the keys:\n\n"
    question_p1 = "\n\nPlease write down only one formula in first-order predicate logic that is "
    question_p2 = " given the above world model and keys.\n\n"
    answer = "Answer:"

    return [
        context + instruction_p1 + model + instruction_p2 + keys
        + question_p1 + sat + question_p2 + answer
        for keys, model, sat in zip(keys_list, world_models, sat_list)
    ]



def make_task2_inputs(df):
    """Build the Task 2 prompts (formula + status -> world model and keys).

    For every row of ``df`` one prompt is assembled that shows the formula
    and asks for a world model and keys under which the formula has the
    satisfaction status given by the row's ``'Satisfied'`` value.

    Args:
        df: Table with the columns ``'Formula'`` and ``'Satisfied'``. All
            values must be strings, because they are concatenated into the
            prompt text.

    Returns:
        list[str]: One prompt per row, in row order, each ending with
        ``"Answer:"``.
    """
    formulas = df['Formula'].tolist()
    sat_list = df['Satisfied'].tolist()

    # Adjacent string literals are used instead of backslash continuations
    # inside a single literal: with the latter, the indentation of the
    # continuation lines becomes part of the prompt as long runs of spaces.
    context = (
        "Context:\n\n"
        "You will receive a problem in first-order predicate logic to solve. "
        "This problem contains a formula in first-order predicate logic, written in the format of the python library nltk "
        "for which you are asked to create a 'world model' and 'keys' that either satisfy or do not satisfy the formula. "
        "A world model is a set of statements about whether one or more predicates apply to things in this world. "
        "Keys are a mapping from things to (lower case) variables and predicates to (upper case) variables.\n\n"
    )
    instruction_p1 = "Question:\n\nConsider the following formula: "
    instruction_p2 = ". Provide a world model and keys such that the formula is "
    instruction_p3 = ". Please answer by returning the world model starting with 'World model:', followed by the keys starting with 'Keys:'."
    answer = "\n\nAnswer:"

    # The status list is only read here; it must not be appended to while
    # zip() is still iterating over it.
    return [
        context + instruction_p1 + formula + instruction_p2 + sat + instruction_p3 + answer
        for formula, sat in zip(formulas, sat_list)
    ]



def make_task3_inputs(df):
    """Build the Task 3 prompts (formula + world model + keys -> status).

    For every row of ``df`` one prompt is assembled that shows the formula,
    the world model and the keys, and asks whether the formula is satisfied
    or unsatisfied.

    Args:
        df: Table with the columns ``'Formula'``, ``'Keys'`` and
            ``'World Model'``. All values must be strings, because they are
            concatenated into the prompt text.

    Returns:
        list[str]: One prompt per row, in row order, each ending with
        ``"Answer:"``.
    """
    formulas = df['Formula'].tolist()
    keys_list = df['Keys'].tolist()
    world_models = df['World Model'].tolist()

    # Adjacent string literals are used instead of backslash continuations
    # inside a single literal: with the latter, the indentation of the
    # continuation lines becomes part of the prompt as long runs of spaces.
    context = (
        "Context:\n\n"
        "You will receive a problem in first-order predicate logic to solve. "
        "This problem contains a formula in first-order predicate logic, written in the format of the python library nltk, "
        "a set of statements about a world ('world model') and a mapping ('keys') from things in this world to variables "
        "representing these things. Please return in one word whether the formula is satisfied or unsatisfied given the world model and keys.\n\n"
    )
    instruction_p1 = "Question:\n\nConsider the following formula: "
    instruction_p2 = "\n\nHere is a world model:\n\n"
    instruction_p3 = "\n\nHere are the keys:\n\n"
    # Starts with a blank line so the question does not run into the keys.
    instruction_p4 = "\n\nIs the provided formula satisfied or unsatisfied given the world model and keys?"
    answer = "\n\nAnswer:"

    # TODO: insert the answer options "\n(A) Satisfied" and "\n(B) Unsatisfied",
    # randomly choosing which option is labelled A and which B (50/50).

    return [
        context + instruction_p1 + formula + instruction_p2 + model
        + instruction_p3 + keys + instruction_p4 + answer
        for formula, keys, model in zip(formulas, keys_list, world_models)
    ]

#### TASK 3

Input: Formula + Keys + World Model

Output: true / false (sat / unsat)

In [None]:
# Build the Task 3 dataset: each prompt from make_task3_inputs is paired with
# the 'Satisfied' value of the same row as its target.
inputs = make_task3_inputs(df)
targets = df['Satisfied'].tolist()

# The exported table has the two columns 'Input' and 'Target'.
d = {'Input':inputs,'Target':targets}
df_task3 = pd.DataFrame(d)
df_task3.to_json('Task3-dataset.json')

#### TASK 1

Input: World Model + Keys + Satisfied

Target: formula (but not necessarily the one from the original dataset, as multiple formulas are possible.)

In [None]:
def convert_valuation(valuation):
    """Return the valuation entries with upper-case-keyed values as lists.

    Sets cannot be serialized to JSON, so for every entry whose first
    element is an upper-case string the second element is turned into a
    list (to be converted back after loading). All other entries are passed
    through unchanged.
    """
    converted = []
    for entry in valuation:
        symbol = entry[0]
        if symbol.isupper():
            # Rebuild the pair with a list in place of the original collection.
            converted.append((symbol, list(entry[1])))
        else:
            converted.append(entry)
    return converted

In [None]:
# Assemble the Task 1 dataset: prompt, requested satisfaction status,
# valuation, and the formula from the base dataset as the target.
prompts = make_task1_inputs(df)
target_sats = df['Satisfied'].tolist()
# Object of type set is not JSON serializable, so we convert it to a list and convert back afterwards.
# NOTE(review): convert_valuation is not applied in this cell; the valuations
# are used exactly as loaded - confirm whether a conversion is still needed.
valuations = df["Valuation"].tolist()
print(valuations[0])  # print the first valuation for a quick look
targets = df['Formula'].tolist()


d = {'Input':prompts,"Target-sat": target_sats,"Valuation": valuations,'Target':targets}
df_task1 = pd.DataFrame(d)

In [None]:
# Display the 'Valuation' column as a dict for inspection (result is not stored).
df["Valuation"].to_dict()

In [None]:
# Export a small Task 1 sample: the first 20 rows, matching the
# "20-datapoints" file name. A positional slice excludes its stop index,
# so the stop must be 20 to obtain 20 rows.
df_task1.iloc[0:20].to_json('Task1-20-datapoints.json')

In [None]:
# Write the complete Task 1 dataset to JSON.
df_task1.to_json('Task1-dataset.json')

#### TASK 2:

Input: Formula + Satisfied

Target: World Model + Keys

In [None]:
# Assemble the Task 2 dataset: prompt, requested satisfaction status, the
# formula, and a target text built from the row's world model and keys.
inputs = make_task2_inputs(df)
models = df['World Model'].tolist()
target_sats = df['Satisfied'].tolist()
formulas = df['Formula'].tolist()
keys = df['Keys'].tolist()
# Target is the world model followed by the keys, joined by a single space.
# NOTE(review): the Task 2 prompt asks for an answer starting with
# 'World model:' and 'Keys:'; the target only carries those labels if the
# source columns already contain them - confirm.
targets = [m + " " + k for m, k in zip(models, keys)]

d = {'Input':inputs,"Target-sat": target_sats, "Formulas": formulas, 'Target':targets}
df_task2 = pd.DataFrame(d)

In [None]:
# Display the Task 2 table for visual inspection.
df_task2

In [None]:
# Write the complete Task 2 dataset to JSON.
df_task2.to_json('Task2-dataset.json')

In [None]:
# Show the number of rows in the Task 2 dataset.
len(df_task2)