In [1]:
import os
import pathlib
import torch
import openai
import datasets
import transformers
from transformers import AutoModelForCausalLM, AutoTokenizer, HfArgumentParser
from eval.args import RunnerArguments, HFArguments, OAIArguments, GenerationArguments
from eval.evaluator import HFEvaluator, OAIEvaluator
from eval.tasks import ALL_TASKS, get_task

from dotenv import load_dotenv
# Load variables from a local .env file into the process environment
# (presumably where OPENAI_API_KEY is defined — confirm).
load_dotenv()

# Restrict transformers / datasets logging to errors only.
transformers.logging.set_verbosity_error()
datasets.logging.set_verbosity_error()

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Fetch the task object for the 'proofwriter-neurosymbolic-2shot' task name.
task = get_task('proofwriter-neurosymbolic-2shot')

In [3]:
# Show the prompt the task builds for the first dataset example.
print(task.get_prompt(task.get_dataset()[0]))

The following is a first-order logic (FOL) problem.
The problem is to determine whether the conclusion follows from the premises.
The premises are given in the form of a set of first-order logic sentences.
The conclusion is given in the form of a single first-order logic sentence.
The task is to translate each of the premises and conclusions into FOL expressions, so that the expressions can be evaluated by a theorem solver to determine whether the conclusion follows from the premises.
Expressions should be adhere to the format of the Python NLTK package logic module.


<PREMISES>
All dispensable things are environment-friendly.
All woodware is dispensable.
All paper is woodware.
No good things are bad.
All environment-friendly things are good.
A worksheet is either paper or is environment-friendly.
</PREMISES>
<CONCLUSION>
A worksheet is not dispensable.
</CONCLUSION>
<EVALUATE>
TEXT:	All dispensable things are environment-friendly.
FOL:	all x. (Dispensable(x) -> EnvironmentFriendly(x)

In [26]:
def get_args(model='gpt-3.5-turbo', max_length_generation=4096):
    """Build the argument namespace used by the evaluator cells of this notebook.

    The four argument dataclasses are parsed against an empty argument list, so
    every option starts at its parser default. The output directory is then
    anchored under the parent of the current working directory, each save path is
    anchored under that output directory, and the parent directory of each save
    path is created.

    Args:
        model: Model name stored in ``args.model``.
        max_length_generation: Value stored in ``args.max_length_generation``.
            NOTE(review): with the default of 4096, the gpt-3.5-turbo request
            recorded in this notebook was rejected (1065 prompt tokens + 4096
            completion tokens against a 4097-token context). Pass a smaller value
            when targeting that model.

    Returns:
        The parsed namespace with the overrides above applied.
    """
    parser = HfArgumentParser(
        [RunnerArguments(), HFArguments(), OAIArguments(), GenerationArguments()]
    )
    # An explicit empty list makes it obvious that nothing is read from sys.argv
    # (which, inside a notebook, holds the kernel's own launch arguments).
    args = parser.parse_args([])

    args.output_dir = pathlib.Path(os.getcwd()).parent / args.output_dir

    # Anchor every save path under the output directory and make sure the
    # directory that will hold it exists.
    for attr in (
        'save_generations_raw_path',
        'save_generations_prc_path',
        'save_references_path',
        'save_results_path',
    ):
        anchored = args.output_dir / getattr(args, attr)
        setattr(args, attr, anchored)
        anchored.parent.mkdir(parents=True, exist_ok=True)

    args.max_length_generation = max_length_generation

    # Name(s) of the environment variable(s) holding the OpenAI API key.
    args.openai_api_env_keys = ['OPENAI_API_KEY']
    args.model = model
    return args

In [27]:
# Build the arguments, task, OAIEvaluator (chat=True) and dataset used by the cells below.
args = get_args()
task = get_task('proofwriter-neurosymbolic-2shot')
evaluator = OAIEvaluator(args, chat=True)
dataset = task.get_dataset()

In [28]:
# Display the chat system instruction stored on args.
print(args.chat_system_instruction)

You are a helpful assistant that carefully follows instructions. You should complete the user text, continuing from the example format, rather than providing a conversational response.


In [29]:
# Request a completion for the first example's prompt, stopping at the task's stop words.
# NOTE(review): as recorded, this call failed with InvalidRequestError — 1065 prompt
# tokens plus 4096 completion tokens exceed the model's 4097-token context. Lower
# args.max_length_generation before re-running.
response = evaluator.get_completion(
    prompt=task.get_prompt(dataset[0]),
    stop=task.stop_words
)

InvalidRequestError: This model's maximum context length is 4097 tokens. However, you requested 5161 tokens (1065 in the messages, 4096 in the completion). Please reduce the length of the messages or completion.

### Manually checking whether the technique improves the result

In [39]:
task = get_task('proofwriter-neurosymbolic-2shot')

# Index of the dataset example to inspect; reused below when post-processing generations.
idx = 0

doc = task.get_dataset()[idx]

print("doc is ", doc)

doc is  {'id': 'RelNoneg-OWA-D5-651', 'QDep': 0, 'premises': ['The bear eats the dog.', 'The bear eats the rabbit.', 'The bear is round.', 'The bear needs the rabbit.', 'The bear needs the tiger.', 'The bear sees the rabbit.', 'The dog eats the bear.', 'The rabbit eats the tiger.', 'The rabbit is round.', 'The tiger eats the bear.', 'The tiger is cold.', 'The tiger is young.', 'If something eats the tiger then the tiger needs the dog.', 'If the bear sees the dog and the dog eats the tiger then the dog eats the bear.', 'If something eats the bear then it sees the tiger.', 'If the tiger sees the rabbit then the rabbit eats the bear.', 'If something eats the bear and the bear sees the rabbit then it sees the dog.', 'If something sees the dog then it sees the rabbit.'], 'conclusion': 'The tiger eats the dog.', 'label': 'Uncertain'}


In [40]:
# Prompt the task builds for the selected example.
previous_prompt = task.get_prompt(doc)
print(previous_prompt)

The following is a first-order logic (FOL) problem.
The problem is to determine whether the conclusion follows from the premises.
The premises are given in the form of a set of first-order logic sentences.
The conclusion is given in the form of a single first-order logic sentence.
The task is to translate each of the premises and conclusions into FOL expressions, so that the expressions can be evaluated by a theorem solver to determine whether the conclusion follows from the premises.
Expressions should be adhere to the format of the Python NLTK package logic module.


<PREMISES>
All dispensable things are environment-friendly.
All woodware is dispensable.
All paper is woodware.
No good things are bad.
All environment-friendly things are good.
A worksheet is either paper or is environment-friendly.
</PREMISES>
<CONCLUSION>
A worksheet is not dispensable.
</CONCLUSION>
<EVALUATE>
TEXT:	All dispensable things are environment-friendly.
FOL:	all x. (Dispensable(x) -> EnvironmentFriendly(x)

In [41]:
# Hand-written FOL translation of the selected example: one TEXT/FOL pair per premise,
# followed by the pair for the conclusion. Plain (non-f) string: nothing is interpolated.
previous_generation_raw = """
TEXT:	The bear eats the dog.
FOL:	Eats(Bear, Dog)
TEXT:	The bear eats the rabbit.
FOL:	Eats(Bear, Rabbit)
TEXT:	The bear is round.
FOL:	Round(Bear)
TEXT:	The bear needs the rabbit.
FOL:	Needs(Bear, Rabbit)
TEXT:	The bear needs the tiger.
FOL:	Needs(Bear, Tiger)
TEXT:	The bear sees the rabbit.
FOL:	Sees(Bear, Rabbit)
TEXT:	The dog eats the bear.
FOL:	Eats(Dog, Bear)
TEXT:	The rabbit eats the tiger.
FOL:	Eats(Rabbit, Tiger)
TEXT:	The rabbit is round.
FOL:	Round(Rabbit)
TEXT:	The tiger eats the bear.
FOL:	Eats(Tiger, Bear)
TEXT:	The tiger is cold.
FOL:	Cold(Tiger)
TEXT:	The tiger is young.
FOL:	Young(Tiger)
TEXT:	If something eats the tiger then the tiger needs the dog.
FOL:	all x. (Eats(x, Tiger) -> Needs(Tiger, Dog))
TEXT:	If the bear sees the dog and the dog eats the tiger then the dog eats the bear.
FOL:	all x. all y. (Sees(Bear, Dog) & Eats(Dog, Tiger) -> Eats(Dog, Bear))
TEXT:	If something eats the bear then it sees the tiger.
FOL:	all x. (Eats(x, Bear) -> Sees(x, Tiger))
TEXT:	If the tiger sees the rabbit then the rabbit eats the bear.
FOL:	all x. (Sees(Tiger, Rabbit) -> Eats(Rabbit, Bear))
TEXT:	If something eats the bear and the bear sees the rabbit then it sees the dog.
FOL:	all x. (Eats(x, Bear) & Sees(Bear, Rabbit) -> Sees(x, Dog))
TEXT:	If something sees the dog then it sees the rabbit.
FOL:	all x. (Sees(x, Dog) -> Sees(x, Rabbit))
TEXT:	The tiger eats the dog.
FOL:	Eats(Tiger, Dog)
"""

# The same generation with extra background premises appended.
# NOTE(review): the extra premises come after the conclusion pair and are labelled
# "Premise:" rather than "TEXT:"; confirm task.postprocess_generation treats them as
# intended before drawing conclusions from the comparison.
new_generation_raw = previous_generation_raw + """
Premise: The bear is an animal.
FOL: Animal(Bear)
Premise: The dog is an animal.
FOL: Animal(Dog)

Premise: The rabbit is an animal.
FOL: Animal(Rabbit)

Premise: The tiger is an animal.
FOL: Animal(Tiger)

Premise: Animals can eat other animals.
FOL: all x. all y. (Animal(x) & Animal(y) -> CanEat(x, y))

Premise: Seeing an animal implies the existence of that animal.
FOL: all x. all y. (Sees(x, y) & Animal(y) -> Exists(y))

Premise: Needing something implies its existence.
FOL: all x. all y. (Needs(x, y) -> Exists(y))

Premise: If an animal eats another, the eaten animal exists.
FOL: all x. all y. (Eats(x, y) & Animal(y) -> Exists(y))

Premise: Being round is a physical attribute.
FOL: all x. (Round(x) -> HasPhysicalAttribute(x))

Premise: Being cold is a physical state.
FOL: all x. (Cold(x) -> HasPhysicalState(x))

Premise: Being young is a temporal state.
FOL: all x. (Young(x) -> HasTemporalState(x))
"""
print(new_generation_raw)


TEXT:	The bear eats the dog.
FOL:	Eats(Bear, Dog)
TEXT:	The bear eats the rabbit.
FOL:	Eats(Bear, Rabbit)
TEXT:	The bear is round.
FOL:	Round(Bear)
TEXT:	The bear needs the rabbit.
FOL:	Needs(Bear, Rabbit)
TEXT:	The bear needs the tiger.
FOL:	Needs(Bear, Tiger)
TEXT:	The bear sees the rabbit.
FOL:	Sees(Bear, Rabbit)
TEXT:	The dog eats the bear.
FOL:	Eats(Dog, Bear)
TEXT:	The rabbit eats the tiger.
FOL:	Eats(Rabbit, Tiger)
TEXT:	The rabbit is round.
FOL:	Round(Rabbit)
TEXT:	The tiger eats the bear.
FOL:	Eats(Tiger, Bear)
TEXT:	The tiger is cold.
FOL:	Cold(Tiger)
TEXT:	The tiger is young.
FOL:	Young(Tiger)
TEXT:	If something eats the tiger then the tiger needs the dog.
FOL:	all x. (Eats(x, Tiger) -> Needs(Tiger, Dog))
TEXT:	If the bear sees the dog and the dog eats the tiger then the dog eats the bear.
FOL:	all x. all y. (Sees(Bear, Dog) & Eats(Dog, Tiger) -> Eats(Dog, Bear))
TEXT:	If something eats the bear then it sees the tiger.
FOL:	all x. (Eats(x, Bear) -> Sees(x, Tiger))
TEXT:	If 

In [50]:
# Compare the reference label with the post-processed result of each hand-written generation.
print("reference: ", task.get_reference(doc))
print("without context: ", task.postprocess_generation(previous_generation_raw, idx, completion_only=True))
print("with context: ", task.postprocess_generation(new_generation_raw, idx, completion_only=True))

reference:  Uncertain
without context:  Uncertain
with context:  Uncertain


In [43]:
# Inspect the generation length currently set on args.
args.max_length_generation

4096

In [45]:
# Re-display the generation text that includes the extra premises.
print(new_generation_raw)


TEXT:	The bear eats the dog.
FOL:	Eats(Bear, Dog)
TEXT:	The bear eats the rabbit.
FOL:	Eats(Bear, Rabbit)
TEXT:	The bear is round.
FOL:	Round(Bear)
TEXT:	The bear needs the rabbit.
FOL:	Needs(Bear, Rabbit)
TEXT:	The bear needs the tiger.
FOL:	Needs(Bear, Tiger)
TEXT:	The bear sees the rabbit.
FOL:	Sees(Bear, Rabbit)
TEXT:	The dog eats the bear.
FOL:	Eats(Dog, Bear)
TEXT:	The rabbit eats the tiger.
FOL:	Eats(Rabbit, Tiger)
TEXT:	The rabbit is round.
FOL:	Round(Rabbit)
TEXT:	The tiger eats the bear.
FOL:	Eats(Tiger, Bear)
TEXT:	The tiger is cold.
FOL:	Cold(Tiger)
TEXT:	The tiger is young.
FOL:	Young(Tiger)
TEXT:	If something eats the tiger then the tiger needs the dog.
FOL:	all x. (Eats(x, Tiger) -> Needs(Tiger, Dog))
TEXT:	If the bear sees the dog and the dog eats the tiger then the dog eats the bear.
FOL:	all x. all y. (Sees(Bear, Dog) & Eats(Dog, Tiger) -> Eats(Dog, Bear))
TEXT:	If something eats the bear then it sees the tiger.
FOL:	all x. (Eats(x, Bear) -> Sees(x, Tiger))
TEXT:	If 