In [1]:
import os
import pathlib
import torch
import openai
import datasets
import transformers
from transformers import AutoModelForCausalLM, AutoTokenizer, HfArgumentParser
from eval.args import RunnerArguments, HFArguments, OAIArguments, GenerationArguments
from eval.evaluator import HFEvaluator, OAIEvaluator
from eval.tasks import ALL_TASKS, get_task

from dotenv import load_dotenv
# Read variables from a local .env file into the process environment
# (presumably where the OpenAI API key comes from -- confirm against the repo's .env).
load_dotenv()

# Keep notebook output readable: only error-level log messages from the
# transformers and datasets libraries are emitted.
transformers.logging.set_verbosity_error()
datasets.logging.set_verbosity_error()

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Look up the evaluation task by its registered name; the object is reused
# below to build prompts and fetch the dataset.
task = get_task('proofwriter-neurosymbolic-2shot')

In [3]:
# Render the prompt the task builds for the first dataset example, as a
# sanity check of the prompt format before sending anything to a model.
print(task.get_prompt(task.get_dataset()[0]))

The following is a first-order logic (FOL) problem.
The problem is to determine whether the conclusion follows from the premises.
The premises are given in the form of a set of first-order logic sentences.
The conclusion is given in the form of a single first-order logic sentence.
The task is to translate each of the premises and conclusions into FOL expressions, so that the expressions can be evaluated by a theorem solver to determine whether the conclusion follows from the premises.
Expressions should be adhere to the format of the Python NLTK package logic module.


<PREMISES>
All dispensable things are environment-friendly.
All woodware is dispensable.
All paper is woodware.
No good things are bad.
All environment-friendly things are good.
A worksheet is either paper or is environment-friendly.
</PREMISES>
<CONCLUSION>
A worksheet is not dispensable.
</CONCLUSION>
<EVALUATE>
TEXT:	All dispensable things are environment-friendly.
FOL:	all x. (Dispensable(x) -> EnvironmentFriendly(x)

In [4]:
def get_args():
    """Assemble the argument namespace used by the evaluation cells.

    An ``HfArgumentParser`` built from the four argument groups parses an
    empty command line, so options fall back to whatever the groups declare.
    ``output_dir`` is then re-anchored under the parent of the current working
    directory, each ``save_*_path`` is placed beneath it, and the parent
    directory of every save path is created if missing. Finally the OpenAI
    key environment-variable names and the model name are overridden.

    Returns:
        The namespace produced by ``parse_args`` with the adjustments above.
    """
    argument_groups = [
        RunnerArguments(),
        HFArguments(),
        OAIArguments(),
        GenerationArguments(),
    ]
    args = HfArgumentParser(argument_groups).parse_args("")

    # The notebook runs one level below the directory that holds the outputs.
    cwd_parent = pathlib.Path(os.getcwd()).parent
    args.output_dir = cwd_parent / args.output_dir

    save_path_fields = (
        "save_generations_raw_path",
        "save_generations_prc_path",
        "save_references_path",
        "save_results_path",
    )
    # First resolve every save path against output_dir ...
    for field_name in save_path_fields:
        setattr(args, field_name, args.output_dir / getattr(args, field_name))
    # ... then make sure the directory each file will live in exists.
    for field_name in save_path_fields:
        getattr(args, field_name).parent.mkdir(parents=True, exist_ok=True)

    args.openai_api_env_keys = ['OPENAI_API_KEY']
    args.model = 'gpt-3.5-turbo'
    return args

In [5]:
# Build everything the completion call needs: the argument namespace, the
# task (re-created here with the same name as earlier, so this cell does not
# rely on the earlier assignment), an OpenAI-backed evaluator constructed
# with chat=True, and the task's dataset.
args = get_args()
task = get_task('proofwriter-neurosymbolic-2shot')
evaluator = OAIEvaluator(args, chat=True)
dataset = task.get_dataset()

In [6]:
# Show the system instruction carried on the parsed arguments
# (presumably what the chat evaluator sends as the system message -- confirm in OAIEvaluator).
print(args.chat_system_instruction)

You are a helpful assistant that carefully follows instructions. You should complete the user text, continuing from the example format, rather than providing a conversational response.


In [32]:
# Request a completion for the first example's prompt, passing the task's
# stop words. The result is stored in `response` and not displayed by this cell.
# NOTE(review): this cell's execution count is out of sequence with the cells
# above it -- re-run from a fresh kernel (Restart & Run All) to confirm the
# notebook does not depend on hidden state.
response = evaluator.get_completion(
    prompt=task.get_prompt(dataset[0]),
    stop=task.stop_words
)

The following is a first-order logic (FOL) problem.
The problem is to determine whether the conclusion follows from the premises.
The premises are given in the form of a set of first-order logic sentences.
The conclusion is given in the form of a single first-order logic sentence.
The task is to translate each of the premises and conclusions into FOL expressions, so that the expressions can be evaluated by a theorem solver to determine whether the conclusion follows from the premises.
Expressions should be adhere to the format of the Python NLTK package logic module.


<PREMISES>
All dispensable things are environment-friendly.
All woodware is dispensable.
All paper is woodware.
No good things are bad.
All environment-friendly things are good.
A worksheet is either paper or is environment-friendly.
</PREMISES>
<CONCLUSION>
A worksheet is not dispensable.
</CONCLUSION>
<EVALUATE>
TEXT:	All dispensable things are environment-friendly.
FOL:	all x. (Dispensable(x) -> EnvironmentFriendly(x)