In [1]:
from src.teacher_query_tools import ANLITeacherQuerier, CQATeacherQuerier, ESNLITeacherQuerier, SVAMPTeacherQuerier

  from .autonotebook import tqdm as notebook_tqdm


## Teacher Querier

In [2]:
# Create the ANLI teacher querier using the constructor's default arguments.
anliTQ = ANLITeacherQuerier()

In [3]:
# Smoke test: send a single query for ANLI train example 0 using prompt template 179.
# NOTE(review): dont_save=True presumably keeps the response from being persisted — confirm.
anliTQ._query(
    split="train",
    idx=0,
    prompt_template_id=179,
    dont_save=True,
)


Premise: The Parma trolleybus system (Italian: "Rete filoviaria di Parma" ) forms part of the public transport network of the city and "comune" of Parma, in the region of Emilia-Romagna, northern Italy. In operation since 1953, the system presently comprises four urban routes.\nClaim: The trolleybus system has over 2 urban routes\n\nWhy is the hypothesis "entailment" towards the premise? Answer in a single sentence starting with: "The hypothesis is "entailment" towards the premise, because..."
RESPONSE:
The hypothesis is "entailment" towards the premise, because it directly supports and confirms the information provided in the premise that the Parma trolleybus system presently comprises four urban routes.


In [3]:
# Batch-query ANLI train examples 0-9 with prompt template 179.
# With force_query=False the call's log reports how many queries were skipped
# because they had already been queried and stored.
anliTQ._batch_query(
    split="train",
    idxs=list(range(10)),
    prompt_template_id=179,
    force_query=False,
)

QUERYING EXAMPLE 10/10 (9)...
Batch Query completed! (Skipped 0 queries as they were already queried and stored.)
Total Prompt Tokens: 1438
Total Completion Tokens: 390
Total Costs: $0.002937


(1438, 390, 0.002937)

In [7]:
# Create the SVAMP teacher querier using the constructor's default arguments.
svampTQ = SVAMPTeacherQuerier()

In [8]:
# Batch-query SVAMP train examples 0-40 with prompt template 1.
# With force_query=False the call's log reports how many queries were skipped
# because they had already been queried and stored.
svampTQ._batch_query(
    split="train",
    idxs=list(range(41)),
    prompt_template_id=1,
    force_query=False,
)

Batch Query completed! (Skipped 30 queries as they were already queried and stored.)
Total Prompt Tokens: 566
Total Completion Tokens: 635
Total Costs: $0.002119


In [24]:
# Create the e-SNLI teacher querier using the constructor's default arguments.
esnli = ESNLITeacherQuerier()

In [26]:
# Batch-query two specific e-SNLI train examples (indices 0 and 34191) with prompt template 1.
# NOTE(review): dont_save=True presumably keeps new responses from being persisted — confirm.
esnli._batch_query(
    split="train",
    idxs=[0, 34191],
    prompt_template_id=1,
    force_query=False,
    dont_save=True,
)

Batch Query completed! (Skipped 2 queries as they were already queried and stored.)
Total Prompt Tokens: 0
Total Completion Tokens: 0
Total Costs: $0


In [11]:
# Create the CQA teacher querier using the constructor's default arguments.
cqaTQ = CQATeacherQuerier()

In [12]:
# Batch-query a hand-picked set of CQA train examples with prompt template 2.
# With force_query=False the call's log reports how many queries were skipped
# because they had already been queried and stored.
cqaTQ._batch_query(
    split="train",
    idxs=[0, 1, 2, 100, 200],
    prompt_template_id=2,
    force_query=False,
)

Batch Query completed! (Skipped 3 queries as they were already queried and stored.)
Total Prompt Tokens: 148
Total Completion Tokens: 58
Total Costs: $0.000338


## Parser

In [4]:
from src.teacher_response_parser import SVAMPTeacherResponseParser, ESNLITeacherResponseParser, CQATeacherResponseParser, ANLITeacherResponseParser
import re

In [5]:
# Parser instance used by the parsing cells in this section.
# NOTE(review): this is the only visible assignment of `parser`; some recorded outputs
# in this section look like they came from a different dataset's parser — re-run to confirm.
parser = ANLITeacherResponseParser()

In [22]:
# Regex that locates an answer marker ("a)" .. "e)", "1)" .. "5)" or "1." .. "5.";
# "balls" is also accepted) and captures everything after it as the second group.
# Written as a raw string so that `\)` and `\.` reach the regex engine verbatim instead of
# being invalid string escapes (DeprecationWarning; SyntaxWarning on Python 3.12+).
pattern = re.compile(
    r"(balls|a\)|b\)|c\)|d\)|e\)|1\)|2\)|3\)|4\)|5\)|1\.|2\.|3\.|4\.|5\.).?(.*)",
    re.IGNORECASE | re.DOTALL,
)
# Parse one hand-written response with the custom pattern.
# NOTE(review): the recorded result suggests the matched "a)" marker is looked up as
# prompt_values["choice_a"] — confirm against the parser implementation.
parser.parse_response(
    "The correct answer is a) fire because coal is commonly used as a fuel source to ignite and sustain fires.",
    pattern=pattern,
    prompt_values={"choice_a": "lol"},
)

('lol',
 'fire because coal is commonly used as a fuel source to ignite and sustain fires.')

In [60]:
# Greedy prefix up to a word boundary, followed by a run of digits: because `.+` is greedy,
# the second group ends up being the LAST integer in the text (here the boxed answer).
# Raw string so `\b` and `\d` are regex escapes rather than (invalid) string escapes.
pattern = re.compile(r"(.+\b)(\d+)")
match = pattern.search("In each row, there are 30 crayons, so in 7 rows there are 7 * 30 <<7*30=210>>210 crayons. Answer: \\boxed{210}.")

In [9]:
# Parse every stored train response for prompt template 179, with verbose reporting on.
parser.parse_response_batch(
    split="train",
    prompt_template_id=179,
    verbose=True,
)

{0: ('entailment',
  'it directly supports and confirms the statement that the trolleybus system in Parma presently comprises four urban routes, which is consistent with the claim that it has over 2 urban routes.'),
 1: ('neutral',
  'it does not make any claims or express any opinions about the popularity of the character Sharron Macready in the 1980s.'),
 2: ('neutral',
  "it does not make any assumptions or provide any evidence either supporting or refuting the claim that Bastedo didn't keep any pets due to her views on animal rights."),
 3: ('neutral',
  'it does not make any specific claims about whether Alexandra Bastedo was named by her mother or not.'),
 4: ('neutral',
  "it does not provide any evidence or support either confirming or refuting Bastedo's care for all animals."),
 5: ('neutral',
  'it does not make a definitive statement about whether Bastedo never ate meat in her life.'),
 6: ('contradiction',
  'the premise states that Jesse James was a guerrilla leader for th

In [61]:
# Display the capture groups of `match` (text before the number, then the number itself).
match.groups()

('In each row, there are 30 crayons, so in 7 rows there are 7 * 30 <<7*30=210>>210 crayons. Answer: \\boxed{',
 '210')

In [6]:
# Parse every stored train response for prompt template 3.
# NOTE(review): the only visible assignment of `parser` is ANLITeacherResponseParser(), yet the
# recorded output holds numeric answers; the cell was likely run with a different parser
# bound to `parser` — re-run on a fresh kernel to confirm.
parser.parse_response_batch(split="train", prompt_template_id=3)

{0: ('42', None),
 1: ('210', None),
 2: ('34', 'Julia played with a total of'),
 3: ('6', None),
 4: ('7', None),
 5: ('1088', 'The mailman should give a total of'),
 6: ('0.5', 'Each person ate 0.5 crackers and'),
 7: ('280', None),
 8: ('5', None),
 9: ('13', None),
 10: ('12', None),
 11: ('150', None),
 12: ('60', None),
 13: ('69', None),
 14: ('9', None),
 15: ('204', 'The farmer picked a total of'),
 16: ('180', 'The machine made a total of'),
 17: ('18', None),
 18: ('18', 'The number of birds sitting on the fence is'),
 19: ('8', None),
 20: ('2', None),
 21: ('60', 'The depth of the water was'),
 22: ('66', 'The grasshopper and the frog jumped a total of'),
 23: ('89', None),
 24: ('130', None),
 25: ('2', None),
 26: ('32', None),
 27: ('154', None),
 28: ('345', None),
 29: ('1', None),
 696: ('2', None),
 667: ('4', None),
 63: ('12', None),
 533: ('26', None),
 66: ('3', None),
 621: ('51', None),
 346: ('7', None),
 490: ('238', None),
 760: ('4', None),
 456: ('1', Non

In [10]:

# str.format ignores keyword arguments that the template never references,
# so the surplus "mashall" entry does not raise.
s = "Tell me a {adjective} joke about {content}."

s.format(adjective="funny", content="chickens", mashall="mashall")

'Tell me a funny joke about chickens.'

## Utils

In [1]:
import utils

In [2]:
# Call utils.Metadata() and utils.OtherMeta() and keep the results for inspection
# (presumably one instance of each class — confirm in the utils module).
c1 = utils.Metadata()
c2 = utils.OtherMeta()

In [3]:
# Display the `static_thing` attribute as seen through the `c2` object.
c2.static_thing

'another static'

## Evaluator

In [2]:
from src.teacher_response_evaluator import TeacherResponseEvaluator

In [5]:
# Build the evaluator, passing "esnli" (presumably the dataset name — confirm).
evaluator = TeacherResponseEvaluator("esnli")

In [9]:
# Evaluate the stored train responses for prompt template 2; the call returns a metrics dict.
evaluator.evaluate_responses_split(
    split="train",
    prompt_template_id=2,
)

{'accuracy': 0.76,
 'n_correct': 38,
 'n_wrong': 12,
 'n_parse_errors': 0,
 'n_none_responses': 0,
 'total_responses': 50,
 'total_length_of_explanations': 7145}

In [4]:
# Run the evaluator's train-split evaluation.
# NOTE(review): the recorded output mentions "svamp" although the only visible construction
# of `evaluator` passes "esnli", and the execution counts are out of order — re-run on a
# fresh kernel to confirm which dataset this cell actually evaluates.
evaluator.evaluate_train()

Could not parse '8 customers left than those that stayed behind.' with pattern 're.compile('(.+[\\s{\\$â‚¬])(\\d*\\.?\\d+)', re.IGNORECASE|re.DOTALL)'
Could not parse '137 more students suggested mashed potatoes than those that suggested bacon.' with pattern 're.compile('(.+[\\s\\{\\$â‚¬])(\\d*\\.?\\d+)', re.IGNORECASE|re.DOTALL)'
Could not parse '34 bird families were living near the mountain at the start.' with pattern 're.compile('(.+[\\s\\{\\$â‚¬])(\\d*\\.?\\d+)', re.IGNORECASE|re.DOTALL)'
Could not parse '65 more students suggested mashed potatoes than those that suggested tomatoes.' with pattern 're.compile('(.+[\\s\\{\\$â‚¬])(\\d*\\.?\\d+)', re.IGNORECASE|re.DOTALL)'
Could not parse '30 campers went rowing and hiking in all.' with pattern 're.compile('(.+[\\s\\{\\$â‚¬])(\\d*\\.?\\d+)', re.IGNORECASE|re.DOTALL)'
Best prompt template for svamp is 2, with accuracy: 0.7936507936507936


'2'

## Dataset Writer

In [1]:
# NOTE(review): other project imports in this notebook use the `src.` package prefix
# (e.g. `src.teacher_response_evaluator`); confirm `teacher_writer` is importable as written.
from teacher_writer import TeacherWriter
# Writer configured with "svamp" (presumably the dataset name — confirm).
tw = TeacherWriter("svamp")

In [19]:
# Two-level mapping: top-level keys "label" and "explanation", each holding
# {prompt template id: example indices}. Here template 1 and template 2 both cover indices 0-9.
mix = dict(
    label={1: range(10)},
    explanation={2: range(10)},
)

In [20]:
# Write the teacher responses for the train split, using the per-field template mix.
tw.write_teacher_responses(
    split="train",
    prompt_template_id_mix=mix,
)

## Plain LangChain

In [3]:
from langchain.chat_models import ChatOpenAI
from dotenv import load_dotenv

# Load variables from a local .env file into the process environment
# (presumably the API key needed by the chat model — confirm).
load_dotenv()

True

In [13]:
# Minimal end-to-end check of the chat model outside the project wrappers:
# build the client, then send one prompt and display the reply.
chat_model = ChatOpenAI(
    model="gpt-3.5-turbo",
    temperature=1.1,
    max_tokens=200,
    request_timeout=10,
)

chat_model.predict("Hi, how are you today?")

"Hello! As an AI, I don't have emotions, but I'm here to help you. How can I assist you today?"

## Downsample ESNLI

In [1]:
# The package directory name contains hyphens, which a plain `import` statement cannot
# spell, so the module is loaded by its dotted string name via importlib instead.
import importlib
dsbs_du = importlib.import_module("distilling-step-by-step.data_utils")

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Build the e-SNLI loader with subset="small" and load its splits from JSON.
# The result is indexed by "train", "test" and "valid" in the cells that follow.
dataloader = dsbs_du.ESNLIDatasetLoader(subset="small")
datasets = dataloader.load_from_json()

In [3]:
# Downsample the train split: hold out 69.09% as the throw-away "test" part and keep the rest.
# A fixed seed makes the random selection repeatable across kernel restarts.
# NOTE(review): this reassigns datasets["train"], so re-running the cell shrinks it again.
datasets["train"] = datasets["train"].train_test_split(test_size=0.6909, seed=42)["train"]

In [4]:
# Downsample the test split: hold out 89.82% as the throw-away "test" part and keep the rest.
# A fixed seed makes the random selection repeatable across kernel restarts.
# NOTE(review): this reassigns datasets["test"], so re-running the cell shrinks it again.
datasets["test"] = datasets["test"].train_test_split(test_size=0.8982, seed=42)["train"]

In [5]:
# Downsample the validation split: hold out 89.839% as the throw-away "test" part and keep the rest.
# A fixed seed makes the random selection repeatable across kernel restarts.
# NOTE(review): this reassigns datasets["valid"], so re-running the cell shrinks it again.
datasets["valid"] = datasets["valid"].train_test_split(test_size=0.89839, seed=42)["train"]

In [9]:
import json

# Persist the downsampled validation split as JSON Lines: one JSON object per line.
# `to_write` is not used in this cell; it is kept because a later cell iterates over it.
to_write = ""
# Plain string literal: the original f-string had no placeholders.
with open("esnli_valid.json", "w") as f:
    for line in datasets["valid"]:
        json.dump(line, f)
        f.write("\n")

In [None]:
# Write each element of `to_write` as one JSON document per line to
# "<write_location>/<split>_CoT.json".
# NOTE(review): this cell has no execution count. `write_location` and `split` are not
# defined in any cell visible here, and the only visible assignment of `to_write` is the
# empty string, which would produce an empty file — confirm the intended inputs.
with open(f"{write_location}/{split}_CoT.json", "w") as f:
    for line in to_write:
        json.dump(line, f)
        f.write("\n")