In [3]:
import pandas as pd
from tqdm import tqdm
from IPython.display import display 
from langchain.llms import OpenAI
from langchain.prompts import PromptTemplate, FewShotPromptTemplate
from langchain.chains import LLMChain

from utils import evaluate, ColBERTv2


# Few-shot demonstrations. Each source pair is (question, accepted answers);
# only the first accepted answer is kept for the prompt.
train = [
    {'question': question, 'answer': golds[0]}
    for question, golds in [
        ('Who produced the album that included a re-recording of "Lithium"?', ['Butch Vig']),
        ('Who was the director of the 2009 movie featuring Peter Outerbridge as William Easton?', ['Kevin Greutert']),
        ('The heir to the Du Pont family fortune sponsored what wrestling team?', ['Foxcatcher', 'Team Foxcatcher', 'Foxcatcher Team']),
        ('In what year was the star of To Hell and Back born?', ['1925']),
        ('Which award did the first book of Gary Zukav receive?', ['U.S. National Book Award', 'National Book Award']),
        ('What city was the victim of Joseph Druces working in?', ['Boston, Massachusetts', 'Boston']),
    ]
]

# Held-out questions passed to `evaluate`; every accepted answer string is kept.
dev = [
    {'question': question, 'answers': golds}
    for question, golds in [
        ('Who has a broader scope of profession: E. L. Doctorow or Julia Peterkin?', ['E. L. Doctorow', 'E.L. Doctorow', 'Doctorow']),
        ('What documentary about the Gilgo Beach Killer debuted on A&E?', ['The Killing Season']),
        ('Right Back At It Again contains lyrics co-written by the singer born in what city?', ['Gainesville, Florida', 'Gainesville']),
        ('What year was the party of the winner of the 1971 San Francisco mayoral election founded?', ['1828']),
        ('Which author is English: John Braine or Studs Terkel?', ['John Braine']),
        ('Anthony Dirrell is the brother of which super middleweight title holder?', ['Andre Dirrell']),
        ('In which city is the sports nutrition business established by Oliver Cookson based ?', ['Cheshire', 'Cheshire, UK']),
        ('Find the birth date of the actor who played roles in First Wives Club and Searching for the Elephant.', ['February 13, 1980']),
        ('Kyle Moran was born in the town on what river?', ['Castletown', 'Castletown River']),
        ("What is the name of one branch of Robert D. Braun's speciality?", ['aeronautical engineering', 'astronautical engineering', 'aeronautics', 'astronautics']),
        ("Where was the actress who played the niece in the Priest film born?", ['Surrey', 'Guildford, Surrey']),
        ('Name the movie in which the daughter of Noel Harrison plays Violet Trefusis.', ['Portrait of a Marriage']),
        ('What year was the father of the Princes in the Tower born?', ['1442']),
    ]
]

In [4]:
# Completion model shared by every chain in this notebook. The non-zero
# temperature means repeated runs can produce different completions.
llm = OpenAI(temperature=0.9)
# Retrieval client from utils; later cells call it as rm(question) and rm(question, k=5).
# NOTE(review): the endpoint URL is hard-coded — confirm it is still reachable before re-running.
rm = ColBERTv2('http://ec2-44-228-128-229.us-west-2.compute.amazonaws.com:8893/api/search')

# 1 - Vanilla GPT-3.5

In [16]:
# Template that renders one demonstration as a Question/Answer pair.
# It starts and ends with a newline, so demonstrations are spaced without a separator.
example_question_answer_template = """
Question: {question}
Answer: {answer}
"""
example_prompt = PromptTemplate(
    input_variables=['question', 'answer'],
    template=example_question_answer_template,
)

# Task instruction plus a description of the expected output format;
# passed as the prompt prefix, i.e. placed before the demonstrations.
prefix = """
Answer questions with short factoid answers.

---

Follow the following format.

Question: $[the question to be answered]
Answer: $[a short factoid answer, often between 1 and 5 words]

---
"""

# Full prompt: prefix, then the `train` demonstrations, then the open question
# ending in "Answer:" for the model to complete.
few_shot_prompt = FewShotPromptTemplate(
    examples=train,
    example_prompt=example_prompt,
    prefix=prefix,
    suffix="\nQuestion: {question}\nAnswer:",
    input_variables=['question'],
    example_separator=''
)

# Chain that pairs the prompt with `llm`. The name `chain` is rebound by later cells.
chain = LLMChain(llm=llm, prompt=few_shot_prompt)

def run_vanilla_chain(example):
    """Answer one example with the few-shot chain, without any retrieval.

    Args:
        example: mapping with a 'question' entry.

    Returns:
        Whatever ``chain.run`` returns for that question.
    """
    question = example['question']
    prediction = chain.run(question=question)
    return prediction

In [17]:
# Score the vanilla few-shot chain on the dev set using the `evaluate` helper from utils.
evaluate(run_vanilla_chain, dev)

100%|██████████| 13/13 [00:08<00:00,  1.55it/s]

Correct: 4 / 13





Unnamed: 0,question,answers,prediction,correct
0,Who has a broader scope of profession: E. L. Doctorow or Julia Peterkin?,"[E. L. Doctorow, E.L. Doctorow, Doctorow]",E. L. Doctorow,✅
1,What documentary about the Gilgo Beach Killer debuted on A&E?,[The Killing Season],The Killing Season,✅
2,Right Back At It Again contains lyrics co-written by the singer born in what city?,"[Gainesville, Florida, Gainesville]","Rochester, New York",❌
3,What year was the party of the winner of the 1971 San Francisco mayoral election founded?,[1828],1967,❌
4,Which author is English: John Braine or Studs Terkel?,[John Braine],John Braine,✅
5,Anthony Dirrell is the brother of which super middleweight title holder?,[Andre Dirrell],Andre Dirrell,✅
6,In which city is the sports nutrition business established by Oliver Cookson based ?,"[Cheshire, Cheshire, UK]",New York City,❌
7,Find the birth date of the actor who played roles in First Wives Club and Searching for the Elephant.,"[February 13, 1980]","April 3, 1967",❌
8,Kyle Moran was born in the town on what river?,"[Castletown, Castletown River]",Raritan River,❌
9,What is the name of one branch of Robert D. Braun's speciality?,"[aeronautical engineering, astronautical engineering, aeronautics, astronautics]",Aerospace engineering,❌


# 2 - Retrieve then read

In [11]:
# Task instruction only; in this variant the format description lives in the suffix.
prefix = """
Answer questions with short factoid answers.
"""

# Format description followed by the open query with its retrieved context.
# NOTE(review): this suffix ends with a newline after "Answer:", unlike the
# vanilla prompt's suffix, which ends directly on "Answer:" — confirm this is intended.
suffix = """
---

Follow the following format.

Context:
$[sources that may contain relevant content]

Question: $[the question to be answered]

Answer: $[a short factoid answer, often between 1 and 5 words]

---

Context:
{context}

Question: {question}

Answer:
"""

# Reuses `train` and `example_prompt` from the vanilla section, so the
# demonstrations are rendered as Question/Answer pairs without a Context field.
retrieval_prompt = FewShotPromptTemplate(
    examples=train,
    example_prompt=example_prompt,
    prefix=prefix,
    suffix=suffix,
    input_variables=['context', 'question'],
    example_separator=''
)

# Rebinds `chain` to the retrieval prompt; functions that read `chain` use this one from here on.
chain = LLMChain(llm=llm, prompt=retrieval_prompt)

def run_retrieval_chain(example):
    """Retrieve for the question, then answer it using the first retrieved result.

    Args:
        example: mapping with a 'question' entry.

    Returns:
        Whatever ``chain.run`` returns for that context and question.
    """
    question = example['question']
    # Only the first retrieval result is placed in the prompt.
    top_passage = rm(question)[0]
    return chain.run(context=top_passage, question=question)

In [14]:
# Score the retrieve-then-read chain on the same dev set using `evaluate` from utils.
evaluate(run_retrieval_chain, dev)

100%|██████████| 13/13 [00:13<00:00,  1.07s/it]

Correct: 5 / 13





Unnamed: 0,question,answers,prediction,correct
0,Who has a broader scope of profession: E. L. Doctorow or Julia Peterkin?,"[E. L. Doctorow, E.L. Doctorow, Doctorow]",E.L. Doctorow,✅
1,What documentary about the Gilgo Beach Killer debuted on A&E?,[The Killing Season],The Killing Season,✅
2,Right Back At It Again contains lyrics co-written by the singer born in what city?,"[Gainesville, Florida, Gainesville]","Orlando, Florida",❌
3,What year was the party of the winner of the 1971 San Francisco mayoral election founded?,[1828],1936,❌
4,Which author is English: John Braine or Studs Terkel?,[John Braine],Studs Terkel,❌
5,Anthony Dirrell is the brother of which super middleweight title holder?,[Andre Dirrell],Andre Dirrell,✅
6,In which city is the sports nutrition business established by Oliver Cookson based ?,"[Cheshire, Cheshire, UK]","Manchester, UK",❌
7,Find the birth date of the actor who played roles in First Wives Club and Searching for the Elephant.,"[February 13, 1980]",5 August 1862,❌
8,Kyle Moran was born in the town on what river?,"[Castletown, Castletown River]",Liffey River,❌
9,What is the name of one branch of Robert D. Braun's speciality?,"[aeronautical engineering, astronautical engineering, aeronautics, astronautics]",Jet Propulsion,❌


# 3 - Retrieve then read with self-consistency

In [33]:
# Task instruction only; the format description lives in the suffix.
prefix = """
Answer questions with short factoid answers.
"""

# Format description with an added Rationale field, followed by the open query.
# The prompt ends on the rationale cue, so the completion has to supply both
# the reasoning and the final "Answer:" line.
suffix = """
---

Follow the following format.

Context:
$[sources that may contain relevant content]

Question: $[the question to be answered]

Rationale: Let's think step by step. $[a step-by-step deduction that identifies the correct response, which will be provided below]

Answer: $[a short factoid answer, often between 1 and 5 words]

---

Context:
{context}

Question: {question}

Rationale: Let's think step by step."""

# Reuses `train` and `example_prompt`, so the demonstrations are plain
# Question/Answer pairs with neither a Context nor a Rationale field.
rationale_prompt = FewShotPromptTemplate(
    examples=train,
    example_prompt=example_prompt,
    prefix=prefix,
    suffix=suffix,
    input_variables=['context', 'question'],
    example_separator=''
)

# Rebinds `chain` to the rationale prompt.
chain = LLMChain(llm=llm, prompt=rationale_prompt)

def _extract_answer(completion):
    """Return the final answer text from one rationale-style completion.

    The prompt asks the model to finish with an ``Answer: ...`` line, so the
    last line carrying that label wins and the label itself is stripped. If no
    such line exists, the last non-empty line is returned; an empty or
    whitespace-only completion yields ''.
    """
    label = 'Answer:'
    lines = [line.strip() for line in completion.split('\n') if line.strip()]
    for line in reversed(lines):
        if line.startswith(label):
            return line[len(label):].strip()
    return lines[-1] if lines else ''


def run_rationale_chain(example, n=3, verbose=True):
    """Retrieve, reason step by step, and answer by self-consistency voting.

    Args:
        example: mapping with a 'question' entry.
        n: number of completions to sample for the vote (at least one is
            always drawn).
        verbose: when True (the default), print the fully rendered prompt
            before sampling; pass False for batch evaluation.

    Returns:
        The answer string that occurs most often among the sampled
        completions. Ties go to the answer that was sampled first.
    """
    from collections import Counter

    question = example['question']
    passages = rm(question, k=5)
    # Number the passages so the model can tell the sources apart.
    context = '\n'.join(f'[{i+1}] {c}' for i, c in enumerate(passages))

    if verbose:
        print(rationale_prompt.format(context=context, question=question))

    # Self consistency: sample several rationales and keep only each final answer.
    answers = [
        _extract_answer(chain.run(context=context, question=question))
        for _ in range(max(1, n))
    ]

    # Ignore samples that produced no answer unless every sample did.
    votes = [answer for answer in answers if answer] or answers
    return Counter(votes).most_common(1)[0][0]

# Try the rationale chain on a single dev example (index 2) before running a full evaluation.
print(run_rationale_chain(dev[2]))


Answer questions with short factoid answers.

Question: Who produced the album that included a re-recording of "Lithium"?
Answer: Butch Vig

Question: Who was the director of the 2009 movie featuring Peter Outerbridge as William Easton?
Answer: Kevin Greutert

Question: The heir to the Du Pont family fortune sponsored what wrestling team?
Answer: Foxcatcher

Question: In what year was the star of To Hell and Back born?
Answer: 1925

Question: Which award did the first book of Gary Zukav receive?
Answer: U.S. National Book Award

Question: What city was the victim of Joseph Druces working in?
Answer: Boston, Massachusetts

---

Follow the following format.

Context:
$[sources that may contain relevant content]

Question: $[the question to be answered]

Rationale: Let's think step by step. $[a step-by-step deduction that identifies the correct response, which will be provided below]

Answer: $[a short factoid answer, often between 1 and 5 words]

---

Context:
[1] Right Back at It Again