In [57]:
import pandas as pd
from tqdm import tqdm
from IPython.display import display 
from langchain.llms import OpenAI
from langchain.prompts import PromptTemplate, FewShotPromptTemplate
from langchain.chains import LLMChain


# Few-shot demonstrations, written as (question, accepted answers) pairs.
train = [
    ('Who produced the album that included a re-recording of "Lithium"?', ['Butch Vig']),
    ('Who was the director of the 2009 movie featuring Peter Outerbridge as William Easton?', ['Kevin Greutert']),
    ('The heir to the Du Pont family fortune sponsored what wrestling team?', ['Foxcatcher', 'Team Foxcatcher', 'Foxcatcher Team']),
    ('In what year was the star of To Hell and Back born?', ['1925']),
    ('Which award did the first book of Gary Zukav receive?', ['U.S. National Book Award', 'National Book Award']),
    ('What city was the victim of Joseph Druces working in?', ['Boston, Massachusetts', 'Boston']),
]


# Held-out questions, written as (question, accepted answers) pairs.
dev = [
    ('Who has a broader scope of profession: E. L. Doctorow or Julia Peterkin?', ['E. L. Doctorow', 'E.L. Doctorow', 'Doctorow']),
    ('What documentary about the Gilgo Beach Killer debuted on A&E?', ['The Killing Season']),
    ('Right Back At It Again contains lyrics co-written by the singer born in what city?', ['Gainesville, Florida', 'Gainesville']),
    ('What year was the party of the winner of the 1971 San Francisco mayoral election founded?', ['1828']),
    ('Which author is English: John Braine or Studs Terkel?', ['John Braine']),
    ('Anthony Dirrell is the brother of which super middleweight title holder?', ['Andre Dirrell']),
    ('In which city is the sports nutrition business established by Oliver Cookson based ?', ['Cheshire', 'Cheshire, UK']),
    ('Find the birth date of the actor who played roles in First Wives Club and Searching for the Elephant.', ['February 13, 1980']),
    ('Kyle Moran was born in the town on what river?', ['Castletown', 'Castletown River']),
    ("What is the name of one branch of Robert D. Braun's speciality?", ['aeronautical engineering', 'astronautical engineering', 'aeronautics', 'astronautics']),
    ("Where was the actress who played the niece in the Priest film born?", ['Surrey', 'Guildford, Surrey']),
    ('Name the movie in which the daughter of Noel Harrison plays Violet Trefusis.', ['Portrait of a Marriage']),
    ('What year was the father of the Princes in the Tower born?', ['1442']),
]


# Re-shape both splits from tuples into dicts.
# - train keeps only the first accepted answer, under the key 'answer'.
# - dev keeps the full list of accepted answers, under the key 'answers'.
train = [dict(question=question, answer=answers[0]) for question, answers in train]
dev = [dict(question=question, answers=answers) for question, answers in dev]

# 1 - Vanilla GPT-3.5

In [56]:
# Temperature 0 keeps completions as deterministic as the API allows. The
# dev-set score is an exact-match count over a handful of questions, so a high
# sampling temperature makes the reported number change from run to run.
llm = OpenAI(temperature=0)

# How one solved demonstration is rendered inside the prompt.
example_question_answer_template = """
Question: {question}
Answer: {answer}
"""
example_prompt = PromptTemplate(
    input_variables=['question', 'answer'],
    template=example_question_answer_template,
)

# Task instruction and output-format description placed before the demonstrations.
prefix = """
Answer questions with short factoid answers.

---

Follow the following format.

Question: $[the question to be answered]
Answer: $[a short factoid answer, often between 1 and 5 words]

---
"""

# Full prompt: prefix, every demonstration in `train`, then the open question.
# The separator is empty because the example template already starts and ends
# with a newline.
few_shot_prompt = FewShotPromptTemplate(
    examples=train,
    example_prompt=example_prompt,
    prefix=prefix,
    suffix="\nQuestion: {question}\nAnswer:",
    input_variables=['question'],
    example_separator=''
)

chain = LLMChain(llm=llm, prompt=few_shot_prompt)

In [62]:
def metric(prediction, answers):
    """Tell whether `prediction` is one of the accepted `answers`.

    The check is a plain membership test, so against a list of strings the
    prediction has to equal one entry exactly (case and punctuation included).

    Args:
        prediction: The predicted answer.
        answers: Container of accepted answers.

    Returns:
        True if `prediction` is contained in `answers`, otherwise False.
    """
    is_accepted = prediction in answers
    return is_accepted


def evaluate(chain, dev):
    """Run `chain` on every example in `dev`, then print and display the results.

    Args:
        chain: Object exposing `run(question=...)` that returns the answer text.
        dev: Iterable of dicts with a 'question' string and an 'answers'
            collection of accepted answers.

    Returns:
        None. Prints 'Correct: <n> / <total>' and displays one table row per
        example, with the stripped prediction and a check/cross mark.
    """
    rows = []

    for example in tqdm(dev):
        # Completions usually carry leading/trailing whitespace; drop it before scoring.
        prediction = chain.run(question=example['question']).strip()

        row = dict(example)  # copy, so the caller's example dicts stay untouched
        row['prediction'] = prediction
        # Score through the shared metric so a change to it is reflected here.
        row['correct'] = metric(prediction, example['answers'])

        rows.append(row)

    n_correct = sum(row['correct'] for row in rows)
    print(f'Correct: {n_correct} / {len(rows)}')

    df = pd.DataFrame(rows)
    # An empty `dev` yields a frame without columns; there is nothing to relabel then.
    if not df.empty:
        df['correct'] = df['correct'].apply(lambda x: '✅' if x else '❌')

    # Show full cell contents for this table only, leaving the global pandas
    # display option as it was.
    with pd.option_context('display.max_colwidth', None):
        display(df)

In [65]:
# Score the few-shot chain on the dev questions: prints the number of correct
# predictions and displays the per-example table.
evaluate(chain, dev)

100%|██████████| 13/13 [00:09<00:00,  1.36it/s]

Correct: 3 / 13





Unnamed: 0,question,answers,prediction,correct
0,Who has a broader scope of profession: E. L. Doctorow or Julia Peterkin?,"[E. L. Doctorow, E.L. Doctorow, Doctorow]",E. L. Doctorow,✅
1,What documentary about the Gilgo Beach Killer debuted on A&E?,[The Killing Season],The Long Island Serial Killer,❌
2,Right Back At It Again contains lyrics co-written by the singer born in what city?,"[Gainesville, Florida, Gainesville]","Grand Rapids, Michigan",❌
3,What year was the party of the winner of the 1971 San Francisco mayoral election founded?,[1828],1965,❌
4,Which author is English: John Braine or Studs Terkel?,[John Braine],John Braine,✅
5,Anthony Dirrell is the brother of which super middleweight title holder?,[Andre Dirrell],Andre Dirrell,✅
6,In which city is the sports nutrition business established by Oliver Cookson based ?,"[Cheshire, Cheshire, UK]","London, UK",❌
7,Find the birth date of the actor who played roles in First Wives Club and Searching for the Elephant.,"[February 13, 1980]","August 22, 1974",❌
8,Kyle Moran was born in the town on what river?,"[Castletown, Castletown River]",St. Lawrence,❌
9,What is the name of one branch of Robert D. Braun's speciality?,"[aeronautical engineering, astronautical engineering, aeronautics, astronautics]",Aerospace engineering,❌


# 2 - Retrieve then read