In [1]:
import collections
from contextlib import nullcontext
from collections import namedtuple
from datasets import load_dataset
import json
import numpy as np
import random
import re 
import string
import torch
from typing import List

In [819]:
import copy
import pandas as pd
import os
import time

pd.set_option('display.max_rows', 100)

In [801]:
def normalize_answer(s: str) -> str:
    """Lower text and remove punctuation, articles and extra whitespace."""

    def remove_articles(text):
        regex = re.compile(r'\b(a|an|the)\b', re.UNICODE)
        return re.sub(regex, ' ', text)

    def white_space_fix(text):
        return ' '.join(text.split())

    def remove_punc(text):
        exclude = set(string.punctuation)
        return ''.join(ch for ch in text if ch not in exclude)

    def lower(text):
        return text.lower()

    return white_space_fix(remove_articles(remove_punc(lower(s))))


def get_tokens(s: str) -> List[str]:
    """Normalize string and split string into tokens."""
    if not s:
        return []
    return normalize_answer(s).split()


def compute_exact(a_gold: str, a_pred: str) -> int:
    """Compute the Exact Match score."""
    return int(normalize_answer(a_gold) == normalize_answer(a_pred))


def compute_f1_from_tokens(gold_toks: List[str], pred_toks: List[str]) -> float:
    """Compute the F1 score from tokenized gold answer and prediction."""
    common = collections.Counter(gold_toks) & collections.Counter(pred_toks)
    num_same = sum(common.values())

    if len(gold_toks) == 0 or len(pred_toks) == 0:
        # If either is no-answer, then F1 is 1 if they agree, 0 otherwise
        return int(gold_toks == pred_toks)

    if num_same == 0:
        return 0

    precision = 1.0 * num_same / len(pred_toks)
    recall = 1.0 * num_same / len(gold_toks)
    f1 = (2 * precision * recall) / (precision + recall)
    return f1


def compute_f1(a_gold: str, a_pred: str) -> float:
    """Compute the F1 score."""
    gold_toks = get_tokens(a_gold)
    pred_toks = get_tokens(a_pred)
    return compute_f1_from_tokens(gold_toks, pred_toks)

In [798]:
def evaluate(df):
    for i, ex in df.iterrows():
        answers = ex.answers
        pred = ex.generated_answer
        # The result is the highest EM from the available answer strings:
        df.loc[i, 'em'] = max([compute_exact(ans, pred) for ans in answers])
        df.loc[i, 'f1'] = max([compute_f1(ans, pred) for ans in answers])

    return {"macro_f1": df['f1'].mean(), "em_per": df['em'].mean()}

In [815]:
seed = 123456

np.random.seed(seed)
random.seed(seed)
torch.manual_seed(seed)
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

os.environ["CUDA_VISIBLE_DEVICES"]=""

In [4]:
import openai
import transformers
from transformers import AutoTokenizer, AutoModelForCausalLM

transformers.logging.set_verbosity_error()

In [5]:
def _find_generated_answer(tokens, newline="\n" ): 
    """Our LMs tend to insert initial newline characters before
    they begin generating text. This function ensures that we 
    properly capture the true first line as the answer while
    also ensuring that token probabilities are aligned."""        
    answer_token_indices = []
    char_seen = False            
    for i, tok in enumerate(tokens):
        # This is the main condition: a newline that isn't an initial
        # string of newlines:
        if tok == newline and char_seen:
            break
        # Keep the initial newlines for consistency:
        elif tok == newline and not char_seen:
            answer_token_indices.append(i)
        # Proper tokens:
        elif tok != newline:
            char_seen = True
            answer_token_indices.append(i)
    return answer_token_indices 

# SQuAD

In [7]:
squad = load_dataset("squad")
SquadExample = namedtuple("SquadExample",  "id title context question answers")

def get_squad_split(squad, split="validation"):
    """
    Use `split='train'` for the train split.
    
    Returns
    -------
    list of SquadExample named tuples with attributes
    id, title, context, question, answers
    
    """    
    fields = squad[split].features
    data = zip(*[squad[split][field] for field in fields])
    return [SquadExample(eid, title, context, question, answers["text"]) 
            for eid, title, context, question, answers in data]

Reusing dataset squad (/Users/lara.thompson/.cache/huggingface/datasets/squad/plain_text/1.0.0/d6ec3ceb99ca480ce37cdd35555d6cb2511d223b9150cce08a837ef62ffea453)


  0%|          | 0/2 [00:00<?, ?it/s]

In [8]:
squad_train = get_squad_split(squad, "train")

SquadExample(id='5733be284776f41900661182', title='University_of_Notre_Dame', context='Architecturally, the school has a Catholic character. Atop the Main Building\'s gold dome is a golden statue of the Virgin Mary. Immediately in front of the Main Building and facing it, is a copper statue of Christ with arms upraised with the legend "Venite Ad Me Omnes". Next to the Main Building is the Basilica of the Sacred Heart. Immediately behind the basilica is the Grotto, a Marian place of prayer and reflection. It is a replica of the grotto at Lourdes, France where the Virgin Mary reputedly appeared to Saint Bernadette Soubirous in 1858. At the end of the main drive (and in a direct line that connects through 3 statues and the Gold Dome), is a simple, modern stone statue of Mary.', question='To whom did the Virgin Mary allegedly appear in 1858 in Lourdes France?', answers=['Saint Bernadette Soubirous'])

In [562]:
np.random.randint(len(squad_train))

30975

In [563]:
squad_train[np.random.randint(len(squad_train))]

SquadExample(id='573018a5947a6a140053d0dc', title='Liberia', context='In 1822, the American Colonization Society began sending African-American volunteers to the Pepper Coast to establish a colony for freed African Americans. By 1867, the ACS (and state-related chapters) had assisted in the migration of more than 13,000 African Americans to Liberia. These free African Americans and their descendants married within their community and came to identify as Americo-Liberians. Many were of mixed race and educated in American culture; they did not identify with the indigenous natives of the tribes they encountered. They intermarried largely within the colonial community, developing an ethnic group that had a cultural tradition infused with American notions of political republicanism and Protestant Christianity.', question='What did African Americans who were moved to Liberia identify as? ', answers=['Americo-Liberians'])

In [9]:
squad_dev = get_squad_split(squad)
squad_dev[0]

SquadExample(id='56be4db0acb8001400a502ec', title='Super_Bowl_50', context='Super Bowl 50 was an American football game to determine the champion of the National Football League (NFL) for the 2015 season. The American Football Conference (AFC) champion Denver Broncos defeated the National Football Conference (NFC) champion Carolina Panthers 24–10 to earn their third Super Bowl title. The game was played on February 7, 2016, at Levi\'s Stadium in the San Francisco Bay Area at Santa Clara, California. As this was the 50th Super Bowl, the league emphasized the "golden anniversary" with various gold-themed initiatives, as well as temporarily suspending the tradition of naming each Super Bowl game with Roman numerals (under which the game would have been known as "Super Bowl L"), so that the logo could prominently feature the Arabic numerals 50.', question='Which NFL team represented the AFC at Super Bowl 50?', answers=['Denver Broncos', 'Denver Broncos', 'Denver Broncos'])

In [10]:
dev_exs = sorted(squad_dev, key=lambda x: hash(x.id))[: 200]
dev_exs[0]

SquadExample(id='56beb86b3aeaaa14008c92c0', title='Super_Bowl_50', context="Peyton Manning became the first quarterback ever to lead two different teams to multiple Super Bowls. He is also the oldest quarterback ever to play in a Super Bowl at age 39. The past record was held by John Elway, who led the Broncos to victory in Super Bowl XXXIII at age 38 and is currently Denver's Executive Vice President of Football Operations and General Manager.", question='What role does John Elway currently have in the Broncos franchise?', answers=['Executive Vice President of Football Operations and General Manager', 'Executive Vice President of Football Operations and General Manager', 'Executive Vice President of Football Operations and General Manager.', 'Executive Vice President of Football Operations and General Manager.'])

In [235]:
dev_exs[2]

SquadExample(id='572a1046af94a219006aa78e', title='Economic_inequality', context='Economist Joseph Stiglitz presented evidence in 2009 that both global inequality and inequality within countries prevent growth by limiting aggregate demand. Economist Branko Milanovic, wrote in 2001 that, "The view that income inequality harms growth – or that improved equality can help sustain growth – has become more widely held in recent years. ... The main reason for this shift is the increasing importance of human capital in development. When physical capital mattered most, savings and investments were key. Then it was important to have a large contingent of rich people who could save a greater proportion of their income than the poor and invest it in physical capital. But now that human capital is scarcer than machines, widespread education has become the secret to growth."', question='How does inequality prevent growth?', answers=['by limiting aggregate demand', 'limiting aggregate demand', 'limit

In [241]:
dev_dsp = pd.DataFrame([('Who has a broader scope of profession: E. L. Doctorow or Julia Peterkin?', ['E. L. Doctorow', 'E.L. Doctorow', 'Doctorow']),
       ('What documentary about the Gilgo Beach Killer debuted on A&E?', ['The Killing Season']),
       ('Right Back At It Again contains lyrics co-written by the singer born in what city?', ['Gainesville, Florida', 'Gainesville']),
       ('What year was the party of the winner of the 1971 San Francisco mayoral election founded?', ['1828']),
       ('Which author is English: John Braine or Studs Terkel?', ['John Braine']),
       ('Anthony Dirrell is the brother of which super middleweight title holder?', ['Andre Dirrell']),
       ('In which city is the sports nutrition business established by Oliver Cookson based ?', ['Cheshire', 'Cheshire, UK']),
       ('Find the birth date of the actor who played roles in First Wives Club and Searching for the Elephant.', ['February 13, 1980']),
       ('Kyle Moran was born in the town on what river?', ['Castletown', 'Castletown River']),
       ("What is the name of one branch of Robert D. Braun's speciality?", ['aeronautical engineering', 'astronautical engineering', 'aeronautics', 'astronautics']),
       ("Where was the actress who played the niece in the Priest film born?", ['Surrey', 'Guildford, Surrey']),
       ('Name the movie in which the daughter of Noel Harrison plays Violet Trefusis.', ['Portrait of a Marriage']),
       ('What year was the father of the Princes in the Tower born?', ['1442'])], columns=['question', 'answers'])
dev_dsp


Unnamed: 0,question,answers
0,Who has a broader scope of profession: E. L. D...,"[E. L. Doctorow, E.L. Doctorow, Doctorow]"
1,What documentary about the Gilgo Beach Killer ...,[The Killing Season]
2,Right Back At It Again contains lyrics co-writ...,"[Gainesville, Florida, Gainesville]"
3,What year was the party of the winner of the 1...,[1828]
4,Which author is English: John Braine or Studs ...,[John Braine]
5,Anthony Dirrell is the brother of which super ...,[Andre Dirrell]
6,In which city is the sports nutrition business...,"[Cheshire, Cheshire, UK]"
7,Find the birth date of the actor who played ro...,"[February 13, 1980]"
8,Kyle Moran was born in the town on what river?,"[Castletown, Castletown River]"
9,What is the name of one branch of Robert D. Br...,"[aeronautical engineering, astronautical engin..."


In [242]:
dev = pd.DataFrame([[s.question, s.answers] for s in dev_exs], columns=['question', 'answers'])

In [479]:
dev.loc[2, 'answers']

['by limiting aggregate demand',
 'limiting aggregate demand',
 'limiting aggregate demand']

In [15]:
filename = os.path.join("data", "openqa", "cs224u-openqa-test-unlabeled.txt")
with open(filename) as f:
    test_questions = f.read().splitlines()

In [145]:
test = pd.DataFrame({'question': test_questions})
test['len'] = test['question'].str.len()
test['# ww'] = num_ww

In [146]:
test['# words'] = test['question'].str.split(' ').str.len()

In [140]:
watch_words = ['where', 'what', 'who', 'when', 'that', 'which', 'how']
num_ww = []
for ex in test_questions:
    num_ww.append(np.sum([ww in ex.lower() for ww in watch_words]))

In [147]:
test.groupby('# words').count()

Unnamed: 0_level_0,question,len,# ww
# words,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
3,35,35,35
4,29,29,29
5,38,38,38
6,45,45,45
7,51,51,51
8,73,73,73
9,58,58,58
10,27,27,27
11,15,15,15
12,9,9,9


In [722]:
tmp = copy.copy(test['question'])
for ww in watch_words + ['are', 'is', 'was', 'does', 'were', 'did', 'a', 'the']:
    tmp = tmp.str.removeprefix(ww+' ')
print(tmp.str.removesuffix('?').iloc[:100])

0                                   sang for pink floyd
1                                              epilepsy
2      woodrow wilson was president of which university
3          number of gold medals won by india in hockey
4                             name of dormitory of naga
5                                shakespeare's nickname
6                   season 4 of grace and frankie start
7     played the role of chandler bing in the tv ser...
8     name of the first russian astronaut to do a sp...
9                              canada's two territories
10                  country was theodor schwann born in
11                              battle of waterloo held
12                  became president after harding died
13                                      ozone depletion
14                                     phillies founded
15              developed the vaccination against polio
16      during which season do most thunderstorms occur
17                    lion king musical first pe

In [732]:
test['stripped'] = tmp.str.removesuffix('?')

In [765]:
test[test['# words']<=4].iloc[:20]

Unnamed: 0,question,len,# ww,# words,generated_answer,result_0,query_0,context_0,result_1,analysis_0,query_1,stripped
1,what is epilepsy?,17,1,3,a group of neurological disorders characterize...,what epilepsy is.\nSearch Query: epilepsy,epilepsy,[Epilepsy | Epilepsy Epilepsy is a group of ne...,epilepsy is a group of neurological disorders...,epilepsy is a group of neurological disorders...,,epilepsy
5,what is shakespeare's nickname?,31,1,4,"""The Bard of Staten Island"", ""The Bard of Sout...",what Shakespeare's nickname is.\nSearch Query...,Shakespeares nickname,[William Shakespeare (American football) | Foo...,"William Shakespeare had nicknames such as ""Th...","William Shakespeare had nicknames such as ""Th...",,shakespeare's nickname
13,what is ozone depletion?,24,1,4,Ozone depletion describes two related events o...,what ozone depletion is.\nSearch Query: ozone...,ozone depletion,[Ozone depletion | Ozone depletion Ozone deple...,ozone depletion is caused by manufactured che...,ozone depletion is caused by manufactured che...,,ozone depletion
25,when was algeria colonized?,27,1,4,1830,,algeria colonized,[Poverty in Algeria | also coordinated to boos...,Algeria was colonized by France in 1830; we n...,Algeria was colonized by France in 1830,,algeria colonized
29,what are the poconos?,21,1,4,,,,,,,,poconos
30,what is cryogenics?,19,1,3,,,,,,,,cryogenics
33,what is die-casting?,20,1,3,,,,,,,,die-casting
34,what is caffeine?,17,1,3,,,,,,,,caffeine
54,what is amoxicillin?,20,1,3,,,,,,,,amoxicillin
56,what is angiotensin?,20,1,3,,,,,,,,angiotensin


# ColBERT

In [14]:
import os
import sys
sys.path.insert(0, 'ColBERT/')

from colbert.infra import Run, RunConfig, ColBERTConfig
from colbert.data import Collection
from colbert.searcher import Searcher
from utility.utils.dpr import has_answer, DPR_normalize

2023-02-18 13:13:56.827703: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
  assert(not torch.cuda.is_available(), "cupy must be installed in GPU mode")


In [84]:
!pip install dsp-ml

Collecting dsp-ml
  Downloading dsp-ml-0.1.4.tar.gz (18 kB)
  Preparing metadata (setup.py) ... [?25ldone
[?25hCollecting backoff
  Using cached backoff-2.2.1-py3-none-any.whl (15 kB)
Collecting jupyter
  Using cached jupyter-1.0.0-py2.py3-none-any.whl (2.7 kB)
Collecting qtconsole
  Using cached qtconsole-5.4.0-py3-none-any.whl (121 kB)
Collecting jupyter-console
  Using cached jupyter_console-6.5.1-py3-none-any.whl (23 kB)
Collecting jupyter-core!=5.0.*,>=4.12
  Using cached jupyter_core-5.2.0-py3-none-any.whl (94 kB)
Collecting qtpy>=2.0.1
  Using cached QtPy-2.3.0-py3-none-any.whl (83 kB)
Collecting platformdirs>=2.5
  Using cached platformdirs-3.0.0-py3-none-any.whl (14 kB)
Building wheels for collected packages: dsp-ml
  Building wheel for dsp-ml (setup.py) ... [?25ldone
[?25h  Created wheel for dsp-ml: filename=dsp_ml-0.1.4-py3-none-any.whl size=22008 sha256=f695188cc4825fefbb5a0827bc21810589d9518872ac1bd43b5c33b1616e1abe
  Stored in directory: /Users/lara.thompson/Library/C

In [17]:
import dsp

In [18]:
colbert_server = 'http://ec2-44-228-128-229.us-west-2.compute.amazonaws.com:8893/api/search'
r_search = dsp.ColBERTv2(url=colbert_server)

In [21]:
index_home = os.path.join("experiments", "notebook", "indexes")

collection = os.path.join(index_home, "cs224u.collection.2bits", "cs224u.collection.tsv")

collection = Collection(path=collection)

f'Loaded {len(collection):,} passages'

index_name = "cs224u.collection.2bits"
with Run().context(RunConfig(experiment='notebook')):
    searcher = Searcher(index=index_name)

[Feb 18, 13:16:17] #> Loading collection...
0M 
[Feb 18, 13:16:18] #> Loading collection...
0M 




[Feb 18, 13:16:20] #> Building the emb2pid mapping..
[Feb 18, 13:16:20] len(self.emb2pid) = 14968345


In [620]:
def rm_top_k(query, k=3, source='local'):
    tmp = []
    if source in ['local', 'both']:
        tmp.extend([(p_score, 'local', searcher.collection[p_id]) for p_id, _, p_score in zip(*searcher.search(query, k=k))])
    if source in ['remote', 'both']:
        tmp.extend([(ans['score'], 'remote', ans['text']) for ans in r_search(query, k=k)])
    answers = pd.DataFrame(tmp, columns=['score', 'source', 'text']
                          ).drop_duplicates('text').sort_values('score', ascending=False).reset_index(drop=True)
    return answers

In [459]:
query = "What is DIS" #dev_dsp.loc[0, 'question']
for _, row in rm_top_k(query, k=10).iterrows():
    print(f"{row['score']:.2f}\t{row['source']}\t{row['text']}")
# for passage_id, passage_rank, passage_score in zip(*answers):
#     print(f"\t[{passage_rank}]\t{passage_score:.1f}\t {searcher.collection[passage_id]}")



24.32	local	Adolescence | their lives, youth is susceptible to drug addiction, sexual abuse, peer pressure, violent crimes and other illegal activities. Developmental Intervention Science (DIS) is a fusion of the literature of both developmental and intervention sciences. This association conducts youth interventions that mutually assist both the needs of the community as well as psychologically stranded youth by focusing on risky and inappropriate behaviors while promoting positive self-development along with self-esteem among adolescents. The concept of "adolescence" is criticized by some experts such as Robert Epstein, stating that an undeveloped brain is not the main cause of teenagers' turmoils. Some argue that adolescence
21.88	local	Defense Information Systems Agency | Defense Information Systems Agency The Defense Information Systems Agency (DISA), known as the Defense Communications Agency (DCA) until 1991, is a United States Department of Defense (DoD) combat support agency c

In [282]:
context = rm_top_k(query, k=4).iloc[:2, -1].to_list()



In [736]:
answers = rm_top_k('algeria colonized', k=3, source='both')
answers.loc[:, 'text'].to_list()



['Poverty in Algeria | also coordinated to boost the declining stature of the French monarch. In response to the "fly whist incident", the French decided to declare a full fledge conquest of Algeria in 1830 with the intention of imposing French rule. After three weeks, Algeria was captured and annexed by France. The French colonisation of Algeria in 1830 had a significant impact on Algeria\'s national identity and social system. After French colonisation, there was a large influx of mainly working-class Europeans from Italy, Spain and France into Algeria. The usurpation of Algeria by French soldiers resulted in many Indigenous Algerian farmers becoming dispossessed',
 "Algerian War | colonisation. But France was their ally, and they couldn't renounce this alliance. Nevertheless, it gave them a bad image abroad, and could encourage Algeria to join the eastern side. In situation, the USA had every interest in pushing France to give Algeria its independence. Secondly, the FLN can count on

In [171]:
ash_passages = [ans['text'] for ans in answers]

In [460]:
dis_passage = rm_top_k("What is DIS", k=1).loc[0, 'text']



In [442]:
basilica_passage = rm_top_k("architectural style of the Basilica of the Sacred Heart at Notre Dame", k=1).loc[0, 'text']
basilica_passage



'Basilica of the Sacred Heart, Notre Dame | Basilica of the Sacred Heart, Notre Dame The Basilica of the Sacred Heart in Notre Dame, Indiana, USA, is a Roman Catholic church on the campus of the University of Notre Dame, also serving as the mother church of the Congregation of Holy Cross in the United States. The neo-gothic church has 44 large stained glass windows and murals completed over a 17-year period by the Vatican painter Luigi Gregori. The basilica bell tower is high, making it the tallest University chapel in America. It is consistently ranked among the most beautiful university churches in the country and around the'

In [850]:
dupont_passage = rm_top_k("who was heir to the Du Pont family fortune", k=1, source='remote').loc[0, 'text']

In [858]:
rm_top_k("who was heir to the Du Pont family fortune", 
                              k=1, source='remote').loc[0, 'text']

'John du Pont | John du Pont John Eleuthère du Pont (November 22, 1938 – December 9, 2010) was an American philanthropist, heir to the Du Pont family fortune, and a convicted murderer. He had been a published ornithologist, philatelist, conchologist, sports enthusiast, and self-styled wrestling coach. He died in prison while serving a sentence of 30 years for the murder of Dave Schultz. In 1972, du Pont founded and directed the Delaware Museum of Natural History and contributed to Villanova University and other institutions. In the 1980s, he established a wrestling facility at his Foxcatcher Farm after becoming interested in the sport and'

In [851]:
rockne_passage = rm_top_k("In what year did the team lead by Knute Rockne win the Rose Bowl", k=1, source='local').loc[0, 'text']



In [852]:
moon_passage = rm_top_k("first man on the moon", k=2, source='local').loc[1, 'text']



In [853]:
lourde_passage = rm_top_k("Who did the Virgin Mary appear to in Lourdes France", k=1, source='local').loc[0, 'text']



In [104]:
rockne_passages = [searcher.collection[passage_id] for passage_id in answers[0]]
rockne_passages

['University of Notre Dame | five ties. During his 13 years the Irish won three national championships, had five undefeated seasons, won the Rose Bowl in 1925, and produced players such as George Gipp and the "Four Horsemen". Knute Rockne has the highest winning percentage (.881) in NCAA Division I/FBS football history. Rockne\'s offenses employed the Notre Dame Box and his defenses ran a 7–2–2 scheme. The last game Rockne coached was on December 14, 1930, when he led a group of Notre Dame all-stars against the New York Giants in New York City. The success of its football team made Notre Dame a household',
 "Residence halls at the University of Notre Dame | and a side altar is dedicated to St. Olaf, in honor of Knute Rockne, of Norwegian ancestry. The dormitory was built with the proceeds, $52,000, from the 1925 Rose Bowl. It was named after Fr .Patrick Dillon, a Notre Dame graduate who was only 33 years old in 1865 when he was named the University's second president. He was considered

In [54]:
moon_passages = [searcher.collection[passage_id] for passage_id in answers[0]]
moon_passages

['Space Race | 20, 1969 (3:17:04\xa0pm CDT). The first humans on the Moon waited six hours before they left their craft. At 02:56 UTC, July 21 (9:56\xa0pm CDT July 20), Armstrong became the first human to set foot on the Moon. The first step was witnessed by at least one-fifth of the population of Earth, or about 723 million people. His first words when he stepped off the LM\'s landing footpad were, "That\'s one small step for [a] man, one giant leap for mankind." Aldrin joined him on the surface almost 20 minutes later. Altogether, they spent just under two and one-quarter hours',
 "First Man: The Life of Neil A. Armstrong | First Man: The Life of Neil A. Armstrong First Man: The Life of Neil A. Armstrong is the official biography of Neil Armstrong, the astronaut who became the first man to walk on the Moon, on July 20, 1969. The book was written by James R. Hansen, and was first published in 2005, by Simon & Schuster. The book describes Armstrong's involvement in the United States sp

In [59]:
lourde_passages = [ans['text'] for ans in answers]
lourde_passages

['Our Lady of Lourdes | doctor stated that Soubirous, in ecstasy, was observed to have held her hands over a lit candle without sustaining harm. On 16 July, Soubirous went for the last time to the Grotto. "I have never seen her so beautiful before," she reported. The Church, faced with nationwide questions, decided to institute an investigative commission on 17 November 1858. On 18 January 1860, the local bishop finally declared that: "The Virgin Mary did appear indeed to Bernadette Soubirous." These events established the Marian veneration in Lourdes, which together with Fátima, is one of the most frequented Marian shrines in the world,',
 'Lourdes apparitions | to Soubirous, her visions occurred at the grotto of Massabielle, just outside Lourdes. On 16 July 1858, Soubirous visited the grotto for the last time and said: "I have never seen her so beautiful before." On 18 January 1860, the local bishop declared: "The Virgin Mary did appear indeed to Bernadette Soubirous." In 1958, Pope 

# Eleuther

In [11]:
# "gpt-neo-125M" "gpt-neo-1.3B" "gpt-neo-2.7B" "gpt-j-6B"
eleuther_model_name = "gpt-neo-125M"

eleuther_tokenizer = AutoTokenizer.from_pretrained(
    f"EleutherAI/{eleuther_model_name}", 
    padding_side="left", 
    padding='longest', 
    truncation='longest_first', max_length=2000)
eleuther_tokenizer.pad_token = eleuther_tokenizer.eos_token

eleuther_model = AutoModelForCausalLM.from_pretrained(
    f"EleutherAI/{eleuther_model_name}")

device = "cuda" if torch.cuda.is_available() else "cpu"
eleuther_model = eleuther_model.to(device)

In [21]:
def run_eleuther(prompts, temperature=0.1, top_p=0.95, **generate_kwargs): 
    """
    Parameters
    ----------
    prompts : iterable of str
    temperature : float
        It seems best to set it low for this task!
    top_p : float
       
    For options for `generate_kwargs`, see:
    
    https://huggingface.co/docs/transformers/master/en/main_classes/text_generation#transformers.generation_utils.GenerationMixin.generate
    
    Options that are likely to be especially relevant include 
    `temperature`, `length_penalty`, and the parameters that
    determine the decoding strategy. With `num_return_sequences > 1`,
    the default parameters in this function do multinomial sampling.
    
    Returns
    -------
    list of dicts
    
    {"prompt": str, 
     "generated_text": str, "generated_tokens": list of str, "generated_probs": list of float,
     "answer": str, "answer_tokens": list of str, "answer_probs": list of float
    }
         
    """
    prompt_ids = eleuther_tokenizer(
        prompts, return_tensors="pt", padding=True).input_ids.to(device)
        
    with torch.inference_mode():
        # Automatic mixed precision if possible.
        with torch.cuda.amp.autocast() if torch.cuda.is_available() else nullcontext():
            model_output = eleuther_model.generate(
                prompt_ids,
                temperature=temperature,
                do_sample=True,
                top_p=top_p,           
                max_new_tokens=24,
                num_return_sequences=1,                
                pad_token_id=eleuther_tokenizer.eos_token_id, 
                return_dict_in_generate=True,
                output_scores=True,
                **generate_kwargs)
        
    # Converting output scores using the helpful recipe here:
    # https://discuss.huggingface.co/t/generation-probabilities-how-to-compute-probabilities-of-output-scores-for-gpt2/3175
    gen_ids = model_output.sequences[:, prompt_ids.shape[-1] :]
    gen_probs = torch.stack(model_output.scores, dim=1).softmax(-1)
    gen_probs = torch.gather(gen_probs, 2, gen_ids[:, :, None]).squeeze(-1)
    
    # Generated texts, including the prompts:
    gen_texts = eleuther_tokenizer.batch_decode(
        model_output.sequences, skip_special_tokens=True)
    
    data = []     
    iterator = zip(prompts, gen_ids, gen_texts, gen_probs)    
    for prompt, gen_id, gen_text, gen_prob in iterator:       
        gen_tokens = eleuther_tokenizer.convert_ids_to_tokens(gen_id)
        generated_text = gen_text[len(prompt): ]
        gen_prob = [float(x) for x in gen_prob.cpu().numpy()] # float for JSON storage
        ans_indices = _find_generated_answer(gen_tokens, newline="Ċ")
        answer_tokens = [gen_tokens[i] for i in ans_indices]
        answer_probs = [gen_prob[i] for i in ans_indices]
        answer = "".join(answer_tokens).replace("Ġ", " ").replace("Ċ", "\n")                                       
        data.append({
            "prompt": prompt,
            "generated_text": generated_text,
            "generated_tokens": gen_tokens,
            "generated_probs": gen_prob,
            "generated_answer": answer,
            "generated_answer_probs": answer_probs,
            "generated_answer_tokens": answer_tokens})                        

    return data

In [20]:
results = run_eleuther([    
    "Write a search query that will help answer a complex question.\n\n"  # on +hops: Write N/A if the context contains all information required
          # better still: always try the RM first, to have a context; egs should show no extra queries needed and versions with next query
    "---\n\n"
    "Follow the following format.\n\n"
    "Question: ${the question to be answered}\n"
    "Rationale: Let's think step by step. To answer this question, we first need to find out ${the missing information}\n"
    "Search Query: ${a simple question for seeking the missing information}\n\n"
    "---\n\n"
    "Question: In what year was the first man on the moon born?\n"
    "Rationale: Let's think step by step. To answer this question, we first need to find out the name of the first man to walk on the moon.\n"
    "Search Query: the name of first man to walk on the moon\n\n"
    "---\n\n"
    "Question: The heir to the Du Pont family fortune sponsored what wrestling team?\n"
    "Rationale: Let's think step by step. To answer this question, we first need to find out who the heir to the Du Pont family fortune is.\n"
    "Search Query: Heir to the Du Pont family fortune\n\n"
    "---\n\n"
    "Question: Who was the director of the 2009 movie featuring Peter Outerbridge as William Easton?\n"
    "Rationale: Let's think step by step. To answer this question, we first need to find out the name of the 2009 movie featuring Peter Outerbridge as William Easton.\n"
    "Search Query: 2009 movie featuring Peter Outerbridge as William Easton\n\n"
    "---\n\n"
    "Question: who were the sponsor of the icc world cup when it was hosted outside england for the first time?"
    "Rationale: Let's think step by step. To answer this question, we first need to find out"])
[ex['generated_text'] for ex in results]

[' who the sponsor of the icc world cup was.\nSearch Query: The sponsor of the icc world cup was']

# GPT-3

In [117]:
def run_gpt3(prompts, engine="text-babbage-001", temperature=0.1, top_p=0.95, **gpt3_kwargs):
    """To use this function, sign up for an OpenAI account at
        
    https://beta.openai.com/signup
    
    That should give you $18 in free credits, which is more than enough
    for this assignment assuming you are careful with testing.
    
    Once your account is set up, you can get your API key from your 
    account dashboard and paste it in below as the value of 
    `openai.api_key`.
    
    Parameters
    ----------
    prompts : iterable of str
    engine : str
        This has to be one of the models whose name begins with "text".
        The "instruct" class of models can't be used, since they seem
        to depend on some kinds of QA-relevant supervision.        
        For options, costs, and other details: 
        https://beta.openai.com/docs/engines/gpt-3                
    temperature : float
        It seems best to set it low for this task!
    top_p : float
        
    For information about values for `gpt3_kwargs`, see
    
    https://beta.openai.com/docs/api-reference/completions
    
    Returns
    -------
    list of dicts   
    
    """
    # Fill this in with the value from your OpenAI account. First
    # verify that your account is set up with a spending limit that
    # you are comfortable with. If you just opened your account,
    # you should have $18 in credit and so won't need to supply any
    # payment information.
    openai.api_key = 'hahahahaha'
    
    
    assert engine.startswith("text"), \
        "Please use an engine whose name begins with 'text'."
        
    response = openai.Completion.create(
        engine=engine,       
        prompt=prompts,
        temperature=temperature,
        top_p=top_p,
        max_tokens=128,
        echo=False,   # This function will not work
        logprobs=1,   # properly if any of these
        n=1,          # are changed!
        **gpt3_kwargs)
    
    # From here, we parse each example to get the values
    # we need:
    data = []
    for ex, prompt in zip(response["choices"], prompts):
        tokens = ex["logprobs"]["tokens"]
        logprobs = ex["logprobs"]["token_logprobs"]        
        probs = list(np.exp(logprobs))
        if "<|endoftext|>" in tokens:
            end_i = tokens.index("<|endoftext|>")
            tokens = tokens[ : end_i]  # This leaves off the "<|endoftext|>"
            probs = probs[ : end_i]    # token -- perhaps dubious.
        ans_indices = _find_generated_answer(tokens)
        answer_tokens = [tokens[i] for i in ans_indices]
        answer_probs = [probs[i] for i in ans_indices]
        answer = "".join(answer_tokens)        
        data.append({
            "prompt": prompt,
            "generated_text": ex["text"],
            "generated_tokens": tokens,
            "generated_probs": probs,
            "generated_answer": answer,
            "generated_answer_tokens": answer_tokens,
            "generated_answer_probs": answer_probs})
        
    return data

In [332]:
question = "who got the most streams on spotify 2018?"
initial_query = lambda question: ( "Write a search query that will help answer a complex question.\n\n" 
    "---\n\n"
    "The format we'll use:\n\n"
    "Question: ${our complex question}\n"
    "Rationale: To answer this question step by step, we need to find out ${the missing information}\n"
    "Search Query: ${a simple question for seeking the missing information}\n\n"
    "---\n\n"
    "Question: When was the first man on the moon born?\n"
    "Rationale: To answer this question step by step, we need to find out who the first man to walk on the moon was.\n"
    "Search Query: first man to walk on the moon\n\n"
    "---\n\n"
    "Question: The heir to the Du Pont family fortune sponsored what wrestling team?\n"
    "Rationale: To answer this question step by step, we need to find out who the heir to the Du Pont family fortune is.\n"
    "Search Query: Heir to the Du Pont family fortune\n\n"
    "---\n\n"
    "Question: What is agoraphobia?\n"
    "Rationale: To answer this question step by step, we need to find out what agoraphobia is.\n"
    "Search Query: agoraphobia\n\n"
    "---\n\n"
    "Question: " + question + '\n'+
    "Rationale: To answer this question step by step, we need to find out")

In [333]:
initial_query(question)

"Write a search query that will help answer a complex question.\n\n---\n\nThe format we'll use:\n\nQuestion: ${our complex question}\nRationale: To answer this question step by step, we need to find out ${the missing information}\nSearch Query: ${a simple question for seeking the missing information}\n\n---\n\nQuestion: When was the first man on the moon born?\nRationale: To answer this question step by step, we need to find out who the first man to walk on the moon was.\nSearch Query: first man to walk on the moon\n\n---\n\nQuestion: The heir to the Du Pont family fortune sponsored what wrestling team?\nRationale: To answer this question step by step, we need to find out who the heir to the Du Pont family fortune is.\nSearch Query: Heir to the Du Pont family fortune\n\n---\n\nQuestion: What is agoraphobia?\nRationale: To answer this question step by step, we need to find out what agoraphobia is.\nSearch Query: agoraphobia\n\n---\n\nQuestion: who got the most streams on spotify 2018?\n

In [None]:
check_query = (
    "We have a question to answer that may need fact checking.\n\n"
    "---\n\n"
    "The format we'll use:\n\n"
    "Question: ${our complex question}\n"
    "Your answer: ${the GPT-3 davinci answer that sometimes gets its facts wrong}\n"
    "Analysis: ${the next fact we should double check}"
    "---\n\n"
    "Question: In what year was the first man on the moon born?\n"
    "Your answer: The first man on the moon, Neil Armstrong, was born in 1930\n"
    "Analysis: we should double check who was the first man on the moon\n\n"
    "---\n\n"
    "Question: The heir to the Du Pont family fortune sponsored what wrestling team?\n"
    "Your answer: The heir to the Du Pont family fortune, Pierre S. du Pont IV, sponsored the Wilmington Blue Rocks, a professional wrestling team in the National Wrestling Alliance\n"
    "Analysis: we should double check who was heir to the Du Pont family fortune\n\n"
    
)

In [None]:
initial_wt_context_query = ( "Write a search query that will help answer a complex question.\n\n" 
    "---\n\n"
    "The format we'll use:\n\n"
    "Question: ${our complex question}\n"
    "Your answer: ${what you answered to the question that we should verify}"
    "Rationale: To answer this question step by step, we need to ${the missing information}\n"
    # "Search Query: ${a simple question for seeking the missing information}\n\n"
    "---\n\n"
    "Question: In what year was the first man on the moon born?\n"
    "Your answer: The first man on the moon, Neil Armstrong, was born in 1930\n"
    "Rationale: To answer this question step by step, we need to verify that Neil Armstrong was the first man on the moon and that he was born in 1930.\n"
    # "Search Query: first man to walk on the moon\n\n"
    "---\n\n"
    "Question: The heir to the Du Pont family fortune sponsored what wrestling team?\n"
    "Your answer: The heir to the Du Pont family fortune, Pierre S. du Pont IV, sponsored the Wilmington Blue Rocks, a professional wrestling team in the National Wrestling Alliance\n"
    "Rationale: To answer this question step by step, we need to verify that Pierre S. du Pont IV was heir to the Du Pont family fortune is.\n"
    "Search Query: Heir to the Du Pont family fortune\n\n"
    "---\n\n"
    "Question: What is agoraphobia?\n"
    "Rationale: To answer this question step by step, we need to find out what agoraphobia is.\n"
    "Search Query: agoraphobia\n\n"
    "---\n\n"
    "Question: " + question + 
    "Rationale: To answer this question step by step, we need to find out")

In [632]:
rationale = lambda question, query, context: (
    "Write the step by step analysis of our search results to answer a complex question.\n\n"
    "---\n\n"
    "The format we'll use:\n\n"
    "Question: ${our complex question}\n"
    "Search results for ${the search query}$:\n"
    "${what search returned for that query}\n"
    "Analysis: To answer this question step by step, we know that ${relevant information from the search results}; we need to know ${the missing information or 'nothing else' if we have all the information we need.}.\n"
    "Query: {query for missing information / N/A (if none)}\n"
    "Answer: {answer (if we know it) / N/A (if we don't)}\n\n"
    "---\n\n"
    "Question: When was the first man on the moon born?\n"
    "Search results for the first man on the moon:\n" + moon_passage + "\n"
    "Analysis: To answer this question step by step, we know that Neil Armstrong was the first man on the moon; we need to know when Neil Armstrong was born.\n"
    "Query: Neil Armstrong\n"
    "Answer: N/A\n\n"
    "---\n\n"
    "Question: To whom did the Virgin Mary allegedly appear in 1858 in Lourdes France?\n"
    "Search results for Who did the Virgin Mary appear to in Lourdes France:\n" + lourde_passage + "\n"
    "Analysis: To answer this question step by step, we know that Bernadette Soubirous saw the Virgin Mary in 1958; we need to know nothing else.\n"
    "Query: N/A\n"
    "Answer: Bernadette Soubirous\n\n"
    "---\n\n"
    "Question: In what year did the team lead by Knute Rockne win the Rose Bowl?\n"
    "Search results for In what year did the team lead by Knute Rockne win the Rose Bowl:\n" + rockne_passage + "\n"
    "Analysis: To answer this question step by step, we know that Knute Rockne lead the Notre Dame Team and that the Notre Dame won the Rose Bown in 1925; we need to know nothing else.\n"
    "Query: N/A\n"
    "Answer: 1925\n\n"
    "---\n\n"             
    "Question: The heir to the Du Pont family fortune sponsored what wrestling team?\n"
    f"Search Results: heir to the Du Pont family fortune\n{dupont_passage}\n"
    "Analysis: To answer this question step by step, we know that John Eleuthère du Pont was heir to the Du Pont family fortune; "
               "we need to know what wrestling team John Eleuthère du Pont sponsored.\n"
    "Query: wrestling team John Eleuthère du Pont\n"
    "Answer: N/A\n\n"
    "---\n\n"             
    "Question: What is DIS?\n"
    "Search Results: What is DIS:\n" + dis_passage + "\n"
    "Analysis: Developmental Intervention Science (DIS) is a fusion of the literature of both developmental and intervention sciences; we need to know nothing else.\n"
    "Query: N/A\n"
    "Answer: a fusion of the literature of both developmental and intervention sciences\n\n"
    "---\n\n"
    f"Question: {question}\n"
    f"Search results for {query}:\n" + '\n'.join(context) + "\n"
    "Analysis: To answer this question step by step, we know that"
)

In [491]:
print(rationale(question, query, context))

Write the step by step analysis of our search results to answer a complex question.

---

The format we'll use:

Question: ${our complex question}
Search results for ${the search query}$:
${what search returned for that query}
Analysis: To answer this question step by step, we know that ${relevant information from the search results}; we need to know ${the missing information or 'nothing else' if we have all the information we need.}.
Query: {next search query to make / N/A is none}
Answer: {the answer as short as we can make without losing information / N/A if we can't answer yet}

---

Question: When was the first man on the moon born?
Search results for the first man on the moon:
First Man: The Life of Neil A. Armstrong | First Man: The Life of Neil A. Armstrong First Man: The Life of Neil A. Armstrong is the official biography of Neil Armstrong, the astronaut who became the first man to walk on the Moon, on July 20, 1969. The book was written by James R. Hansen, and was first publi

In [296]:
moon_followup = rm_top_k('when Neil Armstrong was born').loc[:1, 'text'].to_list()
moon_followup



['Neil Armstrong | 1971, Armstrong taught in the Department of Aerospace Engineering at the University of Cincinnati until 1979. He served on the Apollo 13 accident investigation, and on the Rogers Commission, which investigated the Space Shuttle "Challenger" disaster. He acted as a spokesman for several businesses, and appeared in advertising for the automotive brand Chrysler starting in January 1979. Armstrong was born on August 5, 1930, near Wapakoneta, Ohio, to Stephen Koenig Armstrong and Viola Louise Engel. He was of German, Scottish and Scots-Irish ancestry, and had a younger sister, June, and a younger brother, Dean. His father worked as an auditor',
 "Armstrong Air and Space Museum | west of I-75 at exit 111 (Bellefontaine Street) in Wapakoneta. The museum is operated by the Ohio Historical Society and has no formal relationship with Armstrong. The museum is a component of the National Aviation Heritage Area. Neil Armstrong was born on August 5, 1930 on his grandparents' farm,

In [298]:
dupont_followup = rm_top_k('what wrestling team John Eleuthère du Pont').loc[:1, 'text'].to_list()
dupont_followup



['Dave Schultz (wrestler) | Eleuthère du Pont\'s "Team Foxcatcher", which trained at a complex built on the du Pont family farm in Newtown Square, Pennsylvania. In 1996, Schultz was murdered there by a mentally-ill John du Pont. Schultz trained at the Foxcatcher center while preparing for another Olympic bid, as well as coaching the wrestling team. On January 26, 1996, he was shot and killed by John Eleuthère du Pont, the owner of the center. Du Pont had exhibited bizarre and threatening behavior for an extended period preceding the murder. Schultz was age 36 at the time of his death. His body was cremated.',
 'John du Pont | John du Pont John Eleuthère du Pont (November 22, 1938 – December 9, 2010) was an American philanthropist, heir to the Du Pont family fortune, and a convicted murderer. He had been a published ornithologist, philatelist, conchologist, sports enthusiast, and self-styled wrestling coach. He died in prison while serving a sentence of 30 years for the murder of Dave S

In [867]:
finalize('sdfa', 'dfsadf')

'---\n\nQuestion: To whom did the Virgin Mary allegedly appear in 1858 in Lourdes France?\nAnalysis: we know that Bernadette Soubirous saw the Virgin Mary in 1958\nAnswer: Bernadette Soubirous\n\n---\n\nQuestion: In what year did the team lead by Knute Rockne win the Rose Bowl?\nAnalysis: we know that Knute Rockne lead the Notre Dame Team and that the Notre Dame won the Rose Bown in 1925\nAnswer: 1925\n\n---\n\nQuestion: When was the first man on the moon born?\nAnalysis: we know that first man on the moon was Neil Armstrong and that he was born August 5, 1930\nAnswer: August 5, 1930\n\n---\n\nQuestion: The heir to the Du Pont family fortune sponsored what wrestling team?\nAnalysis: we know that John Eleuthère du Pont was an heir to the Du Pont family fortune and that he sponsored Team Foxcatcher\nAnswer: Team Foxcatcher---\n\nQuestion: sdfa\nAnalysis: dfsadf\nAnswer: '

In [115]:
follow_up = (
    "Write the step by step analysis of our search results to answer a complex question.\n\n"
    "---\n\n"
    "The format we'll use:\n\n"
    "Question: ${our complex question}\n"
    "Background: ${the analysis so far}"
    "Search Results: ${the search query}$:\n
    ${what search returned for the question}\n"
    "Analysis: To answer this question step by step, we know that ${relevant information from the background and search results}; we need to know ${the missing information}.\n"
    "---\n\n"
    "Question: When was the first man on the moon born?\n"
    "Background: Neil Armstrong was the first man on the moon\n"
    "Search Results:  when Neil Armstrong was born\n" + '\n'.join(moon_followup[:2]) + "\n"
    "Analysis: To answer this question step by step, we know that first man on the moon was Neil Armstrong and that he was born August 5, 1930; we need to know nothing else.\n"
    "---\n\n"
    "Question: The heir to the Du Pont family fortune sponsored what wrestling team?\n"
    "Search Results:  Heir to the Du Pont family fortune\n" + '\n'.join(dupont_followup[:2]) + "\n"
    "Analysis: To answer this question step by step, we know that John Eleuthère du Pont was an heir to the Du Pont family fortune and that he sponsored Team Foxcatcher; we need to know nothing else."
)

In [357]:
finalize = lambda question, rationale: (
    "Question: To whom did the Virgin Mary allegedly appear in 1858 in Lourdes France?\n"
    "Analysis: we know that Bernadette Soubirous saw the Virgin Mary in 1958\n"
    "Answer: Bernadette Soubirous\n\n"
    "---\n\n"
    "Question: In what year did the team lead by Knute Rockne win the Rose Bowl?\n"
    "Analysis: we know that Knute Rockne lead the Notre Dame Team and that the Notre Dame won the Rose Bown in 1925\n"
    "Answer: 1925\n\n"
    "---\n\n"
    "Question: When was the first man on the moon born?\n"
    "Analysis: we know that first man on the moon was Neil Armstrong and that he was born August 5, 1930\n"
    "Answer: August 5, 1930\n\n"
    "---\n\n"
    "Question: The heir to the Du Pont family fortune sponsored what wrestling team?\n"
    "Analysis: we know that John Eleuthère du Pont was an heir to the Du Pont family fortune and that he sponsored Team Foxcatcher\n"
    "Answer: Team Foxcatcher\n\n"
    "---\n\n"
    f"Question: {question}\n"
    f"Analysis: {rationale}\n"
    "Answer: "
)

In [334]:
# text-ada-001, text-babbage-001, text-curie-001, text-davinci-003
results = run_gpt3([initial_query(question)], engine='text-davinci-003')
[ex['generated_text'] for ex in results]

[' who had the most streams on Spotify in 2018.\nSearch Query: most streamed artist on Spotify 2018']

In [329]:
rm_top_k("Drake spotify 2018").loc[:, 'text'].to_list()



['Criticism of Spotify | to regulate artists. Therefore, we are moving away from implementing a policy around artist conduct". On June 27, 2018, when rapper Drake released his fifth studio album "Scorpion", Spotify revamped most of its playlist to include the rapper and singer\'s face, even when the playlist didn\'t specifically have a Drake song in it. The change was dubbed "Scorpion SZN" by Spotify. However, some users complained about the repetitive use of the artist\'s likeness on the website\'s front page, while Spotify Premium users demanded refunds, dissatisfied with seeing music promotion on what was supposed to be an ad-free service. Spotify gave',
 'Spotify | December 2016, Drake\'s just-under 26 million monthly listeners were overtaken by the Weeknd\'s 36.068 million. Later that month, Drake\'s song "One Dance" became the first song to hit one billion streams on Spotify. Upon its release in August 2017, the single "Look What You Made Me Do" by Taylor Swift earned over eight 

# Test on dev_exs

In [758]:
def e2e(question, engine='text-davinci-003'):
    lead_words = ['where', 'what', 'who', 'when', 'that', 'which', 'how',
                  'are', 'is', 'was', 'does', 'were', 'did', 'a', 'the']
    d = {}
    if len(question.split(' ')) < 5:
        tmp = question
        for w in lead_words:
            tmp = tmp.removeprefix(w + ' ')
        d['query_0'] = tmp.removesuffix('?')
    else:
        d['result_0'] = run_gpt3([initial_query(question)], engine=engine)[0]['generated_text']
        d['query_0'] = d['result_0'].split('Search Query: ')[-1].replace('\"','').replace('\'','')
    d['context_0'] = rm_top_k(d['query_0'], k=2, source='both').loc[:1, 'text'].to_list()
    d['result_1'] = run_gpt3([rationale(question, d['query_0'], d['context_0'])], 
                             engine=engine)[0]['generated_text']
    d['analysis_0'], tmp = d['result_1'].split('; we need to know ')
    _, tmp = tmp.split('\nQuery: ')
    d['query_1'],  d['generated_answer'] = tmp.split('\nAnswer: ')
    
    if d['query_1'] != "N/A": # often this works instead of another loop
        d['generated_answer'] = run_gpt3([finalize(question, d['analysis_0'])], 
                                         engine=engine)[0]['generated_text']
    return d

In [846]:
model = 'text-davinci-003'
for i, ex in dev.iterrows():
    if ex.generated_answer is not None:
        continue
    d = e2e(ex.question)
    dev.loc[i, list(d.keys())] =list(d.values())    
    if d['query_1'] != "N/A":
        print(i, d['generated_answer'])
    # if np.mod(i, 30)==0:
    time.sleep(8)

In [831]:
evaluate(dev)

{'macro_f1': 0.3726383134034481, 'em_per': 0.23}

# Run on TEST

In [847]:
model = 'text-davinci-003'

for i, ex in test.iterrows():
    if ex.generated_answer is not None:
        continue
    d = e2e(ex.question)
    test.loc[i, list(d.keys())] =list(d.values())  
    if d['query_1'] != "N/A":
        print(i, d['generated_answer'])
    time.sleep(8)

  return asarray(a).ndim


230  Allura Red AC (E-129) and Cochineal


  return asarray(a).ndim


231  the substantia nigra area of the brain.


  return asarray(a).ndim
  return asarray(a).ndim
  return asarray(a).ndim


234  Parkinson's Disease


  return asarray(a).ndim


235  Kapalua Airport and Kaanapali Airport


  return asarray(a).ndim
  return asarray(a).ndim
  return asarray(a).ndim
  return asarray(a).ndim


239  a desired range of heart rate reached during aerobic exercise


  return asarray(a).ndim
  return asarray(a).ndim
  return asarray(a).ndim
  return asarray(a).ndim
  return asarray(a).ndim
  return asarray(a).ndim


245  state legislators


  return asarray(a).ndim


246  The University of North Carolina at Chapel Hill, The College of William and Mary, and St. John's College


  return asarray(a).ndim
  return asarray(a).ndim
  return asarray(a).ndim
  return asarray(a).ndim


250  Yeti


  return asarray(a).ndim
  return asarray(a).ndim
  return asarray(a).ndim


253  Germanic


  return asarray(a).ndim
  return asarray(a).ndim
  return asarray(a).ndim
  return asarray(a).ndim
  return asarray(a).ndim
  return asarray(a).ndim


259  fragments of cytoplasm that are derived from the megakaryocytes of the bone marrow


  return asarray(a).ndim


260  a long-term autoimmune disorder that primarily affects joints


  return asarray(a).ndim


261  $5.6 million


  return asarray(a).ndim
  return asarray(a).ndim


263  The RIAA requires 500,000 copies to be sold for an album to be certified gold, while the BEA requires 50,000 copies to be sold for an album to be certified gold.


  return asarray(a).ndim


264  Pulmonary artery


  return asarray(a).ndim


265  Fantasia Barrino


  return asarray(a).ndim


266  July 6, 2018


  return asarray(a).ndim


267  February 15, 1820


  return asarray(a).ndim
  return asarray(a).ndim


269  May 6, 1994


  return asarray(a).ndim


270  Madonna Louise Ciccone


  return asarray(a).ndim


271  Central and North America


  return asarray(a).ndim
  return asarray(a).ndim
  return asarray(a).ndim


274  Atlantic Ocean


  return asarray(a).ndim


275  four exemplifications of the 1225 charter survive.


  return asarray(a).ndim


276  anything that can produce disease


  return asarray(a).ndim


277  42 years old


  return asarray(a).ndim


278  beneath the cerebral cortex in the brain


  return asarray(a).ndim
  return asarray(a).ndim


280  a stream of charged particles released from the upper atmosphere of the Sun


  return asarray(a).ndim
  return asarray(a).ndim
  return asarray(a).ndim


283  a respiratory disease in which scars are formed in the lung tissues, leading to serious breathing problems


  return asarray(a).ndim
  return asarray(a).ndim


285  Monsanto and American Cyanamid


  return asarray(a).ndim


286  The House of Representatives and the Senate.


  return asarray(a).ndim
  return asarray(a).ndim
  return asarray(a).ndim
  return asarray(a).ndim
  return asarray(a).ndim
  return asarray(a).ndim


292  June 1973


  return asarray(a).ndim
  return asarray(a).ndim
  return asarray(a).ndim
  return asarray(a).ndim


296  Wax Crayon


  return asarray(a).ndim
  return asarray(a).ndim


298  Alice Walker was born in Eatonton, Georgia, United States.


  return asarray(a).ndim
  return asarray(a).ndim


300  July 20, 1969


  return asarray(a).ndim


301  traditional Italian sweet yeast bread


  return asarray(a).ndim
  return asarray(a).ndim


303  $7.25 per hour


  return asarray(a).ndim
  return asarray(a).ndim


305  Fayette, New York


  return asarray(a).ndim
  return asarray(a).ndim


307  California


  return asarray(a).ndim


308  Paris, France


  return asarray(a).ndim


309  Alabama Crimson Tide and Georgia Bulldogs


  return asarray(a).ndim
  return asarray(a).ndim
  return asarray(a).ndim


312  Georgia Moffett


  return asarray(a).ndim


313  bright colors such as yellow, blue, green, and red.


  return asarray(a).ndim


314  Pop Art


  return asarray(a).ndim


315  Brazilian real


  return asarray(a).ndim
  return asarray(a).ndim
  return asarray(a).ndim
  return asarray(a).ndim
  return asarray(a).ndim


320  an infectious disease caused by the poliovirus


  return asarray(a).ndim


321  Robin Williams


  return asarray(a).ndim
  return asarray(a).ndim
  return asarray(a).ndim


324  Uganda and Rwanda


  return asarray(a).ndim


325  an amount for each completed year of continuous service


  return asarray(a).ndim
  return asarray(a).ndim
  return asarray(a).ndim
  return asarray(a).ndim
  return asarray(a).ndim


330  electron


  return asarray(a).ndim
  return asarray(a).ndim


332  Monasteries in England


  return asarray(a).ndim
  return asarray(a).ndim


334  Las Vegas, Nevada


  return asarray(a).ndim
  return asarray(a).ndim
  return asarray(a).ndim


337  Fox and MLB International


  return asarray(a).ndim


338  August 27, 1908


  return asarray(a).ndim


339  an endocrine gland in the neck that secretes hormones


  return asarray(a).ndim
  return asarray(a).ndim
  return asarray(a).ndim


342  1861


  return asarray(a).ndim
  return asarray(a).ndim


344  Dari and Pashto


  return asarray(a).ndim
  return asarray(a).ndim
  return asarray(a).ndim


347  Australia is located in the continent of Oceania.


  return asarray(a).ndim
  return asarray(a).ndim
  return asarray(a).ndim


350  Strait of Anián


  return asarray(a).ndim


351  William Seward


  return asarray(a).ndim


352  Yes, rifles must be registered in Michigan.


  return asarray(a).ndim
  return asarray(a).ndim
  return asarray(a).ndim
  return asarray(a).ndim
  return asarray(a).ndim


357  October 26, 2018


  return asarray(a).ndim
  return asarray(a).ndim
  return asarray(a).ndim
  return asarray(a).ndim


361  Rita Levi-Montalcini


  return asarray(a).ndim


362  San Francisco, California


  return asarray(a).ndim
  return asarray(a).ndim


364  Level 7


  return asarray(a).ndim
  return asarray(a).ndim


366  a set of ideas and practices regarding the metaphysical relationship between the mind and body created by L. Ron Hubbard


  return asarray(a).ndim
  return asarray(a).ndim
  return asarray(a).ndim


369  Northwestern Arizona


  return asarray(a).ndim
  return asarray(a).ndim


371  Portraits, self-portraits, and works inspired by the nature and artifacts of Mexico.


  return asarray(a).ndim
  return asarray(a).ndim


373  Army Industrial College


  return asarray(a).ndim


374  Tim McGraw


  return asarray(a).ndim


375  Micron Technology is an American global corporation that specializes in the manufacture and development of advanced memory and storage technologies.


  return asarray(a).ndim


376  Bob Schieffer


  return asarray(a).ndim


377  Radioactivity


  return asarray(a).ndim
  return asarray(a).ndim


379  1864


  return asarray(a).ndim
  return asarray(a).ndim
  return asarray(a).ndim


382  Cardiopulmonary Resuscitation


  return asarray(a).ndim
  return asarray(a).ndim


384  a digital programmable computer


  return asarray(a).ndim


385 
For Francis Drake the explorer, he represented England.
For Francis Drake the musician, he represented the United States.


  return asarray(a).ndim
  return asarray(a).ndim
  return asarray(a).ndim


388  methane and other longer-chain hydrocarbons


  return asarray(a).ndim


389  Japan and Germany


  return asarray(a).ndim


390  The oldest academic honor society in the United States


  return asarray(a).ndim
  return asarray(a).ndim
  return asarray(a).ndim
  return asarray(a).ndim
  return asarray(a).ndim
  return asarray(a).ndim


396  Antonio Meucci and Alexander Graham Bell


  return asarray(a).ndim


397  Melbourne, with a population of 4.67 million.


  return asarray(a).ndim
  return asarray(a).ndim


In [848]:
test.to_hdf('hw_openqa_results.h5', key='test')
# dev.to_hdf('hw_openqa_results.h5', key='dev')

your performance may suffer as PyTables will pickle object types that it cannot
map directly to c-types [inferred_type->mixed,key->block1_values] [items->Index(['question', 'generated_answer', 'result_0', 'query_0', 'context_0',
       'result_1', 'analysis_0', 'query_1', 'stripped'],
      dtype='object')]

  test.to_hdf('hw_openqa_results.h5', key='test')


In [860]:
test.columns

Index(['question', 'len', '# ww', '# words', 'generated_answer', 'result_0',
       'query_0', 'context_0', 'result_1', 'analysis_0', 'query_1',
       'stripped'],
      dtype='object')

In [862]:
gens = test.set_index('question')[['query_0', 'analysis_0', 'generated_answer']].T.to_dict()
gens

{'who sang for pink floyd?': {'query_0': 'Pink Floyd singer',
  'analysis_0': ' Syd Barrett was a founder member of the band Pink Floyd and that he was the lead singer, guitarist and principal songwriter',
  'generated_answer': 'Syd Barrett'},
 'what is epilepsy?': {'query_0': 'epilepsy',
  'analysis_0': ' epilepsy is a group of neurological disorders characterized by epileptic seizures and that it is a disease according to the ILAE 2005 conceptual definition',
  'generated_answer': 'a group of neurological disorders characterized by epileptic seizures'},
 'woodrow wilson was president of which university?': {'query_0': 'Woodrow Wilson university president',
  'analysis_0': ' Woodrow Wilson was the President of Princeton University from 1902 to 1910',
  'generated_answer': 'Princeton University'},
 'number of gold medals won by india in hockey?': {'query_0': 'India gold medals hockey',
  'analysis_0': " the Indian men's team won eight gold, one silver and two bronze medals in the Olymp

In [864]:
with open(os.path.join("data", "openqa", "cs224u-openqa-test-unlabeled.txt")) as f:
    questions = f.read().splitlines()
# for submission:
# 1. Make sure `gens` is a dict with the questions as the keys:
assert all(q in gens for q in questions)
# 2. Make sure the values are dicts and have the key we will use:
assert all(isinstance(d, dict) and "generated_answer" in d for d in gens.values())

In [865]:
with open("cs224u-openqa-bakeoff-entry.json", "wt") as f:
    json.dump(gens, f, indent=4) 