In [1]:
import json
import os

from primeqa.components.retriever.dense import ColBERTRetriever
from primeqa.components.reader.prompt import PromptGPTReader
from primeqa.pipelines.qa_pipeline import QAPipeline

{"time":"2023-02-13 13:58:34,817", "name": "faiss.loader", "level": "INFO", "message": "Loading faiss with AVX2 support."}
{"time":"2023-02-13 13:58:35,184", "name": "faiss.loader", "level": "INFO", "message": "Successfully loaded faiss with AVX2 support."}


In [2]:
# Set up the ColBERT retriever.
# The saved log shows metadata being read from <index_root>/<index_name>/metadata.json.
collection = "/dccstor/avi7/neural_ir/colbert/data/psgs_w100.tsv"
index_root = "/dccstor/colbert-ir/franzm/indexes/oct2_10_11/oct2_10_11_exp/indexes/"
index_name = "oct2_10_11_indname"

# NOTE(review): max_num_documents presumably caps the passages returned per
# question -- confirm against the ColBERTRetriever documentation.
retriever = ColBERTRetriever(
    index_root=index_root,
    index_name=index_name,
    collection=collection,
    max_num_documents=3,
)
retriever.load()

[Feb 13, 13:58:45] #> base_config.py from_path /dccstor/colbert-ir/franzm/indexes/oct2_10_11/oct2_10_11_exp/indexes//oct2_10_11_indname/metadata.json
[Feb 13, 13:58:45] #> base_config.py from_path args loaded! 
[Feb 13, 13:58:45] #> base_config.py from_path args replaced ! 
[Feb 13, 13:58:54] #>>>>> at ColBERT name (model type) : /dccstor/colbert-ir/franzm/experiments/oct2_7_12_1.5e-06/none/2022-10/09/15.21.39/checkpoints/colbert.dnn.batch_91287.model
[Feb 13, 13:58:54] #>>>>> at BaseColBERT name (model type) : /dccstor/colbert-ir/franzm/experiments/oct2_7_12_1.5e-06/none/2022-10/09/15.21.39/checkpoints/colbert.dnn.batch_91287.model
[Feb 13, 13:59:00] factory model type: xlm-roberta-large
[Feb 13, 13:59:14] get query model type: xlm-roberta-large
[Feb 13, 13:59:15] get doc model type: xlm-roberta-large
[Feb 13, 13:59:52] #> Loading codec...
[Feb 13, 13:59:52] #> base_config.py from_path /dccstor/colbert-ir/franzm/indexes/oct2_10_11/oct2_10_11_exp/indexes//oct2_10_11_indname/metadata.js

In [3]:
# Set up a prompt reader: here we use a GPT reader.
# The key is read from the OPENAI_API_KEY environment variable so that a real
# secret never has to be pasted into (and saved with) the notebook. When the
# variable is unset, the original placeholder string is used unchanged.
reader = PromptGPTReader(api_key=os.environ.get("OPENAI_API_KEY", "API KEY HERE"))
reader.load()

In [4]:
# Set up the QA pipeline by combining the retriever and the reader created above.
# NOTE(review): the saved output after this cell shows a ~21M-item progress bar
# that took about a minute; expect this step to be slow on a fresh kernel.
pipeline = QAPipeline(retriever, reader)

21015325it [01:06, 317140.50it/s]


In [5]:
# Ask a question with the retriever switched off.
# In this mode the saved output contains only "example_id" and "text" (no "passages").
questions = [
    "number of participating countries in tour de france 2017 ?",
]
answers = pipeline.run(questions, use_retriever=False)
print(json.dumps(answers, indent=4))

[
    {
        "example_id": 0,
        "text": "\n\nAnswer: There were 22 teams that participated in the 2017 Tour de France, representing a total of 29 countries."
    }
]


In [7]:
# Ask the same question with the retriever switched on.
# The prefix string is handed to pipeline.run via its `prefix` argument; the
# saved output for this mode additionally carries a "passages" list.
prompt_prefix = "Answer the following question after looking at the text."
questions = [
    "number of participating countries in tour de france 2017 ?",
]
answers = pipeline.run(questions, use_retriever=True, prefix=prompt_prefix)
print(json.dumps(answers, indent=4))

100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 79.38it/s]


[
    {
        "example_id": 0,
        "text": "\n\nAnswer: 32 countries",
        "passages": [
            "\"2017 Tour de France\"\n \"de France. The total number of riders that finished the race was 167. The riders came from 32 countries. Six countries had more than 10 riders in the race: France (39), Italy (18), Belgium (16), Germany (16), the Netherlands (15), and Spain (13). The average age of riders in the race was 29.4 years, ranging from the 22-year-old \u00c9lie Gesbert () to the 40-year-old Haimar Zubeldia (). had the youngest average age while had the oldest. The teams entering the race were: In the lead up to the 2017 Tour de France, Chris Froome () was seen by many pundits\"",
            "\"2018 Tour de France\"\n \"reduced the number of riders per team for Grand Tours from 9 to 8, resulting in a start list total of 176, instead of the usual 198. Of these, 35 competed in their first Tour de France. The total number of riders that finished the race was 145. The riders 

In [8]:
# Ask a second question with the retriever switched off.
# In this mode the saved output contains only "example_id" and "text" (no "passages").
questions = [
    "How many area codes are in New Hampshire ?",
]
answers = pipeline.run(questions, use_retriever=False)
print(json.dumps(answers, indent=4))

[
    {
        "example_id": 0,
        "text": "\n\nThere are three area codes in New Hampshire: 603, 617, and 857."
    }
]


In [9]:
# Ask the second question again, this time with retrieved passages.
# The prefix string is handed to pipeline.run via its `prefix` argument; the
# saved output for this mode additionally carries a "passages" list.
prompt_prefix = "Answer the following question after looking at the text."
questions = [
    "How many area codes are in New Hampshire ?",
]
answers = pipeline.run(questions, use_retriever=True, prefix=prompt_prefix)
print(json.dumps(answers, indent=4))

100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 18.56it/s]


[
    {
        "example_id": 0,
        "text": "\n\nAnswer: One",
        "passages": [
            "\"Area code 603\"\n \"previously allocated telephone numbers, as well as number pooling, the exhaustion time frame has been moved beyond 2025. Since New Hampshire has only one area code, callers in the state can reach any other telephone within the 603 area code with 7-digit dialing. Area code 603 Area code 603 is the sole area code for the U.S. state of New Hampshire in the North American Numbering Plan (NANP). It was created as one of the original 86 numbering plan areas in October 1947. As of April 2011, the 603 code was nearing exhaustion and a second area code for New\"",
            "\"Area code 603\"\n \"Area code 603 Area code 603 is the sole area code for the U.S. state of New Hampshire in the North American Numbering Plan (NANP). It was created as one of the original 86 numbering plan areas in October 1947. As of April 2011, the 603 code was nearing exhaustion and a second a