In [1]:
%load_ext autoreload
%autoreload 2

In [64]:
import gc
import re
import csv
import os
import torch
import Stemmer
import pandas as pd
from tqdm import tqdm
from typing import Any, Dict, Iterator, List, Mapping, Optional
from langchain_core.callbacks.manager import CallbackManagerForChainRun
from langchain_core.language_models import BaseLanguageModel
from langchain_core.prompts import BasePromptTemplate
from langchain_community.graphs.networkx_graph import get_entities
from langchain.chains import GraphQAChain
from langchain.chains.llm import LLMChain
import bm25s
import heapq
import networkx as nx
from utils.preprocessing import stemmer, preprocess_text
from utils.graph import KGraphPreproc
from utils.llm.mistral import MistralLLM
from utils.prompt import GRAPH_QA_PROMPT, ENTITY_PROMPT, NO_CONTEXT_PROMPT, EVALUATE_CONTEXT_PROMPT, RERANK_TRIPLETS_PROMPT, RERANK_CANDIDATE_ENTITIES_PROMPT

# ToG Algorithm pseudocode
given a list of entities $E^{D-1}$:
1. Select all relations $R^D$
2. Select top-N relevant $E^{D-1}$-$R^D$-? | ?-$R^D$-$E^{D-1}$
3. Extend P by top-N paths
4. Decide if enough to answer the question (YES/NO)
5. Repeat (NO), return (YES)


In [1]:
a = set([1,2,3])
b = set([3,4,5])
b.difference(a)

{4, 5}

In [29]:
class ToGChain(GraphQAChain):
    max_exploration_depth: int = 10
    beam_width: int = 25
    max_triplets: int = 250
    plain_qa_chain: LLMChain
    evaluate_context_chain: LLMChain

    def tog_answer(self, question):
        """
        Think-on-Graph QA implementation
        """
        explored_entities = set()
        P = [
            # step: [triplet1, ...], 
        ] # max_depth X beam_width
        max_depth = self.max_exploration_depth
        beam_width = self.beam_width

        # init entities from question        
        extracted_entities = set(
            self.extract_entities(question)
        )
        unexplored_entities = extracted_entities.difference(explored_entities)
        explored_entities.update(unexplored_entities)
        # extract all triplets and rank them
        triplets = self.extract_triplets(unexplored_entities)
        # ranked_triplets: list[triplet]
        ranked_triplets = self.rerank_triplets_bm25(question, triplets, top_k=beam_width)
        # extend the paths by next step
        P.append(ranked_triplets)

        # check at least one path to explore is initialised
        if len(P[0]):
            for depth in range(1, max_depth):
                self._run_manager.on_text(f"Exploring depth: {depth}", color="green", end="\n", verbose=self.verbose)
                ######## Extraction
                tail_entities = set()
                for t in P[depth-1]:
                    tail_entities.add(t.split()[-1])
                # select unexplored tails and update
                unexplored_entities = tail_entities.difference(explored_entities)
                explored_entities.update(unexplored_entities)
                # extract and rank triplets
                triplets = self.extract_triplets(unexplored_entities)
                ranked_triplets = self.rerank_triplets_bm25(question, triplets, top_k=beam_width)
                # extend the paths by next step
                P.append(ranked_triplets)
                ######## Reasoning
                is_enough = self.check_enough_context_to_answer(question, reasoning_chain=P)
                if is_enough:
                    return self.generate_answer_with_context(question, reasoning_chain=P)
                else:
                    continue
        # otherwise, use internal knowledge
        return self.generate_answer_no_context(question)
    
    #### GRAPH OPEATIONS AND RANKING

    def rerank_triplets_llm(self, question, triplets, top_k=max_triplets):
        top_triplets = []
        min_heap = []
        for triplet_portion in triplets[::top_k]:
            self.

    def extract_triplets(self, entities, depth=1):
        subgraph_entities = set()
        for entity in entities:
            processed_entity = preprocess_text(entity)
            node = self.graph.preprocessed_nodes.get(processed_entity)
            if node is None:
                continue
            bfs_nodes = nx.single_source_shortest_path_length(
                self.graph._graph,
                node,
                cutoff=depth
            ).keys()
            subgraph_entities.update(bfs_nodes)
        subgraph = self.graph._graph.subgraph(subgraph_entities)
        # 
        triplets = []
        mid2name_dict = self.graph.mid2name
        for head, tail, attrs in subgraph.edges(data=True):
            rel = attrs.get("relation", None)
            if head and rel and tail:
                triplets.append(
                    f"{mid2name_dict.get(head, '')} {rel} {mid2name_dict.get(tail, '')}"
                )
        return triplets

    
    #### PROMPTING AND CHAINS

    def generate_answer_with_context(self, question, reasoning_chain):
        context = "\n".join(["\n".join(chain) for chain in reasoning_chain])
        self._run_manager.on_text("Full Context:", end="\n", verbose=self.verbose)
        self._run_manager.on_text(context, color="green", end="\n", verbose=self.verbose)
        return self.qa_chain(
            {"question": question, "context": context},
            callbacks=self._run_manager.get_child(),
        )
    
    def generate_answer_no_context(self, question):
        return self.plain_qa_chain(
            {"question": question},
            callbacks=self._run_manager.get_child(),
        )
    
    def check_enough_context_to_answer(self, question, reasoning_chain):
        context = "\n".join(["\n".join(chain) for chain in reasoning_chain])
        self._run_manager.on_text("Checking if the context is sufficient:", end="\n", verbose=self.verbose)
        self._run_manager.on_text("Full Context:", end="\n", verbose=self.verbose)
        self._run_manager.on_text(context, color="green", end="\n", verbose=self.verbose)
        answer = self.evaluate_context_chain(
            {"question": question, "context": context},
            callbacks=self._run_manager.get_child(),
        )
        self._run_manager.on_text(f"Model decision: {answer['text']}", end="\n", verbose=self.verbose)
        if "yes" in answer["text"].strip().lower():
            return True
        return False

    def extract_entities(self, string):
        entity_string = self.entity_extraction_chain.run(string)
        self._run_manager.on_text("Entities Extracted:", end="\n", verbose=self.verbose)
        self._run_manager.on_text(
            entity_string, color="green", end="\n", verbose=self.verbose
        )
        entities = get_entities(entity_string)
        return entities

    #### INITIALIZATION AND INVOKATION

    def _call(
        self,
        inputs: Dict[str, Any],
        run_manager: Optional[CallbackManagerForChainRun] = None,
    ) -> Dict[str, str]:
        """Extract entities, look up info and answer question."""
        _run_manager = run_manager or CallbackManagerForChainRun.get_noop_manager()
        self._run_manager = _run_manager
        question = inputs[self.input_key]        
        # 
        result = self.tog_answer(question)
        return {self.output_key: result[self.qa_chain.output_key]}
    
    @classmethod
    def from_llm(
        cls,
        llm: BaseLanguageModel,
        qa_prompt: BasePromptTemplate = GRAPH_QA_PROMPT,
        qa_no_context_prompt: BasePromptTemplate = NO_CONTEXT_PROMPT,
        evaluate_context_prompt: BasePromptTemplate = EVALUATE_CONTEXT_PROMPT,
        entity_prompt: BasePromptTemplate = ENTITY_PROMPT,
        **kwargs: Any,
    ) -> GraphQAChain:
        """Initialize from LLM."""
        qa_chain = LLMChain(llm=llm, prompt=qa_prompt)
        plain_qa_chain = LLMChain(llm=llm, prompt=qa_no_context_prompt)
        evaluate_context_chain = LLMChain(llm=llm, prompt=evaluate_context_prompt)
        entity_chain = LLMChain(llm=llm, prompt=entity_prompt)

        return cls(
            qa_chain=qa_chain,
            plain_qa_chain=plain_qa_chain,
            evaluate_context_chain=evaluate_context_chain,
            entity_extraction_chain=entity_chain,
            **kwargs,
        )

In [6]:
metaqa = pd.read_csv(f"/datasets/MetaQA/3hop/qa_test.txt", sep="\t", header=None)
metaqa.rename(columns={0: "Question", 1: "Answers"}, inplace=True)
metaqa_graph = KGraphPreproc.get_metaqa_graph()

In [3]:
fbqa = pd.read_json("/datasets/FreebaseQA/FreebaseQA-eval.json")
fbkb_graph = KGraphPreproc.get_fbkb_graph()

In [4]:
mistral = MistralLLM()

In [5]:
def get_fbqa_data(question_row):
    """
    Takes in a dataset row and returns Q and A as strings
    """
    question = question_row.Questions.get("RawQuestion", None)
    parse = question_row.Questions.get("Parses", [None])[0]
    if not parse:
        print(f"error in question: {question}")
        return question, None
    answer = parse.get("Answers")
    return question, answer

In [6]:
q, a = get_fbqa_data(fbqa.iloc[1135])
print(a)
q

[{'AnswersMid': 'm.04v0v72', 'AnswersName': ['david copperfield']}]


"Which Dickens novel features the character 'Uriah Heep'?"

In [7]:
fbkb_graph.preprocessed_nodes[preprocess_text("charles dickens")]

'/m/01v9724'

## Rank rels

In [82]:
def rank_rel_portion(question, entity, rel_portion):
    """
    Prompts the LLM to rank candidate relations and parses the response
    """
    prompt = RERANK_TRIPLETS_PROMPT.format(
        question=question,
        entity=entity,
        relations="; ".join(rel_portion))
    # print(prompt)
    response = mistral.get_response(prompt, temperature=0.15)
    return response

def extract_ranked_rels(rel_set, rel_rank_string, group=0):
    """
    Regex extracts the scores and relations
    Makes sure the extracted relations are real
    """
    pattern = re.compile("[^()]+;\s?[0-9.]+")
    pos = 0
    scored_rels = []
    while m := pattern.search(rel_rank_string, pos):
        pos = m.start() + 1
        rel, rank = m[group].split(";")
        if rel in rel_set:
            scored_rels.append((rel, float(rank)))
    return scored_rels
    

In [83]:
max_size = 250
def rank_entity_rels(question, mid, mid_rels):
    """
    Goes over candidate rels in portions, ranks them
    Returns the list of relevance ranked rels
    """
    rel_heap = []
    step = max_size
    for i in range(0, len(mid_rels), step):
        rel_set = mid_rels[i : i+step]
        answer = rank_rel_portion(question, entity, rel_set)
        # [(rel, rank)]
        ranked_rels = extract_ranked_rels(rel_set, answer)
        for rel, score in ranked_rels:
            if len(rel_heap) < step:
                heapq.heappush(rel_heap, (score, rel, mid))
            else:
                if score > rel_heap[0][0]:
                    heapq.heappushpop(rel_heap, (score, rel, mid))
    top_rels = sorted(rel_heap, key=lambda x: -x[0])
    return top_rels

## Rank entities

In [84]:
def rank_candidate_entities_portion(question, mid, rel, candidates):
    """
    Prompts the LLM to rank candidate entities and parses the response
    """
    prompt = RERANK_CANDIDATE_ENTITIES_PROMPT.format(
        question=question,
        relation=rel,
        entities="; ".join(candidates))
    # print(prompt)
    response = mistral.get_response(prompt, temperature=0.15)
    return response

def extract_ranked_entities(entity_set, entity_set_rank_string, group=0):
    """
    Regex extracts the scores and entities
    Makes sure the extracted entities are real
    """
    pattern = re.compile("[^()]+;\s?[0-9.]+")
    pos = 0
    scored_entities = []
    while m := pattern.search(entity_set_rank_string, pos):
        pos = m.start() + 1
        entity, rank = m[group].split(";")
        if entity in entity_set:
            scored_entities.append((entity, float(rank)))
    return scored_entities

In [88]:
def rank_candidate_triplets(question, mid, rel, rel_score, candidate_entities):
    """
    Goes over candidate other end entities in portions
    Returns the list of ranked resulting triplets
    Score = rel_score * entity_score
    """
    # return [(score, "mid-rel-entity1"), ...]
    triplet_heap = []
    step = max_size
    for i in range(0, len(candidate_entities), step):
        cand_entity_set = candidate_entities[i : i+step]
        answer = rank_candidate_entities_portion(question, entity, rel, cand_entity_set)
        # [(rel, rank)]
        ranked_entities = extract_ranked_entities(cand_entity_set, answer)
        for cand_entity, score in ranked_entities:
            total_score = rel_score*score
            heapq.heappush(triplet_heap, (total_score, (mid, rel, cand_entity)))
    top_triplets = sorted(triplet_heap, key=lambda x: -x[0])
    return top_triplets


In [63]:
q

"Which Dickens novel features the character 'Uriah Heep'?"

In [None]:
entity = "Charles Dickens"
mid = fbkb_graph.preprocessed_nodes.get(preprocess_text(entity))
print(mid)
entities = [mid]

explored_entities = set(entities)
P = [
    [e] for e in entities
    # step: [triplet1, ...], 
] # max_depth X beam_width

max_depth = 5
beam_width = 25
if len(P[0]):
    for depth in range(1, max_depth):
        print("depth:", depth)
        tail_entities = set()
        for t in P[depth-1]:
            tail_entities.add(t.split()[-1])
        
        # search and prune rels
        top_rels = []
        print("exploring entities")
        for entity_mid in entities:
            entity = fbkb_graph.mid2name[entity_mid]
            print(f"exploring {entity}")
            mid_rels = set(map(lambda t: t[2]["relation"], fbkb_graph._graph.edges(mid, data=True)))
            mid_rels = list(mid_rels)
            top_rels.extend(rank_entity_rels(q, entity, mid_rels))
            heapq.heapify(top_rels)
        top_rels = sorted(top_rels, key=lambda x: -x[0])
        # top_rels = [(0.3, '/influence/influence_node/influenced_by', 'Charles_Dickens'), (0.2, '/people/person/places_lived./people/place_lived/location', 'Charles_Dickens'), (0.2, '/people/person/profession', 'Charles_Dickens'), (0.1, '/people/cause_of_death/people', 'Charles_Dickens'), (0.1, '/people/person/nationality', 'Charles_Dickens')]
        print(f"top rels: {top_rels[:5]}")
        # search and prune entities on other end of rels
        top_new_triplets = []
        # [ (tail, top_rel, top_candidate)]
        for rel_score, rel, entity in top_rels:
            mid = fbkb_graph.name2mid[entity]
            print(f"exploring candidate entities for: {rel}")
            candidate_entities = [
                fbkb_graph.mid2name[v] for u,v,data in fbkb_graph._graph.edges(mid, data=True) if data.get("relation") == rel
            ]
            print(f"candidate_entities: {candidate_entities}")
            ranked_triplets = rank_candidate_triplets(q, mid, rel, rel_score, candidate_entities)
            top_new_triplets.extend(ranked_triplets)
            heapq.heapify(top_new_triplets)
        # sort the triplets
        top_new_triplets = sorted(top_new_triplets, key=lambda x: -x[0])
        print(f"top paths: {top_new_triplets[:5]}")
        # remove the scores and extend P
        P.append(
            list(map(lambda t: t[1], top_new_triplets))
        )
        print("End of round \n")
        break
        
    


/m/01v9724
depth: 1
exploring entities
exploring Charles_Dickens


## LLM-guided traversal
1. Extract entities from question (LLM)
2. Fetch relevant triplets (Graph)
3. Rerank triplets based on relevance (BM25, SBERT, LLM)
    3.1 
    3.2 
4. Extend path beam list (P) with top-k triplets
5. Check if the current pathlist is enough to answer (LLM)
6. Repeat or Generate Answer (LLM)

In [38]:
chain = ToGChain.from_llm(
    llm=mistral,
    graph=metaqa_graph,
    verbose=True,
)
chain.invoke(q)



[1m> Entering new ToGChain chain...[0m
Entities Extracted:
[32;1m[1;3mSydney, Australia, Olympics, Triathlon, Men's event, Medals, 2000, 2008, Australia (as the host country)[0m


Split strings: 0it [00:00, ?it/s]

Stem Tokens:   0%|          | 0/1165 [00:00<?, ?it/s]

BM25S Count Tokens:   0%|          | 0/1165 [00:00<?, ?it/s]

BM25S Compute Scores:   0%|          | 0/1165 [00:00<?, ?it/s]

Split strings:   0%|          | 0/1 [00:00<?, ?it/s]

Stem Tokens:   0%|          | 0/1 [00:00<?, ?it/s]

BM25S Retrieve:   0%|          | 0/1 [00:00<?, ?it/s]

[32;1m[1;3mExploring depth: 1[0m


Split strings: 0it [00:00, ?it/s]

Stem Tokens:   0%|          | 0/129 [00:00<?, ?it/s]

BM25S Count Tokens:   0%|          | 0/129 [00:00<?, ?it/s]

BM25S Compute Scores:   0%|          | 0/129 [00:00<?, ?it/s]

Split strings:   0%|          | 0/1 [00:00<?, ?it/s]

Stem Tokens:   0%|          | 0/1 [00:00<?, ?it/s]

BM25S Retrieve:   0%|          | 0/1 [00:00<?, ?it/s]

Checking if the context is sufficient:
Full Context:
[32;1m[1;3molympics has_tags Munich
olympics has_tags Miracle
olympics has_tags Chariots of Fire
olympics has_tags Cool Runnings
olympics has_tags The Other Dream Team
2008 release_year Asterix at the Olympic Games
Lantana has_tags sydney
australia has_tags Australia
Finding Nemo has_tags sydney
Drama has_genre The Country Teacher
Drama has_genre Summer Hours
australia has_tags $9.99
australia has_tags Lantana
australia has_tags On the Beach
story has_tags Australia
Drama has_genre Australia
Chopper has_tags australia
Drama has_genre Men of Honor
Drama has_genre Diamond Men
hugh jackman has_tags X-Men
nicole kidman has_tags Australia
Australia has_tags baz luhrmann
hugh jackman has_tags Australia
2000 release_year The New Country
2000 release_year A Storm in Summer
olympics has_tags Munich
2008 release_year Australia
2000 release_year X-Men
Running has_genre Sport
Lantana has_tags sydney
sydney has_tags Finding Nemo
australia has_t

Split strings: 0it [00:00, ?it/s]

Stem Tokens:   0%|          | 0/4 [00:00<?, ?it/s]

BM25S Count Tokens:   0%|          | 0/4 [00:00<?, ?it/s]

BM25S Compute Scores:   0%|          | 0/4 [00:00<?, ?it/s]

Split strings:   0%|          | 0/1 [00:00<?, ?it/s]

Stem Tokens:   0%|          | 0/1 [00:00<?, ?it/s]

BM25S Retrieve:   0%|          | 0/1 [00:00<?, ?it/s]

Checking if the context is sufficient:
Full Context:
[32;1m[1;3molympics has_tags Munich
olympics has_tags Miracle
olympics has_tags Chariots of Fire
olympics has_tags Cool Runnings
olympics has_tags The Other Dream Team
2008 release_year Asterix at the Olympic Games
Lantana has_tags sydney
australia has_tags Australia
Finding Nemo has_tags sydney
Drama has_genre The Country Teacher
Drama has_genre Summer Hours
australia has_tags $9.99
australia has_tags Lantana
australia has_tags On the Beach
story has_tags Australia
Drama has_genre Australia
Chopper has_tags australia
Drama has_genre Men of Honor
Drama has_genre Diamond Men
hugh jackman has_tags X-Men
nicole kidman has_tags Australia
Australia has_tags baz luhrmann
hugh jackman has_tags Australia
2000 release_year The New Country
2000 release_year A Storm in Summer
olympics has_tags Munich
2008 release_year Australia
2000 release_year X-Men
Running has_genre Sport
Lantana has_tags sydney
sydney has_tags Finding Nemo
australia has_t

Split strings: 0it [00:00, ?it/s]

Stem Tokens:   0%|          | 0/3 [00:00<?, ?it/s]

BM25S Count Tokens:   0%|          | 0/3 [00:00<?, ?it/s]

BM25S Compute Scores:   0%|          | 0/3 [00:00<?, ?it/s]

Split strings:   0%|          | 0/1 [00:00<?, ?it/s]

Stem Tokens:   0%|          | 0/1 [00:00<?, ?it/s]

BM25S Retrieve:   0%|          | 0/1 [00:00<?, ?it/s]

Checking if the context is sufficient:
Full Context:
[32;1m[1;3molympics has_tags Munich
olympics has_tags Miracle
olympics has_tags Chariots of Fire
olympics has_tags Cool Runnings
olympics has_tags The Other Dream Team
2008 release_year Asterix at the Olympic Games
Lantana has_tags sydney
australia has_tags Australia
Finding Nemo has_tags sydney
Drama has_genre The Country Teacher
Drama has_genre Summer Hours
australia has_tags $9.99
australia has_tags Lantana
australia has_tags On the Beach
story has_tags Australia
Drama has_genre Australia
Chopper has_tags australia
Drama has_genre Men of Honor
Drama has_genre Diamond Men
hugh jackman has_tags X-Men
nicole kidman has_tags Australia
Australia has_tags baz luhrmann
hugh jackman has_tags Australia
2000 release_year The New Country
2000 release_year A Storm in Summer
olympics has_tags Munich
2008 release_year Australia
2000 release_year X-Men
Running has_genre Sport
Lantana has_tags sydney
sydney has_tags Finding Nemo
australia has_t

Split strings: 0it [00:00, ?it/s]

Stem Tokens:   0%|          | 0/14 [00:00<?, ?it/s]

BM25S Count Tokens:   0%|          | 0/14 [00:00<?, ?it/s]

BM25S Compute Scores:   0%|          | 0/14 [00:00<?, ?it/s]

Split strings:   0%|          | 0/1 [00:00<?, ?it/s]

Stem Tokens:   0%|          | 0/1 [00:00<?, ?it/s]

BM25S Retrieve:   0%|          | 0/1 [00:00<?, ?it/s]

Checking if the context is sufficient:
Full Context:
[32;1m[1;3molympics has_tags Munich
olympics has_tags Miracle
olympics has_tags Chariots of Fire
olympics has_tags Cool Runnings
olympics has_tags The Other Dream Team
2008 release_year Asterix at the Olympic Games
Lantana has_tags sydney
australia has_tags Australia
Finding Nemo has_tags sydney
Drama has_genre The Country Teacher
Drama has_genre Summer Hours
australia has_tags $9.99
australia has_tags Lantana
australia has_tags On the Beach
story has_tags Australia
Drama has_genre Australia
Chopper has_tags australia
Drama has_genre Men of Honor
Drama has_genre Diamond Men
hugh jackman has_tags X-Men
nicole kidman has_tags Australia
Australia has_tags baz luhrmann
hugh jackman has_tags Australia
2000 release_year The New Country
2000 release_year A Storm in Summer
olympics has_tags Munich
2008 release_year Australia
2000 release_year X-Men
Running has_genre Sport
Lantana has_tags sydney
sydney has_tags Finding Nemo
australia has_t

Split strings: 0it [00:00, ?it/s]

Stem Tokens:   0%|          | 0/1085 [00:00<?, ?it/s]

BM25S Count Tokens:   0%|          | 0/1085 [00:00<?, ?it/s]

BM25S Compute Scores:   0%|          | 0/1085 [00:00<?, ?it/s]

Split strings:   0%|          | 0/1 [00:00<?, ?it/s]

Stem Tokens:   0%|          | 0/1 [00:00<?, ?it/s]

BM25S Retrieve:   0%|          | 0/1 [00:00<?, ?it/s]

Checking if the context is sufficient:
Full Context:
[32;1m[1;3molympics has_tags Munich
olympics has_tags Miracle
olympics has_tags Chariots of Fire
olympics has_tags Cool Runnings
olympics has_tags The Other Dream Team
2008 release_year Asterix at the Olympic Games
Lantana has_tags sydney
australia has_tags Australia
Finding Nemo has_tags sydney
Drama has_genre The Country Teacher
Drama has_genre Summer Hours
australia has_tags $9.99
australia has_tags Lantana
australia has_tags On the Beach
story has_tags Australia
Drama has_genre Australia
Chopper has_tags australia
Drama has_genre Men of Honor
Drama has_genre Diamond Men
hugh jackman has_tags X-Men
nicole kidman has_tags Australia
Australia has_tags baz luhrmann
hugh jackman has_tags Australia
2000 release_year The New Country
2000 release_year A Storm in Summer
olympics has_tags Munich
2008 release_year Australia
2000 release_year X-Men
Running has_genre Sport
Lantana has_tags sydney
sydney has_tags Finding Nemo
australia has_t

Split strings: 0it [00:00, ?it/s]

Stem Tokens:   0%|          | 0/106 [00:00<?, ?it/s]

BM25S Count Tokens:   0%|          | 0/106 [00:00<?, ?it/s]

BM25S Compute Scores:   0%|          | 0/106 [00:00<?, ?it/s]

Split strings:   0%|          | 0/1 [00:00<?, ?it/s]

Stem Tokens:   0%|          | 0/1 [00:00<?, ?it/s]

BM25S Retrieve:   0%|          | 0/1 [00:00<?, ?it/s]

Checking if the context is sufficient:
Full Context:
[32;1m[1;3molympics has_tags Munich
olympics has_tags Miracle
olympics has_tags Chariots of Fire
olympics has_tags Cool Runnings
olympics has_tags The Other Dream Team
2008 release_year Asterix at the Olympic Games
Lantana has_tags sydney
australia has_tags Australia
Finding Nemo has_tags sydney
Drama has_genre The Country Teacher
Drama has_genre Summer Hours
australia has_tags $9.99
australia has_tags Lantana
australia has_tags On the Beach
story has_tags Australia
Drama has_genre Australia
Chopper has_tags australia
Drama has_genre Men of Honor
Drama has_genre Diamond Men
hugh jackman has_tags X-Men
nicole kidman has_tags Australia
Australia has_tags baz luhrmann
hugh jackman has_tags Australia
2000 release_year The New Country
2000 release_year A Storm in Summer
olympics has_tags Munich
2008 release_year Australia
2000 release_year X-Men
Running has_genre Sport
Lantana has_tags sydney
sydney has_tags Finding Nemo
australia has_t

Split strings: 0it [00:00, ?it/s]

Stem Tokens:   0%|          | 0/316 [00:00<?, ?it/s]

BM25S Count Tokens:   0%|          | 0/316 [00:00<?, ?it/s]

BM25S Compute Scores:   0%|          | 0/316 [00:00<?, ?it/s]

Split strings:   0%|          | 0/1 [00:00<?, ?it/s]

Stem Tokens:   0%|          | 0/1 [00:00<?, ?it/s]

BM25S Retrieve:   0%|          | 0/1 [00:00<?, ?it/s]

Checking if the context is sufficient:
Full Context:
[32;1m[1;3molympics has_tags Munich
olympics has_tags Miracle
olympics has_tags Chariots of Fire
olympics has_tags Cool Runnings
olympics has_tags The Other Dream Team
2008 release_year Asterix at the Olympic Games
Lantana has_tags sydney
australia has_tags Australia
Finding Nemo has_tags sydney
Drama has_genre The Country Teacher
Drama has_genre Summer Hours
australia has_tags $9.99
australia has_tags Lantana
australia has_tags On the Beach
story has_tags Australia
Drama has_genre Australia
Chopper has_tags australia
Drama has_genre Men of Honor
Drama has_genre Diamond Men
hugh jackman has_tags X-Men
nicole kidman has_tags Australia
Australia has_tags baz luhrmann
hugh jackman has_tags Australia
2000 release_year The New Country
2000 release_year A Storm in Summer
olympics has_tags Munich
2008 release_year Australia
2000 release_year X-Men
Running has_genre Sport
Lantana has_tags sydney
sydney has_tags Finding Nemo
australia has_t

Split strings: 0it [00:00, ?it/s]

Stem Tokens:   0%|          | 0/824 [00:00<?, ?it/s]

BM25S Count Tokens:   0%|          | 0/824 [00:00<?, ?it/s]

BM25S Compute Scores:   0%|          | 0/824 [00:00<?, ?it/s]

Split strings:   0%|          | 0/1 [00:00<?, ?it/s]

Stem Tokens:   0%|          | 0/1 [00:00<?, ?it/s]

BM25S Retrieve:   0%|          | 0/1 [00:00<?, ?it/s]

Checking if the context is sufficient:
Full Context:
[32;1m[1;3molympics has_tags Munich
olympics has_tags Miracle
olympics has_tags Chariots of Fire
olympics has_tags Cool Runnings
olympics has_tags The Other Dream Team
2008 release_year Asterix at the Olympic Games
Lantana has_tags sydney
australia has_tags Australia
Finding Nemo has_tags sydney
Drama has_genre The Country Teacher
Drama has_genre Summer Hours
australia has_tags $9.99
australia has_tags Lantana
australia has_tags On the Beach
story has_tags Australia
Drama has_genre Australia
Chopper has_tags australia
Drama has_genre Men of Honor
Drama has_genre Diamond Men
hugh jackman has_tags X-Men
nicole kidman has_tags Australia
Australia has_tags baz luhrmann
hugh jackman has_tags Australia
2000 release_year The New Country
2000 release_year A Storm in Summer
olympics has_tags Munich
2008 release_year Australia
2000 release_year X-Men
Running has_genre Sport
Lantana has_tags sydney
sydney has_tags Finding Nemo
australia has_t

Split strings: 0it [00:00, ?it/s]

Stem Tokens:   0%|          | 0/56 [00:00<?, ?it/s]

BM25S Count Tokens:   0%|          | 0/56 [00:00<?, ?it/s]

BM25S Compute Scores:   0%|          | 0/56 [00:00<?, ?it/s]

Split strings:   0%|          | 0/1 [00:00<?, ?it/s]

Stem Tokens:   0%|          | 0/1 [00:00<?, ?it/s]

BM25S Retrieve:   0%|          | 0/1 [00:00<?, ?it/s]

Checking if the context is sufficient:
Full Context:
[32;1m[1;3molympics has_tags Munich
olympics has_tags Miracle
olympics has_tags Chariots of Fire
olympics has_tags Cool Runnings
olympics has_tags The Other Dream Team
2008 release_year Asterix at the Olympic Games
Lantana has_tags sydney
australia has_tags Australia
Finding Nemo has_tags sydney
Drama has_genre The Country Teacher
Drama has_genre Summer Hours
australia has_tags $9.99
australia has_tags Lantana
australia has_tags On the Beach
story has_tags Australia
Drama has_genre Australia
Chopper has_tags australia
Drama has_genre Men of Honor
Drama has_genre Diamond Men
hugh jackman has_tags X-Men
nicole kidman has_tags Australia
Australia has_tags baz luhrmann
hugh jackman has_tags Australia
2000 release_year The New Country
2000 release_year A Storm in Summer
olympics has_tags Munich
2008 release_year Australia
2000 release_year X-Men
Running has_genre Sport
Lantana has_tags sydney
sydney has_tags Finding Nemo
australia has_t

{'query': "Triathlon has been an Olympic sport since the 2000 Summer Olympics in Sydney, Australia. Which country has won the most medals in the men's event in the first 3 Olympiads to 2008?",
 'result': 'Australia, USA, Germany'}