In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import pandas as pd
import torch
import gc
import Stemmer
import re
from tqdm import tqdm
import csv
import gc
from utils.graph import KGraphPreproc
from utils.graph.chain import GraphChain
from utils.llm.mistral import MistralLLM

In [3]:
fbqa = pd.read_json("/datasets/FreebaseQA/FreebaseQA-eval.json")
def get_fbqa_data(question_row):
    """
    Takes in a dataset row and returns Q and A as strings
    """
    question = question_row.Questions.get("RawQuestion", None)
    parse = question_row.Questions.get("Parses", [None])[0]
    if not parse:
        print(f"error in question: {question}")
        return question, None
    answer = parse.get("Answers")
    return question, answer

####### load the graph
fbkb_graph = KGraphPreproc.get_fbkb_graph()

In [4]:
from utils.prompt import GRAPH_QA_PROMPT, ENTITY_PROMPT

mistral = MistralLLM()
chain = GraphChain.from_llm(
    llm=mistral,
    graph=fbkb_graph,
    qa_prompt=GRAPH_QA_PROMPT,
    entity_prompt=ENTITY_PROMPT,
    verbose=False,
)

In [5]:
def get_response(prompt):
    global chain
    # del mistral
    gc.collect()
    torch.cuda.empty_cache()
    r = chain.invoke(prompt)
    return r["result"]

In [60]:
chain.exploration_depth = 1

In [79]:

###### tests

experiment_name = "kb2"
bline_path = f"/datasets/FreebaseQA/results/{experiment_name}.csv"
bline = pd.read_csv(bline_path)
l = len(bline)
baseline_results = list(bline.Model.values)

fbqa = pd.read_json("/datasets/FreebaseQA/FreebaseQA-eval.json")
# fbqa.Questions[0].get("RawQuestions", None)
results = []
for i, r in tqdm(list(fbqa.iterrows())):
    if i == 10:
        break
    if i < l:
        continue
    q, a = get_fbqa_data(r)
    # 
    # print("Question:", q)
    # print("Answer:", a)
    # print("Prompt:", prompt)
    print(q)
    response = get_response(q)
    print("Model:", response)
    results.append(response)

    if i % 250 == 0:
        with open(bline_path, "w") as f:
            writer = csv.writer(f)
            writer.writerow(["Model"])
            for r in results:
                writer.writerow([str(r)])

with open(bline_path, "w") as f:
    writer = csv.writer(f)
    writer.writerow(["Model"])
    for r in results:
        writer.writerow([str(r)])


  0%|          | 0/3996 [00:00<?, ?it/s]

Who is the female presenter of the Channel 4 quiz show '1001 things you should know'?


[1m> Entering new GraphChain chain...[0m
Entities Extracted:
[32;1m[1;3mAnne Robinson[0m
Full Context:
[32;1m[1;3m[0m


  0%|          | 1/3996 [00:00<49:11,  1.35it/s]


[1m> Finished chain.[0m
Model: Anita Rani
Who produced the film 12 Angry Men, which was scripted by Reginald Rose, starred Henry Fonda and was directed by Sidney Lumet?


[1m> Entering new GraphChain chain...[0m
Entities Extracted:
[32;1m[1;3m12 Angry Men, Reginald Rose, Henry Fonda, Sidney Lumet[0m
['BAFTA_Award_for_Best_Actor_in_a_Leading_Role /award/award_category/nominees./award/award_nomination/nominated_for Network', 'BAFTA_Award_for_Best_Actor_in_a_Leading_Role /award/award_category/nominees./award/award_nomination/nominated_for Murder_on_the_Orient_Express', 'BAFTA_Award_for_Best_Actor_in_a_Leading_Role /award/award_category/nominees./award/award_nomination/nominated_for Serpico', 'BAFTA_Award_for_Best_Actor_in_a_Leading_Role /award/award_category/nominees./award/award_nomination/nominated_for On_Golden_Pond', 'BAFTA_Award_for_Best_Actor_in_a_Leading_Role /award/award_winning_work/awards_won./award/award_honor/award Dog_Day_Afternoon', 'BAFTA_Award_for_Best_Actor_in_a_L

Split strings: 0it [00:00, ?it/s]

Stem Tokens:   0%|          | 0/218 [00:00<?, ?it/s]

BM25S Count Tokens:   0%|          | 0/218 [00:00<?, ?it/s]

BM25S Compute Scores:   0%|          | 0/218 [00:00<?, ?it/s]

Split strings:   0%|          | 0/1 [00:00<?, ?it/s]

Stem Tokens:   0%|          | 0/1 [00:00<?, ?it/s]

BM25S Retrieve:   0%|          | 0/1 [00:00<?, ?it/s]

Full Context:
[32;1m[1;3mSidney_Lumet /people/person/profession Film_Producer-GB
Sidney_Lumet /people/person/profession Television_producer-GB
Equus /film/director/film Sidney_Lumet
Serpico /film/director/film Sidney_Lumet
Sidney_Lumet /film/director/film The_Verdict
Sidney_Lumet /film/director/film The_Wiz
The_Manchurian_Candidate /film/actor/film./film/performance/film Sidney_Lumet
Sidney_Lumet /film/film/film_festivals 1982_Cannes_Film_Festival
Sidney_Lumet /film/director/film Murder_on_the_Orient_Express
Sidney_Lumet /award/award_nominee/award_nominations./award/award_nomination/award BAFTA_Award_for_Best_Direction
War_and_Peace /film/actor/film./film/performance/film Henry_Fonda
Henry_Fonda /film/actor/film./film/performance/film The_Longest_Day
Henry_Fonda /film/actor/film./film/performance/film In_Harm's_Way
Henry_Fonda /film/actor/film./film/performance/film The_Grapes_of_Wrath
Henry_Fonda /film/actor/film./film/performance/film On_Golden_Pond
Sidney_Lumet /people/person/prof

  0%|          | 2/3996 [00:05<3:17:38,  2.97s/it]


[1m> Finished chain.[0m
Model: Sidney Lumet (produced the film 12 Angry Men)
Who directed the films; The Fisher King (1991), 12 Monkeys (1995) and the Brothers Grimm (2005)?


[1m> Entering new GraphChain chain...[0m
Entities Extracted:
[32;1m[1;3mTerry Gilliam[0m
['The_Brothers_Grimm /film/film/distributors./film/film_film_distributor_relationship/region United_Kingdom', 'The_Brothers_Grimm /film/director/film Terry_Gilliam', 'Minnesota /people/marriage_union_type/unions_of_this_type./people/marriage/location_of_ceremony Marriage', 'Minnesota /location/location/contains Minneapolis', 'Minnesota /people/person/places_lived./people/place_lived/location Terry_Gilliam', 'Minneapolis /location/hud_county_place/place Minneapolis', 'Minneapolis /people/person/place_of_birth Terry_Gilliam', 'BAFTA_Academy_Fellowship_Award /award/award_nominee/award_nominations./award/award_nomination/award Terry_Gilliam', 'BAFTA_Academy_Fellowship_Award /award/award_nominee/award_nominations./award/aw

Split strings: 0it [00:00, ?it/s]

Stem Tokens:   0%|          | 0/125 [00:00<?, ?it/s]

BM25S Count Tokens:   0%|          | 0/125 [00:00<?, ?it/s]

BM25S Compute Scores:   0%|          | 0/125 [00:00<?, ?it/s]

Split strings:   0%|          | 0/1 [00:00<?, ?it/s]

Stem Tokens:   0%|          | 0/1 [00:00<?, ?it/s]

BM25S Retrieve:   0%|          | 0/1 [00:00<?, ?it/s]

Full Context:
[32;1m[1;3mThe_Brothers_Grimm /film/director/film Terry_Gilliam
Terry_Gilliam /film/director/film The_Fisher_King
The_Brothers_Grimm /film/film/distributors./film/film_film_distributor_relationship/region United_Kingdom
Academy_Award_for_Best_Original_Screenplay /award/award_category/nominees./award/award_nomination/nominated_for The_Fisher_King
Golden_Globe_Award_for_Best_Director_-_Motion_Picture /award/award_category/nominees./award/award_nomination/nominated_for The_Fisher_King
Michael_Palin /film/actor/film./film/performance/film Brazil
Eric_Idle /film/actor/film./film/performance/film The_Adventures_of_Baron_Munchausen
Brazil /film/film/release_date_s./film/film_regional_release_date/film_release_region United_Kingdom
The_Adventures_of_Baron_Munchausen /film/film/country United_Kingdom
The_Adventures_of_Baron_Munchausen /film/director/film Terry_Gilliam
The_Imaginarium_of_Dr._Parnassus /film/film/country United_Kingdom
The_Imaginarium_of_Dr._Parnassus /film/direct

  0%|          | 3/3996 [00:07<2:44:24,  2.47s/it]


[1m> Finished chain.[0m
Model: Terry Gilliam
Who directed the 2013 film 12 Years a Slave?


[1m> Entering new GraphChain chain...[0m
Entities Extracted:
[32;1m[1;3mSteve McQueen[0m
['Barbra_Streisand /award/award_winner/awards_won./award/award_honor/award_winner Steve_McQueen', 'Barbra_Streisand /people/person/profession Actor-GB', 'Barbra_Streisand /people/person/spouse_s./people/marriage/type_of_union Marriage', 'Barbra_Streisand /people/person/profession Film_Producer-GB', 'Barbra_Streisand /people/person/nationality United_States_of_America', 'Steve_McQueen /people/person/nationality United_States_of_America', 'Steve_McQueen /film/actor/film./film/performance/film The_Sand_Pebbles', 'Steve_McQueen /award/award_nominee/award_nominations./award/award_nomination/award Academy_Award_for_Best_Actor', 'Steve_McQueen /people/person/profession Actor-GB', 'Steve_McQueen /award/award_nominee/award_nominations./award/award_nomination/award_nominee Julie_Andrews', 'Steve_McQueen /peopl

Split strings: 0it [00:00, ?it/s]

Stem Tokens:   0%|          | 0/39 [00:00<?, ?it/s]

BM25S Count Tokens:   0%|          | 0/39 [00:00<?, ?it/s]

BM25S Compute Scores:   0%|          | 0/39 [00:00<?, ?it/s]

Split strings:   0%|          | 0/1 [00:00<?, ?it/s]

Stem Tokens:   0%|          | 0/1 [00:00<?, ?it/s]

BM25S Retrieve:   0%|          | 0/1 [00:00<?, ?it/s]

Full Context:
[32;1m[1;3mSteve_McQueen /film/actor/film./film/performance/film The_Sand_Pebbles
Steve_McQueen /film/actor/film./film/performance/film The_Towering_Inferno
United_States_of_America /film/film/country The_Sand_Pebbles
United_States_of_America /film/film/country The_Towering_Inferno
Steve_McQueen /people/person/profession Film_Producer-GB
Barbra_Streisand /people/person/profession Film_Producer-GB
Marriage /people/marriage_union_type/unions_of_this_type./people/marriage/location_of_ceremony Long_Island
Los_Angeles /people/marriage_union_type/unions_of_this_type./people/marriage/location_of_ceremony Marriage
Los_Angeles /location/hud_county_place/place Los_Angeles
Myocardial_infarction /medicine/disease/risk_factors Male
Actor-GB /dataworld/gardening_hint/split_to Actor-GB
Actor-GB /people/person/profession Julie_Andrews
Julie_Andrews /people/person/spouse_s./people/marriage/type_of_union Marriage
Indianapolis /location/hud_county_place/place Indianapolis
Indianapolis /pe

  0%|          | 4/3996 [00:10<3:19:50,  3.00s/it]


[1m> Finished chain.[0m
Model: The question refers to a film that is not present in the provided context. The context only includes information about films starring Steve McQueen and Barbra Streisand, but it does not provide information about who directed the 2013 film 12 Years a Slave. Therefore, the answer to the question cannot be determined based on the provided context.
Which British athlete won the 100 m. at the 1924 Olympics?


[1m> Entering new GraphChain chain...[0m


  0%|          | 4/3996 [00:11<3:16:22,  2.95s/it]


KeyboardInterrupt: 

In [71]:
import bm25s
import Stemmer  # optional: for stemming
from utils.preprocessing import preprocess_text

ts = bm25s.tokenize(
    map(
        preprocess_text,
        triplets
    )
)
retriever = bm25s.BM25()
retriever.index(ts)

query = "Who produced the film 12 Angry Men, which was scripted by Reginald Rose, starred Henry Fonda and was directed by Sidney Lumet?"
print(preprocess_text(query))
query_tokens = bm25s.tokenize(preprocess_text(query), stemmer=stemmer)

results, scores = retriever.retrieve(query_tokens, k=len(ts))

for i in range(results.shape[1]):
    doc, score = results[0, i], scores[0, i]
    print(f"Rank {i+1} (score: {score:.3f}): {triplets[doc]}")

Split strings: 0it [00:00, ?it/s]

BM25S Count Tokens:   0%|          | 0/218 [00:00<?, ?it/s]

BM25S Compute Scores:   0%|          | 0/218 [00:00<?, ?it/s]

who produc the film 12 angri men which was script by reginald rose star henri fonda and was direct by sidney lumet


Split strings:   0%|          | 0/1 [00:00<?, ?it/s]

Stem Tokens:   0%|          | 0/1 [00:00<?, ?it/s]

BM25S Retrieve:   0%|          | 0/1 [00:00<?, ?it/s]

Rank 1 (score: 4.336): Sidney_Lumet /people/person/profession Film_Producer-GB
Rank 2 (score: 3.653): Sidney_Lumet /people/person/profession Television_producer-GB
