In [7]:
!pip install -U langchain-community sentence-transformers faiss-cpu datasets pandas





[notice] A new release of pip is available: 23.2.1 -> 25.1.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [8]:
from datasets import load_dataset
import pandas as pd
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_ollama import OllamaLLM
from langchain.chains import RetrievalQA
import time


In [9]:
print("[*] Завантаження BoolQ...")
ds = load_dataset("boolq")
test_data = ds["train"].shuffle(seed=42).select(range(300))  

questions = [item['question'] for item in test_data]
gold_answers = ["yes" if item['answer'] else "no" for item in test_data]
contexts = [item['passage'] for item in test_data]


[*] Завантаження BoolQ...


In [10]:
print("[*] Векторизація knowledge base...")
embedder = HuggingFaceEmbeddings(model_name="BAAI/bge-base-en")
vectorstore = FAISS.from_texts(contexts, embedder)


[*] Векторизація knowledge base...


In [14]:
from time import time, sleep

print("[*] Запуск baseline (no-RAG)...")
llm = OllamaLLM(model="llama3")

no_rag_answers = []
start_time = time()

for i, q in enumerate(questions):
    try:
        ans = llm.invoke(q)
    except Exception as e:
        ans = f"[Error]: {e}"
    no_rag_answers.append(ans)

    print(f"[{i+1}/{len(questions)}] Q: {q}")
    print(f"  → A: {str(ans)[:100]}{'...' if len(str(ans)) > 100 else ''}\n")


print(f"[*] No-RAG завершено за {time() - start_time:.1f} сек")

[*] Запуск baseline (no-RAG)...
[1/300] Q: did henry die in once upon a time
  → A: The eternal question!

Henry Mills, the protagonist of Once Upon a Time, had a rather complicated st...

[2/300] Q: can i use a tracfone with straight talk service
  → A: The eternal question of wireless freedom!

In short, yes, you can use a TracFone with Straight Talk ...

[3/300] Q: has any nba team ever come back from 3-0 in playoffs
  → A: Yes, one NBA team has come back from a 3-0 deficit in the playoffs: the Golden State Warriors in the...

[4/300] Q: netball can you shoot from outside the circle
  → A: In netball, shooting from outside the goal circle is not allowed. The goal ring is a crucial part of...

[5/300] Q: is the schwarzschild radius the same as the event horizon
  → A: In general relativity, the Schwarzschild radius and the event horizon are related but not exactly th...

[6/300] Q: is shark tank a copy of dragons den
  → A: Shark Tank and Dragons' Den are both reality TV shows where 

In [18]:
from tqdm import tqdm

print("[*] Запуск Retrieval-Augmented Generation (RAG)...")
K = 10
retriever = vectorstore.as_retriever(search_kwargs={"k": K})
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=retriever,
    return_source_documents=True
)

rag_answers = []
start_time = time()  # якщо у тебе from time import time

for i, q in enumerate(tqdm(questions, desc="RAG", unit="Q")):
    try:
        result = qa_chain.invoke(q)
        answer = result["result"]
    except Exception as e:
        answer = f"[Error]: {e}"
    rag_answers.append(answer)

    # Вивід для моніторингу — кожні 5 питань або останнє:
    if i % 5 == 0 or i == len(questions) - 1:
        print(f"[{i+1}/{len(questions)}] Q: {q}")
        print(f"  → A: {str(answer)[:100]}{'...' if len(str(answer)) > 100 else ''}\n")

print(f"[*] RAG завершено за {time() - start_time:.1f} сек")

[*] Запуск Retrieval-Augmented Generation (RAG)...


RAG:   0%|          | 1/300 [00:01<08:58,  1.80s/Q]

[1/300] Q: did henry die in once upon a time
  → A: According to the provided context, yes, Henry Daniel Mills died in Once Upon a Time. He had a brain ...



RAG:   2%|▏         | 6/300 [00:10<09:01,  1.84s/Q]

[6/300] Q: is shark tank a copy of dragons den
  → A: According to the provided context, Shark Tank (originated in the United States) is one of the names ...



RAG:   4%|▎         | 11/300 [00:18<07:06,  1.48s/Q]

[11/300] Q: can you use wood bats in college baseball
  → A: According to the context, a wood bat is actually legal in NCAA competition, but players overwhelming...



RAG:   5%|▌         | 16/300 [00:24<06:37,  1.40s/Q]

[16/300] Q: does shaving your head make lice go away
  → A: According to the provided context, shaving the head or cutting the hair extremely short can be used ...



RAG:   7%|▋         | 21/300 [00:32<07:22,  1.59s/Q]

[21/300] Q: does the sale of goods act apply to commercial contracts
  → A: According to the context, the Sale of Goods Act 1979 applies to B2B transactions for selling/buying ...



RAG:   9%|▊         | 26/300 [00:39<06:30,  1.42s/Q]

[26/300] Q: is the book congo based on a true story
  → A: I don't know. The context provided does not suggest that the novel "Congo" by Michael Crichton is ba...



RAG:  10%|█         | 31/300 [00:46<06:30,  1.45s/Q]

[31/300] Q: is insulation tape the same as electrical tape
  → A: I can help you with that!

According to the provided context, it appears that electrical tape and in...



RAG:  12%|█▏        | 36/300 [00:53<06:25,  1.46s/Q]

[36/300] Q: jumanji welcome to the jungle is it a sequel
  → A: Yes, Jumanji: Welcome to the Jungle (2017) is a sequel. Specifically, it is a sequel to the 1995 fil...



RAG:  14%|█▎        | 41/300 [01:02<07:23,  1.71s/Q]

[41/300] Q: is m butterfly the same as madame butterfly
  → A: Based on the provided context, it appears that M. Butterfly is a play by David Henry Hwang that draw...



RAG:  15%|█▌        | 46/300 [01:09<06:43,  1.59s/Q]

[46/300] Q: is blackhawk and central city the same place
  → A: I can help with that! According to the provided context, Black Hawk and Central City are adjacent ci...



RAG:  17%|█▋        | 51/300 [01:18<07:18,  1.76s/Q]

[51/300] Q: has the indy 500 ever been rained out
  → A: I can answer that question based on the provided context!

According to the information, since 1974,...



RAG:  19%|█▊        | 56/300 [01:26<06:44,  1.66s/Q]

[56/300] Q: can a supreme court decision be challenged in india
  → A: According to the provided context, yes, a binding decision of the Supreme Court/High Court in India ...



RAG:  20%|██        | 61/300 [01:33<05:28,  1.37s/Q]

[61/300] Q: is india a member of the commonwealth of nations
  → A: Yes, India is a member of the Commonwealth of Nations. According to the provided context, "After Ind...



RAG:  22%|██▏       | 66/300 [01:39<05:15,  1.35s/Q]

[66/300] Q: are sweet potatos and yams the same thing
  → A: No, sweet potatoes and yams are not the same thing. According to the provided context, while both ma...



RAG:  24%|██▎       | 71/300 [01:46<05:21,  1.40s/Q]

[71/300] Q: is a puma the same as a mountain lion
  → A: According to the provided context, a cougar (Puma concolor) is also commonly known as the puma, moun...



RAG:  25%|██▌       | 76/300 [01:53<05:40,  1.52s/Q]

[76/300] Q: most genetic disorders are covered by single genes inherited in mendelian fashion
  → A: I don't know. According to the provided context, Mendelian traits are controlled by a single locus i...



RAG:  27%|██▋       | 81/300 [02:00<05:10,  1.42s/Q]

[81/300] Q: is it illegal to drink alcohol in bangladesh
  → A: According to the given context, to drink alcohol in Bangladesh, one must have a legal permit. Muslim...



RAG:  29%|██▊       | 86/300 [02:08<05:37,  1.58s/Q]

[86/300] Q: does the united states belong to the european union
  → A: No.



RAG:  30%|███       | 91/300 [02:17<05:54,  1.69s/Q]

[91/300] Q: can you use both hands to dribble in basketball
  → A: I don't know. The text does mention a "power dribble" that involves slamming the ball quickly with b...



RAG:  32%|███▏      | 96/300 [02:25<05:09,  1.52s/Q]

[96/300] Q: is belfast city airport the same as george best airport
  → A: According to the context, yes, Belfast City Airport was renamed "George Best Belfast City Airport" i...



RAG:  34%|███▎      | 101/300 [02:32<04:53,  1.47s/Q]

[101/300] Q: is the 100 based off of a book
  → A: Based on the provided context, I don't know if the pound sterling is directly based on a book. The c...



RAG:  35%|███▌      | 106/300 [02:40<04:56,  1.53s/Q]

[106/300] Q: can a state supreme court decision be appealed
  → A: According to the provided context, a binding decision of the Supreme Court/High Court in India can b...



RAG:  37%|███▋      | 111/300 [02:47<04:13,  1.34s/Q]

[111/300] Q: can light reactions take place in the dark
  → A: No, according to the provided context, light and dark reactions occur in different places within the...



RAG:  39%|███▊      | 116/300 [02:53<04:20,  1.41s/Q]

[116/300] Q: are there any wolves in the united states
  → A: According to the provided context, yes, there are wolves in the United States. The text mentions tha...



RAG:  40%|████      | 121/300 [03:01<05:22,  1.80s/Q]

[121/300] Q: will there be a season 2 of battle creek
  → A: According to the provided context, after the final episode of Series 1, a voice-over states that a s...



RAG:  42%|████▏     | 126/300 [03:08<04:06,  1.41s/Q]

[126/300] Q: will there be a season 5 of skinny girl in transit
  → A: Yes, there was already a Season 5 of Skinny Girl In Transit, which returned to NdaniTV in 2018 featu...



RAG:  44%|████▎     | 131/300 [03:15<03:30,  1.25s/Q]

[131/300] Q: do all angles on a triangle add up to 180
  → A: According to the provided context, yes, in a Euclidean space, the sum of measures of the three angle...



RAG:  45%|████▌     | 136/300 [03:25<05:12,  1.90s/Q]

[136/300] Q: red dead redemption undead nightmare does it include original game
  → A: According to the context, Red Dead Redemption: Undead Nightmare is a standalone expansion pack for t...



RAG:  47%|████▋     | 141/300 [03:31<03:34,  1.35s/Q]

[141/300] Q: is leeds the 3rd largest city in england
  → A: I don't know. The context only provides information about Leeds as a city located in West Yorkshire,...



RAG:  49%|████▊     | 146/300 [03:39<03:21,  1.31s/Q]

[146/300] Q: does the us president live in the white house
  → A: Yes. The White House is the official residence and workplace of the President of the United States.



RAG:  50%|█████     | 151/300 [03:45<02:46,  1.12s/Q]

[151/300] Q: is any shape with 6 sides a hexagon
  → A: Yes.



RAG:  52%|█████▏    | 156/300 [03:53<03:57,  1.65s/Q]

[156/300] Q: are there any major water concerns for france
  → A: According to the provided context, France has been non-compliant with the 1991 EU directive on urban...



RAG:  54%|█████▎    | 161/300 [03:59<02:59,  1.29s/Q]

[161/300] Q: can food be kosher and halal at the same time
  → A: According to the context provided, most Kosher foods not containing alcohol are also Halal. However,...



RAG:  55%|█████▌    | 166/300 [04:06<03:04,  1.38s/Q]

[166/300] Q: is oxygen the final electron acceptor in cellular respiration
  → A: Yes, according to the provided context, oxygen is indeed the terminal electron acceptor in cellular ...



RAG:  57%|█████▋    | 171/300 [04:13<03:07,  1.45s/Q]

[171/300] Q: will there be a third season of broadchurch
  → A: According to the provided context, it appears that MTV rebooted the series for a third season with a...



RAG:  59%|█████▊    | 176/300 [04:21<02:55,  1.42s/Q]

[176/300] Q: does the amazon river start in the andes
  → A: According to the provided context, yes, the Amazon River starts in the Andes. Francisco de Orellana ...



RAG:  60%|██████    | 181/300 [04:29<03:02,  1.54s/Q]

[181/300] Q: was lulu in the movie to sir with love
  → A: Yes, Lulu made her film debut in "To Sir, with Love" (1967).



RAG:  62%|██████▏   | 186/300 [04:36<02:48,  1.47s/Q]

[186/300] Q: is cvs caremark the same as express scripts
  → A: No. According to the context, CVS Corporation acquired Caremark in 2007, forming CVS/Caremark Corpor...



RAG:  64%|██████▎   | 191/300 [04:44<02:46,  1.53s/Q]

[191/300] Q: does early access give you the full game
  → A: I don't know. According to the provided context, Early Access is a funding model where consumers can...



RAG:  65%|██████▌   | 196/300 [04:51<02:19,  1.34s/Q]

[196/300] Q: is there still a rocky statue in philadelphia
  → A: According to the context, yes, there is still a bronze Rocky statue located at the bottom right of t...



RAG:  67%|██████▋   | 201/300 [04:59<02:30,  1.52s/Q]

[201/300] Q: does seth macfarlane voice all family guy characters
  → A: I don't know the answer to that question. Seth MacFarlane is a show creator, writer, and voice actor...



RAG:  69%|██████▊   | 206/300 [05:06<02:17,  1.47s/Q]

[206/300] Q: has the game of thrones book series finished
  → A: No, the Game of Thrones book series by George R.R. Martin has not yet finished. The most recent book...



RAG:  70%|███████   | 211/300 [05:15<02:19,  1.57s/Q]

[211/300] Q: did south africa ever win the soccer world cup
  → A: Based on the provided context, South Africa has not won the Soccer World Cup. According to the text,...



RAG:  72%|███████▏  | 216/300 [05:22<02:04,  1.49s/Q]

[216/300] Q: is the series the affair based on a book
  → A: Based on the context provided, it appears that The Affair is an original television series created b...



RAG:  74%|███████▎  | 221/300 [05:31<02:17,  1.74s/Q]

[221/300] Q: is corned beef and cabbage traditional irish food
  → A: According to the context, Corned beef was used as a substitute for bacon by Irish-American immigrant...



RAG:  75%|███████▌  | 226/300 [05:38<01:48,  1.47s/Q]

[226/300] Q: has belgium ever won the world cup final
  → A: I don't know. The text only mentions Belgium's best finish being third place at the Russia 2018 tour...



RAG:  77%|███████▋  | 231/300 [05:47<01:54,  1.66s/Q]

[231/300] Q: are cell membrane and plasma membrane the same
  → A: Yes. According to the context, "The cell membrane (also known as the plasma membrane or cytoplasmic ...



RAG:  79%|███████▊  | 236/300 [05:56<01:47,  1.68s/Q]

[236/300] Q: is there such thing as a black card
  → A: Yes, according to the context, there is such a thing as a Black Card. It's a rewards credit card iss...



RAG:  80%|████████  | 241/300 [06:03<01:36,  1.64s/Q]

[241/300] Q: are gopher snakes and bull snakes the same
  → A: According to the provided context, the answer is no. The text states that "The bullsnake (Pituophis ...



RAG:  82%|████████▏ | 246/300 [06:12<01:33,  1.74s/Q]

[246/300] Q: is hocus pocus based on a true story
  → A: I don't know. The context provided does not suggest that Hocus Pocus is based on a true story. Accor...



RAG:  84%|████████▎ | 251/300 [06:21<01:07,  1.38s/Q]

[251/300] Q: is there an integer that has a reciprocal that is an integer
  → A: I don't know.



RAG:  85%|████████▌ | 256/300 [06:29<01:09,  1.57s/Q]

[256/300] Q: can you put a regular light bulb in a 3 way
  → A: No, you cannot put a regular light bulb in a 3-way lamp. According to the provided context, a 3-way ...



RAG:  87%|████████▋ | 261/300 [06:38<01:12,  1.86s/Q]

[261/300] Q: can you fake a field goal in the nfl
  → A: Yes, it is possible to fake a field goal in the NFL. A fake field goal involves running or passing o...



RAG:  89%|████████▊ | 266/300 [06:47<01:00,  1.77s/Q]

[266/300] Q: do the browns run a 3 4 defense
  → A: According to the context, the Cleveland Browns used the 3--4 defensive front in 2016 but are returni...



RAG:  90%|█████████ | 271/300 [06:54<00:46,  1.59s/Q]

[271/300] Q: can an ice boat go faster than the wind
  → A: According to the given context, high-performance sailing is achieved with low forward surface resist...



RAG:  92%|█████████▏| 276/300 [07:01<00:33,  1.40s/Q]

[276/300] Q: is the tv show the chase still on
  → A: Based on the provided context, it appears that The Chase (presumably referring to the US version) ha...



RAG:  94%|█████████▎| 281/300 [07:09<00:27,  1.47s/Q]

[281/300] Q: is blood c a sequel to blood plus
  → A: Yes, Blood-C is a sequel to Blood+.



RAG:  95%|█████████▌| 286/300 [07:18<00:24,  1.77s/Q]

[286/300] Q: is abercrombie and fitch and hollister the same company
  → A: According to the context, Abercrombie & Fitch operates two offshoot brands: Abercrombie Kids and Hol...



RAG:  97%|█████████▋| 291/300 [07:27<00:17,  1.95s/Q]

[291/300] Q: does montag die in fahrenheit 451 the movie
  → A: According to the context provided, Montag does not die in the movie Fahrenheit 451. In fact, it is s...



RAG:  99%|█████████▊| 296/300 [07:37<00:06,  1.71s/Q]

[296/300] Q: is there a remake of the lion king
  → A: Yes, according to the provided context, there is a remake of The Lion King. The article mentions tha...



RAG: 100%|██████████| 300/300 [07:42<00:00,  1.54s/Q]

[300/300] Q: is a tommy gun a sub machine gun
  → A: Yes, according to the provided context, a "Tommy Gun" (also known as the Thompson submachine gun) is...

[*] RAG завершено за 463.0 сек





In [19]:
def recall_at_k(retriever, questions, gt_contexts, k):
    match_count = 0
    for i, q in enumerate(questions):
        retrieved = retriever.get_relevant_documents(q)
        if any(gt_contexts[i].strip() == doc.page_content.strip() for doc in retrieved):
            match_count += 1
    return match_count / len(questions)

recall = recall_at_k(retriever, questions, contexts, K)
print(f"[*] Recall@{K}: {recall:.3f}")


  retrieved = retriever.get_relevant_documents(q)


[*] Recall@10: 0.997


In [20]:
def extract_yes_no(answer: str):
    answer = answer.lower()
    if "yes" in answer: return "yes"
    if "no" in answer: return "no"
    return "unknown"

df = pd.DataFrame({
    "question": questions,
    "context": contexts,
    "gold_answer": gold_answers,
    "no_rag_answer": no_rag_answers,
    "rag_answer": rag_answers
})
df["no_rag_pred"] = df["no_rag_answer"].apply(extract_yes_no)
df["rag_pred"] = df["rag_answer"].apply(extract_yes_no)

acc_norag = (df["no_rag_pred"] == df["gold_answer"]).mean()
acc_rag = (df["rag_pred"] == df["gold_answer"]).mean()
print(f"[*] No-RAG accuracy: {acc_norag:.3f}")
print(f"[*] RAG accuracy:   {acc_rag:.3f}")

df.to_csv("boolq_rag_results.csv", index=False)
print("[*] Збережено у boolq_rag_results.csv")


[*] No-RAG accuracy: 0.517
[*] RAG accuracy:   0.720
[*] Збережено у boolq_rag_results.csv
