In [1]:
import json
import os
import re
import unicodedata

import qdrant_client
from langchain.schema import Document
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Qdrant
from langchain_huggingface import HuggingFaceEmbeddings

from config import EMBEDDING_MODEL, QDRANT_HOST, QDRANT_API_KEY, QDRANT_COLECTION_NAME, EMBEDDING_SIZE



In [2]:
def load_json(file_path):
    """Parse the UTF-8 JSON file at ``file_path`` and return its content.

    Args:
        file_path: Path of the JSON file to read.

    Returns:
        Whatever ``json.load`` produces for the file (dict, list, ...).

    Raises:
        FileNotFoundError: If nothing exists at ``file_path``.
    """
    if os.path.exists(file_path):
        with open(file_path, "r", encoding="utf-8") as handle:
            return json.load(handle)
    raise FileNotFoundError(f"File not found: {file_path}")
    
def load_txt(file_path):
    """Return the entire content of the UTF-8 text file at ``file_path``.

    Args:
        file_path: Path of the text file to read.

    Returns:
        The file content as a single string.

    Raises:
        FileNotFoundError: If nothing exists at ``file_path``.
    """
    if os.path.exists(file_path):
        with open(file_path, "r", encoding="utf-8") as handle:
            return handle.read()
    raise FileNotFoundError(f"File not found: {file_path}")

def clean_text(text: str) -> str:
    """Normalise whitespace in ``text``.

    Runs of spaces/tabs become a single space, any run of blank
    (whitespace-only) lines becomes exactly one empty line, and leading and
    trailing whitespace is removed.
    """
    single_spaced = re.sub(r'[ \t]+', ' ', text)
    return re.sub(r'\n\s*\n+', '\n\n', single_spaced).strip()

In [3]:
# Read the corpus, normalise its whitespace, and preview the first 1000 characters.
corpus_text = load_txt("data/corpus.txt")
raw_docs = clean_text(corpus_text)

print(raw_docs[:1000])

Title:200+ of the best deals from Amazon's Cyber Monday sale
Passage:Table of Contents Table of Contents Echo, Fire TV, and Kindle deals Apple deals TV deals Laptop deals Headphone and earbud deals Tablet deals Gaming deals Speaker deals Vacuum deals Kitchen deals Smart home deals Fitness deals Beauty tech deals Drone deals Camera deals Lego deals Gift card deals

UPDATE: Nov. 27, 2023, 5:00 a.m. EST This post has been updated with all of the latest Cyber Monday deals available at Amazon.

Amazon is dragging out the year's biggest shopping holiday(s) into 11 days of deals.

The retail giant began its Black Friday sale in the early morning of Friday, Nov. 17 (a week ahead of schedule) and was on top of making the switch to Cyber Monday language in the wee hours of Saturday, Nov. 25. Official Cyber Monday mode, which is currently on through Monday, Nov. 27, includes both a ton of deals carried over from Black Friday plus some new ones.

We're curating a running list of Amazon's best Cybe

In [4]:
# Split the cleaned corpus into chunks (chunk_size=300, chunk_overlap=50) and
# wrap every chunk in a LangChain Document.
splitter = RecursiveCharacterTextSplitter(
    chunk_size=300,
    chunk_overlap=50,
)
chunks = splitter.split_text(raw_docs)

docs = []
for piece in chunks:
    docs.append(Document(page_content=piece))

In [5]:
from qdrant_client.http.models import Distance, VectorParams
# Connect to Qdrant: build the embedding model, open a client with the
# configured host / API key, and wrap the configured collection as a
# LangChain vector store.
embedding = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL)
client = qdrant_client.QdrantClient(QDRANT_HOST, api_key=QDRANT_API_KEY)
vector_store = Qdrant(
    client=client,
    collection_name=QDRANT_COLECTION_NAME,
    embeddings=embedding,
)

  from .autonotebook import tqdm as notebook_tqdm





  vector_store = Qdrant(


In [6]:
# Add the chunked documents to Qdrant (uncomment the next line to run the ingestion)
#vector_store.add_documents(documents=docs)

In [7]:
# Retrieval smoke test: fetch the top-5 chunks for one multi-hop question and
# print the first 300 characters of each hit.
retriever = vector_store.as_retriever(search_kwargs={"k": 5})

query = "Which individual is implicated in both inflating the value of a Manhattan apartment to a figure not yet achieved in New York City's real estate history, according to 'Fortune', and is also accused of adjusting this apartment's valuation to compensate for a loss in another asset's worth, as reported by 'The Age'?"
# `invoke` is the supported replacement for the deprecated
# `get_relevant_documents`. The hits get their own name so the chunk list
# `docs` (the input of the ingestion cell) is not overwritten by search results.
retrieved_docs = retriever.invoke(query)

for i, doc in enumerate(retrieved_docs, 1):
    print(f"[{i}] {doc.page_content[:300]}...\n")


  docs = retriever.get_relevant_documents(query)


[1] Space inflation wasn’t the only issue. Between 2014 and 2015, Trump increased the value of the apartment to $US327 million from $US200 million. The prosecution argues that was to mask a drop in the value of one of his other properties. “A discrepancy of this order of magnitude, by a real estate...

[2] appraisals he’d received from professionals, New York State argues. Trump has been accused of significantly over-valuing his flagship downtown New York property. Credit: Bloomberg For example, 40 Wall Street was appraised by a real estate company at $US200 million in 2011 and $US220 million in...

[3] “In defendants’ world: rent regulated apartments are worth the same as unregulated apartments; restricted land is worth the same as unrestricted land; restrictions can evaporate into thin air; a disclaimer by one party casting responsibility on another party exonerates the other party’s lies,”...

[4] of this order of magnitude, by a real estate developer sizing up his own living space 

In [8]:
from llm import gemini_llm
from typing import List, Dict
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate
from langchain.vectorstores.base import VectorStore
from langchain.schema import BaseRetriever, Document

class IRCoTChain:
    """Two-round retrieve-and-reason pipeline for multi-hop questions.

    ``run`` retrieves context for the question, asks the LLM to reason over
    it, asks the LLM for a follow-up search query based on that reasoning,
    retrieves a second batch of context with that query, and finally asks the
    LLM for an answer given both batches of context and the reasoning.

    Args:
        retriever: Retriever used for both retrieval rounds.
        llm: Language model shared by the three prompt chains.
    """

    # NOTE(review): the recorded notebook output shows warnings pointing at the
    # `LLMChain(` construction and at `.run(...)`; presumably LangChain
    # deprecation warnings — confirm, and consider migrating to `prompt | llm`.

    def __init__(self, retriever: BaseRetriever, llm):
        self.retriever = retriever
        self.llm = llm

        # Step-by-step reasoning over the first batch of retrieved context.
        self.reasoning_chain = LLMChain(
            llm=self.llm,
            prompt=PromptTemplate(
                input_variables=["question", "context"],
                template="""
You are answering a multi-hop question. Use the context below to reason step-by-step.

Question: {question}

Context:
{context}

Reasoning:
"""
            )
        )

        # Turns the reasoning into the search query for the second retrieval.
        self.query_chain = LLMChain(
            llm=self.llm,
            prompt=PromptTemplate(
                input_variables=["reasoning"],
                template="""
Based on the reasoning below, what should be the next search query?

Reasoning:
{reasoning}

Next query:
"""
            )
        )

        # Produces the final answer from both evidence batches and the reasoning.
        self.answer_chain = LLMChain(
            llm=self.llm,
            prompt=PromptTemplate(
                input_variables=["question", "context1", "context2", "reasoning"],
                template="""
Answer the question below using the combined context and reasoning.

Question: {question}

First Evidence:
{context1}

Second Evidence:
{context2}

Reasoning:
{reasoning}

Final Answer:
"""
            )
        )

    def _retrieve_context(self, query: str) -> str:
        """Retrieve documents for ``query`` and join their text with blank lines."""
        # `invoke` is the supported replacement for the deprecated
        # `get_relevant_documents`.
        hits = self.retriever.invoke(query)
        return "\n\n".join(doc.page_content for doc in hits)

    def run(self, question: str) -> Dict:
        """Answer ``question`` with two retrieval rounds.

        Args:
            question: The multi-hop question to answer.

        Returns:
            A dict with the keys ``question``, ``final_answer``, ``reasoning``,
            ``reformulated_query``, ``evidence_1`` and ``evidence_2``; the two
            evidence entries are truncated to their first 500 characters.
        """
        # Step 1: Retrieve initial context
        context_0 = self._retrieve_context(question)

        # Step 2: Reasoning step
        reasoning = self.reasoning_chain.run(question=question, context=context_0)

        # Step 3: Reformulate query from reasoning. The LLM output is stripped
        # before it is used for retrieval, and a blank reformulation falls back
        # to the original question instead of searching for an empty string.
        new_query = self.query_chain.run(reasoning=reasoning).strip()
        if not new_query:
            new_query = question

        # Step 4: Second round retrieval
        context_1 = self._retrieve_context(new_query)

        # Step 5: Generate final answer
        final_answer = self.answer_chain.run(
            question=question,
            context1=context_0,
            context2=context_1,
            reasoning=reasoning
        )

        return {
            "question": question,
            "final_answer": final_answer.strip(),
            "reasoning": reasoning.strip(),
            "reformulated_query": new_query,
            "evidence_1": context_0[:500],
            "evidence_2": context_1[:500]
        }


In [9]:
# Build the IRCoT pipeline on a retriever with default search settings and
# run it on one sample multi-hop question.
ircot = IRCoTChain(retriever=vector_store.as_retriever(), llm=gemini_llm)
sample_question = "Who is the figure associated with generative AI technology whose departure from OpenAI was considered shocking according to Fortune, and is also the subject of a prevailing theory suggesting a lack of full truthfulness with the board as reported by TechCrunch?"
result = ircot.run(sample_question)


  self.reasoning_chain = LLMChain(
  reasoning = self.reasoning_chain.run(question=question, context=context_0)


In [10]:
# Show the first-round evidence stored in the IRCoT result.
print(result["evidence_1"])

The drama inside OpenAI gave the world its first glimpse of the bitter feuds among those who will determine the future of AI. But years before OpenAI’s near meltdown, there was a little-publicised but ferocious competition in Silicon Valley for control of the technology that is now quickly

Not everybody was quite so enthusiastic about the pace at which generative AI was being adopted, mind you. In March, OpenAI co-founder Elon Musk, as well as Steve Wozniak and a slew of associated AI researche


In [11]:
import re
from typing import List
from sklearn.metrics import f1_score
from collections import Counter

def normalize_text(s: str) -> str:
    """Canonicalise an answer string for token-level comparison.

    Diacritics are folded to their base letters, the text is lower-cased,
    every character other than ``a-z``, ``0-9`` and whitespace is removed,
    and whitespace runs collapse to single spaces.

    Args:
        s: Raw answer text.

    Returns:
        The normalised text, e.g. ``"Beyoncé!"`` -> ``"beyonce"``.
    """
    # Decompose accented characters and drop the combining marks so that
    # "é" survives as "e" instead of being deleted by the ASCII-only filter
    # below (which would otherwise turn "Beyoncé" into "beyonc").
    decomposed = unicodedata.normalize("NFKD", s)
    s = "".join(ch for ch in decomposed if not unicodedata.combining(ch))
    s = s.lower()
    s = re.sub(r'[^a-z0-9\s]', '', s)
    s = re.sub(r'\s+', ' ', s).strip()
    return s

def f1_for_answer(prediction: str, ground_truth: str) -> float:
    """Token-level F1 between a predicted answer and the reference answer.

    Both strings are normalised with ``normalize_text`` and split on
    whitespace; the overlap is the multiset intersection of the two token
    lists.

    Returns:
        ``0.0`` when either side has no tokens or no token is shared,
        otherwise the harmonic mean of token precision and recall.
    """
    predicted = normalize_text(prediction).split()
    expected = normalize_text(ground_truth).split()

    if not predicted or not expected:
        return 0.0

    # Counter intersection keeps the minimum count of every shared token.
    overlap = sum((Counter(predicted) & Counter(expected)).values())
    if overlap == 0:
        return 0.0

    precision = overlap / len(predicted)
    recall = overlap / len(expected)
    return 2 * precision * recall / (precision + recall)


In [12]:
def evaluate_f1(testset: List[dict], predictions: List[str]) -> float:
    """Average token-level F1 of ``predictions`` against ``testset``.

    Args:
        testset: Items whose ``"answer"`` entry holds the reference answer.
        predictions: One predicted answer per test item, in the same order.

    Returns:
        The mean of ``f1_for_answer`` over all pairs, or ``0.0`` when there
        is nothing to evaluate.

    Raises:
        AssertionError: If the two lists differ in length.
    """
    # Assertion message (Vietnamese): "number of predictions does not match testset".
    assert len(testset) == len(predictions), "Số lượng dự đoán không khớp testset"

    # An empty evaluation set would otherwise divide by zero below.
    if not testset:
        return 0.0

    scores = [
        f1_for_answer(pred, item["answer"])
        for item, pred in zip(testset, predictions)
    ]
    return sum(scores) / len(scores)


In [13]:
import time

def run_ircot_on_testset(testset, ircot_chain, max_questions=100, delay_seconds=5):
    """Run ``ircot_chain`` on the first ``max_questions`` items of ``testset``.

    Args:
        testset: Sequence of items, each with a ``"query"`` entry.
        ircot_chain: Object whose ``run(query)`` returns a mapping containing
            ``"final_answer"``.
        max_questions: Upper bound on the number of items processed.
        delay_seconds: Pause between consecutive queries (presumably to stay
            under an API rate limit — confirm).

    Returns:
        One prediction per processed item, in order; an item whose chain call
        raised is recorded as ``""``.
    """
    subset = testset[:max_questions]
    # Report progress against the number of items actually processed, which
    # is smaller than ``max_questions`` when the test set is short.
    total = len(subset)

    predictions = []
    for position, item in enumerate(subset, start=1):
        query = item["query"]
        print(f"[{position}/{total}] Running IRCoT on query: {query[:80]}...")
        try:
            prediction = ircot_chain.run(query)["final_answer"]
        except Exception as e:
            # Best-effort batch run: log the failure and keep going, using the
            # same 1-based numbering as the progress line above.
            print(f"Error on query {position}: {e}")
            prediction = ""
        predictions.append(prediction)
        # Only pause between queries; there is nothing to wait for after the last one.
        if position < total:
            time.sleep(delay_seconds)
    return predictions


In [16]:
# Assumes `ircot = IRCoTChain(...)` has already been created.
testset = load_json("data/testset.json")
predictions = ['Sam Bankman-Fried', 'Trump', 'Sam Altman', "The Hacker News article on The Epoch Times does report an increase in revenue related to subscription models, but there's no information provided about the TechCrunch article.", 'Cannot be determined from the information provided.', 'Sam Bankman-Fried', 'Cannot be determined.', "The 'New York Times' attribution is unclear based on the provided context, while the 'Sporting News' suggestion about the Ravens' defense is implied but not explicitly stated.", 'This question cannot be answered with the given context.', '', '', 'Partially correct.\n\nThe first part of your answer is correct: The \'Age\' article does suggest the Davis Cup team is aiming for improvement, seeking their first win in 20 years.\n\nHowever, the second part of your answer is incorrect. The second piece of evidence *does* state that South Africa has "taken their game to new heights just to reach the semis." This clearly indicates an improvement sufficient to reach the semi-finals.\n\nTherefore, the corrected answer is:\n\nYes, the \'Age\' article suggests that Australia\'s Davis Cup team is aiming for an improvement in their performance compared to the previous year, and the \'Sporting News\' article indicates that the South Africa national rugby team has already achieved an improvement to reach the Rugby World Cup semi-finals.', 'Impossible to determine.', 'I am unable to answer this question. The provided text snippets do not contain the answer.', 'Cannot determine.', 'The European Commission.', '', '', 'Unable to answer.', 'No, the Sporting News article does not suggest that streaming services do not require a subscription for viewing the Cowboys vs. 49ers game.', 'This question cannot be answered from the given context.', "Based on the provided documents, the advice from Sporting News during the specified period (September 28, 2023, to December 18, 2023) did not involve reading requirements or focusing on hype. 
While one article discussed betting opportunities for Week 9 of the NFL season, it didn't specifically advise going with the favored Eagles. Instead, it mentioned moneyline, spread, over/under, and player prop bets.", 'Valve', '', '', 'Final Answer: I cannot determine the answer from the given information.', 'Unable to answer.', 'The TechCrunch article does suggest that Amazon\'s large language model (LLM) is not trained on kids\' responses, while I cannot determine if The Age article raises concerns about TikTok\'s pixel collecting data without consent since there is no information about "The Age" article provided in the context.', 'Based on the provided text, Nike\'s sales in North America fell 2%. The text doesn\'t give us any information about the CNBC report on Nike\'s Latin America and Asia Pacific unit or the Fortune article on U.S. home sales prices. We can only confirm a decrease for Nike\'s North American sales. Therefore, the report from Cnbc | World Business News Leader on "Nike\'s Latin America and Asia Pacific unit" is unknown, however the passage did confirm that the sales for North America did decrease.', 'The coverage of ski resorts appears to have shifted from a focus on "exclusive" and "five-star" experiences (Zermatt and Vail) to mentioning specific hotels and travel packages, such as British Airways Holidays trips to the Fairmont Tremblant with ski-in/ski-out locations and spa facilities. This suggests a potential shift towards more practical travel information alongside the luxury aspects.', '', '', 'Sam Altman', 'Uber', 'Cannot be determined.', 'No, Yes.', 'No', '', "My apologies, I am unable to answer the question, as the provided contexts do not contain the relevant information about Sygic's headquarters or CEO.", 'The provided context does not contain information about the CEO of Pets Best Insurance Services. Therefore, I cannot determine the letter of the alphabet that starts their name.', 'Yes', "Based on the context provided:\n1.  
There is no information about The Independent - Sports reporting on the All Blacks' home victories.\n2.  There is no information about The Roar | Sports Writers Blog reporting on Argentina's victories.\nTherefore, it is not possible to determine if the reporting on the All Blacks' defeats by Argentina was consistent.", 'Yes', '', '', 'Sam Altman', "Yes, based on the provided text and reasoning, my answer is:\n\nYes, the anticompetitive behavior towards news publishers mentioned is a separate issue from financial influences, as they focus on distinct facets of Google's actions.", 'The TechCrunch article suggests that the success in "North America\'s EV market" is *inversely* related to the size and price; that is, the *failure* of small and lower-priced EVs is discussed. The Verge article does focus on Donald Trump\'s criticism of electric vehicles regarding their cost, range, and impact on American jobs. Therefore, the answer is Yes.', 'I am unable to answer this question. The provided context does not contain the names of any Zimbabwean ministers or details about partnerships to boost crop production.', 'This question cannot be answered from the given context.', '', '', 'The TalkSport article does suggest that Manchester United\'s defensive performance in the Champions League group stages is worse than in previous years, as indicated by a new record for goals conceded. The provided passage does include information from The Guardian article. The second passage states "When it comes to pressure in the Champions League of late, United do not cope well and there are few events trickier than a night in Istanbul". 
This implies that Manchester United\'s overall performance under pressure in the Champions League, especially in Istanbul, has been consistently poor.', 'Yes', "Based on the provided text, the TechCrunch article on Israel's tech employees suggests a far greater scale of impact on workforce mobilization due to the war compared to the TechCrunch article on GitLab's workforce reduction. The Israel article describes a situation where 10-30% of the entire tech workforce is being mobilized due to the war, and the destruction of the tech industry in Gaza. In contrast, the GitLab article describes a 7% reduction in headcount, affecting around 114 people. This is a significant difference in scale, as the Israel article implies a massive disruption to the tech industry due to widespread mobilization for war, while the GitLab article discusses a company-specific reduction in workforce size.\n\nTherefore, the final answer is:\n\nYes", 'No, the Sporting News did not report a Cowboys victory over the Seahawks, but yes, they did report a Lions win against the Packers.', 'I am sorry, but I cannot answer the question. The provided text snippets do not include the answer.', '', '', 'Yes', 'Lionel Messi', 'Sam Bankman-Fried', 'Yes', 'Meta', '', 'Google', '$\\boxed{Apple}$', 'Bettors', 'Spotify', 'No.', '', "The Sporting News article anticipates an impressive performance in the upcoming game for Jordan Love. The provided text from CBSSports.com reflects on Kirk Cousins' performance in last week's game with only 13 Fantasy points.", 'Cannot be determined.', "Yes, the CBSSports.com article suggests that the Minnesota Vikings' passing play percentage in Week 4 was lower than in previous weeks. 
The Sporting News articles indicate a strong defensive performance and consistent offensive results under Josh Dobbs' leadership compared to Kirk Cousins.", 'Everton', 'Final Answer: The final answer is $\\boxed{Federal Reserve}$', '', '', 'Sam Bankman-Fried', 'No.', '$\\boxed{Google}$', 'I am unable to determine whether there was consistency in the promotional offers reported for new customers at Caesars Sportsbook between the Sporting News report on September 26, 2023, and the CBSSports.com report, as the latter report is not provided.', "No. The Polygon article suggests Scorsese has *more* autonomy now, not less. The provided text does not include information about The Independent - Life and Style article or Scorsese's engagement with TikTok.", '', '', 'Yes, the passage includes the statement, "There was no attempt to remove Sam Altman from OpenAI by the co-founders of Anthropic,” said an", supporting the suggestion that \'The Age\' article indicates that Anthropic\'s co-founders did not try to remove Sam Altman from OpenAI. It also includes "In perhaps the most unexpected tech news of the year, billionaire and AI evangelist Sam Altman has been ejected from his CEO role at OpenAI by the company’s board after an apparent vote of no confidence." and "Sam Altman has been fired from OpenAI", implying that Sam Altman\'s departure from OpenAI was unexpected and not initiated by him, as the \'Fortune\' article suggests.', 'Yes', 'Yes', "Based on the information provided, the individual described fits the profile of someone who rapidly accumulated wealth and is now facing criminal charges for fraud and conspiracy. However, the person's name is not mentioned in the given context.", 'Cannot be determined.', '', 'Yes', 'Cannot be determined.', 'Sam Bankman-Fried', "It is not possible to determine the answer based on the context. 
The provided text focuses on Connor Bedard's debut and lacks information about what 'The New York Times' or 'Sporting News' articles suggest regarding his potential or the USC basketball team.", "Yes, between the report from CBSSports.com published on October 12, 2023, and the report from The Independent - Life and Style published on December 6, 2023, there was a change in the reporting of Taylor Swift's relationship status.", '', '', 'Both articles agree that Sportsbooks adjust their practices (profit from odds and lines, and tighten betting lines) based on certain conditions.', 'The provided context does not contain the answer to this question.']

#predictions = run_ircot_on_testset(testset, ircot)

# Print every prediction that differs from its ground-truth answer (optional)
for number, (item, predicted) in enumerate(zip(testset, predictions), start=1):
    expected = item['answer']
    if expected == predicted:
        continue
    print(f"Q{number}: GT = {expected} | Pred = {predicted}")




Q2: GT = Donald Trump | Pred = Trump
Q4: GT = Yes | Pred = The Hacker News article on The Epoch Times does report an increase in revenue related to subscription models, but there's no information provided about the TechCrunch article.
Q5: GT = Caesars Sportsbook | Pred = Cannot be determined from the information provided.
Q7: GT = Yes | Pred = Cannot be determined.
Q8: GT = Yes | Pred = The 'New York Times' attribution is unclear based on the provided context, while the 'Sporting News' suggestion about the Ravens' defense is implied but not explicitly stated.
Q9: GT = OpenAI | Pred = This question cannot be answered with the given context.
Q10: GT = Google | Pred = 
Q11: GT = Insufficient information. | Pred = 
Q12: GT = Yes | Pred = Partially correct.

The first part of your answer is correct: The 'Age' article does suggest the Davis Cup team is aiming for improvement, seeking their first win in 20 years.

However, the second part of your answer is incorrect. The second piece of evide

In [18]:
# Average token-level F1 of the predictions against the first 100 test items.
print(evaluate_f1(testset[:100], predictions))

0.2453465381009446
