In [None]:
!pip install llama-index-vector-stores-qdrant llama-index-readers-file llama-index-embeddings-fastembed llama-index-llms-openai

In [None]:
# from llama_index.core import VectorStoreIndex, Settings, SimpleDirectoryReader
# from llama_index.core import Settings


# from llama_index.core.query_pipeline import QueryPipeline
# from llama_index.retrievers.bm25 import BM25Retriever
# from llama_index.llms.huggingface import HuggingFaceLLM
# from llama_index.embeddings.huggingface import HuggingFaceEmbedding
# from llama_index.core.postprocessor import MetadataReplacementPostProcessor
import torch
# from llama_index.llms.llama_cpp import LlamaCPP
# from llama_index.core.query_pipeline import QueryPipeline, FnComponent 
# from llama_index.core.query_pipeline import Link  # Add this import

from llama_index.core import VectorStoreIndex
from llama_index.vector_stores.qdrant import QdrantVectorStore
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.core.node_parser import SentenceSplitter
import qdrant_client
from qdrant_client import QdrantClient
from together import Together
import json

import os
from dotenv import load_dotenv, dotenv_values 

  from .autonotebook import tqdm as notebook_tqdm


In [None]:
load_dotenv() 

In [None]:
def load_corpus(json_path):
    """Read corpus.json and convert to LlamaIndex documents"""
    from llama_index.core import Document
    import json
    
    with open(json_path) as f:
        data = json.load(f)
    
    documents = []
    for item in data:
        documents.append(Document(
            text=item["body"],
            metadata={
                "title": item["title"],
                "author": item["author"],
                "source": item["source"],
                "published_at": item["published_at"],
                "category":item["category"],
                "url":item["url"]
            }
        ))
    return documents


In [None]:
# # not in use
# embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-large-en-v1.5",
#     device="mps")
    
# node_parser = SentenceSplitter(
#         chunk_size=256,
#         chunk_overlap=25,
#     )
    
#     # Create processed nodes
# documents = load_corpus("corpus.json")
# nodes = node_parser.get_nodes_from_documents(documents)
    
# vector_index = VectorStoreIndex(nodes, embed_model=embed_model)

In [None]:
# Load documents and create nodes
documents = load_corpus("corpus.json") 
node_parser = SentenceSplitter(chunk_size=256, chunk_overlap=25)
nodes = node_parser.get_nodes_from_documents(documents)


In [2]:
from llama_index.embeddings.fastembed import FastEmbedEmbedding
embed_model = FastEmbedEmbedding(model_name="BAAI/bge-large-en-v1.5")

In [None]:

# Connect to Qdrant Cloud
quadrant_client = qdrant_client.QdrantClient(
    url=os.getenv("QUADRANT_DB_URL"),  # Quadrant DB URL
    api_key=os.getenv("QUADRANT_API_KEY")
)

vector_store = QdrantVectorStore(
    client=quadrant_client,
    collection_name="bge-large-256-embedds",
    embedding_dim=1024  # Dimension for bge-large-en-v1.5 embeddings
)


In [None]:
from llama_index.core import StorageContext
storage_context = StorageContext.from_defaults(
    vector_store=vector_store
)

index = VectorStoreIndex(
    nodes=nodes,
    storage_context=storage_context,
    embed_model=embed_model,
    show_progress=True
)

In [None]:
# # testing if context is retreived given a query
# print(quadrant_client.get_collections())  
# #print(f"Total nodes to index: {len(nodes)}")
# query= "Does the article from Polygon discussing the Barbie film describe Mattel's portrayal in the same light as how The Independent - Life and Style article describes Mattel's handling of the Wilma Mankiller Barbie doll?"

# query_embedding = embed_model.get_text_embedding(query)
    
#     # Search in Qdrant
# search_result = quadrant_client.search(
#         collection_name="multihop-embedds",
#         query_vector=query_embedding,
#         limit=10
#     )
# search_result

In [None]:
import json
from io import StringIO
def search_similar(query):
    query_embedding = embed_model.get_text_embedding(query)
    
    # Search in Qdrant
    search_result = quadrant_client.search(
        collection_name="bge-large-256-embedds",
        query_vector=query_embedding,
        limit=10
    )
    data_list = []
    
    for point in search_result:
        data_list.append({
        "text": f"[Excerpt from document]\ntitle: {point.payload.get("title")}\npublished_at: {point.payload.get("published_at")}\nsource: {point.payload.get("source")}\nExcerpt:\n-----\n{json.load(StringIO(point.payload.get("_node_content")))["text"]}",
        "score": point.score
        })
    return data_list

In [None]:
# ret_texts=[data["text"] for data in data_list]
# ret_texts

In [None]:
def get_relevant_docs(json_path):
    """Read corpus.json and convert to LlamaIndex documents"""
    from llama_index.core import Document
    import json
    
    with open(json_path) as f:
        data = json.load(f)
    
    documents = []
    for item in data:
        documents.append({
                "query": item["query"],
                "question_type": item["question_type"],
                "retrieval_list": search_similar(item["query"]),
                "gold_list": item["evidence_list"],
            }
        )
    return documents
similar_docs = get_relevant_docs("MultiHopRAG.json")

In [None]:
out_file = open("multihop_qa_256_final_output.json", "w")
json.dump(similar_docs, out_file, indent = 6)
out_file.close()

In [None]:
# #query = "What are 'skip-level' meetings?"
# #query="Does 'The New York Times' article attribute the success of the Buffalo Bills' defense to the contributions of Jordan Poyer, while the 'Sporting News' article suggests that the Baltimore Ravens' defense needs to improve before their game against the Cincinnati Bengals?"
# #query= "Who is the individual associated with the cryptocurrency industry facing a criminal trial on fraud and conspiracy charges, as reported by both The Verge and TechCrunch, and is accused by prosecutors of committing fraud for personal gain?"
# #query="Who is the figure associated with generative AI technology whose departure from OpenAI was considered shocking according to Fortune, and is also the subject of a prevailing theory suggesting a lack of full truthfulness with the board as reported by TechCrunch?"
# #query="Do the TechCrunch article on software companies and the Hacker News article on The Epoch Times both report an increase in revenue related to payment and subscription models, respectively?"
# #query="Has the portrayal of Google's market practices in reports by The Age before October 22, 2023, remained consistent with the depiction in The Verge's coverage of the Epic v. Google case, and with TechCrunch's report on the class action antitrust suit filed against Google?"
# #query="Does the TechCrunch article suggest that Amazon's large language model (LLM) is not trained on kids' responses, while The Age article raises concerns about TikTok's pixel collecting data without consent?"
# #test_query="Do 'The Verge' and 'Engadget' articles both suggest that 'Consumers' have guides or opportunities to make better purchasing decisions, while 'TechCrunch' discusses 'Consumers' desire for a new model in a different sector?"
# test_query= "Does the TalkSport article suggest that Manchester United's defensive performance in the Champions League group stages is worse than in previous years, as indicated by a new record for goals conceded, while The Guardian article implies that Manchester United's overall performance under pressure in the Champions League, especially in Istanbul, has been consistently poor?"
# test_query= "Does the article from Polygon discussing the Barbie film describe Mattel's portrayal in the same light as how The Independent - Life and Style article describes Mattel's handling of the Wilma Mankiller Barbie doll?"
# test_query=similar_docs[0]["query"]
# ret_texts=[doc["text"] for doc in similar_docs[0]["retrieval_list"]]
# prefix = "Below is a question followed by some context from different sources. Please answer the question based on the context. The answer to the question is a word or entity. If the provided information is insufficient to answer the question, respond 'Insufficient Information'. Answer directly without explanation."
# response = llmClient.chat.completions.create(
#     model="meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo",
#     messages=[
#       {"role": "system", "content": "You are a helpful chatbot."},
#       {"role": "user", "content": f"{prefix}:{test_query}:{"\n".join(ret_texts)}"},
#     ],
# )

# response.choices[0].message.content

In [7]:
from kindo_api_methods import KindoAPI


In [None]:
kindo_api = KindoAPI(api_key=os.getenv("KINDO_API_KEY"))
#response = kindo_api.call_kindo_api(model="groq/llama3-70b-8192", messages=[{"role": "user", "content": f"{prefix}:{test_query}:{"\n".join(ret_texts)}"}], max_tokens=200).json()['choices'][0]['message']['content']
#response

In [None]:
with open("multihop_qa_256_output.json") as f:
    data = json.load(f)  
    similar_docs = []
    for item in data:
        similar_docs.append({
            "query":item["query"],
            "question_type":item["question_type"],
            "retrieval_list":item["retrieval_list"],
            "gold_list":item["gold_list"]
        })

In [None]:
import time

prefix = "Below is a question followed by some context from different sources. Please answer the question based on the context. The answer to the question is a word or entity. If the provided information is insufficient to answer the question, respond 'Insufficient Information'. Answer directly without explanation."
for i in range(len(similar_docs)):
    query=similar_docs[i]["query"]
    ret_texts=[doc["text"] for doc in similar_docs[i]["retrieval_list"]]
    response = kindo_api.call_kindo_api(model="groq/llama3-70b-8192", messages=[{"role": "user", "content": f"{prefix}:{query}:{"\n".join(ret_texts)}"}], max_tokens=200)['choices'][0]['message']['content']
    similar_docs[i]["answer"]=response
    if(i%10==0):
        time.sleep(2)

In [6]:
def get_gpt_data(json_path):
    with open(json_path) as f:
            data = json.load(f)
        
    documents = []
    for item in data:
        documents.append({
                "query": item["query"],
                "question_type": item["question_type"],
                "retrieval_list": item["retrieval_list"],
                "gold_list": item["gold_list"],
            }
        )
    return documents
gpt_4o_docs=get_gpt_data("multihop_qa_256_final_output.json")

In [None]:
import time
import json
import re
llm_model="azure/gpt-4o"
prefix = "Below is a question followed by some context from different sources. Please answer the question based on the context. The answer to the question is a word or entity. If the provided information is insufficient to answer the question, respond 'Insufficient Information'. Answer directly without explanation."
total_tokens=0
for i in range(len(gpt_4o_docs)):
    query = gpt_4o_docs[i]["query"]
    ret_texts=[doc["text"] for doc in gpt_4o_docs[i]["retrieval_list"]]
    total_tokens+=len("".join(ret_texts).split())
    response = kindo_api.call_kindo_api(
                model="azure/gpt-4o",
                messages=[{"role": "user", "content": f"{prefix}:{query}:\n{'\n'.join(ret_texts)}"}],
                max_tokens=200
            ).json()['choices'][0]['message']['content']
    gpt_4o_docs[i]["answer"] = response
    print(i,response)
    if i%5==0:
        print(total_tokens)
        total_tokens=0
        print("sleep for 60secs")
        time.sleep(60)


HTTP error occurred: 403 Client Error: Forbidden for url: https://llm.kindo.ai/v1/chat/completions, details: {'error': 'Invalid JSON response', 'content': '{"error":{"message":"CHAT_INSUFFICIENT_CREDITS: You have exceeded your current quota of daily API calls. Please contact support to upgrade your plan: https://kindo.ai/contact","type":"None","param":"None","code":"403"}}'}


AttributeError: 'dict' object has no attribute 'json'

In [12]:
out_file = open("multihop_qa_256_gpt4o_output.json", "w")
json.dump(gpt_4o_docs, out_file, indent = 6)
out_file.close()

In [None]:
Question Type: inference_query
 Precision: 0.87
 Recall: 0.87
 F1 Score: 0.87

Question Type: comparison_query
 Precision: 0.13
 Recall: 0.13
 F1 Score: 0.13

Question Type: null_query
 Precision: 0.21
 Recall: 0.21
 F1 Score: 0.21

Question Type: temporal_query
 Precision: 0.25
 Recall: 0.25
 F1 Score: 0.25

Overall Metrics:
 Precision: 0.40
 Recall: 0.40
 F1 Score: 0.40

In [None]:
Evaluate file: multihop_qa_256_output.json(512 due to mistake)
For file: multihop_qa_256_output.json(512 due to mistake)
Hits@10: 0.8457
Hits@4: 0.6568
MAP@10: 0.2149
MRR@10: 0.4438
--------------------

In [None]:
For file: multihop_qa_256_final_output.json
Hits@10: 0.7397
Hits@4: 0.5463
MAP@10: 0.1787
MRR@10: 0.3937
--------------------

In [None]:
for embeddings of 512 chunk size
Question Type: inference_query
 Precision: 0.87
 Recall: 0.87
 F1 Score: 0.87
 accuracy: 0.80

Question Type: comparison_query
 Precision: 0.13
 Recall: 0.13
 F1 Score: 0.13
 accuracy: 0.36

Question Type: null_query
 Precision: 0.21
 Recall: 0.21
 F1 Score: 0.21
 accuracy: 0.39

Question Type: temporal_query
 Precision: 0.25
 Recall: 0.25
 F1 Score: 0.25
 accuracy: 0.40

Overall Metrics:
 Precision: 0.40
 Recall: 0.40
 F1 Score: 0.40
 Accuracy: 0.46

In [None]:
Question Type: inference_query
 Precision: 0.91
 Recall: 0.91
 F1 Score: 0.91
 accuracy: 0.85

Question Type: comparison_query
 Precision: 0.12
 Recall: 0.12
 F1 Score: 0.12
 accuracy: 0.36

Question Type: null_query
 Precision: 0.33
 Recall: 0.33
 F1 Score: 0.33
 accuracy: 0.43

Question Type: temporal_query
 Precision: 0.30
 Recall: 0.30
 F1 Score: 0.30
 accuracy: 0.42

Overall Metrics:
 Precision: 0.44
 Recall: 0.44
 F1 Score: 0.44
 Accuracy: 0.47