### Weaviate Vector DB Creation

In [1]:
!docker compose up -d

 Container rag-comprehensive-weaviate-1  Starting
Error response from daemon: driver failed programming external connectivity on endpoint rag-comprehensive-weaviate-1 (c6c228a9bde799cef37ef2f27699c1a3c8d237e7492df36e64e0829adbf14db8): Bind for 0.0.0.0:50051 failed: port is already allocated


In [2]:
import os
import weaviate
from dotenv import load_dotenv
load_dotenv()

# Create weaviate client
google_api_key=os.getenv("GOOGLE_API_KEY")
client = weaviate.Client(
    url = "http://localhost:8080",  # Replace with your endpoint
    additional_headers = {
        "X-Google-Api-Key": google_api_key # Replace with your inference API key
    }
)
client.is_ready()

            Please consider upgrading to the latest version. See https://weaviate.io/developers/weaviate/client-libraries/python for details.


True

### Data ingestion

In [3]:
from llama_index.core import SimpleDirectoryReader
# Load documents
documents=SimpleDirectoryReader("./data").load_data()

### Data Indexing

In [5]:
from llama_index.llms.gemini import Gemini
from llama_index.embeddings.gemini import GeminiEmbedding
from llama_index.core.node_parser import SentenceSplitter
from llama_index.core import Settings

Settings.llm = Gemini(models='gemini-pro', api_key=google_api_key)
Settings.embed_model = GeminiEmbedding(model_name="models/embedding-001")
Settings.node_parser = SentenceSplitter(chunk_size=512, chunk_overlap=20)
Settings.num_output = 512
Settings.context_window = 3900

In [6]:
from llama_index.core import VectorStoreIndex
from llama_index.core import StorageContext
from llama_index.vector_stores.weaviate import WeaviateVectorStore

# construct vector store 
vector_store = WeaviateVectorStore(weaviate_client=client)

# set up storage for embeddings
storage_context = StorageContext.from_defaults(vector_store=vector_store)
# set up the index 
index = VectorStoreIndex.from_documents(
    documents,
    storage_context=storage_context
)

### Data Querying

##### Query with Default Vector Search

In [7]:
from llama_index.core.response.notebook_utils import display_response

query_engine = index.as_query_engine()
response = query_engine.query("What did the author do growing up?")

display_response(response)


**`Final Response:`** The author wrote short stories and programmed on an IBM 1401 computer.

### Evaluation with ragas

In [8]:
import json
with open("./eval_data/eval_dataset.json", "r") as file:
    data = json.load(file)

question = []
contexts = []
answer = []
ground_truth = []

for example in data["examples"]:
    question.append(example['query'])
    contexts.append(example['reference_contexts'])
    ground_truth.append([example['reference_answer']])


In [9]:
for example in data['examples']:
    generated_answer =  query_engine.query(example['query'])
    answer.append(str(generated_answer))

KeyboardInterrupt: 

In [None]:
formatted_data = {
    'question': question,
    'contexts': contexts,
    'answer': answer,
    'ground_truth': ground_truth

}
output_file_path = "formatted_data.json"

with open(output_file_path, "w") as output_file:
    json.dump(formatted_data, output_file, indent=4)

In [10]:
from datasets import Dataset
with open("./eval_data/formatted_data.json", "r") as file:
    formatted_data = json.load(file)

formatted_data = Dataset.from_dict(formatted_data)

In [11]:
from ragas.metrics import (
    faithfulness,
    answer_relevancy,
    context_precision,
    context_recall,
)
from ragas.metrics.critique import harmfulness

metrics = [
    faithfulness,
    answer_relevancy,
    context_precision,
    context_recall,
    harmfulness,
]

In [None]:
from ragas import evaluate

result = evaluate(dataset=formatted_data,metrics=metrics)

passing column names as 'ground_truths' is deprecated and will be removed in the next version, please use 'ground_truth' instead. Note that `ground_truth` should be of type string and not Sequence[string] like `ground_truths`
Evaluating:  54%|█████▎    | 118/220 [02:30<02:09,  1.27s/it]
Exception in thread Thread-53:
Traceback (most recent call last):
  File "c:\Users\thangta1\anaconda3\envs\venv\lib\threading.py", line 1009, in _bootstrap_inner
    self.run()
  File "c:\Users\thangta1\anaconda3\envs\venv\lib\site-packages\ragas\executor.py", line 93, in run
    results = self.loop.run_until_complete(self._aresults())
  File "c:\Users\thangta1\anaconda3\envs\venv\lib\asyncio\base_events.py", line 641, in run_until_complete
    return future.result()
  File "c:\Users\thangta1\anaconda3\envs\venv\lib\site-packages\ragas\executor.py", line 81, in _aresults
    raise e
  File "c:\Users\thangta1\anaconda3\envs\venv\lib\site-packages\ragas\executor.py", line 76, in _aresults
    r = await fu

ExceptionInRunner: The runner thread which was running the jobs raised an exeception. Read the traceback above to debug it. You can also pass `raise_exceptions=False` incase you want to show only a warning message instead.