## Initial Setup

In [1]:
# Importing required libraries
import warnings
import os
import openai
from pathlib import Path
from dotenv import load_dotenv
import wandb
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader
from llama_index.llms.openai import OpenAI

# Silence all warnings for the rest of the session and name the
# Weights & Biases project that the runs in this notebook log to.
warnings.simplefilter("ignore")
WANDB_PROJECT = "test_local_alchemy_v0"

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
from dotenv import load_dotenv
import openai
import os

load_dotenv()  # take environment variables from .env.

# Hand the key to the OpenAI client. When the variable is missing, re-raise
# the KeyError with an actionable message instead of the bare variable name.
try:
    openai.api_key = os.environ["OPENAI_API_KEY"]
except KeyError as exc:
    raise KeyError(
        "OPENAI_API_KEY is not set; export it or add it to the .env file"
    ) from exc

## Read Documents

In [2]:
# Load the contents of ./documents_4_v0 into `documents`; the cell outputs
# below show 1320 entries, each with its own text and file metadata.
documents = SimpleDirectoryReader("./documents_4_v0").load_data()

In [3]:
print(documents[0])

Doc ID: 8586e2dd-975d-476b-9aaa-79afbab07f1f
Text: Ashoka Kumar Thakur vs Union Of India And Ors on 10 April, 2008
Author: Arijit Pasayat Bench: Arijit Pasayat, C.K. Thakker
CASE NO.: Writ Petition (civil)  265 of 2006 PETITIONER: Ashoka Kumar
Thakur RESPONDENT: Union of India and Ors DATE OF JUDGMENT: 10/04/2008
BENCH: Dr. ARIJIT PASAYAT & C.K. THAKKER JUDGMENT: J U D G M E N T
WRIT ...


In [4]:
len(documents)

1320

## W&B initialisation

In [5]:
# Initialize W&B for tracking and visualizations
from llama_index.core.callbacks import CallbackManager
from llama_index.callbacks.wandb import WandbCallbackHandler

# Run settings for the baseline RAG run, the W&B handler built from them, and
# the callback manager that wraps that handler.
wandb_args = dict(project=WANDB_PROJECT, name="baseline-rag")
wandb_callback = WandbCallbackHandler(run_args=wandb_args)
callback_manager = CallbackManager([wandb_callback])

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


[34m[1mwandb[0m: Streaming LlamaIndex events to W&B at https://wandb.ai/taaha-s-bajwa/test_local_alchemy_v0/runs/boutubi0
[34m[1mwandb[0m: `WandbCallbackHandler` is currently in beta.
[34m[1mwandb[0m: Please report any issues to https://github.com/wandb/wandb/issues with the tag `llamaindex`.


## OpenAI/Mistral LLM

In [7]:
# from llama_index.llms.ollama import Ollama

# llm = Ollama(
#         model="mistral",
#         temperature=0.1,
#         context_window=3900, 
#         request_timeout=30.0
#         )

# llm = LlamaCPP(
#     # You can pass in the URL to a GGML model to download it automatically
#     model_url=None,
#     # optionally, you can set the path to a pre-downloaded model instead of model_url
#     model_path="/Users/nkise/Documents/projects/Courses 📜/RAG/llama.cpp/models/mistral-instruct-7b-q3k-small.gguf",
#     temperature=0.1,
#     max_new_tokens=512,
#     # llama2 has a context window of 4096 tokens, but we set it lower to allow for some wiggle room
#     context_window=3900,
#     # kwargs to pass to __call__()
#     generate_kwargs={},
#     # kwargs to pass to __init__()
#     # set to at least 1 to use GPU
#     model_kwargs={"n_gpu_layers": 1},
#     # transform inputs into Llama2 format
#     messages_to_prompt=messages_to_prompt,
#     completion_to_prompt=completion_to_prompt,
#     verbose=False,
# )

In [8]:
from llama_index.llms.openai import OpenAI

# Instantiate the OpenAI LLM with the library defaults (no model or temperature is passed).
llm = OpenAI()

In [9]:
response = llm.complete("Hello! Can you tell me a poem about cats and dogs?")
print(response.text)

Of course! Here is a poem about cats and dogs:

In a world of fur and whiskers,
Where tails wag and purrs linger,
Cats and dogs roam together,
In harmony, they do not wither.

The cat, with eyes so bright,
Prowls through the night,
Silent and sleek,
With a graceful mystique.

The dog, loyal and true,
Barks at the moon,
Boundless energy and joy,
A faithful companion, oh boy!

Together they play and chase,
In a never-ending race,
Through fields of green,
The perfect team.

Cats and dogs, so different yet the same,
In their love and loyalty, they proclaim,
That in this world of fur and paws,
They will always be each other's cause.


# Baseline RAG

In [15]:
from llama_index.core import Settings
from llama_index.core import ServiceContext
from llama_index.embeddings.openai import OpenAIEmbedding

# Register the LLM, embedding model and W&B callback manager on the global
# `Settings` object. The index and query engine are created later in this
# notebook without a `service_context` argument, so they only pick up what is
# configured here; without these assignments the `llm` and `callback_manager`
# built above never reach the query engine.
Settings.llm = llm
Settings.embed_model = OpenAIEmbedding(model="text-embedding-3-small")
Settings.callback_manager = callback_manager

# ServiceContext kept so that code which still passes `service_context=`
# continues to work.
service_context = ServiceContext.from_defaults(
    llm=llm,
    callback_manager=callback_manager,
)

### Chroma Vector Store and Query engine

In [13]:
# import
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader
from llama_index.vector_stores.chroma import ChromaVectorStore
from llama_index.core import StorageContext
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from IPython.display import Markdown, display
import chromadb

In [14]:
# # Create to disk
# db = chromadb.PersistentClient(path="./chroma_db_doc4_gpt003s")
# chroma_collection = db.get_or_create_collection("quickstart")
# vector_store = ChromaVectorStore(chroma_collection=chroma_collection)
# storage_context = StorageContext.from_defaults(vector_store=vector_store)

# index = VectorStoreIndex.from_documents(
#     documents, storage_context=storage_context
# )

In [15]:
# Load the persisted Chroma collection from disk and wrap it in an index.
CHROMA_DB_PATH = "./chroma_db_doc4_gpt003s"
CHROMA_COLLECTION_NAME = "quickstart"

db = chromadb.PersistentClient(path=CHROMA_DB_PATH)
chroma_collection = db.get_or_create_collection(CHROMA_COLLECTION_NAME)

# `get_or_create_collection` creates an empty collection when nothing has been
# persisted under this path/name. Stop here in that case rather than building
# a query engine that has nothing to retrieve from.
if chroma_collection.count() == 0:
    raise RuntimeError(
        f"Chroma collection {CHROMA_COLLECTION_NAME!r} at {CHROMA_DB_PATH!r} is empty; "
        "build the index first (see the 'Create to disk' cell)."
    )

vector_store = ChromaVectorStore(chroma_collection=chroma_collection)
index = VectorStoreIndex.from_vector_store(
    vector_store,
)

In [16]:
# Converting the index to a query engine for retrieval
query_engine = index.as_query_engine()

### Testing Query engine

In [22]:
# Defining a function to display responses
from llama_index.core.response.notebook_utils import display_response


def query_and_display(question):
    """Send ``question`` to the notebook-level ``query_engine`` and render the answer.

    Args:
        question: Text passed to ``query_engine.query``.

    Returns:
        None. The response is shown through ``display_response``.
    """
    display_response(query_engine.query(question))

In [23]:
# Smoke-test the query engine with a couple of sample questions
for sample_question in (
    "What happened in Shayara Bano Case?",
    "Who was the judge in Young lawyers case?",
):
    query_and_display(sample_question)

**`Final Response:`** Shayara Bano, the petitioner, intermittently performed her matrimonial duties and eventually left her matrimonial home with her father, maternal uncle, and children to live in her parental home. Despite her husband visiting her for maintenance and well-being, she refused to return to the matrimonial home when he requested. The husband later took custody of their two children, who have been in his care since then.

**`Final Response:`** Chief Justice Dipak Misra, along with Justices A.M. Khanwilkar, Rohinton Fali Nariman, D.Y. Chandrachud, and Indu Malhotra were the judges in the Young Lawyers case.

In [24]:
# Closing the W&B run after queries
wandb_callback.finish()

# Evaluation

### Generating Eval Questions

In [3]:
# Importing necessary modules for evaluation
import copy
import random
import nest_asyncio
import pandas as pd
from llama_index.core.evaluation import (
    DatasetGenerator,
    RelevancyEvaluator,
    ResponseEvaluator,
    RetrieverEvaluator,
)

In [79]:
# Open a separate W&B run for the question-generation stage
wandb_args = dict(project=WANDB_PROJECT, name="eval-questions-generation")
wandb_callback = WandbCallbackHandler(run_args=wandb_args)
callback_manager = CallbackManager([wandb_callback])

# gpt-3.5-turbo at temperature 0, bundled with the callback manager above
llm_eval = OpenAI(model="gpt-3.5-turbo", temperature=0)
service_context = ServiceContext.from_defaults(
    callback_manager=callback_manager,
    llm=llm_eval,
)

[34m[1mwandb[0m: Streaming LlamaIndex events to W&B at https://wandb.ai/taaha-s-bajwa/test_local_alchemy_v0/runs/hhy6d6x6
[34m[1mwandb[0m: `WandbCallbackHandler` is currently in beta.
[34m[1mwandb[0m: Please report any issues to https://github.com/wandb/wandb/issues with the tag `llamaindex`.


In [38]:
# Pick a small random subset of the documents to generate questions from;
# using only a few keeps the evaluation quick.
EVAL_SAMPLE_SIZE = 4
EVAL_SAMPLE_SEED = 42

# A dedicated, seeded RNG makes the selection identical on every run and leaves
# the global `random` state untouched. Only the selected documents are
# deep-copied (not the whole corpus), and the sample size is capped at the
# number of available documents.
_eval_rng = random.Random(EVAL_SAMPLE_SEED)
random_documents = copy.deepcopy(
    _eval_rng.sample(documents, min(EVAL_SAMPLE_SIZE, len(documents)))
)

In [39]:
random_documents

[Document(id_='4a0adc44-4ec2-47a9-ab12-402cbff5825b', embedding=None, metadata={'page_label': '490', 'file_name': '/home/darth/Documents/code/legal_AI_v0/local_rag/documents_4_v0/Kesavananda_Bharati_Sripadagalvaru_vs_State_Of_Kerala_And_Anr_on_24_April_1973.PDF', 'file_path': '/home/darth/Documents/code/legal_AI_v0/local_rag/documents_4_v0/Kesavananda_Bharati_Sripadagalvaru_vs_State_Of_Kerala_And_Anr_on_24_April_1973.PDF', 'file_type': 'application/pdf', 'file_size': 2761497, 'creation_date': '2024-03-10', 'last_modified_date': '2024-02-27'}, excluded_embed_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], excluded_llm_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], relationships={}, text="individual have been subordinated to the general weal. No one has been allowed to so\nexercise his rights as to impinge upon the rights of others. Although different s

In [41]:
len(random_documents)

4

In [46]:
eval_questions=[]
eval_questions

[]

In [58]:
# Patch the event loop first so the async generation below can run inside Jupyter
nest_asyncio.apply()

# Start from an empty list, then request one question per chunk of the
# sampled documents
eval_questions = []
data_generator = DatasetGenerator.from_documents(
    random_documents,
    num_questions_per_chunk=1,
    service_context=service_context,
)
eval_questions = data_generator.generate_questions_from_nodes()

  return cls(


Failed to log trace tree to W&B: list index out of range
Failed to log trace tree to W&B: list index out of range
Failed to log trace tree to W&B: list index out of range
Failed to log trace tree to W&B: list index out of range


  return QueryResponseDataset(queries=queries, responses=responses_dict)


In [76]:
# Hand-written questions added on top of the generated ones
additional_questions = ['Who was the judge in Shayara Bano case?',
                        'What was the judgement of Shayara Bano Case?',
                        'Who was the accussed in Shayara Bano Case?',
                        'What was the Shayara Bano Case about?']

# Append only the questions that are not already present, so that re-running
# this cell does not add the hand-written questions a second time.
eval_questions = eval_questions + [
    question for question in additional_questions if question not in eval_questions
]

In [77]:
eval_questions

['According to the text, what is the view of later writers regarding the incorporation of natural rights in the Constitution and laws of the state?',
 'How do those who do not subscribe to the theory of enforceability of natural rights believe rights should be justiciable and enforceable?',
 'In the context of natural rights, what is the significance of the Constitution and statutory law according to the text?',
 "How does Willoughby's observation regarding natural rights relate to the written provisions of the Constitution?",
 'Can natural rights be enforced in courts of law if they have not been codified or made a part of the law, according to the text?',
 'According to the judgment, how is public morality related to constitutional morality in the context of religious practices at the Sabarimala temple?',
 'How does the court determine whether the exclusion of women of a certain age group at the Sabarimala temple is an essential practice under Hindu religion?',
 'In the case of Free 

In [80]:
# Persist the questions as a plain-text file (one question per line) inside a
# W&B artifact so they can be loaded again later
artifact = wandb.Artifact(name="eval-questions", type="text")

# Serialise the list first, then write it into a new file within the artifact
questions_text = "\n".join(eval_questions)
with artifact.new_file("questions.txt", mode="w") as questions_file:
    questions_file.write(questions_text)

# Log the artifact to W&B
wandb.log_artifact(artifact)

<Artifact eval-questions>

In [81]:
# Look up version v2 of the "eval-questions" artifact.
# NOTE(review): the version is pinned, so this may not be the artifact logged
# in the previous cell — confirm that v2 is the intended version.
artifact = wandb.use_artifact("eval-questions:v2")

# Get the entry for the file containing the list of questions
# (`file` is not used by any active code in this cell)
file = artifact.get_path("questions.txt")

# Download the artifact's contents; the returned value is kept in `artifact_dir`
artifact_dir = artifact.download()

# Reading the questions back from the artifact is currently disabled:
# with file.open("r") as f:
#     questions = f.read().split("\n")

# # Print the list of questions
# print(questions)

[34m[1mwandb[0m:   1 of 1 files downloaded.  


In [82]:
questions = eval_questions

In [83]:
wandb_callback.finish()

### Evaluation

In [5]:
# Initialize W&B for tracking and visualizations
from llama_index.core.callbacks import CallbackManager
from llama_index.callbacks.wandb import WandbCallbackHandler

# Open the W&B run that records the baseline response evaluation
wandb_args = dict(project=WANDB_PROJECT, name="baseline-evaluation")
wandb_callback = WandbCallbackHandler(run_args=wandb_args)
callback_manager = CallbackManager([wandb_callback])

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Streaming LlamaIndex events to W&B at https://wandb.ai/taaha-s-bajwa/test_local_alchemy_v0/runs/upu8qfik
[34m[1mwandb[0m: `WandbCallbackHandler` is currently in beta.
[34m[1mwandb[0m: Please report any issues to https://github.com/wandb/wandb/issues with the tag `llamaindex`.


In [7]:
# Path of the questions file inside the downloaded W&B artifact
file_path = 'artifacts/eval-questions:v2/questions.txt'

# Read one question per line. The encoding is passed explicitly so the file is
# decoded the same way regardless of the platform default (assumes the file
# was written as UTF-8 — TODO confirm), and blank lines are dropped so that no
# empty query is sent to the query engine later on.
with open(file_path, 'r', encoding='utf-8') as file:
    eval_questions = [line for line in file.read().splitlines() if line.strip()]

# `eval_questions` now holds one string per non-empty line of the file
eval_questions


['According to the text, what is the view of later writers regarding the incorporation of natural rights in the Constitution and laws of the state?',
 'How do those who do not subscribe to the theory of enforceability of natural rights believe rights should be justiciable and enforceable?',
 'In the context of natural rights, what is the significance of the Constitution and statutory law according to the text?',
 "How does Willoughby's observation regarding natural rights relate to the written provisions of the Constitution?",
 'Can natural rights be enforced in courts of law if they have not been codified or made a part of the law, according to the text?',
 'According to the judgment, how is public morality related to constitutional morality in the context of religious practices at the Sabarimala temple?',
 'How does the court determine whether the exclusion of women of a certain age group at the Sabarimala temple is an essential practice under Hindu religion?',
 'In the case of Free 

In [19]:
# Wrap the questions in a single-column DataFrame and preview the first rows
question_df = pd.DataFrame({"questions": eval_questions})
question_df.head()

Unnamed: 0,questions
0,"According to the text, what is the view of lat..."
1,How do those who do not subscribe to the theor...
2,"In the context of natural rights, what is the ..."
3,How does Willoughby's observation regarding na...
4,Can natural rights be enforced in courts of la...


In [11]:
from llama_index.core import Settings
from llama_index.llms.openai import OpenAI
from llama_index.core import ServiceContext
from llama_index.embeddings.openai import OpenAIEmbedding

# Set OpenAI's "text-embedding-3-small" as the embedding model on the global Settings object
Settings.embed_model = OpenAIEmbedding(model="text-embedding-3-small")

# Model used by the evaluators: gpt-3.5-turbo at temperature 0, bundled with
# the W&B callback manager into the ServiceContext the evaluators receive
llm_eval = OpenAI(temperature=0, model="gpt-3.5-turbo")
service_context_eval = ServiceContext.from_defaults(
    llm=llm_eval, 
    callback_manager=callback_manager
)

In [17]:
# Running the evaluation using BatchEvalRunner
from llama_index.core.evaluation import (
    BatchEvalRunner,
    FaithfulnessEvaluator,
    RelevancyEvaluator,
)

faithfulness_evaluator = FaithfulnessEvaluator(service_context=service_context_eval)
relevancy_evaluator = RelevancyEvaluator(service_context=service_context_eval)
runner = BatchEvalRunner(
    {"faithfulness": faithfulness_evaluator, "relevancy": relevancy_evaluator},
    workers=8,
)

eval_results = await runner.aevaluate_queries(
    index.as_query_engine(), queries=eval_questions
)

In [18]:
def _results_to_frame(results):
    """Return a DataFrame with one row per evaluation result in ``results``."""
    return pd.DataFrame.from_records([result.dict() for result in results])


# One DataFrame per metric; preview the relevancy results
faithfulness_df = _results_to_frame(eval_results["faithfulness"])
relevancy_df = _results_to_frame(eval_results["relevancy"])
relevancy_df.head()

Unnamed: 0,query,contexts,response,passing,feedback,score,pairwise_source,invalid_result,invalid_reason
0,"According to the text, what is the view of lat...",[individual have been subordinated to the gene...,Later writers generally believe that natural r...,True,YES,1.0,,False,
1,How do those who do not subscribe to the theor...,[individual have been subordinated to the gene...,Those who do not subscribe to the theory of en...,True,YES,1.0,,False,
2,"In the context of natural rights, what is the ...",[individual have been subordinated to the gene...,The text emphasizes that natural rights have n...,True,YES,1.0,,False,
3,How does Willoughby's observation regarding na...,[individual have been subordinated to the gene...,Willoughby's observation suggests that the so-...,True,YES,1.0,,False,
4,Can natural rights be enforced in courts of la...,[individual have been subordinated to the gene...,Natural rights cannot be enforced in courts of...,True,YES,1.0,,False,


In [20]:
# Save the questions and both result tables as CSV files.
# Only columns that are empty in *every* row are dropped (`how="all"`). The
# pandas default, `how="any"`, would remove a whole column -- e.g. `score` or
# `feedback` -- as soon as a single evaluation left that field empty.
question_df.to_csv("questions.csv", index=False)
faithfulness_df.dropna(axis=1, how="all").to_csv("faithfulness.csv", index=False)
relevancy_df.dropna(axis=1, how="all").to_csv("relevancy.csv", index=False)

In [21]:
# Build one W&B table per metric from the result DataFrames;
# the tables are logged to the run in the next cell.
import wandb

faithfulness_table = wandb.Table(dataframe=faithfulness_df)
relevancy_table = wandb.Table(dataframe=relevancy_df)

In [22]:
wandb.log({"faithfulness": faithfulness_table, "relevancy": relevancy_table})

In [23]:
# Log the mean faithfulness and relevancy scores to W&B as scalars
# (two separate `wandb.log` calls, one per metric)
wandb.log({"faithfulness_mean": faithfulness_df["score"].mean()})
wandb.log({"relevancy_mean": relevancy_df["score"].mean()})

In [24]:
faithfulness_df["score"].mean(), relevancy_df["score"].mean()

(1.0, 1.0)

In [25]:
wandb_callback.finish()