In [None]:
!pip install llama-index
!pip install openai
!pip install deepeval
!pip install llama-index.readers.json

Collecting llama-index
  Downloading llama_index-0.10.59-py3-none-any.whl.metadata (11 kB)
Collecting llama-index-agent-openai<0.3.0,>=0.1.4 (from llama-index)
  Downloading llama_index_agent_openai-0.2.9-py3-none-any.whl.metadata (729 bytes)
Collecting llama-index-cli<0.2.0,>=0.1.2 (from llama-index)
  Downloading llama_index_cli-0.1.13-py3-none-any.whl.metadata (1.5 kB)
Collecting llama-index-core==0.10.59 (from llama-index)
  Downloading llama_index_core-0.10.59-py3-none-any.whl.metadata (2.4 kB)
Collecting llama-index-embeddings-openai<0.2.0,>=0.1.5 (from llama-index)
  Downloading llama_index_embeddings_openai-0.1.11-py3-none-any.whl.metadata (655 bytes)
Collecting llama-index-indices-managed-llama-cloud>=0.2.0 (from llama-index)
  Downloading llama_index_indices_managed_llama_cloud-0.2.7-py3-none-any.whl.metadata (3.8 kB)
Collecting llama-index-legacy<0.10.0,>=0.9.48 (from llama-index)
  Downloading llama_index_legacy-0.9.48-py3-none-any.whl.metadata (8.5 kB)
Collecting llama-ind

In [None]:
## Simple Baseline RAG
# Builds a vector index over one JSON filing and asks it a single question.

import os

import pandas as pd
from google.colab import userdata
from llama_index.core import Settings
from llama_index.core import VectorStoreIndex, Response
from llama_index.core.tools import QueryEngineTool, ToolMetadata
from llama_index.llms.openai import OpenAI
from llama_index.readers.json import JSONReader

# Colab-specific: copy the key from the notebook's secret store into the
# process environment (presumably read by the OpenAI-backed components below).
os.environ['OPENAI_API_KEY'] = userdata.get('OPENAI_API_KEY')

# Global LLM used by llama-index for this notebook.
Settings.llm = OpenAI(temperature=0.2, model="gpt-3.5-turbo")

# Set default chunk size and overlap
Settings.chunk_size = 1024
Settings.chunk_overlap = 20

reader = JSONReader(
    # The number of levels to go back in the JSON tree. Set to 0 to traverse all levels. Default is None.
    levels_back=0,
)

# Load the filing; no extra metadata is attached to the resulting documents.
flexsteel_doc = reader.load_data(
    input_file="/content/0000037472-23-000024.json",
    extra_info={},
)

# Index the documents and expose a query engine that retrieves the top 3 matches.
flexsteel_index = VectorStoreIndex.from_documents(flexsteel_doc)
flexsteel_engine = flexsteel_index.as_query_engine(similarity_top_k=3)

# Tool wrapper around the engine. It is not used in this cell; it is kept
# because later cells may reference `query_engine_tools`.
query_engine_tools = [
    QueryEngineTool(
        query_engine=flexsteel_engine,
        metadata=ToolMetadata(
            name="flexsteel_10k",
            description=(
                "Provides information about flexsteel financials for year 2023"
            ),
        ),
    )
]

# The question is sent verbatim to the retriever and the LLM, so spelling
# matters ("comapny" -> "company").
user_input = "What is the net sales of flexsteel company in residential area in 2023"

response_object = flexsteel_engine.query(user_input)
print(response_object)






$393,692


In [None]:
# Simple Evaluator
# Turns the rows of an evaluation spreadsheet into deepeval test cases and
# configures two metrics for them.
# NOTE: `pd` and `userdata` are imported in the baseline-RAG cell; run it first.

import deepeval
from deepeval import evaluate
from deepeval.dataset import EvaluationDataset
from deepeval.metrics import AnswerRelevancyMetric
from deepeval.metrics import FaithfulnessMetric
from deepeval.test_case import LLMTestCase

# Read the 'Query', 'Response' and 'Source' columns of the 'kg_rag' sheet.
filepath = '/content/evaluation_results4.xlsx'
df = pd.read_excel(filepath, sheet_name='kg_rag')
queries = df['Query']
responses = df['Response']
sources = df['Source']

deepeval.login_with_confident_api_key(userdata.get('DEEPEVAL_API_KEY'))

# Both metrics share the same configuration: threshold 0.5, gpt-3.5-turbo as
# `model`, and a textual reason included with each score.
metric1 = AnswerRelevancyMetric(
    threshold=0.5,
    model="gpt-3.5-turbo",
    include_reason=True,
)

metric2 = FaithfulnessMetric(
    threshold=0.5,
    model="gpt-3.5-turbo",
    include_reason=True,
)

# One test case per spreadsheet row. Excel cells can come back as NaN (blank)
# or as numbers, while the test case fields are meant to hold text, so rows
# with a missing field are skipped and every value is converted to `str`.
lst = [
    LLMTestCase(
        input=str(query),
        actual_output=str(response),
        retrieval_context=[str(source)],
    )
    for query, response, source in zip(queries, responses, sources)
    if pd.notna(query) and pd.notna(response) and pd.notna(source)
]

# Make dropped rows visible instead of silently shrinking the evaluation set.
skipped_rows = len(df) - len(lst)
if skipped_rows:
    print(f"Skipped {skipped_rows} row(s) with a missing Query, Response or Source")

# Optional (disabled): manage the test cases as a Confident AI dataset.
# # Build a dataset from the test cases
# dataset = EvaluationDataset(lst)

# # Push to Confident
# dataset.push(alias="My Confident Dataset")

# # Pull from Confident and evaluate
# dataset.pull(alias ="My Confident Dataset")
# dataset.evaluate([metric1 ,metric2])