In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import nest_asyncio
nest_asyncio.apply()
from retrieva.handler import RagHandler
from dotenv import load_dotenv
import os
from retrieva import ROOT_PATH
from IPython.display import Markdown, display, clear_output
from retrieva.data import add_root

  from .autonotebook import tqdm as notebook_tqdm


# Testing the handler

In [3]:
# Development convenience: read variables from the .env file located under ROOT_PATH.
# In production the same variables are passed to the containers instead.
dotenv_file = os.path.join(ROOT_PATH, ".env")
load_dotenv(dotenv_file)

True

In [4]:
# define prompt viewing function
def display_prompt_dict(prompts_dict):
    """Show every prompt held in ``prompts_dict``.

    For each ``key -> prompt`` pair this renders a Markdown header carrying
    the key, prints the text returned by ``prompt.get_template()`` as plain
    output, and finally renders a Markdown spacer.

    Args:
        prompts_dict: Mapping whose values expose a ``get_template()`` method.
    """
    spacer = "<br><br>"
    for key, prompt in prompts_dict.items():
        header = f"**Prompt Key**: {key}<br>**Text:** <br>"
        display(Markdown(header))
        # The template goes through print() so it is shown verbatim rather than as Markdown.
        template_text = prompt.get_template()
        print(template_text)
        display(Markdown(spacer))

In [5]:
# Show the three environment settings used to build the handler (KeyError if one is unset).
print(*(os.environ[name] for name in ("WEAVIATE_URL", "DATA_FOLDER_PATH", "USE_CLOUD_PIPELINE")))

http://localhost:10080 ./artifacts/sagemaker_documentation_small 0


In [6]:
# Collect the handler settings first, then build the handler from them.
handler_settings = {
    "index_name": "SageMakerDocs",
    "weaviate_url": os.environ["WEAVIATE_URL"],
    "data_path": add_root(os.environ["DATA_FOLDER_PATH"]),
    # Environment values are strings, hence the int() conversion of the flag.
    "cloud_based": int(os.environ["USE_CLOUD_PIPELINE"]),
    "num_workers_injection": 1,
}
rag_handler = RagHandler(**handler_settings)

2024-05-02 13:43:24,527 | INFO | handler - __init__() 
>>>> Using weaviate db at http://localhost:10080 with cloud flag at 0

INFO:sentence_transformers.SentenceTransformer:Load pretrained SentenceTransformer: BAAI/bge-small-en-v1.5
Load pretrained SentenceTransformer: BAAI/bge-small-en-v1.5


            Please consider upgrading to the latest version. See https://weaviate.io/developers/weaviate/client-libraries/python for details.


INFO:sentence_transformers.SentenceTransformer:Use pytorch device_name: cuda
Use pytorch device_name: cuda
2024-05-02 13:43:28,686 | INFO | handler - __init__() 
>>>> Loading SageMakerDocs...



In [7]:
# Send one test question to the handler and ask for a streamed response.
query = "how can I solve a RL aws problem with sagemaker?"
# Other queries that can be swapped in for manual testing:
#   "do you know anything about sagemkaer toolkit?"
#   "am I an OVNI?"
resp_stream = rag_handler.get_response(
    query,
    similarity_top_k=2,
    streaming=True,
)

In [8]:
# Accumulate the streamed chunks and redraw the partial answer as each one arrives.
sentence = ""
for text in resp_stream.response_gen:
    sentence += text
    # wait=True postpones the clear until new output is available, so the cell
    # content is swapped in one step instead of flashing empty between chunks.
    clear_output(wait=True)
    print(sentence)

Unfortunately, based on the provided context information, your query seems to be unrelated to the topic at hand. The given context is about integrating Amazon SageMaker with Application Auto Scaling using service-linked roles, and it does not directly relate to solving RL AWS problems with SageMaker. 

However, if you are looking for information on how to solve reinforcement learning (RL) problems using Amazon SageMaker, you can refer to the following resources:

1. Reinforcement Learning in Amazon SageMaker - This is an introductory tutorial that covers the basics of RL and walks through setting up an RL environment using SageMaker.

2. Deeplearning Containers for Reinforcement Learning on AWS - This resource provides a list of pre-trained RL models, as well as instructions on how to use them with Amazon SageMaker.

3. Amazon SageMaker RL Components - This documentation provides an overview of the RL components available in SageMaker, such as the RL algorithm trainer, hyperparameter t

## Checking the prompts

In [11]:
# Fetch the prompts from the handler's engine and render each one.
engine = rag_handler.engine
prompts_dict = engine.get_prompts()
display_prompt_dict(prompts_dict)

**Prompt Key**: response_synthesizer:text_qa_template<br>**Text:** <br>

Context information is below.
---------------------
{context_str}
---------------------
Given the context information and not prior knowledge, answer the query.
Query: {query_str}
Answer: 


<br><br>

**Prompt Key**: response_synthesizer:refine_template<br>**Text:** <br>

The original query is as follows: {query_str}
We have provided an existing answer: {existing_answer}
We have the opportunity to refine the existing answer (only if needed) with some more context below.
------------
{context_msg}
------------
Given the new context, refine the original answer to better answer the query. If the context isn't useful, return the original answer.
Refined Answer: 


<br><br>

# Evaluation

In [28]:
from sklearn.model_selection import train_test_split
from llama_index.core.evaluation import DatasetGenerator, RelevancyEvaluator
from llama_index.core import SimpleDirectoryReader, VectorStoreIndex, Response
from llama_index.llms.openai import OpenAI
import pandas as pd
import logging
import sys

In [10]:
# Send INFO-and-above log records to stdout through a single root handler.
# One handler is enough: attaching a second StreamHandler to the root logger prints
# every record twice, and once more for each time the cell is re-run.
# force=True (Python 3.8+) replaces any handlers already on the root logger, so
# re-running this cell never stacks handlers.
logging.basicConfig(stream=sys.stdout, level=logging.INFO, force=True)

# Load the documents found under the configured data folder.
data_dir = add_root(os.environ["DATA_FOLDER_PATH"])
reader = SimpleDirectoryReader(data_dir)
documents = reader.load_data()

In [11]:
# Hold out 10% of the documents; the fixed random_state keeps the split reproducible.
# The larger part gets a real name instead of `_`: assigning `_` in IPython shadows
# the "last output" variable, which then stops being updated.
train_docs, test_docs = train_test_split(documents, test_size=0.10, random_state=42)

In [12]:
# NOTE: This generation should be based on a local model for IP reasons
# NOTE(review): the generator is built from the full `documents` list, not from the
# `test_docs` subset produced by the split in the previous cell — confirm which is intended.
data_generator = DatasetGenerator.from_documents(documents)

  return cls(


In [None]:
# Generate the evaluation questions.
# Presumably the argument is the number of questions to produce — confirm against the llama_index API.
NUM_EVAL_QUESTIONS = 50
eval_questions = data_generator.generate_questions_from_nodes(NUM_EVAL_QUESTIONS)

In [25]:
# Preview the first three generated questions.
eval_questions[:3]

['What are the main functionalities implemented by the SageMaker Training and Inference toolkits?',
 'How do the toolkits help in adapting containers to run scripts, train algorithms, and deploy models on SageMaker?',
 'What does the library define for users when installed?']

In [30]:
# NOTE: This could also be a local model
# NOTE(review): the variable name says "gpt4", but the judge model configured here is gpt-3.5-turbo.
llm = OpenAI(
    model="gpt-3.5-turbo",
    temperature=0,
)
evaluator_gpt4 = RelevancyEvaluator(llm=llm)

In [49]:
# Answer each generated question with the handler (non-streaming), then have the
# evaluator judge the answer; verdicts are collected in question order.
results = []
for question in eval_questions:
    answer = rag_handler.get_response(question, similarity_top_k=2, streaming=False)
    verdict = evaluator_gpt4.evaluate_response(query=question, response=answer)
    results.append(verdict)

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST h

In [51]:
# One row per evaluation result (each result converted with dict()); show the first two rows.
eval_df = pd.DataFrame(list(map(dict, results)))
eval_df.head(2)

Unnamed: 0,query,contexts,response,passing,feedback,score,pairwise_source,invalid_result,invalid_reason
0,What are the main functionalities implemented ...,[Using the SageMaker Training and Inference To...,The main functionalities implemented by the Sa...,True,YES,1.0,,False,
1,How do the toolkits help in adapting container...,[Using the SageMaker Training and Inference To...,The SageMaker Training and Inference Toolkits ...,True,YES,1.0,,False,


In [52]:
# Share of questions whose answer was judged relevant.
# Uses the evaluator's boolean `passing` column instead of an exact string match on
# `feedback`: feedback looks like the judge's free text, so a reply such as "Yes" or
# "YES." would not equal "YES" and would be miscounted as a failure.
# .eq(True) counts a missing verdict as not passing.
eval_df["passing"].eq(True).mean()
# other metrics can be extracted: f1, etc

0.84