## RAG 실습

- 앞에서 Bedrock 사용법과 AOSS(Amazon OpenSearch Serverless) 사용법을 모두 배웠으니, 이제 기본적인 RAG가 어떻게 동작하는지 살펴볼 차례입니다.


In [None]:
import json
import boto3

In [None]:
# IPython storemagic: reload every variable previously saved with `%store`.
# Which names come back depends on what earlier notebooks stored; the next
# cell falls back to defaults for the AOSS settings if they are missing.
%store -r

In [None]:
# Reuse the OpenSearch Serverless client if this kernel already has one.
# Only a NameError means "not created yet"; any other exception (including
# KeyboardInterrupt) should surface instead of being silently swallowed.
try:
    print(aoss_client)
except NameError:
    # Use the connection settings restored by `%store -r` when available,
    # otherwise fall back to the hard-coded workshop defaults.
    try:
        print(collection_name)
        print(vector_index_name)
        print(aoss_endpoint)
    except NameError:
        collection_name = "rag-hol-aoss-collection"
        vector_index_name = "rag-hol-index-vector"
        aoss_endpoint = "gtp9tx1kvlcpucvtkse6.us-west-2.aoss.amazonaws.com"

    from opensearchpy import OpenSearch, RequestsHttpConnection
    from requests_aws4auth import AWS4Auth
    import botocore
    import time

    import sagemaker

    sess = sagemaker.Session()
    role = sagemaker.get_execution_role()
    region = boto3.Session().region_name

    # SigV4-sign requests for the 'aoss' service with the current
    # session's credentials (including the temporary session token).
    service = 'aoss'
    credentials = boto3.Session().get_credentials()
    awsauth = AWS4Auth(credentials.access_key, credentials.secret_key,
                       region, service, session_token=credentials.token)

    def get_aoss_client(host):
        """Return an OpenSearch client for ``host`` over HTTPS (port 443).

        Requests are authenticated with the module-level ``awsauth`` signer.
        """
        # NOTE(review): timeout is presumably in seconds (6000 s = 100 min)
        # — confirm this is intentional.
        client = OpenSearch(
            hosts=[{'host': host, 'port': 443}],
            http_auth=awsauth,
            use_ssl=True,
            verify_certs=True,
            connection_class=RequestsHttpConnection,
            timeout=6000
        )
        return client

    aoss_client = get_aoss_client(aoss_endpoint)


In [None]:
# Shared Bedrock runtime client; region and credentials come from the
# default boto3 configuration because none are passed here.
bedrock = boto3.client("bedrock-runtime")
# Model used by get_llm_output for answer generation (Claude 3 Sonnet).
bedrock_model_id = "anthropic.claude-3-sonnet-20240229-v1:0"
# Model used by get_embedding_output for query embeddings (Titan Text Embeddings V2).
embedding_model_id = "amazon.titan-embed-text-v2:0"

def get_llm_output(prompt):
    """Send ``prompt`` as a single user message to the chat model and return its reply.

    The request is issued through the module-level ``bedrock`` client against
    ``bedrock_model_id`` with fixed sampling settings (max_tokens=1024,
    temperature=0.1, top_p=0.5).

    Args:
        prompt: Text placed in the only user message of the request.

    Returns:
        The ``text`` field of the first entry in the response's ``content`` list.
    """
    user_message = {
        "role": "user",
        "content": [
            {"type": "text", "text": prompt},
        ],
    }
    request_payload = {
        "anthropic_version": "bedrock-2023-05-31",
        "max_tokens": 1024,
        "temperature": 0.1,
        "top_p": 0.5,
        "messages": [user_message],
    }

    raw_response = bedrock.invoke_model(
        body=json.dumps(request_payload),
        modelId=bedrock_model_id,
        accept='application/json',
        contentType='application/json',
    )

    # The response body is a stream; read it fully before decoding the JSON.
    parsed = json.loads(raw_response.get("body").read())
    first_block = parsed.get("content")[0]
    return first_block.get("text")

def get_embedding_output(query):
    """Embed ``query`` with the embedding model and return the resulting vector.

    The request asks for a normalized embedding with ``dimensions`` set to 1024
    and is sent through the module-level ``bedrock`` client to
    ``embedding_model_id``.

    Args:
        query: Text to embed.

    Returns:
        The value of the ``embedding`` field of the response (``None`` if the
        field is absent).
    """
    request_json = json.dumps({
        "inputText": query,
        "dimensions": 1024,
        "normalize": True,
    })

    raw_response = bedrock.invoke_model(
        body=request_json,
        modelId=embedding_model_id,
        accept='application/json',
        contentType='application/json',
    )

    # The response body is a stream; read it fully before decoding the JSON.
    parsed = json.loads(raw_response.get("body").read())
    return parsed.get("embedding")


# Prompt skeleton for the RAG functions: `{context}` receives the retrieved
# passages and `{question}` the user's query, both filled in via str.format.
prompt_template = """
You're a helpful assistant to answer the question.
Use the following pieces of <CONTEXT> to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.

<CONTEXT>
{context}
</CONTEXT>

Question: {question}
Helpful Answer:"""


def get_semantic_rag(user_query):
    """Answer ``user_query`` using vector search only.

    Steps: embed the query, run a k-NN search (k=5, size=5) on the
    ``content_embeddings`` field of ``vector_index_name``, join the ``content``
    of the hits with blank lines, fill ``prompt_template`` and call the LLM.

    Args:
        user_query: The question to answer.

    Returns:
        A dict with ``llm_input`` (the full prompt sent to the model) and
        ``llm_output`` (the model's reply).
    """
    query_embedding = get_embedding_output(user_query)
    knn_clause = {"content_embeddings": {"vector": query_embedding, "k": 5}}
    search_body = {"query": {"knn": knn_clause}}

    search_response = aoss_client.search(index=vector_index_name, body=search_body, size=5)

    # Collect the raw text of every hit, in the order the search returned them.
    passages = []
    for hit in search_response["hits"]["hits"]:
        passages.append(hit["_source"]["content"])

    llm_input = prompt_template.format(context="\n\n".join(passages), question=user_query)
    answer = get_llm_output(llm_input)

    return {"llm_input": llm_input, "llm_output": answer}

In [None]:
# Run the vector-only RAG pipeline on a sample question written in Korean.
output = get_semantic_rag("교육에서 챗봇을 어떻게 활용해야 하나요")

In [None]:
# Show only the model's answer; the full prompt is available as output["llm_input"].
print(output["llm_output"])

In [None]:

def get_normalized_result(search_results, add_meta, weight=1.0):
    """Convert a search response into score-normalized, weighted result dicts.

    Each hit's ``_score`` is divided by the response's maximum score and then
    multiplied by ``weight``, so the best hit scores exactly ``weight``.

    Args:
        search_results: Search response dict containing ``hits.hits`` and,
            optionally, ``hits.max_score``.
        add_meta: Value stored under ``meta`` in every result (the callers in
            this file pass the search type, e.g. ``"vector"``).
        weight: Multiplier applied to each normalized score.

    Returns:
        A list of dicts with keys ``doc_id``, ``score``, ``content`` and
        ``meta``, in the same order as the hits; empty when there are no hits.
    """
    hits = search_results["hits"]["hits"]
    if not hits:
        return []

    # max_score may be missing or None; derive it from the hits in that case
    # instead of failing on float(None).
    max_score = search_results["hits"].get("max_score")
    if max_score is None:
        max_score = max(float(hit["_score"]) for hit in hits)
    max_score = float(max_score)

    results = []
    for hit in hits:
        # A zero maximum cannot be used as a divisor; score such hits as 0.
        normalized_score = float(hit["_score"]) / max_score if max_score else 0.0
        results.append({
            "doc_id": hit["_id"],
            "score": normalized_score * weight,
            "content": hit["_source"]["content"],
            "meta": add_meta
        })

    return results

def get_hybrid_rag(user_query):
    """Answer ``user_query`` using combined vector and lexical retrieval.

    Runs a k-NN search and a ``match`` search against ``vector_index_name``,
    normalizes and weights each result list with ``get_normalized_result``,
    merges them (a document found by both searches keeps its vector score),
    drops results at or below ``threshold``, keeps the ``result_limit``
    highest-scoring ones and feeds their content to the LLM as context.

    Args:
        user_query: The question to answer; used for both the embedding and
            the lexical query.

    Returns:
        A dict with ``llm_input`` (the full prompt sent to the model) and
        ``llm_output`` (the model's reply).
    """
    result_limit = 5
    vec_weight = 0.6
    lex_weight = 0.55
    threshold = 0.05

    # Get vector search result
    vector = get_embedding_output(user_query)
    vector_query = {
      "query": {
        "knn": {
          "content_embeddings": {
            "vector": vector,
            "k": 5
          }
        }
      }
    }
    vector_response = aoss_client.search(index=vector_index_name, body=vector_query, size=10)
    vector_result = get_normalized_result(vector_response, "vector", vec_weight)

    # Get lexical search result. Use the function argument here: relying on a
    # notebook-level variable breaks any call made with a different query.
    keyword_query = {"query": {"match": {"content": user_query}}}
    keyword_response = aoss_client.search(index=vector_index_name, body=keyword_query, size=10)
    keyword_result = get_normalized_result(keyword_response, "lexical", lex_weight)

    # Merge: lexical hits are added only for documents the vector search missed.
    vector_ids = {item["doc_id"] for item in vector_result}
    items = vector_result + [
        item for item in keyword_result if item["doc_id"] not in vector_ids
    ]

    # Rank by weighted score before truncating so that strong lexical hits are
    # not discarded just because they were appended after the vector hits.
    # sorted() is stable, so ties keep their original relative order.
    sorted_items = sorted(
        (item for item in items if item["score"] > threshold),
        key=lambda item: item["score"],
        reverse=True,
    )[:result_limit]

    context_data = "\n\n".join([item["content"] for item in sorted_items])
    llm_input = prompt_template.format(context=context_data, question=user_query)
    llm_output = get_llm_output(llm_input)
    return {"llm_input": llm_input, "llm_output": llm_output}

In [None]:
# Alternative sample query in Korean (swap it in to try a Korean question):
# query_text = "교육에서 챗봇을 어떻게 활용해야 하나요"
query_text = "How to use chatbot for education?"
# Run the hybrid (vector + lexical) RAG pipeline on the sample query.
output = get_hybrid_rag(query_text)

In [None]:
# Show only the model's answer; the full prompt is available as output["llm_input"].
print(output["llm_output"])