# Corporate RAG for Asset Management

## 1. Set Up the Environment

In [None]:
# import dependencies
import os
import time
from llmware.library import Library
from llmware.retrieval import Query
from llmware.prompts import Prompt
from llmware.setup import Setup

## 2. Test with an Example

In [None]:
# Disable HuggingFace tokenizer parallelism to avoid the tokenizers warning.
# This is set first so that it is already in effect when the embedding step
# below runs, not only for the semantic query at the end of the cell.
os.environ["TOKENIZERS_PARALLELISM"] = "false"

# Create a test library and load it with the llmware sample "Agreements" files
test_library = Library().create_new_library("Agreements")
samples_path = Setup().load_sample_files()
test_library.add_files(os.path.join(samples_path, "Agreements"))

# Create vector embeddings for the library and store them in Milvus
test_library.install_new_embedding(embedding_model_name="industry-bert-contracts", vector_db="milvus")

# Construct and perform a test semantic query against the library (result_count=20)
test_query = 'Resignation'
query_res = Query(test_library).semantic_query(test_query, result_count=20)
print(query_res)

## 3. RAG

In [None]:
# Create a library and load it with the project documents.
# The documents folder defaults to the original hard-coded location; set the
# RAG_DOCS_PATH environment variable to use another folder without editing this cell.
docs_path = os.environ.get(
    "RAG_DOCS_PATH",
    '/Users/gerasimosplegas/Desktop/bling-rag-llm-project/docs',
)
library = Library().create_new_library("Project_lib")
library.add_files(docs_path)

# Create vector embeddings for the library and store them in Milvus
# Opt for the industry-bert-asset-management model which is trained for our domain
library.install_new_embedding(embedding_model_name="industry-bert-asset-management", vector_db="milvus")

# Construct the query and retrieve the matching passages (result_count=2)
query = 'What is defined as criticality?'
query_res = Query(library).semantic_query(query, result_count=2)
print(query_res)

In [None]:
# Build the context string handed to the models from the retrieved passages.
# Inside each passage every pair of single quotes ('') becomes a line break.
# The passages are joined with a newline so that the end of one passage does
# not run directly into the start of the next.
embedded_text = '\n'.join(
    q['text'].replace("''", '\n') for q in query_res
)


# Candidate HuggingFace models whose answers and timings are compared below
models = [
    "llmware/bling-1b-0.1",
    "llmware/bling-1.4b-0.1",
    "llmware/bling-falcon-1b-0.1",
    "llmware/bling-cerebras-1.3b-0.1",
    "llmware/bling-sheared-llama-1.3b-0.1",
    "llmware/bling-sheared-llama-2.7b-0.1",
    "llmware/bling-red-pajamas-3b-0.1",
]

# Single-model list, kept alongside the full list of candidates
model_list_small = ["llmware/bling-1b-0.1"]


# Run the same query through every candidate model. For each one, print the
# time taken to load it, the answer, the usage information and the time
# elapsed between the end of loading and the end of the answer printout.
for model in models:
    load_started = time.time()
    print(f"\n > Loading Model: {model}...")
    prompter = Prompt().load_model(model, from_hf=True, api_key="")

    load_finished = time.time()
    load_seconds = load_finished - load_started
    print(f"\n > Model {model} load time: {load_seconds} seconds")

    print(f"Query: {query}")
    # The retrieved passages are supplied as context; temperature is fixed at 0.0
    output = prompter.prompt_main(
        query,
        context=embedded_text,
        prompt_name="default_with_context",
        temperature=0.0,
    )

    # Trim leading/trailing newlines from the answer before printing it
    llm_response = output["llm_response"].strip("\n")
    print(f"\n > LLM Response: {llm_response}")
    print(f"\n > LLM Usage: {output['usage']}")

    answer_finished = time.time()
    print(f"\nTotal processing time: {answer_finished - load_finished} seconds")

## 4. Benchmark against OpenAI Models

In [None]:
# Set the API key, either by exporting the OPENAI_API_KEY environment variable
# or by replacing the placeholder string below.
# The key is a plain string (not a list), since it is passed on as the
# `api_key` argument when the OpenAI model is loaded.
openai_api_key = os.environ.get("OPENAI_API_KEY", "YOUR_OPENAI_KEY")

In [None]:
# Build a prompter backed by the desired model (here GPT-3.5) and attach the
# retrieved query results to it as sources
prompt_text = "Summarize the criticality provisions"
print(f"\n > Prompting LLM with '{prompt_text}'")
prompter = Prompt().load_model(
    "gpt-3.5-turbo",
    api_key=openai_api_key,
)
sources = prompter.add_source_query_results(query_res)

In [None]:
# Send the prompt together with the attached sources, then print every answer
responses = prompter.prompt_with_source(
    prompt_text,
    prompt_name="summarize_with_bullets",
)
for response in responses:
    answer = response["llm_response"]
    print("\n > LLM response\n" + answer)