<a href="https://colab.research.google.com/github/llm-finetune/experiment-tracking/blob/main/LLMWare_fastStart_ex5_chrome.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
#   install llmware into the running kernel's environment (no version is pinned here)
%pip install llmware

In [None]:
!pip install grpcio==1.60.0



In [None]:
import os
from llmware.library import Library
from llmware.retrieval import Query
from llmware.setup import Setup
from llmware.status import Status
from llmware.prompts import Prompt
from llmware.configs import LLMWareConfig

In [None]:
#   select "sqlite" as the active llmware database for this session
LLMWareConfig().set_active_db("sqlite")

In [None]:
#   embedding model used to build the vectors - "mini-lm-sbert" is the one active here
#   note: "industry-bert-contracts" (kept in the trailing comment) appears to be the
#   contracts 'fine-tuned' alternative - swap it in to try it
embedding_model = "mini-lm-sbert" #"industry-bert-contracts"

In [None]:
#   note: "faiss" is being used for fast start / no set-up required - we would recommend installing
#   a proper vector db for greater scalability and performance
#   -- this name is also read as a global inside semantic_rag (defined in the last cell)
vector_db = "faiss"

In [None]:
# pick any name for the library - it is passed to Library().create_new_library in Step 1
lib_name = "example5_library"

In [None]:
#   candidate models for the LLM inference step - change the index below to try another one
example_models = ["llmware/bling-1b-0.1", "llmware/bling-tiny-llama-v0", "llmware/dragon-yi-6b-gguf"]

# use local cpu model
#   -- index 0 selects "llmware/bling-1b-0.1"
llm_model_name = example_models[0]

In [None]:
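#   optional: a self-contained version of this workflow is defined as semantic_rag() in the last cell
#   (it uses the "Agreements" sample folder and its own query) - run that cell first, then
#   uncomment the call below
#semantic_rag(lib_name, embedding_model, llm_model_name)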

In [None]:
#   aliases that match the parameter names used by semantic_rag, so the step-by-step
#   cells below read the same as the body of that function
library_name=lib_name
embedding_model_name=embedding_model

In [None]:
# Step 1 - the library is the main 'organizing construct' in llmware, so it is created first
print(f"\nupdate: Step 1 - Creating library: {library_name}")

library = Library().create_new_library(library_name)


update: Step 1 - Creating library: example5_library


In [None]:
#   inspect the object returned by create_new_library
library

<llmware.library.Library at 0x7d28e77a6f80>

In [None]:
# Step 2 - Pull down the sample files from S3 through the .load_sample_files() command
#   --note: if you need to refresh the sample files, set 'over_write=True'
print ("update: Step 2 - Downloading Sample Files")

#   the returned path is the root folder of the sample files (joined with a sub-folder below)
sample_files_path = Setup().load_sample_files(over_write=False)


update: Step 2 - Downloading Sample Files


In [None]:
#   inspect where the sample files were placed
sample_files_path

'/root/llmware_data/sample_files'

In [None]:
#   the "Legal" sub-folder of the sample files is the document set parsed in Step 3
contracts_path = os.path.join(sample_files_path, "Legal")

In [None]:
#   inspect the folder that will be parsed
contracts_path

'/root/llmware_data/sample_files/Legal'

In [None]:
# Step 3 - point ".add_files" method to the folder of documents that was just created
#   this method parses all of the documents, text chunks, and captures in the active
#   database (set to "sqlite" earlier in this notebook)
print("update: Step 3 - Parsing and Text Indexing Files")

#   the returned summary (docs / blocks / pages added, rejected files) is displayed as the cell output
library.add_files(input_folder_path=contracts_path)

update: Step 3 - Parsing and Text Indexing Files


{'docs_added': 1,
 'blocks_added': 669,
 'images_added': 0,
 'pages_added': 297,
 'tables_added': 0,
 'rejected_files': []}

In [None]:
#   inspect file_copy_path - the output below shows the library's "uploads" folder
library.file_copy_path

'/root/llmware_data/accounts/llmware/example5_library/uploads/'

In [None]:
# Step 4 - build the embeddings for the library with the chosen model and vector db
print(f"\nupdate: Step 4 - Generating Embeddings in {vector_db} db - with Model- {embedding_model}")

library.install_new_embedding(embedding_model_name=embedding_model_name, vector_db=vector_db)


update: Step 4 - Generating Embeddings in faiss db - with Model- mini-lm-sbert


config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

update: embedding_handler - FAISS - Embeddings Created: 500 of 669
update: embedding_handler - FAISS - Embeddings Created: 669 of 669


{'embeddings_created': 669,
 'embedded_blocks': 669,
 'embedding_dims': 384,
 'time_stamp': 'Wed Mar 27 19:36:09 2024'}

In [None]:
# RAG steps start here ...

print("\nupdate: Loading model for LLM inference - ", llm_model_name)

#   load the selected model into a Prompt object - it is reused for every prompt below
prompter = Prompt().load_model(llm_model_name)

#   the same question is used for the semantic query and for the LLM prompt
query = "What does audio-video electronic mean?"


update: Loading model for LLM inference -  llmware/bling-1b-0.1


config.json:   0%|          | 0.00/2.27k [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/4.11G [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/2.11M [00:00<?, ?B/s]

In [None]:
#   key step: run semantic query against the library and get all of the top results
#   NOTE(review): result_count=50 presumably caps the number of hits, and
#   embedding_distance_threshold=1.0 presumably drops more distant matches - confirm in the llmware docs
results = Query(library).semantic_query(query, result_count=50, embedding_distance_threshold=1.0)


In [None]:
 #   print every hit in 'results' (index, source file, distance, text)
 #   -- comment out the two lines below to silence this output
for i, res in enumerate(results):
   print("update: ", i, res["file_source"], res["distance"], res["text"])


update:  0 Bharatiya_Nagarik_Suraksha_Sanhita.pdf 0.41569698  Clause  532 of the Bill relates to trial and proceedings to be held in electronic mode. This Clause  provides that trials and proceedings under this Code, may be held in electronic mode, by use of electronic communication or use of audio-video electronic means. Clause  533 of the Bill relates to repeal and savings. This Clause  provides that the Code of Criminal Procedure, 1973 is  repealed.
update:  1 Bharatiya_Nagarik_Suraksha_Sanhita.pdf 0.45289993  2. (1) In this Sanhita, unless the context otherwise requires,- (a) "audio-video electronic" means shall include use of any communication device for the purposes of video conferencing, recording of processes of identification,  search and seizure or evidence, transmission of electronic communication and for   such other purposes and by such other means as the State Government may, by rules  provide;"; (b) "bailable offence" means an offence which is shown as bailable in the Fi

In [None]:
#   answer the query once per file in the contracts folder, using only that file's retrievals
for i, contract in enumerate(os.listdir(contracts_path)):

    qr = []

    #   the macOS folder-metadata entry is not a document - move on to the next file
    if contract == ".DS_Store":
        continue

    print("\nContract Name: ", i, contract)

    #   scan the semantic query results and keep the entries that came from this file
    for j, entries in enumerate(results):

        #   compare on the last path component only
        #   (handles difference in windows file formats vs. mac / linux)
        library_fn = entries["file_source"]
        if os.sep in library_fn:
            library_fn = library_fn.split(os.sep)[-1]

        if library_fn != contract:
            continue

        print("Top Retrieval: ", j, entries["distance"], entries["text"])
        qr.append(entries)

    #   attach the collected query results to the prompt as its source material
    source = prompter.add_source_query_results(query_results=qr)

    #   ask the question against that source material
    response = prompter.prompt_with_source(query, prompt_name="default_with_context", temperature=0.3)

    #   note: prompt_with_source returns a list of dictionary responses
    #   -- depending upon the size of the source context, it may call the llm several times
    #   -- each dict entry represents 1 call to the LLM
    for resp in response:
        if "llm_response" in resp:
            print("\nupdate: llm answer - ", resp["llm_response"])

    #   reset the prompt's sources so the next document starts fresh
    prompter.clear_source_materials()


Contract Name:  0 Bharatiya_Nagarik_Suraksha_Sanhita.pdf
Top Retrieval:  0 0.41569698  Clause  532 of the Bill relates to trial and proceedings to be held in electronic mode. This Clause  provides that trials and proceedings under this Code, may be held in electronic mode, by use of electronic communication or use of audio-video electronic means. Clause  533 of the Bill relates to repeal and savings. This Clause  provides that the Code of Criminal Procedure, 1973 is  repealed.
Top Retrieval:  1 0.45289993  2. (1) In this Sanhita, unless the context otherwise requires,- (a) "audio-video electronic" means shall include use of any communication device for the purposes of video conferencing, recording of processes of identification,  search and seizure or evidence, transmission of electronic communication and for   such other purposes and by such other means as the State Government may, by rules  provide;"; (b) "bailable offence" means an offence which is shown as bailable in the First Sc

In [None]:
def semantic_rag(library_name, embedding_model_name, llm_model_name,
                 sample_folder="Agreements",
                 query="what is the executive's base annual salary",
                 vector_db_name=None):

    """ Illustrates the use of semantic embedding vectors in a RAG workflow
        --self-contained example - will be duplicative with some of the steps taken in other examples

        Creates a library, parses one folder of the llmware sample files into it, installs
        embeddings, runs one semantic query, and then prompts the LLM once per file in the
        folder using only the query results that came from that file.  Progress and answers
        are printed.

        Args:
            library_name: name of the library to create
            embedding_model_name: embedding model passed to install_new_embedding
            llm_model_name: model loaded into the Prompt object for inference
            sample_folder: sub-folder of the sample files to parse (default "Agreements")
            query: question used for both the semantic query and the LLM prompt
            vector_db_name: vector db to embed into; when None, the notebook-level
                'vector_db' variable is used (the original behavior)

        Returns:
            0 in all cases
    """

    #   fall back to the notebook-level setting only when the caller did not pass a vector db
    active_vector_db = vector_db if vector_db_name is None else vector_db_name

    # Step 1 - Create library which is the main 'organizing construct' in llmware
    print(f"\nupdate: Step 1 - Creating library: {library_name}")

    library = Library().create_new_library(library_name)

    # Step 2 - Pull down the sample files from S3 through the .load_sample_files() command
    #   --note: if you need to refresh the sample files, set 'over_write=True'
    print("update: Step 2 - Downloading Sample Files")

    sample_files_path = Setup().load_sample_files(over_write=False)
    contracts_path = os.path.join(sample_files_path, sample_folder)

    # Step 3 - point ".add_files" method to the folder of documents that was just created
    #   this method parses all of the documents, text chunks, and captures in the active database
    print("update: Step 3 - Parsing and Text Indexing Files")

    library.add_files(input_folder_path=contracts_path)

    # Step 4 - Install the embeddings
    #   --report the model that is actually passed in, not the notebook-level 'embedding_model'
    print(f"\nupdate: Step 4 - Generating Embeddings in {active_vector_db} db - with Model- {embedding_model_name}")

    library.install_new_embedding(embedding_model_name=embedding_model_name, vector_db=active_vector_db)

    # RAG steps start here ...

    print("\nupdate: Loading model for LLM inference - ", llm_model_name)

    prompter = Prompt().load_model(llm_model_name)

    #   key step: run semantic query against the library and get all of the top results
    results = Query(library).semantic_query(query, result_count=50, embedding_distance_threshold=1.0)

    #   if you want to look at 'results', uncomment the two lines below
    #   for i, res in enumerate(results):
    #       print("update: ", i, res["file_source"], res["distance"], res["text"])

    for i, contract in enumerate(os.listdir(contracts_path)):

        #   the macOS folder-metadata entry is not a document
        if contract == ".DS_Store":
            continue

        print("\nContract Name: ", i, contract)

        qr = []

        #   we will look through the list of semantic query results, and pull the top results for each file
        for j, entries in enumerate(results):

            library_fn = entries["file_source"]
            if os.sep in library_fn:
                # handles difference in windows file formats vs. mac / linux
                library_fn = library_fn.split(os.sep)[-1]

            if library_fn == contract:
                print("Top Retrieval: ", j, entries["distance"], entries["text"])
                qr.append(entries)

        #   we will add the query results to the prompt
        prompter.add_source_query_results(query_results=qr)

        #   run the prompt
        response = prompter.prompt_with_source(query, prompt_name="default_with_context", temperature=0.3)

        #   note: prompt_with_source returns a list of dictionary responses
        #   -- depending upon the size of the source context, it may call the llm several times
        #   -- each dict entry represents 1 call to the LLM

        for resp in response:
            if "llm_response" in resp:
                print("\nupdate: llm answer - ", resp["llm_response"])

        # start fresh for next document
        prompter.clear_source_materials()

    return 0