In [1]:
!pip install llmware

Collecting llmware
  Downloading llmware-0.2.14-py3-none-any.whl (56.0 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m56.0/56.0 MB[0m [31m8.4 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting boto3==1.24.53 (from llmware)
  Downloading boto3-1.24.53-py3-none-any.whl (132 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m132.5/132.5 kB[0m [31m795.1 kB/s[0m eta [36m0:00:00[0m
Collecting openai>=1.0.0 (from llmware)
  Downloading openai-1.30.1-py3-none-any.whl (320 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m320.6/320.6 kB[0m [31m19.2 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting pymongo>=4.7.0 (from llmware)
  Downloading pymongo-4.7.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (670 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m670.0/670.0 kB[0m [31m8.6 MB/s[0m eta [36m0:00:00[0m
Collecting Wikipedia-API==0.6.0 (from llmware)
  Downloading Wikipedia_API-0.6.0-py3-none-any.whl 

In [2]:
import os
import re
from llmware.prompts import Prompt, HumanInTheLoop
from llmware.setup import Setup
from llmware.configs import LLMWareConfig
from llmware.retrieval import Query
from llmware.library import Library

In [3]:
LLMWareConfig().set_active_db("sqlite")
example_models = ["llmware/bling-1b-0.1", "llmware/bling-tiny-llama-v0", "llmware/dragon-yi-6b-gguf"]
model_name = example_models[0]

In [4]:
""" Example #4a:  Main general case to run a RAG workflow from a Library """

# Load the llmware sample files
print (f"\n > Loading the llmware sample files...")
sample_files_path = Setup().load_sample_files()
contracts_path = os.path.join(sample_files_path,"Agreements")

contracts_lib = Library().create_new_library("example4_library")
contracts_lib.add_files(contracts_path)


 > Loading the llmware sample files...


{'docs_added': 15,
 'blocks_added': 2211,
 'images_added': 0,
 'pages_added': 204,
 'tables_added': 0,
 'rejected_files': []}

In [5]:
# questions that we want to ask each contract
question_list = [{"topic": "executive employment agreement", "llm_query": "What are the names of the two parties?"},
                 {"topic": "base salary", "llm_query": "What is the executive's base salary?"},
                 {"topic": "governing law", "llm_query": "What is the governing law?"}]

print (f"\n > Loading model {model_name}...")


 > Loading model llmware/bling-1b-0.1...


In [6]:
# get a list of all of the unique documents in the library
q = Query(contracts_lib)

In [7]:
# doc id list
doc_list = q.list_doc_id()
print("update: document id list - ", doc_list)

update: document id list -  [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]


In [8]:
# filename list
fn_list = q.list_doc_fn()
print("update: filename list - ", fn_list)

update: filename list -  ['Artemis Poseidon EXECUTIVE EMPLOYMENT AGREEMENT.pdf', 'Leto EXECUTIVE EMPLOYMENT AGREEMENT.pdf', 'Bia EXECUTIVE EMPLOYMENT AGREEMENT.pdf', 'Amphitrite EXECUTIVE EMPLOYMENT AGREEMENT.pdf', 'Rhea EXECUTIVE EMPLOYMENT AGREEMENT.pdf', 'Gaia EXECUTIVE EMPLOYMENT AGREEMENT.pdf', 'Nike EXECUTIVE EMPLOYMENT AGREEMENT.pdf', 'Nyx EXECUTIVE EMPLOYMENT AGREEMENT.pdf', 'Demeter EXECUTIVE EMPLOYMENT AGREEMENT.pdf', 'Persephone EXECUTIVE EMPLOYMENT AGREEMENT.pdf', 'Metis EXECUTIVE EMPLOYMENT AGREEMENT.pdf', 'Apollo EXECUTIVE EMPLOYMENT AGREEMENT.pdf', 'Eileithyia EXECUTIVE EMPLOYMENT AGREEMENT.pdf', 'Athena EXECUTIVE EMPLOYMENT AGREEMENT.pdf', 'Aphrodite EXECUTIVE EMPLOYMENT AGREEMENT.pdf']


In [9]:
prompter = Prompt().load_model(model_name)

for i, doc_id in enumerate(doc_list):

  print("\nAnalyzing contract: ", str(i+1), doc_id, fn_list[i])

  print("LLM Responses:")

  for question in question_list:

    query_topic = question["topic"]
    llm_question = question["llm_query"]

    doc_filter = {"doc_ID": [doc_id]}
    query_results = q.text_query_with_document_filter(query_topic,doc_filter,result_count=5,exact_mode=True)

    verbose = False
    if verbose:
      # this will display the query results from the query above
      for j, qr in enumerate(query_results):
        print("update: querying document - ", query_topic, j, doc_filter, qr)

      source = prompter.add_source_query_results(query_results)

       #   *** this is the call to the llm with the source packaged in the context automatically ***
      responses = prompter.prompt_with_source(llm_question, prompt_name="default_with_context", temperature=0.3)

      # unpacking the results from the LLM
      for r, response in enumerate(responses):
        print("update: llm response -  ", llm_question, re.sub("[\n]"," ", response["llm_response"]).strip())

      # We're done with this contract, clear the source from the prompt
      prompter.clear_source_materials()

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/2.27k [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/4.11G [00:00<?, ?B/s]



tokenizer.json:   0%|          | 0.00/2.11M [00:00<?, ?B/s]


Analyzing contract:  1 1 Artemis Poseidon EXECUTIVE EMPLOYMENT AGREEMENT.pdf
LLM Responses:

Analyzing contract:  2 2 Leto EXECUTIVE EMPLOYMENT AGREEMENT.pdf
LLM Responses:

Analyzing contract:  3 3 Bia EXECUTIVE EMPLOYMENT AGREEMENT.pdf
LLM Responses:

Analyzing contract:  4 4 Amphitrite EXECUTIVE EMPLOYMENT AGREEMENT.pdf
LLM Responses:

Analyzing contract:  5 5 Rhea EXECUTIVE EMPLOYMENT AGREEMENT.pdf
LLM Responses:

Analyzing contract:  6 6 Gaia EXECUTIVE EMPLOYMENT AGREEMENT.pdf
LLM Responses:

Analyzing contract:  7 7 Nike EXECUTIVE EMPLOYMENT AGREEMENT.pdf
LLM Responses:

Analyzing contract:  8 8 Nyx EXECUTIVE EMPLOYMENT AGREEMENT.pdf
LLM Responses:

Analyzing contract:  9 9 Demeter EXECUTIVE EMPLOYMENT AGREEMENT.pdf
LLM Responses:

Analyzing contract:  10 10 Persephone EXECUTIVE EMPLOYMENT AGREEMENT.pdf
LLM Responses:

Analyzing contract:  11 11 Metis EXECUTIVE EMPLOYMENT AGREEMENT.pdf
LLM Responses:

Analyzing contract:  12 12 Apollo EXECUTIVE EMPLOYMENT AGREEMENT.pdf
LLM Respo

In [10]:
# Save jsonl report to jsonl to /prompt_history folder
print("\nPrompt state saved at: ", os.path.join(LLMWareConfig.get_prompt_path(),prompter.prompt_id))
prompter.save_state()


Prompt state saved at:  /root/llmware_data/prompt_history/ca2dbf62-6aee-4519-aff1-93851b69150e


<llmware.prompts.Prompt at 0x7af7082d8250>

In [11]:
# Save csv report that includes the model, response, prompt, and evidence for human-in-the-loop review
csv_output = HumanInTheLoop(prompter).export_current_interaction_to_csv()
print("\nCSV output saved at:  ", csv_output)


CSV output saved at:   {'report_name': 'interaction_report_Wed May 22 13:02:20 2024.csv', 'report_fp': '/root/llmware_data/prompt_history/interaction_report_Wed May 22 13:02:20 2024.csv', 'results': 0}
