In [1]:
import os
import time
from llmware.library import Library
from llmware.retrieval import Query
from llmware.prompts import Prompt
from llmware.setup import Setup

library = Library().create_new_library("Agreements")
sample_files_path = Setup().load_sample_files()
library.add_files(os.path.join(sample_files_path,"Agreements"))

library.install_new_embedding(embedding_model_name="industry-bert-contracts", vector_db="milvus")

os.environ["TOKENIZERS_PARALLELISM"] = "false" # Avoid a HuggingFace tokenizer warning
query_results = Query(library).semantic_query("Termination", result_count=20)

print(query_results)

  from .autonotebook import tqdm as notebook_tqdm


[{'query': 'Termination', '_id': '657aa32a82fb49b74d01d5eb', 'text': "        3.  Termination. The Employment Period shall terminate on the occurrence of any of the first to occur of (a) the   sixth (6th) anniversary of the Effective Date and (b) a Sale of the Company, unless earlier terminated as   provided in Section 3.1 through 3.4.    3.1. Termination Without Cause; Resignation for Good Reason. Employer may terminate Executive's   employment under this Section 3.1 at any time without Cause upon not less than thirty (30) days' prior   written notice to Executive; provided, however, that, in the event that such notice is given, Executive shall   be allowed to seek other employment during such notice period.", 'doc_ID': 12, 'block_ID': 28, 'page_num': 5, 'content_type': 'text', 'author_or_speaker': '', 'special_field1': '', 'file_source': 'Artemis Poseidon EXECUTIVE EMPLOYMENT AGREEMENT.pdf', 'added_to_collection': 'Thu Dec 14 08:39:35 2023', 'table': '', 'coords_x': 427, 'coords_y': 

In [5]:
library = Library().create_new_library("Project_lib")
library.add_files('/Users/gerasimosplegas/Desktop/bling-rag-llm-project/docs')
library.install_new_embedding(embedding_model_name="industry-bert-asset-management", vector_db="milvus")

q = 'What is defined as criticality?'
query_results = Query(library).semantic_query(q, result_count=2)

print(query_results)

[{'query': 'What is defined as criticality?', '_id': '657c4d6ea866bd54d201b7e1', 'text': '        Handbook & Toolkit   November 2023    WD-21-04      15 |  P a g e   CRITICALITY SCORE   The product of the consequence of failure and likelihood of failure ratings provides the overall   criticality score (risk score) for a given asset. The higher the score, the greater risk.       Calculating criticality scores gives a structured approach for comparing risk across asset types.    Quantifying risk for each asset type provides an informed prioritization process that not only identifies   the highest risk assets, but also allows for the comparison of risk reduction options.', 'doc_ID': 3, 'block_ID': 59, 'page_num': 16, 'content_type': 'text', 'author_or_speaker': '', 'special_field1': '', 'file_source': 'wd-21-04.pdf', 'added_to_collection': 'Fri Dec 15 14:58:21 2023', 'table': '', 'coords_x': 131, 'coords_y': 469, 'coords_cx': 15, 'coords_cy': 38, 'external_files': '', 'score': 0.0, 'simil

In [6]:
embedded_text = ''
for qr in query_results:
   embedded_text += '\n'.join(qr['text'].split("\'\'"))


# check all of the models for performance

model_list = ["llmware/bling-1b-0.1",
             "llmware/bling-1.4b-0.1",
             "llmware/bling-falcon-1b-0.1",
             "llmware/bling-cerebras-1.3b-0.1",
             "llmware/bling-sheared-llama-1.3b-0.1",
             "llmware/bling-sheared-llama-2.7b-0.1",
             "llmware/bling-red-pajamas-3b-0.1",
             ]


model_list_small = [
             "llmware/bling-1b-0.1",
             ]


# adapted from the BLING demo
query = q
for model_name in model_list:
    t0 = time.time()
    print(f"\n > Loading Model: {model_name}...")
    prompter = Prompt().load_model(model_name, from_hf=True, api_key="")
    
    t1 = time.time()
    print(f"\n > Model {model_name} load time: {t1-t0} seconds")
    
    print(f"Query: {query}")
    output = prompter.prompt_main(query, context=embedded_text
                                 , prompt_name="default_with_context",temperature=0.0)
    
    llm_response = output["llm_response"].strip("\n")
    print(f"LLM Response: {llm_response}")
    print(f"LLM Usage: {output['usage']}")
    
    t2 = time.time()
    print(f"\nTotal processing time: {t2-t1} seconds")


 > Loading Model: llmware/bling-1b-0.1...

 > Model llmware/bling-1b-0.1 load time: 41.652539014816284 seconds
Query: What is defined as criticality?
LLM Response:  •The product of the consequence of failure and likelihood of failure ratings provides the overall   criticality score for a given asset.
•The higher the score, the greater risk
•Handbook & Toolkit   November 2023    WD-21-04      15 |  P a g e   CRITICALITY SCORE   The product of the consequence of failure and likelihood of failure ratings provides the overall   criticality score for a given asset. The higher the score, the greater risk.
LLM Usage: {'input': 257, 'output': 104, 'total': 361, 'metric': 'tokens', 'processing_time': 133.18225002288818}

Total processing time: 133.27629017829895 seconds

 > Loading Model: llmware/bling-1.4b-0.1...

 > Model llmware/bling-1.4b-0.1 load time: 63.293179988861084 seconds
Query: What is defined as criticality?
LLM Response:  The product of the consequence of failure and likelihood 

In [15]:
# Update this value with your own API Key, either by setting the env var or editing it directly here:
openai_api_key = "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"

In [16]:
# Create a new prompter using the GPT-4 and add the query_results captured above
prompt_text = "Summarize the criticality provisions"
print (f"\n > Prompting LLM with '{prompt_text}'")
prompter = Prompt().load_model("gpt-3.5-turbo", api_key=openai_api_key)
sources = prompter.add_source_query_results(query_results)


 > Prompting LLM with 'Summarize the criticality provisions'


In [17]:
# Prompt the LLM with the sources and a query string
responses = prompter.prompt_with_source(prompt_text, prompt_name="summarize_with_bullets")
for response in responses:
    print ("\n > LLM response\n" + response["llm_response"])

ERROR:root:error: OpenAI model inference produced error - You exceeded your current quota, please check your plan and billing details. For more information on this error, read the docs: https://platform.openai.com/docs/guides/error-codes/api-errors. 



 > LLM response
AI Output Not Available
