In [2]:
# One-time environment setup: uncomment the lines below and run this cell once
# in a fresh environment, then restart the kernel.
# (%pip installs into the environment of the running kernel.)
# %pip install "llama-index==0.11.15"
# %pip install llama-index-readers-file
# %pip install python-dotenv
# %pip install llama-index-llms-nvidia
# %pip install llama-index-embeddings-nvidia


# RAG
- RAG is a technique for augmenting LLM knowledge with additional data.
- LLMs can reason about wide-ranging topics, but their knowledge is limited to the public data up to a specific point in time that they were trained on.
- If you want to build AI applications that can reason about private data or data introduced after a model's cutoff date, you need to augment the knowledge of the model with the specific information it needs.
- The process of bringing the appropriate information and inserting it into the model prompt is known as retrieval augmented generation (RAG).

# LlamaIndex
LlamaIndex is a framework for building context-augmented generative AI applications with LLMs including agents and workflows.

https://docs.llamaindex.ai/en/stable/

# NIM
NIM is a set of optimized cloud-native microservices designed to shorten time-to-market and simplify deployment of generative AI models anywhere, across cloud, data center, and GPU-accelerated workstations. It expands the developer pool by abstracting away the complexities of AI model development and packaging for production using industry-standard APIs.

https://developer.nvidia.com/blog/nvidia-nim-offers-optimized-inference-microservices-for-deploying-ai-models-at-scale/

https://docs.api.nvidia.com/nim/reference/llm-apis

![image.png](images/NIM.png)

 # NVIDIA API Catalog
 https://docs.api.nvidia.com/
 
- NVIDIA API Catalog is a hosted platform for accessing a wide range of microservices online.
- You can test models on the catalog and then export them with an NVIDIA AI Enterprise license for on-premises or cloud deployment

# Microsoft Phi-3 Small 128K Instruct
https://github.com/microsoft/Phi-3CookBook

# Llama 3.2 3B Instruct
https://huggingface.co/meta-llama/Llama-3.2-3B-Instruct

In [3]:
from dotenv import dotenv_values
import os
# All project paths are resolved relative to the current working directory
ROOT_DIR = os.getcwd()
# Parse <ROOT_DIR>/keys/.env into the `config` mapping
config = dotenv_values(
    os.path.join(ROOT_DIR, "keys", ".env"),
)

In [5]:
# Export the NVIDIA API key through the process environment; the NVIDIA LLM and
# embedding clients further down are constructed without an explicit api_key.
# os.environ only accepts str values, so a missing entry (config.get -> None)
# would otherwise fail with an opaque "TypeError: str expected, not NoneType".
nvidia_api_key = config.get('NVIDIA_API_KEY')
if not nvidia_api_key:
    raise ValueError(
        "NVIDIA_API_KEY is missing or empty in keys/.env - "
        "add a line of the form NVIDIA_API_KEY=<your key> and re-run this cell"
    )
os.environ['NVIDIA_API_KEY'] = nvidia_api_key


# Meta/llama-3.2-3b-instruct

In [11]:
# Settings enables global configuration as a singleton object throughout your application.
# Here, it is used to set the LLM, embedding model, and text splitter configurations globally.
from llama_index.core import Settings
from llama_index.llms.nvidia import NVIDIA

# Global LLM: meta/llama-3.2-3b-instruct served from the NVIDIA API Catalog
Settings.llm = NVIDIA(
    model="meta/llama-3.2-3b-instruct",
    temperature=0.7,
)



In [12]:
from llama_index.embeddings.nvidia import NVIDIAEmbedding
# Global embedding model used when indexing and retrieving.
# NOTE(review): truncate="END" presumably trims over-long inputs from the end - confirm against the NVIDIAEmbedding docs.
Settings.embed_model = NVIDIAEmbedding(
    model="NV-Embed-QA",
    truncate="END",
)


In [8]:
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader

# Read every .pdf file found under <ROOT_DIR>/data
data = SimpleDirectoryReader(
    input_dir=os.path.join(ROOT_DIR, "data"),
    required_exts=[".pdf"],
).load_data()

# Build a vector-store index over the loaded documents
index = VectorStoreIndex.from_documents(data)

In [27]:
# Number of Document objects returned by the reader
len(data)

143

In [26]:
# Inspect the second loaded Document (its text and file metadata)
data[1]

Document(id_='196e75ec-3152-45b7-af73-3dba128a1976', embedding=None, metadata={'page_label': '2', 'file_name': 'Parser Source 2.pdf', 'file_path': 'D:\\repos\\llamaindex\\data\\Parser Source 2.pdf', 'file_type': 'application/pdf', 'file_size': 1930169, 'creation_date': '2024-10-26', 'last_modified_date': '2024-10-25'}, excluded_embed_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], excluded_llm_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], relationships={}, text="Aardvark Constructions Limited  \nDate  Company Profile  Company Number  \n09/08/2024  123456  \n \nDate 09/08/2024  AARDVCONST/MCPARTLA -C Page 1 Main Details  \n Name:  Aardvark Constructions Limited  \n QuickRef:  AARDVCONST  \n Country:  United Kingdom  \n Company Number:  123456  \n Incorporated:  20/10/2020  \n Company Type:  Limited by Shares  \n Company Status:  Active  \n Dissolved: 

In [13]:
# Query engine over the index, configured with similarity_top_k=3
query_engine = index.as_query_engine(
    similarity_top_k=3,
)

# One-off question against the indexed PDFs; print the generated answer
response = query_engine.query(
    "Appointments Board Positions list with names and other details of Aardvark Constructions Limited"
)
print(response)

Abbles, James - ABBLES -J - Director - 19/04/2023 - Trainer
Abdreatta, Leopoldo - ABDREATT -L - Director - 18/10/2023 - Secretary
Adam, Nicole - ADAMS -N - Alternate Director - 04/04/2023 - CFO, Non Executive Director - 10/04/2024
Alberts, Stoffel - ALBERTS -S - Company Secretary - 16/12/2022 - Accountant
Rutter, Gus - RUTTER -G - Director - 07/03/2024 - Director


In [17]:
from llama_index.core.memory import ChatMemoryBuffer
from llama_index.core.chat_engine import CondensePlusContextChatEngine

# Conversation memory handed to the chat engine (token_limit=4500)
memory = ChatMemoryBuffer.from_defaults(token_limit=4500)

# Chat engine that answers from the index's retriever and keeps history in `memory`
chat_engine = CondensePlusContextChatEngine.from_defaults(
    index.as_retriever(),
    memory=memory,
)

# First turn: ask for the company's main details, one field per line
prompt = """
Provide Main Details of the company Aardvark Constructions Limited. Including following details:
Name:
Country:
Company Number:
Incorporated:
Company Type:
Company Status:
Primary Addresses Registered Office:
Accounting Dates:
Confirmation Statement:
"""
response = chat_engine.chat(prompt)
print(response)

Based on the provided documents, here are the Main Details of Aardvark Constructions Limited:

* Name: Aardvark Constructions Limited
* Country: United Kingdom
* Company Number: 123456
* Incorporated: 20/10/2020
* Company Type: Limited by Shares
* Company Status: Active
* Primary Addresses Registered Office: 6 Chancery Road, London, WC2A 5DP, United Kingdom
* Accounting Dates: Last Period End: 16/11/2022, Current Period End: 16/11/2024
* Confirmation Statement: Filed on 17/02/2023, Next Overdue: 03/03/2023


In [18]:
# Follow-up turn on the same chat_engine: extract the management contact fields
prompt = (
    "\n"
    "From Management Details extract:\n"
    "Managed By:\n"
    "Managed By Email:\n"
)
response = chat_engine.chat(prompt)
print(response)

Based on the Management Details of Aardvark Constructions Limited, here are the extracted details:

* Managed By: Caroline McPartland
* Managed By Email: cmcpartland@diligent.com


In [19]:
# Follow-up turn: former company names and the period each was in use.
# The space before the final "\n" is part of the original prompt text and is kept as-is.
prompt = (
    "\n"
    "Past Names of the Company with their period \n"
)
response = chat_engine.chat(prompt)
print(response)

Based on the provided documents, here are the Past Names of the Company with their period:

* Diligent Holdings PLC (26/05/2023 - 05/03/2024)
* Diligent Holdings LTD (28/02/2023 - 26/05/2023)
* Diligent Holdings PLC 2 (08/09/2022 - 28/02/2023)
* Diligent Holdings LLC (08/09/2022 - 08/09/2022)
* Diligent Holdings PLC 1 (26/02/2002 - 08/09/2022)
* Blueprint Holdings Plc (31/01/2002 - 26/02/2002)
* KLGH plc (18/06/2001 - 31/01/2002)
* Blueprint Investment Holdings Plc (23/04/2001 - 18/06/2001)
* BLUEPRINT Ireland Holdings PLC (09/07/1989 - 23/04/2001)


In [20]:
# Follow-up turn: board appointments with names and related details
prompt = (
    "\n"
    "Appointments Board Positions list with names and other details\n"
)
response = chat_engine.chat(prompt)
print(response)

Based on the provided documents, here is the list of Appointments to Board Positions:

1. Chair:
	* Martin Adams (ADAMS -M) - 03/02/2023 - 09/02/2023
	* Nicole Adams (ADAMS -N) - 09/02/2023 - 22/03/2023
	* Curtis Duncan (DUNCAN -C) - 08/02/2023 - 22/02/2023
2. Chief Executive:
	* Mohammed Malek (MALEK -M) - 29/12/2022 - 23/01/2023
	* Martin Adams (ADAMS -M) - 07/02/2023 - 14/02/2023
	* James Abbles (ABBLES -J) - 23/01/2023 - 14/02/2023
	* Brian Stafford (STAFFORD -B) - 28/11/2023 - 09/03/2023
	* Nicole Adams (ADAMS -N) - 22/03/2023 - 31/03/2023
3. Deputy Chief Executive:
	* Caroline Ansbach (ANSBACH -C) - 19/01/2023 - 26/01/2023
	* Jack Brady (NBRAD) - 22/02/2023 - Present
4. Company Secretary:
	* Rosslyn Adams (ADAMS -R) - 09/03/2023 - Present
	* Paloma Plews (PLEWS -P) - 11/10/2023 - Present
	* Nicole Adams (ADAMS -N) - 09/02/2023 - 22/03/2023
5. Chair:
	* Nicole Adams (ADAMS -N) - 09/02/2023 - 14/03/2023
	* Curtis Duncan (DUNCAN -C) - 08/02/2023 - 14/03/2023
	* Jack Brady (NBRAD) - 

# Microsoft Phi-3-small-128k-instruct

In [21]:
# Switch the global LLM to the microsoft/phi-3-small-128k-instruct model from the API Catalog
Settings.llm = NVIDIA(model="microsoft/phi-3-small-128k-instruct", temperature=0.7)

In [22]:
from llama_index.core.memory import ChatMemoryBuffer
from llama_index.core.chat_engine import CondensePlusContextChatEngine

# Fresh conversation memory (token_limit=4500) so this run starts with no history
memory = ChatMemoryBuffer.from_defaults(token_limit=4500)

# Rebuild the chat engine over the same index retriever.
# NOTE(review): presumably this is what makes it use the Settings.llm assigned just above - confirm.
chat_engine = CondensePlusContextChatEngine.from_defaults(
    index.as_retriever(),
    memory=memory,
)

# First turn: ask for the company's main details, one field per line
prompt = """
Provide Main Details of the company Aardvark Constructions Limited. Including following details:
Name:
Country:
Company Number:
Incorporated:
Company Type:
Company Status:
Primary Addresses Registered Office:
Accounting Dates:
Confirmation Statement:
"""
response = chat_engine.chat(prompt)
print(response)

 Name: Aardvark Constructions Limited
  Country: United Kingdom
  Company Number: 123456
  Incorporated: 20/10/2020
  Company Type: Limited by Shares
  Company Status: Active
  Primary Addresses Registered Office: 6 Chancery Road, London, WC2A 5DP, United Kingdom
  Accounting Dates: Last Period End 16/11/2022, Current Period End 16/11/2024


In [23]:
# Follow-up turn on the current chat_engine: extract the management contact fields
prompt = (
    "\n"
    "From Management Details extract:\n"
    "Managed By:\n"
    "Managed By Email:\n"
)
response = chat_engine.chat(prompt)
print(response)

 Managed By: Caroline McPartland
  Managed By Email: cmcpartland@diligent.com


In [24]:
# Follow-up turn: former company names and the period each was in use.
# The space before the final "\n" is part of the original prompt text and is kept as-is.
prompt = (
    "\n"
    "Past Names of the Company with their period \n"
)
response = chat_engine.chat(prompt)
print(response)

 The company had a past name "Aardvark and Son Ltd" which was used from 20/10/2021 to 20/10/2022.
  The company was previously known as "Aardvark Construction" from 20/10/2020 to 20/10/2021.
  The company was incorporated as "Aardvark Constructions Limited" from 20/10/2020 onwards.


In [25]:
# Follow-up turn: board appointments with names and related details
prompt = (
    "\n"
    "Appointments Board Positions list with names and other details\n"
)
response = chat_engine.chat(prompt)
print(response)

1. Name: Adams, Martin
    Position: Director
    Appointed: 15/07/2024
    Job Title: Director
    Class: A Manager
    Secretary: Yes
 2. Name: Adams, Nicole
    Position: Alternate Director
    Appointed: 23/03/2023
    Job Title: CFO
    Class: A Manager
 3. Name: Adams, Rosslyn
    Position: Director
    Appointed: 13/06/2024
    Job Title: Solicitor
    Class: A Manager
 4. Name: Armstrong, Lance
    Position: Alternate Director
    Appointed: 14/08/2023
    Job Title: Company Director
    Class: A Manager
 5. Name: Ashcroft, John Peter
    Position: Director
    Appointed: 16/01/2002
    Job Title: Company Director
    Class: A Manager
 6. Name: Bamford, Malcolm Ernest
    Position: Director
    Appointed: 11/03/1996
    Job Title: Lawyer
    Class: A Manager
 7. Name: Beynon, Maldwyn
    Position: Director
    Appointed: 11/11/2023
    Job Title: Financial Arranger
    Class: A Manager
 8. Name: Blake, John
    Position: Director
    Appointed: 25/09/2002
    Job Title: Company