## Install / import all dependencies

In [1]:
## Import ENV
import os
import time
from getpass import getpass
import kdbai_client as kdbai
import pandas as pd
import requests
from langchain.chains import RetrievalQA
from langchain.document_loaders import PyPDFLoader
from langchain_community.vectorstores import KDBAI
from langchain_openai import ChatOpenAI, OpenAIEmbeddings

In [2]:
def read_api_key(file_path):
    """Read an API key from a text file.

    Args:
        file_path: Path of a text file whose content is the key.

    Returns:
        The file content with leading and trailing whitespace removed.

    Raises:
        OSError: If the file cannot be opened or read.
        ValueError: If the file holds no key (empty or whitespace only).
    """
    # "utf-8-sig" decodes plain UTF-8 and also drops a leading byte-order mark,
    # which str.strip() would otherwise leave glued to the front of the key.
    with open(file_path, 'r', encoding='utf-8-sig') as file:
        key = file.read().strip()
    # Fail here with a clear message instead of passing an empty credential on.
    if not key:
        raise ValueError(f"No API key found in {file_path!r}")
    return key
# Endpoint of the KDB.AI instance this notebook connects to.
KDBAI_ENDPOINT = 'https://cloud.kdb.ai/instance/qpglwft3zm'

# Both secrets are loaded from local key files rather than written in the notebook.
KDBAI_API_KEY = read_api_key('../API_Key/KDBAI_API_KEY.txt')
OPENAI_API_KEY = read_api_key('../API_Key/OPENAI_API_KEY.txt')

# Export the keys as environment variables (presumably so the client libraries
# used later can pick them up without explicit arguments -- TODO confirm).
os.environ.update(
    OPENAI_API_KEY=OPENAI_API_KEY,
    KDBAI_API_KEY=KDBAI_API_KEY,
)

In [3]:
# Tunable parameters for the question-answering pipeline:
#   TEMP -- temperature handed to the chat model.
#   K    -- value of the retriever's "k" search argument.
TEMP, K = 0.0, 3

In [15]:
# Open a session against the configured KDB.AI endpoint.
print("Create a KDB.AI session...")
session = kdbai.Session(api_key=KDBAI_API_KEY, endpoint=KDBAI_ENDPOINT)

print('Create table "documents"...')
# Column layout of the "documents" table: an id, the chunk text, its embedding
# vector (HNSW index, L2 metric, 1536 dimensions) and two metadata columns.
# NOTE(review): dims presumably has to match the embedding model's output size.
schema = dict(
    columns=[
        dict(name="id", pytype="str"),
        dict(name="text", pytype="bytes"),
        dict(
            name="embeddings",
            pytype="float32",
            vectorIndex=dict(dims=1536, metric="L2", type="hnsw"),
        ),
        dict(name="tag", pytype="str"),
        dict(name="title", pytype="bytes"),
    ]
)

# Drop any table left over from an earlier run so the cell can be re-executed.
if "documents" in session.list():
    session.table("documents").drop()
table = session.create_table("documents", schema)

Create a KDB.AI session...
Create table "documents"...


# Load data 1: (A law paper)

In [17]:
%%time
print("Read a PDF...")

# First source document, loaded from the notebook's working directory.
PDF = "Déclaration_des_droits_de_l_homme_et_du_citoyen.pdf"

# Parse the PDF and split it into documents; `pages` feeds the embedding cell.
loader = PyPDFLoader(PDF)
pages = loader.load_and_split()

# Display how many documents the split produced.
len(pages)

Read a PDF...
CPU times: user 118 ms, sys: 16.8 ms, total: 135 ms
Wall time: 155 ms


3

In [18]:
%%time
print("Create a Vector Database from PDF text...")
embeddings = OpenAIEmbeddings(model="text-embedding-ada-002")

# Raw text of every split page, in page order.
texts = [page.page_content for page in pages]

# One metadata row per text chunk; every row carries the same tag and title.
# The title is encoded to bytes, matching the bytes-typed "title" column.
metadata = pd.DataFrame(index=list(range(len(texts)))).assign(
    tag="law",
    title="Déclaration des Droits de l'Homme et du Citoyen de 1789".encode("utf-8"),
)

# Wrap the KDB.AI table as a LangChain vector store and insert the chunks.
# The cell's displayed value is whatever add_texts returns.
vectordb = KDBAI(table, embeddings)
vectordb.add_texts(texts=texts, metadatas=metadata)

Create a Vector Database from PDF text...
CPU times: user 241 ms, sys: 44.5 ms, total: 286 ms
Wall time: 2.24 s


['aec3c93b-0ea1-476f-a20c-280e65f8c669',
 '8684fb3f-7510-4ca8-aef2-97c1e947e262',
 '43f0ab40-5834-44c3-8343-8448ee66aba8']

# Load data 2: (A biology paper)

In [19]:
%%time
# Load the second source document ("02.pdf") and split it into documents.
# This rebinds `loader` and `pages`, so later cells see only this PDF's pages.
print("Read a PDF...")
loader = PyPDFLoader("02.pdf")
pages = loader.load_and_split()
# Display how many documents the split produced.
len(pages)

Read a PDF...
CPU times: user 100 ms, sys: 4.26 ms, total: 105 ms
Wall time: 105 ms


8

In [20]:
%%time
print("Create a Vector Database from PDF text...")
embeddings = OpenAIEmbeddings(model="text-embedding-ada-002")

# Raw text of every split page of the PDF loaded in the preceding cell.
texts = [p.page_content for p in pages]

# One metadata row per text chunk; every row carries the same tag and title.
metadata = pd.DataFrame(index=list(range(len(texts))))
metadata["tag"] = "biology"
# Title stored as bytes, matching the bytes-typed "title" column.
# It reads "21st century arguments": the missing space would otherwise be
# persisted in the table's metadata.
metadata["title"] = "Editorial: Charles Darwin, Jean-Baptiste Lamarck, and 21st century arguments on the fundamentals of biology".encode(
    "utf-8"
)

# Wrap the same KDB.AI table and append the new chunks to it.
# The cell's displayed value is whatever add_texts returns.
vectordb = KDBAI(table, embeddings)
vectordb.add_texts(texts=texts, metadatas=metadata)

Create a Vector Database from PDF text...
CPU times: user 68.4 ms, sys: 5.99 ms, total: 74.4 ms
Wall time: 2.34 s


['19e3d0a7-a66a-4c6f-ad52-295cf60f5d12',
 '20fdd073-db6b-474a-a504-c83c6e0a0e00',
 '1be85d3d-b959-4df1-b885-e450a1951be7',
 '192b51e0-3880-4536-96b2-d6b03d18c317',
 'f3f92e14-a980-44a9-9dd7-c15b02910c5f',
 'f9eda253-57ac-48cb-969b-d9c4a78b8c5d',
 '727b3ea3-4603-4abe-af08-f48f7c6b2a74',
 '03c28cf8-02ba-455f-8275-8ec02c5f7197']

## Design the LangChain pipeline (no memory/history is used for now)

In [22]:
%%time
print("Create LangChain Pipeline...")

# Chat model that writes the answers; its temperature comes from TEMP.
llm = ChatOpenAI(model="gpt-3.5-turbo-16k", temperature=TEMP)

# Retriever over the vector store, with K passed as its "k" search argument.
retriever = vectordb.as_retriever(search_kwargs={"k": K})

# Retrieval QA chain; the query cells read the answer from its "result" key.
qabot = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=retriever,
    return_source_documents=True,
)

Create LangChain Pipeline...
CPU times: user 13.2 ms, sys: 3.46 ms, total: 16.6 ms
Wall time: 18.2 ms


In [23]:
%%time
# Relevance probe: embed the raw user question in a prompt that asks the chain
# to answer 1 or 0 depending on whether the documents relate to it.
Input_Q = "What is the weather today?"
Q = f"Is {Input_Q} relevant to any of the documents? 1 if yes else 0."

print(f"\n\n{Q}\n")
response = qabot.invoke({"query": Q})
print(response["result"])



Is What is the weather today?? relevant to any of the documents? 1 if yes else 0.

0
CPU times: user 26.4 ms, sys: 6.64 ms, total: 33 ms
Wall time: 1.78 s


In [24]:
%%time
# Ask for a summary without naming a document.
Q = "Summarize the document in English:"

print(f"\n\n{Q}\n")
response = qabot.invoke({"query": Q})
print(response["result"])



Summarize the document in English:

The document is the Declaration of the Rights of Man and of the Citizen of 1789. It was written by the representatives of the French people and aims to declare the natural, inalienable, and sacred rights of every individual. These rights include freedom, property, security, and resistance to oppression. The document emphasizes the importance of equality and the principle that sovereignty resides in the nation. It also highlights the role of law in protecting individual rights and ensuring the common good. The document asserts the right to freedom of thought, expression, and religion, as well as the right to a fair trial and the presumption of innocence. It emphasizes the importance of public accountability and the need for a just and equitable distribution of resources. The document concludes by stating that any society that does not guarantee the protection of rights and the separation of powers does not have a constitution.
CPU times: user 19 ms,

In [25]:
%%time
# Ask the chain to name the stored document closest to a biology topic.
Q = "What is the most relevant document in your database to the question:the fundamentals of biology in the 21st century? give me the title"

print(f"\n\n{Q}\n")
response = qabot.invoke({"query": Q})
print(response["result"])



What is the most relevant document in your database to the question:the fundamentals of biology in the 21st century? give me the title

The most relevant document in the database is titled "Editorial: Charles Darwin, Jean-Baptiste Lamarck, and 21st century arguments on the fundamentals of biology."
CPU times: user 19 ms, sys: 3.55 ms, total: 22.5 ms
Wall time: 2.09 s


In [26]:
%%time
# Ask the chain to name the stored document closest to the 1789 declaration.
Q = "What is the most relevant document in your database to the question:the Rights of Man and of the Citizen of 1789? give me the title"

print(f"\n\n{Q}\n")
response = qabot.invoke({"query": Q})
print(response["result"])



What is the most relevant document in your database to the question:the Rights of Man and of the Citizen of 1789? give me the title

The most relevant document in my database to the question "the Rights of Man and of the Citizen of 1789" is the "Déclaration des Droits de l'Homme et du Citoyen de 1789" (Declaration of the Rights of Man and of the Citizen of 1789).
CPU times: user 20.1 ms, sys: 4.31 ms, total: 24.5 ms
Wall time: 2.62 s


In [27]:
%%time
# Ask for a summary of one specific document, identified by its title.
Q = "Summarize this document(title:Editorial: Charles Darwin, Jean-Baptiste Lamarck, and 21st centuryarguments on the fundamentals of biology) in English:"

print(f"\n\n{Q}\n")
response = qabot.invoke({"query": Q})
print(response["result"])



Summarize this document(title:Editorial: Charles Darwin, Jean-Baptiste Lamarck, and 21st centuryarguments on the fundamentals of biology) in English:

This document is an editorial that discusses the arguments and relevance of Charles Darwin and Jean-Baptiste Lamarck's theories in the context of 21st-century biology. The author highlights the challenges to the gene-centric approach in evolutionary biology and calls for a more inclusive and multi-scale approach. The editorial emphasizes the need for a paradigm shift in understanding the underlying causal dynamics in living systems and evolution. The author also encourages further debate and contributions on these issues. The document provides historical background on Lamarck and Darwin's theories and challenges the notion that the Modern Synthesis completely replaced non-Darwinian theories of evolution. Overall, the editorial aims to stimulate discussion and presents a framework for a more comprehensive biological synthesis.
CPU times

In [28]:
%%time
# Content question aimed at the law document.
Q = "What are the rights and duties of the man, the citizen and the society ?"

print(f"\n\n{Q}\n")
response = qabot.invoke({"query": Q})
print(response["result"])



What are the rights and duties of the man, the citizen and the society ?

According to the Declaration of the Rights of Man and of the Citizen of 1789, the rights and duties of man, citizen, and society are as follows:

Rights of Man:
1. Men are born and remain free and equal in rights. Social distinctions can only be based on common utility.
2. The purpose of political association is to protect the natural and imprescriptible rights of man, which include liberty, property, security, and resistance to oppression.
3. Sovereignty resides essentially in the nation, and no individual or group can exercise authority that does not come from the nation.
4. Freedom consists of being able to do anything that does not harm others. The exercise of natural rights is limited only by the rights of others, as determined by the law.
5. The law has the right to prohibit only actions that are harmful to society. Anything not prohibited by law cannot be prevented, and no one can be compelled to do what

In [29]:
table.drop() # Remove the "documents" table once the demo is finished (cleanup)

True