In [1]:
import os
from dotenv import load_dotenv
from langchain.document_loaders import  PyPDFDirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import FAISS
from langchain.chains.question_answering import load_qa_chain
from langchain.llms import OpenAI
from langchain.chains import ConversationalRetrievalChain
from langchain.memory import ConversationBufferMemory

from IPython.display import display
import ipywidgets as widgets


In [7]:
load_dotenv()

OPENAI_API_KEY = os.getenv('OEPNAI_API_KEY')

In [8]:
loader = PyPDFDirectoryLoader('./pdfs/')

In [9]:
loader.load()[0]

Document(page_content='Attention Is All You Need\nAshish Vaswani\x03\nGoogle Brain\navaswani@google.comNoam Shazeer\x03\nGoogle Brain\nnoam@google.comNiki Parmar\x03\nGoogle Research\nnikip@google.comJakob Uszkoreit\x03\nGoogle Research\nusz@google.com\nLlion Jones\x03\nGoogle Research\nllion@google.comAidan N. Gomez\x03y\nUniversity of Toronto\naidan@cs.toronto.eduŁukasz Kaiser\x03\nGoogle Brain\nlukaszkaiser@google.com\nIllia Polosukhin\x03z\nillia.polosukhin@gmail.com\nAbstract\nThe dominant sequence transduction models are based on complex recurrent or\nconvolutional neural networks that include an encoder and a decoder. The best\nperforming models also connect the encoder and decoder through an attention\nmechanism. We propose a new simple network architecture, the Transformer,\nbased solely on attention mechanisms, dispensing with recurrence and convolutions\nentirely. Experiments on two machine translation tasks show these models to\nbe superior in quality while being more paral

In [29]:
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size = 500,
    chunk_overlap  = 50,
    length_function = len,
)
documents = loader.load()
print(documents)
chunks = text_splitter.split_documents(documents)#[doc.page_content for doc in loader.load()])

print(chunks[:2])

[Document(page_content='arXiv:1305.0168v1  [quant-ph]  1 May 2013The classical limit of quantum optics: not what it seems at ﬁr st sight\nYakir Aharonov1,2, Alonso Botero3, Shmuel Nussinov1, Sandu Popescu4, Jeﬀ Tollaksen2, and Lev Vaidman1,2\n1School of Physics and Astronomy, Tel Aviv University, Tel Av iv, Israel\n2Institute of Quantum Studies and Faculty of Physics,\nChapman University, 1 University Drive, Orange, CA 92866, U SA\n3Departamento de Fisica, Universidad de Los Andes, Bogota, C olumbia and\n4H.H.Wills Physics Laboratory, University of Bristol, Tynd all Avenue, Bristol BS8 1TL, U.K.\nWhat is light and how to describe it has always been a central s ubject in physics. As our under-\nstanding has increased, so have our theories changed: Geome trical optics, wave optics and quantum\noptics are increasingly sophisticated descriptions, each referring to a larger class of phenomena than\nits predecessor. But how exactly are these theories related ? How and when wave optics reduce

In [31]:
#Avoid unescaped unicode characters such as \\u0000
for chunk in chunks:
    chunk.page_content = chunk.page_content.encode("ascii", "ignore").decode()

In [32]:
# Get embedding model
embeddings = OpenAIEmbeddings()

# Create vector database
#db = FAISS.from_documents(chunks, embeddings)

from langchain.vectorstores import SupabaseVectorStore
from supabase.client import Client, create_client
supabase_url = os.environ.get("PUBLIC_SUPABASE_URL")
supabase_key = os.environ.get("SUPABASE_SERVICE_ROLE_KEY")
print(supabase_url,supabase_key)
supabase: Client = create_client(supabase_url, supabase_key)
db = SupabaseVectorStore.from_documents(chunks, embeddings, client=supabase, tablename='documents')


http://localhost:3003/ eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJpc3MiOiJzdXBhYmFzZS1kZW1vIiwicm9sZSI6InNlcnZpY2Vfcm9sZSIsImV4cCI6MTk4MzgxMjk5Nn0.EGIM96RAZx35lJzdJsyH-qQwv8Hdp7fsn3W0YpN81IU


In [None]:
query = "What is the classical limit of quantum optics?"
docs = db.similarity_search(query)
docs[:3]

In [None]:
chain = load_qa_chain(OpenAI(temperature=0), chain_type="stuff")

query = "What is the classical limit of quantum optics?"
docs = db.similarity_search(query)

chain.run(input_documents=docs, question=query)

In [None]:
memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
qa = ConversationalRetrievalChain.from_llm(OpenAI(temperature=0), db.as_retriever(), memory=memory)

In [None]:
def on_submit(_):
    query= input_box.value
    input_box.value = ""
    if query.lower() == 'exit':
        print('Thanks, c ya')
        return 
    result = qa({"question": query})

    display(widgets.HTML(f'<b>User:</b> {query}'))
    display(widgets.HTML(f'<b><font color="blue">Chatbot:</font></b> {result["answer"]}'))

print('Welcome to PDFChat where you can chat with research papers.')
input_box = widgets.Text(placeholder='Please enter your question:')
input_box.on_submit(on_submit)

display(input_box)