# To Do

include sources by paragraph:  
  https://python.langchain.com/docs/use_cases/question_answering/how_to/vector_db_qa  
  first split by paragraph   
  assign unique paragraph key  
  pass the keyed paragraphs to GPT and prompt it to cite them  

implement streaming:  
  https://python.langchain.com/docs/modules/model_io/models/llms/streaming_llm  

improve gradio

# Import

In [2]:
from dotenv import load_dotenv
import os
import openai

# Langchain tools
from langchain.llms import OpenAI
from langchain.document_loaders import PyPDFLoader
from langchain.vectorstores import Qdrant
from langchain.embeddings import OpenAIEmbeddings
from langchain.chains.question_answering import load_qa_chain
from langchain.prompts import PromptTemplate

# Gradio
import gradio as gr

# Setup

In [3]:
# Load variables from the .env file, then hand the key to the openai module.
# A missing OPENAI_API_KEY raises KeyError right here rather than later.
load_dotenv()
openai.api_key = os.environ["OPENAI_API_KEY"]

# Location of the PDF that gets indexed and queried
pdf_path = "./pdf/Dupre_economy_as_science.pdf"

# Completion model used to write the answers
llm = OpenAI(
    temperature=0,
    top_p=0.1,
    n=1,
)

# Embedding

In [4]:
# Load the PDF and split it into documents.
# `pdf_path` is defined once in the Setup cell; it is intentionally not
# redefined here so the path has a single place to be changed.
loader = PyPDFLoader(pdf_path)
docs = loader.load_and_split()

# Embed the documents and index them in a Qdrant collection
embeddings = OpenAIEmbeddings()
qdrant = Qdrant.from_documents(
    docs,
    embeddings,
    location=":memory:",  # Local mode with in-memory storage only
    collection_name="Dupre",
)

# Prompting

In [14]:
# Setup system prompt
# {context} and {question} are the two placeholders declared in
# `input_variables` below; they are filled in when the QA chain runs.

prompt_template = """Use the following pieces of context to answer the question
at the end. If you don't know the answer, just say that you don't know,
don't try to make up an answer. Make sure to answer the question thoroughly.

{context}

Question: {question}
"""

PROMPT = PromptTemplate(
    template=prompt_template, input_variables=["context", "question"]
)


In [9]:
# Starter questions handed to the chat UI as its `examples`

example_questions = [
    "What is the main argument of this paper?",
    "How does the author support the main argument?",
    "What is the author's conclusion?",
]

# Chat GUI

In [27]:
# Define QA function
# This works

def get_answers(query, history):
    """Answer a question using documents retrieved from the Qdrant index.

    Args:
        query: The question text.
        history: Accepted so the function fits the chat-callback signature
            ``fn(message, history)``; it is not used.

    Returns:
        The string produced by the QA chain, or a short hint when ``query``
        is empty, whitespace-only or ``None``.
    """
    # Nothing to search for: skip the retrieval and LLM calls entirely.
    if not query or not query.strip():
        return "Please enter a question."

    # MMR search asking for k=2 documents out of fetch_k=10 candidates.
    found_docs = qdrant.max_marginal_relevance_search(query, k=2, fetch_k=10)
    chain = load_qa_chain(llm, chain_type="stuff", prompt=PROMPT)
    # Inputs are passed as a single dict (the kwargs form lives in get_answers_exp).
    summary = chain.run({'question': query, 'input_documents': found_docs})
    return summary

In [15]:
# Define QA function
# experimental

def get_answers_exp(query, history):
    """Experimental variant of the QA function used by the chat UI.

    Args:
        query: The question text.
        history: Accepted so the function fits the chat-callback signature
            ``fn(message, history)``; it is not used.

    Returns:
        The string produced by the QA chain, or a short hint when ``query``
        is empty, whitespace-only or ``None``.
    """
    # Nothing to search for: skip the retrieval and LLM calls entirely.
    if not query or not query.strip():
        return "Please enter a question."

    # MMR search asking for k=2 documents out of fetch_k=10 candidates.
    found_docs = qdrant.max_marginal_relevance_search(query, k=2, fetch_k=10)
    chain = load_qa_chain(llm, chain_type="stuff", prompt=PROMPT)
    # Inputs are passed as keyword arguments here rather than as one dict.
    summary = chain.run(question=query, input_documents=found_docs)
    return summary

In [16]:
# Build the chat interface around the experimental QA function and launch it

pdf_chatter = gr.ChatInterface(
    fn=get_answers_exp,
    examples=example_questions,
    title="PDF Chatter",
)
pdf_chatter.launch()

Running on local URL:  http://127.0.0.1:7862

To create a public link, set `share=True` in `launch()`.


