In [1]:
import os
import warnings
from openai import OpenAI

# Define OpenAI API_KEY
with open("/home/savitha07/.env") as env:
    for line in env:
        key, value = line.strip().split('=')
        os.environ[key] = value

client = OpenAI(
    api_key=os.environ.get('OPENAI_API_KEY'),
)

os.environ["TAVILY_API_KEY"] = os.environ.get('OPENAI_API_KEY')

warnings.filterwarnings("ignore")

In [2]:
# Model selection
import datetime

current_date = datetime.datetime.now().date()
target_date = datetime.date(2024, 6, 12)

if current_date > target_date:
    llm_model = "gpt-3.5-turbo"
else:
    llm_model = "gpt-3.5-turbo-0301"

In [3]:
from langchain_community.vectorstores import Chroma
from langchain_openai import OpenAIEmbeddings


In [4]:
# Load the PDF file

from langchain.document_loaders import PyPDFLoader

loaders = [
    # Duplicate documents on purpose - messy data
PyPDFLoader(
      "docs/MachineLearning-Lecture01.pdf"),
    PyPDFLoader(
      "docs/MachineLearning-Lecture01.pdf"),
    PyPDFLoader(
      "docs/MachineLearning-Lecture02.pdf"),
    PyPDFLoader(
      "docs/MachineLearning-Lecture03.pdf")
]
docs = []
for loader in loaders:
    docs.extend(loader.load())

In [5]:

# Document splitting
from langchain.text_splitter import RecursiveCharacterTextSplitter

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size = 1500,
    chunk_overlap = 150
)

splits = text_splitter.split_documents(docs)

len(splits)

228

In [6]:
# Load document and create VectorDB

persist_directory = 'docs/chroma/'
embedding = OpenAIEmbeddings()
vectordb = Chroma(embedding_function=embedding)

In [7]:
# 3: Similarity Search to select relevant chunks (splits)

question = "What are major topics for this class?"
docs = vectordb.similarity_search(question,k=3)
len(docs)


0

In [8]:
# Create LLM

from langchain_openai import ChatOpenAI
llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0)
llm.predict("Hello world!")

  warn_deprecated(


'Hello! How can I assist you today?'

In [9]:
# RetrievalQA Chain

from langchain.prompts import PromptTemplate
template = """Use the following pieces of \
   context to answer \
   the question at the end. If you don't know \
   the answer, \
   just say that you don't know, don't try \
   to make up an \
   answer. Use three sentences maximum. \
   Keep the answer as \
   concise as possible. Always say \
   "thanks for asking!" \
   at the end of the answer. 
{context}
Question: {question}
Helpful Answer:"""

In [10]:
QA_CHAIN_PROMPT = PromptTemplate(
     input_variables=["context", "question"],
     template=template,)

In [11]:
from langchain.chains import RetrievalQA

question = "Is probability a class topic?"
qa_chain = RetrievalQA.from_chain_type(llm,
   retriever=vectordb.as_retriever(),
   return_source_documents=True,
   chain_type_kwargs={"prompt": QA_CHAIN_PROMPT})


result = qa_chain({"query": question})
result["result"]

  warn_deprecated(


'Yes, probability is a class topic in mathematics courses. It involves the study of chance and likelihood of events occurring. Thanks for asking!'

In [12]:
# ConversationalRetrievalChain

from langchain.memory import ConversationBufferMemory

# Create memory
memory = ConversationBufferMemory(
    memory_key="chat_history", 
    return_messages=True
)

In [13]:
# QA

from langchain.chains import ConversationalRetrievalChain

retriever=vectordb.as_retriever()
qa = ConversationalRetrievalChain.from_llm(
    llm,
    retriever=retriever,
    memory=memory
)

In [14]:
# Testing

# First question
question = "Is probability a class topic?"
result = qa({"question": question})

result['answer']

'Yes, probability is a common topic covered in mathematics classes, particularly in courses like statistics, probability theory, and sometimes even in introductory math classes. It is an important concept in understanding uncertainty and making informed decisions based on data.'

In [15]:
# Followup question

question = "why are those prerequesites needed?"
result = qa({"question": question})

# In[ ]:

result['answer']

'Prerequisites for probability classes are typically needed to ensure that students have the necessary mathematical background to understand and engage with the concepts and techniques covered in the course. Probability involves mathematical concepts such as algebra, calculus, and statistics, so having a strong foundation in these areas is important for success in a probability class.'

In [16]:
# Create a chatbot that works on your documents

from langchain_openai import OpenAIEmbeddings
from langchain.text_splitter import CharacterTextSplitter, RecursiveCharacterTextSplitter
from langchain.vectorstores import DocArrayInMemorySearch
from langchain.document_loaders import TextLoader
from langchain.chains import RetrievalQA,  ConversationalRetrievalChain
from langchain.memory import ConversationBufferMemory
from langchain_openai import ChatOpenAI
from langchain.document_loaders import TextLoader
from langchain.document_loaders import PyPDFLoader

In [17]:
def load_db(file, chain_type, k):
    # load documents
    loader = PyPDFLoader(file)
    documents = loader.load()
    # split documents
    text_splitter = RecursiveCharacterTextSplitter(
           chunk_size=1000, 
           chunk_overlap=150)
    docs1 = text_splitter.split_documents(documents)
    # define embedding
    embeddings = OpenAIEmbeddings()
    # create vector database from data
    db = DocArrayInMemorySearch.from_documents(docs1, 
           embeddings)
    # define retriever
    retriever = db.as_retriever(search_type="similarity", 
           search_kwargs={"k": k})
    # create a chatbot chain. Memory is managed externally.
    qa = ConversationalRetrievalChain.from_llm(
        llm=ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0), 
        chain_type=chain_type, 
        retriever=retriever, 
        return_source_documents=True,
        return_generated_question=True,
    )
    return qa 

In [22]:
# ! pip install panel

Collecting panel
  Downloading panel-1.3.8-py2.py3-none-any.whl.metadata (24 kB)
Collecting bokeh<3.4.0,>=3.2.0 (from panel)
  Downloading bokeh-3.3.4-py3-none-any.whl.metadata (12 kB)
Collecting param<3.0,>=2.0.0 (from panel)
  Downloading param-2.0.2-py3-none-any.whl.metadata (5.9 kB)
Collecting pyviz-comms>=2.0.0 (from panel)
  Downloading pyviz_comms-3.0.1-py3-none-any.whl.metadata (7.5 kB)
Collecting xyzservices>=2021.09.1 (from panel)
  Downloading xyzservices-2023.10.1-py3-none-any.whl.metadata (4.0 kB)
Collecting markdown (from panel)
  Downloading Markdown-3.5.2-py3-none-any.whl.metadata (7.0 kB)
Collecting markdown-it-py (from panel)
  Downloading markdown_it_py-3.0.0-py3-none-any.whl.metadata (6.9 kB)
Collecting linkify-it-py (from panel)
  Downloading linkify_it_py-2.0.3-py3-none-any.whl.metadata (8.5 kB)
Collecting mdit-py-plugins (from panel)
  Downloading mdit_py_plugins-0.4.0-py3-none-any.whl.metadata (2.7 kB)
Collecting uc-micro-py (from linkify-it-py->panel)
  Downloa

In [24]:
import panel as pn
import param

In [32]:
# cbfs class

class cbfs(param.Parameterized):
    chat_history = param.List([])
    answer = param.String("")
    db_query  = param.String("")
    db_response = param.List([])

    def __init__(self,  **params):
        super(cbfs, self).__init__( **params)
        self.panels = []
        self.loaded_file = "docs/MachineLearning-Lecture01.pdf"
        self.qa = load_db(self.loaded_file,"stuff", 4)

    def call_load_db(self, count):
        # init or no file specified :
        if count == 0 or file_input.value is None:  
            return pn.pane.Markdown(f"Loaded File: {self.loaded_file}")
        else:
            file_input.save("temp.pdf")  # local copy
            self.loaded_file = file_input.filename
            button_load.button_style="outline"
            self.qa = load_db("temp.pdf", "stuff", 4)
            button_load.button_style="solid"
        self.clr_history()
        return pn.pane.Markdown(f"Loaded File: {self.loaded_file}")


In [33]:
    def convchain(self, query):
        if not query:
            return pn.WidgetBox(pn.Row('User:', 
               pn.pane.Markdown("", width=600)), scroll=True)
        result = self.qa({"question": query, 
                          "chat_history": self.chat_history})
        self.chat_history.extend([(query, result["answer"])])
        self.db_query = result["generated_question"]
        self.db_response = result["source_documents"]
        self.answer = result['answer'] 
        self.panels.extend([
            pn.Row('User:', pn.pane.Markdown(query, width=600)),
            pn.Row('ChatBot:', pn.pane.Markdown(self.answer, 
               width=600, 
               style={'background-color': '#F6F6F6'}))
        ])
        inp.value = ''  #clears loading indicator when cleared
        return pn.WidgetBox(*self.panels,scroll=True)


In [36]:
    @param.depends('db_query ', )
    def convchain(self):
        if not self.db_query :
            return pn.Column(
                pn.Row(pn.pane.Markdown(f"Last question to DB:", 
            styles={'background-color': '#F6F6F6'})),
                pn.Row(pn.pane.Str("no DB accesses so far"))
            )
        return pn.Column(pn.Row(pn.pane.Markdown(f"DB query:", styles={'background-color': '#F6F6F6'})),pn.pane.Str(self.db_query ))

In [37]:
    @param.depends('db_response', )
    def get_sources(self):
        if not self.db_response:
            return 
        rlist=[pn.Row(pn.pane.Markdown(f"Result of DB lookup:", 
            styles={'background-color': '#F6F6F6'}))]
        for doc in self.db_response:
            rlist.append(pn.Row(pn.pane.Str(doc)))
        return pn.WidgetBox(*rlist, width=600, scroll=True)


In [38]:
    @param.depends('convchain', 'clr_history') 
    def get_chats(self):
        if not self.chat_history:
            return pn.WidgetBox(
                  pn.Row(pn.pane.Str("No History Yet")), 
                   width=600, scroll=True)
        rlist=[pn.Row(pn.pane.Markdown(
            f"Current Chat History variable", 
            styles={'background-color': '#F6F6F6'}))]
        for exchange in self.chat_history:
            rlist.append(pn.Row(pn.pane.Str(exchange)))
        return pn.WidgetBox(*rlist, width=600, scroll=True)


In [39]:
    def clr_history(self,count=0):
            self.chat_history = []
            return
