In [7]:
import openai
import os
from dotenv import load_dotenv

# Read variables from a local .env file into the process environment.
load_dotenv()

# Azure OpenAI settings; each value is None when its variable is not set.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
OPENAI_DEPLOYMENT_ENDPOINT = os.environ.get("OPENAI_DEPLOYMENT_ENDPOINT")
OPENAI_DEPLOYMENT_NAME = os.environ.get("OPENAI_DEPLOYMENT_NAME")
OPENAI_MODEL_NAME = os.environ.get("OPENAI_MODEL_NAME")
OPENAI_EMBEDDING_DEPLOYMENT_NAME = os.environ.get("OPENAI_EMBEDDING_DEPLOYMENT_NAME")
OPENAI_EMBEDDING_MODEL_NAME = os.environ.get("OPENAI_EMBEDDING_MODEL_NAME")
OPENAI_DEPLOYMENT_VERSION = os.environ.get("OPENAI_DEPLOYMENT_VERSION")

# Point the module-level openai client at the Azure deployment.
openai.api_type = "azure"
openai.api_key = OPENAI_API_KEY
openai.api_base = OPENAI_DEPLOYMENT_ENDPOINT
openai.api_version = OPENAI_DEPLOYMENT_VERSION

In [8]:
from langchain.document_loaders import PyPDFLoader

# Collect the transcript PDFs in a stable, sorted order so the loaded pages
# (and everything derived from them) come out the same on every run;
# os.scandir itself yields entries in arbitrary order.
# Only regular files ending in ".pdf" are kept, so sub-directories or stray
# files in the folder are not handed to PyPDFLoader.
with os.scandir('../transcript') as entries:
    pdfs = sorted(
        entry.path
        for entry in entries
        if entry.is_file() and entry.name.lower().endswith('.pdf')
    )

# One loader per PDF path.
loaders = [PyPDFLoader(path) for path in pdfs]

# Gather whatever each loader returns into a single flat list.
docs = []
for loader in loaders:
    docs.extend(loader.load())

In [9]:
from langchain.text_splitter import RecursiveCharacterTextSplitter
# Split the loaded documents into overlapping chunks.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=100,
)
splits = text_splitter.split_documents(docs)

# Preview only the first chunk: printing the full list floods the notebook
# output with every chunk's text. Use len(splits) for the total count.
print(splits[:1])

[Document(page_content="MachineLearning-Lecture01  \nInstructor (Andrew Ng):  Okay. Good morning. Welcome to CS229, the machine \nlearning class. So what I wanna do today is ju st spend a little time going over the logistics \nof the class, and then we'll start to  talk a bit about machine learning.  \nBy way of introduction, my name's  Andrew Ng and I'll be instru ctor for this class. And so \nI personally work in machine learning, and I' ve worked on it for about 15 years now, and \nI actually think that machine learning is th e most exciting field of all the computer \nsciences. So I'm actually always excited about  teaching this class. Sometimes I actually \nthink that machine learning is not only the most exciting thin g in computer science, but \nthe most exciting thing in all of human e ndeavor, so maybe a little bias there.  \nI also want to introduce the TAs, who are all graduate students doing research in or \nrelated to the machine learni ng and all aspects of machin e learn

In [10]:
# Number of chunks produced by the text splitter (shown as the cell output).
len(splits)

279

In [11]:
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import FAISS
# Embedding client bound to the Azure embedding deployment from the environment.
# NOTE(review): chunk_size=1 presumably limits each embedding request to a
# single text — confirm this is still required for the deployment in use.
embedding = OpenAIEmbeddings(
    model=OPENAI_EMBEDDING_MODEL_NAME,
    deployment=OPENAI_EMBEDDING_DEPLOYMENT_NAME,
    chunk_size=1,
)

In [12]:
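# Manual index build (kept for reference): running this code creates the FAISS
# index stored at "dbs/documentation/faiss_index", the path a later cell reads.
# from langchain.vectorstores import FAISS
# persist_directory = 'docs/chroma/'  # unused leftover; the index is saved to the path below

# vectordb = FAISS.from_documents(
#     documents=splits,
#     embedding=embedding
# )

# vectordb.save_local("dbs/documentation/faiss_index")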



In [13]:
from langchain.retrievers import ContextualCompressionRetriever
from langchain.retrievers.document_compressors import LLMChainExtractor
from langchain.llms import AzureOpenAI
from langchain.chat_models import AzureChatOpenAI

In [14]:
def pretty_print_docs(docs):
    """Print the text of each document under a numbered heading.

    Every item in ``docs`` must expose a string ``page_content`` attribute.
    Sections are headed "Document N:" (numbering starts at 1) and separated
    by a line of 100 dashes. The combined text is written with a single
    ``print`` call; nothing is returned.
    """
    divider = "\n" + "-" * 100 + "\n"
    sections = []
    for number, doc in enumerate(docs, start=1):
        sections.append(f"Document {number}:\n\n" + doc.page_content)
    print(divider.join(sections))


In [15]:
# Load the persisted FAISS index when it exists; otherwise build it from
# `splits` and save it, so a fresh "Restart & Run All" works without having
# to uncomment and run a separate build step first.
FAISS_INDEX_DIR = "dbs/documentation/faiss_index"
if os.path.isdir(FAISS_INDEX_DIR):
    # NOTE(review): load_local deserialises files from disk (reportedly using
    # pickle for the stored documents) — only load indexes you created yourself.
    vectordb = FAISS.load_local(FAISS_INDEX_DIR, embeddings=embedding)
else:
    vectordb = FAISS.from_documents(documents=splits, embedding=embedding)
    vectordb.save_local(FAISS_INDEX_DIR)
# from langchain.vectorstores import DocArrayInMemorySearch
# vectordb = DocArrayInMemorySearch.from_documents(splits, embedding)

# Chat model backed by the Azure OpenAI deployment configured in the environment.
llm = AzureChatOpenAI(
    deployment_name=OPENAI_DEPLOYMENT_NAME,
    model=OPENAI_MODEL_NAME,
    openai_api_base=OPENAI_DEPLOYMENT_ENDPOINT,
    openai_api_version=OPENAI_DEPLOYMENT_VERSION,
    openai_api_key=OPENAI_API_KEY,
)

In [16]:
from langchain.chains import RetrievalQA
# Baseline retrieval QA chain built with RetrievalQA's default settings.
# NOTE(review): `qa_chain` is reassigned in a later cell before it is ever called.
qa_chain = RetrievalQA.from_chain_type(llm=llm, retriever=vectordb.as_retriever())

In [17]:
# Fetch the three chunks most similar to a sample question.
# NOTE(review): this rebinds `docs`, which an earlier cell used for the loaded
# PDF pages; re-running the splitting cell afterwards would see this list instead.
question = "What are major topics for this class?"
docs = vectordb.similarity_search(query=question, k=3)

In [18]:
# result = qa_chain.run("what are the major topics covered in the course?")
# result

In [19]:
from langchain.prompts import PromptTemplate

# Build the prompt template. {context} and {question} are the template's
# placeholders; the instructions ask for a short answer (three sentences at
# most) that ends with "thanks for asking!".
template = """Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer. Use three sentences maximum. Keep the answer as concise as possible. Always say "thanks for asking!" at the end of the answer. 
{context}
Question: {question}
Helpful Answer:"""
# Wrap the raw string in a PromptTemplate so it can be passed to a chain.
QA_CHAIN_PROMPT = PromptTemplate.from_template(template)


In [20]:
# Build (not run) the QA chain with the custom prompt, asking it to return
# the source documents alongside the answer.
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type_kwargs={"prompt": QA_CHAIN_PROMPT},
    return_source_documents=True,
    retriever=vectordb.as_retriever(),
)

In [21]:
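# Optional LangChain tracing setup (disabled).
# SECURITY: an API key used to be written out in this cell. Treat that key as
# leaked and rotate it. Keep LANGCHAIN_API_KEY in the .env file / environment
# instead of hardcoding it in the notebook.
# # LANGCHAIN_TRACING_V2 = "true"
# # LANGCHAIN_ENDPOINT = "https://api.langchain.plus"
# # LANGCHAIN_API_KEY = "<redacted>"

# os.environ["LANGCHAIN_TRACING_V2"] = "true"
# os.environ["LANGCHAIN_ENDPOINT"] = "https://api.langchain.plus"
# (LANGCHAIN_API_KEY should already be present in the environment; do not assign a literal key here)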

In [22]:
# qa_chain_mr = RetrievalQA.from_chain_type(
#     llm,
#     retriever=vectordb.as_retriever(),
#     chain_type="map_reduce"
# )
# result = qa_chain_mr({"query": question})
# result["result"]

In [23]:
import panel as pn  # GUI
# Initialise Panel for notebook use — presumably required before rendering any
# Panel widgets; no widgets are created in the cells visible here.
pn.extension()


In [24]:
from langchain.memory import ConversationBufferMemory
# Conversation memory handed to the chain under the "chat_history" key,
# configured to return message objects.
memory = ConversationBufferMemory(return_messages=True, memory_key="chat_history")

In [25]:
from langchain.chains import ConversationalRetrievalChain
# Conversational chain: the chat model, a retriever over the vector store,
# and the buffer memory defined above.
retriever = vectordb.as_retriever()
qa = ConversationalRetrievalChain.from_llm(
    llm=llm,
    memory=memory,
    retriever=retriever,
    # verbose=True,
)

In [26]:
# question = "who are the TA's"
# result = qa({"question": question})
# result

In [27]:
# Ask the conversational chain a question and print the answer it returns.
# NOTE(review): `qa` was built with a ConversationBufferMemory, so each call
# presumably adds to the stored chat history — re-running this cell may not
# give the same answer; confirm before relying on the output.
question = "Give me a summary of lecture 1"
result = qa.run(question)
print(result)

In lecture 1 of the machine learning class, the instructor, Andrew Ng, introduces the logistics of the class and talks briefly about machine learning. He also introduces the TAs who are all graduate students doing research in or related to machine learning. Andrew Ng mentions that he has been working on machine learning for about 15 years now and thinks that it is the most exciting field of all the computer sciences. He also hands out course information handouts and talks about the online resources available.
