In [1]:
import langchain
import openai
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain_community.document_loaders import PyPDFLoader,TextLoader
from langchain.text_splitter import  RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma
import config 

import os
# Pull credentials and settings from the local `config` module.
Pineconekey = config.PineconeKey
# Also export the Pinecone key as an environment variable
# (presumably so Pinecone-backed libraries can pick it up - TODO confirm).
os.environ["PINECONE_API_KEY"] = Pineconekey
OpenAIKey = config.OpenAIKey
IndexName = config.IndexName
# Truthy -> local Chroma store, falsy -> Pinecone (see the vector store cell).
Localize = config.localize


In [2]:
# Load the PDF text; `data` holds whatever documents PyPDFLoader returns.
file_path = "docs/230811236.pdf"
loader = PyPDFLoader(file_path)
data = loader.load()

In [None]:
# Report how many documents the loader returned. The previous version wrapped
# the count in braces without an f-string, which printed a one-element set.
print(f"Loaded {len(data)} document(s) from the PDF")
# Preview only the start of the first document instead of printing all of
# `data`, which floods the notebook output.
print(data[0].page_content[:500] if data else "No documents were loaded.")

In [4]:
# Select a splitter and split the text into multiple chunks.
splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
# Split the documents already held in `data` rather than calling
# loader.load_and_split(), which reads and parses the PDF a second time.
texts = splitter.split_documents(data)

In [None]:
# Spot-check the splitter output by displaying the third chunk.
sample_chunk = texts[2]
sample_chunk

In [None]:
# Use OpenAIEmbeddings
import getpass
import os

embeddings = OpenAIEmbeddings(api_key=OpenAIKey)
# Show only the model name. Echoing the whole object prints its repr, which
# (depending on the installed LangChain version) can include the API key in
# plain text and persist it in the saved notebook output.
embeddings.model

In [None]:
# Embed a short probe string to confirm the embedding client works.
vectors = embeddings.embed_query("hi")
# Display the vector length and its first few components instead of dumping
# the whole vector into the cell output.
len(vectors), vectors[:5]

In [None]:
from pinecone import Pinecone, ServerlessSpec
from langchain_pinecone import PineconeVectorStore


# Decide whether to use Pinecone or Chroma.
# NOTE(review): both branches insert `texts` each time this cell runs, so
# re-running it presumably stores duplicate chunks - confirm and guard if needed.
if Localize:
    # Local, on-disk Chroma collection.
    persist_directory = 'db2'
    vectorstore = Chroma.from_documents(
        documents=texts,
        embedding=embeddings,
        persist_directory=persist_directory,
    )
    print("Database: Chroma")
else:
    pc = Pinecone(api_key=Pineconekey)
    index = pc.Index(IndexName)
    # Print the stats: a bare call inside a branch is not displayed by the
    # notebook, so the result was previously discarded.
    print(index.describe_index_stats())

    # from_documents is a classmethod, so call it on the class directly; the
    # previous code built a throwaway PineconeVectorStore instance just to
    # call it. The API key is taken from the PINECONE_API_KEY environment
    # variable exported in the configuration cell.
    vectorstore = PineconeVectorStore.from_documents(
        texts, embeddings, index_name=IndexName
    )
    print("Database: Pinecone")


In [None]:
# Sanity-check retrieval: run one similarity search and print the first hit.
query = "What is ros"
docs = vectorstore.similarity_search(query)
top_hit = docs[0]
print(top_hit.page_content)

In [10]:
from langchain.chains.prompt_selector import ConditionalPromptSelector, is_chat_model
from langchain.prompts import PromptTemplate,MessagesPlaceholder
from langchain_core.messages import HumanMessage, SystemMessage
from langchain.prompts.chat import (
    ChatPromptTemplate,
    HumanMessagePromptTemplate,
    SystemMessagePromptTemplate,
)

from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_openai import ChatOpenAI
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import format_document
from langchain_core.runnables import RunnableParallel
from langchain_core.messages import AIMessage, HumanMessage, get_buffer_string
from langchain_core.prompts import format_document
from langchain_core.runnables import RunnableParallel
from operator import itemgetter


In [11]:
# Chat model for the chain: "gpt-3.5-turbo" with temperature 0.
llm = ChatOpenAI(
    model="gpt-3.5-turbo",
    temperature=0,
    api_key=OpenAIKey,
)

In [12]:


# Prompt text asking the model to rewrite a follow-up question as a standalone
# question. Placeholders: {chat_history} and {question}.
_template = """Given the following conversation and a follow up question, rephrase the follow up question to be a standalone question.
Chat History:
{chat_history}
Follow Up Input: {question}
Standalone question:"""




# Prompt text for the answering step. Placeholders: {context} and {question}.
# NOTE(review): the text mentions "chat history", but this template has no
# chat-history placeholder - confirm that is intended.
template = """You are an expert researcher. Use the following pieces of context to answer the question at the end.
If you don't know the answer, just say you don't know. DO NOT try to make up an answer.
If the question is not related to the context or chat history, politely respond that you are tuned to only answer questions that are related to the context.

{context}

Question: {question}

Helpful answer in markdown:"""

In [13]:
# Turn the raw template strings into prompt objects.
CONDENSE_QUESTION_PROMPT = PromptTemplate.from_template(template=_template)
ANSWER_PROMPT = ChatPromptTemplate.from_template(template)

# Per-document prompt that renders only the document's page content.
DEFAULT_DOCUMENT_PROMPT = PromptTemplate.from_template("{page_content}")


def _combine_documents(
    docs, document_prompt=DEFAULT_DOCUMENT_PROMPT, document_separator="\n\n"
):
    """Render each document with a prompt and join the results into one string.

    Args:
        docs: Iterable of documents to render.
        document_prompt: Prompt passed to ``format_document`` for every
            document; defaults to ``DEFAULT_DOCUMENT_PROMPT``.
        document_separator: String placed between rendered documents.

    Returns:
        The rendered documents joined by ``document_separator``.
    """
    rendered = (format_document(item, document_prompt) for item in docs)
    return document_separator.join(rendered)

# Expose the vector store as a retriever (default settings) for use in the chain.
retriever = vectorstore.as_retriever()


In [14]:
# Stage 1: flatten the chat history to text, then ask the LLM to rewrite the
# incoming question as a standalone question (plain string output).
_condense_question = (
    RunnablePassthrough.assign(
        chat_history=lambda x: get_buffer_string(x["chat_history"])
    )
    | CONDENSE_QUESTION_PROMPT
    | llm
    | StrOutputParser()
)
_inputs = RunnableParallel(standalone_question=_condense_question)

# Stage 2: retrieve documents for the standalone question, merge them into a
# single context string, and pass the standalone question through unchanged.
_context = {
    "context": itemgetter("standalone_question") | retriever | _combine_documents,
    "question": itemgetter("standalone_question"),
}

# Full pipeline: condense -> retrieve -> answer prompt -> LLM.
conversational_qa_chain = _inputs | _context | ANSWER_PROMPT | llm

In [None]:
# First turn: there is no earlier conversation, so the history is empty.
first_turn = {"question": "ROSGPT", "chat_history": []}
conversational_qa_chain.invoke(first_turn)



In [None]:
# Follow-up turn with a hand-written history (one human message and one AI
# reply). The question previously contained a typo ("whst"), fixed here so
# the text sent to the model is well-formed.
conversational_qa_chain.invoke(
    {
        "question": "what is ROS",
        "chat_history": [
            HumanMessage(content="ROSGPT"),
            AIMessage(content="ROSGPT is a framework that includes ROSGPT and ROSGPT Vision. ROSGPT is a broker that converts human commands given in natural language to explicit robotic commands. On the other hand, ROSGPT Vision allows the execution of robotic tasks using visual and language prompts, enabling robots to address complex real-world scenarios by processing visual data and making informed decisions."),
        ],
    }
)

In [17]:
# Start a fresh conversation and record the first exchange in `chat_history`.
chat_history = []
question = "What is Task Decomposition?"

first_payload = {"question": question, "chat_history": chat_history}
ai_msg = conversational_qa_chain.invoke(first_payload)

# Store the human question followed by the model's reply.
chat_history.append(HumanMessage(content=question))
chat_history.append(ai_msg)

In [None]:
# Ask a follow-up question, passing the accumulated history to the chain.
second_question = "What are common ways of doing it?"
followup_payload = {"question": second_question, "chat_history": chat_history}
conversational_qa_chain.invoke(followup_payload)