In [24]:
from langchain.document_loaders import WebBaseLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Pinecone
from langchain.embeddings.openai import OpenAIEmbeddings
import pinecone
import os
from langchain.chat_models import ChatOpenAI
from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationalRetrievalChain
from langchain.document_loaders import TextLoader

In [25]:
# Load the source material from a local text file. The path is relative, so
# the notebook must be run from the directory that contains the file.
loader = TextLoader(file_path="example-info.txt")
data = loader.load()

In [26]:
# Break the loaded documents into overlapping chunks before embedding them.
# Sizes are measured with `len`: at most 1000 per chunk, 100 shared between
# neighbouring chunks.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=100,
    length_function=len,
    # presumably records each chunk's start offset in its metadata — confirm
    # against the splitter documentation.
    add_start_index=True,
)
texts = text_splitter.split_documents(data)

In [28]:
# Embedding model for the document chunks. Both API keys are read from the
# environment; a missing variable raises KeyError right here.
embeddings = OpenAIEmbeddings(openai_api_key=os.environ["OPENAI_API_KEY"])

# Configure the Pinecone client before any index handle is created.
pinecone.init(
    api_key=os.environ["PINECONE_API_KEY"],
    environment="asia-southeast1-gcp-free",
)

# NOTE(review): despite its name, this is a pinecone.Index handle rather than
# a string, and no visible cell reads it — confirm whether it is still needed.
index_name = pinecone.Index("coursecrafter")

# Chat model handed to the conversational chain further down.
llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0)

In [29]:
# Embed the chunks and store them in the "coursecrafter" Pinecone index.
# NOTE(review): re-running this cell presumably uploads the same chunks again
# — verify whether duplicates accumulate in the index.
vectordb = Pinecone.from_documents(
    documents=texts,
    embedding=embeddings,
    index_name="coursecrafter",
)

In [30]:
# Expose the vector store through its retriever interface (default settings)
# so the conversational chain can query it.
retriever = vectordb.as_retriever()

In [31]:
# Buffer the running conversation under the "chat_history" key, returned as
# message objects, so follow-up questions can build on earlier turns.
memory = ConversationBufferMemory(
    memory_key="chat_history",
    return_messages=True,
)

# Wire the chat model, the retriever and the memory into one QA chain.
chain = ConversationalRetrievalChain.from_llm(
    llm,
    retriever=retriever,
    memory=memory,
)

In [33]:
# First question; the bare final expression lets the notebook display the answer.
query = "which professor has the lowest gpa"
chain.run({"question": query})

'The professor with the lowest GPA is Professor HUANG R (H) with a GPA of 3.069.'

In [36]:
# Follow-up question: it does not say what is being ranked, so it depends on
# the chat history kept by the chain's memory from earlier turns.
query = "what about the highest gpa?"
chain.run({"question": query})

'The professor with the highest GPA is HU X with a GPA of 3.672.'