In [3]:
# Pull environment variables in through python-dotenv before anything reads them
from dotenv import load_dotenv

# Keep the flag returned by load_dotenv() and echo it as the cell result
dotenv_loaded = load_dotenv()
dotenv_loaded

True

In [4]:
# Load every file matching *.txt in the current directory as a document
from langchain.document_loaders import DirectoryLoader

loader = DirectoryLoader('./', glob="*.txt")
docs = loader.load()

# Fail with a readable message instead of a bare IndexError on docs[0] when
# nothing matched (e.g. the kernel was started from a different directory).
assert docs, "No .txt documents were loaded from './' - check the working directory"

# Preview the first 100 characters of the first document
print(docs[0].page_content[:100])

You are a Ai bot for LaSara Medical Group

About LaSara Medical Group LaSara Medical Group specializ


In [5]:
# Break the loaded documents into small, overlapping chunks
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Chunks are at most 100 units long with a 20-unit overlap; length is measured
# with len(), and separators are treated as plain strings rather than regexes.
# NOTE(review): the recorded retriever output contains fragments such as
# 'Testimonials:' - consider whether a larger chunk_size would retrieve better.
splitter_options = dict(
    chunk_size=100,
    chunk_overlap=20,
    length_function=len,
    is_separator_regex=False,
)
text_splitter = RecursiveCharacterTextSplitter(**splitter_options)

texts = text_splitter.split_documents(docs)

# Show the text of the first chunk as the cell result
texts[0].page_content

'You are a Ai bot for LaSara Medical Group'

In [6]:
# Create the embedding model passed to the vector store
from langchain.embeddings import OpenAIEmbeddings

# Pin the model explicitly instead of relying on the library default: the Zep
# collection in this notebook is configured with embedding_dimensions=1536,
# which is the vector size produced by text-embedding-ada-002. A silent change
# of the default model would otherwise break the collection.
embeddings = OpenAIEmbeddings(model='text-embedding-ada-002')

In [13]:
# Create a vector store using zep and index the document chunks in it
from langchain.vectorstores.zep import CollectionConfig
from langchain.vectorstores import ZepVectorStore
import os

# Single source of truth for the collection name: it is needed both in the
# collection config and in the from_documents() call, and the two must agree.
COLLECTION_NAME = 'lasaraknowledge'

# Fail early with an actionable message when the Zep endpoint is not configured
# (still a KeyError, as with a direct os.environ[...] lookup).
zep_api_url = os.environ.get('ZEP_API_URL')
if not zep_api_url:
    raise KeyError("ZEP_API_URL is not set; define it in the environment or in the .env file")

# Collection config is needed if we're creating a new Zep Collection
config = CollectionConfig(
    name=COLLECTION_NAME,
    description="documents regarding lasara medical group",
    metadata={},
    is_auto_embedded=False,
    embedding_dimensions=1536  # this should match the model you've configured Zep to use.
)

# NOTE(review): re-running this cell presumably adds the same chunks to the
# collection again - confirm, and clear the collection before re-indexing.
vectordb = ZepVectorStore.from_documents(
    documents=texts,
    collection_name=COLLECTION_NAME,
    embedding=embeddings,
    config=config,
    api_url=zep_api_url,
)

In [14]:
# Search in our vector store
query = "I want a free consultation."

# Use a dedicated name for the hits: rebinding `docs` here would overwrite the
# loaded source documents, so re-running the splitting cell afterwards would
# chunk search results instead of the original files.
search_results = vectordb.similarity_search(query)

# Show the best match, guarding against an empty result list
if search_results:
    print(search_results[0].page_content)
else:
    print("No matching documents found.")

for schedule free consultation visit https://calendly.com/d/grf-5sr-mtt this url


In [15]:
# Wrap the vector store in a retriever and look up the documents for `query`
retriever = vectordb.as_retriever()

# Keep the hits in a named variable, then echo them as the cell result
relevant_docs = retriever.get_relevant_documents(query)
relevant_docs

[Document(page_content='for schedule free consultation visit https://calendly.com/d/grf-5sr-mtt this url', metadata={'source': 'booking_url.txt'}),
 Document(page_content='with the doctor. Testimonials', metadata={'source': 'about_page.txt'}),
 Document(page_content='first step towards restoring natural functionality by scheduling a visit with the doctor.', metadata={'source': 'about_page.txt'}),
 Document(page_content='Testimonials:', metadata={'source': 'home_page.txt'})]

In [16]:
# Build a question-answering chain that answers from retrieved documents
from langchain.chat_models import ChatOpenAI
from langchain.chains import RetrievalQA

# Chat model configured with temperature 0 and a 500-token cap on replies
llm = ChatOpenAI(
    model='gpt-3.5-turbo',
    temperature=0,
    max_tokens=500,
)

# Combine the chat model with the retriever created from the vector store
qa_chain = RetrievalQA.from_chain_type(
    retriever=retriever,
    llm=llm,
)

In [17]:
# Run the QA chain on the sample query and show only the answer text
response = qa_chain(query)

# The chain's output is indexed by key; 'result' holds the generated answer
answer = response['result']
answer

'You can schedule a free consultation with the doctor by visiting this URL: https://calendly.com/d/grf-5sr-mtt.'