### References Langchain
https://python.langchain.com/v0.1/docs/use_cases/question_answering/quickstart/

### References Chroma db  
https://docs.trychroma.com/guides  
https://github.com/neo-con/chromadb-tutorial  
https://python.langchain.com/v0.1/docs/integrations/vectorstores/chroma/  


In [1]:
import bs4
from langchain import hub
from langchain_community.document_loaders import WebBaseLoader
from langchain.vectorstores import Chroma
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_openai import OpenAIEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter

In [2]:
import os
import sys
sys.path.append('../..')

from py3810.myUtils import pickle_dump, pickle_load
path_lumen_docs = '..\langchain\docs\lumen\\docs\\'
path_chatbot = '..\langchain\chatbot\\'

from dotenv import load_dotenv, find_dotenv
_ = load_dotenv(find_dotenv('.env\my_api_key.env')) # read local .env file

os.environ["OPENAI_API_KEY"]
os.environ["LANGCHAIN_TRACING_V2"] = "true"
os.environ["LANGCHAIN_API_KEY"]
os.environ["ToMarkDown_API_KEY"]
os.environ["SECRET_KEY"]


'my_secret_key'

In [3]:
import textwrap

def print_wrapped(text, width=80):
  """
  Prints a long string to the console, wrapped to fit within a specified width.

  Args:
      text: The long string to be wrapped.
      width: The desired width for each line (default: 80 columns).
  """
  wrapped_text = textwrap.wrap(text, width=width)
  for line in wrapped_text:
    print(line)

# Example usage
long_string = "This is a very long string that needs to be wrapped to fit within 80 columns. It can contain spaces, punctuation, and even newlines."
print_wrapped(long_string)

This is a very long string that needs to be wrapped to fit within 80 columns. It
can contain spaces, punctuation, and even newlines.


In [4]:
from langchain_openai import ChatOpenAI

llm = ChatOpenAI(model="gpt-3.5-turbo-0125")

In [5]:
persist_directory = path_chatbot + "./chroma_openai_ef"
embedding_function = OpenAIEmbeddings()
collection_name = 'lumen_docs_combined'

In [6]:
my_query = "What is lumen optometric's address?"

#### Only run cell below to add new data


In [7]:
from langchain.docstore.document import Document

lumen_address_phone_hours = \
  "Lumen Optometric address is located at 14 West Sierra Madre Blvd, Sierra Madre, CA 91024. \
   Lumen Optometric office is located at 14 West Sierra Madre Blvd, Sierra Madre, CA 91024. \
   Lumen Optometric location is 14 West Sierra Madre Blvd, Sierra Madre, CA 91024. \
   Lumen Optometric phone number is (626) 921-0199. \
   Lumen Optometric office hours are Tuesday, Wednesday, Friday, and Saturday from 9:45 am to 5:30 pm, \
   and Thursday from 9:45 am to 1:30 pm."

doc_lumen_address_phone_hours =  [Document(
  page_content=lumen_address_phone_hours,
  metadata={"description:": "address, office location, phone number, office hours"}
  )]

# load data
doc_0 = pickle_load(filename_pickle='lumen_docs_website', path_pickle_dump=path_lumen_docs)
doc_1 = pickle_load(filename_pickle='lumen_docs_pdfs', path_pickle_dump=path_lumen_docs)
doc_2 = pickle_load(filename_pickle='lumen_docs_videos', path_pickle_dump=path_lumen_docs)
doc_3 = pickle_load(filename_pickle='lumen_docs_youtube', path_pickle_dump=path_lumen_docs)
docs = doc_lumen_address_phone_hours + doc_0 + doc_1 + doc_2 + doc_3

text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
splits = text_splitter.split_documents(docs)

# save to disk
db0 = Chroma.from_documents(
  documents=splits,
  embedding=embedding_function,
  collection_name=collection_name,
  persist_directory=persist_directory
  )

db0_ans = db0.similarity_search(my_query)
# print(docs[0].page_content)
print(f'db0_ans:{db0_ans}')

db0_ans:[Document(page_content='local experts at Lumen Optometric are here to help. Patients Are at the Heart\nof Everything We Do Our primary tenet is to provide excellent patient care\nthat balances clarity, comfort and optimal health over your lifetime. Lumen\nOptometric has knowledgeable, caring eye specialists who are more than happy\nto share their time and expertise with you. Our areas of concentration\ninclude: Comprehensive eye exams Orthokeratology (Ortho-K) Neurolens therapy\nScleral lenses for Keratoconus or High Prescriptions Whether you’re in for an\nannual exam or a more specialized treatment, you can count on personalized\nservice and the highest quality of eye care. Lumen Optometric has a full\ncollection of stylish, durable, high-performance products to protect your\nvision. And to make your experience even more convenient and affordable, we\naccept most major vision insurance. Schedule Your Exceptional Eye Care\nExperience When you trust your eye care to Lumen Optome

In [8]:

# load from disk
vectorstore = Chroma(
  embedding_function=embedding_function,
  collection_name=collection_name,
  persist_directory=persist_directory
  )
ans = vectorstore.similarity_search(my_query)
print(f'ans:{ans}')

ans:[Document(page_content='Lumen Optometric address is located at 14 West Sierra Madre Blvd, Sierra Madre, CA 91024.    Lumen Optometric office is located at 14 West Sierra Madre Blvd, Sierra Madre, CA 91024.    Lumen Optometric location is 14 West Sierra Madre Blvd, Sierra Madre, CA 91024.    Lumen Optometric phone number is (626) 921-0199.    Lumen Optometric office hours are Tuesday, Wednesday, Friday, and Saturday from 9:45 am to 5:30 pm,    and Thursday from 9:45 am to 1:30 pm.', metadata={'description:': 'address, office location, phone number, office hours'}), Document(page_content="patient care. Our professional optometrist offers cutting-edge, holistic eye\ncare that restores clarity, comfort and health to your vision. We use the\nlatest technologies to accurately diagnose and treat an assortment of eye\nconditions, so reach out and let Lumen Optometric give you a high-quality\noptical exam. To request an eye appointment or to learn more about Lumen\nOptometric, please call o

In [11]:
retriever = vectorstore.as_retriever(search_kwargs={"k": 1})
prompt = hub.pull("rlm/rag-prompt")

def format_docs(docs):
    return "\n\n".join(doc.page_content for doc in docs)

rag_chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

In [14]:
queries = [
  # "14 West Sierra Madre Boulevard",
  "What is lumen optometric's phone number?",
  "What is lumen optometric's address?",
  "What is lumen optometric's location?",
  "do you take vision insurance",
  "what type of insurance do you take",
  "what are the names of the insurance that you take",
  "What is Ortho-k",
  "What does research say about Ortho-k",
  "What equipment do they have?",
  "What equipment do you use for othro-k?",
  ]

In [15]:
# for query in queries[0:2]:
for query in queries:
  print(f'query: {query}')
  print_wrapped(f'answr: {rag_chain.invoke(query)}')
  print('')
  print_wrapped(f'relevant docs: {retriever.get_relevant_documents(query)}')
  print(f'{"="*5}\n')

query: What is lumen optometric's phone number?
answr: You can contact Lumen Optometric at (626) 921-0199.

relevant docs: [Document(page_content='(626) 921-0199
info@lumenoptometric.comServicesComprehensive Eye Exams Contact\nLens Exams
Orthokeratology Neurolens Therapy Scleral Lenses / Keratoconus\nQuick LinksHome
About Us Sitemap Hours Of OperationTuesday9:45 am - 5:30\npmWednesday9:45 am -
5:30 pmThursday9:45 am - 1:30 pmFriday9:45 am - 5:30\npmSaturday9:45 am - 5:30
pmCopyright © 2020 Lumen Optometric. All Rights\nReserved.X[contact-form-7 404
"Not Found"]Top', metadata={'description': 'The trusted eye exam doctor from
Lumen Optometric highlights the importance of a healthy diet to reduce the risk
of cataracts.', 'language': 'en-US', 'source':
'https://www.lumenoptometric.com/blog/eye-care/how-eating-a-healthy-diet-can-
reduce-the-risk-of-cataracts/', 'title': 'How Eating a Healthy Diet Can Reduce
the Risk of Cataracts'})]
=====

query: What is lumen optometric's address?
answr: L