In [1]:
from langchain_community.document_loaders import DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import Chroma
from langchain.prompts import ChatPromptTemplate
from langchain_openai import OpenAIEmbeddings
from langchain_openai import ChatOpenAI
from langchain.schema import Document
from dotenv import load_dotenv
import openai 
import shutil
import os

os.environ['OPENAI_API_KEY'] = '[my key]'

DATA_PATH = 'data/books'
CHROMA_PATH = "chroma"

def load_documents():
    loader = DirectoryLoader(DATA_PATH, glob='*.md')
    documents = loader.load()
    return documents
documents = load_documents()

In [2]:
from langchain.text_splitter import RecursiveCharacterTextSplitter
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=300,
    length_function=len,
    add_start_index=True,
)
chunks = text_splitter.split_documents(documents)
print(f'Split {len(documents)} documents into {len(chunks)} chunks.')

Split 1 documents into 229 chunks.


In [3]:
document = chunks[10]
print(document.page_content)
print(document.metadata)

There were doors all round the hall, but they were all locked; and when
Alice had been all the way down one side and up the other, trying every
door, she walked sadly down the middle, wondering how she was ever to
get out again.

Suddenly she came upon a little three-legged table, all made of solid
glass; there was nothing on it except a tiny golden key, and Alice’s
first thought was that it might belong to one of the doors of the hall;
but, alas! either the locks were too large, or the key was too small,
but at any rate it would not open any of them. However, on the second
time round, she came upon a low curtain she had not noticed before, and
behind it was a little door about fifteen inches high: she tried the
little golden key in the lock, and to her great delight it fitted!
{'source': 'data\\books\\alice_in_wonderland.md', 'start_index': 6936}


In [4]:
#Create a new DB from the documents.
db = Chroma.from_documents(
    chunks, OpenAIEmbeddings(), persist_directory=CHROMA_PATH
)

In [5]:
# Create a new DB from the documents.
db = Chroma.from_documents(
    chunks, OpenAIEmbeddings(), persist_directory=CHROMA_PATH
)
#db.persist()
print(f"Saved {len(chunks)} chunks to {CHROMA_PATH}.")   

Saved 229 chunks to chroma.


In [6]:
embedding_function = OpenAIEmbeddings()
vector = embedding_function.embed_query('apple')
print(len(vector))

1536


In [7]:
from langchain.evaluation import load_evaluator
evaluator = load_evaluator('pairwise_embedding_distance')
#Run an avaluation
x = evaluator.evaluate_string_pairs(prediction='apple', prediction_b='orange')
print(x)
x1 = evaluator.evaluate_string_pairs(prediction='apple', prediction_b='iphone')
print(x1)

{'score': 0.1354698831743597}
{'score': 0.09710853291781452}


In [8]:
query_text = 'How does the first meet of Alice and the Mad Hatter happen?'

In [9]:
# Prepare the DB.
embedding_function = OpenAIEmbeddings()
db = Chroma(persist_directory=CHROMA_PATH, embedding_function=embedding_function)

In [10]:
# Search the DB.
results = db.similarity_search_with_relevance_scores(query_text, k=3)

In [11]:
if len(results) == 0 or results[0][1] < 0.7:
    print(f'Unable to find matching results.')    

In [12]:
PROMPT_TEMPLATE = """
Answer the question based only on the following context:

{context}

---

Answer the question based on the above context: {question}
"""

In [13]:
context_text = "\n\n---\n\n".join([doc.page_content for doc, _score in results])
prompt_template = ChatPromptTemplate.from_template(PROMPT_TEMPLATE)
prompt = prompt_template.format(context=context_text, question=query_text)

print(prompt)

model = ChatOpenAI()
response_text = model.invoke(prompt)

sources = [doc.metadata.get("source", None) for doc, _score in results]
formatted_response = f"Response: {response_text}\nSources: {sources}"
print(formatted_response)


Human: 
Answer the question based only on the following context:

“How dreadfully savage!” exclaimed Alice.

“And ever since that,” the Hatter went on in a mournful tone, “he won’t
do a thing I ask! It’s always six o’clock now.”

A bright idea came into Alice’s head. “Is that the reason so many
tea-things are put out here?” she asked.

“Yes, that’s it,” said the Hatter with a sigh: “it’s always tea-time,
and we’ve no time to wash the things between whiles.”

“Then you keep moving round, I suppose?” said Alice.

“Exactly so,” said the Hatter: “as the things get used up.”

“But what happens when you come to the beginning again?” Alice ventured
to ask.

“Suppose we change the subject,” the March Hare interrupted, yawning.
“I’m getting tired of this. I vote the young lady tells us a story.”

“I’m afraid I don’t know one,” said Alice, rather alarmed at the
proposal.

“Then the Dormouse shall!” they both cried. “Wake up, Dormouse!” And
they pinched it on both sides at once.

---

“How dreadf