#### Vector Store Practice

In [15]:
from langchain_community.document_loaders import TextLoader
from langchain_text_splitters import CharacterTextSplitter
from langchain_community.vectorstores import Chroma
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_classic.schema import Document

In [2]:
# Document Loading
file_path = "./files/sample.txt"
docs = TextLoader(file_path).load()
txt_splitter = CharacterTextSplitter(chunk_size=100, chunk_overlap=0, separator="\n")
documents = txt_splitter.split_documents(docs)
for i, doc in enumerate(documents):
    print(f"\n--- Chunk {i+1} ---")
    print(doc.page_content)

Created a chunk of size 294, which is longer than the specified 100
Created a chunk of size 159, which is longer than the specified 100
Created a chunk of size 148, which is longer than the specified 100
Created a chunk of size 188, which is longer than the specified 100
Created a chunk of size 255, which is longer than the specified 100
Created a chunk of size 323, which is longer than the specified 100
Created a chunk of size 320, which is longer than the specified 100



--- Chunk 1 ---
Mercury is the smallest planet in our solar system and nearest to the Sun. It's only slightly larger than Earth's Moon. From the surface of Mercury, the Sun would appear more than three times as large as it does when viewed from Earth, and the sunlight would be as much as seven times brighter.

--- Chunk 2 ---
Venus is the second planet from the Sun, and our closest planetary neighbor. It's the hottest planet in our solar system, and is sometimes called Earth's twin.

--- Chunk 3 ---
Earth – our home planet – is the third planet from the Sun, and the fifth largest planet. It's the only place we know of inhabited by living things.

--- Chunk 4 ---
Mars – the fourth planet from the Sun – is a dusty, cold, desert world with a very thin atmosphere. This dynamic planet has seasons, polar ice caps, extinct volcanoes, canyons and weather.

--- Chunk 5 ---
Jupiter is a world of extremes. It's the largest planet in our solar system – if it were a hollow shell, 1,000 Earths coul

In [3]:
import os
cache_dir = 'D:/Development/ML/Deep Learning/GenAI/.hf_cache'
os.environ['HF_HOME'] = cache_dir
os.environ['TRANSFORMERS_CACHE'] = cache_dir
os.environ['HF_DATASETS_CACHE'] = cache_dir
os.environ["SENTENCE_TRANSFORMERS_HOME"] = cache_dir
os.makedirs(cache_dir, exist_ok=True)

# Embedding Model
embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

In [4]:
vector_store = Chroma(
    embedding_function=embedding_model,
    collection_name="planet_docs",
    persist_directory="./files/chroma_db"
)

vector_store.add_documents(documents)

  vector_store = Chroma(


['4a803557-8b7e-450d-ab96-d08d7a808ff8',
 'aab0df9d-6a71-45d8-a780-b71928f7f322',
 '31c881c0-8df6-4e95-8cbd-fa1398c7a0ec',
 '62b74cd4-3f25-498d-b892-130d62dea2d4',
 'ade66c8e-7d5c-49a6-9868-9ed810f3b4e0',
 '2b491777-7eaf-4860-a7b9-949c5bad4d34',
 'd260b606-d85a-45b6-855a-51e59d6c5926',
 'd591c2a7-2923-4e09-8763-efc5c7fb2fcc']

In [6]:
vector_store.get(include=["metadatas", "documents", "embeddings"])

{'ids': ['4a803557-8b7e-450d-ab96-d08d7a808ff8',
  'aab0df9d-6a71-45d8-a780-b71928f7f322',
  '31c881c0-8df6-4e95-8cbd-fa1398c7a0ec',
  '62b74cd4-3f25-498d-b892-130d62dea2d4',
  'ade66c8e-7d5c-49a6-9868-9ed810f3b4e0',
  '2b491777-7eaf-4860-a7b9-949c5bad4d34',
  'd260b606-d85a-45b6-855a-51e59d6c5926',
  'd591c2a7-2923-4e09-8763-efc5c7fb2fcc'],
 'embeddings': array([[ 0.09975477,  0.0664135 ,  0.0276264 , ...,  0.0165581 ,
          0.00745496,  0.00776949],
        [ 0.00492019, -0.08366785, -0.04227234, ...,  0.04285121,
          0.00234763,  0.04382214],
        [ 0.09682267, -0.05642878, -0.01469302, ...,  0.02072895,
          0.01720516,  0.08990765],
        ...,
        [ 0.01711406, -0.05930492, -0.0433966 , ...,  0.05687283,
         -0.00185699,  0.05977885],
        [-0.00418941, -0.05522047,  0.02331413, ...,  0.05958568,
          0.04401881,  0.04979907],
        [ 0.02199316, -0.02118044,  0.01758816, ...,  0.02668057,
         -0.01599181,  0.0635022 ]], shape=(8, 384)),

In [8]:
vector_store.similarity_search_with_score(
    query="What is the largest planet in our solar system?",
    k=2
)

[(Document(metadata={'source': './files/sample.txt'}, page_content="Jupiter is a world of extremes. It's the largest planet in our solar system – if it were a hollow shell, 1,000 Earths could fit inside. It's also the oldest planet, forming from the dust and gases left over from the Sun's formation 4.6 billion years ago."),
  0.6881592273712158),
 (Document(metadata={'source': './files/sample.txt'}, page_content='Uranus is the seventh planet from the Sun, and has the third-largest diameter in our solar system. It was the first planet found with the aid of a telescope. Uranus is often called an "ice giant" planet because it is made mostly of elements heavier than hydrogen and helium, such as oxygen, carbon, nitrogen, and others.'),
  0.6946722865104675)]

In [11]:
vector_store.similarity_search_with_score(
    query="Cold planet in our solar system?",
    k=2
)

[(Document(metadata={'source': './files/sample.txt'}, page_content='Mars – the fourth planet from the Sun – is a dusty, cold, desert world with a very thin atmosphere. This dynamic planet has seasons, polar ice caps, extinct volcanoes, canyons and weather.'),
  0.8419954180717468),
 (Document(metadata={'source': './files/sample.txt'}, page_content='Uranus is the seventh planet from the Sun, and has the third-largest diameter in our solar system. It was the first planet found with the aid of a telescope. Uranus is often called an "ice giant" planet because it is made mostly of elements heavier than hydrogen and helium, such as oxygen, carbon, nitrogen, and others.'),
  0.8770096302032471)]

In [23]:
new_doc = Document(
    page_content="Pluto is the dwarf planet in our solar system. It has a very cold surface.",
    metadata={"temperature": "cold"}
)
vector_store.add_documents([new_doc])

['63ce601f-5b35-477b-84f4-d242137e4359']

In [24]:
vector_store.get()

{'ids': ['4a803557-8b7e-450d-ab96-d08d7a808ff8',
  'aab0df9d-6a71-45d8-a780-b71928f7f322',
  '31c881c0-8df6-4e95-8cbd-fa1398c7a0ec',
  '62b74cd4-3f25-498d-b892-130d62dea2d4',
  'ade66c8e-7d5c-49a6-9868-9ed810f3b4e0',
  '2b491777-7eaf-4860-a7b9-949c5bad4d34',
  'd260b606-d85a-45b6-855a-51e59d6c5926',
  'd591c2a7-2923-4e09-8763-efc5c7fb2fcc',
  '63ce601f-5b35-477b-84f4-d242137e4359'],
 'embeddings': None,
 'documents': ["Mercury is the smallest planet in our solar system and nearest to the Sun. It's only slightly larger than Earth's Moon. From the surface of Mercury, the Sun would appear more than three times as large as it does when viewed from Earth, and the sunlight would be as much as seven times brighter.",
  "Venus is the second planet from the Sun, and our closest planetary neighbor. It's the hottest planet in our solar system, and is sometimes called Earth's twin.",
  "Earth – our home planet – is the third planet from the Sun, and the fifth largest planet. It's the only place we

In [25]:
vector_store.similarity_search_with_score(
    query="",
    k=2,
    filter={"temperature": "cold"}
)

[(Document(metadata={'temperature': 'cold'}, page_content='Pluto is the dwarf planet in our solar system. It has a very cold surface.'),
  2.057088851928711)]

In [30]:
updated_doc = Document(
    page_content="Pluto is no longer classified as a planet; it is a dwarf planet in our solar system.",
    metadata={"temperature": "cold"}
)
vector_store.update_document(document_id="63ce601f-5b35-477b-84f4-d242137e4359", document=updated_doc)

In [31]:
vector_store.get()

{'ids': ['4a803557-8b7e-450d-ab96-d08d7a808ff8',
  'aab0df9d-6a71-45d8-a780-b71928f7f322',
  '31c881c0-8df6-4e95-8cbd-fa1398c7a0ec',
  '62b74cd4-3f25-498d-b892-130d62dea2d4',
  'ade66c8e-7d5c-49a6-9868-9ed810f3b4e0',
  '2b491777-7eaf-4860-a7b9-949c5bad4d34',
  'd260b606-d85a-45b6-855a-51e59d6c5926',
  'd591c2a7-2923-4e09-8763-efc5c7fb2fcc',
  '63ce601f-5b35-477b-84f4-d242137e4359'],
 'embeddings': None,
 'documents': ["Mercury is the smallest planet in our solar system and nearest to the Sun. It's only slightly larger than Earth's Moon. From the surface of Mercury, the Sun would appear more than three times as large as it does when viewed from Earth, and the sunlight would be as much as seven times brighter.",
  "Venus is the second planet from the Sun, and our closest planetary neighbor. It's the hottest planet in our solar system, and is sometimes called Earth's twin.",
  "Earth – our home planet – is the third planet from the Sun, and the fifth largest planet. It's the only place we

In [32]:
vector_store.delete(ids=["63ce601f-5b35-477b-84f4-d242137e4359"])

In [33]:
vector_store.get()

{'ids': ['4a803557-8b7e-450d-ab96-d08d7a808ff8',
  'aab0df9d-6a71-45d8-a780-b71928f7f322',
  '31c881c0-8df6-4e95-8cbd-fa1398c7a0ec',
  '62b74cd4-3f25-498d-b892-130d62dea2d4',
  'ade66c8e-7d5c-49a6-9868-9ed810f3b4e0',
  '2b491777-7eaf-4860-a7b9-949c5bad4d34',
  'd260b606-d85a-45b6-855a-51e59d6c5926',
  'd591c2a7-2923-4e09-8763-efc5c7fb2fcc'],
 'embeddings': None,
 'documents': ["Mercury is the smallest planet in our solar system and nearest to the Sun. It's only slightly larger than Earth's Moon. From the surface of Mercury, the Sun would appear more than three times as large as it does when viewed from Earth, and the sunlight would be as much as seven times brighter.",
  "Venus is the second planet from the Sun, and our closest planetary neighbor. It's the hottest planet in our solar system, and is sometimes called Earth's twin.",
  "Earth – our home planet – is the third planet from the Sun, and the fifth largest planet. It's the only place we know of inhabited by living things.",
  '