# Indexing: Inspecting and Managing Documents in a Vectorstore

In [None]:
# Run the cell below to check which version of langchain is installed in the current environment.
# Substitute "langchain" with any other package name to check its version.

In [1]:
pip show langchain

Name: langchain
Version: 0.3.26
Summary: Building applications with LLMs through composability
Home-page: 
Author: 
Author-email: 
License: MIT
Location: C:\Users\Marcus\anaconda3\envs\langchain_env_py312\Lib\site-packages
Requires: langchain-core, langchain-text-splitters, langsmith, pydantic, PyYAML, requests, SQLAlchemy
Required-by: langchain-community
Note: you may need to restart the kernel to use updated packages.


In [2]:
# Load the dotenv IPython extension, then run its %dotenv magic to read a .env file.
# NOTE(review): presumably this supplies the OpenAI API key needed by the embeddings below — confirm.
%load_ext dotenv
%dotenv

In [3]:
from langchain_openai.embeddings import OpenAIEmbeddings
from langchain_chroma import Chroma
from langchain_core.documents import Document

In [4]:
# Embedding model wrapper handed to the vectorstore below.
# NOTE(review): presumably the same model the persisted store was built with — confirm.
embedding = OpenAIEmbeddings(
    model="text-embedding-ada-002",
)

In [5]:
# Open the Chroma vectorstore persisted under ./intro-to-ds-lectures,
# wiring in the embedding object created above as its embedding function.
vectorstore_from_directory = Chroma(
    persist_directory="./intro-to-ds-lectures",
    embedding_function=embedding,
)

In [7]:
# View the contents of the vectorstore (get() is called with no ids or filters)
vectorstore_from_directory.get()

{'ids': ['040a4223-3477-4856-9bdb-b8f277802fa1',
  'd71b1324-f782-45dc-9e4e-92b5afe850d4',
  'a64cd6f2-fd5c-4f44-ad7e-d49b3851e9a9',
  '29c8a05b-31ed-401a-aba7-52b12641721e',
  '64a4be33-ce36-45c7-9559-3e95daa9a1c1',
  '25776343-94eb-461a-84e1-82d4784f507d',
  '50dd0415-9b37-4ffa-9eb5-fd6c85585514',
  '6f77492d-e7d0-45cf-9746-c886af4adefd',
  '7a42e26c-293b-42ea-8eb3-c63975e20358',
  '4848e646-4039-41ab-a086-178d73847673',
  '86f3cf49-5537-4684-aa1f-4ab44428d79d',
  '9d8f8edc-5622-4a38-9f7d-c9a346443402',
  '02e2e8aa-649b-426f-a407-075ab6953e7d',
  '2ece71b7-c9f4-4e6d-b4da-c0349c272456',
  '616e170d-cf7d-4c30-910d-7283223cd695',
  '84881c64-df6a-49da-881f-c61e4ee4fbab',
  'bf86eaa6-cc4d-4e72-bbb5-ed9098d50bd8',
  '2b498da7-b383-41a1-a5d6-54ca3011843e',
  'eeda60fe-1023-467b-a479-16589963603b',
  '54248683-8847-42a8-8521-78dfc896115b',
  '53cc69a1-56ca-4009-a200-d183498e3f03',
  '229d13a1-1f01-466c-8f9a-138773d585ec',
  '88f9f94f-9be4-451d-af1c-53edd54adeea',
  '6c19b4b8-ce90-4a56-8241-

In [10]:
# Fetch a single entry by its id and request only its embedding vector;
# fields not listed in `include` come back as None.
vectorstore_from_directory.get(
    ids="d71b1324-f782-45dc-9e4e-92b5afe850d4",
    include=["embeddings"],
)

{'ids': ['d71b1324-f782-45dc-9e4e-92b5afe850d4'],
 'embeddings': array([[-0.00145079,  0.00294724,  0.04136246, ...,  0.00858565,
         -0.02052466, -0.00128198]]),
 'documents': None,
 'uris': None,
 'included': ['embeddings'],
 'data': None,
 'metadatas': None}

In [11]:
# Build a new Document to add to the vectorstore: the text goes in page_content,
# and the course and lecture titles are attached as metadata.
added_document = Document(page_content='Alright! So… Let’s discuss the not-so-obvious differences between the terms analysis and analytics. Due to the similarity of the words, some people believe they share the same meaning, and thus use them interchangeably. Technically, this isn’t correct. There is, in fact, a distinct difference between the two. And the reason for one often being used instead of the other is the lack of a transparent understanding of both. So, let’s clear this up, shall we? First, we will start with analysis', 
                          metadata={'Course Title': 'Introduction to Data and Data Science', 
                                    'Lecture Title': 'Analysis vs Analytics'})

In [12]:
# Add the new document under an explicit, fixed id. Without `ids`, a fresh random
# id is generated on every run, so the cells below that look up, update and delete
# "fd0e66c2-9018-4dd5-b7f3-2b72121bdaa0" would no longer match the added entry
# after a Restart & Run All (and each run would leave a duplicate behind).
# The call returns the list of ids that were stored.
vectorstore_from_directory.add_documents([added_document], 
                                         ids = ["fd0e66c2-9018-4dd5-b7f3-2b72121bdaa0"])

['fd0e66c2-9018-4dd5-b7f3-2b72121bdaa0']

In [13]:
# Look the entry up by the id returned by add_documents() to confirm it was stored
vectorstore_from_directory.get(
    ids="fd0e66c2-9018-4dd5-b7f3-2b72121bdaa0",
)

{'ids': ['fd0e66c2-9018-4dd5-b7f3-2b72121bdaa0'],
 'embeddings': None,
 'documents': ['Alright! So… Let’s discuss the not-so-obvious differences between the terms analysis and analytics. Due to the similarity of the words, some people believe they share the same meaning, and thus use them interchangeably. Technically, this isn’t correct. There is, in fact, a distinct difference between the two. And the reason for one often being used instead of the other is the lack of a transparent understanding of both. So, let’s clear this up, shall we? First, we will start with analysis'],
 'uris': None,
 'included': ['metadatas', 'documents'],
 'data': None,
 'metadatas': [{'Course Title': 'Introduction to Data and Data Science',
   'Lecture Title': 'Analysis vs Analytics'}]}

In [14]:
# Build the replacement Document: new page_content and a new lecture title,
# to be written over the entry that was added earlier.
updated_document = Document(page_content='Great! We hope we gave you a good idea about the level of applicability of the most frequently used programming and software tools in the field of data science. Thank you for watching!', 
                            metadata={'Course Title': 'Introduction to Data and Data Science', 
                                     'Lecture Title': 'Programming Languages & Software Employed in Data Science - All the Tools You Need'})

In [15]:
# Overwrite the entry stored under the added document's id with updated_document
vectorstore_from_directory.update_document(
    document_id="fd0e66c2-9018-4dd5-b7f3-2b72121bdaa0",
    document=updated_document,
)

In [16]:
# Fetch the same id again to confirm it now holds the updated content and metadata
vectorstore_from_directory.get(
    ids="fd0e66c2-9018-4dd5-b7f3-2b72121bdaa0",
)

{'ids': ['fd0e66c2-9018-4dd5-b7f3-2b72121bdaa0'],
 'embeddings': None,
 'documents': ['Great! We hope we gave you a good idea about the level of applicability of the most frequently used programming and software tools in the field of data science. Thank you for watching!'],
 'uris': None,
 'included': ['metadatas', 'documents'],
 'data': None,
 'metadatas': [{'Course Title': 'Introduction to Data and Data Science',
   'Lecture Title': 'Programming Languages & Software Employed in Data Science - All the Tools You Need'}]}

In [17]:
# Remove the entry by id. delete() is documented to take a list of ids, so the
# single id is wrapped in a list instead of relying on a bare string being accepted.
vectorstore_from_directory.delete(ids = ["fd0e66c2-9018-4dd5-b7f3-2b72121bdaa0"])

In [18]:
# Confirm the deletion: looking the id up should now return empty results
vectorstore_from_directory.get(
    ids="fd0e66c2-9018-4dd5-b7f3-2b72121bdaa0",
)

{'ids': [],
 'embeddings': None,
 'documents': [],
 'uris': None,
 'included': ['metadatas', 'documents'],
 'data': None,
 'metadatas': []}