In [2]:
from llama_index.core import SimpleDirectoryReader, VectorStoreIndex, query_engine, StorageContext, load_index_from_storage
from llama_index.vector_stores.chroma import ChromaVectorStore
from llama_index.core.memory import Memory
from chromadb.utils.embedding_functions import OllamaEmbeddingFunction
from core.settings import CHROMA_PATH, llm, embed_model, STORAGE_PATH
import chromadb,os
from llama_index.core.storage.chat_store import SimpleChatStore
from llama_index.core.retrievers import VectorIndexRetriever
from services.get_index_service import get_index
from db.chroma_client import get_vector_db_client
from services.documents_service import get_ref_doc_idx, get_storage_context

In [1]:
from services.ingest_service import build_ingestion_pipeline



In [9]:
collection = get_vector_db_client().get_collection("dep")
vector_store = ChromaVectorStore(chroma_collection=collection)
storage_context = StorageContext.from_defaults(vector_store=vector_store)
index = VectorStoreIndex.from_vector_store(vector_store=vector_store)


In [None]:
client = get_vector_db_client()
collection = client.get_collection(name="dep")
metadatas  = collection.get(
        where={"file_name": "HiveWorx_HR_Policy_Manual.pdf"},
    include=["metadatas"] 
    ).get("metadatas")

In [38]:
len(metadatas)

391

In [41]:
metadatas[0].get("file_name")

'HiveWorx_HR_Policy_Manual.pdf'

In [21]:
files = set()
for metadata in metadatas:
    files.add(metadata.get("file_name"))

files


{'HiveWorx_HR_Policy_Manual.pdf'}

In [10]:
docstore = index.storage_context.docstore


In [11]:
vectorstore= index.storage_context.vector_store

In [12]:
len(docstore.docs)

0

In [7]:
docstore.docs.keys()

dict_keys([])

In [32]:
docs = SimpleDirectoryReader(input_files=[r"C:\Users\Ds-Terminal\Desktop\rag-llamaindex-chroma\data\dep\HiveWorx_HR_Policy_Manual.pdf"]).load_data()

In [33]:
docs[0]

Document(id_='ed5e240b-398d-4bd0-8be9-f2ff2162efeb', embedding=None, metadata={'page_label': '1', 'file_name': 'HiveWorx_HR_Policy_Manual.pdf', 'file_path': 'C:\\Users\\Ds-Terminal\\Desktop\\rag-llamaindex-chroma\\data\\dep\\HiveWorx_HR_Policy_Manual.pdf', 'file_type': 'application/pdf', 'file_size': 1048607, 'creation_date': '2025-10-27', 'last_modified_date': '2025-10-28'}, excluded_embed_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], excluded_llm_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], relationships={}, metadata_template='{key}: {value}', metadata_separator='\n', text_resource=MediaResource(embeddings=None, data=None, text='                                                                                            HR POLICY MANUAL     \nPage 1 of 35 \n \n  \n  \n \n \nHUMAN RESOURCE POLICY MANUAL \n                                      Isla

In [34]:
pipeline = build_ingestion_pipeline(doc_store=index.storage_context.docstore, vector_store=index.storage_context.vector_store)

In [35]:
nodes = pipeline.run(documents =docs)

In [36]:
nodes[0]

TextNode(id_='fa829b1a-d8a2-44b5-8a02-c716da168ec3', embedding=None, metadata={'page_label': '1', 'file_name': 'HiveWorx_HR_Policy_Manual.pdf', 'file_path': 'C:\\Users\\Ds-Terminal\\Desktop\\rag-llamaindex-chroma\\data\\dep\\HiveWorx_HR_Policy_Manual.pdf', 'file_type': 'application/pdf', 'file_size': 1048607, 'creation_date': '2025-10-27', 'last_modified_date': '2025-10-28'}, excluded_embed_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date', 'ceo', 'manager', 'developer', 'department'], excluded_llm_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date', 'ceo', 'manager', 'developer', 'department'], relationships={<NodeRelationship.SOURCE: '1'>: RelatedNodeInfo(node_id='ed5e240b-398d-4bd0-8be9-f2ff2162efeb', node_type=<ObjectType.DOCUMENT: '4'>, metadata={'page_label': '1', 'file_name': 'HiveWorx_HR_Policy_Manual.pdf', 'file_path': 'C:\\Users\\Ds-Terminal\\Desktop\\

In [None]:
index.insert_nodes(nodes=nodes)

In [8]:
len(index.storage_context.docstore.docs)

0

In [41]:
storage_context.persist()

In [44]:
docstore = index.storage_context.docstore

In [85]:
for doc in docstore.docs.items():
    print(doc[0])
    print(doc[1].metadata)
    print(doc[1].doc_id)
    print(doc[1].id_)
    break

4131da9b-7df5-4b82-8615-594f40b095ca
{'page_label': '1', 'file_name': '100-English-Short-Stories.pdf', 'file_path': 'C:\\Users\\Ds-Terminal\\Desktop\\rag-llamaindex-chroma\\data\\dep\\100-English-Short-Stories.pdf', 'file_type': 'application/pdf', 'file_size': 2037873, 'creation_date': '2025-10-27', 'last_modified_date': '2025-10-28'}
4131da9b-7df5-4b82-8615-594f40b095ca
4131da9b-7df5-4b82-8615-594f40b095ca


In [88]:
index.delete(doc_id="4131da9b-7df5-4b82-8615-594f40b095ca", delete_from_docstore=True)



In [89]:
storage_context.persist()

In [90]:
docstore = index.storage_context.docstore

In [91]:
docstore.get_document("4131da9b-7df5-4b82-8615-594f40b095ca")

Document(id_='4131da9b-7df5-4b82-8615-594f40b095ca', embedding=None, metadata={'page_label': '1', 'file_name': '100-English-Short-Stories.pdf', 'file_path': 'C:\\Users\\Ds-Terminal\\Desktop\\rag-llamaindex-chroma\\data\\dep\\100-English-Short-Stories.pdf', 'file_type': 'application/pdf', 'file_size': 2037873, 'creation_date': '2025-10-27', 'last_modified_date': '2025-10-28'}, excluded_embed_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], excluded_llm_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], relationships={}, metadata_template='{key}: {value}', metadata_separator='\n', text_resource=None, image_resource=None, audio_resource=None, video_resource=None, text_template='{metadata_str}\n\n{content}')

In [70]:
###################working from here #############################3333333

In [2]:
ref_doc_idx = get_ref_doc_idx(department="dep",doc_name="ICD-10-CM FY25 Guidelines October 1, 2024 (1).pdf")

got collection already created.
after initializing vector store
before storage_context initialization
from withing the create or get storage path:dep
C:\Users\Ds-Terminal\Desktop\rag-llamaindex-chroma\storage\dep
from withing the create or get storage path:dep
Loading llama_index.core.storage.kvstore.simple_kvstore from C:\Users\Ds-Terminal\Desktop\rag-llamaindex-chroma\storage\dep\docstore.json.
Loading llama_index.core.storage.kvstore.simple_kvstore from C:\Users\Ds-Terminal\Desktop\rag-llamaindex-chroma\storage\dep\index_store.json.
storage_context from path
before returning storage_context......................
375


In [3]:
len(ref_doc_idx)

0

In [45]:
storage_context = get_storage_context(department="dep")
docstore = storage_context.docstore
print(len(docstore.docs))

ref_doc_ids = set()

for node_id, node in docstore.docs.items():
    print(node_id)
    print(node.doc_id)
    metadata = node.metadata or {}
    print(metadata)
    if metadata.get("file_name") =="ICD-10-CM FY25 Guidelines October 1, 2024 (1).pdf":
        ref_doc_id = metadata.get("ref_doc_id")
        if ref_doc_id:
            ref_doc_ids.add(ref_doc_id)

got collection already created.
after initializing vector store
before storage_context initialization
from withing the create or get storage path:dep
C:\Users\Ds-Terminal\Desktop\rag-llamaindex-chroma\storage\dep
from withing the create or get storage path:dep
Loading llama_index.core.storage.kvstore.simple_kvstore from C:\Users\Ds-Terminal\Desktop\rag-llamaindex-chroma\storage\dep\docstore.json.
Loading llama_index.core.storage.kvstore.simple_kvstore from C:\Users\Ds-Terminal\Desktop\rag-llamaindex-chroma\storage\dep\index_store.json.
storage_context from path
before returning storage_context......................
375
C:\Users\Ds-Terminal\Desktop\rag-llamaindex-chroma\data\dep\HiveWorx_HR_Policy_Manual.pdf_part_0
C:\Users\Ds-Terminal\Desktop\rag-llamaindex-chroma\data\dep\HiveWorx_HR_Policy_Manual.pdf_part_0
{'page_label': '1', 'file_name': 'HiveWorx_HR_Policy_Manual.pdf', 'ceo': 'true', 'developer': 'true', 'manager': 'true', 'department': 'dep'}
C:\Users\Ds-Terminal\Desktop\rag-llam

In [None]:
index = storage_context.index_store
type(index)

llama_index.core.storage.index_store.simple_index_store.SimpleIndexStore

In [51]:
for doc in docstore.docs:
    print(doc)
    break

C:\Users\Ds-Terminal\Desktop\rag-llamaindex-chroma\data\dep\HiveWorx_HR_Policy_Manual.pdf_part_0


In [40]:
len(ref_doc_ids)

0

In [4]:
from services.documents_service import get_storage_context

In [5]:
doc_ids = get_doc_idx(department="dep",doc_name="HiveWorx_HR_Policy_Manual.pdf")

NameError: name 'get_doc_idx' is not defined

In [6]:
storage_context = get_storage_context(department="dep")
docstore = storage_context.docstore


got collection already created.
after initializing vector store
before storage_context initialization
from withing the create or get storage path:dep
C:\Users\Ds-Terminal\Desktop\rag-llamaindex-chroma\storage\dep
from withing the create or get storage path:dep
Loading llama_index.core.storage.kvstore.simple_kvstore from C:\Users\Ds-Terminal\Desktop\rag-llamaindex-chroma\storage\dep\docstore.json.
Loading llama_index.core.storage.kvstore.simple_kvstore from C:\Users\Ds-Terminal\Desktop\rag-llamaindex-chroma\storage\dep\index_store.json.
storage_context from path
before returning storage_context......................


In [7]:
len(docstore.docs)

375

In [10]:
docstore.get_all_ref_doc_info()

{}

In [67]:
docstore._ref_doc_collection

'docstore/ref_doc_info'

In [69]:
kv = docstore._kvstore

In [None]:
storage

In [None]:
storage_context.docstore.get_all_ref_doc_info()


{}

In [75]:
ref_docs

{}

In [None]:
for doc in docstore:
    doc = doc
    break

TypeError: 'SimpleDocumentStore' object is not iterable

In [60]:
doc

'C:\\Users\\Ds-Terminal\\Desktop\\rag-llamaindex-chroma\\data\\dep\\HiveWorx_HR_Policy_Manual.pdf_part_0'

In [61]:
docstore.docs

{'C:\\Users\\Ds-Terminal\\Desktop\\rag-llamaindex-chroma\\data\\dep\\HiveWorx_HR_Policy_Manual.pdf_part_0': Document(id_='C:\\Users\\Ds-Terminal\\Desktop\\rag-llamaindex-chroma\\data\\dep\\HiveWorx_HR_Policy_Manual.pdf_part_0', embedding=None, metadata={'page_label': '1', 'file_name': 'HiveWorx_HR_Policy_Manual.pdf', 'ceo': 'true', 'developer': 'true', 'manager': 'true', 'department': 'dep'}, excluded_embed_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], excluded_llm_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], relationships={}, metadata_template='{key}: {value}', metadata_separator='\n', text_resource=MediaResource(embeddings=None, data=None, text='                                                                                            HR POLICY MANUAL     \nPage 1 of 35 \n \n  \n  \n \n \nHUMAN RESOURCE POLICY MANUAL \n                         

In [16]:
len(doc_ids)

0

In [368]:
docs = SimpleDirectoryReader(input_dir="C:/Users/Ds-Terminal/Desktop/rag-llamaindex-chroma/data/xyzz/new_files").load_data()

In [369]:
docs[5]

Document(id_='472c3797-7a2b-41fd-a304-7936199ff6c9', embedding=None, metadata={'page_label': '6', 'file_name': '100-English-Short-Stories.pdf', 'file_path': 'C:\\Users\\Ds-Terminal\\Desktop\\rag-llamaindex-chroma\\data\\xyzz\\new_files\\100-English-Short-Stories.pdf', 'file_type': 'application/pdf', 'file_size': 2037873, 'creation_date': '2025-10-24', 'last_modified_date': '2025-10-24'}, excluded_embed_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], excluded_llm_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], relationships={}, metadata_template='{key}: {value}', metadata_separator='\n', text_resource=MediaResource(embeddings=None, data=None, text='47. Vegan Food\n48. Evacuation\n49. Working Men\n50. Under the Lamp Post\n51. My Best Friend\n52. Preparations\n53. Social Media\n54. Big Lenders Big Spenders\n55. Traffic And Kids\n56. Down with the Pounds\n

In [381]:
from services.ingest_service import build_ingestion_pipeline
pipeline = build_ingestion_pipeline(vector_store = vector_store, doc_store = storage_context.docstore)

In [382]:
nodes = pipeline.run(docs)

Parsing nodes: 0it [00:00, ?it/s]


In [383]:
nodes

[]

In [2]:
chroma_client = get_vector_db_client()

In [3]:
index = get_index(department="dep")

got collection already created.
after initializing vector store
before storage_context initialization
C:\Users\Ds-Terminal\Desktop\rag-llamaindex-chroma\storage\dep
Loading llama_index.core.storage.kvstore.simple_kvstore from C:\Users\Ds-Terminal\Desktop\rag-llamaindex-chroma\storage\dep\docstore.json.
Loading llama_index.core.storage.kvstore.simple_kvstore from C:\Users\Ds-Terminal\Desktop\rag-llamaindex-chroma\storage\dep\index_store.json.
storage_context from path
before returning storage_context......................
after getting storage context within get_index
after index initialization in get_index


In [4]:
index.as_retriever().retrieve("tell me somthing about icd10")


[NodeWithScore(node=TextNode(id_='ab124a3f-751b-48c3-9acc-ea6d8a262015', embedding=None, metadata={'page_label': '1', 'file_name': 'ICD-10-CM FY25 Guidelines October 1, 2024 (1).pdf', 'developer': 'true', 'department': 'dep'}, excluded_embed_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date', 'ceo', 'manager', 'developer', 'department'], excluded_llm_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date', 'ceo', 'manager', 'developer', 'department'], relationships={<NodeRelationship.SOURCE: '1'>: RelatedNodeInfo(node_id='C:\\Users\\Ds-Terminal\\Desktop\\rag-llamaindex-chroma\\data\\dep\\ICD-10-CM FY25 Guidelines October 1, 2024 (1).pdf_part_0', node_type='4', metadata={'page_label': '1', 'file_name': 'ICD-10-CM FY25 Guidelines October 1, 2024 (1).pdf', 'developer': 'true', 'department': 'dep'}, hash='5481973e6817b2aa6ba060630418d0b027ddaf7393424a0fa1eedca09374206c')

In [5]:
llm.complete("hello")

CompletionResponse(text="Hello! It's great to hear from you. Is there something I can help you with today? Do you have a question, need some information, or just want to chat? I'm here to assist you in any way I can.", additional_kwargs={'tool_calls': [], 'thinking': None}, raw={'model': 'koesn/llama3-8b-instruct:latest', 'created_at': '2025-10-27T17:53:23.944372106Z', 'done': True, 'done_reason': 'stop', 'total_duration': 1940409614, 'load_duration': 32050047, 'prompt_eval_count': 45, 'prompt_eval_duration': 83276345, 'eval_count': 49, 'eval_duration': 1823455699, 'message': Message(role='assistant', content="Hello! It's great to hear from you. Is there something I can help you with today? Do you have a question, need some information, or just want to chat? I'm here to assist you in any way I can.", thinking=None, images=None, tool_name=None, tool_calls=None), 'usage': {'prompt_tokens': 45, 'completion_tokens': 49, 'total_tokens': 94}}, logprobs=None, delta=None)

In [3]:
chroma_client = get_vector_db_client()

In [5]:
chroma_client.delete_collection("dep")

In [4]:
chroma_client.list_collections()

[Collection(name=dep)]

AuthorizationError: Reset is disabled by config

In [None]:
index_struct = storage_context.index_store.index_structs()[1]

In [3]:
collection = chroma_client.get_collection("xyzz")
vector_store = ChromaVectorStore(chroma_collection=collection)
storage_context = StorageContext.from_defaults(vector_store=vector_store, persist_dir=STORAGE_PATH)
# index = VectorStoreIndex.from_vector_store(vector_store=vector_store, storage_context = storage_context)
# index = VectorStoreIndex(storage_context=storage_context, index_struct = index_struct)
# index = load_index_from_storage(storage_context=storage_context, index_id="2b0a1a82-6c8c-4b5d-b4cf-6493875dd6cf")


Loading llama_index.core.storage.kvstore.simple_kvstore from C:\Users\Ds-Terminal\Desktop\rag-llamaindex-chroma\storage\docstore.json.
Loading llama_index.core.storage.kvstore.simple_kvstore from C:\Users\Ds-Terminal\Desktop\rag-llamaindex-chroma\storage\index_store.json.


In [4]:
index = load_index_from_storage(storage_context=storage_context)


In [4]:
index = VectorStoreIndex.from_vector_store(vector_store=vector_store)

In [7]:
index = get_index(department="xyzz")

Loading llama_index.core.storage.kvstore.simple_kvstore from C:\Users\Ds-Terminal\Desktop\rag-llamaindex-chroma\storage\docstore.json.
Loading llama_index.core.storage.kvstore.simple_kvstore from C:\Users\Ds-Terminal\Desktop\rag-llamaindex-chroma\storage\index_store.json.


In [433]:
index_struct = storage_context.index_store.index_structs()

In [434]:
index_struct

[IndexDict(index_id='2b0a1a82-6c8c-4b5d-b4cf-6493875dd6cf', summary=None, nodes_dict={}, doc_id_dict={}, embeddings_dict={})]

In [419]:
storage_context.index_store.delete_index_struct(index_struct[0].index_id)

In [422]:
storage_context.persist()

In [5]:
retriever = index.as_retriever()
retriever.retrieve("something about computer science.")

[NodeWithScore(node=TextNode(id_='8a5f0bb5-b130-41f7-b62b-49705778d094', embedding=None, metadata={'page_label': '26', 'file_name': 'ICD-10-CM FY25 Guidelines October 1, 2024 (1).pdf', 'ceo': 'true', 'department': 'xyzz'}, excluded_embed_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date', 'ceo', 'manager', 'developer', 'department'], excluded_llm_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date', 'ceo', 'manager', 'developer', 'department'], relationships={<NodeRelationship.SOURCE: '1'>: RelatedNodeInfo(node_id='C:\\Users\\Ds-Terminal\\Desktop\\rag-llamaindex-chroma\\data\\xyzz\\ICD-10-CM FY25 Guidelines October 1, 2024 (1).pdf_part_25', node_type='4', metadata={'page_label': '26', 'file_name': 'ICD-10-CM FY25 Guidelines October 1, 2024 (1).pdf', 'ceo': 'true', 'department': 'xyzz'}, hash='fe2cfa11d51545472bd8c8127aadb02a71157eea9738bd9d9e94f9ad787258ad'), <Nod

In [21]:
results = collection.get(
    where={"file_name": "computer science.pdf"},
    include=["metadatas"] # Request to include the metadata of the matching segments
).get("metadatas")

In [22]:
results[0]

IndexError: list index out of range

In [8]:
for key , value in results.items():
    print(key)
    

ids
embeddings
documents
uris
included
data
metadatas


In [9]:
metadatas = results.get("metadatas")

In [10]:
results.get("metadatas")[0]

{'file_name': 'computer science.pdf',
 'doc_id': 'C:\\Users\\Ds-Terminal\\Desktop\\rag-llamaindex-chroma\\data\\xyzz\\computer science.pdf_part_0',
 'page_label': '1',
 '_node_type': 'TextNode',
 'document_id': 'C:\\Users\\Ds-Terminal\\Desktop\\rag-llamaindex-chroma\\data\\xyzz\\computer science.pdf_part_0',
 'department': 'xyzz',
 '_node_content': '{"id_": "0f8e09ed-c9d1-4af2-9802-f087fcda0a2e", "embedding": null, "metadata": {"page_label": "1", "file_name": "computer science.pdf", "ceo": "true", "department": "xyzz"}, "excluded_embed_metadata_keys": ["file_name", "file_type", "file_size", "creation_date", "last_modified_date", "last_accessed_date", "ceo", "manager", "developer", "department"], "excluded_llm_metadata_keys": ["file_name", "file_type", "file_size", "creation_date", "last_modified_date", "last_accessed_date", "ceo", "manager", "developer", "department"], "relationships": {"1": {"node_id": "C:\\\\Users\\\\Ds-Terminal\\\\Desktop\\\\rag-llamaindex-chroma\\\\data\\\\xyzz\\\\

In [16]:
ref_doc_ids =[]
for metadata in results:
    ref_doc_ids.append(metadata.get("ref_doc_id"))

In [18]:
ref_doc_idx = {metadata.get("ref_doc_id") for metadata in results}


In [19]:
ref_doc_ids[0]

'C:\\Users\\Ds-Terminal\\Desktop\\rag-llamaindex-chroma\\data\\xyzz\\ICD-10-CM FY25 Guidelines October 1, 2024 (1).pdf_part_0'

In [13]:
for id in ref_doc_ids:
    index.delete_ref_doc(id, delete_from_docstore=True)

In [14]:
storage_context.persist(persist_dir = STORAGE_PATH)

In [3]:
client = get_vector_db_client()
collection = client.get_collection("dep")
metadatas = collection.get(
where={"file_name": "computer science.pdf"},
include=["metadatas"] 
).get("metadatas")

ref_doc_idx = [metadata.get("doc_id") for metadata in metadatas]

In [5]:
ref_doc_idx[0]

'C:\\Users\\Ds-Terminal\\Desktop\\rag-llamaindex-chroma\\data\\dep\\computer science.pdf_part_0'

In [297]:
#######################working upto here ###########################

In [14]:
from services.get_index_service import get_storage_context


In [15]:
storage_context = get_storage_context("dep")

got collection already created.
after initializing vector store
before storage_context initialization
from withing the create or get storage path:dep
C:\Users\Ds-Terminal\Desktop\rag-llamaindex-chroma\storage\dep
from withing the create or get storage path:dep
Loading llama_index.core.storage.kvstore.simple_kvstore from C:\Users\Ds-Terminal\Desktop\rag-llamaindex-chroma\storage\dep\docstore.json.
Loading llama_index.core.storage.kvstore.simple_kvstore from C:\Users\Ds-Terminal\Desktop\rag-llamaindex-chroma\storage\dep\index_store.json.
storage_context from path
before returning storage_context......................


In [16]:
docstore = storage_context.docstore
vectorstore = storage_context.vector_store

In [17]:
documents = docstore.docs.items()

In [21]:
docstore.delete_document(doc_id=ref_doc_idx[1],raise_error=True)

In [22]:
docstore.get_document(doc_id=ref_doc_idx[1])

ValueError: doc_id C:\Users\Ds-Terminal\Desktop\rag-llamaindex-chroma\data\dep\computer science.pdf_part_1 not found.

In [24]:
docs = []
for doc_id, doc in docstore.docs.items():
    if doc.metadata["file_name"] =="computer science.pdf":
        docs.append(doc)

In [25]:

len(docs)

86

In [26]:
for doc in docs:
    docstore.delete_document(doc.id_)

In [27]:
storage_context.persist()

In [158]:
doc_id = doc.id_

In [159]:
doc_ref_id = doc.ref_doc_id

In [160]:
docstore.get_document(doc_id)

Document(id_='C:\\Users\\Ds-Terminal\\Desktop\\rag-llamaindex-chroma\\data\\comp\\100-English-Short-Stories.pdf_part_1', embedding=None, metadata={'page_label': '2', 'file_name': '100-English-Short-Stories.pdf', 'ceo': 'true', 'manager': 'true', 'department': 'comp'}, excluded_embed_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], excluded_llm_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], relationships={}, metadata_template='{key}: {value}', metadata_separator='\n', text_resource=None, image_resource=None, audio_resource=None, video_resource=None, text_template='{metadata_str}\n\n{content}')

In [118]:
index.delete_ref_doc(doc_id, delete_from_docstore=True)

In [165]:
item = collection.get(doc.id_)

In [166]:
item.items()

dict_items([('ids', []), ('embeddings', None), ('documents', []), ('uris', None), ('included', ['metadatas', 'documents']), ('data', None), ('metadatas', [])])

In [167]:
index.delete(doc_id,delete_from_docstore=True)



In [168]:
ref_doc_id = doc.ref_doc_id
ref_doc_id

In [169]:
print(ref_doc_id)

None


In [206]:
storage_context.persist()