diff --git a/llama-index-integrations/readers/llama-index-readers-microsoft-sharepoint/llama_index/readers/microsoft_sharepoint/base.py b/llama-index-integrations/readers/llama-index-readers-microsoft-sharepoint/llama_index/readers/microsoft_sharepoint/base.py index a7a7faa713fac..a4de5a20c8181 100644 --- a/llama-index-integrations/readers/llama-index-readers-microsoft-sharepoint/llama_index/readers/microsoft_sharepoint/base.py +++ b/llama-index-integrations/readers/llama-index-readers-microsoft-sharepoint/llama_index/readers/microsoft_sharepoint/base.py @@ -421,6 +421,28 @@ def _download_files_from_sharepoint( recursive, ) + def _exclude_access_control_metadata( + self, documents: List[Document] + ) -> List[Document]: + """ + Excludes the access control metadata from the documents for embedding and LLM calls. + + Args: + documents (List[Document]): A list of documents. + + Returns: + List[Document]: A list of documents with access control metadata excluded. + """ + for doc in documents: + access_control_keys = [ + key for key in doc.metadata if key.startswith("allowed_") + ] + + doc.excluded_embed_metadata_keys.extend(access_control_keys) + doc.excluded_llm_metadata_keys.extend(access_control_keys) + + return documents + def _load_documents_with_metadata( self, files_metadata: Dict[str, Any], @@ -448,7 +470,10 @@ def get_metadata(filename: str) -> Any: file_metadata=get_metadata, recursive=recursive, ) - return simple_loader.load_data() + docs = simple_loader.load_data() + if self.attach_permission_metadata: + docs = self._exclude_access_control_metadata(docs) + return docs def load_data( self, diff --git a/llama-index-integrations/readers/llama-index-readers-microsoft-sharepoint/pyproject.toml b/llama-index-integrations/readers/llama-index-readers-microsoft-sharepoint/pyproject.toml index ed1df5d1fb109..1ed6491498f15 100644 --- a/llama-index-integrations/readers/llama-index-readers-microsoft-sharepoint/pyproject.toml +++ b/llama-index-integrations/readers/llama-index-readers-microsoft-sharepoint/pyproject.toml @@ -29,7 +29,7 @@ license = "MIT" maintainers = ["arun-soliton"] name = "llama-index-readers-microsoft-sharepoint" readme = "README.md" -version = "0.2.1" +version = "0.2.2" [tool.poetry.dependencies] python = ">=3.8.1,<4.0"