diff --git a/langchain/document_loaders/sitemap.py b/langchain/document_loaders/sitemap.py index 1bc583cdc42c4a..3a417dd0b4ac46 100644 --- a/langchain/document_loaders/sitemap.py +++ b/langchain/document_loaders/sitemap.py @@ -61,6 +61,13 @@ def parse_sitemap(self, soup: Any) -> List[dict]: } ) + for sitemap in soup.find_all("sitemap"): + loc = sitemap.find("loc") + if not loc: + continue + soup_child = self.scrape_all([loc.text], "xml")[0] + + els.extend(self.parse_sitemap(soup_child)) return els def load(self) -> List[Document]: