From 8d2b4b0e56fa29dd96607da538bb10f6da389874 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A9bastien=20Fichot?= Date: Sat, 20 May 2023 17:16:23 +0200 Subject: [PATCH] Add support for Markdown files (#104) Similar to https://github.com/su77ungr/CASALIOY/pull/86 --- casalioy/ingest.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/casalioy/ingest.py b/casalioy/ingest.py index 5efc175..15828ea 100644 --- a/casalioy/ingest.py +++ b/casalioy/ingest.py @@ -19,6 +19,7 @@ UnstructuredEPubLoader, UnstructuredHTMLLoader, UnstructuredPowerPointLoader, + UnstructuredMarkdownLoader, UnstructuredWordDocumentLoader, ) from langchain.text_splitter import RecursiveCharacterTextSplitter @@ -48,6 +49,7 @@ class Ingester: "ppt": UnstructuredPowerPointLoader, "eml": UnstructuredEmailLoader, "msg": OutlookMessageLoader, + "md": UnstructuredMarkdownLoader, } def __init__(self, db_dir: str, collection: str = "test", verbose=False):