From 4a96830ec82b0c9e6b8294470137175c0e6d5483 Mon Sep 17 00:00:00 2001 From: miezzi <52928599+miezzi@users.noreply.github.com> Date: Fri, 2 Feb 2024 16:17:31 -0300 Subject: [PATCH] Update base.py (#839) * Update base.py add support for configuring delimiter and quotechar in paged_csv * Linting and formatting * Linting and formatting v2 * Remove | NoneType on load_data because it fails test_library_matches --------- Co-authored-by: miezz --- llama_hub/file/paged_csv/base.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/llama_hub/file/paged_csv/base.py b/llama_hub/file/paged_csv/base.py index 1236272bc7..4011c4915a 100644 --- a/llama_hub/file/paged_csv/base.py +++ b/llama_hub/file/paged_csv/base.py @@ -3,6 +3,7 @@ A parser for tabular data files. """ + from pathlib import Path from typing import Any, Dict, List, Optional @@ -26,14 +27,18 @@ def __init__(self, *args: Any, encoding: str = "utf-8", **kwargs: Any) -> None: self._encoding = encoding def load_data( - self, file: Path, extra_info: Optional[Dict] = None + self, + file: Path, + extra_info: Optional[Dict] = None, + delimiter: str = ",", + quotechar: str = '"', ) -> List[Document]: """Parse file.""" import csv docs = [] with open(file, "r", encoding=self._encoding) as fp: - csv_reader = csv.DictReader(fp) # type: ignore + csv_reader = csv.DictReader(f=fp, delimiter=delimiter, quotechar=quotechar) # type: ignore for row in csv_reader: docs.append( Document(