In [14]:
from langchain.document_loaders import CSVLoader, DirectoryLoader
from pathlib import Path
from re import sub
from datetime import datetime

# Settings consumed by CSVProcessFromDirectory below.
# NOTE: the identifier spellings are kept as-is because the class refers to
# these exact names.
SEPRATOR = ","  # CSV field delimiter
QUOTEDSTRING = '"'  # CSV quote character
ISMULTITHRD = True  # passed to DirectoryLoader as use_multithreading
MAXCONCRNCY = 3  # passed to DirectoryLoader as max_concurrency

class CSVProcessFromDirectory:
    """Load every CSV file found under a folder via ``DirectoryLoader``.

    The instance validates the folder (creating it when missing), picks the
    loader class and CSV parsing arguments for the requested file format, and
    builds a ``DirectoryLoader`` that recursively globs ``**/*.<fileFormat>``.

    Args:
        folderPath: Directory to scan. A ``str`` is accepted and converted to
            ``Path``.
        fileFormat: File extension without the dot; only ``"csv"`` is
            supported (case-insensitive).

    Raises:
        AttributeError: If ``folderPath`` is ``None``/empty or ``fileFormat``
            is anything other than CSV.
    """

    def __init__(self, folderPath: Path = None, fileFormat: str = "csv"):
        # Order matters: the two loader setters read ``self.fileFormat``.
        self.folderPath = folderPath
        self.fileFormat = fileFormat
        self.loaderClass = None
        self.loaderArgmnt = None
        self.directoryLoader = DirectoryLoader(
            path=self.folderPath,
            glob=f"**/*.{self.fileFormat}",
            loader_cls=self.loaderClass,
            # Use the validated arguments instead of a second hard-coded copy,
            # so SEPRATOR / QUOTEDSTRING actually reach the CSV loader.
            loader_kwargs={"csv_args": self.loaderArgmnt},
            use_multithreading=ISMULTITHRD,
            max_concurrency=MAXCONCRNCY,
        )

    @property
    def folderPath(self) -> Path:
        """Directory that is scanned for files."""
        return self.__folderPath

    @folderPath.setter
    def folderPath(self, folderPath: Path):
        """Validate the folder, creating it (with parents) when it is missing.

        Raises:
            AttributeError: If the value is ``None`` or an empty string.
        """
        if folderPath is None or str(folderPath) == "":
            raise AttributeError("Folder Path is not Valid. Please provide Proper Folder Path")

        # Accept plain strings too; previously they crashed on ``.exists()``.
        if not isinstance(folderPath, Path):
            folderPath = Path(folderPath)

        if not folderPath.exists():
            folderPath.mkdir(parents=True, exist_ok=True)
        else:
            print(f"Path {folderPath} Exists")

        self.__folderPath = folderPath

    @property
    def fileFormat(self) -> str:
        """Lower-cased file extension used in the glob pattern."""
        return self.__fileFormat

    @fileFormat.setter
    def fileFormat(self, fileFormat: str):
        # Normalised once here, so the getter can return the stored value.
        self.__fileFormat = fileFormat.lower()

    @property
    def loaderClass(self) -> type:
        """Loader class handed to ``DirectoryLoader`` as ``loader_cls``."""
        return self.__loaderClass

    @loaderClass.setter
    def loaderClass(self, loaderClass):
        """Select the loader class from ``fileFormat``.

        The assigned value is ignored; the class is derived from the format.

        Raises:
            AttributeError: If ``fileFormat`` is not ``"csv"``.
        """
        if self.fileFormat != "csv":
            raise AttributeError("File Format is Other then CSV")
        self.__loaderClass = CSVLoader

    @property
    def loaderArgmnt(self) -> dict:
        """CSV parsing arguments forwarded to the loader as ``csv_args``."""
        return self.__loaderArgmnt

    @loaderArgmnt.setter
    def loaderArgmnt(self, arg):
        """Set the CSV parsing arguments from the module-level constants.

        The assigned value is ignored; the arguments always come from
        ``SEPRATOR`` and ``QUOTEDSTRING``.

        Raises:
            AttributeError: If ``fileFormat`` is not ``"csv"``.
        """
        if self.fileFormat != "csv":
            raise AttributeError("Not CSV Format")

        self.__loaderArgmnt = {
            "delimiter": SEPRATOR,
            "quotechar": QUOTEDSTRING,
        }

    @property
    def CSVInDirToChunks(self) -> list:
        """Run the directory loader and return the loaded documents.

        Note: every access re-reads the files from disk.
        """
        return self.directoryLoader.load()
    

# Demo run: load every CSV under ../data/csv and dump each loaded document.
path = Path("../data/csv")
obj = CSVProcessFromDirectory(path, "csv")
chunks = obj.CSVInDirToChunks
for item in chunks:
    # One print call emits the same three lines: metadata, content, separator.
    print(item.metadata, item.page_content, '---------------------', sep="\n")

Path ..\data\csv Exists
{'source': '..\\data\\csv\\businesscentral_clov.csv', 'row': 0}
sourcelocation: Clov
pk_uuid: c95d2918-a0c7-4456-90eb-713e1437de13
number: DAN.REBECK
firstname: Dan
lastname: Rebeck
RoleGroup: Dynamics 365 for Accountants
---------------------
{'source': '..\\data\\csv\\businesscentral_clov.csv', 'row': 1}
sourcelocation: Clov
pk_uuid: baed7392-dd8a-47d6-91f4-ea268cda3191
number: MATTHEW.BOHUN-APONTE
firstname: Matthew
lastname: Bohun-Aponte
RoleGroup: Dynamics 365 for Accountants
---------------------
{'source': '..\\data\\csv\\businesscentral_clov.csv', 'row': 2}
sourcelocation: Clov
pk_uuid: fb59eb2d-2243-488e-8ecb-93172e8d5d93
number: SARAH.HIBIT
firstname: Sarah
lastname: Hibit
RoleGroup: Dynamics 365 for Accountants
---------------------
{'source': '..\\data\\csv\\businesscentral_clov.csv', 'row': 3}
sourcelocation: Clov
pk_uuid: 3f8bb255-d4c1-4389-919d-ec96ebefb1e9
number: JILLIAN.GOLDMAN
firstname: Jillian
lastname: Goldman
RoleGroup: Dynamics 365 Accoun