In [None]:
# The variables below describes if the notebook is no test mode or not,
# when "test", it means the data can be fewer than the original data.
# Result is not important, but the speed to test if it the notebook work is.

ENV_TYPE = "test" # "test" or "production"

##DataFolder Class

* Constructor (__init__): Initializes the DataFolder object with a default data path. If no default_data_path is provided, the current working directory is used as the default.

* getDataFolder: Returns the current working directory.

* setDataFolder: Changes the current working directory to the specified newDataFolder path.

* check_inside_zip: Checks the contents of a zip file specified by zip_path. It returns a list of files inside the zip file. If filenames_only is set to True, it returns only the filenames instead of the full file paths.

* get_file_path: Searches for a file with the given filename within a specified folder_path. It returns a list of file paths where the file is found. If subFolders is True, it searches in all subdirectories as well. If zipFiles is True, it also searches inside zip files. If filenames_only is set to True, it returns only the filenames instead of the full file paths.

* check_file_existence: Checks if a file with the given filename exists within a specified folder_path. It returns True if the file exists and False otherwise. It also supports searching in subdirectories (subFolders) and inside zip files (zipFiles).

* get_file_list: Retrieves a list of all files within a specified folder_path. If subFolders is True, it includes files from all subdirectories as well. If zipFiles is True, it includes files from zip files as well. If filenames_only is set to True, it returns only the filenames instead of the full file paths.

* openFile: Opens the file specified by file_path using the default system application associated with the file type. It uses the os.system function.

* executeCode: Executes the file specified by file_path as a command or script. It uses the subprocess.run function.

* writeFile: Writes the provided text to a file named filename in the specified folder_path. If the folder_path does not exist, a ValueError is raised.

* get_only_file_name: Extracts and returns only the filename from the given file_path, removing any directories or subdirectories.

In [6]:
import zipfile
import os
import subprocess

class DataFolder:
    def __init__(self, default_data_path='./'):
        self.default_data_path = default_data_path
        return

    def getDataFolder(self):
        return os.getcwd()

    def setDataFolder(self, newDataFolder):
        return os.chdir(newDataFolder)

    def check_inside_zip(self, zip_path, filenames_only=False):
        print(zip_path)
        if "\\" in zip_path:
            zip_path = zip_path.replace("\\", '/')


        listFilesinZip = []
        with zipfile.ZipFile(zip_path, 'r') as zip_file:
            for file in zip_file.namelist():
                listFilesinZip.append(zip_path + '/' + file)

        if filenames_only:
            for index, element in enumerate(listFilesinZip):
                listFilesinZip[index] = self.get_only_file_name(listFilesinZip[index])
        return listFilesinZip

                
        
    def get_file_path(self,filename, folder_path=None, subFolders=True, zipFiles=True, filenames_only = False):
        if folder_path is None:
            folder_path = self.default_data_path
        dataFolders = []
        if subFolders is True:
            for root, dirs, files in os.walk(folder_path):
                if filename in files:
                    file_path = os.path.join(root, filename)
                    if "\\" in file_path:
                        file_path = file_path.replace("\\", '/')
                    dataFolders.append(file_path)
                for file in files:
                    if file.endswith('.zip') and zipFiles is True:
                        insideZip = self.check_inside_zip(os.path.join(root, file))
                        for file_inside_zip in insideZip:
                            if file_inside_zip.endswith(filename):
                                dataFolders.append(file_inside_zip)
        else:
            for files in os.listdir(folder_path):
                if filename in files:
                    file_path = os.path.join(folder_path, filename)
                    if "\\" in file_path:
                        file_path = file_path.replace("\\", '/')
                    dataFolders.append(file_path)
                for file in files:
                    if file.endswith('.zip') and zipFiles is True:
                        insideZip = self.check_inside_zip(os.path.join(folder_path, file))
                        for file_inside_zip in insideZip:
                            if file_inside_zip.endswith(filename):
                                dataFolders.append(file_inside_zip)
        if filenames_only:
            for index, element in enumerate(dataFolders):
                dataFolders[index] = self.get_only_file_name(dataFolders[index])
        return dataFolders

    def check_file_existence(self, filename, folder_path=None, subFolders=True, zipFiles=True):
        if folder_path is None:
            folder_path = self.default_data_path
        if len(self.get_file_path(filename, folder_path=folder_path, subFolders=subFolders, zipFiles=zipFiles)):
            return True
        else:
            return False
    
    def get_file_list(self, folder_path=None, subFolders=True, zipFiles=True, filenames_only = False):
        if folder_path is None:
            folder_path = self.default_data_path    
        fileList = []
        if subFolders is True:
            for root, dirs, files in os.walk(folder_path):
                
                for file in files:
                    
                    file_path = os.path.join(folder_path, file)
                    if "\\" in file_path:
                        file_path = file_path.replace("\\", '/')
                    fileList.append(file_path)

                    if file.endswith('.zip') and zipFiles is True:
                        insideZip = self.check_inside_zip(os.path.join(root, file))
                        fileList.extend(insideZip)
        else:
            for files in os.listdir(folder_path):

                for file in files:
                    file_path = os.path.join(folder_path, file)
                    if "\\" in file_path:
                        file_path = file_path.replace("\\", '/')
                    fileList.append(file_path)

                    if file.endswith('.zip') and zipFiles is True:

                        insideZip = self.check_inside_zip(os.path.join(folder_path, file))
                        fileList.extend(insideZip)

        if filenames_only:
            for index, element in enumerate(fileList):
                fileList[index] = self.get_only_file_name(fileList[index])
        return fileList

    def openFile(self,file_path):
        os.system(file_path)
        return self
    
    def executeCode(self,file_path):
        subprocess.run([file_path])
        return self

    def writeFile(self, folder_path, filename, text):
        if not os.path.isdir(folder_path):
            raise ValueError("Informed path is not a folder to write the file.")
        file_path = os.path.join(folder_path, filename)
        with open(file_path, 'w') as file_write:
            file_write.write(text)
        
        return self

    def get_only_file_name(self,file_path):
        return file_path.split("/")[-1]


    #exampleusage

myDataFolder = DataFolder()
print(myDataFolder.get_file_list(), '\n')
print(myDataFolder.check_file_existence('hello world.py'), '\n')
print(myDataFolder.check_file_existence('california_housing_test.csv'), '\n')
print(myDataFolder.get_file_path('california_housing_test.csv'), '\n')


['./.last_opt_in_prompt.yaml', './active_config', './.last_survey_prompt.yaml', './.last_update_check.json', './config_sentinel', './gce', './config_default', './13.31.08.456554.log', './13.30.32.986418.log', './13.31.07.724890.log', './13.29.42.050505.log', './13.30.07.792467.log', './13.30.41.030388.log', './README.md', './anscombe.json', './california_housing_train.csv', './mnist_test.csv', './california_housing_test.csv', './mnist_train_small.csv'] 

False 

True 

['./sample_data/california_housing_test.csv'] 

