# List the files sorted in alphabetical order

In [5]:
import os
import glob
import re

In [4]:
def list_files(folder="."):
    """Print the names of the regular files in *folder*, one per line, sorted.

    Args:
        folder: Directory to list. It is used literally: glob metacharacters
            (``*``, ``?``, ``[``) in the directory name are escaped so they
            are not interpreted as wildcards.

    Returns:
        None. The base name of every matching file is written to stdout.

    Notes:
        * Entries that are not regular files (e.g. sub-directories) are
          skipped via ``os.path.isfile``.
        * The listing comes from the glob pattern ``*``, which does not match
          names starting with a dot.
        * Ordering is Python's default string ordering (case-sensitive).
    """
    # Escape the directory part only; the trailing "*" must stay a wildcard.
    pattern = glob.escape(folder) + os.sep + "*"
    for filepath in sorted(filter(os.path.isfile, glob.glob(pattern))):
        print(os.path.basename(filepath))

In [5]:
# List the regular files of the current directory (the default folder ".").
list_files()

list_files_in_dir_sorted.ipynb
python_file_flags.ipynb
python_io_file.ipynb
read_file_from_generater_test.txt
take_n.py
take_n_from_i.py


# List files matching pattern

In [47]:
!touch 2021QTR1 2020QTR3 2019QTR2.gz

In [48]:
def list_files_with_pattern(data_dir=".", year=None, qtr=None):
    """Return the paths in *data_dir* whose names look like ``<year>QTR<qtr>``.

    Args:
        data_dir: Directory to search. It is used literally: glob
            metacharacters in the directory name are escaped.
        year: Four-digit year between 1000 and 2999, as a string or an int.
            A falsy value (``None``, ``""``) matches any prefix (``*``).
        qtr: Quarter ``1``-``4``, as a string or an int. A falsy value
            matches any single character (``?``).

    Returns:
        A sorted list of matching paths, each formed as
        ``data_dir + os.sep + name``. Matching directories are included; no
        ``os.path.isfile`` filter is applied.

    Raises:
        AssertionError: If *year* or *qtr* is given but is not a valid
            four-digit year / quarter digit.
    """
    # Validation uses fullmatch so that trailing text such as "2021*" or "12"
    # is rejected instead of leaking extra wildcards into the glob pattern.
    # AssertionError is raised explicitly (rather than through `assert`) so
    # the checks survive `python -O` while keeping the original exception type.
    if year:
        year = str(year)
        if not re.fullmatch(r"[1-2][0-9]{3}", year):
            raise AssertionError(f"Invalid year {year}")
    if qtr:
        qtr = str(qtr)
        if not re.fullmatch(r"[1-4]", qtr):
            raise AssertionError(f"Invalid quarter {qtr}")

    pattern = f"{year or '*'}QTR{qtr or '?'}"

    # Escape only the directory part; the file-name part is the real pattern.
    return sorted(glob.glob(glob.escape(data_dir) + os.sep + pattern))

In [49]:
# No filters: the year part becomes "*" and the quarter "?", so every name
# of the form "*QTR?" matches (2019QTR2.gz does not: "?" must be the last char).
for f in list_files_with_pattern():
    print(f)

./2021QTR1
./2020QTR3


In [50]:
# Filter by year only; the quarter stays a single-character wildcard.
for f in list_files_with_pattern(year="2021"):
    print(f)

./2021QTR1


In [51]:
# Filter by quarter only; the year part stays a "*" wildcard.
for f in list_files_with_pattern(qtr="3"):
    print(f)

./2020QTR3


In [52]:
# Filter by both year and quarter.
for f in list_files_with_pattern(year="2020", qtr="3"):
    print(f)

./2020QTR3


In [53]:
!rm 2021QTR1 2020QTR3 2019QTR2.gz

# List files excluding matched suffixes

In [84]:
def list_files_with_ignores(data_dir=".", suffixes=(".gz", ".Z")):
    """Return the regular files in *data_dir*, skipping names with given suffixes.

    Args:
        data_dir: Directory to search. It is used literally: glob
            metacharacters in the directory name are escaped.
        suffixes: Iterable of name suffixes to exclude. A single string is
            treated as one suffix. A falsy value (``None``, ``[]``, ``""``)
            excludes nothing. Each suffix is appended to ``*`` in a glob
            pattern, so wildcard characters inside a suffix keep their glob
            meaning.

    Returns:
        A sorted list of paths, each formed as ``data_dir + os.sep + name``.
        Entries that are not regular files (e.g. sub-directories) are left
        out, and names starting with a dot are never matched by the ``*``
        pattern used for the listing.
    """
    if not suffixes:
        suffixes = ()
    elif isinstance(suffixes, str):
        # Iterating a bare string would treat every character as a suffix.
        suffixes = (suffixes,)

    # Escape only the directory part; the file-name part is the real pattern.
    base = glob.escape(data_dir) + os.sep

    ignored = set()
    for suffix in suffixes:
        ignored.update(glob.glob(f"{base}*{suffix}"))

    candidates = set(glob.glob(base + "*")) - ignored
    return sorted(filter(os.path.isfile, candidates))

In [85]:
!touch 2021QTR1 2020QTR3 2019QTR2.gz

In [86]:
# Default suffixes (".gz", ".Z") are ignored, so 2019QTR2.gz is not listed.
for f in list_files_with_ignores():
    print(f)

./2020QTR3
./2021QTR1
./list_files_in_dir.ipynb
./python_file_flags.ipynb
./python_io_file.ipynb
./read_file_from_generater_test.txt
./take_n.py
./take_n_from_i.py


In [87]:
!rm 2021QTR1 2020QTR3 2019QTR2.gz