## Task 2 - Finding Files

For this problem, the goal is to write code for finding all files under a directory (and all directories beneath it) that end with ".c".

Here is an example of a test directory listing, which can be downloaded here:

* ./testdir
* ./testdir/subdir1
* ./testdir/subdir1/a.c
* ./testdir/subdir1/a.h
* ./testdir/subdir2
* ./testdir/subdir2/.gitkeep
* ./testdir/subdir3
* ./testdir/subdir3/subsubdir1
* ./testdir/subdir3/subsubdir1/b.c
* ./testdir/subdir3/subsubdir1/b.h
* ./testdir/subdir4
* ./testdir/subdir4/.gitkeep
* ./testdir/subdir5
* ./testdir/subdir5/a.c
* ./testdir/subdir5/a.h
* ./testdir/t1.c
* ./testdir/t1.h

Python's os module will be useful—in particular, you may want to use the following resources:

os.path.isdir(path)

os.path.isfile(path)

os.listdir(directory)

os.path.join(...)

Note: `os.walk()` is a handy Python method which can achieve this task very easily. However, for this problem you are not allowed to use `os.walk()`.

Here is some code for the function to get you started: 

In [32]:
# imports
import os

def find_files(suffix, path):
    """
    Find all files beneath path whose file name ends with the given suffix.

    Note that a path may contain further subdirectories
    and those subdirectories may also contain further subdirectories.

    There is no limit to how deep the subdirectories can be.

    Args:
      suffix(str): file extension to look for, with or without the leading
                   dot ("c" and ".c" are treated the same)
      path(str): file or directory to start the search from

    Returns:
       a list of paths; empty if path does not exist, if no suffix is
       given, or if nothing matches
    """

    # if path does not exist, print an error message and return an empty list
    if not os.path.exists(path):
        print("Provided path does not exist")
        return []

    # if no suffix is provided, print an error message and return an empty list
    if suffix == "":
        print("No suffix provided")
        return []

    # the ending a file name must have; compared with endswith() so that
    # "b.cpp" or a file inside a directory called "src.c" is NOT reported
    # when searching for "c"
    ending = suffix if suffix.startswith(".") else "." + suffix

    # if provided path is a file, check if the file itself has the right suffix
    if os.path.isfile(path):
        return [path] if path.endswith(ending) else []

    # anything else that is not a directory cannot be listed
    if not os.path.isdir(path):
        return []

    # list to store the paths of the files with the suffix
    suffix_files = []

    for item in os.listdir(path):
        # convert item name to a path
        current_path = os.path.join(path, item)
        if os.path.isdir(current_path):
            # descend into the subdirectory and collect its matches
            suffix_files.extend(find_files(suffix, current_path))
        elif os.path.isfile(current_path) and item.endswith(ending):
            suffix_files.append(current_path)

    return suffix_files

In [33]:
# every case is a (suffix, path) pair; the expected result is noted beside it
SEPARATOR = "--------------------------------------------"

standard_cases = [
    # ['./testdir/subdir3/subsubdir1/b.c', './testdir/subdir1/a.c', './testdir/t1.c', './testdir/subdir5/a.c']
    ("c", "./testdir"),
    # ['./testdir/subdir1/a.c']
    ("c", "./testdir/subdir1/a.c"),
]

edge_cases = [
    ("c", "./test"),    # [] - path does not exist
    ("", "./testdir"),  # [] - no suffix given
    ("", ""),           # [] - neither suffix nor path given
]

for case_suffix, case_path in standard_cases + edge_cases:
    print(SEPARATOR)
    print("Call function: find_files('{}','{}'):".format(case_suffix, case_path))
    print(find_files(case_suffix, case_path))

--------------------------------------------
Call function: find_files('c','./testdir'):
['./testdir/subdir3/subsubdir1/b.c', './testdir/subdir1/a.c', './testdir/t1.c', './testdir/subdir5/a.c']
--------------------------------------------
Call function: find_files('c','./testdir/subdir1/a.c'):
['./testdir/subdir1/a.c']
--------------------------------------------
Call function: find_files('c','./test'):
Provided path does not exist
[]
--------------------------------------------
Call function: find_files('','./testdir'):
No suffix provided
[]
--------------------------------------------
Call function: find_files('',''):
Provided path does not exist
[]


In [1]:
## Locally save and call this file ex.py ##

# Small tour of the os helpers that are useful for this task

import os

SCRIPT = "./ex.py"

# entries of the directory the script is started from
print(os.listdir("."))

# is the script itself present as a regular file?
print(os.path.isfile(SCRIPT))

# pure string check: does the name end with .py?
print(SCRIPT.endswith(".py"))

['task5_blockchain.ipynb', 'task1_lru_cache.ipynb', 'task3_data_compression.ipynb', '.ipynb_checkpoints', 'task4_active_directory.ipynb', 'task2_finding_files.ipynb', 'testdir']
False
True


In [6]:
# report for a directory and for a file whether os.path.isdir accepts it
for candidate in ("./testdir/", "./testdir/t1.c"):
    print(os.path.isdir(candidate))

True
False


In [5]:
# report for an existing and a misspelled file name whether os.path.isfile accepts it
for candidate in ("./testdir/t1.c", "./testdir/t11.c"):
    print(os.path.isfile(candidate))

True
False

In [7]:
# show the names of everything directly inside the test directory
testdir_entries = os.listdir("./testdir/")
print(testdir_entries)

['subdir3', 'subdir1', 'subdir4', 'subdir2', 't1.h', 't1.c', 'subdir5']


In [None]:
# to join path string
# NOTE: the `...` stands in for the path components to join; executed as
# written it passes the Ellipsis object, which os.path.join rejects with a
# TypeError, so substitute real strings, e.g. os.path.join("./testdir", "t1.c")
os.path.join(...)

In [25]:
# start exploring from the current working directory
base_path = "./"
# os.listdir returns the bare entry names (not full paths) inside base_path
items = os.listdir(base_path)

In [26]:
items

['task5_blockchain.ipynb',
 'task1_lru_cache.ipynb',
 'task3_data_compression.ipynb',
 '.ipynb_checkpoints',
 'task4_active_directory.ipynb',
 'task2_finding_files.ipynb',
 'testdir']

In [27]:
# pop() without an index removes and returns the last entry of the list
current = items.pop()

In [28]:
items

['task5_blockchain.ipynb',
 'task1_lru_cache.ipynb',
 'task3_data_compression.ipynb',
 '.ipynb_checkpoints',
 'task4_active_directory.ipynb',
 'task2_finding_files.ipynb']

In [29]:
current

'testdir'

In [31]:
# the entry is only a name; prefix base_path to turn it into a usable path
current = os.path.join(base_path, current)

In [32]:
os.path.isdir("./testdir/")

True

In [8]:
def test(path, suffix):
    """
    Recursively collect the files beneath path whose name ends with suffix.

    Every match is printed as soon as it is found, in addition to being
    returned.

    Args:
      path(str): file or directory to search
      suffix(str): literal ending to match, including the dot (e.g. ".c");
                   it may have any length

    Returns:
       a list of matching paths; empty if path does not exist, if suffix is
       empty, or if nothing matches
    """

    c_files = list()

    if not os.path.exists(path):
        print("Provided path does not exist")
        return c_files

    # an empty suffix has never matched anything; keep that behaviour
    # explicitly, because endswith("") would otherwise match every file
    if not suffix:
        return c_files

    # a plain file cannot be listed, so check the file itself
    if os.path.isfile(path):
        if path.endswith(suffix):
            c_files.append(path)
            print(path)
        return c_files

    # loop over the items in the directory
    for item in os.listdir(path):
        # convert item name to a path
        current_path = os.path.join(path, item)
        # if it is a directory, call the function recursively
        if os.path.isdir(current_path):
            c_files.extend(test(current_path, suffix))
        # endswith() compares the whole suffix, whatever its length
        # (a fixed two-character slice only ever matched ".c" / ".h")
        elif os.path.isfile(current_path) and current_path.endswith(suffix):
            c_files.append(current_path)
            print(current_path)

    return c_files


# The name starts with pytest's default "test" discovery prefix although this
# is a helper taking arguments; mark it so test runners do not collect it.
test.__test__ = False

In [9]:
test("./testdir", ".c")

./testdir/subdir3/subsubdir1/b.c
./testdir/subdir1/a.c
./testdir/t1.c
./testdir/subdir5/a.c


['./testdir/subdir3/subsubdir1/b.c',
 './testdir/subdir1/a.c',
 './testdir/t1.c',
 './testdir/subdir5/a.c']

In [69]:
test("./", ".h")

./testdir/subdir3/subsubdir1/b.h
./testdir/subdir1/a.h
./testdir/t1.h
./testdir/subdir5/a.h


['./testdir/subdir3/subsubdir1/b.h',
 './testdir/subdir1/a.h',
 './testdir/t1.h',
 './testdir/subdir5/a.h']