# Learn about file and folder with os.scandir

In [36]:
import os
import time

# os.scandir returns an iterator of DirEntry objects
# DirEntry objects have the following attributes:
#  name: the name of the file
#  path: the full path to the file
#  is_dir(): returns True if the entry is a directory
#  is_file(): returns True if the entry is a file
# we apply the set() function to convert it to a set
current_set = set(os.scandir('./test_folder'))
print(current_set)
print(len(current_set))

# Create a set containing the 'test.csv' entry
my_set = {entry for entry in current_set if entry.name == 'test.csv'}
print(my_set)

print(my_set in current_set)

{<DirEntry 'sub_folder'>, <DirEntry 'test1.csv'>, <DirEntry 'test.csv'>, <DirEntry 'test2.csv'>}
4
{<DirEntry 'test.csv'>}
False


In [72]:
import os
import time

# os.scandir returns an iterator of DirEntry objects
# DirEntry objects have the following attributes:
#  name: the name of the file
#  path: the full path to the file
#  is_dir(): returns True if the entry is a directory
#  is_file(): returns True if the entry is a file
# we apply the set() function to convert it to a set
current_set = list(os.scandir('./test_folder'))
print(current_set)

# Create a set containing the 'test.csv' entry
my_set = [entry for entry in current_set if entry.name == 'test.csv']
print(my_set)

all(item in current_set for item in my_set)

[<DirEntry '20230319-120052.txt'>, <DirEntry '20230319-120055.txt'>, <DirEntry 'sub_folder'>, <DirEntry 'test.csv'>, <DirEntry 'test1.csv'>, <DirEntry 'test2.csv'>]
[<DirEntry 'test.csv'>]


True

In [73]:
sub_set = [item for item in my_set if item not in current_set]
print(sub_set)

[]


In [74]:
my_set = list(os.scandir(('./')))

sub_set1 = [item for item in my_set if item not in current_set]

print(my_set)

[<DirEntry '.git'>, <DirEntry '.gitignore'>, <DirEntry 'HelloWorld.py'>, <DirEntry 'HelloWorld_2023-03-18_21-54-53.031451.txt'>, <DirEntry 'monitor_folder.ipynb'>, <DirEntry 'monitor_folder.py'>, <DirEntry 'README.md'>, <DirEntry 'test_folder'>]


In [38]:
for file in current_set:
  print(file)
  print(file.name)

<DirEntry 'sub_folder'>
sub_folder
<DirEntry 'test1.csv'>
test1.csv
<DirEntry 'test.csv'>
test.csv
<DirEntry 'test2.csv'>
test2.csv


In [39]:
folder_path = './test_folder'
initial_state = set(os.scandir(folder_path))
current_state = set(os.scandir(folder_path))
new_files = current_state - initial_state
print(new_files)

{<DirEntry 'test1.csv'>, <DirEntry 'test.csv'>, <DirEntry 'sub_folder'>, <DirEntry 'test2.csv'>}


In [40]:
new_files.issubset(initial_state)

False

In [41]:
print(initial_state)
print(current_state)
print(my_set)


{<DirEntry 'test1.csv'>, <DirEntry 'test2.csv'>, <DirEntry 'test.csv'>, <DirEntry 'sub_folder'>}
{<DirEntry 'test1.csv'>, <DirEntry 'test.csv'>, <DirEntry 'sub_folder'>, <DirEntry 'test2.csv'>}
{<DirEntry 'test.csv'>}


In [55]:
import os

def diff_2_sets_of_DirEntry(set1, set2):
    # Convert the directory entries to strings
    set1_str = set(str(entry) for entry in set1)
    set2_str = set(str(entry) for entry in set2)
    print(set1)
    print(set2)

    # Find the difference between the sets
    diff_str = set1_str.difference(set2_str)
    print(diff_str)
    
    # Filter out empty strings
    diff_str = [entry_str for entry_str in diff_str if entry_str]
    
    # Convert the strings back to DirEntry objects
    diff = set()
    for entry_str in diff_str:
        if entry_str:
            try:
                diff.add(os.scandir(entry_str).path)
            except OSError as e:
                print(f"Error scanning {entry_str}: {e}")
                continue

    # Print the difference
    return diff

diff = diff_2_sets_of_DirEntry(initial_state, my_set)

{<DirEntry 'test1.csv'>, <DirEntry 'test2.csv'>, <DirEntry 'test.csv'>, <DirEntry 'sub_folder'>}
{<DirEntry 'test.csv'>}
{"<DirEntry 'test2.csv'>", "<DirEntry 'sub_folder'>", "<DirEntry 'test1.csv'>"}
Error scanning <DirEntry 'test2.csv'>: [WinError 123] The filename, directory name, or volume label syntax is incorrect: "<DirEntry 'test2.csv'>"
Error scanning <DirEntry 'sub_folder'>: [WinError 123] The filename, directory name, or volume label syntax is incorrect: "<DirEntry 'sub_folder'>"
Error scanning <DirEntry 'test1.csv'>: [WinError 123] The filename, directory name, or volume label syntax is incorrect: "<DirEntry 'test1.csv'>"


In [56]:
diff_2_sets_of_DirEntry(initial_state, my_set)

{<DirEntry 'test1.csv'>, <DirEntry 'test2.csv'>, <DirEntry 'test.csv'>, <DirEntry 'sub_folder'>}
{<DirEntry 'test.csv'>}
{"<DirEntry 'test2.csv'>", "<DirEntry 'sub_folder'>", "<DirEntry 'test1.csv'>"}
Error scanning <DirEntry 'test2.csv'>: [WinError 123] The filename, directory name, or volume label syntax is incorrect: "<DirEntry 'test2.csv'>"
Error scanning <DirEntry 'sub_folder'>: [WinError 123] The filename, directory name, or volume label syntax is incorrect: "<DirEntry 'sub_folder'>"
Error scanning <DirEntry 'test1.csv'>: [WinError 123] The filename, directory name, or volume label syntax is incorrect: "<DirEntry 'test1.csv'>"


set()

In [57]:
print(diff)

set()
