# Chapter 11 - Organizing files

## Examples

### Shutil
= Shell Utility


1. Create folder

In [None]:
from pathlib import Path

# Ensure a 'spam' folder exists directly under the working directory
# (exist_ok=True makes re-running the cell harmless).
current_directory = Path.cwd()
spam_folder = current_directory / 'spam'
spam_folder.mkdir(exist_ok=True)

# (Re)create file1.txt inside the spam folder with a short text payload.
target_file = current_directory / 'spam/file1.txt'
with target_file.open('w', encoding='utf-8') as handle:
    handle.write('hellaw')

2. Copy file and whole folder

In [4]:
import shutil

# copy file1.txt to create a duplicate called file2.txt
#shutil.copy(current_directory / 'spam/file1.txt', current_directory /'spam/file2.txt')


In [1]:
# duplicate whole folder
#shutil.copytree(current_directory / 'spam', current_directory / 'spam_backup')

3. Move files/folders

In [None]:
import shutil
from pathlib import Path
current_dir = Path.cwd()

# create new folder /spam2
# The destination folder must exist: if it does not, shutil.move() renames
# file1.txt to a plain file called 'spam2' instead of moving it into a folder.
# exist_ok=True keeps the cell safe to run more than once.
(current_dir / 'spam2').mkdir(exist_ok=True)

# move file1.txt into /spam2
shutil.move(current_dir / 'spam/file1.txt', current_dir / 'spam2')
print('Done')

Done


In [11]:
# shutil.move(current_dir / 'spam/file1.txt', current_dir / 'spam2/new_name.txt')

4. Permanently Delete files


* Calling shutil.rmtree(path) will delete (that is, remove) the entire folder tree at path, including all the files and subfolders it contains.
* Calling os.unlink(path) will delete the single file at path.
* Calling os.rmdir(path) will delete the folder at path. This folder must be empty.

In [21]:
import os
from pathlib import Path

# Dry run over every .txt file directly inside ./spam.
# The real deletion is left commented out on purpose; enable the two
# commented lines below to actually remove the files.
current_dir = Path.cwd() / 'spam'
for txt_file in current_dir.glob('*.txt'):
    # os.unlink(txt_file)
    # print('Deleting', txt_file)
    print('Uncomment previous line')

5. Delete to recycle bin

In [23]:
import send2trash

# Send spam2/file1.txt (path relative to the working directory) to the
# recycle bin via the third-party send2trash package.
send2trash.send2trash('./spam2/file1.txt')

Explore directory tree

In [26]:
import os
# Called without an argument, os.listdir() returns the names of the entries
# in the current working directory.
os.listdir()

['examples11.ipynb', 'spam', 'spam2', 'spam_backup']

In [29]:
from pathlib import Path
# Working directory whose entries can be listed below.
current_dir = Path.cwd()

# Uncomment to list every entry of current_dir as path objects.
#list(current_dir.iterdir())

In [30]:
from pathlib import Path
current_dir = Path.cwd()

# Create the sample folders, parents before children; exist_ok=True makes
# the cell safe to re-run.
for folder in ('spam', 'spam/eggs', 'spam/eggs2', 'spam/eggs/bacon'):
    (current_dir / folder).mkdir(exist_ok=True)

# Drop a small text file at several depths of the tree.
sample_files = (
    'spam/file1.txt',
    'spam/eggs/file2.txt',
    'spam/eggs/file3.txt',
    'spam/eggs/bacon/file4.txt',
)
for relative_path in sample_files:
    (current_dir / relative_path).write_text('Hello', encoding='utf-8')

In [33]:

import os, shutil
from pathlib import Path
current_dir = Path.cwd()

# Walk the whole tree below ./spam and rename every file to upper case.
# To trace the traversal as well, print folder_name and each entry of
# subfolders at the top of the loop body.
for folder_name, subfolders, filenames in os.walk(current_dir / 'spam'):
    parent = Path(folder_name)
    for name in filenames:
        print(f'FILE : {name}')
        # Rename file to uppercase: a move within the same folder is a rename.
        shutil.move(parent / name, parent / name.upper())

    # A blank line separates one folder's listing from the next.
    print('')

FILE : FILE1.TXT

FILE : FILE2.TXT
FILE : FILE3.TXT

FILE : FILE4.TXT




Compressing to zipfile

In [None]:
import zipfile

from pathlib import Path

# The ./zip folder must exist before a file can be created inside it.
zip_dir = Path('./zip')
zip_dir.mkdir(exist_ok=True)

# create file1.txt in /zip
with open(zip_dir / 'file1.txt', 'w', encoding='utf-8') as file_obj:
    file_obj.write('Hello' * 10000)

# compress in zip file
# The source path has to point at the file written above (it lives in ./zip,
# not in the working directory); arcname stores it in the archive as plain
# 'file1.txt' without the folder prefix.
with zipfile.ZipFile(zip_dir / 'example.zip', 'w') as example_zip:
    example_zip.write(
        zip_dir / 'file1.txt',
        arcname='file1.txt',
        compress_type=zipfile.ZIP_DEFLATED,
        compresslevel=9,
    )

In [42]:
# Open the archive read-only. The with-block closes it even if one of the
# lookups below raises (for example when 'file1.txt' is not in the archive).
with zipfile.ZipFile('./zip/example.zip') as example_zip:
    print(example_zip.namelist())

    # Compare the stored (compressed) size with the original size.
    file1_info = example_zip.getinfo('file1.txt')
    print(file1_info.file_size)
    print(file1_info.compress_size)
    print(f'Compressed file is {round(file1_info.file_size / file1_info.compress_size, 2)}x smaller!')

['file1.txt']
50000
97
Compressed file is 515.46x smaller!


Extract zip files

In [45]:
import zipfile

# Extract every member into ./zip/extracted. The with-block closes the
# archive even if extraction fails part-way through.
with zipfile.ZipFile('./zip/example.zip') as example_zip:
    example_zip.extractall('./zip/extracted')

In [51]:
# Extract only file1.txt into ./zip/extracted2. The archive was previously
# left open; the with-block guarantees the file handle is released.
with zipfile.ZipFile('./zip/example.zip') as example_zip:
    extracted_path = example_zip.extract('file1.txt', './zip/extracted2')

# Trailing expression so the notebook still displays the extracted path.
extracted_path

'zip\\extracted2\\file1.txt'

## Practice Questions

To keep my folder structure private, the following code snippets were not run against real folders, and no new folders were created.  
As a result, this code has not been properly tested.

### Selectively copying

Write a program that walks through a folder tree and searches for files with a certain file extension (such as .pdf or .jpg). Copy these files from their current location to a new folder.

In [None]:
import os
import shutil
from pathlib import Path

# Root of the search: the current working directory.
current_folder = Path.cwd()
# Destination for the copies; note that it lives inside the folder being searched.
new_folder = current_folder / 'new-folder'
new_folder.mkdir(exist_ok=True)

# File extension to look for (a single extension, despite the plural name).
extensions = '.pdf'

def selective_copy(extension, current_directory, destination=None):
    """Copy every file with the given extension found under a folder tree.

    Args:
        extension: File extension to match, e.g. '.pdf'. Matching is
            case-insensitive.
        current_directory: Root of the folder tree to walk.
        destination: Folder that receives the copies. Defaults to the
            module-level ``new_folder``. It is created if missing.

    Note:
        Copies are flattened into one folder, so files that share a name in
        different subfolders overwrite each other in the destination.
    """
    destination = Path(new_folder if destination is None else destination)
    destination.mkdir(parents=True, exist_ok=True)
    destination_resolved = destination.resolve()
    wanted = extension.lower()

    for folder_name, subfolders, filenames in os.walk(current_directory):
        folder = Path(folder_name)

        # Prune the destination from the walk (os.walk honours in-place edits
        # of the subfolder list). Otherwise a second run would try to copy
        # the earlier copies onto themselves and raise shutil.SameFileError.
        subfolders[:] = [
            subfolder for subfolder in subfolders
            if (folder / subfolder).resolve() != destination_resolved
        ]
        if folder.resolve() == destination_resolved:
            continue

        for filename in filenames:
            # Test the individual file name, not the whole list of names.
            if filename.lower().endswith(wanted):
                source_file = folder / filename
                shutil.copy(source_file, destination)
                print(f'Copied: {source_file.name}')

# Copy every file matching `extensions` found under the current working directory.
selective_copy(extensions, current_folder)

### Deleting Unneeded Files
It’s not uncommon for a few unneeded but humongous files or folders to take up the bulk of the space on your hard drive. If you’re trying to free up room on your computer, it’s more effective to identify the largest unneeded files first.  
Write a program that walks through a folder tree and searches for exceptionally large files or folders—say, ones that have a file size of more than 100MB. (Remember that, to get a file’s size, you can use os.path.getsize() from the os module.) Print these files with their absolute path to the screen.

In [None]:
import os
from pathlib import Path

# Root of the search: the current working directory.
current_folder = Path.cwd()

def find_huge_files(current_dir, size_mb=100):
    """Print every file under a folder tree that is at least ``size_mb`` MB.

    Args:
        current_dir: Root of the folder tree to walk.
        size_mb: Size threshold in megabytes (1 MB = 1024 * 1024 bytes).
            Files whose size is greater than or equal to it are reported.

    Each hit is printed with its resolved absolute path and its size in MB.
    Files that cannot be inspected (removed during the scan, permission
    denied, ...) are skipped instead of aborting the whole walk.
    """
    bytes_per_mb = 1024 * 1024
    # convert MB to bytes
    size_threshold = size_mb * bytes_per_mb

    for folder_name, subfolders, filenames in os.walk(current_dir):
        for filename in filenames:
            file_path = Path(folder_name) / filename
            try:
                if not file_path.is_file():
                    continue
                # Read the size once and reuse it for the test and the report.
                size = os.path.getsize(file_path)
            except OSError:
                continue
            if size >= size_threshold:
                print(f'Huge file: {file_path.resolve()}\n   Size: {size / bytes_per_mb:.2f} MB')
 
# Scan the current working directory with the default 100 MB threshold.
find_huge_files(current_folder)

### Renumbering Files
Write a program that finds all files with a given prefix, such as spam001.txt, spam002.txt, and so on, in a single folder and locates any gaps in the numbering (such as if there is a spam001.txt and a spam003.txt but no spam002.txt). Have the program rename all the later files to close this gap.

To create these example files (skipping spam042.txt, spam086.txt, and spam103.txt), run the following code:

for i in range(1, 121):
    if i not in (42, 86, 103):
        with open(f'spam{str(i).zfill(3)}.txt', 'w') as file:
            pass
    
As an added challenge, write another program that can insert gaps into numbered files (and bump up the numbers in the filenames after the gap) so that a new file can be inserted.

In [None]:
import re
from pathlib import Path

folder = Path.cwd()
prefix = 'spam'
suffix = '.txt'


def close_numbering_gaps(folder, prefix, suffix, width=3):
    """Renumber ``<prefix><number><suffix>`` files so the numbers have no gaps.

    Files are ordered by their current number and renamed to 1, 2, 3, ...
    in that order, each number zero-padded to ``width`` digits.

    Args:
        folder: Folder containing the numbered files (not searched recursively).
        prefix: Text in front of the number, e.g. 'spam'.
        suffix: Text after the number, e.g. '.txt'.
        width: Minimum number of digits in the new names.

    Raises:
        FileExistsError: If a target name is already taken by another file
            (for example when a file numbered 0 or two spellings of the same
            number exist). Nothing is overwritten.
    """
    folder = Path(folder)
    # Escape prefix/suffix so characters such as '.' are matched literally,
    # and require the whole name to match so that a name like
    # 'spam12atxt.txt' is not mistaken for a numbered file.
    name_pattern = re.compile(rf'{re.escape(prefix)}(\d+){re.escape(suffix)}')

    # extract numbers and associate files
    file_tuples = []
    for candidate in sorted(folder.glob(f'{prefix}*{suffix}')):
        match = name_pattern.fullmatch(candidate.name)
        if match:
            file_tuples.append((int(match.group(1)), candidate))

    # order by number
    file_tuples.sort(key=lambda item: item[0])

    # renumber for gaps
    for i, (_, file_path) in enumerate(file_tuples, start=1):
        new_name = f'{prefix}{str(i).zfill(width)}{suffix}'
        if file_path.name == new_name:
            continue
        new_path = folder / new_name
        # Path.rename() silently replaces an existing file on POSIX systems;
        # stop instead of losing data.
        if new_path.exists():
            raise FileExistsError(
                f'Cannot rename {file_path.name} to {new_name}: target already exists'
            )
        print(f'Renaming {file_path.name} \n   in {new_name}')
        file_path.rename(new_path)


close_numbering_gaps(folder, prefix, suffix)

### Converting Dates from American- to European-Style
Say your boss emails you thousands of files with American-style dates (MM-DD-YYYY) in their names and needs them renamed to European-style dates (DD-MM-YYYY). This boring task could take all day to do by hand! Instead, write a program that does the following:

  1.  Searches all filenames in the current working directory and all subdirectories for American-style dates. Use the os.walk() function to go through the subfolders.

  2.  Uses regular expressions to identify filenames with the MM-DD-YYYY pattern in them—for example, spam12-31-1900.txt. Assume the months and days always use two digits, and that files with non-date matches don’t exist. (You won’t find files named something like 99-99-9999.txt.)

  3.  When a filename is found, renames the file with the month and day swapped to make it European-style. Use the shutil.move() function to do the renaming.

## 

In [None]:
import os
import re
import shutil

folder = os.getcwd()

# Regex for MM-DD-YYYY
date_pattern = re.compile(r'(.*?)(\d{2})-(\d{2})-(\d{4})(.*)')


def rename_american_dates(folder):
    """Rename files with MM-DD-YYYY dates in their names to DD-MM-YYYY.

    Walks ``folder`` and all of its subfolders. A file is renamed only when
    the first date-like match in its name is a plausible American date
    (month 01-12, day 01-31), so names that are clearly European already
    (such as 31-12-1900) are left alone when the cell is run again.
    A rename is skipped, with a message, if the new name is already taken,
    so no existing file is ever overwritten.

    Args:
        folder: Root folder of the tree to process.
    """
    for foldername, subfolders, filenames in os.walk(folder):
        for filename in filenames:
            mo = date_pattern.search(filename)
            if not mo:
                continue
            before, month, day, year, after = mo.groups()

            # Ignore matches that cannot be a month/day pair.
            if not (1 <= int(month) <= 12 and 1 <= int(day) <= 31):
                continue

            # New name in European format
            new_name = f'{before}{day}-{month}-{year}{after}'
            if new_name == filename:
                # Day and month are equal: nothing to rename.
                continue

            # Full paths
            old_path = os.path.join(foldername, filename)
            new_path = os.path.join(foldername, new_name)

            if os.path.exists(new_path):
                print(f'Skipping: {old_path} -> {new_path} (target already exists)')
                continue

            print(f'Renaming: {old_path} -> {new_path}')
            shutil.move(old_path, new_path)


rename_american_dates(folder)