# The Shutil Module

## Copying Files

In [1]:
import shutil, os
os.chdir('../')
os.listdir()

['CH9', 'CH7', 'CH8', '.DS_Store', 'README.md', '.ipynb_checkpoints', '.git']

In [2]:
shutil.copy('CH8/regex_search.txt', 'CH9')

'CH9/regex_search.txt'

In [3]:
shutil.copy('CH8/regex_search.txt', 'CH9/regex_search_copied.txt')

'CH9/regex_search_copied.txt'

## Coping Folders

In [4]:
shutil.copytree('CH8', 'CH9/CH8_copied')

'CH9/CH8_copied'

## Permanently Deleting Files and Folders

In [9]:
## this is a good habit before "deleting" files/folders
for filename in os.listdir():
    if filename.endswith('.txt'):
#         os.unlink(filename)   # This will delete .txt files
        print(filename)

regex_search_copied.txt
regex_search.txt


## Safe Deletes with the send2trash Module

In [11]:
import send2trash

bacon_file = open('bacon.txt', 'a')  # creates the file
bacon_file.write('Bacon is not a vegetable.')

25

In [12]:
bacon_file.close()
send2trash.send2trash('bacon.txt')

In [13]:
os.listdir()

['.DS_Store',
 'CH8_copied',
 'regex_search_copied.txt',
 'Ch9_Organizing Files.ipynb',
 '.ipynb_checkpoints',
 'regex_search.txt']

In [14]:
send2trash.send2trash('regex_search.txt')
send2trash.send2trash('regex_search_copied.txt')
send2trash.send2trash('CH8_copied')
os.listdir()

['.DS_Store', 'Ch9_Organizing Files.ipynb', '.ipynb_checkpoints']

## Walking a Directory Tree

In [17]:
os.walk('../')

<generator object walk at 0x7fc0c82b8468>

In [21]:
for folder_name, sub_folders, file_names in os.walk('../'):
    print('The current folder is ' + folder_name)
    
    for sub_folder in sub_folders:
        print('SUBFOLDER OF ' + folder_name + ': ' + sub_folder)
    
    for file_name in file_names:
        print('FILE INSIDE ' + folder_name + ': ' + file_name)
    
    print()

The current folder is ../
SUBFOLDER OF ../: CH9
SUBFOLDER OF ../: CH7
SUBFOLDER OF ../: CH8
SUBFOLDER OF ../: .ipynb_checkpoints
SUBFOLDER OF ../: .git
FILE INSIDE ../: .DS_Store
FILE INSIDE ../: README.md

The current folder is ../CH9
SUBFOLDER OF ../CH9: .ipynb_checkpoints
FILE INSIDE ../CH9: .DS_Store
FILE INSIDE ../CH9: Ch9_Organizing Files.ipynb

The current folder is ../CH9/.ipynb_checkpoints
FILE INSIDE ../CH9/.ipynb_checkpoints: Ch9_Organizing Files-checkpoint.ipynb

The current folder is ../CH7
SUBFOLDER OF ../CH7: .ipynb_checkpoints
FILE INSIDE ../CH7: Ch.7 Regex.ipynb

The current folder is ../CH7/.ipynb_checkpoints
FILE INSIDE ../CH7/.ipynb_checkpoints: Ch.7 Regex-checkpoint.ipynb

The current folder is ../CH8
FILE INSIDE ../CH8: madlibs_template.txt
FILE INSIDE ../CH8: mad_libs.py
FILE INSIDE ../CH8: .DS_Store
FILE INSIDE ../CH8: mcb.db
FILE INSIDE ../CH8: madlibs_template_Answered.txt
FILE INSIDE ../CH8: regex_search.py
FILE INSIDE ../CH8: regex_search.txt
FILE INSIDE ../

## Compressing Files with the zipfile Module

### Creating and Adding to Zip Files

In [26]:
import zipfile
with zipfile.ZipFile('new.zip', 'w') as newZip:
    newZip.write('spam.txt', compress_type=zipfile.ZIP_DEFLATED)

### Reading ZIP Files

In [31]:
with zipfile.ZipFile('new.zip') as example_zip:
    print(example_zip.namelist())
    spam_info = example_zip.getinfo('spam.txt')
    print(spam_info.file_size)
    print(spam_info.compress_size)
    print("Compressed file is {}x smaller!".format(
                    round(spam_info.file_size / spam_info.compress_size, 2)))

['spam.txt']
172
125
Compressed file is 1.38x smaller!


### Extracting from ZIP files

In [33]:
with zipfile.ZipFile('new.zip') as example_zip:
    example_zip.extractall()

In [36]:
with zipfile.ZipFile('new.zip') as example_zip:
    print(example_zip.extract('spam.txt', './some/new/folders'))

some/new/folders/spam.txt


## Project: Renaming Files with American-Style Dates to European-Style Dates
Amercian-style dates(MM-DD-YYYY) --> European-style dates(DD-MM-YYYY). 
program does:
- it searches all the filenames in the current working directory for American-style dates
- when one is found, it renames the file with the month and day swapped to make it European-style

In [None]:
# %load Renaming_Files.py
#!/usr/bin/env python3
"""
Created on Wed Aug 21 10:36:59 2019

@author: soohyeonkim

Renaming_Files.py - Renames filenames with American mm-dd-yyyy date format
to European dd-mm-yyyy
"""

import shutil, os, re

# TODO: Create a regex that matches files with the American date format
date_patter = re.compile(r"""
                    ^(.*?)           # all text before the date
                    ((0|1)?\d)-      # one or two digits for the month
                    ((0|1|2|3)?\d)-  # oen or two digits for the day
                    ((19|20)\d{2})   # four digits for the year
                    (.*?)$           # all text after the date
                         """, re.X)

# TODO: loop over the files in the working directory
for amer_filename in os.listdir():
    mo = datePattern.search(amer_filename)

    # TODO: skip files without a date
    if mo == None:
        continue

    # TODO: Get the different parts of the filename.
    before_part = mo.group(1)
    month_part = mo.group(2)
    day_part    = mo.group(4)
    year_part   = mo.group(6)
    after_part  = mo.group(8)

    # TODO: Form the European-style filename.
    euro_filename = before_part + day_part + '-' \
                   + month_part + '-'           \
                   + year_part + '-'
    
    # TODO: Get the full, absolute file paths
    abs_working_dir = os.path.abspath('.')
    amer_filename = os.path.join(abs_working_dir, amer_filename)
    euro_filname = os.path.join(abs_working_dir, euro_filename)

    # TODO: Rename the fiels
    print('Renaming "{}" to "{}"...'.format(amer_filename, euro_filename))
#    shutil.move(amer_filename, euro_filename) # uncomment after testing



## Project: Backing Up a Folder into a Zip File
You'd like to keep different versions, so you want the ZIP file's filename to increment each time it is made; for example, AlsPythonBook_1.zip, AlsPythonBook_2.zip and so on. 

In [48]:
# %load backup_to_zip.py
#!/usr/bin/env python3
"""
Created on Wed Aug 21 11:32:13 2019

@author: soohyeonkim

Copies an entire folder and its content into a ZIP file whose filename
incrementsf
"""

import zipfile, os

def backup_to_zip(folder):
    # Backup the entire contents of "folder" into a ZIP file.
    folder = os.path.abspath(folder) # make surefolder is absolute
    
    # Figure out the filename this code should use based on
    # what files already exist
    number = 1
    while True:
        zip_filename = os.path.basename(folder) \
                            + '_' + str(number) + '.zip'
        if not os.path.exists(zip_filename):
            break
        number += 1
    
    # Create the ZIP file
    print('Creating {}...'.format(zip_filename))
    backup_zip = zipfile.ZipFile(zip_filename, 'w')
    
    # Walk the entire folder tree and compress the files in each folder. 
    for foldername, subfolders, filenames in os.walk(folder):
        print('Adding files in {}...'.format(foldername))
        # Add the current folder to the zip file.
        backup_zip.write(foldername)
        # Add all the files in this folder to the zip file.
        for filename in filenames:
            new_base = os.path.basename(folder) + '_'
            if filename.startswith(new_base) and filename.endswith('.zip'):
                continue    # Don't back up the backup zip files
            
            backup_zip.write(os.path.join(foldername, filename))
        
    backup_zip.close()
        
    print('Done.')
       
#### TEST ********
backup_to_zip('.')

Creating CH9_2.zip...
Adding files in /Users/soohyeonkim/GoogleDrive/Coding/python_coding/Automate_The_Boring_Stuff_With_Python/Automate_The_Boring_Stuff_With_Python/CH9...
Adding files in /Users/soohyeonkim/GoogleDrive/Coding/python_coding/Automate_The_Boring_Stuff_With_Python/Automate_The_Boring_Stuff_With_Python/CH9/.ipynb_checkpoints...
Adding files in /Users/soohyeonkim/GoogleDrive/Coding/python_coding/Automate_The_Boring_Stuff_With_Python/Automate_The_Boring_Stuff_With_Python/CH9/some...
Adding files in /Users/soohyeonkim/GoogleDrive/Coding/python_coding/Automate_The_Boring_Stuff_With_Python/Automate_The_Boring_Stuff_With_Python/CH9/some/new...
Adding files in /Users/soohyeonkim/GoogleDrive/Coding/python_coding/Automate_The_Boring_Stuff_With_Python/Automate_The_Boring_Stuff_With_Python/CH9/some/new/folders...
Done.


# Practice Projects

## 1. Selective copy

In [55]:
# %load selective_copy.py
#!/usr/bin/env python3
"""
Created on Wed Aug 21 12:46:58 2019

@author: soohyeonkim

walks through a folder tree and searches for files with a certain file
extension (e.g., .pdf, .jpg). Copy these files from whatever location 
they are in to a new folder.

"""

import os, re, shutil

def selective_copy():
    # TODO: get current location and show it to a user
    cwd = os.getcwd()
    print("Your current location is {}".format(cwd))
    
    # TODO: Get inputs for root, extension, destination path
    while True:
        root = input("What location do you want to start search from? ").strip()
        if os.path.exists(root): # make sure it is legitimate
            break
        else:
            print("Please input root directory in relative or absolute path")
    
    ext = input("What file format are you looking for? (e.g., .jpg, .txt) ")\
                    .strip().lower()
    
    if not ext.startswith("."): # in case forget putting "."
        ext = "." + ext
    
    while True:
        dest =  input("Where do you want copies located? Please include folder name ")\
                    .strip()
        if os.path.exists(dest): # make sure it exists
            break   # exists
        else:
            # if not, make new one
            try:
                 os.mkdir(dest)
            except OSError: # in case of error
                print ("Creation of the directory %s failed" % dest)
                print("Please input destination in relative or absolute path")
                continue # go get input again
            
            if os.path.exists(dest): # check again if made 
                break
            else: # if not, address is wrong
                print("Please input destination in relative or absolute path")
    
    
    # beware that it can be either relative or absolute path
    # and we need absolute path
    if not os.path.isabs(root): # if relative path
        root = os.path.abspath(root)
    if not os.path.isabs(dest):
        dest = os.path.abspath(dest)
    
    # TODO: make regex pattern
    search_pattern = re.compile(r"^(.*?)"+ext+"$")
    
    # TODO: walk through the folder and detect files and copy to destination
    for foldername, subfolders, filenames in os.walk(root):
        # we don't want hidden folders digged
        if os.path.basename(foldername).startswith("."):
            continue
        
        if os.path.basename(foldername) == os.path.basename(dest):
            continue  # we don't have to copy the copied ones
        
        print("Searching in {}...".format(foldername))
        # go through all files
        for filename in filenames:
            # we don't want hidden files searched
            if filename.startswith("."):
                continue
            
            base = os.path.basename(filename)
            
            if search_pattern.search(base): # if matches
                shutil.copy(filename, dest) # copy to destination
                print("\t{} is copied.".format(base))
    
    print("Done!")

#### test *******    
selective_copy()


Your current location is /Users/soohyeonkim/GoogleDrive/Coding/python_coding/Automate_The_Boring_Stuff_With_Python/Automate_The_Boring_Stuff_With_Python/CH9
What location do you want to start search from? .
What file format are you looking for? (e.g., .jpg, .txt) .py
Where do you want copies located? Please include folder name ./copied
Searching in /Users/soohyeonkim/GoogleDrive/Coding/python_coding/Automate_The_Boring_Stuff_With_Python/Automate_The_Boring_Stuff_With_Python/CH9...
	delete_unneeded_files.py is copied.
	Backup_to_zip.py is copied.
	selective_copy.py is copied.
	Renaming_Files.py is copied.
Searching in /Users/soohyeonkim/GoogleDrive/Coding/python_coding/Automate_The_Boring_Stuff_With_Python/Automate_The_Boring_Stuff_With_Python/CH9/some...
Searching in /Users/soohyeonkim/GoogleDrive/Coding/python_coding/Automate_The_Boring_Stuff_With_Python/Automate_The_Boring_Stuff_With_Python/CH9/some/new...
Searching in /Users/soohyeonkim/GoogleDrive/Coding/python_coding/Automate_The_

In [57]:
os.listdir()

['CH9_2.zip',
 'CH9_1.zip',
 '.DS_Store',
 'copied',
 'new.zip',
 'spam.txt',
 'Backup_to_zip.py',
 'selective_copy.py',
 'Ch9_Organizing Files.ipynb',
 'Renaming_Files.py',
 '.ipynb_checkpoints',
 'some']

## 2. Deleting Unneeded Files

In [63]:
# %load delete_unneeded_files.py
#!/usr/bin/env python3
"""
Created on Wed Aug 21 14:01:30 2019

@author: soohyeonkim

walks through a folder tree and searches for exceptionally large files or 
folders - say, once that have a file size of more than 100MB. 
Print these files with their absolute path to the screen
"""

import os

def detect_unneeded_files(cwd = os.getcwd(), cutoff=100):
    print("Search start from " + os.path.abspath(cwd))
    print("It will show files larger than {}MB".format(cutoff))
    print()
    
    cutoff = cutoff * 10**6 
    founds = []
    
    # TODO: walk through a folder
    for foldername, subfolders, filenames in os.walk(cwd):
        # TODO: skip small folders to save time
        if os.path.getsize(foldername) <= cutoff:
            continue
        
        # TODO: print where you are to the screen in absolute path
        foldername = os.path.abspath(foldername)
        print("  Searching in {} ...".format(foldername))
        
        # TODO: search for large files/folders
        count = 0
        for filename in filenames:
            if os.path.getsize(filename) >= cutoff:
                filename = os.path.abspath(filename)
                founds += [filename]
                count += 1
        print("\tFound {} files!".format(count))
    
    # TODO: print large files to screen
    print("\n---- results ----")
    for i in range(len(founds)):
        print(" [{}] ".format(i+1) + founds[i])
    

#### TEST ************ 
detect_unneeded_files(cutoff=0.0001)

Search start from /Users/soohyeonkim/GoogleDrive/Coding/python_coding/Automate_The_Boring_Stuff_With_Python/Automate_The_Boring_Stuff_With_Python/CH9
It will show files larger than 0.00015MB

  Searching in /Users/soohyeonkim/GoogleDrive/Coding/python_coding/Automate_The_Boring_Stuff_With_Python/Automate_The_Boring_Stuff_With_Python/CH9 ...
	Found 10 files!
  Searching in /Users/soohyeonkim/GoogleDrive/Coding/python_coding/Automate_The_Boring_Stuff_With_Python/Automate_The_Boring_Stuff_With_Python/CH9/copied ...
	Found 4 files!

---- results ----
 [1] /Users/soohyeonkim/GoogleDrive/Coding/python_coding/Automate_The_Boring_Stuff_With_Python/Automate_The_Boring_Stuff_With_Python/CH9/CH9_2.zip
 [2] /Users/soohyeonkim/GoogleDrive/Coding/python_coding/Automate_The_Boring_Stuff_With_Python/Automate_The_Boring_Stuff_With_Python/CH9/CH9_1.zip
 [3] /Users/soohyeonkim/GoogleDrive/Coding/python_coding/Automate_The_Boring_Stuff_With_Python/Automate_The_Boring_Stuff_With_Python/CH9/delete_unneeded_

## 3. Filling in the Gaps

In [85]:
# %load filling_gaps.py
#!/usr/bin/env python3
"""
Created on Wed Aug 21 14:58:25 2019

@author: Soo Hyeon Kim

With a given prefix, such as spam001.txt, spam003.txt, adn so on. in a single
folder and locates any gaps in the numbering. The program will rename all 
the later files to close this gap
"""

import os, re, shutil

# TODO: get a parameter that determines adjust or add between gaps, 
#       Also, weather save file or not

def filling_gaps(cwd=os.getcwd(), adjust=True):
   
    cwd = os.path.abspath(cwd) # make sure cwd is absolute path
    prefix = input("What is the prefix? ")
    
    # group 1: all
    # group 2: numbering
    # group 3: extension
    regex_pattern = re.compile(\
                    "(" + prefix  + "[._\-\s]?([\d]+))(\.[\d\w]+)$")
     
    # TODO: walk through a folder       
    file_list = os.listdir()
    numberings = []
    
    if file_list: # if not empty listextension = found[0][2]
        i = 0
        for file in file_list:
            # TODO: find numbering files with regex pattern
            found = regex_pattern.findall(file)
            
            if found:
                # TODO: Extract all numbers in numbering part in order
                numberings.append(found[0][1])
                if i == 0:
                    extension = found[0][2] # extract extension
#                    print(found)
#                    print(extension)
                    i = 1 # to repeat extrating extension            
            
    else:
        print("Empty folder. Nothing to show")
        return # terminate the program
    
    if not numberings:
        print("No search result. Bye")
        return 
    
    # sort in ascending order of value
    numberings.sort(key=lambda x: int(x))
    # for now for convinience convert spamto int
    numberings = [int(number) for number in numberings]
    min_num = min(numberings)
    max_num = max(numberings)
    # numbers not in numberings. i.e., complement set of numberings
    numberings_c = [x for x in range(min_num, max_num) \
                                        if x not in numberings]
    if not numberings_c:
        print("All seems right")
        return 
    
    min_gap = min(numberings_c) # minimum gap value
    
    ## let's make hashable dictionary {original: 'right form'}
    length_of_out = len([x for x in numberings if x > min_gap])
    added_numbers = list(range(min_gap, min_gap + length_of_out))
    numberings_adj = [x for x in numberings if x < min_gap] + added_numbers
    # to unify format
    numberings_str = ['0'*(len(str(max(numberings_adj))) - \
                           len(str(int(num)))) \
                       + str(num) for num in numberings_adj]
    # dictionary for search
    numberings_dict = {k:v for k, v in zip(numberings, numberings_str)}
    
    min_gap_str = '0' * (len(numberings_str[0]) - len(str(min_gap))) \
                    + str(min_gap)
    
    
    # TODO: check if it is adjust mode or add mode
    if adjust:
        # TODO: if adjust mode, 
        #       re-organize the order in ascending order from min_gap
        file_list.sort(key= lambda x: len(x)) # in order not to overwrite already renamed one
#         print(file_list)
        
        for file in file_list:
            # TODO: find numbering files with regex pattern
            found = regex_pattern.findall(file)
            
            if found:
                
                # adjust name in uniform format
                num_str = numberings_dict[int(found[0][1])]
#                print(numberings_dict, found, prefix + num_str + found[0][2])
                # rename
                shutil.move(\
                        os.path.join(cwd, file), \
                        os.path.join(cwd, prefix + num_str + found[0][2])\
                            )
        print("Done adjusting!")
    
    else:
        # TODO: if add mode, add numbering from min_gap
        print("I suggest you save file as '{}'"\
              .format(prefix+min_gap_str+extension))
       
      
#### test *******
filling_gaps(cwd='./some', adjust=False)

What is the prefix? spam
I suggest you save file as 'spam3.txt'
