In [1]:
import re
from datetime import datetime, timezone
from os import scandir, remove, stat
from os.path import commonpath, isdir, join, realpath


def convert_date(timestamp):
    """Converts a POSIX timestamp to a human readable UTC date string

    Args:
        timestamp(`int` or `float`): seconds since the Unix epoch, e.g. the
            ``st_mtime`` field of a stat result

    Returns:
        `str`: human readable time string in UTC, minute resolution -- 01 Jan 2001 12:01"""
    # Timezone-aware equivalent of datetime.utcfromtimestamp(), which is
    # deprecated since Python 3.12. The formatted text is unchanged because the
    # format below contains no timezone field.
    d = datetime.fromtimestamp(timestamp, tz=timezone.utc)
    return d.strftime('%d %b %Y %H:%M')


# collect the files in a set
def get_files(path):
    """Generates two sets of strings representing the files directly inside a folder.

    Only entries for which ``is_file()`` is true are collected; sub-folders
    are skipped and not descended into.

    Args:
        path(`str`): folder to generate the sets for

    Returns:
        `tuple` of `set`: element 0 holds one '<file name> <last modification time>'
            string per file (time formatted by `convert_date`), element 1 holds
            the bare file names"""
    set_files = set()
    set_filenames = set()
    # The context manager closes the directory iterator even when stat() or
    # convert_date() raises part-way through the listing.
    with scandir(path) as dir_entries:
        for entry in dir_entries:
            if entry.is_file():
                info = entry.stat()
                set_files.add(f'{entry.name} {convert_date(info.st_mtime)}')
                set_filenames.add(entry.name)
    return set_files, set_filenames


# are you sure you want to delete?
def del_func(filepath):
    """Deletes a file from a folder with user confirmation

    The user is prompted on standard input. The file is removed only when the
    reply is ``Y`` (upper or lower case, surrounding whitespace ignored); any
    other reply leaves the file untouched.

    Args:
        filepath(`str`): path to delete

    Returns:
        None

    Raises:
        OSError: propagated from `os.remove` when the confirmed deletion fails
    """
    answer = input(f'Are you sure you want to delete {filepath}? (Y or N):  ')
    if answer.strip().upper() == 'Y':
        # `remove` is imported by name from `os`; the module name `os` itself
        # is not bound in this file, so `os.remove` would raise NameError.
        remove(filepath)
        print('File deleted')

In [3]:
# This program scans two folders, compares the files in them and then asks, file by file,
# whether each exact duplicate (same name and same last-modified minute) should be deleted.

# For security's sake, folders to be compared are only allowed inside a certain Path.
# Don't want you deleting OS necessary files...
Path = '/Users/martinesteves/Pictures/'
print(f'The folders to be compared have to be sub(sub)directories of {Path}.')

# Canonical form of the allowed root ('..' and symlinks resolved, trailing slash dropped) so the
# containment test below compares real locations instead of raw text.
allowed_root = realpath(Path)

# Input the folders for comparison.
# Duplicates will eventually be removed from the first folder.
paths = ['', '']
for i in [0, 1]:
    while True:
        try:
            paths[i] = input(f'Enter the full path to folder{i+1}: ')
            # stat() raises FileNotFoundError for a missing (or empty) path; handled below.
            stat(paths[i])
            candidate = realpath(paths[i])
            # A substring test would accept '/tmp/Users/.../Pictures/x' or '.../Pictures/../..';
            # comparing the common ancestor of the resolved paths does not.
            inside_root = commonpath([allowed_root, candidate]) == allowed_root
            # The second folder must resolve to a different location than the first.
            is_new_folder = i == 0 or candidate != realpath(paths[0])
            if isdir(candidate) and inside_root and is_new_folder:
                break
            else:
                print(f'The folder has to be {Path} or be located in this directory. You also can/t compare the same file to itself. Please enter a new path to the directory')
        # Error Handling
        except FileNotFoundError:
            print('Filepath does not exist, try again')


# Call the function to retrieve the filenames and dates of modification in a set for each folder.
set1, set_filenames1 = get_files(paths[0])
set2, set_filenames2 = get_files(paths[1])

# Extract a set of duplicates by comparing the two sets
doubles_set = set1.intersection(set2)

# Remove duplicates from the first folder
if len(doubles_set) == 0:
    print('There are no doubles.')
# Sorted so the confirmation prompts appear in a stable order from run to run.
for file in sorted(doubles_set):
    # Each entry is '<file name> <dd Mon YYYY HH:MM>'. The timestamp is always the last four
    # space-separated fields, so splitting from the right recovers the name even when it
    # contains spaces, has no extension or uses non-ASCII characters.
    filename = file.rsplit(' ', 4)[0]
    del_func(join(paths[0], filename))

The folders to be compared have to be sub(sub)directories of /Users/martinesteves/Pictures/.
Enter the full path to folder1: 
Filepath does not exist, try again


KeyboardInterrupt: Interrupted by user

In [6]:
# Define an IPython alias named `nbconvert` for the script at $HOME/bin/develtools/nbconvert
# with DuplicateFile.Remover.ipynb as its argument, then invoke the alias.
# NOTE(review): presumably the script exports this notebook to another format -- confirm
# against the script itself.
%alias nbconvert $HOME/bin/develtools/nbconvert DuplicateFile.Remover.ipynb
%nbconvert

/Users/martinesteves/bin/develtools/nbconvert, DuplicateFile.Remover.ipynb,
