In [None]:
import os, shutil, glob, subprocess

In [None]:
def copy_files_to_single_directory(src, dest):
    """Copy every file under ``src`` (recursively) into the flat directory ``dest``.

    Files keep their metadata (``shutil.copy2``). When two source files share a
    name, later copies get a ``-1``, ``-2``, ... suffix before the extension so
    nothing already present in ``dest`` is overwritten.

    Args:
        src: Root directory to walk.
        dest: Directory that receives all files; created if it does not exist.
    """
    # copy2 fails with FileNotFoundError when the target directory is missing.
    os.makedirs(dest, exist_ok=True)
    dest_real = os.path.realpath(dest)

    for root, dirs, files in os.walk(src):
        # If dest lives inside src, do not descend into it: otherwise the files
        # just copied would be picked up and copied again under "-N" names.
        dirs[:] = [
            d for d in dirs
            if os.path.realpath(os.path.join(root, d)) != dest_real
        ]
        if os.path.realpath(root) == dest_real:
            # src itself is dest: its files are already where they belong.
            continue

        for file in files:
            src_file = os.path.join(root, file)
            dest_file = os.path.join(dest, file)

            # Pick the first free "<name>-<n><ext>" if the plain name is taken.
            counter = 1
            base, extension = os.path.splitext(dest_file)
            while os.path.exists(dest_file):
                dest_file = f"{base}-{counter}{extension}"
                counter += 1

            shutil.copy2(src_file, dest_file)

In [None]:
%%time
# Flatten the photo dump: every file found anywhere under source_directory is
# copied into the single folder destination_directory.
# NOTE(review): both paths are absolute and user-specific; edit them before
# running this notebook on another machine.
source_directory = '/Users/nrapstin/Downloads/Fall_2023_photo_dump'
destination_directory = '/Users/nrapstin/Downloads/backup-photos'
copy_files_to_single_directory(source_directory, destination_directory)

In [None]:
def get_files_with_extensions(directory, extensions):
    """List the entries directly inside ``directory`` whose name ends with a given suffix.

    Matching is case-insensitive, so ``".jpg"`` finds ``a.jpg``, ``b.JPG`` and
    ``c.Jpg``. The search is not recursive. Names starting with a dot are
    skipped, which is what the ``*<ext>`` glob pattern used previously did.

    Args:
        directory: Folder to scan; a missing folder yields an empty list.
        extensions: Iterable of suffixes including the dot, e.g. ``[".mov"]``.

    Returns:
        Sorted list of matching paths (``directory`` joined with the entry
        name), each path appearing once.
    """
    if not os.path.isdir(directory):
        return []

    # str.endswith accepts a tuple, so one pass over the listing covers every
    # suffix and cannot report the same entry twice.
    suffixes = tuple(ext.lower() for ext in extensions)

    return [
        os.path.join(directory, name)
        for name in sorted(os.listdir(directory))
        if not name.startswith('.') and name.lower().endswith(suffixes)
    ]

In [None]:
from PIL import Image
from PIL.ExifTags import TAGS
from datetime import datetime

In [None]:
# Working folder for all sorting cells below; this is the same path the copy
# step used as its destination_directory.
# NOTE(review): absolute, user-specific path.
directory = '/Users/nrapstin/Downloads/backup-photos'

In [None]:
# Image file suffixes (leading dot included) handed to get_files_with_extensions.
img_extensions = [".png", ".jpg", ".jpeg", ".gif"]  # List of extensions

In [None]:
# Paths of the image files that sit directly in `directory` (no recursion).
img_list = get_files_with_extensions(directory, img_extensions)

In [None]:
len(img_list)

In [None]:
# Video file suffixes (leading dot included) handed to get_files_with_extensions.
mov_extensions = [".mov", ".mp4"]  # List of extensions

In [None]:
# Paths of the video files that sit directly in `directory` (no recursion).
mov_list = get_files_with_extensions(directory, mov_extensions)

In [None]:
len(mov_list)

In [None]:
def get_next_available_filename(filepath):
    """Return the first unused ``<stem>-<n><ext>`` sibling path of ``filepath``.

    Numbering starts at 1 and increases until a path that does not exist on
    disk is found. The result is a full path in the same directory as
    ``filepath``, not a bare file name. ``filepath`` itself is never checked
    or returned.
    """
    parent, name = os.path.split(filepath)
    stem, suffix = os.path.splitext(name)

    index = 1
    while True:
        candidate = os.path.join(parent, f"{stem}-{index}{suffix}")
        if not os.path.exists(candidate):
            return candidate
        index += 1

In [None]:
# Maps the image's index in img_list -> {'img_path': path, 'dt': datetime}
# for every image whose EXIF block carries a parseable DateTime field.
img_with_meta = {}

# Numeric id of the field that PIL.ExifTags.TAGS names 'DateTime'.
EXIF_DATETIME_TAG = 0x0132

# loop over all exported images
for i, img in enumerate(img_list):
    # Open inside a context manager so the file handle is released right away;
    # a long listing would otherwise keep one open handle per image.
    try:
        with Image.open(img) as image:
            raw_date = image.getexif().get(EXIF_DATETIME_TAG)
    except OSError as err:
        # Unreadable or unrecognised image: leave it for the later passes.
        print(f"skipping unreadable image {img}: {err}")
        continue

    if raw_date is None:
        continue

    # Only the date field is decoded. Decoding every bytes-valued EXIF field,
    # as was done before, fails on fields that hold binary data.
    if isinstance(raw_date, bytes):
        raw_date = raw_date.decode(errors='replace')

    # Parse the string into a datetime object
    try:
        dt = datetime.strptime(raw_date, '%Y:%m:%d %H:%M:%S')
    except (TypeError, ValueError):
        # Malformed date: treat the image as having no usable DateTime.
        print(f"skipping {img}: unparseable EXIF DateTime {raw_date!r}")
        continue

    img_with_meta[i] = {'img_path': img, 'dt': dt}
                


In [None]:
len(img_with_meta)

In [None]:
# Move every image with an EXIF DateTime into <directory>/<YYYY>/<YYYY-MM>/
# and rename it to YYYYMMDD-HHMMSS<ext>.
for key, val in img_with_meta.items():
    dt = val['dt']
    img_path_original = val['img_path']

    # img_with_meta is not refreshed after a move, so re-running this cell
    # would otherwise fail on files that are already sorted.
    if not os.path.exists(img_path_original):
        print(f"already moved, skipping: {img_path_original}")
        continue

    # Create year and year-month directories if they don't exist
    month_folder = os.path.join(directory, str(dt.year), f"{dt.year}-{dt.month:02d}")
    os.makedirs(month_folder, exist_ok=True)

    # Create new filename with year, month, day, hour, minute, and second
    extension = os.path.splitext(img_path_original)[1]
    new_file_path = os.path.join(month_folder, dt.strftime('%Y%m%d-%H%M%S') + extension)

    # get_next_available_filename returns a complete path, so use it as-is.
    # Joining it onto month_folder again only worked while `directory` was
    # absolute; with a relative `directory` the folder part would be doubled.
    if os.path.exists(new_file_path):
        new_file_path = get_next_available_filename(new_file_path)

    print(f"dt: {dt}")
    print(f"img_path_original: {img_path_original}")
    print(f"new_file_path: {new_file_path}")

    # Move the file to the new location
    shutil.move(img_path_original, new_file_path)


In [None]:
# Re-scan the top level of `directory`: images moved into year/month
# sub-folders by the previous cell are no longer listed, so what remains are
# the images that pass did not handle.
img_list = get_files_with_extensions(directory, img_extensions)

In [None]:
len(img_list)    

In [None]:
def get_creation_date(file_path):
    """Return the EXIF ``DateTimeOriginal`` of an image as a ``datetime``, or ``None``.

    ``None`` is returned when the path does not end in a known image suffix
    (compared case-insensitively), when the image carries no EXIF block, when
    tag 36867 is absent, or when its value is not in the
    ``YYYY:MM:DD HH:MM:SS`` format.

    Args:
        file_path: Path of the file to inspect.
    """
    image_suffixes = (".png", ".jpg", ".jpeg", ".gif")
    # Compare lower-cased so ".JPEG", ".GIF" and mixed-case names are handled;
    # the previous hand-written list of case variants left those out.
    if not file_path.lower().endswith(image_suffixes):
        return None

    with Image.open(file_path) as img:
        if 'exif' not in img.info:
            return None
        # NOTE(review): _getexif is a private Pillow method; it is kept because
        # the tag is looked up in its result here. Guard against image classes
        # that do not provide it.
        read_exif = getattr(img, '_getexif', None)
        exif_data = read_exif() if read_exif is not None else None

    if not exif_data or 36867 not in exif_data:  # 36867 is the tag for DateTimeOriginal
        return None

    try:
        return datetime.strptime(exif_data[36867], '%Y:%m:%d %H:%M:%S')
    except (TypeError, ValueError):
        # Malformed timestamp: report "no date" instead of aborting the caller's loop.
        return None

In [None]:
# Second image pass: sort the remaining images by the date that
# get_creation_date() reads from them.
for i, img in enumerate(img_list):
    creation_date = get_creation_date(img)
    # Skip images without a date, and dates at or before 1970-01-01.
    if not creation_date or creation_date <= datetime(1970, 1, 1):
        continue

    print(i, img)

    # Create year and year-month directories if they don't exist
    month_folder = os.path.join(
        directory,
        str(creation_date.year),
        f"{creation_date.year}-{creation_date.month:02d}",
    )
    os.makedirs(month_folder, exist_ok=True)

    # Create new filename with year, month, day, hour, minute, and second
    extension = os.path.splitext(img)[1]
    new_file_path = os.path.join(
        month_folder, creation_date.strftime('%Y%m%d-%H%M%S') + extension
    )

    # get_next_available_filename returns a complete path, so use it as-is
    # rather than joining it onto month_folder a second time.
    if os.path.exists(new_file_path):
        new_file_path = get_next_available_filename(new_file_path)

    print(f"creation_date: {creation_date}")
    print(f"img_path_original: {img}")
    print(f"new_file_path: {new_file_path}")

    # Move the file to the new location
    shutil.move(img, new_file_path)


In [None]:
# Re-scan the top level of `directory` again: whatever images are still listed
# were not moved by either of the date-based passes above.
img_list = get_files_with_extensions(directory, img_extensions)

In [None]:
len(img_list)

In [None]:
# Folder that receives files for which no date could be determined.
os.makedirs(os.path.join(directory, "unsorted"), exist_ok=True)

In [None]:
# Move the images that are still in img_list into <directory>/unsorted,
# renamed to nodate<ext>, nodate-1<ext>, nodate-2<ext>, ...
unsorted_folder = os.path.join(directory, "unsorted")
# Created here as well so this cell does not depend on another cell having run.
os.makedirs(unsorted_folder, exist_ok=True)

for img in img_list:
    # There is no date to build a name from, so only the extension is kept.
    extension = os.path.splitext(img)[1]
    new_file_path = os.path.join(unsorted_folder, 'nodate' + extension)

    # get_next_available_filename returns a complete path, so use it as-is
    # rather than joining it onto the folder a second time.
    if os.path.exists(new_file_path):
        new_file_path = get_next_available_filename(new_file_path)

    print(f"img_path_original: {img}")
    print(f"new_file_path: {new_file_path}")

    # Move the file to the new location
    shutil.move(img, new_file_path)

In [None]:
# Maps the video's index in mov_list -> {'vid_path': path, 'dt': datetime}
# for every video whose ffmpeg output has a parseable
# com.apple.quicktime.creationdate entry.
vid_with_meta = {}
creation_key = 'com.apple.quicktime.creationdate:'

# loop over all exported videos
for i, v in enumerate(mov_list):
    # Run ffmpeg with an argument list instead of a shell string so paths that
    # contain spaces or shell metacharacters are passed through unchanged.
    # stderr is merged into stdout, matching what subprocess.getoutput captured.
    # A missing ffmpeg binary now raises FileNotFoundError instead of silently
    # producing no metadata for any video.
    probe = subprocess.run(
        ["ffmpeg", "-i", v],
        stdin=subprocess.DEVNULL,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
        text=True,
        errors="replace",
    )
    metadata = probe.stdout

    # get the date from metadata
    key_pos = metadata.find(creation_key)
    if key_pos == -1:
        continue

    # Text after the first ": " on the line that holds the key.
    date_str = metadata[key_pos:].split("\n")[0].partition(": ")[2].strip()

    print(f"{i}: {v}")
    # Parse the string into a datetime object
    try:
        dt = datetime.strptime(date_str, '%Y-%m-%dT%H:%M:%S%z')
    except ValueError:
        # Unexpected format: leave the video for the later passes.
        print(f"skipping {v}: unparseable creation date {date_str!r}")
        continue
    print(f"date: {dt}")

    vid_with_meta[i] = {'vid_path': v, 'dt': dt}


In [None]:
len(vid_with_meta)

In [None]:
# Move every video with a QuickTime creation date into
# <directory>/<YYYY>/<YYYY-MM>/ and rename it to YYYYMMDD-HHMMSS<ext>.
for key, val in vid_with_meta.items():
    dt = val['dt']
    vid_path_original = val['vid_path']

    # vid_with_meta is not refreshed after a move, so re-running this cell
    # would otherwise fail on files that are already sorted.
    if not os.path.exists(vid_path_original):
        print(f"already moved, skipping: {vid_path_original}")
        continue

    # Create year and year-month directories if they don't exist
    month_folder = os.path.join(directory, str(dt.year), f"{dt.year}-{dt.month:02d}")
    os.makedirs(month_folder, exist_ok=True)

    # Create new filename with year, month, day, hour, minute, and second
    extension = os.path.splitext(vid_path_original)[1]
    new_file_path = os.path.join(month_folder, dt.strftime('%Y%m%d-%H%M%S') + extension)

    # get_next_available_filename returns a complete path, so use it as-is
    # rather than joining it onto month_folder a second time.
    if os.path.exists(new_file_path):
        new_file_path = get_next_available_filename(new_file_path)

    print(f"dt: {dt}")
    print(f"vid_path_original: {vid_path_original}")
    print(f"new_file_path: {new_file_path}")

    # Move the file to the new location
    shutil.move(vid_path_original, new_file_path)


In [None]:
# Re-scan the top level of `directory`: videos moved into year/month
# sub-folders by the previous cell are no longer listed.
mov_list = get_files_with_extensions(directory, mov_extensions)

In [None]:
len(mov_list)

In [None]:
# Second video pass: sort the remaining videos by the `creation_time` entry
# in ffmpeg's output.
for i, v in enumerate(mov_list):
    # Run ffmpeg with an argument list instead of a shell string so paths that
    # contain spaces or shell metacharacters are passed through unchanged.
    # stderr is merged into stdout, matching what subprocess.getoutput captured.
    probe = subprocess.run(
        ["ffmpeg", "-i", v],
        stdin=subprocess.DEVNULL,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
        text=True,
        errors="replace",
    )
    metadata = probe.stdout

    # get the date from metadata
    tag_pos = metadata.find('creation_time')
    if tag_pos == -1:
        continue

    # Text after the first ": " on the line that holds the tag; a trailing
    # 'Z' is rewritten as an explicit +0000 offset for %z.
    date_str = metadata[tag_pos:].split("\n")[0].partition(": ")[2].strip()
    date_str = date_str.replace('Z', '+0000')

    # Parse the string into a datetime object
    try:
        dt = datetime.strptime(date_str, '%Y-%m-%dT%H:%M:%S.%f%z')
    except ValueError:
        # Unexpected format: leave the video for the "unsorted" step.
        print(f"skipping {v}: unparseable creation_time {date_str!r}")
        continue

    # check if file name corresponds to the date: the part of the name before
    # the first '-' is compared as YYYYMM against the metadata date
    stem, extension = os.path.splitext(os.path.basename(v))
    yearmonth = stem.split('-')[0]
    if yearmonth[:4] != str(dt.year) or yearmonth[4:6] != f"{dt.month:02d}":
        # if year or month do not match
        print(i, v)
        print('date DOES NOT match')

    # Create year and year-month directories if they don't exist
    month_folder = os.path.join(directory, str(dt.year), f"{dt.year}-{dt.month:02d}")
    os.makedirs(month_folder, exist_ok=True)

    # Create new filename with year, month, day, hour, minute, and second
    new_file_path = os.path.join(month_folder, dt.strftime('%Y%m%d-%H%M%S') + extension)

    # get_next_available_filename returns a complete path, so use it as-is
    # rather than joining it onto month_folder a second time.
    if os.path.exists(new_file_path):
        new_file_path = get_next_available_filename(new_file_path)

    print(f"dt: {dt}")
    print(f"vid_path_original: {v}")
    print(f"new_file_path: {new_file_path}")

    # Move the file to the new location
    shutil.move(v, new_file_path)

In [None]:
# Re-scan the top level of `directory`: whatever videos are still listed were
# not moved by either of the date-based passes above.
mov_list = get_files_with_extensions(directory, mov_extensions)

In [None]:
# Pick the first leftover video for manual inspection in the next cells.
# NOTE(review): raises IndexError when mov_list is empty.
file = mov_list[0]

In [None]:
# Capture ffmpeg's report for the sample video. An argument list is used
# instead of a shell string so a path with spaces or shell metacharacters is
# passed through unchanged; stderr is merged into stdout, matching what
# subprocess.getoutput captured.
metadata = subprocess.run(
    ["ffmpeg", "-i", file],
    stdin=subprocess.DEVNULL,
    stdout=subprocess.PIPE,
    stderr=subprocess.STDOUT,
    text=True,
    errors="replace",
).stdout

In [None]:
metadata

In [None]:
from hachoir.parser import createParser
from hachoir.metadata import extractMetadata

In [None]:
# Try hachoir as a second source for the sample video's creation date.
# createParser() and extractMetadata() can both come back empty, so each step
# is checked before the next one; creation_date stays None when nothing is found.
parser = createParser(file)
metadata = None
creation_date = None

if parser:
    # The parser holds the file open; the with-block closes it afterwards.
    with parser:
        metadata = extractMetadata(parser)
    if metadata:
        try:
            creation_date = metadata.get('creation_date')
        except ValueError:
            # NOTE(review): hachoir appears to raise ValueError when the key
            # has no value - confirm against the installed version.
            creation_date = None
else:
    print(f"hachoir could not create a parser for {file}")

In [None]:
creation_date

In [None]:
mov_list

In [None]:
# Move the videos that are still in mov_list into <directory>/unsorted,
# renamed to nodate<ext>, nodate-1<ext>, nodate-2<ext>, ...
unsorted_folder = os.path.join(directory, "unsorted")
# Created here as well so this cell does not depend on another cell having run.
os.makedirs(unsorted_folder, exist_ok=True)

for v in mov_list:
    # There is no date to build a name from, so only the extension is kept.
    extension = os.path.splitext(v)[1]
    new_file_path = os.path.join(unsorted_folder, 'nodate' + extension)

    # get_next_available_filename returns a complete path, so use it as-is
    # rather than joining it onto the folder a second time.
    if os.path.exists(new_file_path):
        new_file_path = get_next_available_filename(new_file_path)

    print(f"v_path_original: {v}")
    print(f"new_file_path: {new_file_path}")

    # Move the file to the new location
    shutil.move(v, new_file_path)