Switch branches/tags
Nothing to show
Find file Copy path
Fetching contributors…
Cannot retrieve contributors at this time
executable file 341 lines (309 sloc) 12.5 KB
#!/usr/bin/env python
#Meitham 01 July 2011
__doc__ = """Organise your photos.
NAME - flexable images organiser based on EXIF details.
[python] -r -v
A flexiable python script that can operate on a tree of images and copy or
move the files into directories based on the image EXIF details. It can detect
duplicates using either MD5 or SHA, it can based the duplication on the entire
image or just the thumbnails. It has the ability to build or fix EXIF details
from images that has wrong EXIF details, such as it can correct picture taken
date on images.
The script can build a symlink of date based hierarchical directories. So you
can have your images sorted in date based folders, a photo file that was taken
at YYYY/MM/DD will end up in a folder of YYYY/MM/DD (posix) YYYY\MM\DD (win)
--version show program's version number and exit
-h, --help show this help message and exit
-v, --verbose make lots of noise [default]
-q, --quiet unless errors don't output anything
-r, --recursive operate recursively [default]
-s, --symlink follow symbolic linked directories
-m, --move delete original file from SOURCE
-d DEPTH, --depth=DEPTH
unlimited [default: 0]
-g LOG, --log=LOG log all actions [default: sys.stderr]
-i, --ignore ignore photos with missing EXIF header [default]
-p NOEXIFPATH, --process-no-exif=NOEXIFPATH
copy/moves images with no EXIF data to [default:
from optparse import OptionParser
from datetime import datetime, timedelta
import Image
import sys
import os
import shutil
import string
import hashlib
import logging, logging.handlers
import pyexiv2
except ImportError:
print "Missing dependency: please install pyexiv2 in order to use this script"
__version__ = "0.101"
IMAGE_EXT = ['.jpg','jpeg','.png']
class ImageException(Exception):
class InvalidImageFile(ImageException):
class InvalidDateTag(ImageException):
class MissingDateTag(ImageException):
class MissingImageFile(ImageException):
class DuplicateImage(ImageException):
class NoThumbnailFound(ImageException):
def isPhoto(path):
'''return true if image has a known image extension file
_, ext = os.path.splitext(path)
return string.lower(ext) in IMAGE_EXT
return False
def isExact(file1, file2, func=None):
''' checks if two files have exactly same content
if func == None:
func = thumbDigest
return func(file1) == func(file2)
except NoThumbnailFound:
# resort to full file digest
func = sha256HexDigest
# lets try thumbnail
return func(file1) == func(file2)
def sha256HexDigest(filename):
'''returns an sha256 hex digest of an input
buf = open(filename).read()
h = hashlib.sha256()
return h.hexdigest()
def thumbDigest(filename):
'''returns a digest of the thumb image
metadata = pyexiv2.ImageMetadata(filename)
if not metadata.previews:
raise NoThumbnailFound(filename)
h = hashlib.sha256()
return h.hexdigest()
class ImageFile:
''' an image file object handler
def __init__(self, fullfilepath):
''' constructor
global logger # use global logger / should really argument this
self.logger = logger
# file existance
if not os.path.exists(fullfilepath): # file missing
raise MissingImageFile('file not found %s' %fullfilepath)
# file extension
self.basename, self.ext = os.path.splitext(fullfilepath)
self.ext = self.ext.lower() # i hate uppercased extensions
if not self.ext in IMAGE_EXT:
raise InvalidImageFile('invalid image file %s' %fullfilepath)
self.metadata = pyexiv2.ImageMetadata(fullfilepath)
except IOError:
raise InvalidImageFile('invalid image file %s' %fullfilepath)
if not self.metadata:
raise MissingImageFile('missing exif info %s' %fullfilepath)
self.srcfilepath = fullfilepath
self.imagedate = self.metadata['Exif.Image.DateTime'].value
except KeyError:
self.logger.debug("Exif.Image.DateTime key is missing from exif")
# here we handle images that has no date, this will come later
raise InvalidDateTag('Exif.Image.DateTime key is missing from exif')
self.datedFileName = self.imagedate.strftime('%Y_%m_%d-%H_%M_%S')
def getUniquePath(self, base):
''' gets a proper name and path with no duplications
if self.imagedate is None:
# i should handle images with no date here"no image date for file %s " %self.srcfilepath)
# i could either return the file creation date
# or just a string.
return None
seq = 0
imd = self.imagedate
self.dstdir = os.path.join(base, str(imd.year), str(imd.month), str(
while True:
fileName = self.datedFileName
if seq:
fileName += self.ext
self.dstfilename = os.path.join(self.dstdir, fileName)
if os.path.exists(self.dstfilename):"image with similar date/time stamp already exists %s " %self.dstfilename)
if isExact(self.srcfilepath, self.dstfilename):".. this appars to be a duplicate %s " %self.dstfilename)
raise DuplicateImage('image %s already exists - duplicate'%self.dstfilename)
else:".. this is not a duplicate %s " %self.dstfilename)
return self.dstfilename
def move(self, base, deleteOriginal=False, link=False):
if self.getUniquePath(base) is None:"unknown destination for image %s " %self.srcfilepath)
except DuplicateImage, e:
if deleteOriginal:
dstdir = os.path.dirname(self.dstfilename)
if not (os.path.exists(dstdir) and os.path.isdir(dstdir)):"creating dir %s" %dstdir)
if link:"linking %s ==> %s " %(self.srcfilepath, self.dstfilename))
if os.path.islink(self.srcfilepath):
self.srcfilepath = os.readlink(self.srcfilepath)
os.symlink(self.srcfilepath, self.dstfilename)
if deleteOriginal:"moving %s ==> %s " %(self.srcfilepath, self.dstfilename))
shutil.move(self.srcfilepath, self.dstfilename)
else:"copying %s ==> %s " %(self.srcfilepath, self.dstfilename))
shutil.copy2(self.srcfilepath, self.dstfilename)
def getOptions():
''' creates the options and return a parser object
parser = OptionParser(usage="%prog [options] src dest", version="%prog 0.1")
parser.add_option("-v", "--verbose",
action="store_true", dest="verbose",
help="make lots of noise, mainly used for debugging")
parser.add_option("-q", "--quiet",
action="store_false", dest="verbose",
help="unless errors don't output anything.")
parser.add_option("-r", "--recursive",
action="store_true", dest="recursive",
help="operate recursively.")
parser.add_option("-R", "--rotate",
action="store_true", dest="rotate",
help="rotate images according to their EXIF rotation tag.")
parser.add_option("-k", "--dry-link",
action="store_true", dest="link",
help="creates a tree of symbolic links under destination with date time\
hierarchy preserving the original images states.")
parser.add_option("-s", "--symlink",
action="store_true", dest="symlink",
help="follow symbolic linked directories.")
parser.add_option("-m", "--move",
action="store_true", dest="move",
help="delete original file from SOURCE, by default it makes a copy of the file.")
parser.add_option("-d", "--depth",
default="0", type='int', dest="depth",
help="unlimited [default: %default].")
parser.add_option("-g", "--log",
default="sys.stderr", dest="log",
help="log all actions [default: %default].")
# parser.add_option("-i", "--ignore",
# action="store_true", dest="ignore", default=True,
# help="ignore photos with missing EXIF header.")
parser.add_option("-p", "--process-no-exif",
default="undated", dest="noExifPath",
help="copy/moves images with no EXIF data to [default: %default].")
return parser
def treewalk(top, recursive=False, followlinks=False, depth=0):
''' generator similar to os.walk(), but with limited subdirectory depth
global logger
if not maxdepth is None:
if depth > maxdepth:
for f in os.listdir(top):
file_path = os.path.join(top, f)
if os.path.isdir(file_path):
# its a dir recurse into it
if recursive:
islink = os.path.islink(file_path)
if (islink and followlinks) or not islink:
for dirpath, filename in treewalk(file_path, recursive, followlinks, depth+1):
yield dirpath, filename
elif os.path.isfile(file_path):
yield top, f
# Unknown file type, print a message'Skipping %s' % pathname)
def makeLogger(log=None):
''' returns a logger class, call first when used in shell, otherwise
all objects complain of missing logger
logger = logging.getLogger('')
if log == None:
# "sys.stderr"
console = logging.StreamHandler()
fileHandler = logging.FileHandler(log)
return logger
if __name__=='__main__':
''' main
parser = getOptions()
(options, args) = parser.parse_args()
global logger
logger = makeLogger(options.log)
if len(args) == 1:
src = dst = args[0]
# this needs to build the tree under temp and move it to dest when done
elif len(args) == 2:
src = args[0]
dst = args[1]
logger.error("invalid number of arguments")
if options.verbose:
logger.debug('verbose mode on')
logger.debug('verbose mode off')
if options.symlink:
logger.debug("following symlinks")
logger.debug("ignoring symlinks")
logger.debug("creating a tree of links to photos")
options.move = False
# check for non implemented options
if options.rotate:
raise NotImplementedError('rotating images is not supported yet')
maxdepth = options.depth = int(options.depth)
logger.debug("depth is: "+ str(options.depth))
if maxdepth == 0:
maxdepth = None
for dirpath, filename in treewalk(src, options.recursive, options.symlink, options.depth):
fullfilepath = os.path.join(dirpath, filename)
logger.debug("processing %s" %fullfilepath)
if not isPhoto(fullfilepath): # ignore non image extensions
logger.debug("ignoring non image file %s" %fullfilepath)
imagefile = ImageFile(fullfilepath)
except ImageException, e:
imagefile.move(dst, options.move,