From 00fe21694b907d675bf0266c9c118daf74a0ee89 Mon Sep 17 00:00:00 2001 From: Meitham Jamaa Date: Fri, 25 May 2012 17:15:20 +0100 Subject: [PATCH] fixed README --- albumiser.py | 630 ++++++++++++++++++++++++------------------------ sqlalbumiser.py | 354 --------------------------- 2 files changed, 318 insertions(+), 666 deletions(-) delete mode 100644 sqlalbumiser.py diff --git a/albumiser.py b/albumiser.py index a7b5a1c..4771a64 100644 --- a/albumiser.py +++ b/albumiser.py @@ -1,348 +1,354 @@ -#!/usr/bin/env python -#Meitham 01 July 2011 -__doc__ = """Organise your photos. - -NAME - albumiser.py - flexable images organiser based on EXIF details. - -SYNOPSIS - [python] albumiser.py [OPTION]... [SOURCE] [DESTINATION] - -EXAMPLE - [python] albumiser.py -r -v - -DESCRIPTION - A flexiable python script that can operate on a tree of images and copy or - move the files into directories based on the image EXIF details. It can detect - duplicates using either MD5 or SHA, it can based the duplication on the entire - image or just the thumbnails. It has the ability to build or fix EXIF details - from images that has wrong EXIF details, such as it can correct picture taken - date on images. - - The script can build a symlink of date based hierarchical directories. So you - can have your images sorted in date based folders, a photo file that was taken - at YYYY/MM/DD will end up in a folder of YYYY/MM/DD (posix) YYYY\MM\DD (win) - -Options: - --version show program's version number and exit - -h, --help show this help message and exit - -v, --verbose make lots of noise [default] - -q, --quiet unless errors don't output anything - -r, --recursive operate recursively [default] - -s, --symlink follow symbolic linked directories - -m, --move delete original file from SOURCE - -d DEPTH, --depth=DEPTH - unlimited [default: 0] - -g LOG, --log=LOG log all actions [default: sys.stderr] - -i, --ignore ignore photos with missing EXIF header [default] - -p NOEXIFPATH, --process-no-exif=NOEXIFPATH - copy/moves images with no EXIF data to [default: - undated] -""" -from optparse import OptionParser -from datetime import datetime, timedelta -import Image -import sys -import os -import shutil -import string +import argparse import hashlib import logging, logging.handlers +import os +import shutil +import sqlite3 +import sys +import tempfile + +from datetime import datetime, timedelta +from optparse import OptionParser + try: import pyexiv2 except ImportError: - print "Missing dependency: please install pyexiv2 in order to use this script" - -__version__ = "0.101" + print('Missing dependency: please install pyexiv2 in order to use this script') + sys.exit(1) +__version__ = '0.05' IMAGE_EXT = ['.jpg','jpeg','.png'] -class ImageException(Exception): - pass +def get_options(): + """ Creates the options and return a parser object + """ + #parser = OptionParser(usage='sqlalbumiser.py [options] src dest', version='%s'%__version__) + parser = argparse.ArgumentParser(prog='albumiser', + version=__version__, + description='Organise your photos the way they should be', + conflict_handler='resolve') + + # either be too verbose or quite, you cannot be both + verbose = parser.add_mutually_exclusive_group() + + verbose.add_argument('-v', '--verbose', + action='store_true', dest='verbose', + help='make loads of noise, useful for debugging!') + + verbose.add_argument('-q', '--quiet', + action='store_false', dest='verbose', + help="unless errors don't output anything.") -class InvalidImageFile(ImageException): - pass + parser.add_argument('-r', '--recursive', + action='store_true', dest='recursive', + help='operate recursively.') -class InvalidDateTag(ImageException): - pass + parser.add_argument('--rotate', + action='store_true', dest='rotate', + help='rotate images according to their EXIF rotation tag.') -class MissingDateTag(ImageException): - pass + parser.add_argument('--dry-link', + action='store_true', dest='link', + help='creates a tree of symbolic links under destination with date time\ + hierarchy preserving the original images states.') -class MissingImageFile(ImageException): - pass + parser.add_argument('-s', '--follow_links', + action='store_true', dest='follow_links', + help='follow symbolic linked directories.') -class DuplicateImage(ImageException): - pass - -class NoThumbnailFound(ImageException): - pass - -def getOptions(): - ''' creates the options and return a parser object - ''' - parser = OptionParser(usage="%prog [options] src dest", version="%prog 0.1") - parser.add_option("-v", "--verbose", - action="store_true", dest="verbose", - default=True, - help="make lots of noise, mainly used for debugging") - parser.add_option("-q", "--quiet", - action="store_false", dest="verbose", - help="unless errors don't output anything.") - parser.add_option("-r", "--recursive", - action="store_true", dest="recursive", - help="operate recursively.") - parser.add_option("-R", "--rotate", - action="store_true", dest="rotate", - help="rotate images according to their EXIF rotation tag.") - parser.add_option("-k", "--dry-link", - action="store_true", dest="link", - help="creates a tree of symbolic links under destination with date time\ - hierarchy preserving the original images states.") - parser.add_option("-s", "--symlink", - action="store_true", dest="symlink", - help="follow symbolic linked directories.") - parser.add_option("-m", "--move", - action="store_true", dest="move", - help="delete original file from SOURCE, by default it makes a copy of the file.") - parser.add_option("-d", "--depth", - default="0", type='int', dest="depth", - help="unlimited [default: %default].") - parser.add_option("-g", "--log", - default="sys.stderr", dest="log", - help="log all actions [default: %default].") - # parser.add_option("-i", "--ignore", - # action="store_true", dest="ignore", default=True, - # help="ignore photos with missing EXIF header.") - parser.add_option("-p", "--process-no-exif", - default="undated", dest="noExifPath", - help="copy/moves images with no EXIF data to [default: %default].") + parser.add_argument('-m', '--move', + action='store_true', dest='move', + help='delete original file from SOURCE, by default it makes a copy of the file.') + + parser.add_argument('--delete-duplicates', + action='store_true', dest='delete_duplicates', + help='delete duplicate files from SOURCE, by default it ignores them and keep them intact.') + + parser.add_argument('-d', '--depth', + type=int, dest='depth', + help='default is unlimited.') + + parser.add_argument('-g', '--log', + default=None, dest='log', + help='log all actions, default is console.') + parser.add_argument('source', action="store") #, nargs='?', default=os.getcwd()) + parser.add_argument('target', action="store") #, nargs='?', default=os.getcwd()) + # photos with no EXIF dates will either be copied into a special directory + # or will assume a UNIX epoch 1970-01-01, latter is default + parser.add_argument('--ignore-no-exif', + action='store_true', dest='ignore_no_exif', default=False, + help='ignore photos with missing EXIF date, otherwise use UNIX epoch.') return parser -def makeLogger(log=None): - ''' returns a logger class, call first when used in shell, otherwise - all objects complain of missing logger - ''' - logger = logging.getLogger('') - if log == None: - # "sys.stderr" - console = logging.StreamHandler() - logger.addHandler(console) +def get_logger(log=None): + """ returns a logger object + """ + logger = logging.getLogger('albumiser') + if log is None: + # 'sys.stderr' + logger.addHandler(logging.StreamHandler()) else: - fileHandler = logging.FileHandler(log) - logger.addHandler(fileHandler) + logger.addHandler(logging.FileHandler(log)) return logger -def treewalk(top, recursive=False, followlinks=False, depth=0): - ''' generator similar to os.walk(), but with limited subdirectory depth - ''' - global logger - if not maxdepth is None: - if depth > maxdepth: - return - for f in os.listdir(top): - file_path = os.path.join(top, f) - if os.path.isdir(file_path): - # its a dir recurse into it - if recursive: - islink = os.path.islink(file_path) - if (islink and followlinks) or not islink: - for dirpath, filename in treewalk(file_path, recursive, followlinks, depth+1): - yield dirpath, filename - elif os.path.isfile(file_path): - yield top, f +def handle_options(): + parser = get_options() + ns = parser.parse_args() + logger = get_logger(ns.log) + if ns.source == ns.target: + ns.target = tempfile.mkdtemp() + ns.target_is_temp = True + if ns.verbose: + logger.setLevel(logging.DEBUG) + logger.debug('verbose mode on') + else: + logger.debug('verbose mode off') + if ns.follow_links: + logger.debug('following symlinks') + else: + logger.debug('ignoring symlinks') + if ns.link: + logger.debug('creating a tree of links to photos') + ns.move = False + # check for non implemented options + if ns.rotate: + raise NotImplementedError('rotating images is not supported yet') + if ns.depth is None: + max_depth = ns.depth = None + else: + max_depth = ns.depth = int(ns.depth) + + logger.debug('depth is: %r'%ns.depth) + if max_depth == 0: + max_depth = None + + return ns + + +class TreeWalker: + """provides a functionality similar to os.walk but can do + pre defined depth when needed. + """ + def __init__(self, top='/', max_depth=None, *args, **kwargs): + self._top = top + self._max_depth = max_depth + self.logger = kwargs.get('logger', None) + self._depth = 0 + + if self._max_depth is None or self._max_depth>0: + self._recursive = True else: - # Unknown file type, print a message - logger.info('Skipping %s' % pathname) + self._recursive = False + + self._follow_links = kwargs.get('follow_links', False) + + def __repr__(self): + return ''%(self._top, self._max_depth) + + def walk(self, top=None, depth=0): + if not top: + top = self._top + if self._max_depth is not None: + if depth > self._max_depth: + return + for f in os.listdir(top): + file_path = os.path.join(top, f) + if os.path.isdir(file_path): + # its a dir recurse into it + if self._recursive: + islink = os.path.islink(file_path) + if (islink and self._follow_links) or not islink: + for dirpath, filename in self.walk(file_path, depth+1): + yield dirpath, filename + elif os.path.isfile(file_path): + yield top, f + else: + # Unknown file type, print a message + if self.logger: + self.logger.info('Skipping %s' % file_path) + def isPhoto(path): - '''return true if image has a known image extension file - ''' + """return true if image has a known image extension file + """ try: _, ext = os.path.splitext(path) - return string.lower(ext) in IMAGE_EXT + return ext.lower() in IMAGE_EXT except: return False -def isDuplicate(file1, file2, func=None): - ''' checks if two files have exactly same content - ''' - if func == None: - func = thumbDigest - try: - # lets try thumbnail - return func(file1) == func(file2) - except NoThumbnailFound: - # resort to full file digest - func = sha256HexDigest - return func(file1) == func(file2) - -def sha256HexDigest(filename): - '''returns an sha256 hex digest of an input - ''' - buf = open(filename).read() - h = hashlib.sha256() - h.update(buf) - return h.hexdigest() - -def thumbDigest(filename): - '''returns a digest of the thumb image - ''' - metadata = pyexiv2.ImageMetadata(filename) - metadata.read() - if not metadata.previews: - raise NoThumbnailFound(filename) +def sha256HexDigest(indata): + """returns an sha256 hex digest of an input + """ h = hashlib.sha256() - h.update(metadata.previews[-1].data) + h.update(indata) return h.hexdigest() -class ImageFile: - ''' an image file object handler - ''' - def __init__(self, fullfilepath): - ''' constructor - ''' - global logger # use global logger / should really argument this - self.logger = logger - - # file existance - if not os.path.exists(fullfilepath): # file missing - raise MissingImageFile('file not found %s' %fullfilepath) - - # file extension - self.basename, self.ext = os.path.splitext(fullfilepath) - self.ext = self.ext.lower() # i hate uppercased extensions - if not self.ext in IMAGE_EXT: - raise InvalidImageFile('invalid image file %s' %fullfilepath) - - self.metadata = pyexiv2.ImageMetadata(fullfilepath) - try: - self.metadata.read() - except IOError: - raise InvalidImageFile('invalid image file %s' %fullfilepath) - - if not self.metadata: - raise MissingImageFile('missing exif info %s' %fullfilepath) - - self.srcfilepath = fullfilepath + +def main(): + ns = handle_options() + conn = sqlite3.connect('/tmp/albumise.sqlite') + conn.text_factory = str + c = conn.cursor() + c.execute("drop table if exists images") + #TODO add a new column for the year/month/day/hour/minute/second + c.execute(""" + create table images ( + hash text unique, + path text unique, + created text, + destination text, + status text, + error text) + """) + #TODO add a new index for the new column + c.execute("create index idx_hash on images(hash)") + c.execute("create index idx_path on images(path)") + c.execute("create index idx_status on images(status)") + c.execute("create index idx_created on images(created)") + conn.commit() + undated_counter = 0 + logger = logging.getLogger('albumiser') + logger.debug('about to walk files') + tree_walker = TreeWalker(ns.source, ns.depth, logger=logger, follow_symlinks=ns.follow_links) + for dirpath, filename in tree_walker.walk(): + undated = False + imd = None try: - self.imagedate = self.metadata['Exif.Image.DateTime'].value - except KeyError: - self.logger.debug("Exif.Image.DateTime key is missing from exif") + path = os.path.join(dirpath, filename) + _,ext = os.path.splitext(path) + ext = ext.lower() + logger.debug('processing %s' %path) + + if not isPhoto(path): + # ignore non image extensions + logger.warning('ignoring non image file %s' %path) + c.execute("insert or ignore into images values(?,?,?,?,?,?)", + (path, path, None, None, 'IGNORE', 'non image file')) + continue + try: - # to make my htc android happy - self.imagedate = self.metadata['Exif.Photo.DateTimeOriginal'].value + metadata = pyexiv2.ImageMetadata(path) + metadata.read() + except(IOError, UnicodeDecodeError) as e: + logger.error('unable to read image metadata %s' %path) + # use file digest + digest = sha256HexDigest(path) + c.execute("insert or ignore into images values(?,?,?,?,?,?)", + (digest, path, None, None, 'FAILED', str(e))) + conn.commit() + continue + + # handle digest + try: + # reading thumbnails + logger.debug('trying thumbnail digest') + if len(metadata.previews) > 0: + largest = metadata.previews[-1] + data = largest.data + else: # no thumbnails available + data = metadata.buffer + except Exception as e: + # when metadata is not available use the entire file content hash + logger.exception(e) + logger.debug('unable to use metadata - using file content digest') + with open(path) as f: + data = f.read() + finally: + digest = sha256HexDigest(data) + + # check if we have had this file before + c.execute("""select * from images where hash=?""",(digest,)) + if len(c.fetchall())>0: + logger.warning('file %s already processed'%path) + if ns.delete_duplicates: + logger.debug('deleting duplicate file %s'%path) + c.execute("insert or ignore into images values(?,?,?,?,?,?)", + (digest, path, None, None, 'DUPLICATE', None)) + os.remove(path) + continue # we have this object already + + #import pdb + #if filename == '2012_04_09-15_15_18.2.jpg': + # pdb.set_trace() + + # fix images fucked up by fspot + try: + software = metadata['Exif.Image.Software'].value + if software.startswith('f-spot'): + # avoid using creation date time and rely on + # date time digitized tag + imd = metadata['Exif.Photo.DateTimeDigitized'].value except KeyError: - # here we handle images that has no date, this will come later - raise InvalidDateTag('Exif.Photo.DateTimeOriginal key is missing from exif') - self.datedFileName = self.imagedate.strftime('%Y_%m_%d-%H_%M_%S') - - def getUniquePath(self, base): - ''' gets a proper name and path with no duplications - ''' - if self.imagedate is None: - # i should handle images with no date here - self.logger.info("no image date for file %s " %self.srcfilepath) - # i could either return the file creation date - # or just a string. - return None - seq = 0 - imd = self.imagedate - self.dstdir = os.path.join(base, str(imd.year), str(imd.month), str(imd.day)) - while True: - fileName = self.datedFileName - if seq: - fileName+='_%0.0f'%seq - fileName += self.ext - self.dstfilename = os.path.join(self.dstdir, fileName) - if os.path.exists(self.dstfilename): - self.logger.info("image with similar date/time stamp already exists %s " %self.dstfilename) - if isDuplicate(self.srcfilepath, self.dstfilename): - self.logger.info(".. this appars to be a duplicate %s " %self.dstfilename) - raise DuplicateImage('image %s already exists - duplicate'%self.dstfilename) - else: - self.logger.info(".. this is not a duplicate %s " %self.dstfilename) - seq+=1 - continue - else: - return self.dstfilename + logger.info('unknown software') - def move(self, base, deleteOriginal=False, link=False): - try: - if self.getUniquePath(base) is None: - self.logger.info("unknown destination for image %s " %self.srcfilepath) - return - except DuplicateImage, e: - self.logger.info(e) - if deleteOriginal: - os.remove(self.srcfilepath) - return - dstdir = os.path.dirname(self.dstfilename) - if not (os.path.exists(dstdir) and os.path.isdir(dstdir)): - self.logger.info("creating dir %s" %dstdir) - os.makedirs(dstdir) - if link: - self.logger.info("linking %s ==> %s " %(self.srcfilepath, self.dstfilename)) - if os.path.islink(self.srcfilepath): - self.srcfilepath = os.readlink(self.srcfilepath) - os.symlink(self.srcfilepath, self.dstfilename) - return - if deleteOriginal: - self.logger.info("moving %s ==> %s " %(self.srcfilepath, self.dstfilename)) - shutil.move(self.srcfilepath, self.dstfilename) - else: - self.logger.info("copying %s ==> %s " %(self.srcfilepath, self.dstfilename)) - shutil.copy2(self.srcfilepath, self.dstfilename) + if imd is None: + tag = metadata.get('Exif.Image.DateTime', None) + if tag is None: + tag = metadata.get('Exif.Photo.DateTimeOriginal', None) + if tag is not None: + imd = tag.value + + if imd is None or not isinstance(imd, datetime): + logger.warning('Exif Date Tags are missing - using UNIX epoch') + undated = True + undated_counter+=1 + imd = datetime(1970,1,1) + + yyyy = str(imd.year) + mm = str(imd.month) + dd = str(imd.day) + dstdir = os.path.join(ns.target, + yyyy, + '%04d-%02d'%(imd.year,imd.month), + '%04d-%02d-%02d'%(imd.year,imd.month,imd.day)) + if undated: + dstdir = os.path.join(dstdir, imd.strftime('%Y_%m_%d-%H_%M_%S-') + +str(undated_counter)+ext) + else: + c.execute("select * from images where created=?",(str(imd),)) + images_in_second = len(c.fetchall()) + if images_in_second > 0: + # we have more than one image in this second + dstdir = os.path.join(dstdir, + imd.strftime('%Y_%m_%d-%H_%M_%S') + +'.'+str(images_in_second)+ext) + else: + dstdir = os.path.join(dstdir, + imd.strftime('%Y_%m_%d-%H_%M_%S')+ext) + # check here if the dated image already exists in the database + # select * from images where path like dstdir + # use the count of the query result + c.execute("insert or ignore into images values(?,?,?,?,?,?)", + (digest, path, str(imd), dstdir, 'READY', None)) + conn.commit() + logger.info('%s added to DB'%path) + except Exception as e: + logger.debug('ATTENTION: unexpected error') + logger.exception(e) + continue + conn.commit() + #c.close() + # database is now built - process files + c.execute("""select * from images where status='READY'""") + for row in c: + source = row[1] + target = row[3] + target_dir = os.path.dirname(target) + if not os.path.exists(target_dir): + os.makedirs(target_dir) + if ns.move: + shutil.move(source, target) + elif ns.link: + if os.path.islink(source): + source = os.readlink(source) + os.symlink(source, target) + else: + shutil.copy2(source, target) + if __name__=='__main__': - ''' main - ''' - parser = getOptions() - (options, args) = parser.parse_args() - global logger - logger = makeLogger(options.log) - if len(args) == 1: - src = dst = args[0] - # this needs to build the tree under temp and move it to dest when done - elif len(args) == 2: - src = args[0] - dst = args[1] - else: - logger.error("invalid number of arguments") - parser.print_help() - sys.exit() - if options.verbose: - logger.setLevel(logging.DEBUG) - logger.debug('verbose mode on') - else: - logger.debug('verbose mode off') - if options.symlink: - logger.debug("following symlinks") - else: - logger.debug("ignoring symlinks") - if options.link: - logger.debug("creating a tree of links to photos") - options.move = False - # check for non implemented options - if options.rotate: - raise NotImplementedError('rotating images is not supported yet') - maxdepth = options.depth = int(options.depth) - logger.debug("depth is: "+ str(options.depth)) - if maxdepth == 0: - maxdepth = None - for dirpath, filename in treewalk(src, options.recursive, options.symlink, options.depth): - fullfilepath = os.path.join(dirpath, filename) - logger.debug("processing %s" %fullfilepath) - if not isPhoto(fullfilepath): # ignore non image extensions - logger.debug("ignoring non image file %s" %fullfilepath) - continue - try: - imagefile = ImageFile(fullfilepath) - except ImageException, e: - logger.debug(e) - continue - except Exception, e: - logger.debug(e) - continue - imagefile.move(dst, options.move, options.link) + """ main + """ + main() diff --git a/sqlalbumiser.py b/sqlalbumiser.py deleted file mode 100644 index 4771a64..0000000 --- a/sqlalbumiser.py +++ /dev/null @@ -1,354 +0,0 @@ -import argparse -import hashlib -import logging, logging.handlers -import os -import shutil -import sqlite3 -import sys -import tempfile - -from datetime import datetime, timedelta -from optparse import OptionParser - -try: - import pyexiv2 -except ImportError: - print('Missing dependency: please install pyexiv2 in order to use this script') - sys.exit(1) - -__version__ = '0.05' -IMAGE_EXT = ['.jpg','jpeg','.png'] - -def get_options(): - """ Creates the options and return a parser object - """ - #parser = OptionParser(usage='sqlalbumiser.py [options] src dest', version='%s'%__version__) - parser = argparse.ArgumentParser(prog='albumiser', - version=__version__, - description='Organise your photos the way they should be', - conflict_handler='resolve') - - # either be too verbose or quite, you cannot be both - verbose = parser.add_mutually_exclusive_group() - - verbose.add_argument('-v', '--verbose', - action='store_true', dest='verbose', - help='make loads of noise, useful for debugging!') - - verbose.add_argument('-q', '--quiet', - action='store_false', dest='verbose', - help="unless errors don't output anything.") - - parser.add_argument('-r', '--recursive', - action='store_true', dest='recursive', - help='operate recursively.') - - parser.add_argument('--rotate', - action='store_true', dest='rotate', - help='rotate images according to their EXIF rotation tag.') - - parser.add_argument('--dry-link', - action='store_true', dest='link', - help='creates a tree of symbolic links under destination with date time\ - hierarchy preserving the original images states.') - - parser.add_argument('-s', '--follow_links', - action='store_true', dest='follow_links', - help='follow symbolic linked directories.') - - parser.add_argument('-m', '--move', - action='store_true', dest='move', - help='delete original file from SOURCE, by default it makes a copy of the file.') - - parser.add_argument('--delete-duplicates', - action='store_true', dest='delete_duplicates', - help='delete duplicate files from SOURCE, by default it ignores them and keep them intact.') - - parser.add_argument('-d', '--depth', - type=int, dest='depth', - help='default is unlimited.') - - parser.add_argument('-g', '--log', - default=None, dest='log', - help='log all actions, default is console.') - parser.add_argument('source', action="store") #, nargs='?', default=os.getcwd()) - parser.add_argument('target', action="store") #, nargs='?', default=os.getcwd()) - # photos with no EXIF dates will either be copied into a special directory - # or will assume a UNIX epoch 1970-01-01, latter is default - parser.add_argument('--ignore-no-exif', - action='store_true', dest='ignore_no_exif', default=False, - help='ignore photos with missing EXIF date, otherwise use UNIX epoch.') - return parser - -def get_logger(log=None): - """ returns a logger object - """ - logger = logging.getLogger('albumiser') - if log is None: - # 'sys.stderr' - logger.addHandler(logging.StreamHandler()) - else: - logger.addHandler(logging.FileHandler(log)) - return logger - -def handle_options(): - parser = get_options() - ns = parser.parse_args() - logger = get_logger(ns.log) - if ns.source == ns.target: - ns.target = tempfile.mkdtemp() - ns.target_is_temp = True - if ns.verbose: - logger.setLevel(logging.DEBUG) - logger.debug('verbose mode on') - else: - logger.debug('verbose mode off') - if ns.follow_links: - logger.debug('following symlinks') - else: - logger.debug('ignoring symlinks') - if ns.link: - logger.debug('creating a tree of links to photos') - ns.move = False - # check for non implemented options - if ns.rotate: - raise NotImplementedError('rotating images is not supported yet') - if ns.depth is None: - max_depth = ns.depth = None - else: - max_depth = ns.depth = int(ns.depth) - - logger.debug('depth is: %r'%ns.depth) - if max_depth == 0: - max_depth = None - - return ns - - -class TreeWalker: - """provides a functionality similar to os.walk but can do - pre defined depth when needed. - """ - def __init__(self, top='/', max_depth=None, *args, **kwargs): - self._top = top - self._max_depth = max_depth - self.logger = kwargs.get('logger', None) - self._depth = 0 - - if self._max_depth is None or self._max_depth>0: - self._recursive = True - else: - self._recursive = False - - self._follow_links = kwargs.get('follow_links', False) - - def __repr__(self): - return ''%(self._top, self._max_depth) - - def walk(self, top=None, depth=0): - if not top: - top = self._top - if self._max_depth is not None: - if depth > self._max_depth: - return - for f in os.listdir(top): - file_path = os.path.join(top, f) - if os.path.isdir(file_path): - # its a dir recurse into it - if self._recursive: - islink = os.path.islink(file_path) - if (islink and self._follow_links) or not islink: - for dirpath, filename in self.walk(file_path, depth+1): - yield dirpath, filename - elif os.path.isfile(file_path): - yield top, f - else: - # Unknown file type, print a message - if self.logger: - self.logger.info('Skipping %s' % file_path) - - -def isPhoto(path): - """return true if image has a known image extension file - """ - try: - _, ext = os.path.splitext(path) - return ext.lower() in IMAGE_EXT - except: - return False - -def sha256HexDigest(indata): - """returns an sha256 hex digest of an input - """ - h = hashlib.sha256() - h.update(indata) - return h.hexdigest() - - -def main(): - ns = handle_options() - conn = sqlite3.connect('/tmp/albumise.sqlite') - conn.text_factory = str - c = conn.cursor() - c.execute("drop table if exists images") - #TODO add a new column for the year/month/day/hour/minute/second - c.execute(""" - create table images ( - hash text unique, - path text unique, - created text, - destination text, - status text, - error text) - """) - #TODO add a new index for the new column - c.execute("create index idx_hash on images(hash)") - c.execute("create index idx_path on images(path)") - c.execute("create index idx_status on images(status)") - c.execute("create index idx_created on images(created)") - conn.commit() - undated_counter = 0 - logger = logging.getLogger('albumiser') - logger.debug('about to walk files') - tree_walker = TreeWalker(ns.source, ns.depth, logger=logger, follow_symlinks=ns.follow_links) - for dirpath, filename in tree_walker.walk(): - undated = False - imd = None - try: - path = os.path.join(dirpath, filename) - _,ext = os.path.splitext(path) - ext = ext.lower() - logger.debug('processing %s' %path) - - if not isPhoto(path): - # ignore non image extensions - logger.warning('ignoring non image file %s' %path) - c.execute("insert or ignore into images values(?,?,?,?,?,?)", - (path, path, None, None, 'IGNORE', 'non image file')) - continue - - try: - metadata = pyexiv2.ImageMetadata(path) - metadata.read() - except(IOError, UnicodeDecodeError) as e: - logger.error('unable to read image metadata %s' %path) - # use file digest - digest = sha256HexDigest(path) - c.execute("insert or ignore into images values(?,?,?,?,?,?)", - (digest, path, None, None, 'FAILED', str(e))) - conn.commit() - continue - - # handle digest - try: - # reading thumbnails - logger.debug('trying thumbnail digest') - if len(metadata.previews) > 0: - largest = metadata.previews[-1] - data = largest.data - else: # no thumbnails available - data = metadata.buffer - except Exception as e: - # when metadata is not available use the entire file content hash - logger.exception(e) - logger.debug('unable to use metadata - using file content digest') - with open(path) as f: - data = f.read() - finally: - digest = sha256HexDigest(data) - - # check if we have had this file before - c.execute("""select * from images where hash=?""",(digest,)) - if len(c.fetchall())>0: - logger.warning('file %s already processed'%path) - if ns.delete_duplicates: - logger.debug('deleting duplicate file %s'%path) - c.execute("insert or ignore into images values(?,?,?,?,?,?)", - (digest, path, None, None, 'DUPLICATE', None)) - os.remove(path) - continue # we have this object already - - #import pdb - #if filename == '2012_04_09-15_15_18.2.jpg': - # pdb.set_trace() - - # fix images fucked up by fspot - try: - software = metadata['Exif.Image.Software'].value - if software.startswith('f-spot'): - # avoid using creation date time and rely on - # date time digitized tag - imd = metadata['Exif.Photo.DateTimeDigitized'].value - except KeyError: - logger.info('unknown software') - - if imd is None: - tag = metadata.get('Exif.Image.DateTime', None) - if tag is None: - tag = metadata.get('Exif.Photo.DateTimeOriginal', None) - if tag is not None: - imd = tag.value - - if imd is None or not isinstance(imd, datetime): - logger.warning('Exif Date Tags are missing - using UNIX epoch') - undated = True - undated_counter+=1 - imd = datetime(1970,1,1) - - yyyy = str(imd.year) - mm = str(imd.month) - dd = str(imd.day) - - dstdir = os.path.join(ns.target, - yyyy, - '%04d-%02d'%(imd.year,imd.month), - '%04d-%02d-%02d'%(imd.year,imd.month,imd.day)) - if undated: - dstdir = os.path.join(dstdir, imd.strftime('%Y_%m_%d-%H_%M_%S-') - +str(undated_counter)+ext) - else: - c.execute("select * from images where created=?",(str(imd),)) - images_in_second = len(c.fetchall()) - if images_in_second > 0: - # we have more than one image in this second - dstdir = os.path.join(dstdir, - imd.strftime('%Y_%m_%d-%H_%M_%S') - +'.'+str(images_in_second)+ext) - else: - dstdir = os.path.join(dstdir, - imd.strftime('%Y_%m_%d-%H_%M_%S')+ext) - # check here if the dated image already exists in the database - # select * from images where path like dstdir - # use the count of the query result - c.execute("insert or ignore into images values(?,?,?,?,?,?)", - (digest, path, str(imd), dstdir, 'READY', None)) - conn.commit() - logger.info('%s added to DB'%path) - except Exception as e: - logger.debug('ATTENTION: unexpected error') - logger.exception(e) - continue - conn.commit() - #c.close() - - # database is now built - process files - c.execute("""select * from images where status='READY'""") - for row in c: - source = row[1] - target = row[3] - target_dir = os.path.dirname(target) - if not os.path.exists(target_dir): - os.makedirs(target_dir) - if ns.move: - shutil.move(source, target) - elif ns.link: - if os.path.islink(source): - source = os.readlink(source) - os.symlink(source, target) - else: - shutil.copy2(source, target) - -if __name__=='__main__': - """ main - """ - main()