diff --git a/README b/README
index bcd962d..24ec3b4 100644
--- a/README
+++ b/README
@@ -6,7 +6,7 @@ Features:
 - collects some additional data about each photo
 - locally web browsable content store
 - suitable for running via cron long term to keep collection current
-
+- NEW! support for video
 
 This is an early but highly functional Flickr backup utility. It works
 fine for getting recently changed images and a small slice of the related
@@ -14,7 +14,6 @@ photo data. My intent is to expand this so that all related data is
 collected.
 
 Not yet but in the works:
-- support for video
 - better html templates
 - store all photo data Flickr can provide
 - sets, collections, galleries
@@ -67,6 +66,6 @@
 PicBackFlick is licensed under the AGPL instead of GPL because it's
 plausibly useful in a web service context.
 
-Reed Wade, 2011-04-03
+Reed Wade, 2011-04-16
diff --git a/photos.html b/photos.html
index 29728fd..50f3fe1 100644
--- a/photos.html
+++ b/photos.html
@@ -67,6 +67,10 @@
 for (i=0; i < page_size; i++) {
     p = image_list[i +offset]
     if (picbackflick_images[p]) {
+        v = ''
+        if (picbackflick_images[p].video_orig_path) {
+            v = '[MOVIE] '
+        }
         $('#photo_spots').append(
             ''+
             ''+
@@ -78,6 +82,7 @@
             '[M] '+
             '[O] '+
             '[P] '+
+            v+
             ''+
             '')
     }
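The contract between this page and the backup script is a single field: the JS above shows the [MOVIE] marker whenever a record in picbackflick_images carries video_orig_path, which Photo.save() sets in the diff below. A sketch of that record shape (everything here except video_orig_path is illustrative, not the real stored schema):

    # illustrative record shape only; real entries carry whatever photo
    # data Flickr returned, keyed the same way photos.html indexes them
    picbackflick_images = {
        '5597186999': {                                    # ID from the example URL below
            'title': 'some title',
            'media': 'video',
            'video_orig_path': 'video/99/5597186999.mov',  # extension comes from content-disposition
        },
    }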
diff --git a/picbackflick.py b/picbackflick.py
index bceb1f7..5ff8778 100755
--- a/picbackflick.py
+++ b/picbackflick.py
@@ -40,6 +40,7 @@
 import os
 import time
 import urllib2
+import glob
 import json
 from optparse import OptionParser
 import ConfigParser
@@ -80,6 +81,8 @@ class Photo:
         print p.get_image_url('s')
     """
 
+    BUF_SIZE = 1024*1024
+
     def __init__(self, pbf, id):
         self.pbf = pbf
         self.id = id
@@ -146,40 +149,81 @@ def save(self):
                 self.pbf.info("skipping "+f)
                 continue
             if not os.path.exists(os.path.dirname(f)):
-                os.makedirs(os.path.dirname(f)) ## TODO: deal with failures in dir creation
+                os.makedirs(os.path.dirname(f))
             self.pbf.info("writing "+f)
             img = urllib2.urlopen(self.get_image_url(size=size))
             out = open(f,'wb')
-            out.write(img.read()) ## TODO: need to look at chunking this up instead of single buffer
+            while True:
+                buf = img.read(self.BUF_SIZE)
+                if len(buf) == 0:
+                    break
+                out.write(buf)
             out.close()
             img.close()
+
+        if self.vals['media'] == 'video':
+            # example:
+            #   http://www.flickr.com/photos/reedwade/5597186999/play/orig/e45022b02e/
+            # this was taken from a single example and then looking at the output of a call to flickr.photos.getSizes()
+            #
+            # I don't find any documentation from Flickr saying this is or isn't the correct scheme for fetching video originals so
+            # I hope this works for the general case. It seems plausible
+            #
+            url = "http://www.flickr.com/photos/%s/%s/play/orig/%s/" % (self.pbf.options.flickr_username, self.id, self.vals['originalsecret'])
 
-        if self.vals['media'] == 'video':
-            # example:
-            #   http://www.flickr.com/photos/reedwade/5597186999/play/orig/e45022b02e/
-            # this was taken from a single example and then looking at the output of a call to flickr.photos.getSizes()
-            url = "http://www.flickr.com/photos/%s/%s/play/orig/%s/" % (self.pbf.options.flickr_username, self.id, self.vals['originalsecret'])
-            self.vals['video_orig_path'] = os.path.join('video',self.id[-2:], self.id)
-            f = os.path.join(self.pbf.options.photos_path,self.vals['video_orig_path'])
-
-            # ok, now we run into a problem. We don't know the extension for the video file. It could be one of several things.
-            # We have to fetch the file and check to content-disposition header to learn.
-            # But, maybe we already have the video file and don't want to re-fetch it. So, we look for files with the ID prefix.
-
-            todo -- check for pre-existing video file and skip if found
-
-            todo -- read url, look at header to learn file ext, set that and open the output for writing and then spin
-            ext = BLAH
-            f += '.'+ext
+            self.vals['video_orig_path'] = os.path.join('video',self.id[-2:], self.id) # 'video/89/123456789'
+
+            f = os.path.join(self.pbf.options.photos_path,self.vals['video_orig_path'])
+
+            # ok, now we run into a problem. We don't know the extension for the video file. It could be one of several things.
+            # We have to fetch the file and check the content-disposition header to learn it.
+            # But, maybe we already have the video file and don't want to re-fetch it. So, we look for video files with the ID prefix.
+
+            # check for pre-existing video file (any extension) and skip if found
+            #
+            # It's possible but unlikely they've replaced it with a new video file of a different extension.
+            # In that case we lose.
+
+            found = glob.glob(f+'.*')
+            if len(found):
+                self.pbf.info("skipping "+found[0])
+            else:
+
+                if not os.path.exists(os.path.dirname(f)):
+                    os.makedirs(os.path.dirname(f))
+
+                # now fetch the video file
+                # read url, look at header to learn file ext, set that and open the output for writing and then spin
+
+                img = urllib2.urlopen(url)
+
+                try:
+                    ext = img.info().getheader('content-disposition').split('.')[-1]
+                except:
+                    self.pbf.info("failed to determine video file extension, using 'video' instead")
+                    ext = 'video'
+                self.vals['video_orig_path'] += '.'+ext
+                f += '.'+ext
+
+                self.pbf.info("writing "+f)
+                out = open(f,'wb')
+
+                while True:
+                    buf = img.read(self.BUF_SIZE)
+                    if len(buf) == 0:
+                        break
+                    out.write(buf)
+
+                out.close()
+                img.close()
 
         ## meta data
         f = os.path.join(self.pbf.options.photos_path,'info',self.id[-2:],self.id+".js")
         self.pbf.info("writing "+f)
         if not os.path.exists(os.path.dirname(f)):
-            os.makedirs(os.path.dirname(f)) ## TODO: deal with failures in dir creation
+            os.makedirs(os.path.dirname(f))
         out = open(f,'wb')
         # we use dateuploaded as the key along with ID because we want to sort on this later
         # it turns out Flickr photo IDs aren't strictly sequential
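The extension handling in the hunk above is the subtle part: the server's content-disposition header is the only place the file type shows up, and it is only available once the fetch has started. Here is the same fetch-and-sniff flow as a minimal standalone sketch, in the codebase's Python 2 idiom (fetch_video and the info callback are illustrative names, not functions that exist in picbackflick.py):

    import glob, os, urllib2

    BUF_SIZE = 1024*1024   # 1 MiB chunks, matching Photo.BUF_SIZE above

    def fetch_video(url, dest_base, info=lambda msg: None):
        # skip if any dest_base.* file already exists; we can't know the
        # extension without fetching, so any extension counts as "have it"
        found = glob.glob(dest_base + '.*')
        if found:
            info("skipping " + found[0])
            return found[0]
        if not os.path.exists(os.path.dirname(dest_base)):
            os.makedirs(os.path.dirname(dest_base))
        img = urllib2.urlopen(url)
        try:
            # e.g. 'attachment; filename=5597186999.mov' -> 'mov'
            ext = img.info().getheader('content-disposition').split('.')[-1]
        except AttributeError:
            # header absent; fall back the same way the patch does
            info("failed to determine video file extension, using 'video' instead")
            ext = 'video'
        f = dest_base + '.' + ext
        info("writing " + f)
        out = open(f, 'wb')
        while True:
            buf = img.read(BUF_SIZE)
            if len(buf) == 0:
                break
            out.write(buf)
        out.close()
        img.close()
        return f

Catching AttributeError rather than a bare except covers the one expected failure (getheader() returns None when the header is missing, so the .split() call raises) without also swallowing unrelated errors.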
@@ -237,7 +281,7 @@ def get_last_updated_timestamp(self):
 
     def set_last_updated_timestamp(self):
         if not os.path.exists(os.path.dirname(self.options.last_updated_filename)):
-            os.makedirs(os.path.dirname(self.options.last_updated_filename)) ## TODO: deal with failures in dir creation
+            os.makedirs(os.path.dirname(self.options.last_updated_filename))
         out = open(self.options.last_updated_filename, "wb")
         out.write("%d\n" % self.start_time)
         out.close()
@@ -309,8 +353,8 @@ def handle_command_line_options(self):
                           help="rebuild the local photo javascript db file")
 
         ## TODO: implement single photo fetch by ID
-        ##parser.add_option("-s", "--single", dest="single_photo", metavar="PHOTO-ID",
-        ##                  help="update a single photo entry")
+        ##parser.add_option("-s", "--single", dest="single_photo", metavar="PHOTO-OR-VIDEO-ID",
+        ##                  help="update a single entry")
 
         ## TODO: implement public only feature
         ##parser.add_option("--public-photos-only", dest="public_photos_only", action="store_true", default=False,
@@ -405,7 +449,7 @@ def _get_recent_photos(self):
 
             photo = Photo(self, id=p.attrib['id'])
             photos_seen += 1
 
-            self.info("%d / %d : %s - %s" % (photos_seen, photo_count, photo.vals['title'], photo.vals['description']))
+            self.info("%d / %d : %s : %s - %s" % (photos_seen, photo_count, p.attrib['id'], photo.vals['title'], photo.vals['description']))
 
             photo.save()
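A closing note on the guessed /play/orig/&lt;originalsecret&gt;/ scheme: the new comment in save() concedes it is inferred from a single example, so a fallback worth considering is asking flickr.photos.getSizes (which that comment already mentions) for a source URL instead of building one by hand. The sketch below assumes the "Video Original" label observed in getSizes output and an unauthenticated REST call; both are assumptions to verify, since originals are generally only visible to an authenticated owner:

    import urllib2
    import xml.etree.ElementTree as ET

    def video_source_url(api_key, photo_id, label='Video Original'):
        # illustrative helper: ask flickr.photos.getSizes for a video's
        # downloadable source; the 'Video Original' label is an observed
        # value from API output, not documented behaviour
        url = ("http://api.flickr.com/services/rest/"
               "?method=flickr.photos.getSizes&api_key=%s&photo_id=%s"
               % (api_key, photo_id))
        rsp = ET.parse(urllib2.urlopen(url)).getroot()
        for size in rsp.findall('sizes/size'):
            if size.get('label') == label:
                return size.get('source')
        return None

If no matching label turns up, the function returns None and the hand-built /play/orig/ URL remains the fallback.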