sorted video
reedwade committed Apr 16, 2011
1 parent bd82ce0 commit 91d2432
Showing 3 changed files with 75 additions and 27 deletions.
5 changes: 2 additions & 3 deletions README
@@ -6,15 +6,14 @@ Features:
- collects some additional data about each photo
- locally web browsable content store
- suitable for running via cron long term to keep collection current

- NEW! support for video

This is an early but highly functional Flickr backup utility. It works
fine for getting recently changed images and a small slice of the related
photo data. My intent is to expand this so that all related data is collected.


Not yet but in the works:
- support for video
- better html templates
- store all photo data Flickr can provide
- sets, collections, galleries
@@ -67,6 +66,6 @@ PicBackFlick is licensed under the AGPL instead of GPL because it's plausibly us
context.


Reed Wade <reedwade@gmail.com>, 2011-04-03
Reed Wade <reedwade@gmail.com>, 2011-04-16
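
The README's cron note in practice: a minimal crontab sketch for keeping a collection current (the install path, schedule, log location, and bare invocation are assumptions, not part of this commit):

# hypothetical: fetch new and changed items nightly at 03:10
10 3 * * * cd /path/to/picbackflick && python picbackflick.py >> picbackflick.log 2>&1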


5 changes: 5 additions & 0 deletions photos.html
@@ -67,6 +67,10 @@
for (i=0; i < page_size; i++) {
p = image_list[i +offset]
if (picbackflick_images[p]) {
v = ''
if (picbackflick_images[p].video_orig_path) {
v = '<a href="'+picbackflick_images[p].video_orig_path+'" target="_blank">[MOVIE]</a> '
}
$('#photo_spots').append(
'<div class="photo_entry">'+
'<img src="'+picbackflick_images[p].image_s+'" width="75" height="75" />' +
@@ -78,6 +82,7 @@
'<a href="'+picbackflick_images[p].image_+'" target="_blank">[M]</a> '+
'<a href="'+picbackflick_images[p].image_o+'" target="_blank">[O]</a> '+
'<a href="http://flickr.com/photos/'+flickr_username+'/'+picbackflick_images[p].id+'" target="_blank">[P]</a> '+
v+
'</span>'+
'</div>')
}
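For reference, the template above assumes each picbackflick_images entry carries at least the fields below. This example entry is hypothetical -- the field names come from the code, the values are invented, and video_orig_path is present only for videos:

{
    "id": "5597186999",
    "image_s": "images/99/5597186999_s.jpg",
    "image_": "images/99/5597186999.jpg",
    "image_o": "images/99/5597186999_o.jpg",
    "video_orig_path": "video/99/5597186999.mov"
}
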
92 changes: 68 additions & 24 deletions picbackflick.py
@@ -40,6 +40,7 @@
import os
import time
import urllib2
import glob
import json
from optparse import OptionParser
import ConfigParser
@@ -80,6 +81,8 @@ class Photo:
print p.get_image_url('s')
"""

BUF_SIZE = 1024*1024

def __init__(self, pbf, id):
self.pbf = pbf
self.id = id
@@ -146,40 +149,81 @@ def save(self):
self.pbf.info("skipping "+f)
continue
if not os.path.exists(os.path.dirname(f)):
os.makedirs(os.path.dirname(f)) ## TODO: deal with failures in dir creation
os.makedirs(os.path.dirname(f))

self.pbf.info("writing "+f)
img = urllib2.urlopen(self.get_image_url(size=size))
out = open(f,'wb')
out.write(img.read()) ## TODO: need to look at chunking this up instead of single buffer
while True:
buf = img.read(self.BUF_SIZE)
if len(buf) == 0:
break
out.write(buf)
out.close()
img.close()

if self.vals['media'] == 'video':
# example:
# http://www.flickr.com/photos/reedwade/5597186999/play/orig/e45022b02e/
# this was taken from a single example and then looking at the output of a call to flickr.photos.getSizes()
#
# I don't find any documentation from Flickr saying this is or isn't the correct scheme for fetching video originals so
# I hope this works for the general case. It seems plausible
#
url = "http://www.flickr.com/photos/%s/%s/play/orig/%s/" % (self.pbf.options.flickr_username, self.id, self.vals['originalsecret'])

if self.vals['media'] == 'video':
# example:
# http://www.flickr.com/photos/reedwade/5597186999/play/orig/e45022b02e/
# this was taken from a single example and then looking at the output of a call to flickr.photos.getSizes()
url = "http://www.flickr.com/photos/%s/%s/play/orig/%s/" % (self.pbf.options.flickr_username, self.id, self.vals['originalsecret'])
self.vals['video_orig_path'] = os.path.join('video',self.id[-2:], self.id)
f = os.path.join(self.pbf.options.photos_path,self.vals['video_orig_path'])

# ok, now we run into a problem. We don't know the extension for the video file. It could be one of several things.
# We have to fetch the file and check to content-disposition header to learn.
# But, maybe we already have the video file and don't want to re-fetch it. So, we look for files with the ID prefix.

todo -- check for pre-existing video file and skip if found

todo -- read url, look at header to learn file ext, set that and open the output for writing and then spin
ext = BLAH
self.vals['video_orig_path'] = os.path.join('video',self.id[-2:], self.id) # 'video/89/123456789'

f = os.path.join(self.pbf.options.photos_path,self.vals['video_orig_path'])

# ok, now we run into a problem. We don't know the extension for the video file. It could be one of several things.
# We have to fetch the file and check the content-disposition header to learn it.
# But, maybe we already have the video file and don't want to re-fetch it. So, we look for video files with the ID prefix.

# check for pre-existing video file (any extension) and skip if found
#
# It's possible but unlikely they've replaced it with a new video file of a different extension.
# In that case we lose.

found = glob.glob(f+'.*')
if len(found):
self.pbf.info("skipping "+found[0])
else:

if not os.path.exists(os.path.dirname(f)):
os.makedirs(os.path.dirname(f))

# now fetch the video file
# read url, look at header to learn file ext, set that and open the output for writing and then spin

img = urllib2.urlopen(url)

try:
ext = img.info().getheader('content-disposition').split('.')[-1]
except:
self.pbf.info("failed to determine video file extension, using 'video' instead")
ext = 'video'

self.vals['video_orig_path'] += '.'+ext
f += '.'+ext


self.pbf.info("writing "+f)
out = open(f,'wb')

while True:
buf = img.read(self.BUF_SIZE)
if len(buf) == 0:
break
out.write(buf)

out.close()
img.close()
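
# A more defensive variant of the extension sniff above (not part of
# this commit) could let the stdlib handle quoting and extra parameters
# in content-disposition, e.g. 'attachment; filename="IMG_1234.mov"':
#
#   import cgi
#   header = img.info().getheader('content-disposition') or ''
#   _, params = cgi.parse_header(header)
#   filename = params.get('filename', '')
#   ext = filename.rsplit('.', 1)[-1] if '.' in filename else 'video'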

## meta data
f = os.path.join(self.pbf.options.photos_path,'info',self.id[-2:],self.id+".js")
self.pbf.info("writing "+f)
if not os.path.exists(os.path.dirname(f)):
os.makedirs(os.path.dirname(f)) ## TODO: deal with failures in dir creation
os.makedirs(os.path.dirname(f))
out = open(f,'wb')
# we use dateuploaded as the key along with ID because we want to sort on this later
# it turns out Flickr photo IDs aren't strictly sequential
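#
# A hypothetical sketch (the exact key format isn't shown in this hunk)
# of a key with that property -- dateuploaded is a fixed-width Unix
# timestamp, so plain string sorting is chronological even when photo
# IDs are out of order:
#
#   key = "%s-%s" % (self.vals['dateuploaded'], self.id)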
@@ -237,7 +281,7 @@ def get_last_updated_timestamp(self):

def set_last_updated_timestamp(self):
if not os.path.exists(os.path.dirname(self.options.last_updated_filename)):
os.makedirs(os.path.dirname(self.options.last_updated_filename)) ## TODO: deal with failures in dir creation
os.makedirs(os.path.dirname(self.options.last_updated_filename))
out = open(self.options.last_updated_filename, "wb")
out.write("%d\n" % self.start_time)
out.close()
@@ -309,8 +353,8 @@ def handle_command_line_options(self):
help="rebuild the local photo javascript db file")

## TODO: implement single photo fetch by ID
##parser.add_option("-s", "--single", dest="single_photo", metavar="PHOTO-ID",
## help="update a single photo entry")
##parser.add_option("-s", "--single", dest="single_photo", metavar="PHOTO-OR-VIDEO-ID",
## help="update a single entry")

## TODO: implement public only feature
##parser.add_option("--public-photos-only", dest="public_photos_only", action="store_true", default=False,
@@ -405,7 +449,7 @@ def _get_recent_photos(self):
photo = Photo(self, id=p.attrib['id'])

photos_seen += 1
self.info("%d / %d : %s - %s" % (photos_seen, photo_count, photo.vals['title'], photo.vals['description']))
self.info("%d / %d : %s : %s - %s" % (photos_seen, photo_count, p.attrib['id'], photo.vals['title'], photo.vals['description']))

photo.save()

