-
Notifications
You must be signed in to change notification settings - Fork 17
/
flickrmirrorer.py
executable file
·808 lines (659 loc) · 31.1 KB
/
flickrmirrorer.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
#!/usr/bin/env python
# A small command-line python script that creates a local backup of your
# Flickr data. It mirrors images, titles, description, tags, albums and
# collections.
#
# Available at https://github.com/markdoliner/flickrmirrorer
#
# Licensed as follows (this is the 2-clause BSD license, aka
# "Simplified BSD License" or "FreeBSD License"):
#
# Copyright (c)
# Ciprian Radu, 2016
# Johan Walles, 2016
# Mark Doliner, 2012-2023
# Mattias Holmlund, 2013
# Steve Cassidy, 2016
# Victor Engmark, 2016
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
# - Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
# - Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
import argparse
import datetime
import dateutil.parser
import errno
import glob
import math
import os
import requests
import shutil
import signal
import sys
import time
import urllib.parse
import webbrowser
try:
# We try importing simplejson first because it's faster than json
# in python 2.7 and lower
import simplejson as json
except ImportError:
import json
try:
import flickrapi
except ImportError:
sys.stderr.write('Error importing flickrapi python library. Is it installed?\n')
sys.exit(1)
# Application credentials for the Flickr API. Both values are handed to
# flickrapi.FlickrAPI when FlickrMirrorer creates its client.
API_KEY = '9c5c431017e712bde232a2f142703bb2'
API_SECRET = '7c024f6e7a36fc03'
# Prompt shown to the user during manual authorization. The single %s
# placeholder is filled with the authorization URL, and the resulting text is
# passed to input() so the user can type the verifier code.
PLEASE_GRANT_AUTHORIZATION_MSG = """
Please authorize Flickr Mirrorer to read your photos, titles, tags, etc.
1. Visit %s
2. Click "OK, I'LL AUTHORIZE IT"
3. Copy and paste the code here and press 'return'
"""
# Page size sent as 'per_page' on every paginated Flickr API request.
NUM_PHOTOS_PER_BATCH = 500
class VideoDownloadError(Exception):
    """Raised when a video cannot be fetched and must be downloaded manually.

    The first constructor argument is the human-readable message that is
    later printed to the user.
    """

    def __str__(self):
        """Return the first constructor argument as text.

        Returns an empty string when the exception was created without
        arguments instead of failing with IndexError.
        """
        if not self.args:
            return ''
        # Wrap in a 1-tuple so that a tuple-valued argument is formatted as
        # a single value rather than being unpacked by the % operator.
        return '%s' % (self.args[0],)
def _ensure_dir_exists(path):
    """Create the directory 'path' (and any missing parents).

    An already existing directory is accepted silently. Anything else that
    prevents 'path' from being a directory -- including 'path' already
    existing as a regular file -- is reported on stderr.

    Calls sys.exit(1) if any directory could not be created."""
    try:
        # exist_ok=True tolerates an existing directory but still raises
        # when the existing entry is not a directory.
        os.makedirs(path, exist_ok=True)
    except OSError as ex:
        sys.stderr.write('Error creating destination directory %s: %s\n'
                         % (path, ex.strerror))
        sys.exit(1)
def _ensure_dir_doesnt_exist(path):
    """Delete the directory tree rooted at 'path', if there is one.

    A missing 'path' is not an error. Any other failure is written to
    stderr and the script terminates via sys.exit(1)."""
    try:
        shutil.rmtree(path)
    except OSError as err:
        if err.errno == errno.ENOENT:
            # Nothing to remove: the directory was already absent.
            return
        sys.stderr.write('Error removing %s: %s\n' % (path, err.strerror))
        sys.exit(1)
def _validate_json_response(rsp):
    """Exits the script with an error if the response is a failure.

    A response counts as successful only when its 'stat' field equals 'ok'.
    Missing 'stat', 'code' or 'message' fields are tolerated so that a
    malformed failure response still produces the error message and exit
    status instead of a KeyError traceback.

    Args:
        rsp (dict): A parsed JSON response from the Flickr API.
    """
    if rsp.get('stat') == 'ok':
        return
    sys.stderr.write('API request failed: Error %s: %s\n' % (
        rsp.get('code', 'unknown'), rsp.get('message', 'no message provided')))
    sys.exit(1)
def get_photo_datetime(photo):
    """Return date a photo was taken.

    Obtained from:
    1. 'datetaken' unless 'datetakenunknown'
    2. Parsed from photo title 'YYYYMMDD_HHmmss'
    3. 'datetaken' anyway; it's available even if unknown, so we just
       go with whatever Flickr made up for us.

    Args:
        photo (dict): Photo metadata containing the 'datetakenunknown',
            'datetaken' and 'title' keys.

    Returns:
        datetime.datetime
    """
    # Compare as text so the flag is honored whether it is delivered as the
    # string "0" or as the integer 0.
    if str(photo['datetakenunknown']) == '0':
        return dateutil.parser.parse(photo['datetaken'])

    try:
        parsed = datetime.datetime.strptime(photo['title'], '%Y%m%d_%H%M%S')
    except ValueError:
        # Unable to parse photo title as datetime
        parsed = None

    # Only trust a title-derived date that is plausible: after the year 2000
    # and not in the future.
    if parsed is not None and parsed.year > 2000 and parsed < datetime.datetime.now():
        return parsed

    return dateutil.parser.parse(photo['datetaken'])
class FlickrMirrorer(object):
    """Mirror a Flickr account into a local directory tree.

    Layout created below the destination directory:
      photostream/        every photo/video plus a '<name>.metadata' JSON file
      Albums/             one directory per album, holding symlinks into
                          photostream/ and a 'metadata' JSON file
      Collections/        one directory per collection, holding symlinks to
                          album directories and a 'metadata' JSON file
      Not in any album/   symlinks to items that belong to no album
      tmp                 scratch file used for write-then-rename
    """

    dest_dir = None
    photostream_dir = None
    tmp_filename = None
    flickr = None

    def __init__(self, args):
        """Store options, derive paths, install the SIGINT handler and
        create the Flickr API client.

        Args:
            args: Parsed command-line namespace providing destdir,
                verbosity, statistics, include_views, ignore_photos,
                ignore_videos and delete_unknown.
        """
        self.dest_dir = args.destdir
        self.verbosity = args.verbosity
        self.print_statistics = args.statistics
        self.include_views = args.include_views
        self.ignore_photos = args.ignore_photos
        self.ignore_videos = args.ignore_videos
        self.delete_unknown = args.delete_unknown

        self.photostream_dir = os.path.join(self.dest_dir, 'photostream')
        self.albums_dir = os.path.join(self.dest_dir, 'Albums')
        self.collections_dir = os.path.join(self.dest_dir, 'Collections')
        self.tmp_filename = os.path.join(self.dest_dir, 'tmp')

        # Statistics
        self.deleted_photos = 0
        self.modified_photos = 0
        self.new_photos = 0
        self.modified_albums = 0
        self.modified_collections = 0

        # Register a SIGINT (Ctrl-C) handler
        signal.signal(signal.SIGINT, self._sig_int_handler)

        # Create flickrapi instance
        self.flickr = flickrapi.FlickrAPI(api_key=API_KEY, secret=API_SECRET, format='parsed-json')

    def run(self):
        """Run the mirror and always remove the temp file afterwards."""
        try:
            self._run_helper()
        finally:
            self._cleanup()

    def _run_helper(self):
        """Authenticate, then mirror the photostream, albums and collections."""
        # Authenticate
        # The user-friendly way to do this is with this command:
        #   self.flickr.authenticate_via_browser(perms='read')
        # However, the nature of this script is such that we don't want
        # to rely on people running it somewhere with a web browser
        # installed. So use the manual authentication process. A
        # reasonable compromise might be to try browser auth first and
        # if it fails then fall back to manual auth. Really flickrapi
        # should do that for us. Or at least print the URL to the
        # console.
        if not self.flickr.token_valid(perms='read'):
            self.flickr.get_request_token(oauth_callback='oob')
            authorize_url = self.flickr.auth_url(perms='read')
            webbrowser.open_new_tab(authorize_url)
            verifier = input(PLEASE_GRANT_AUTHORIZATION_MSG % authorize_url)
            self.flickr.get_access_token(verifier)

        if self.ignore_photos and self.ignore_videos:
            sys.stderr.write(
                'There is nothing to do because photos and videos are ignored. '
                'Please choose to mirror at least photos or videos.\n')
            return

        self._verbose('Photos will be %s' % ('ignored' if self.ignore_photos else 'mirrored'))
        self._verbose('Videos will be %s' % ('ignored' if self.ignore_videos else 'mirrored'))
        self._verbose('Unknown files in %s will%s be deleted' % (
            self.dest_dir, '' if self.delete_unknown else ' not'))

        # Create destination directory
        _ensure_dir_exists(self.dest_dir)

        # Fetch photos
        self._download_all_photos()

        # Create albums and collections
        self._mirror_albums()
        self._create_not_in_any_album_dir()
        self._mirror_collections()

        self._print_statistics()

    def _print_statistics(self):
        """Print the transfer counters if statistics were requested."""
        if not self.print_statistics:
            return

        print('New photos / videos: %d' % self.new_photos)
        print('Deleted photos / videos: %d' % self.deleted_photos)
        print('Modified photos /videos: %d' % self.modified_photos)
        print('Modified albums: %d' % self.modified_albums)
        print('Modified collections: %d' % self.modified_collections)

    def _should_mirror(self, photo):
        """Return True if the item's media type has not been excluded by
        the ignore_photos / ignore_videos options."""
        media = photo['media']
        return (media == 'photo' and not self.ignore_photos) or (
            media == 'video' and not self.ignore_videos)

    def _download_all_photos(self):
        """Download all our pictures and metadata.

        If you have a lot of photos then this function will take a while.

        Calls sys.exit(1) if any video needs a manual download or if no
        item at all was fetched."""
        self._verbose('Mirroring all photos and videos in photostream')

        _ensure_dir_exists(self.photostream_dir)

        new_files = set()
        current_page = 1
        metadata_fields = ('description,license,date_upload,date_taken,owner_name,icon_server,original_format,'
                           'last_update,geo,tags,machine_tags,o_dims,media')
        if self.include_views:
            metadata_fields += ',views'
        download_errors = []

        while True:
            rsp = self.flickr.people_getPhotos(
                user_id='me',
                extras=metadata_fields,
                per_page=NUM_PHOTOS_PER_BATCH,
                page=current_page,
            )
            _validate_json_response(rsp)

            for photo in rsp['photos']['photo']:
                if not self._should_mirror(photo):
                    continue
                try:
                    new_files |= self._download_photo(photo)
                except VideoDownloadError as e:
                    # Collect these so the user gets the complete list of
                    # manual downloads in one run.
                    download_errors.append(e)

            if current_page >= rsp['photos']['pages']:
                # We've reached the end of the photostream. Stop looping.
                break

            current_page += 1

        # Error out if there were exceptions
        if download_errors:
            sys.stderr.write(
                'The Flickr API does not allow downloading original video files.\n'
                'Please save the files listed below to the %s directory.\n'
                'Note: You must be logged into your Flickr account in order to download '
                'your full resolution videos!\n' % self.photostream_dir)
            for error in download_errors:
                sys.stderr.write('  %s\n' % error)
            sys.exit(1)

        # Error out if we didn't fetch any photos
        if not new_files:
            sys.stderr.write('Error: The Flickr API returned an empty list of photos. '
                             'Bailing out without deleting any local copies in case this is an anomaly.\n')
            sys.exit(1)

        # Divide by 2 because we want to ignore the photo metadata files
        # for the purposes of our statistics. Integer division keeps the
        # counter an int.
        self.deleted_photos = self._delete_unknown_files(self.photostream_dir, new_files, 'file') // 2

    def _download_photo(self, photo):
        """Fetch and save a media item (photo or video) and the metadata
        associated with it.

        Returns a python set containing the filenames for the data.

        Raises:
            VideoDownloadError: If a video cannot be fetched automatically.
        """
        url = self._get_photo_url(photo)
        photo_basename = self._get_photo_basename(photo)
        photo_filename = os.path.join(self.photostream_dir, photo_basename)
        metadata_basename = '%s.metadata' % photo_basename
        metadata_filename = '%s.metadata' % photo_filename

        # Sanity check: neither target may be a directory or a symlink.
        for filename in (photo_filename, metadata_filename):
            if os.path.isdir(filename) or os.path.islink(filename):
                sys.stderr.write('Error: %s exists but is not a file. This is not allowed.\n' % filename)
                sys.exit(1)

        # Download photo if it doesn't exist locally or if the metadata
        # file exists and the lastupdate timestamp has changed.
        # TODO: Should ideally also set should_download_photo to True if
        #       not os.path.exists(metadata_filename), but that doesn't work
        #       correctly for videos because the metadata file won't have been
        #       created when the video file was created because the video was
        #       downloaded out of band by the user.
        should_download_photo = not os.path.exists(photo_filename)
        if not should_download_photo:
            # Download photo if lastupdate timestamp has changed.
            try:
                with open(metadata_filename) as json_file:
                    metadata = json.load(json_file)
                should_download_photo |= metadata['lastupdate'] != photo['lastupdate']
            except IOError as ex:
                if ex.errno != errno.ENOENT:
                    sys.stderr.write('Error reading %s: %s\n' % (metadata_filename, ex))
                    sys.exit(1)

        if should_download_photo:
            if not os.path.exists(photo_filename):
                self.new_photos += 1
            else:
                self.modified_photos += 1

            self._progress('Fetching %s' % photo_basename)
            request = requests.get(url, stream=True)
            if not request.ok:
                if photo['media'] == 'video':
                    raise VideoDownloadError(
                        'Manual download required (video may have changed): '
                        'https://www.flickr.com/video_download.gne?id=%s' % photo['id'])

                sys.stderr.write(
                    'Error: Failed to fetch %s: %s: %s\n'
                    % (url, request.status_code, request.reason))
                sys.exit(1)

            # Write to temp file then rename to avoid incomplete files
            # in case of failure part-way through.
            with open(self.tmp_filename, 'wb') as tmp_file:
                # Use 1 MiB chunks.
                for chunk in request.iter_content(2**20):
                    tmp_file.write(chunk)
            os.rename(self.tmp_filename, photo_filename)
        else:
            self._verbose('Skipping %s because we already have it'
                          % photo_basename)

        # Write metadata
        if self._write_json_if_different(metadata_filename, photo):
            self._progress('Updated metadata for %s' % photo_basename)
        else:
            self._verbose(
                'Skipping metadata for %s because we already have it' %
                photo_basename)

        photo_datetime = get_photo_datetime(photo)
        self._set_timestamp_if_different(photo_datetime, photo_filename)
        self._set_timestamp_if_different(photo_datetime, metadata_filename)

        return {photo_basename, metadata_basename}

    def _mirror_albums(self):
        """Create a directory for each album, and create symlinks to the
        files in the photostream."""
        self._verbose('Mirroring albums')

        album_dirs = set()

        # Fetch albums
        rsp = self.flickr.photosets_getList()
        _validate_json_response(rsp)
        if rsp['photosets']:
            for album in rsp['photosets']['photoset']:
                album_dirs |= self._mirror_album(album)

        self._delete_unknown_files(self.albums_dir, album_dirs, 'album')

    def _mirror_album(self, album):
        """Mirror one album and return a set holding its directory name.

        Args:
            album (dict): One entry of the album list; it is modified in
                place before being written as the album's metadata.
        """
        album_title = album['title']['_content']
        album_basename = self._get_album_dirname(album['id'], album_title)
        album_dir = os.path.join(self.albums_dir, album_basename)

        # Only used when a response carries no 'pages' value. This estimate
        # is based on album['photos'] alone, so it is not relied upon when
        # the response itself reports the number of pages.
        estimated_pages = int(math.ceil(float(album['photos']) / NUM_PHOTOS_PER_BATCH))

        # Fetch list of photos
        photos = []
        current_page = 1
        while True:
            # Fetch photos in this album
            rsp = self.flickr.photosets_getPhotos(
                photoset_id=album['id'],
                extras='original_format,media',
                per_page=NUM_PHOTOS_PER_BATCH,
                page=current_page,
            )
            _validate_json_response(rsp)
            photoset = rsp['photoset']
            photos.extend(photo for photo in photoset['photo'] if self._should_mirror(photo))

            if current_page >= int(photoset.get('pages', estimated_pages)):
                break
            current_page += 1

        # Include list of photo IDs in metadata, so we can tell if photos
        # were added or removed from the album when mirroring in the future.
        album['photos'] = [photo['id'] for photo in photos]

        if (not self.include_views) and 'count_views' in album:
            del album['count_views']

        # Add a version number to the album metadata. This gives us an
        # easy way to invalidate the local copy and cause the album to
        # be recreated, if needed. More specifically this causes the
        # albums to be recreated now that I've fixed the bug where
        # symlinks to videos were broken.
        album['flickrmirrorer_album_metadata_version'] = 2

        metadata_filename = os.path.join(album_dir, 'metadata')

        # TODO: Should ensure local album directory accurately reflects the
        #       remote album data even if the metadata hasn't changed (important in
        #       case the local album data has been tampered with).
        if not os.path.exists(album_dir) or self._is_file_different(metadata_filename, album):
            # Metadata changed, might be due to updated list of photos.
            self._progress('Updating album %s' % album_title)
            self.modified_albums += 1

            # Delete and recreate the album
            _ensure_dir_doesnt_exist(album_dir)
            _ensure_dir_exists(album_dir)

            # Create symlinks for each photo, prefixed with a number so that
            # the local alphanumeric sort order matches the order on Flickr.
            digits = len(str(len(photos)))
            for i, photo in enumerate(photos):
                photo_basename = self._get_photo_basename(photo)
                photo_fullname = os.path.join(self.photostream_dir, photo_basename)
                photo_relname = os.path.relpath(photo_fullname, album_dir)
                symlink_basename = '%s_%s' % (str(i+1).zfill(digits), photo_basename)
                symlink_filename = os.path.join(album_dir, symlink_basename)
                os.symlink(photo_relname, symlink_filename)

            # Write metadata
            self._write_json_if_different(metadata_filename, album)
        else:
            self._verbose('Album %s is up-to-date' % album_title)

        return {album_basename}

    def _create_not_in_any_album_dir(self):
        """Create a directory for photos that aren't in any album, and
        create symlinks to the files in the photostream."""
        self._verbose('Creating local directory for photos not in any album')

        album_dir = os.path.join(self.dest_dir, 'Not in any album')

        # TODO: Ideally we would inspect the existing directory and
        #       make sure it's correct, but that's a lot of work. For now
        #       just recreate the album. Fixing this would also allow us to
        #       log _progress() messages when the album has changed.
        _ensure_dir_doesnt_exist(album_dir)
        _ensure_dir_exists(album_dir)

        current_page = 1
        while True:
            # Fetch list of photos that aren't in any album
            rsp = self.flickr.photos_getNotInSet(
                extras='original_format,media',
                per_page=NUM_PHOTOS_PER_BATCH,
                page=current_page,
            )
            _validate_json_response(rsp)

            page_items = rsp['photos']['photo']
            if not page_items:
                # We've reached the end of the photostream. Stop looping.
                # The unfiltered page is tested on purpose: a page holding
                # only ignored media types must not end the iteration.
                break

            for photo in page_items:
                if not self._should_mirror(photo):
                    continue
                photo_basename = self._get_photo_basename(photo)
                photo_fullname = os.path.join(self.photostream_dir, photo_basename)
                photo_relname = os.path.relpath(photo_fullname, album_dir)
                symlink_filename = os.path.join(album_dir, photo_basename)
                os.symlink(photo_relname, symlink_filename)

            current_page += 1

    def _mirror_collections(self):
        """Create a directory for each collection, and create symlinks to the
        albums."""
        self._verbose('Mirroring collections')

        collection_dirs = set()

        # Fetch collections
        rsp = self.flickr.collections_getTree()
        _validate_json_response(rsp)
        if rsp['collections']:
            for collection in rsp['collections']['collection']:
                collection_dirs |= self._mirror_collection(self.collections_dir, collection)

        self._delete_unknown_files(self.collections_dir, collection_dirs, 'collection')

    def _mirror_collection(self, parent_dir, collection):
        """Mirror one collection, recursing into nested collections.

        Args:
            parent_dir (str): The full path to the directory where this
                collection should be written.
            collection (dict): The collection metadata dict as returned
                by the flickr.collections.getTree API call.

        Returns:
            A set containing the directory name used for this collection.
        """
        collection_basename = self._get_collection_dirname(collection['id'], collection['title'])
        collection_dir = os.path.join(parent_dir, collection_basename)

        metadata_filename = os.path.join(collection_dir, 'metadata')

        if not os.path.exists(collection_dir) or self._is_file_different(metadata_filename, collection):
            # Metadata changed, might be due to updated list of albums.
            self._progress('Updating collection %s' % collection['title'])
            self.modified_collections += 1

            # Delete and recreate the collection
            _ensure_dir_doesnt_exist(collection_dir)
            _ensure_dir_exists(collection_dir)

            # Create symlinks for each album
            for album in collection.get('set') or []:
                album_basename = self._get_album_dirname(album['id'], album['title'])
                album_fullname = os.path.join(self.albums_dir, album_basename)
                album_relname = os.path.relpath(album_fullname, collection_dir)
                symlink_filename = os.path.join(collection_dir, album_basename)
                os.symlink(album_relname, symlink_filename)

            # Collections can contain infinitely nested collections.
            for child_collection in collection.get('collection') or []:
                self._mirror_collection(collection_dir, child_collection)

            # Write metadata
            self._write_json_if_different(metadata_filename, collection)

        return {collection_basename}

    def _get_photo_url(self, photo):
        """Return the download URL for the original photo or video.

        Calls sys.exit(1) for any media type other than 'photo' or 'video'.
        """
        mediatype = photo['media']

        if mediatype == 'photo':
            return 'https://farm%(farm)s.staticflickr.com/%(server)s/%(id)s_%(originalsecret)s_o.%(originalformat)s' \
                % photo

        if mediatype == 'video':
            # URL created according to these instructions:
            # http://code.flickr.net/2009/03/02/videos-in-the-flickr-api-part-deux/
            owner = self.flickr.token_cache.token.user_nsid
            return 'http://www.flickr.com/photos/%s/%s/play/orig/%s/' % (
                owner, photo['id'], photo['originalsecret'])

        sys.stderr.write('Error: Unsupported media type "%s":\n' % mediatype)
        sys.stderr.write(json.dumps(photo, indent=2) + '\n')
        sys.exit(1)

    def _get_photo_basename(self, photo):
        """Return the local file name (without directory) for a media item.

        Raises:
            VideoDownloadError: If the item is a video that is not stored
                locally and its name cannot be determined over HTTP.

        Calls sys.exit(1) for any media type other than 'photo' or 'video'.
        """
        mediatype = photo['media']

        if mediatype == 'photo':
            return '%s.%s' % (photo['id'], photo['originalformat'])

        if mediatype == 'video':
            # TODO: If Flickr begins including the file extension in the
            #       video metadata then this code should be changed to behave
            #       like the photo case, above.

            # The photo metadata for videos does not indicate the file
            # extension. If we've already saved the video locally then
            # we can get the basename from the local file.
            photo_id = str(photo['id'])
            candidates = glob.iglob(os.path.join(self.photostream_dir, photo_id) + '*')
            # Sorted so the result does not depend on directory order.
            for f in sorted(candidates):
                name = os.path.basename(f)
                if name.endswith('metadata'):
                    continue
                # A digit right after the ID means the file belongs to a
                # different, longer ID (e.g. '1234.jpg' when looking for
                # '123'), so it must not be mistaken for this video.
                if name[len(photo_id):len(photo_id) + 1].isdigit():
                    continue
                return name

            # Otherwise, make an HTTP HEAD request to get the response
            # headers we'd see when trying to download the photo. This
            # URL gets redirected to the CDN with a URL that includes
            # the video's original name.
            # TODO: Note that this started failing on 2016-06-25. It
            #       seems to be impossible to download original video files
            #       via the Flickr API now. The best we can do is show the
            #       user a download URL and ask them to download. For a little
            #       more context see:
            #       https://www.flickr.com/groups/51035612836@N01/discuss/72157671986445591/72157673833636861
            #       https://groups.yahoo.com/neo/groups/yws-flickr/conversations/topics/9610
            #       https://groups.yahoo.com/neo/groups/yws-flickr/conversations/topics/9617
            head = requests.head(self._get_photo_url(photo), allow_redirects=True)
            if head.status_code != 200:
                raise VideoDownloadError(
                    'Manual download required: '
                    'https://www.flickr.com/video_download.gne?id=%s' % photo['id'])
            return os.path.basename(urllib.parse.urlparse(head.url).path)

        sys.stderr.write('Error: Unsupported media type "%s":\n' % mediatype)
        sys.stderr.write(json.dumps(photo, indent=2) + '\n')
        sys.exit(1)

    @staticmethod
    def _make_dirname(id_, title):
        """Build a directory name '<quoted title> - <id>'.

        The title is percent-quoted, leaving only space, apostrophe and
        comma (plus the characters urllib never quotes) untouched.
        """
        safe_title = urllib.parse.quote(title.encode('utf-8'), " ',")
        return '%s - %s' % (safe_title, id_)

    @staticmethod
    def _get_album_dirname(id_, title):
        """Return the directory name used for an album."""
        # The ID is included in the name to avoid collisions when there
        # are two albums with the same name.
        return FlickrMirrorer._make_dirname(id_, title)

    @staticmethod
    def _get_collection_dirname(id_, title):
        """Return the directory name used for a collection."""
        # The ID is included in the name to avoid collisions when there
        # are two collections with the same name.
        return FlickrMirrorer._make_dirname(id_, title)

    @staticmethod
    def _is_file_different(filename, data):
        """Return True if the contents of the file 'filename' differ
        from 'data'. Otherwise return False.

        A missing file, or a file that does not contain valid JSON (for
        example one that was truncated), counts as different so that it
        gets rewritten. Any other read error ends the script via
        sys.exit(1)."""
        try:
            with open(filename) as json_file:
                orig_data = json.load(json_file)
        except IOError as ex:
            if ex.errno != errno.ENOENT:
                sys.stderr.write('Error reading %s: %s\n' % (filename, ex))
                sys.exit(1)
            return True
        except ValueError:
            # JSON decoding errors are ValueError subclasses.
            return True
        return orig_data != data

    def _set_timestamp_if_different(self, photo_datetime, filename):
        """Set the access and modified times of a file to the specified
        datetime.

        A datetime that cannot be converted to a timestamp is reported as
        a progress message and otherwise ignored.

        Args:
            photo_datetime (datetime.datetime)
            filename (str): Path of the file to update.
        """
        try:
            timestamp = time.mktime(photo_datetime.timetuple())
            if timestamp != os.path.getmtime(filename):
                os.utime(filename, (timestamp, timestamp))
        except (OverflowError, ValueError):
            # time.mktime raises either of these for unrepresentable times.
            self._progress('Error updating timestamp for: %s' % filename)

    def _write_json_if_different(self, filename, data):
        """Write the given data to the specified filename, but only if it's
        different from what is currently there. Return true if the file was
        written.

        We use this function mostly to avoid changing the timestamps on
        metadata files."""
        if not self._is_file_different(filename, data):
            # Data has not changed--do nothing.
            return False

        # Write to temp file then rename to avoid incomplete files
        # in case of failure part-way through.
        with open(self.tmp_filename, 'w') as json_file:
            json.dump(data, json_file)
        os.rename(self.tmp_filename, filename)
        return True

    def _delete_unknown_files(self, rootdir, known, knowntype):
        """If the delete_unknown option is used, delete all files and
        directories in rootdir except the known files.

        knowntype is only used for the log message.

        Returns the number of deleted entries.
        """
        # return early if the rootdir doesn't exist
        if not os.path.isdir(rootdir):
            return 0

        # delete only if the --delete-unknown was specified.
        if not self.delete_unknown:
            return 0

        delete_count = 0
        curr_entries = os.listdir(rootdir)
        unknown_entries = set(curr_entries) - set(known)
        for unknown_entry in unknown_entries:
            fullname = os.path.join(rootdir, unknown_entry)
            self._progress('Deleting unknown %s: %s' % (knowntype, unknown_entry))
            delete_count += 1
            try:
                # shutil.rmtree refuses to operate on symlinks, so a link
                # that points at a directory is unlinked like a file.
                if os.path.isdir(fullname) and not os.path.islink(fullname):
                    shutil.rmtree(fullname)
                else:
                    os.remove(fullname)
            except OSError as ex:
                sys.stderr.write('Error deleting %s: %s\n' % (fullname, ex.strerror))
                sys.exit(1)

        return delete_count

    def _verbose(self, msg):
        """Print msg when verbosity is 2 or higher."""
        if self.verbosity >= 2:
            print(msg)

    def _progress(self, msg):
        """Print msg when verbosity is 1 or higher."""
        if self.verbosity >= 1:
            print(msg)

    def _cleanup(self):
        """Remove the temp file, reporting (but not raising) failures."""
        # Remove a temp file, if one exists
        try:
            os.remove(self.tmp_filename)
        except OSError as ex:
            if ex.errno != errno.ENOENT:
                sys.stderr.write('Error deleting temp file %s: %s\n' % (self.tmp_filename, ex.strerror))

    def _sig_int_handler(self, signum, frame):
        """SIGINT handler: print statistics (if enabled) and exit."""
        # User exited with CTRL+C
        print('')
        self._print_statistics()
        sys.exit()
def main():
    """Build the command-line parser, parse sys.argv and run the mirrorer."""
    parser = argparse.ArgumentParser(
        description='Create a local mirror of your flickr data.')

    parser.add_argument(
        'destdir',
        help='the path to where the mirror shall be stored')

    # -v and -q write to the same destination. The default of 1 is declared
    # on the first of the two options only.
    parser.add_argument(
        '-v', '--verbose',
        dest='verbosity', action='store_const', const=2, default=1,
        help='print progress information to stdout')
    parser.add_argument(
        '-q', '--quiet',
        dest='verbosity', action='store_const', const=0,
        help='print nothing to stdout if the mirror succeeds')

    # Plain on/off switches: (option strings, destination, action, help).
    switches = (
        (('-s', '--statistics'), 'statistics', 'store_true',
         'print transfer-statistics at the end'),
        (('--ignore-views',), 'include_views', 'store_false',
         'do not include views-counter in metadata'),
        (('--ignore-photos',), 'ignore_photos', 'store_true',
         'do not mirror photos'),
        (('--ignore-videos',), 'ignore_videos', 'store_true',
         'do not mirror videos'),
        (('--delete-unknown',), 'delete_unknown', 'store_true',
         'delete unrecognized files in the destination directory. '
         'Warning: if you choose to ignore photos or videos, they will be deleted!'),
    )
    for option_strings, dest, action, help_text in switches:
        parser.add_argument(*option_strings, dest=dest, action=action, help=help_text)

    FlickrMirrorer(parser.parse_args()).run()
if __name__ == '__main__':
    # Script entry point. A Ctrl-C that surfaces here as KeyboardInterrupt is
    # swallowed so that no traceback is shown.
    try:
        main()
    except KeyboardInterrupt:
        # Emit a newline so the shell prompt starts on a fresh line.
        print('')