/
import-flickr.py
executable file
·161 lines (132 loc) · 5.58 KB
/
import-flickr.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
#!/usr/bin/env python3
import argparse
import datetime
import flickrapi
import os
import re
import yaml
# Command-line interface: `--generate` is False when absent, True when given
# bare, and the string 'overwrite' when passed as `--generate overwrite`.
# Fix: the original passed the sentence as the positional `prog` argument,
# which replaced the program name in --help output; it belongs in `description`.
arg_parser = argparse.ArgumentParser(description = 'Import posts automatically from flickr')
arg_parser.add_argument('--generate', nargs = '?', default = False, const = True, choices = ['overwrite'], help = 'Automatically generate posts, will not overwrite existing posts')
args = arg_parser.parse_args()
# Hugo front-matter + body template for a generated post.  The doubled braces
# survive str.format() as literal braces, producing a {{< flickr >}} shortcode.
generated_post_template = '''\
---
title: "{title}"
date: {date}
photography/types:
- Flickr Album
generated: true
---
{description}
{{{{< flickr set="{id}" >}}}}
'''
# Merge the public config and the private secrets file into one settings dict
# (later files win on key collisions).
# safe_load: PyYAML >= 6 removed the Loader-less yaml.load() signature, and
# these trusted config files need no arbitrary-object construction anyway.
config = {}
for filename in ['config.yaml', 'secrets.yaml']:
    with open(filename, 'r') as fin:
        config.update(yaml.safe_load(fin))
# Optional mapping of photoset id -> preferred post date ('%Y-%m-%d' string),
# used by --generate to override the set's creation timestamp.
post_date_path = os.path.join('data', 'flickr', 'post-dates.yaml')
if os.path.exists(post_date_path):
    with open(post_date_path, 'r') as fin:
        post_dates = yaml.safe_load(fin)
else:
    post_dates = {}
flickr = flickrapi.FlickrAPI(config['flickr']['key'], config['flickr']['secret'], cache = True, format = 'parsed-json')
# Flickr URL size suffixes, keyed by a human-readable name; used below to
# build one thumbnail URL per size for every photo.
thumbnail_sizes = dict(
    square = 's',     # 75x75
    thumbnail = 't',  # longest side <= 100
    small = 'm',      # longest side <= 240
    medium = 'z',     # longest side <= 640
    large = 'b',      # longest side <= 1024
)
# Local cache directories, expressed as path components; create them up front.
cache_paths = [
    ('_cache', 'photosets'),
]
for cache_path in cache_paths:
    # exist_ok=True replaces the old bare `try/except: pass`, which also
    # swallowed real failures such as permission errors.
    os.makedirs(os.path.join(*cache_path), exist_ok = True)
# Default size suffix for inline thumbnails.  Flickr's suffixes:
#   s = 75x75 square, t = 100, m = 240, z = 640, b = 1024 (longest side)
thumbnail_size = 'm'
def fix_content_objects(obj):
    '''Recursively collapse Flickr's {'_content': value} wrapper dicts.

    Any dict whose only key is '_content' is replaced by that value; other
    dicts and lists are walked recursively; every other value is returned
    unchanged.
    '''
    if isinstance(obj, list):
        return [fix_content_objects(item) for item in obj]
    if not isinstance(obj, dict):
        return obj
    if len(obj) == 1 and '_content' in obj:
        return obj['_content']
    return {key: fix_content_objects(value) for key, value in obj.items()}
# Resolve the configured username to a Flickr user id, then list all photosets.
user = flickr.people.findByUsername(username = config['flickr']['username'])
user_id = user['user']['id']
raw_photosets = flickr.photosets.getList(user_id = user_id)
# Paging is not implemented; fail loudly rather than silently import page 1 only.
if raw_photosets['photosets']['pages'] != 1:
    raise Exception('Cannot currently deal with multiple pages')
# Import flickr data
print('[Flickr] Importing photosets...')
# Oldest photosets first, so output (and cache writes) follow creation order.
for photoset in sorted(raw_photosets['photosets']['photoset'], key = lambda ps: int(ps['date_create'])):
    photoset = fix_content_objects(photoset)
    photoset_path = os.path.join('data', 'flickr', 'sets', '{}.yaml'.format(photoset['id']))
    # Check if we've cached this photoset; skip the download only when the
    # cached copy is at least as new as Flickr's date_update.
    if os.path.exists(photoset_path):
        with open(photoset_path, 'r') as fin:
            # safe_load: the Loader-less yaml.load() was removed in PyYAML 6
            cached_photoset = yaml.safe_load(fin)
        if int(cached_photoset['date_update']) < int(photoset['date_update']):
            print(photoset['id'], photoset['title'], 'already exists but out of date')
        else:
            print(photoset['id'], photoset['title'], 'already exists and up to date')
            continue
    # Override the default information with more useful information
    print(photoset['id'], photoset['title'], 'downloading...')
    photoset['photos'] = []
    photos = flickr.photosets.getPhotos(user_id = user_id, photoset_id = photoset['id'])
    for photo in photos['photoset']['photo']:
        photo = fix_content_objects(photo)
        # Direct image URL, plus one URL per thumbnail size suffix.
        photo['url'] = 'https://farm{farm}.staticflickr.com/{server}/{id}_{secret}.jpg'.format(**photo)
        photo['thumbnails'] = {
            name: 'https://farm{farm}.staticflickr.com/{server}/{id}_{secret}_{size}.jpg'.format(size = size, **photo)
            for name, size in thumbnail_sizes.items()
        }
        # Human-viewable photo page within the album context.
        photo['page'] = 'https://www.flickr.com/photos/{username}/{photo_id}/in/album-{photoset_id}/'.format(
            username = config['flickr']['username'],
            photo_id = photo['id'],
            photoset_id = photoset['id'],
        )
        photoset['photos'].append(photo)
    os.makedirs(os.path.dirname(photoset_path), exist_ok = True)
    with open(photoset_path, 'w') as fout:
        yaml.dump(photoset, fout, default_flow_style = False)
print()
# TODO: Generate posts based on this data
# NOTE: I don't know if I actually want to do this, since I don't have the date that I actually took the pictures available
if args.generate:
    print('[Flickr] Generating posts...')
    for filename in os.listdir(os.path.join('data', 'flickr', 'sets')):
        with open(os.path.join('data', 'flickr', 'sets', filename), 'r') as fin:
            # safe_load: the Loader-less yaml.load() was removed in PyYAML 6
            photoset = yaml.safe_load(fin)
        photoset_id = int(photoset['id'])
        # Prefer a manually curated post date; fall back to the set's creation time.
        if photoset_id in post_dates:
            date_created = datetime.datetime.strptime(post_dates[photoset_id], '%Y-%m-%d')
        else:
            date_created = datetime.datetime.fromtimestamp(int(photoset['date_create']))
        # Slugify the title: lowercase, runs of anything outside [a-z0-9-]
        # collapse to a single hyphen, leading/trailing hyphens trimmed.
        slug = re.sub('[^a-z0-9-]+', '-', photoset['title'].lower()).strip('-')
        filename = '{}-{}.generated.md'.format(date_created.strftime('%Y-%m-%d'), slug)
        path = os.path.join('content', 'photography', str(date_created.year), filename)
        # Bug fix: the original format string had a single placeholder but two
        # arguments, so the target path was silently never printed.
        print('{} ({})'.format(photoset['title'], path), end = '... ')
        if os.path.exists(path) and args.generate != 'overwrite':
            print('already exists, skipping')
        else:
            os.makedirs(os.path.dirname(path), exist_ok = True)
            with open(path, 'w') as fout:
                fout.write(generated_post_template.format(
                    date = date_created,
                    title = photoset['title'],
                    description = photoset.get('description', ''),
                    id = photoset['id'],
                ))
            print('written')
    print()