/
twics.py
executable file
·341 lines (268 loc) · 10 KB
/
twics.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
#!/usr/bin/python
""" twics.py -- various functions to convert microblogging service
status updates into ical format for inclusion on calendars """
import oauth2 as oauth
import os.path
import simplejson
import urllib2
import vobject
from dateutil import parser
from optparse import OptionParser, OptionGroup
from time import sleep
CONFIG = {}
def clean_status(tweet, protocol):
    """ add username and protocol attributes to json output and remove
    extraneous user details

    tweet    -- dict decoded from the service's status JSON
    protocol -- service name ('twitter' or 'identica')

    Returns the same dict, mutated in place. """
    if (not tweet.get('username') and
        tweet.get('user') and tweet['user'].get('screen_name')):
        tweet['username'] = tweet['user']['screen_name']
    tweet['protocol'] = protocol
    # use .get() here: a status with no embedded 'user' dict never gains a
    # 'username' key, and the old tweet['username'] lookup raised KeyError
    if tweet.get('username') and tweet.get('protocol') and tweet.get('user'):
        del tweet['user']
    return tweet
def fetch_statuses(opts, args):
""" fetch the most recent statuses from the microblogging service """
apicall = {
'twitter': 'http://api.twitter.com/1/statuses/user_timeline.json',
'identica' : 'http://identi.ca/api/statuses/user_timeline.json',
}
tl = load_json_list(opts.file)
# process existing list
seen = {}
max_id = 0
since = ''
for tweet in tl:
seen[tweet['id']] = 1
if tweet['id'] > max_id:
max_id = tweet['id']
if max_id:
since = '&since_id=%s' % (max_id)
for page in range(1, 17):
if opts.verbose:
print "fetching page %s" % (page)
apiurl = '%s?screen_name=%s&page=%s&count=200%s' % (
apicall[opts.protocol], opts.username, page, since)
if opts.verbose:
print apiurl
try:
json = get_content(opts, apiurl)
except:
print "Ratelimited: sleeping for ten minutes..."
sleep(600)
json = get_content(opts, apiurl)
ctl = simplejson.loads(json)
ctl_count = len(ctl)
if ctl_count == 0:
print "No results returned, exiting loop"
break
for tweet in ctl:
if not seen.get(tweet['id']):
tl.append(clean_status(tweet, opts.protocol))
if ctl_count < 200:
sleep(opts.sleep)
else:
break
write_json(tl, opts.file)
def get_content(opts, apicall):
""" fetch content via apicall """
if not CONFIG['token']:
return urllib2.urlopen(apicall).read()
consumer = oauth.Consumer(
key=CONFIG['consumer']['key'],
secret=CONFIG['consumer']['secret'])
token = oauth.Token(
key=CONFIG['token']['key'],
secret=CONFIG['token']['secret'])
# Create our client.
client = oauth.Client(consumer, token)
# The OAuth Client request works just like httplib2 for the most part.
if opts.verbose:
print "Fetching %s" % apicall
resp, content = client.request(apicall, "GET")
if opts.verbose:
print "Returned %s: %s" % (resp['status'], content)
if resp['status'] == '200':
return content
else:
return '[]'
def integrate_statuses(opts, args):
""" integrate individual statuses or a list of status urls/ids into
an existing json archive"""
apicall = {
'twitter': 'http://api.twitter.com/1/statuses/show',
'identica': 'http://identi.ca/api/statuses/show',
}
tl = load_json_list(opts.file)
# process existing list
seen = {}
for tweet in tl:
seen[tweet['id']] = 1
# process integration source
if os.path.isfile(opts.indiv):
with open(opts.indiv, 'r') as ifile:
lines = ifile.read().split('\n')
else:
# url/id passed in on command line
lines = [ opts.indiv, ]
for line in lines:
tid = None
try:
tid = int(line.split('/')[-1])
except ValueError:
continue
if seen.get(tid):
continue
if tid:
apiurl = '%s/%s.json' % (apicall[opts.protocol], tid)
if opts.verbose:
print apiurl
try:
json = get_content(opts, apiurl)
except:
print "Ratelimited: sleeping for ten minutes..."
sleep(600)
json = get_content(opts, apiurl)
tweet = simplejson.loads(json)
tl.append(clean_status(tweet, opts.protocol))
sleep(opts.sleep)
write_json(tl, opts.file)
def load_keyfile(key, keyfile):
    """ load the JSON-formatted keyfile into CONFIG[key]; a missing
    keyfile yields an empty dict so callers can test for credentials """
    material = {}
    if os.path.isfile(keyfile):
        with open(keyfile) as handle:
            material = simplejson.loads(handle.read())
    CONFIG[key] = material
def load_json_list(input_file):
    """ load json file and return the listing (otherwise return empty list)"""
    # guard clause: a missing archive simply means nothing stored yet
    if not os.path.isfile(input_file):
        return []
    with open(input_file, 'r') as handle:
        return simplejson.loads(handle.read())
def merge_status_files(opts, args):
    """ merge multiple status json archives into one output file """
    # flatten every archive named in args into a single listing
    merged = [tweet
              for status_file in args
              for tweet in load_json_list(status_file)]
    # normalize each entry (username/protocol attributes) before writing
    for tweet in merged:
        clean_status(tweet, opts.protocol)
    write_json(merged, opts.file)
def status2ics(opts, args):
    """ write ical format of status updates """
    statuses = load_json_list(args[0])
    cal = vobject.iCalendar()
    cal.add('prodid').value = '-//twitter.com/twitter ICS//EN'
    cal.add('version').value = '1.0'
    cal.add('calscale').value = 'GREGORIAN'
    cal.add('x-wr-calname').value = 'status updates'
    # newest status first
    statuses.sort(key=lambda tw: parser.parse(tw['created_at']), reverse=True)
    for status in statuses:
        stamp = parser.parse(status['created_at'])
        if status['protocol'] == 'identica':
            url = 'http://identi.ca/notice/%s' % (status['id'])
        else:
            url = 'http://twitter.com/%s/status/%s/' % (status['username'],
                                                        status['id_str'])
        # derive a UID from the url: path elements joined by '-' become the
        # local part, the host becomes the domain part
        pieces = url.split('/')[2:]
        uid = '%s@%s' % ('-'.join(pieces[1:]), pieces[0])
        event = cal.add('vevent')
        event.add('summary').value = status['text']
        event.add('description').value = '%s %s' % (status['text'], url)
        # zero-duration event pinned at the status creation time
        event.add('dtstart').value = stamp
        event.add('dtend').value = stamp
        event.add('dtstamp').value = stamp
        event.add('url').value = url
        event.add('uid').value = uid
    with open(opts.file, 'w') as handle:
        handle.write(cal.serialize())
def write_json(tl, output_file):
    """ write output json file """
    # keep the archive sorted newest-first by creation time (in place)
    tl.sort(key=lambda entry: parser.parse(entry['created_at']),
            reverse=True)
    with open(output_file, 'w') as handle:
        handle.write(simplejson.dumps(tl, indent=2))
def emit_usage(oparser, die=True):
    """ print the program's usage text; exit with status 1 unless die is
    False """
    oparser.print_help()
    if not die:
        return
    exit(1)
def main():
""" main """
dispatch = {
'generate': status2ics,
'merge': merge_status_files,
'fetch': fetch_statuses,
'integrate': integrate_statuses,
}
usage = 'usage: %%prog (%s) [options]' % '|'.join(dispatch.keys())
oparser = OptionParser(usage=usage, version='%prog 0.2')
oparser.add_option("-v", "--verbose", dest="verbose",
action="store_true",
help="verbose output")
oparser.add_option("-c", "--consumer", dest="conkey",
help="file with Twitter OAuth Consumer Key/Secret")
oparser.add_option("-f", dest="file",
help="output file (input and output on fetch)")
# %prog fetch options
fgroup = OptionGroup(oparser, "fetch/integrate options")
fgroup.add_option("-u", dest="username",
help="service username **REQUIRED for fetch**")
fgroup.add_option("-k", dest="keyfile",
help="file containing JSON-formatted OAuth Token")
fgroup.add_option("-i", dest="indiv",
help="integration source (status id, url, or listing)")
fgroup.add_option("-p", dest="protocol",
help="status service (default='twitter')",
default='twitter')
fgroup.add_option("-s", type="int", dest="sleep",
help="sleep interval between API calls (default=10)",
default=10)
oparser.add_option_group(fgroup)
(opts, args) = oparser.parse_args()
if len(args) < 1:
print "\n** Please supply an action to perform\n"
emit_usage(oparser)
else:
action = args.pop(0)
default_file = None
if action == 'fetch' or action == 'integrate':
if action == 'fetch' and not opts.username:
print "\n** Please supply a username\n"
emit_usage(oparser)
if action == 'integrate':
if not opts.indiv:
print "\n** Please supply an integration source\n"
emit_usage(oparser)
if not opts.file and not opts.username:
print "\n** Please supply an output file or a username\n"
emit_usage(oparser)
default_file = '%s-%s.json' % (opts.username, opts.protocol)
default_keyfile = '%s.keys' % (opts.username)
default_conkey = 'twics.keys'
elif action == 'generate':
if len(args) != 1:
print "\n** Please supply a single status file to be converted\n"
emit_usage(oparser)
default_file = '%s.ics' % args[0].split('.')[0]
elif action == 'merge':
if len(args) == 0:
print "\n** Please supply status files to be merged\n"
emit_usage(oparser)
default_file = '%s-merge.json' % (
'-'.join([fn.split('/')[-1].split('.')[0] for fn in args]))
# set default output file
if not opts.file:
opts.file = default_file
if opts.protocol == 'twitter':
if not opts.keyfile and default_keyfile:
opts.keyfile = default_keyfile
if not opts.conkey and default_conkey:
opts.conkey = default_conkey
load_keyfile('token', opts.keyfile)
load_keyfile('consumer', opts.conkey)
dispatch[action](opts, args)
if __name__ == '__main__':
main()