Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with
or
.
Download ZIP
Tree: b1c0e383b7
Fetching contributors…

Cannot retrieve contributors at this time

89 lines (73 sloc) 2.776 kB
# -*- coding: utf-8 -*-
import sys
import httplib
from urllib import quote
import json
import couchdb
from twitter__login import login
from twitter__util import makeTwitterRequest
DB = sys.argv[1]
USER = sys.argv[2]
try:
server = couchdb.Server('http://localhost:5984')
db = server[DB]
except couchdb.http.ResourceNotFound, e:
print >> sys.stderr, """CouchDB database '%s' not found.
Please check that the database exists and try again.""" % DB
sys.exit(1)
# query by term
try:
conn = httplib.HTTPConnection('localhost', 5984)
conn.request('GET', '/%s/_fti/_design/lucene/by_text?q=%s' % (DB,
quote(USER)))
response = conn.getresponse()
if response.status == 200:
response_body = json.loads(response.read())
else:
print >> sys.stderr, 'An error occurred fetching the response: %s %s' \
% (response.status, response.reason)
sys.exit(1)
finally:
conn.close()
doc_ids = [row['id'] for row in response_body['rows']]
# pull the tweets from CouchDB
tweets = [db.get(doc_id) for doc_id in doc_ids]
# mine out the in_reply_to_status_id fields and fetch those tweets as a batch request
conversation = sorted([(tweet['_id'], int(tweet['in_reply_to_status_id']))
for tweet in tweets if tweet['in_reply_to_status_id']
is not None], key=lambda x: x[1])
min_conversation_id = min([int(i[1]) for i in conversation if i[1] is not None])
max_conversation_id = max([int(i[1]) for i in conversation if i[1] is not None])
# Pull tweets from other user using user timeline API to minimize API expenses...
t = login()
reply_tweets = []
results = []
page = 1
while True:
results = makeTwitterRequest(t,
t.statuses.user_timeline,
count=200,
# Per <http://dev.twitter.com/doc/get/statuses/user_timeline>, some
# caveats apply with the oldest id you can fetch using "since_id"
since_id=min_conversation_id,
max_id=max_conversation_id,
skip_users='true',
screen_name=USER,
page=page)
reply_tweets += results
page += 1
if len(results) == 0:
break
# During testing, it was observed that some tweets may not resolve or possibly
# even come back with null id values -- possibly a temporary fluke. Workaround.
missing_tweets = []
for (doc_id, in_reply_to_id) in conversation:
try:
print [rt for rt in reply_tweets if rt['id'] == in_reply_to_id][0]['text']
except Exception, e:
print >> sys.stderr, 'Refetching <<tweet %s>>' % (in_reply_to_id, )
results = makeTwitterRequest(t, t.statuses.show, id=in_reply_to_id)
print results['text']
# These tweets are already on hand
print db.get(doc_id)['text']
print
Jump to Line
Something went wrong with that request. Please try again.