
Commit

Fixing various long code lines. Hopefully, they're all less than 86 columns wide now
Matthew A. Russell committed Jan 28, 2011
1 parent b918f76 commit 1e630e2
Showing 10 changed files with 83 additions and 62 deletions.
20 changes: 12 additions & 8 deletions recipe__analyze_users_in_search_results.py
@@ -12,7 +12,8 @@ def analyze_users_in_search_results(t, q, max_pages=15, results_per_page=100):
search_api = twitter.Twitter(domain="search.twitter.com")
search_results = []
for page in range(1,max_pages+1):
search_results += search_api.search(q=q, rpp=results_per_page, page=page)['results']
search_results += \
search_api.search(q=q, rpp=results_per_page, page=page)['results']

# Extract the screen names (the "from_user" field) from the results
# and optionally map them to a useful field like the tweet id
@@ -30,18 +31,21 @@ def analyze_users_in_search_results(t, q, max_pages=15, results_per_page=100):
screen_name_to_tweet_ids[screen_name] += [ result['id'] ]


# Use the /users/lookup resource to resolve profile information for these screen names
# Use the /users/lookup resource to resolve profile information for
# these screen names

screen_name_to_info = get_info_by_screen_name(t, screen_name_to_tweet_ids.keys())

# Extract the home location for each user. Note that the "location" field can be anything
# a user has typed in, and may be something like "Everywhere", "United States" or something else
# that won't geocode to a specific coordinate on a map.
# Extract the home location for each user. Note that the "location" field can
# be anything a user has typed in, and may be something like "Everywhere",
# "United States" or something else that won't geocode to a specific coordinate
# on a map.

screen_name_to_location = dict([(sn, info['location']) for sn, info in screen_name_to_info.items()])
screen_name_to_location = dict([(sn, info['location'])
for sn, info in screen_name_to_info.items()])

# Use the various screen_name_to{tweet_ids, info, location} maps to determine interesting things about
# the people who appear in the search results.
# Use the various screen_name_to{tweet_ids, info, location} maps to determine
# interesting things about the people who appear in the search results.

return screen_name_to_info, screen_name_to_location, screen_name_to_tweet_ids

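The get_info_by_screen_name helper called above is not part of this hunk. A minimal sketch of what it might look like, assuming the same `twitter` package and the /users/lookup limit of 100 screen names per request (the batching and variable names here are illustrative, not the recipe's actual code):

def get_info_by_screen_name(t, screen_names):

    # Resolve profile information in batches of 100, the per-request
    # limit for the /users/lookup resource

    screen_name_to_info = {}
    for i in range(0, len(screen_names), 100):
        batch = screen_names[i:i + 100]
        for profile in t.users.lookup(screen_name=','.join(batch)):
            screen_name_to_info[profile['screen_name']] = profile

    return screen_name_to_info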
7 changes: 4 additions & 3 deletions recipe__crawl.py
@@ -17,7 +17,8 @@ def get_all_followers_ids(user_id, limit):
ids = []
while cursor != 0:

response = make_twitter_request(t, t.followers.ids, user_id=user_id, cursor=cursor)
response = make_twitter_request(t, t.followers.ids,
user_id=user_id, cursor=cursor)

if response is not None:
ids += response['ids']
@@ -50,8 +51,8 @@ def get_all_followers_ids(user_id, limit):
for _fid in queue:
_follower_ids = get_all_followers_ids(user_id=_fid, limit=limit)

# Store a fid => _follower_ids mapping in Redis or other database of choice
# In Redis, it might look something like this:
# Store a fid => _follower_ids mapping in Redis or other
# database of choice. In Redis, it might look something like this:

rid = get_redis_id('follower_ids', user_id=fid)
[ r.sadd(rid, _id) for _id in _follower_ids ]
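For reference, those two lines give each crawled user its own Redis set of follower ids. Reading the harvested data back out is just as direct; a sketch assuming redis-py and a server on localhost (some_user_id is a placeholder for an id you have already crawled):

import redis

r = redis.Redis()   # assumes Redis is running on localhost:6379

# get_redis_id builds the key; smembers returns the whole set of ids
rid = get_redis_id('follower_ids', user_id=some_user_id)
follower_ids = r.smembers(rid)
print 'Stored %i follower ids under %s' % (len(follower_ids), rid)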
7 changes: 5 additions & 2 deletions recipe__dorling_cartogram.py
@@ -126,8 +126,11 @@ def get_state_frequencies(locations):
shutil.copytree('etc/protovis/protovis-3.2',
'out/protovis-3.2')

html = open('etc/protovis/dorling_cartogram/dorling_cartogram.html').read() % (json.dumps(json_data),)
f = open(os.path.join(os.getcwd(), 'out', 'dorling_cartogram', 'dorling_cartogram.html'), 'w')
html = open('etc/protovis/dorling_cartogram/dorling_cartogram.html').read() % \
(json.dumps(json_data),)

f = open(os.path.join(os.getcwd(), 'out', 'dorling_cartogram',
'dorling_cartogram.html'), 'w')
f.write(html)
f.close()

38 changes: 20 additions & 18 deletions recipe__geocode_profile_locations.py
@@ -8,9 +8,9 @@

def geocode_locations(geocoder, locations):

# Some basic replacement transforms may be necessary for geocoding services to function properly
# You may probably need to add your own as you encounter rough edges in the data or with the geocoding
# service you settle on. For example, ...
# Some basic replacement transforms may be necessary for geocoding services to
# function properly. You'll probably need to add your own as you encounter rough
# edges in the data or with the geocoding service you settle on. For example, ...

replacement_transforms = [('San Francisco Bay', 'San Francisco')]

@@ -24,11 +24,11 @@ def geocode_locations(geocoder, locations):
if location_to_coords.has_key(location):
continue

transformed_location = location
xformed_location = location

for transform in replacement_transforms:

transformed_location = transformed_location.replace(*transform)
xformed_location = xformed_location.replace(*transform)

while True:

@@ -37,39 +37,41 @@ def geocode_locations(geocoder, locations):
try:
# This call returns a generator

results = geocoder.geocode(transformed_location, exactly_one=False)
results = geocoder.geocode(xformed_location, exactly_one=False)
break
except HTTPError, e:
num_errors += 1
if num_errors >= MAX_HTTP_ERRORS:
sys.exit()
print >> sys.stderr, e.message
print >> sys.stderr, 'Encountered an urllib2 error. Trying again...'
print >> sys.stderr, 'A urllib2 error. Retrying...'
except UnicodeEncodeError, e:
print >> sys.stderr, e
print >> sys.stderr, 'Encountered a UnicodeEncodeError...', e.message
print >> sys.stderr, 'A UnicodeEncodeError...', e.message
break
except geopy.geocoders.google.GQueryError, e:
print >> sys.stderr, e
print >> sys.stderr, 'Encountered a ...GQueryError', e.message
print >> sys.stderr, 'A GQueryError', e.message
break


for result in results:

# Each result is of the form ("Description", (X,Y))
# Unless you have a some special logic for picking the best of many possible
# results, choose the first one returned in results and move along
# Unless you have some special logic for picking the best of many
# possible results, choose the first one returned in results and move
# along

location_to_coords[location] = result[1]
location_to_description[location] = result[0]
break

# Use location_to_coords and other information of interest to populate a visualization.
# Depending on your particular needs, it is highly likely that you'll want to further
# post process the geocoded locations to filter out locations such as "U.S.A." which will
# plot a placemarker in the geographic center of the United States yet make the visualization
# look skewed in favor of Oklahoma, for example.
# Use location_to_coords and other information of interest to populate a
# visualization. Depending on your particular needs, it is highly likely that
# you'll want to further post process the geocoded locations to filter out
# locations such as "U.S.A.", which will plot a placemarker in the geographic
# center of the United States yet make the visualization look skewed in favor
# of places like Oklahoma, for example.

return location_to_coords, location_to_description

@@ -91,8 +93,8 @@ def geocode_locations(geocoder, locations):

t = oauth_login()

# This function returns a few useful maps. Let's use the screen_name => location map and
# geocode the locations
# This function returns a few useful maps. Let's use the
# screen_name => location map and geocode the locations

_, screen_name_to_location, _ = analyze_users_in_search_results(t, Q, 2)

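The geocoder passed into geocode_locations is constructed elsewhere in the recipe. Given the geopy.geocoders.google.GQueryError handling shown above, wiring it up with geopy's old Google geocoder might look like the following sketch (the constructor arguments and output formatting are assumptions, not the recipe's code):

from geopy import geocoders

g = geocoders.Google(domain='maps.google.com')

location_to_coords, location_to_description = \
    geocode_locations(g, screen_name_to_location.values())

# Each value in location_to_coords is a (latitude, longitude) tuple
for location, (lat, lng) in location_to_coords.items():
    print '%s => (%f, %f)' % (location_to_description[location], lat, lng)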
42 changes: 23 additions & 19 deletions recipe__get_rt_origins.py
@@ -17,10 +17,14 @@ def get_rt_origins(tweet):
if tweet['retweet_count'] > 0:
rt_origins += [ tweet['user']['name'].lower() ]

# Also, inspect the tweet for the presence of "legacy" retweet patterns such as "RT" and "via"
# Also, inspect the tweet for the presence of "legacy" retweet
# patterns such as "RT" and "via"

try:
rt_origins += [ mention.strip() for mention in rt_patterns.findall(tweet['text'])[0][1].split() ]
rt_origins += [
mention.strip()
for mention in rt_patterns.findall(tweet['text'])[0][1].split()
]
except IndexError, e:
pass

@@ -34,30 +38,30 @@ def get_rt_origins(tweet):
# Assume tweets have been fetched from the /search resource or elsewhere.

tweets = \
[
{
'text' : 'RT @ptowbrussell Get @SocialWebMining example code at http://bit.ly/biais2 #w00t'
[
{
'text' : 'RT @ptwobrussell Get @SocialWebMining at http://bit.ly/biais2 #w00t'

# ... more tweet fields ...
# ... more tweet fields ...

},
},

{
'text' : 'Get @SocialWebMining example code at http://bit.ly/biais2 #w00t',
'retweet_count' : 1,
'user' : {
'name' : 'ptwobrussell'
{
'text' : 'Get @SocialWebMining example code at http://bit.ly/biais2 #w00t',
'retweet_count' : 1,
'user' : {
'name' : 'ptwobrussell'

# ... more user fields ...
}
# ... more user fields ...
}

# ... more tweet fields ...
# ... more tweet fields ...

},

# ... more tweets ...
},

]
# ... more tweets ...

]

for tweet in tweets:
print get_rt_origins(tweet)
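The rt_patterns regex that get_rt_origins depends on is not shown in this hunk. A pattern along the following lines is consistent with the findall(...)[0][1] indexing used above, where group 1 is the 'RT'/'via' token and group 2 is the run of mentions that follows (a sketch, not necessarily the recipe's exact pattern):

import re

# Matches legacy retweet conventions such as "RT @user ..." and "via @user"
rt_patterns = re.compile(r'(RT|via)((?:\b\W*@\w+)+)', re.IGNORECASE)

text = 'RT @ptwobrussell Get @SocialWebMining at http://bit.ly/biais2 #w00t'
print rt_patterns.findall(text)[0][1].split()
# ['@ptwobrussell'] -- the mention immediately following "RT"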
3 changes: 2 additions & 1 deletion recipe__get_search_results_for_trending_topic.py
@@ -37,7 +37,8 @@

search_results = []
for page in range(1,MAX_PAGES+1):
search_results += twitter_search.search(q=q, rpp=RESULTS_PER_PAGE, page=page)['results']
search_results += \
twitter_search.search(q=q, rpp=RESULTS_PER_PAGE, page=page)['results']

# Extract tweet entities and embed them into search results

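The extraction step itself is not part of this hunk. As a rough, self-contained stand-in (not the recipe's actual approach), entities can be pulled out of the tweet text with a few regexes and attached to each search result:

import re

def extract_entities(tweet_text):

    # Crude approximations of Twitter's hashtag, mention, and URL entities
    return {
        'hashtags'      : re.findall(r'#\w+', tweet_text),
        'user_mentions' : re.findall(r'@\w+', tweet_text),
        'urls'          : re.findall(r'http\S+', tweet_text),
    }

for result in search_results:
    result['entities'] = extract_entities(result['text'])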
11 changes: 6 additions & 5 deletions recipe__harvest_timeline.py
@@ -102,10 +102,10 @@ def max_finding_reducer(keys, values, rereduce):
KW['since_id'] = 1

# Harvest tweets for the given timeline.
# For friend and home timelines, the unofficial limitation is about 800 statuses although
# other documentation may state otherwise. The public timeline only returns 20 statuses
# and gets updated every 60 seconds, so consider using the streaming API for public statuses.
# See http://groups.google.com/group/twitter-development-talk/browse_thread/thread/4678df70c301be43
# For friend and home timelines, the unofficial limitation is about 800 statuses
# although other documentation may state otherwise. The public timeline only returns
# 20 statuses and gets updated every 60 seconds, so consider using the streaming API
# for public statuses. See http://bit.ly/fgJrAx
# Note that the count and since_id params have no effect for the public timeline

page_num = 1
@@ -114,7 +114,8 @@ def max_finding_reducer(keys, values, rereduce):
api_call = getattr(t.statuses, TIMELINE_NAME + '_timeline')
tweets = make_twitter_request(t, api_call, **KW)

# Actually storing tweets in CouchDB is as simple as passing them into a call to db.update
# Actually storing tweets in CouchDB is as simple as passing them
# into a call to db.update

db.update(tweets, all_or_nothing=True)

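For context on the db.update call above, a minimal sketch of the CouchDB side, assuming couchdb-python, a server on localhost, and a hypothetical database name (tweets is the page of statuses fetched just above):

import couchdb

server = couchdb.Server('http://localhost:5984')

DB_NAME = 'user-timeline-tweets'   # hypothetical database name
db = server[DB_NAME] if DB_NAME in server else server.create(DB_NAME)

# Bulk-load the page in one request; each (success, doc_id, rev_or_exc)
# tuple reports whether that particular document was stored
for success, doc_id, rev_or_exc in db.update(tweets, all_or_nothing=True):
    if not success:
        print 'Failed to store %s: %s' % (doc_id, rev_or_exc)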
6 changes: 4 additions & 2 deletions recipe__setwise_operations.py
@@ -55,7 +55,8 @@ def get_redis_id(key_name, screen_name=None, user_id=None):

cursor = response['next_cursor']

print >> sys.stderr, 'Fetched %i total friend ids for %s' % (r.scard(rid), SCREEN_NAME)
print >> sys.stderr, \
'Fetched %i total friend ids for %s' % (r.scard(rid), SCREEN_NAME)

if r.scard(rid) >= MAX_IDS:
break
@@ -80,7 +81,8 @@ def get_redis_id(key_name, screen_name=None, user_id=None):

cursor = response['next_cursor']

print >> sys.stderr, 'Fetched %i total follower ids for %s' % (r.scard(rid), SCREEN_NAME)
print >> sys.stderr, \
'Fetched %i total follower ids for %s' % (r.scard(rid), SCREEN_NAME)

if r.scard(rid) >= MAX_IDS:
break
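Once friend and follower ids are cached in Redis sets, the setwise operations this recipe is named for become one-liners. A sketch assuming the same redis-py connection; the destination key names and the get_redis_id arguments here are illustrative:

# Mutual friendships: accounts SCREEN_NAME follows that also follow back
r.sinterstore('temp$mutual_friends',
              [get_redis_id('friend_ids', screen_name=SCREEN_NAME),
               get_redis_id('follower_ids', screen_name=SCREEN_NAME)])

# Followers that SCREEN_NAME does not follow back
r.sdiffstore('temp$one_way_followers',
             [get_redis_id('follower_ids', screen_name=SCREEN_NAME),
              get_redis_id('friend_ids', screen_name=SCREEN_NAME)])

print >> sys.stderr, '%i mutual friends, %i followers not followed back' % \
    (r.scard('temp$mutual_friends'), r.scard('temp$one_way_followers'))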
4 changes: 2 additions & 2 deletions recipe__tweet_entities_tagcloud.py
@@ -120,12 +120,12 @@ def weightTermByFreq(f):
if not os.path.isdir('out'):
os.mkdir('out')

f = open(os.path.join('out', os.path.basename(HTML_TEMPLATE)), 'w')
f = open(os.path.join(os.getcwd(), 'out', os.path.basename(HTML_TEMPLATE)), 'w')
f.write(html_page)
f.close()

print >> sys.stderr, 'Tagcloud stored in: %s' % f.name

# Open up the web page in your browser

webbrowser.open("file://" + os.path.join(os.getcwd(), 'out', os.path.basename(HTML_TEMPLATE)))
webbrowser.open("file://" + f.name)
7 changes: 5 additions & 2 deletions recipe__visualize_rt_graph_protovis.py
@@ -29,7 +29,8 @@ def write_protovis_output(g, out_file, html_template):
links.append({'source' : indexed_nodes[n2],
'target' : indexed_nodes[n1]})

json_data = json.dumps({"nodes" : [{"nodeName" : n} for n in nodes], "links" : links}, indent=4)
json_data = json.dumps({"nodes" : [{"nodeName" : n} for n in nodes], \
"links" : links}, indent=4)

html = open(html_template).read() % (json_data,)

Expand Down Expand Up @@ -63,7 +64,9 @@ def write_protovis_output(g, out_file, html_template):
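The graph g handed to write_protovis_output is assembled elsewhere in the recipe. A minimal sketch of how retweet relationships could be turned into such a graph with networkx, using the all_tweets list built further down and the get_rt_origins helper from the other recipe (the edge direction and attributes are assumptions):

import networkx as nx

g = nx.DiGraph()

for tweet in all_tweets:
    for rt_origin in get_rt_origins(tweet):
        # Point the edge from the original author to the retweeter
        g.add_edge(rt_origin, tweet['from_user'], tweet_id=tweet['id'])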
search_results = []
for page in range(1,MAX_PAGES+1):

search_results.append(twitter_search.search(q=Q, rpp=RESULTS_PER_PAGE, page=page))
search_results.append(twitter_search.search(q=Q,
rpp=RESULTS_PER_PAGE,
page=page))

all_tweets = [ tweet
for page in search_results
