-
Notifications
You must be signed in to change notification settings - Fork 13
/
process-usertimeline.py
94 lines (74 loc) · 3.14 KB
/
process-usertimeline.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
import json, tweepy
# Previously saved timeline dump that the analysis further down reads.
filename = 'usertimeline.json'

# File modes shared by the open() calls in this script.
READ = 'rb'
WRITE = 'wb'

# Load the saved tweets; the context manager closes the handle as soon as
# the JSON has been parsed instead of leaving it open for the whole run.
with open(filename, READ) as tweets_file:
    tweets = json.load(tweets_file)

# Consumer keys and access tokens, used for OAuth
with open('../tokens.json', READ) as tokens_file:
    tokens = json.load(tokens_file)

# OAuth process, using the keys and tokens
auth = tweepy.OAuthHandler(tokens['consumer_key'], tokens['consumer_secret'])
auth.set_access_token(tokens['access_token'], tokens['access_token_secret'])

# Creation of the actual interface, using authentication
api = tweepy.API(auth)
def print_location(obj):
    """Return the author's profile location for a status-like object.

    Despite the name, nothing is printed: the caller receives a dict and
    decides what to do with it.

    Args:
        obj: Object exposing ``obj.user.location``.

    Returns:
        A dict with a single ``'location'`` key holding the location encoded
        as UTF-8, or ``None`` when the profile has no location set.
    """
    location = obj.user.location
    # A missing location arrives as None; calling .encode() on it would raise
    # AttributeError, so pass the None through instead.
    if location is not None:
        location = location.encode('utf8')
    # Time zone and geo data are deliberately not included here.
    return {'location': location}
def serialize_tweepy_object(obj):
    """Flatten a status object into a plain dict of the fields this script uses.

    Args:
        obj: Status-like object exposing ``id``, ``text``, ``created_at``,
            ``geo``, ``retweet_count``, ``favorite_count``, ``author.name``,
            ``author.screen_name``, ``user.location`` and ``user.time_zone``.

    Returns:
        A dict with the keys ``id``, ``retweet_count``, ``author-name``,
        ``screen-name``, ``created-at`` (formatted month-day-year), ``text``,
        ``analysis`` (``tweet_length`` and ``word-count``), ``location``,
        ``time-zone``, ``isGeo`` and ``favorite_count``. Text fields are
        UTF-8 encoded; ``location`` is ``None`` when the profile has none.
    """
    raw_text = obj.text

    # Measure the decoded text, not its UTF-8 bytes: on bytes a single
    # accented letter counts as several units and is not seen as
    # alphanumeric, which inflates the length and splits words apart.
    words = ''.join(c if c.isalnum() else ' ' for c in raw_text).split()

    # A missing location arrives as None and cannot be encoded.
    location = obj.user.location
    if location is not None:
        location = location.encode('utf8')

    tweet = {}
    tweet['id'] = obj.id
    # Read the counter straight from the status. The earlier
    # len(api.retweets(obj.id)) lookup cost one network call per tweet, was
    # capped at 100, and its result was overwritten by this value anyway.
    tweet['retweet_count'] = obj.retweet_count
    tweet['author-name'] = obj.author.name.encode('utf8')
    tweet['screen-name'] = obj.author.screen_name.encode('utf8')
    tweet['created-at'] = obj.created_at.strftime('%m-%d-%Y')
    tweet['text'] = raw_text.encode('utf8')
    tweet['analysis'] = {
        'tweet_length': len(raw_text),
        'word-count': len(words),
    }
    tweet['location'] = location
    tweet['time-zone'] = obj.user.time_zone
    tweet['isGeo'] = obj.geo
    tweet['favorite_count'] = obj.favorite_count
    return tweet
# Position of the tweet text inside each record's 'text' entry.
# NOTE(review): presumably 'text' is a sequence in usertimeline.json -- confirm.
TEXT = 1

# Which hashtags were used?
hashtags = []
for tweet in tweets:
    for word in tweet['text'][TEXT].split():
        if '#' in word:
            hashtags.append(word)
print(hashtags)

# Which tweet is most popular? (Which tweet has the most retweets?)
# Sorted so that the highest retweet count comes first.
retweets = sorted(
    tweets,
    key=lambda entry: entry['retweet_count'],
    reverse=True,
)
# print(retweets[0])

# Get location from tweet
locations = []
for tweet in tweets:
    locations.append(tweet['location'])
# print(locations)
# The string literal below is never executed: it parks the collection step
# that produced the gagatimeline.* files so it can be re-enabled by hand.
# NOTE(review): if re-enabled, the bare `except: pass` leaves
# locations_of_retweeters undefined when the API call fails.
'''
#Get locations from those who retweeted the most popular tweet
most_popular_tweet = retweets[0]
try:
    locations_of_retweeters = api.retweets(most_popular_tweet['id'])
except:
    pass
print map(print_location,locations_of_retweeters)
# OAuth process, using the keys and tokens
auth = tweepy.OAuthHandler(tokens['consumer_key'], tokens['consumer_secret'])
auth.set_access_token(tokens['access_token'], tokens['access_token_secret'])
# Creation of the actual interface, using authentication
api = tweepy.API(auth)
user_file = open('gagatimeline.json',WRITE)
user_text_file = open('gagatimeline.txt','a')
user_excel_file = open('gagatimeline.csv','a')
json_list = []
for tweet in tweepy.Cursor(api.user_timeline, id="ladygaga", include_rts=False).items(10):
    tweet = serialize_tweepy_object(tweet)
    json_list.append(tweet)
    user_text_file.write(tweet['text'] + '\n')
    user_excel_file.write(str(tweet['author-name']) + '|' + tweet['text'] + '|' + str(tweet['location']) + '|' + str(tweet['retweet_count']) + '|' + str(tweet['favorite_count']) + '\n')
'''

# Read the saved tweets, one per line; the context manager closes the file.
with open('gagatimeline.txt', READ) as gaga_file:
    gagatweets = gaga_file.read().splitlines()

# Hashtags per tweet. Each line is split into words first: iterating the
# line itself walks it character by character, which can only ever yield
# bare '#' characters instead of the tagged words.
hashtags = [[word for word in tweet.split() if '#' in word] for tweet in gagatweets]
print(hashtags)