-
Notifications
You must be signed in to change notification settings - Fork 0
/
getFoFAndFoFFollows.rb
executable file
·210 lines (194 loc) · 8.08 KB
/
getFoFAndFoFFollows.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
#!/usr/bin/env ruby
require 'rubygems'
require 'json'
require 'pp'
require 'time'
require 'date'
require 'set'
require 'mongo'
require 'parseconfig'
require 'twitter'
# Validate the command line before touching any configuration, so a bare
# invocation prints the usage text even when twitter.conf cannot be read.
# `abort` writes the message to stderr and exits with a failure status.
abort "usage: #{$0} <twitter_screen_name>" if ARGV.empty?

# OAuth credentials are read from 'twitter.conf' (a relative path, so it is
# resolved against the directory the script is started from).
twitter_config = ParseConfig.new('twitter.conf').params
consumer_key = twitter_config['consumer_key']
consumer_secret = twitter_config['consumer_secret']
access_token = twitter_config['access_token']
access_token_secret = twitter_config['access_token_secret']
# Pauses the script for one hour when the Twitter API quota is almost spent.
#
# Reads Twitter.rate_limit_status.remaining_hits. When more than one hit is
# left the method returns nil straight away; otherwise it logs a notice to
# stderr and sleeps for 60 * 60 seconds.
def sleep_if_necessary
  return if Twitter.rate_limit_status.remaining_hits > 1

  $stderr.printf("API THROTTLE SLEEP\n")
  sleep(60 * 60)
end
# Hand the OAuth credentials to the Twitter gem's global configuration.
Twitter.configure do |config|
  config.consumer_key = consumer_key
  config.consumer_secret = consumer_secret
  config.oauth_token = access_token
  config.oauth_token_secret = access_token_secret
end

# Screen name given on the command line, normalised to lower case.
TWITTER_SCREEN_NAME = ARGV[0].downcase

# MongoDB connection settings come from the environment. Host, port and
# database name are mandatory; user and password are optional.
MONGO_HOST = ENV["MONGO_HOST"]
raise(StandardError, "Set Mongo hostname in ENV: 'MONGO_HOST'") unless MONGO_HOST
MONGO_PORT = ENV["MONGO_PORT"]
raise(StandardError, "Set Mongo port in ENV: 'MONGO_PORT'") unless MONGO_PORT
MONGO_USER = ENV["MONGO_USER"]
MONGO_PASSWORD = ENV["MONGO_PASSWORD"]
TWITTER_DB = ENV["TWITTER_DB"]
raise(StandardError, "Set Mongo Twitter database name in ENV: 'TWITTER_DB'") unless TWITTER_DB

db = Mongo::Connection.new(MONGO_HOST, MONGO_PORT.to_i).db(TWITTER_DB)

# Authenticate only when a user name was supplied. The raise terminates the
# script on failure, so no separate exit call is needed.
if MONGO_USER
  auth = db.authenticate(MONGO_USER, MONGO_PASSWORD)
  raise(StandardError, "Couldn't authenticate, exiting") unless auth
end
# Pages through the follower ids of +follower_id+ and returns those that are
# not already listed in +synthetic_followers_of_followers+.
#
# For each new id a placeholder document is inserted into +usersColl+ unless a
# document with the same "id_str" already exists.
#
# follower_id                      - id of the user whose followers are fetched.
# synthetic_followers_of_followers - ids that are already known (only read here).
# usersColl                        - users collection; find_one and insert are called on it.
#
# Returns an Array of the newly seen follower ids. When the API answers
# Unauthorized, paging stops and whatever was collected so far is returned.
# A ServiceUnavailable/BadGateway error is retried once per page after a
# 60 second sleep; a second failure on the same page is re-raised.
def getFollowersOf(follower_id, synthetic_followers_of_followers, usersColl)
  fresh_ids = []
  cursor = -1

  until cursor == 0
    $stderr.printf("followers_of_follower_cursor:%d\n", cursor)
    already_retried = false
    sleep_if_necessary

    begin
      page = Twitter.follower_ids(:user_id => follower_id, :cursor => cursor)
      page.ids.each do |id|
        unless synthetic_followers_of_followers.include?(id)
          $stderr.printf("NEW follower of follower:%d IS:%d\n", follower_id, id)
          fresh_ids << id

          id_str = id.to_s
          unless usersColl.find_one("id_str" => id_str)
            $stderr.printf("INSERTING FOLLOWER of FOLLOWER user id:%s\n", id_str)
            placeholder = {
              "id_str" => id_str,
              "user_info_initialized" => false,
              "partial_following_screen_names" => []
            }
            usersColl.insert(placeholder)
          end
        end
      end
    rescue Twitter::Error::ServiceUnavailable, Twitter::Error::BadGateway
      # One retry per page; give up on the second consecutive failure.
      raise if already_retried

      already_retried = true
      $stderr.printf("RETRY SLEEP\n")
      sleep(60)
      retry
    rescue Twitter::Error::Unauthorized
      $stderr.printf("Unauthorized to get followers of follower_id:%d\n", follower_id)
      break
    end

    cursor = page.next_cursor
  end

  fresh_ids
end
# Collects the ids followed by each of the given followers-of-followers.
#
# For every id in +new_synthetic_followers_of_followers+ the friend-id list is
# paged through with Twitter.friend_ids. Each friend id that is neither in
# +synthetic_follows_of_followers_of_followers+ nor already found during this
# call is recorded, and a placeholder document is inserted into +usersColl+
# unless a document with the same "id_str" already exists.
#
# new_synthetic_followers_of_followers        - ids of the users to query.
# synthetic_follows_of_followers_of_followers - ids that are already known (only read here).
# usersColl                                   - users collection; find_one and insert are called on it.
#
# Returns an Array of the newly seen friend ids, each listed once.
# When the API answers Unauthorized for a user, that user is skipped.
# A ServiceUnavailable/BadGateway error is retried once per page after a
# 60 second sleep; a second failure on the same page is re-raised.
def getFollowsOfFollowersOfFollowers(new_synthetic_followers_of_followers,
                                     synthetic_follows_of_followers_of_followers, usersColl)
  new_follows_of_followers_of_followers = []
  # Known ids plus the ones found during this call. Gives constant-time
  # membership checks and keeps an account that several of the queried users
  # follow from being reported (and looked up in Mongo) more than once.
  seen = Set.new(synthetic_follows_of_followers_of_followers)

  new_synthetic_followers_of_followers.each do |id|
    # Paging starts at cursor -1 and ends when next_cursor comes back as 0.
    follows_of_follower_of_follower_cursor = -1

    while follows_of_follower_of_follower_cursor != 0
      $stderr.printf("follows_of_follower_of_follower_cursor:%d\n", follows_of_follower_of_follower_cursor)
      tried_previously = false
      sleep_if_necessary()

      begin
        follows_of_follower_of_follower = Twitter.friend_ids(:user_id => id,
                                                             :cursor => follows_of_follower_of_follower_cursor)
        follows_of_follower_of_follower.ids.each do |follow_id|
          # Set#add? returns nil when the id was already present.
          next unless seen.add?(follow_id)

          $stderr.printf("NEW follow of follower of follower:%d IS:%d\n", id, follow_id)
          new_follows_of_followers_of_followers.push(follow_id)

          next if usersColl.find_one("id_str" => follow_id.to_s)

          $stderr.printf("INSERTING FOLLOW of FOLLOWER of FOLLOWER user id:%s\n", follow_id.to_s)
          followOfFollowerOfFollowerUser = { "id_str" => follow_id.to_s, "user_info_initialized" => false,
                                             "partial_following_screen_names" => [] }
          usersColl.insert(followOfFollowerOfFollowerUser)
        end
      rescue Twitter::Error::ServiceUnavailable, Twitter::Error::BadGateway
        # One retry per page; give up on the second consecutive failure.
        raise if tried_previously

        tried_previously = true
        $stderr.printf("RETRY SLEEP\n")
        sleep(60)
        retry
      rescue Twitter::Error::Unauthorized
        $stderr.printf("Unauthorized to get follows of followers of followers of user_id:%d\n", id)
        # Leave the paging loop for this user and move on to the next one.
        break
      end

      follows_of_follower_of_follower_cursor = follows_of_follower_of_follower.next_cursor
    end
  end

  # The original returned a misspelled, undefined local here (NameError).
  new_follows_of_followers_of_followers
end
# Main crawl: walk the followers of TWITTER_SCREEN_NAME, record each one in
# the users collection, expand to followers-of-followers and the accounts
# those users follow, then store the three id lists on the root user's
# document.
usersColl = db.collection("users")
existingUser = usersColl.find_one("screen_name" => TWITTER_SCREEN_NAME)
unless existingUser
  $stderr.printf("screen_name:%s NOT FOUND\n", TWITTER_SCREEN_NAME)
  exit
end

synthetic_followers_of_followers = []
synthetic_followers = []
synthetic_follows_of_followers_of_followers = []

# Paging starts at cursor -1 and ends when next_cursor comes back as 0.
follower_cursor = -1
while follower_cursor != 0
  $stderr.printf("follower_cursor:%d\n", follower_cursor)
  sleep_if_necessary()
  tried_previously = false
  begin
    followers = Twitter.follower_ids(TWITTER_SCREEN_NAME, :cursor => follower_cursor)
    followers.ids.each do |id|
      $stderr.printf("FOUND follower user id:%s\n", id.to_s)
      synthetic_followers.push(id) unless synthetic_followers.include?(id)

      existingFollowerUser = usersColl.find_one("id_str" => id.to_s)
      if existingFollowerUser
        # Tolerate documents that lack the list (NOTE(review): presumably
        # possible for users stored by other tooling - confirm).
        following_names = (existingFollowerUser["partial_following_screen_names"] ||= [])
        if following_names.include?(TWITTER_SCREEN_NAME)
          $stderr.printf("NOT UPDATING Follower user id:%s because screen_name:%s is PRESENT\n",id.to_s, TWITTER_SCREEN_NAME )
        else
          following_names.push(TWITTER_SCREEN_NAME)
          $stderr.printf("UPDATING user id:%s ADDING screen_name:%s\n",id.to_s, TWITTER_SCREEN_NAME )
          usersColl.update({"id_str" => id.to_s}, existingFollowerUser)
        end
      else
        $stderr.printf("INSERTING user id:%s\n",id.to_s)
        followerUser = { "id_str" => id.to_s, "user_info_initialized" => false,
                         "partial_following_screen_names" => [TWITTER_SCREEN_NAME] }
        usersColl.insert(followerUser)
      end

      new_synthetic_followers_of_followers = getFollowersOf(id, synthetic_followers_of_followers, usersColl)
      synthetic_followers_of_followers.concat(new_synthetic_followers_of_followers)

      new_synthetic_follows_of_followers_of_followers =
        getFollowsOfFollowersOfFollowers(new_synthetic_followers_of_followers,
                                         synthetic_follows_of_followers_of_followers, usersColl)
      # Accumulate into the list declared above; the original referenced an
      # undefined local here, which raised NameError.
      synthetic_follows_of_followers_of_followers.concat(new_synthetic_follows_of_followers_of_followers)
    end
  rescue Twitter::Error::ServiceUnavailable, Twitter::Error::BadGateway
    # One retry per page; give up on the second consecutive failure.
    raise if tried_previously

    tried_previously = true
    $stderr.printf("RETRY SLEEP\n")
    sleep(60)
    retry
  end
  follower_cursor = followers.next_cursor
end

existingUser["synthetic_followers_of_followers"] = synthetic_followers_of_followers
existingUser["synthetic_followers"] = synthetic_followers
existingUser["synthetic_follows_of_followers_of_followers"] = synthetic_follows_of_followers_of_followers
# Persist through the collection. Calling update on the document hash itself
# (as the original did) never writes to MongoDB.
usersColl.update({"id_str" => existingUser["id_str"]}, existingUser)