-
Notifications
You must be signed in to change notification settings - Fork 6
/
writer.py
148 lines (136 loc) · 4.98 KB
/
writer.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
from __future__ import print_function
import os
import sys
# opts parameters relevant to this module:
# user_delay - number of days before a user should be rescraped; if -1, never update/overwrite
# thread_delay - number of days before a thread is rescraped; if -1, never update/overwrite
# history - can contain 'threads', 'users', 'comments', and 'subreddits'; if a kind is present,
#   data for an id that is already in the database is appended as a new row once enough
#   time has passed; if it is not present, the only check is that enough time has passed
#   between scrapes. Comments may not be overwritten unless the data is in thread mode
#   (which overrides the time delays); they may be appended regardless.
# Conversion factor: multiply a number of seconds by this to express it in days.
seconds_to_days = 1.0 / (24 * 60 * 60)
def write_user(data, opts):
    """Write one scraped user record to the database, honoring ``opts.user_delay``.

    Parameters:
        data: dict describing the user. ``data['username']`` and
            ``data['timestamp']`` are read here; when ``opts.trophies`` is
            set, the ``'_trophy'`` entry is popped off (mutating ``data``)
            and written separately.
        opts: options object. This function reads ``opts.db``,
            ``opts.trophies``, ``opts.history``, ``opts.user_delay`` and
            ``opts.verbose``.

    Returns:
        False when the user is already stored and is skipped (either
        ``user_delay`` is -1 or fewer than ``user_delay`` days have passed
        since the newest stored timestamp); True otherwise, after the
        transaction has been committed.

    If the lookup or the write raises an ordinary exception, the exception
    info is printed and a plain insert is attempted as a best-effort
    fallback. ``KeyboardInterrupt`` and ``SystemExit`` are not intercepted.
    """
    if opts.trophies:
        # Trophy data travels inside the user dict; split it off so it is
        # written on its own and not handed to the user insert/update.
        opts.db.write_trophy_data(data.pop('_trophy', {}))
    history_mode = 'users' in opts.history
    db = opts.db
    try:
        # Newest stored timestamp for this username (None when absent).
        db.execute('SELECT max(timestamp) FROM %s.users WHERE username=%%s' % db.schema,
                   [data['username']])
        last_time = db.fetchall()[0][0]
        if last_time is None:
            db.insert_user(data)
        elif (opts.user_delay == -1 or
              (data['timestamp'] - last_time).total_seconds() / 86400.0
              < opts.user_delay):
            # Either rescraping is disabled, or the stored row is still fresh.
            if opts.verbose:
                print('already have %s in database' % data['username'])
            return False
        elif history_mode:
            # History mode keeps every scrape as its own row.
            db.insert_user(data)
        else:
            db.update_user(data)
    except Exception:
        # Best-effort fallback: report the problem and try a plain insert.
        print(sys.exc_info())
        db.insert_user(data)
    db.commit()
    return True
def write_thread(data, opts):
    """Write one scraped thread record to the database, honoring ``opts.thread_delay``.

    Parameters:
        data: dict describing the thread. ``data['id']``,
            ``data['timestamp']`` and ``data['scrape_mode']`` are read here.
        opts: options object. This function reads ``opts.db``,
            ``opts.history`` and ``opts.thread_delay``.

    Returns:
        True when a row was inserted or updated and the transaction was
        committed; False when the thread was skipped.

    Behavior for a thread that is already stored:
        * not scraped in 'thread' mode and either ``thread_delay`` is -1 or
          fewer than ``thread_delay`` days have passed: skipped;
        * 'threads' in ``opts.history``: a new row is inserted;
        * scraped in 'thread' mode (without history): the row is updated;
        * otherwise: skipped.

    If the lookup or the write raises an ordinary exception, the exception
    info is printed and a plain insert is attempted as a best-effort
    fallback. ``KeyboardInterrupt`` and ``SystemExit`` are not intercepted.
    """
    history_mode = 'threads' in opts.history
    db = opts.db
    try:
        # Newest stored timestamp for this thread id (None when absent).
        db.execute('SELECT max(timestamp) FROM %s.threads WHERE id=%%s' % db.schema,
                   [data['id']])
        last_time = db.fetchall()[0][0]
        if last_time is None:
            db.insert_thread(data)
            db.commit()
            return True
        elapsed_days = (data['timestamp'] - last_time).total_seconds() / 86400.0
        fails_time = elapsed_days < opts.thread_delay
        # 'thread' scrape mode overrides the delay checks.
        if (opts.thread_delay == -1 or fails_time) and data['scrape_mode'] != 'thread':
            return False
        if history_mode:
            db.insert_thread(data)
        elif data['scrape_mode'] == 'thread':
            # threads should be updated if previous checks have not filtered them out
            db.update_thread(data)
        else:
            return False
    except Exception:
        # Best-effort fallback: report the problem and try a plain insert.
        print(sys.exc_info())
        db.insert_thread(data)
    db.commit()
    return True
def write_comment(data, opts):
    """Write one scraped comment record to the database.

    Parameters:
        data: dict describing the comment. ``data['id']``,
            ``data['timestamp']`` and ``data['scrape_mode']`` are read here.
        opts: options object. This function reads ``opts.db``,
            ``opts.history`` and ``opts.thread_delay``.

    Returns:
        True when a row was inserted or updated and the transaction was
        committed; False when the comment is already stored and the data is
        neither in 'thread' scrape mode nor in comment-history mode.

    A ``TypeError`` raised while looking up or comparing timestamps is
    printed and answered with a plain insert.
    """
    append_history = 'comments' in opts.history
    store = opts.db
    try:
        # Newest stored timestamp for this comment id (None when absent).
        store.execute('SELECT max(timestamp) FROM %s.comments WHERE id=%%s' % store.schema,
                      [data['id']])
        previous = store.fetchall()[0][0]
        if previous is None:
            store.insert_comment(data)
            store.commit()
            return True
        # The outcome of this delay comparison is not consulted below; the
        # expression is still evaluated because a TypeError raised by the
        # timestamp arithmetic is what routes to the insert fallback.
        age = data['timestamp'] - previous
        _within_delay = age.seconds * seconds_to_days < opts.thread_delay - age.days
        in_thread_mode = data['scrape_mode'] == 'thread'
        if not in_thread_mode and not append_history:
            return False
        if append_history:
            store.insert_comment(data)
        else:
            store.update_comment(data)
    except TypeError:
        print(sys.exc_info())
        store.insert_comment(data)
    store.commit()
    return True
def write_subreddit(data, opts):
    """Insert or update one subreddit record.

    Parameters:
        data: dict describing the subreddit; ``data['subreddit']`` is used
            to look up the stored update time.
        opts: options object. This function reads ``opts.db`` and
            ``opts.history``.

    A new row is inserted when 'subreddits' is in ``opts.history`` or when
    ``db.get_subreddit_update_time`` returns None; otherwise the existing
    row is updated. Nothing is returned, and this function does not call
    ``db.commit()`` itself.

    Raises:
        KeyError: if ``data`` has no ``'subreddit'`` key (the record is
            printed first to help debugging).
    """
    history_mode = 'subreddits' in opts.history
    db = opts.db
    # subreddit has already passed the validation requirements
    if 'subreddit' not in data:
        # Debugging aid: dump the malformed record before the lookup below
        # raises KeyError.
        print(data)
    last_update = db.get_subreddit_update_time(data['subreddit'])
    # Identity test: only a genuine None means "no stored row".
    if history_mode or last_update is None:
        db.insert_subreddit(data)
    else:
        db.update_subreddit(data)
def write_traffic(data, opts):
    """Store every traffic entry in ``data`` through ``opts.db``.

    For each entry, ``db.check_if_traffic_entry_exists`` decides whether the
    entry is passed to ``db.update_traffic`` (truthy result) or to
    ``db.insert_traffic`` (falsy result). Nothing is returned.
    """
    store = opts.db
    for row in data:
        already_stored = store.check_if_traffic_entry_exists(row)
        save = store.update_traffic if already_stored else store.insert_traffic
        save(row)
def write_related_subreddits(data, opts):
    """Pass each entry of ``data`` to ``opts.db.insert_related_subreddit``, in order.

    Nothing is returned.
    """
    store = opts.db
    for relation in data:
        store.insert_related_subreddit(relation)
def write_wikis(data, opts):
    """Pass each entry of ``data`` to ``opts.db.insert_wiki``, in order.

    Nothing is returned.
    """
    store = opts.db
    for page in data:
        store.insert_wiki(page)