# traffic.py (forked from reddit-archive/reddit)
# The contents of this file are subject to the Common Public Attribution
# License Version 1.0. (the "License"); you may not use this file except in
# compliance with the License. You may obtain a copy of the License at
# http://code.reddit.com/LICENSE. The License is based on the Mozilla Public
# License Version 1.1, but Sections 14 and 15 have been added to cover use of
# software over a computer network and provide for limited attribution for the
# Original Developer. In addition, Exhibit A has been modified to be consistent
# with Exhibit B.
#
# Software distributed under the License is distributed on an "AS IS" basis,
# WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License for
# the specific language governing rights and limitations under the License.
#
# The Original Code is Reddit.
#
# The Original Developer is the Initial Developer. The Initial Developer of the
# Original Code is CondeNet, Inc.
#
# All portions of the code written by CondeNet are Copyright (c) 2006-2010
# CondeNet, Inc. All Rights Reserved.
################################################################################
from httplib import HTTPConnection
from urlparse import urlparse
from cPickle import loads
from utils import query_string, url_join
import os, socket, time, datetime
from pylons import g
from r2.lib.memoize import memoize
def load_traffic_uncached(interval, what, iden,
                          start_time = None, stop_time = None,
                          npoints = None):
    """
    Fetches pickled traffic from the traffic server and returns it as a list.
    On connection failure (or no data) returns an empty list.

    interval, what and iden are joined onto g.traffic_url to form the
    request path.

    start_time / stop_time are optional date or datetime bounds.  They are
    sent as colon-joined "Y:M:D:h:m:s" fields; datetimes are first put into
    g.tz.  For hourly 'thing' traffic, plain dates are promoted to midnight
    datetimes and both bounds are moved back by promote.timezone_offset.

    npoints, when truthy, is sent as the 'n' query argument.

    Returns the unpickled response body on HTTP 200, and [] for any other
    status or when a socket.error is raised.
    """
    from r2.lib import promote

    def format_date(d):
        # Plain date objects have no tzinfo attribute and are formatted
        # as-is; datetimes are converted (or pinned, if naive) to g.tz.
        if hasattr(d, "tzinfo"):
            if d.tzinfo is None:
                d = d.replace(tzinfo = g.tz)
            else:
                d = d.astimezone(g.tz)
        return ":".join(map(str, d.timetuple()[:6]))

    def shift_for_hourly_thing(t):
        # Promote a plain date to a midnight datetime so the offset can be
        # subtracted from it, then apply the promote timezone offset.
        if not isinstance(t, datetime.datetime):
            t = datetime.datetime(*t.timetuple()[:3])
        return t - promote.timezone_offset

    traffic_url = url_join(g.traffic_url, interval, what, iden)

    if what == 'thing' and interval == 'hour':
        if start_time:
            start_time = shift_for_hourly_thing(start_time)
        if stop_time:
            stop_time = shift_for_hourly_thing(stop_time)

    args = {}
    if start_time:
        args['start_time'] = format_date(start_time)
    if stop_time:
        args['stop_time'] = format_date(stop_time)
    if npoints:
        args['n'] = npoints

    u = urlparse(traffic_url)
    try:
        conn = HTTPConnection(u.hostname, u.port)
        try:
            conn.request("GET", u.path + query_string(args))
            response = conn.getresponse()
            if response.status != 200:
                return []
            # NOTE(security): the body is unpickled, which can execute
            # arbitrary code.  This is only safe while g.traffic_url points
            # at a trusted server.
            return loads(response.read())
        finally:
            # Always release the connection, including when the request,
            # the read or the unpickling raises.
            conn.close()
    except socket.error:
        return []
#@memoize("cached_traffic", time = 60)
def load_traffic(interval, what, iden = '',
                 start_time = None, stop_time = None,
                 npoints = None):
    """
    Loads traffic through load_traffic_uncached and normalizes its dates.

    interval = (hour, day, month)
    what = (reddit, lang, thing, promos)
    iden is the specific thing (reddit name, language name, thing
    fullname) that one is seeking traffic for.

    When the first row starts with a datetime, the rows are rebuilt as
    (date, data) pairs: hourly rows get naive datetimes moved back by
    time.timezone seconds, every other interval gets plain dates.  Any
    other result (including an empty one) is returned unchanged.
    """
    rows = load_traffic_uncached(interval, what, iden,
                                 start_time = start_time,
                                 stop_time = stop_time,
                                 npoints = npoints)

    # Nothing to normalize unless the rows are keyed by datetimes.
    if not rows or not isinstance(rows[0][0], datetime.datetime):
        return rows

    stamps, values = zip(*rows)
    normalized = []
    if interval == 'hour':
        # shift hourly totals into local time zone.
        local_offset = datetime.timedelta(0, time.timezone)
        for stamp in stamps:
            normalized.append(stamp.replace(tzinfo=None) - local_offset)
    else:
        # we don't care about the hours
        for stamp in stamps:
            normalized.append(stamp.date())
    return zip(normalized, values)
def load_summary(what, interval = "month", npoints = 50):
    """
    Loads the "summary" traffic series through load_traffic.

    The given `what` is forwarded as load_traffic's iden, with "summary"
    as the kind of traffic requested.  interval and npoints are passed
    through unchanged.
    """
    summary = load_traffic(interval, "summary", what, npoints = npoints)
    return summary