Find file
Fetching contributors…
Cannot retrieve contributors at this time
executable file 78 lines (60 sloc) 2.24 KB
# -*- coding: utf-8 -*-
import os
import re
import time
import httplib
from datetime import datetime
import pytz
from bs4 import BeautifulSoup
SUBREDDITS = ['programming', 'learnpython', 'python', 'django', 'flask']
HEADER = ['subreddit', 'users', 'date']
MODULE = os.path.dirname(os.path.abspath(__file__))
def parent_dir(path):
'''Return the parent of a directory.'''
return os.path.abspath(os.path.join(path, os.pardir))
REPO_DIR = os.environ.get("OPENSHIFT_REPO_DIR", parent_dir(MODULE))
# Where to save the data
DATA_FILE = os.path.join(REPO_DIR, 'wsgi', 'data', 'python-subreddit-traffic.csv')
DELAY = 10
BASE = ''
NON_DECIMAL_REGEX = re.compile(r'[^\d.]+')
HTTP_HEADER = {'User-Agent': 'Python subreddit user counter'}
def main():
# Check if the header exists
header_line = ','.join(HEADER) + '\n'
with open(DATA_FILE, 'a+') as fp:
first_line = fp.readline()
if first_line != header_line:
write_header(DATA_FILE, header_line)
# Get the user counts for each subreddit
connection = httplib.HTTPConnection("")
for i, subreddit in enumerate(SUBREDDITS):
count = get_user_count(connection, subreddit)
row = '{subreddit},{count},{time}'.format(subreddit=subreddit,
# Write the data to csv
with open(DATA_FILE, 'a+') as fp:
fp.write(row + '\n')
if i != len(SUBREDDITS) - 1:
def write_header(filename, header):
'''Writes a header line to a file.'''
with open(filename, 'r') as original:
data =
with open(filename, 'w') as newfile:
newfile.write(header + data)
return None
def get_user_count(connection, subreddit):
'''Scrape the user count for a subreddit.'''
url = '/r/' + subreddit
connection.request('GET', url, headers=HTTP_HEADER)
html = connection.getresponse().read()
soup = BeautifulSoup(html)
user_count_str ='p.users-online span.number')[0].text
return int(strip_nonnumeric(user_count_str))
def strip_nonnumeric(s):
return NON_DECIMAL_REGEX.sub('', s)
if __name__ == '__main__':