Skip to content

Commit

Permalink
initial version
Browse files Browse the repository at this point in the history
  • Loading branch information
sampsyo committed Apr 16, 2012
0 parents commit 0e5f1db
Show file tree
Hide file tree
Showing 3 changed files with 244 additions and 0 deletions.
91 changes: 91 additions & 0 deletions README.rst
@@ -0,0 +1,91 @@
python-blekko
=============

This module provides simple bindings to the `Blekko`_ API. To use the API,
`contact Blekko for an API key`_.

This module currently only supports **search queries** and **page statistics**.
The API also provides tools for manipulating slashtags, but this library doesn't
support that yet.

The library is internally rate-limited to one query per second in accordance
with Blekko's guidelines.

.. _Blekko: https://blekko.com/
.. _contact Blekko for an API key: http://help.blekko.com/index.php/tag/api/

Searching
---------

To use the API, first create a ``Blekko`` object using your "source" or "auth"
API key::

    import blekko
    api = blekko.Blekko(source='my_api_key')

Then, to perform searches, use the ``query`` method. Its arguments are the
search terms (as a string) and, optionally, the page number::

    results = api.query('peach cobbler')

The returned object is a sequence containing ``Result`` objects, which
themselves have a number of useful fields::

    for result in results:
        print result.url_title
        print result.url
        print result.snippet

Errors in communicating with the server are raised as ``BlekkoError``
exceptions, so you'll want to handle these exceptions when making calls to the
API.

An Example
''''''''''

Putting it all together, here's a short script that gets a single link for
search terms on the command line::

    import blekko
    import sys

    _api = blekko.Blekko(source='my_api_key')

    def get_link(terms):
        try:
            res = _api.query(terms + ' /ps=1')
        except blekko.BlekkoError as exc:
            print >>sys.stderr, str(exc)
            return None
        if len(res):
            return res[0].url

    if __name__ == '__main__':
        link = get_link(' '.join(sys.argv[1:]))
        if link:
            print(link)
        else:
            sys.exit(1)

Page Statistics
---------------

Blekko provides an API for getting SEO-related statistics for a URL. Use the
``pagestats`` method, which takes a URL as its only parameter, to get a
dictionary containing information about a page::

    >>> api.pagestats('http://python.org/')
    {u'cached': True, u'ip': u'82.94.164.162', u'host_rank': 3835.107267,
     u'host_inlinks': 467267, u'adsense': None, u'dup': True,
     u'rss': u'http://www.python.org/channews.rdf'}

Credits
-------

These bindings were written by `Adrian Sampson`_ and modeled after the `Perl
bindings`_ by Greg Lindahl. The source is made available under the `MIT
license`_.

.. _Adrian Sampson: https://github.com/sampsyo/
.. _Perl bindings: http://search.cpan.org/~wumpus/WebService-Blekko-1.00_07/
.. _MIT license: http://www.opensource.org/licenses/MIT
128 changes: 128 additions & 0 deletions blekko.py
@@ -0,0 +1,128 @@
"""Bindings for the Blekko search API."""
import urllib
import time
import threading
import json

BASE_URL = 'http://blekko.com'
RATE_LIMIT = 1.0 # Seconds.

class _rate_limit(object):
    """Decorator class that throttles calls to the wrapped function.

    Successive calls are spaced at least RATE_LIMIT seconds apart. A
    lock guards the timing bookkeeping so the wrapper can be shared
    between threads.
    """
    def __init__(self, fun):
        self.lock = threading.Lock()
        self.last_call = 0.0  # time.time() value recorded by the last call.
        self.fun = fun

    def __call__(self, *args, **kwargs):
        with self.lock:
            # Sleep off whatever is left of the minimum interval, then
            # record the moment this call was let through.
            elapsed = time.time() - self.last_call
            if elapsed < RATE_LIMIT:
                time.sleep(RATE_LIMIT - elapsed)
            self.last_call = time.time()

            # Delegate to the wrapped function while still holding the
            # lock, so throttled calls run one at a time.
            result = self.fun(*args, **kwargs)
        return result

class BlekkoError(Exception):
    """Common base class for the API errors raised by this module.

    Catching this type also catches its subclasses, such as ServerError.
    """

class ServerError(BlekkoError):
    """Error raised when the server does not accept a request, e.g. when
    it answers with an HTTP status other than 200.
    """

@_rate_limit
def _http_request(url):
    """Make a (rate-limited) request to the Blekko server and return the
    resulting data.

    `url` is the complete request URL. The body of the response is
    returned as read from the connection. Raises ServerError when the
    server answers with any HTTP status other than 200.
    """
    f = urllib.urlopen(url)
    try:
        code = f.getcode()
        if code == 503:
            raise ServerError('server overloaded (503)')
        elif code != 200:
            raise ServerError('HTTP error {}'.format(code))
        return f.read()
    finally:
        # Always release the connection, on success and on error alike;
        # otherwise the socket stays open until garbage collection.
        f.close()

class _NoSuchField(AttributeError, KeyError):
    """Raised for a field that is missing from a response.

    Deriving from AttributeError makes getattr() with a default and
    hasattr() work; deriving from KeyError keeps existing callers that
    catch KeyError working.
    """


class ResponseObject(object):
    """An object wrapper for a dictionary providing attribute access to
    values in the underlying dictionary (``obj.field`` returns
    ``obj.data['field']``).
    """
    def __init__(self, data):
        self.data = data

    def __getattr__(self, key):
        """Return the response field named `key`.

        Only invoked when normal attribute lookup fails. Raises an
        exception that is both an AttributeError and a KeyError when
        the field is absent.
        """
        # If `data` itself is not set yet (e.g. while copy or pickle is
        # rebuilding the instance), touching self.data would re-enter
        # this method forever. Fail like an ordinary missing attribute.
        try:
            data = self.__dict__['data']
        except KeyError:
            raise AttributeError(key)

        if key in data:
            return data[key]
        raise _NoSuchField('no such field {}'.format(repr(key)))

    def __repr__(self):
        return '{}({})'.format(type(self).__name__, self.data)

class Result(ResponseObject):
    """One entry of a search response. Its fields are read as
    attributes; available ones include url, url_title, snippet, rss,
    short_host, short_host_url, and display_url.
    """

class ResultSet(ResponseObject):
    """A set of search results. Behaves as an immutable sequence
    containing Result objects (accessible via iteration or
    subscripting). Additional available fields include q, noslash_q,
    total_num, num_elem_start, num_elem_end, nav_page_range_start,
    nav_page_range_end, tag_switches, sug_slash, and
    universal_total_results.

    The individual results are read from the 'RESULT' entry of the
    underlying response dictionary.
    """
    def __iter__(self):
        """Yield a Result for each entry, in response order."""
        for result in self.data['RESULT']:
            yield Result(result)

    def __getitem__(self, index):
        """Return the Result at integer `index`, or a list of Result
        objects when `index` is a slice.
        """
        selected = self.data['RESULT'][index]
        if isinstance(index, slice):
            # A slice selects several raw entries; wrap each one rather
            # than wrapping the list itself in a single Result.
            return [Result(item) for item in selected]
        return Result(selected)

    def __len__(self):
        """Return the number of results in this set."""
        return len(self.data['RESULT'])

class Blekko(object):
    """Entry point for the API: holds the application's key and exposes
    the search and page-statistics calls.
    """
    def __init__(self, auth=None, source=None):
        """Build an API object from an `auth` or a `source` key (use
        whichever Blekko assigned to you). At least one of them is
        required; BlekkoError is raised otherwise.
        """
        if not (auth or source):
            raise BlekkoError('API key not provided')
        self.auth, self.source = auth, source

    def _request(self, path, params):
        """Send a (rate-limited) request for `path` with the query
        parameters `params` plus the API key, and return the response
        data. The caller's `params` mapping is not modified.
        """
        # The auth key takes precedence when both keys were supplied.
        if self.auth:
            credentials = {'auth': self.auth}
        else:
            credentials = {'source': self.source}

        full_params = dict(params)  # Work on a copy.
        full_params.update(credentials)

        url = "{}{}?{}".format(BASE_URL, path, urllib.urlencode(full_params))
        return _http_request(url)

    def query(self, terms, page=0):
        """Run a search for `terms` (a string) and return the requested
        `page` of results as a ResultSet object.
        """
        # The " /json" suffix appended to the terms requests JSON output.
        search_params = {
            'q': terms + ' /json',
            'p': str(page),
        }
        raw = self._request('/ws/', search_params)
        return ResultSet(json.loads(raw))

    def pagestats(self, url):
        """Look up page statistics for `url` and return them as the
        dictionary decoded from the server's JSON response.
        """
        raw = self._request('/api/pagestats', {'url': url})
        return json.loads(raw)
25 changes: 25 additions & 0 deletions setup.py
@@ -0,0 +1,25 @@
import os
from distutils.core import setup

def _read(fn):
    """Return the contents of the file named `fn`, resolved relative to
    the directory containing this script.
    """
    path = os.path.join(os.path.dirname(__file__), fn)
    # Close the file deterministically instead of leaving it to the
    # garbage collector.
    with open(path) as f:
        return f.read()

# Package metadata and contents for the single-module distribution.
setup(
    # Identity.
    name='blekko',
    version='0.1',
    description='bindings for the Blekko search engine API',
    # The long description is taken verbatim from the README.
    long_description=_read('README.rst'),

    # Authorship and licensing.
    author='Adrian Sampson',
    author_email='adrian@radbox.org',
    url='https://github.com/sampsyo/python-blekko',
    license='MIT',
    platforms='ALL',

    # Contents: one top-level module.
    py_modules=['blekko'],

    classifiers=[
        'Topic :: Internet :: WWW/HTTP :: Indexing/Search',
        'Intended Audience :: Developers',
        'Programming Language :: Python :: 2',
    ],
)

0 comments on commit 0e5f1db

Please sign in to comment.