Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
0 parents
commit 0e5f1db
Showing
3 changed files
with
244 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,91 @@ | ||
python-blekko | ||
============= | ||
|
||
This module provides simple bindings to the `Blekko`_ API. To use the API, | ||
`contact Blekko for an API key`_. | ||
|
||
This module currently only supports **search queries** and **page statistics**. | ||
The API also provides tools for manipulating slashtags, but this library doesn't | ||
support that yet. | ||
|
||
The library is internally rate-limited to one query per second in accordance | ||
with Blekko's guidelines. | ||
|
||
.. _Blekko: https://blekko.com/ | ||
.. _contact Blekko for an API key: http://help.blekko.com/index.php/tag/api/ | ||
|
||
Searching | ||
--------- | ||
|
||
To use the API, first create a ``Blekko`` object using your "source" or "auth" | ||
API key:: | ||
|
||
import blekko | ||
api = blekko.Blekko(source='my_api_key') | ||
|
||
Then, to perform searches, use the ``query`` method. Its arguments are the | ||
search terms (as a string) and, optionally, the page number:: | ||
|
||
results = api.query('peach cobbler') | ||
|
||
The returned object is a sequence containing ``Result`` objects, which | ||
themselves have a number of useful fields:: | ||
|
||
for result in results: | ||
print result.url_title | ||
print result.url | ||
print result.snippet | ||
|
||
Errors in communicating with the server are raised as ``BlekkoError`` | ||
exceptions, so you'll want to handle these exceptions when making calls to the | ||
API. | ||
|
||
An Example | ||
'''''''''' | ||
|
||
Putting it all together, here's a short script that gets a single link for | ||
search terms on the command line:: | ||
|
||
import blekko | ||
import sys | ||
|
||
_api = blekko.Blekko(source='my_api_key') | ||
|
||
def get_link(terms): | ||
try: | ||
res = _api.query(terms + ' /ps=1') | ||
except blekko.BlekkoError as exc: | ||
print >>sys.stderr, str(exc) | ||
return None | ||
if len(res): | ||
return res[0].url | ||
|
||
if __name__ == '__main__': | ||
link = get_link(' '.join(sys.argv[1:])) | ||
if link: | ||
print(link) | ||
else: | ||
sys.exit(1) | ||
|
||
Page Statistics | ||
--------------- | ||
|
||
Blekko provides an API for getting SEO-related statistics for a URL. Use the | ||
``pagestats`` method, which takes a URL as its only parameter, to get a | ||
dictionary containing information about a page:: | ||
|
||
>>> api.pagestats('http://python.org/') | ||
{u'cached': True, u'ip': u'82.94.164.162', u'host_rank': 3835.107267, | ||
u'host_inlinks': 467267, u'adsense': None, u'dup': True, | ||
u'rss': u'http://www.python.org/channews.rdf'} | ||
|
||
Credits | ||
------- | ||
|
||
These bindings were written by `Adrian Sampson`_ and modeled after the `Perl | ||
bindings`_ by Greg Lindahl. The source is made available under the `MIT | ||
license`_. | ||
|
||
.. _Adrian Sampson: https://github.com/sampsyo/ | ||
.. _Perl bindings: http://search.cpan.org/~wumpus/WebService-Blekko-1.00_07/ | ||
.. _MIT license: http://www.opensource.org/licenses/MIT |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,128 @@ | ||
"""Bindings for the Blekko search API.""" | ||
import functools
import json
import threading
import time
import urllib
|
||
BASE_URL = 'http://blekko.com' | ||
RATE_LIMIT = 1.0 # Seconds. | ||
|
||
class _rate_limit(object):
    """Decorator that enforces a minimum interval between calls.

    Wraps ``fun`` so that successive calls start at least RATE_LIMIT
    seconds apart. The wait, the timestamp update and the wrapped call
    all happen while holding a lock, so concurrent callers are
    serialized and each one waits its turn.

    The wrapped function's metadata (``__name__``, ``__doc__``, ...) is
    copied onto the decorator instance so the decorated object still
    identifies itself as the original function.
    """
    def __init__(self, fun):
        # Copy the metadata first: update_wrapper also merges
        # ``fun.__dict__`` into this instance, and the bookkeeping
        # attributes assigned below must win over anything it copies.
        functools.update_wrapper(self, fun)
        self.fun = fun
        self.last_call = 0.0
        self.lock = threading.Lock()

    def __call__(self, *args, **kwargs):
        """Wait out the remaining interval, then call the wrapped
        function with the given arguments and return its result.
        """
        with self.lock:
            # Wait until RATE_LIMIT time has passed since last_call,
            # then update last_call.
            since_last_call = time.time() - self.last_call
            if since_last_call < RATE_LIMIT:
                time.sleep(RATE_LIMIT - since_last_call)
            self.last_call = time.time()

            # Call the original function.
            return self.fun(*args, **kwargs)
|
||
class BlekkoError(Exception):
    """Root of the exception hierarchy used by this module.

    Catch this type to handle any error the bindings raise themselves.
    """
|
||
class ServerError(BlekkoError):
    """Signals that the server refused or failed to serve a request
    (any HTTP status other than 200).
    """
|
||
@_rate_limit
def _http_request(url):
    """Make a (rate-limited) request to the Blekko server and return the
    resulting data.

    `url` is the complete request URL, query string included. Returns
    the raw response body. Raises ServerError if the server answers with
    any status other than 200 (503 is reported as an overloaded server).
    """
    f = urllib.urlopen(url)
    try:
        code = f.getcode()
        if code == 503:
            raise ServerError('server overloaded (503)')
        elif code != 200:
            raise ServerError('HTTP error {}'.format(code))
        return f.read()
    finally:
        # Release the connection whether the body was read or an error
        # was raised; otherwise the handle stays open until collected.
        f.close()
|
||
class _MissingFieldError(AttributeError, KeyError):
    """Raised when a ResponseObject has no field with the requested name.

    It is an AttributeError so that hasattr(), getattr() with a default,
    copy and pickle treat a missing field as a missing attribute. It is
    also a KeyError so that existing ``except KeyError`` handlers keep
    working.
    """


class ResponseObject(object):
    """An object wrapper for a dictionary providing attribute access to
    values in the underlying dictionary.

    The wrapped dictionary is stored as ``data``. Any other attribute
    name that is not found by normal lookup is treated as a key into
    that dictionary.
    """
    def __init__(self, data):
        self.data = data

    def __getattr__(self, key):
        """Return the value stored under `key` in the wrapped dictionary.

        Raises an error that is both an AttributeError and a KeyError if
        there is no such field.
        """
        # __getattr__ only runs after normal lookup has failed. Read
        # ``data`` from the instance dict directly: ``self.data`` would
        # re-enter __getattr__ and recurse without end on an instance
        # whose __init__ has not run yet (as during copy or unpickling).
        data = self.__dict__.get('data', {})
        if key in data:
            return data[key]
        raise _MissingFieldError('no such field {}'.format(repr(key)))

    def __repr__(self):
        return '{}({})'.format(type(self).__name__, self.data)
|
||
class Result(ResponseObject):
    """One entry of a search response.

    Fields that may be present include url, url_title, snippet, rss,
    short_host, short_host_url, and display_url.
    """
|
||
class ResultSet(ResponseObject):
    """A set of search results. Behaves as an immutable sequence
    containing Result objects (accessible via iteration, subscripting,
    or slicing). Additional available fields include q, noslash_q,
    total_num, num_elem_start, num_elem_end, nav_page_range_start,
    nav_page_range_end, tag_switches, sug_slash, and
    universal_total_results.
    """
    def __iter__(self):
        """Yield a Result for each entry under the 'RESULT' key."""
        for result in self.data['RESULT']:
            yield Result(result)

    def __getitem__(self, index):
        """Return the Result at an integer `index`, or a list of Result
        objects when `index` is a slice.
        """
        selected = self.data['RESULT'][index]
        if isinstance(index, slice):
            # A slice selects several raw entries; wrap each one rather
            # than wrapping the list itself in a single Result.
            return [Result(entry) for entry in selected]
        return Result(selected)

    def __len__(self):
        """Return the number of entries under the 'RESULT' key."""
        return len(self.data['RESULT'])
|
||
class Blekko(object):
    """Client for the Blekko web API, bound to a single API key."""

    def __init__(self, auth=None, source=None):
        """Build a client identified by an `auth` or a `source` key.

        At least one of the two must be given (use whichever Blekko
        assigned to you); otherwise BlekkoError is raised.
        """
        if not (auth or source):
            raise BlekkoError('API key not provided')
        self.auth = auth
        self.source = source

    def _request(self, path, params):
        """Send a (rate-limited) request for `path` with the query
        parameters `params` plus the API key, and return the response
        data.
        """
        # Work on a copy so the caller's mapping is left untouched.
        fields = dict(params)
        if self.auth:
            key_name, key_value = 'auth', self.auth
        else:
            key_name, key_value = 'source', self.source
        fields[key_name] = key_value

        encoded = urllib.urlencode(fields)
        return _http_request("{}{}?{}".format(BASE_URL, path, encoded))

    def query(self, terms, page=0):
        """Search for `terms` and return the requested `page` of results
        as a ResultSet.
        """
        # The ' /json' suffix is appended to every search query; the
        # response is then parsed as JSON.
        search_params = {'q': terms + ' /json', 'p': str(page)}
        raw = self._request('/ws/', search_params)
        return ResultSet(json.loads(raw))

    def pagestats(self, url):
        """Fetch page statistics for `url` and return them as the
        decoded JSON response.
        """
        raw = self._request('/api/pagestats', {'url': url})
        return json.loads(raw)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
import os | ||
from distutils.core import setup | ||
|
||
def _read(fn):
    """Return the contents of the file `fn`, resolved relative to the
    directory containing this script.
    """
    path = os.path.join(os.path.dirname(__file__), fn)
    # Close the file deterministically instead of leaving the handle to
    # the garbage collector.
    with open(path) as f:
        return f.read()
|
||
# Package metadata for the single-module ``blekko`` distribution.
setup(
    # Identity.
    name='blekko',
    version='0.1',
    url='https://github.com/sampsyo/python-blekko',
    author='Adrian Sampson',
    author_email='adrian@radbox.org',

    # Descriptions; the long form is read from the README file.
    description='bindings for the Blekko search engine API',
    long_description=_read('README.rst'),
    license='MIT',
    platforms='ALL',

    # Contents: one top-level module, no packages.
    py_modules=['blekko'],

    classifiers=[
        'Topic :: Internet :: WWW/HTTP :: Indexing/Search',
        'Intended Audience :: Developers',
        'Programming Language :: Python :: 2',
    ],
)