Skip to content

Commit

Permalink
Basics; and some fancy async
Browse files Browse the repository at this point in the history
  • Loading branch information
chbrown committed Jun 4, 2014
0 parents commit a525e22
Show file tree
Hide file tree
Showing 6 changed files with 299 additions and 0 deletions.
6 changes: 6 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
.DS_Store
*.pyc
*.pyo
*.egg-info/
/build/
/dist/
19 changes: 19 additions & 0 deletions LICENSE
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
Copyright (c) 2014 Christopher Brown

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
56 changes: 56 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
# crowdflower

Minimal client library for interacting with the [CrowdFlower](http://www.crowdflower.com/) API with Python.


## Installation

For now, install directly from GitHub:

git clone https://github.com/chbrown/crowdflower.git
cd crowdflower
python setup.py develop


## Example use

Import:

import crowdflower

CrowdFlower API keys are 20 characters long; the one below is just random characters.

conn = crowdflower.Connection('LbcxvIlE3x1M8F6TT5hN')

If no key is passed, the connection falls back to the value of the
`CROWDFLOWER_API_KEY` environment variable:

conn = crowdflower.Connection()

Loop through all your jobs and print the titles:

    for job in conn.jobs():
        print job.properties['title']

Create a new job with some new tasks (`data` is a list of JSON-serializable items; each item is sent as one line of JSON):

job = conn.upload(data)
print job



## Fancy stuff

Delete every unit in the job by issuing one DELETE call per unit (the calls run concurrently, 20 at a time by default):

for delete_response in job.clear_units():
print delete_response

`clear_units()` is a generator, so nothing is sent until you iterate over it. If you don't want to print the responses, you still need to exhaust the loop:

list(job.clear_units())


## License

Copyright © 2014 Christopher Brown. [MIT Licensed](LICENSE).
13 changes: 13 additions & 0 deletions crowdflower/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
import os
# here is the directory containing this __init__.py file
here = os.path.dirname(__file__) or os.curdir
# root is the directory containing `here`, i.e., the directory containing setup.py
root = os.path.dirname(os.path.abspath(here))

import pkg_resources
# the version is read from the installed distribution's metadata, so the
# package has to be installed (e.g., via `python setup.py develop`) to import it
__version__ = pkg_resources.get_distribution('crowdflower').version

# module-level imports
# (explicit relative import: the implicit form `from connection import ...`
# only resolves on Python 2 and raises ImportError on Python 3)
from .connection import Connection

__all__ = ['Connection']
177 changes: 177 additions & 0 deletions crowdflower/connection.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,177 @@
import os
import json
import requests
import grequests

import logging
from pprint import pformat

# module-level logger, named after this module
logger = logging.getLogger(__name__)


# default base URL for every API call
API_URL = 'https://api.crowdflower.com/v1'
# default API key; read from the environment once, at import time
# (None when CROWDFLOWER_API_KEY is not set)
API_KEY = os.environ.get('CROWDFLOWER_API_KEY')


def merge(*dicts):
    '''
    Combine any number of dicts into one new dict.

    Falsy arguments (None, empty dicts) are skipped. When the same key occurs
    in several arguments, the value from the right-most one wins. None of the
    arguments is modified.
    '''
    merged = {}
    for mapping in dicts:
        if mapping:
            merged.update(mapping.items())
    return merged


class Connection(object):
    '''
    Minimal client for the CrowdFlower API.

    Holds the API key and base URL, and knows how to issue both blocking
    requests (via `requests`) and deferred ones (via `grequests`).
    '''
    def __init__(self, api_key=API_KEY, api_url=API_URL):
        '''
        api_key: CrowdFlower API key; defaults to the module-level API_KEY.
        api_url: base URL that request paths are appended to; defaults to
            the module-level API_URL.
        '''
        self.api_key = api_key
        self.api_url = api_url

    def _prepare_request(self, path, method, params, data, headers):
        '''
        Build the arguments shared by `request` and `grequest`.

        Returns a (method, url, kwargs) tuple, where kwargs holds `params`,
        `headers` and, when `data` is truthy, the serialized `data`.
        '''
        url = self.api_url + path
        # later dicts win in merge(), so the connection's API key and the JSON
        # Accept header override any same-named entries supplied by the caller
        kwargs = dict(
            params=merge(params, dict(key=self.api_key)),
            headers=merge(headers, dict(Accept='application/json'))
        )
        if data:
            kwargs['headers']['Content-Type'] = 'application/json'
            # N.b.: CF expects newline-separated JSON, not actual JSON
            # e.g., this would fail with a status 500: kwargs['data'] = json.dumps(data)
            kwargs['data'] = '\n'.join(json.dumps(datum) for datum in data)
        return method, url, kwargs

    def request(self, path, method='GET', params=None, data=None, headers=None):
        '''
        Send a blocking request and return the decoded response.

        path: path appended to the connection's base URL, e.g. '/jobs'.
        method: HTTP method name.
        params: optional dict of query-string parameters.
        data: optional iterable of JSON-serializable items; each item is sent
            as one line of JSON.
        headers: optional dict of extra request headers.

        Returns the parsed JSON body, or the raw response text if the body
        cannot be parsed as JSON.
        '''
        method, url, kwargs = self._prepare_request(path, method, params, data, headers)
        response = requests.request(method, url, **kwargs)
        try:
            return response.json()
        except ValueError:
            # JSON decode errors derive from ValueError
            # should raise something like an APIException if JSON parse fails, but oh well
            return response.text

    def grequest(self, path, method='GET', params=None, data=None, headers=None):
        '''
        Build a request the same way as `request`, but hand it to
        `grequests.request` and return whatever that returns, rather than
        sending it and decoding the response here.
        '''
        method, url, kwargs = self._prepare_request(path, method, params, data, headers)
        return grequests.request(method, url, **kwargs)

    def jobs(self):
        '''
        Yield a Job for each entry returned by the '/jobs' endpoint.
        '''
        # request() already returns the decoded body, so iterate over it directly
        for job_response in self.request('/jobs'):
            job = Job(job_response['id'], self)
            # populate the Job's properties, since we have all the data anyway
            job._properties = job_response
            yield job

    def job(self, job_id):
        '''
        Return a Job for `job_id` without making any request.
        '''
        return Job(job_id, self)

    def upload(self, data):
        '''
        Create a new job by POSTing `data` to '/jobs/upload' and return it as
        a Job whose properties are pre-populated from the response.

        TODO: allow setting Job parameters at the same time
        '''
        job_response = self.request('/jobs/upload', method='POST', data=data)
        job = Job(job_response['id'], self)
        job._properties = job_response
        return job


class Job(object):
    '''
    Handle on a single job, identified by its id and bound to a connection.

    The job's properties and units are fetched through the connection the
    first time they are accessed and cached on the instance afterwards.

    Read / Write attributes
        auto_order
        auto_order_threshold
        auto_order_timeout
        cml
        cml_fields
        confidence_fields
        css
        custom_key
        excluded_countries
        gold_per_assignment
        included_countries
        instructions
        js
        judgments_per_unit
        language
        max_judgments_per_unit
        max_judgments_per_contributor
        min_unit_confidence
        options
        pages_per_assignment
        problem
        send_judgments_webhook
        state
        title
        units_per_assignment
        webhook_uri
    Read-only attributes
        completed
        completed_at
        created_at
        gold
        golds_count
        id
        judgments_count
        units_count
        updated_at
    Not sure about:
        payment_cents
    '''
    READ_WRITE_FIELDS = [
        'auto_order',
        'auto_order_threshold',
        'auto_order_timeout',
        'cml',
        'cml_fields',
        'confidence_fields',
        'css',
        'custom_key',
        'excluded_countries',
        'gold_per_assignment',
        'included_countries',
        'instructions',
        'js',
        'judgments_per_unit',
        'language',
        'max_judgments_per_unit',
        'max_judgments_per_contributor',
        'min_unit_confidence',
        'options',
        'pages_per_assignment',
        'problem',
        'send_judgments_webhook',
        'state',
        'title',
        'units_per_assignment',
        'webhook_uri',
    ]

    def __init__(self, job_id, connection):
        self._connection = connection
        self.id = job_id
        # caches, filled in on first access:
        self._units = {}
        self._properties = {}

    def _url(self, suffix=''):
        # API path of this job, optionally followed by a sub-resource suffix
        return '/jobs/%s%s' % (self.id, suffix)

    def __json__(self):
        return self.properties

    def __repr__(self):
        # pretty-printed properties (fetches them if they are not cached yet)
        return pformat(self.properties)

    @property
    def properties(self):
        '''The job's properties, requested once and then served from cache.'''
        if len(self._properties) > 0:
            return self._properties
        self._properties = self._connection.request(self._url())
        return self._properties

    @property
    def units(self):
        '''The job's units, requested once and then served from cache.'''
        if len(self._units) > 0:
            return self._units
        self._units = self._connection.request(self._url('/units'))
        return self._units

    def clear_units(self, parallel=20):
        '''
        Issue one DELETE per unit of this job, at most `parallel` at a time,
        yielding each response. Nothing is sent until the generator is iterated.
        '''
        unit_ids = self.units.keys()
        pending = (
            self._connection.grequest(self._url('/units/%s' % unit_id), method='DELETE')
            for unit_id in unit_ids
        )
        for response in grequests.imap(pending, size=parallel):
            yield response

    def upload(self, data):
        '''POST `data` to this job's upload endpoint and return the response.'''
        return self._connection.request(self._url('/upload'), method='POST', data=data)

    def update(self, props):
        '''
        PUT the given properties, each sent as a `job[<name>]` query parameter,
        and drop the cached properties so they are re-fetched on next access.
        '''
        params = dict(('job[%s]' % name, value) for name, value in props.items())
        self._properties = {}
        return self._connection.request(self._url(), method='PUT', params=params)

    def delete(self):
        '''Send a DELETE for this job and return the response.'''
        return self._connection.request(self._url(), method='DELETE')


class Unit(object):
    '''
    Placeholder for a single unit of a job.

    No behavior is implemented yet: the class has no methods and only records
    the unit fields listed below.

    Read / Write attributes
        job_id
        missed_count
        difficulty
        state
        data
        agreement
    Read-only attributes
        updated_at
        created_at
        judgments_count
        id
    '''
28 changes: 28 additions & 0 deletions setup.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
import os

from setuptools import setup, find_packages

# resolve README.md relative to this file, so that setup.py does not depend on
# the current working directory
here = os.path.dirname(os.path.abspath(__file__))


def read(filename):
    '''Return the contents of `filename` (relative to setup.py), closing the file afterwards.'''
    with open(os.path.join(here, filename)) as fp:
        return fp.read()


setup(
    name='crowdflower',
    version='0.0.2',
    author='Christopher Brown',
    author_email='io@henrian.com',
    url='https://github.com/chbrown/crowdflower',
    keywords='',
    description='',
    long_description=read('README.md'),
    # `license` is a short identifier; the full text lives in the LICENSE file
    license='MIT',
    packages=find_packages(),
    include_package_data=True,
    classifiers=[
        # https://pypi.python.org/pypi?:action=list_classifiers
        # ('1 - Alpha' is not in that list; the Alpha status is number 3)
        'Development Status :: 3 - Alpha',
        'License :: OSI Approved :: MIT License',
    ],
    install_requires=[
        'requests>=2.0.0',
        'grequests',
    ],
    entry_points={
        'console_scripts': [
        ],
    },
)

0 comments on commit a525e22

Please sign in to comment.