scrapinghub · chekunkov · Mar 2, 2017 · Mar 2, 2017 · Mar 2, 2017 · Mar 2, 2017
diff --git a/scrapinghub/client.py b/scrapinghub/client.py
diff --git a/scrapinghub/client/__init__.py b/scrapinghub/client/__init__.py
@@ -0,0 +1,94 @@
+from scrapinghub import Connection as _Connection
+from scrapinghub import HubstorageClient as _HubstorageClient
+
+from .projects import Projects
+from .exceptions import wrap_http_errors
+
+from .utils import parse_auth
+from .utils import parse_project_id, parse_job_key
+
+
+__all__ = ['ScrapinghubClient']
+
+
+class Connection(_Connection):
+    """Legacy ``Connection`` whose ``_request`` is decorated with
+    :func:`wrap_http_errors`.
+
+    NOTE(review): presumably the decorator converts HTTP errors raised by
+    the underlying request into this package's exceptions -- confirm against
+    the ``.exceptions`` module.
+    """
+
+    @wrap_http_errors
+    def _request(self, *args, **kwargs):
+        # Pure delegation: the only behaviour added here is the decorator.
+        return super(Connection, self)._request(*args, **kwargs)
+
+
+class HubstorageClient(_HubstorageClient):
+    """Hubstorage client whose ``request`` is decorated with
+    :func:`wrap_http_errors`.
+
+    NOTE(review): presumably the decorator converts HTTP errors raised by
+    the underlying request into this package's exceptions -- confirm against
+    the ``.exceptions`` module.
+    """
+
+    @wrap_http_errors
+    def request(self, *args, **kwargs):
+        # Pure delegation: the only behaviour added here is the decorator.
+        return super(HubstorageClient, self).request(*args, **kwargs)
+
+
+class ScrapinghubClient(object):
+    r"""Main class to work with Scrapinghub API.
+
+    :param auth: (optional) Scrapinghub APIKEY or other SH auth credentials;
+        it is split into a login/password pair by :func:`parse_auth`.
+    :param dash_endpoint: (optional) Scrapinghub Dash panel url.
+    :param \*\*kwargs: (optional) Additional arguments for
+        :class:`scrapinghub.hubstorage.HubstorageClient` constructor.
+
+    :ivar projects: projects collection, :class:`Projects` instance.
+
+    Usage::
+
+        >>> from scrapinghub import ScrapinghubClient
+        >>> client = ScrapinghubClient('APIKEY')
+        >>> client
+        <scrapinghub.client.ScrapinghubClient at 0x1047af2e8>
+    """
+    # The class docstring is a raw string on purpose: it contains ``\*``
+    # (Sphinx escaping), which is an invalid escape in a regular literal.
+
+    def __init__(self, auth=None, dash_endpoint=None, **kwargs):
+        self.projects = Projects(self)
+        login, password = parse_auth(auth)
+        # The same credentials are shared by both underlying clients.
+        self._connection = Connection(apikey=login,
+                                      password=password,
+                                      url=dash_endpoint)
+        self._hsclient = HubstorageClient(auth=(login, password), **kwargs)
+
+    def get_project(self, projectid):
+        """Get :class:`Project` instance with a given project id.
+
+        The method is a shortcut for client.projects.get(); the id is
+        passed through :func:`parse_project_id` first.
+
+        :param projectid: integer or string numeric project id.
+        :return: :class:`Project` object.
+        :rtype: scrapinghub.client.Project
+
+        Usage::
+
+            >>> project = client.get_project(123)
+            >>> project
+            <scrapinghub.client.Project at 0x106cdd6a0>
+        """
+        return self.projects.get(parse_project_id(projectid))
+
+    def get_job(self, jobkey):
+        """Get :class:`Job` instance with a given job key.
+
+        The project id is extracted from the key and the job is looked up
+        through that project's jobs collection.
+
+        :param jobkey: job key string in format 'project/spider/job',
+            where all the components are integers.
+        :return: :class:`Job` object.
+        :rtype: scrapinghub.client.Job
+
+        Usage::
+
+            >>> job = client.get_job('123/1/1')
+            >>> job
+            <scrapinghub.client.Job at 0x10afe2eb1>
+        """
+        projectid = parse_job_key(jobkey).projectid
+        return self.projects.get(projectid).jobs.get(jobkey)
+
+    def close(self, timeout=None):
+        """Close client instance.
+
+        Only the underlying Hubstorage client is closed here.
+
+        :param timeout: (optional) float timeout secs to stop everything
+            gracefully.
+        """
+        self._hsclient.close(timeout=timeout)
diff --git a/scrapinghub/client/activity.py b/scrapinghub/client/activity.py
@@ -0,0 +1,60 @@
+from __future__ import absolute_import
+
+from .utils import _Proxy
+from .utils import parse_job_key
+
+
+class Activity(_Proxy):
+    """Representation of collection of job activity events.
+
+    Not a public constructor: use :class:`Project` instance to get a
+    :class:`Activity` instance. See :attr:`Project.activity` attribute.
+
+    Please note that list() method can use a lot of memory and for a large
+    amount of activities it's recommended to iterate through it via iter()
+    method (all params and available filters are same for both methods).
+
+    Usage:
+
+    - get all activity from a project::
+
+        >>> project.activity.iter()
+        <generator object jldecode at 0x1049ee990>
+
+    - get only last 2 events from a project::
+
+        >>> project.activity.list(count=2)
+        [{'event': 'job:completed', 'job': '123/2/3', 'user': 'jobrunner'},
+         {'event': 'job:started', 'job': '123/2/3', 'user': 'john'}]
+
+    - post a new event::
+
+        >>> event = {'event': 'job:completed',
+        ...          'job': '123/2/4',
+        ...          'user': 'jobrunner'}
+        >>> project.activity.add(event)
+
+    - post multiple events at once::
+
+        >>> events = [
+        ...     {'event': 'job:completed', 'job': '123/2/5', 'user': 'jobrunner'},
+        ...     {'event': 'job:cancelled', 'job': '123/2/6', 'user': 'john'},
+        ... ]
+        >>> project.activity.add(events)
+
+    """
+    def __init__(self, *args, **kwargs):
+        super(Activity, self).__init__(*args, **kwargs)
+        # NOTE(review): presumably exposes the origin's ``list`` method
+        # under the name ``iter`` -- confirm against ``_Proxy``.
+        self._proxy_methods([('iter', 'list')])
+        self._wrap_iter_methods(['iter'])
+
+    def add(self, values, **kwargs):
+        """Post one or several activity events.
+
+        :param values: a single event dictionary, or an iterable of event
+            dictionaries.
+        :param kwargs: (optional) keyword arguments forwarded unchanged to
+            the underlying ``post`` call.
+        :raises ValueError: if an event is not a dictionary, or if its
+            ``'job'`` key belongs to a project other than this one.
+        """
+        if isinstance(values, dict):
+            # A single event: wrap it. list(dict) would yield the dict's
+            # keys, which then fail the "must be a dictionary" check below.
+            values = [values]
+        elif not isinstance(values, list):
+            values = list(values)
+        for activity in values:
+            if not isinstance(activity, dict):
+                raise ValueError("Please pass events as dictionaries")
+            jobkey = activity.get('job')
+            # Events without a job key are accepted as-is.
+            if jobkey and parse_job_key(jobkey).projectid != self.key:
+                raise ValueError('Please use same project id')
+        self._origin.post(values, **kwargs)
diff --git a/scrapinghub/client/collections.py b/scrapinghub/client/collections.py
@@ -0,0 +1,154 @@
+from __future__ import absolute_import
+import collections
+
+from six import string_types
+
+from ..hubstorage.collectionsrt import Collection as _Collection
+
+from .utils import _Proxy
+from .utils import format_iter_filters
+from .utils import proxy_methods
+from .utils import wrap_kwargs
+
+
+class Collections(_Proxy):
+    """Access to project collections.
+
+    Not a public constructor: use :class:`Project` instance to get a
+    :class:`Collections` instance. See :attr:`Project.collections` attribute.
+
+    Usage::
+
+        >>> collections = project.collections
+        >>> collections.list()
+        [{'name': 'Pages', 'type': 's'}]
+        >>> foo_store = collections.get_store('foo_store')
+    """
+
+    def get(self, coltype, colname):
+        """Base method to get a collection with a given type and name.
+
+        The type/name pair is validated by the underlying collections
+        object before the :class:`Collection` wrapper is created.
+
+        :param coltype: collection type code (the shortcuts below use
+            's', 'cs', 'vs' and 'vcs').
+        :param colname: collection name.
+        :return: :class:`Collection` object.
+        """
+        self._origin._validate_collection(coltype, colname)
+        return Collection(self._client, self, coltype, colname)
+
+    def get_store(self, colname):
+        """Shortcut for ``get('s', colname)``."""
+        return self.get('s', colname)
+
+    def get_cached_store(self, colname):
+        """Shortcut for ``get('cs', colname)``."""
+        return self.get('cs', colname)
+
+    def get_versioned_store(self, colname):
+        """Shortcut for ``get('vs', colname)``."""
+        return self.get('vs', colname)
+
+    def get_versioned_cached_store(self, colname):
+        """Shortcut for ``get('vcs', colname)``."""
+        return self.get('vcs', colname)
+
+    def iter(self):
+        """Iterate through collections of a project."""
+        return self._origin.apiget('list')
+
+    def list(self):
+        """List collections of a project."""
+        return list(self.iter())
+
+
+class Collection(object):
+    """Representation of a project collection object.
+
+    Not a public constructor: use :class:`Collections` instance to get a
+    :class:`Collection` instance. See :meth:`Collections.get_store` and
+    similar methods.
+
+    Usage:
+
+    - add a new item to collection::
+
+        >>> foo_store.set({'_key': '002d050ee3ff6192dcbecc4e4b4457d7',
+        ...                'value': '1447221694537'})
+
+    - count items in collection::
+
+        >>> foo_store.count()
+        1
+
+    - get an item from collection::
+
+        >>> foo_store.get('002d050ee3ff6192dcbecc4e4b4457d7')
+        {'value': '1447221694537'}
+
+    - get all items from collection::
+
+        >>> foo_store.iter()
+        <generator object jldecode at 0x1049eef10>
+
+    - iterate over _key & value pairs::
+
+        >>> for elem in foo_store.iter(count=1):
+        ...     print(elem)
+        {'_key': '002d050ee3ff6192dcbecc4e4b4457d7', 'value': '1447221694537'}
+
+    - filter by multiple keys, only values for keys that exist will be returned::
+
+        >>> foo_store.list(key=['002d050ee3ff6192dcbecc4e4b4457d7', 'blah'])
+        [{'_key': '002d050ee3ff6192dcbecc4e4b4457d7', 'value': '1447221694537'}]
+
+    - delete an item by key::
+
+        >>> foo_store.delete('002d050ee3ff6192dcbecc4e4b4457d7')
+    """
+
+    def __init__(self, client, collections, coltype, colname):
+        self._client = client
+        self._origin = _Collection(coltype, colname, collections._origin)
+        # NOTE(review): tuples presumably map a public name on this object
+        # to a differently named method on the origin -- confirm against
+        # ``proxy_methods``.
+        proxy_methods(self._origin, self, [
+            'create_writer', 'count',
+            ('iter', 'iter_values'),
+            ('iter_raw_json', 'iter_json'),
+        ])
+        # simplified version of _Proxy._wrap_iter_methods logic
+        # to provide better support for filter param in iter methods
+        for method in ['iter', 'iter_raw_json']:
+            wrapped = wrap_kwargs(getattr(self, method), format_iter_filters)
+            setattr(self, method, wrapped)
+
+    def list(self, *args, **kwargs):
+        """Convenient shortcut to list iter results.
+
+        Please note that list() method can use a lot of memory and for a large
+        amount of elements it's recommended to iterate through it via iter()
+        method (all params and available filters are same for both methods).
+        """
+        return list(self.iter(*args, **kwargs))
+
+    def get(self, key, *args, **kwargs):
+        """Get item from collection by key.
+
+        :param key: string item key
+        :return: an item dictionary if exists
+        :raises ValueError: if ``key`` is None.
+        """
+        if key is None:
+            raise ValueError("key cannot be None")
+        return self._origin.get(key, *args, **kwargs)
+
+    def set(self, *args, **kwargs):
+        """Set item to collection by key.
+
+        The method returns None (original method returns an empty generator).
+        """
+        self._origin.set(*args, **kwargs)
+
+    def delete(self, keys):
+        """Delete item(s) from collection by key(s).
+
+        The method returns None (original method returns an empty generator).
+
+        :param keys: a string key or an iterable providing string keys.
+        :raises ValueError: if ``keys`` is neither a string nor an iterable.
+        """
+        # The ``collections.Iterable`` alias was removed in Python 3.10, so
+        # use ``collections.abc`` and fall back to the old location on
+        # Python 2, where ``collections.abc`` does not exist.
+        try:
+            from collections.abc import Iterable
+        except ImportError:
+            from collections import Iterable
+        if (not isinstance(keys, string_types) and
+                not isinstance(keys, Iterable)):
+            raise ValueError("You should provide string key or iterable "
+                             "object providing string keys")
+        self._origin.delete(keys)
+
+    def iter_raw_msgpack(self, requests_params=None, **apiparams):
+        """Iterate through collection items via the parent collections
+        object's ``iter_msgpack`` method.
+
+        Unlike ``iter`` and ``iter_raw_json``, the keyword arguments are
+        forwarded as-is, without the ``format_iter_filters`` wrapping.
+
+        :param requests_params: (optional) forwarded unchanged as the
+            ``requests_params`` keyword.
+        :param apiparams: (optional) additional keyword arguments forwarded
+            unchanged.
+        """
+        return self._origin._collections.iter_msgpack(
+            self._origin.coltype, self._origin.colname,
+            requests_params=requests_params, **apiparams)
diff --git a/scrapinghub/exceptions.py → scrapinghub/client/exceptions.py b/scrapinghub/exceptions.py → scrapinghub/client/exceptions.py
@@ -1,10 +1,11 @@
 # -*- coding: utf-8 -*-
+from __future__ import absolute_import
 from functools import wraps
 
 from requests import HTTPError
 
-from .legacy import APIError
-from .hubstorage import ValueTooLarge as _ValueTooLarge
+from ..legacy import APIError
+from ..hubstorage import ValueTooLarge as _ValueTooLarge
 
 
 def _get_http_error_msg(exc):