From a8103a208386159b8eed8c4b6e1b83c7b586d742 Mon Sep 17 00:00:00 2001 From: Viktor Shlapakov Date: Wed, 28 Dec 2016 11:58:05 +0300 Subject: [PATCH 1/8] Initial version of new client doc --- README_client.rst | 353 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 353 insertions(+) create mode 100644 README_client.rst diff --git a/README_client.rst b/README_client.rst new file mode 100644 index 00000000..5739cf8d --- /dev/null +++ b/README_client.rst @@ -0,0 +1,353 @@ +=========================================== +[Beta] Client interface for Scrapinghub API +=========================================== + + +The ``scrapinghub.ScrapinghubClient`` is a new Python client for communicating +with the `Scrapinghub API`_. It takes best from ``scrapinghub.Connection`` and +``scrapinghub.HubstorageClient`` and combines it under single interface. + + +.. contents:: :depth: 1 + + +Usage +===== + +Client +------ + +First, you connect to Scrapinghub:: + + >>> from scrapinghub import ScrapinghubClient + >>> client = ScrapinghubClient('APIKEY') + >>> client + + +Client instance has ``projects`` field for access to client projects collection. + +Projects +-------- + +You can list the projects available to your account:: + + >>> client.projects.list() + [123, 456] + +Or check the projects summary:: + + >>> client.projects.summary() + [{'finished': 674, + 'has_capacity': True, + 'pending': 0, + 'project': 123, + 'running': 1}, + {'finished': 33079, + 'has_capacity': True, + 'pending': 0, + 'project': 456, + 'running': 2}] + +And select a particular project to work with:: + + >>> project = client.get_project(123) + >>> project + + >>> project.id + 123 + +(The above is a shortcut for ``client.projects.get(123)``.) + +Project +------- + +Project instance has ``jobs`` field to work with the project jobs. + +To schedule a spider run (it returns a job object):: + + >>> project.jobs.schedule('spider1', arg1='val1') + > + +(Check ``Jobs`` section below for other features.) + +Project instance also has the following fields: + +- activity +- collections +- frontier +- reports +- settings +- spiders + +Collections +----------- + +Let's store hash and timestamp pair for foo spider. 
Usual workflow with `Collections`_ would be:: + + >>> collections = project.collections + >>> foo_store = collections.new_store('foo_store') + >>> foo_store.set({'_key': '002d050ee3ff6192dcbecc4e4b4457d7', 'value': '1447221694537'}) + >>> foo_store.count() + 1 + >>> foo_store.get('002d050ee3ff6192dcbecc4e4b4457d7') + '1447221694537' + >>> for result in foo_store.iter_values(): + # do something with _key & value pair + >>> foo_store.delete('002d050ee3ff6192dcbecc4e4b4457d7') + >>> foo_store.count() + 0 + +Frontier +-------- + +Typical workflow with `Frontier`_:: + + >>> frontier = project.frontier + +Add a request to the frontier:: + + >>> frontier.add('test', 'example.com', [{'fp': '/some/path.html'}]) + >>> frontier.flush() + >>> frontier.newcount + 1 + +Add requests with additional parameters:: + + >>> frontier.add('test', 'example.com', [{'fp': '/'}, {'fp': 'page1.html', 'p': 1, 'qdata': {'depth': 1}}]) + >>> frontier.flush() + >>> frontier.newcount + 2 + +To delete the slot ``example.com`` from the frontier:: + + >>> frontier.delete_slot('test', 'example.com') + +To retrieve requests for a given slot:: + + >>> reqs = frontier.read('test', 'example.com') + +To delete a batch of requests:: + + >>> frontier.delete('test', 'example.com', '00013967d8af7b0001') + +To retrieve fingerprints for a given slot:: + + >>> fps = [req['requests'] for req in frontier.read('test', 'example.com')] + +Spiders +------- + +To get the list of spiders in the project:: + + >>> project.spiders.list() + [ + {'id': 'spider1', 'tags': [], 'type': 'manual', 'version': '123'}, + {'id': 'spider2', 'tags': [], 'type': 'manual', 'version': '123'} + ] + +To select a particular spider to work with:: + + >>> spider = project.spiders.get('spider2') + >>> spider + + >>> spider.id + 2 + >>> spider.name + spider2 + +Spider +------ + +Like project instance, spider instance has ``jobs`` field to work with the spider's jobs. + +To schedule a spider run (you don't need to specify spider name explicitly):: + + >>> spider.jobs.schedule(arg1='val1') + > + +Jobs +---- + +To select a specific job for a project:: + + >>> job = project.jobs.get('123/1/2') + >>> job.id + '123/1/2' + +Also there's a shortcut to get same job with client instance:: + + >>> job = client.get_job('123/1/2') + +Use ``schedule`` method to schedule a new job for project/spider:: + + >>> job = spider.jobs.schedule() + +It's possible to count jobs for a given project/spider:: + + >> spider.jobs.count() + 5 + +Count logic supports different filters, as described for `count endpoint`_. + +To get a list of jobs for a spider:: + + >>> jobs = spider.jobs.iter() + +Iter logic also supports different filters, as described for `list endpoint`_. + +For example, to get all finished jobs:: + + >>> jobs = spider.jobs.iter(state='finished') + +``jobs`` is an iterator and, when iterated, return an iterable of dict objects, +so you typically use it like this:: + + >>> for job in jobs: + ... 
# do something with job data + +Or, if you just want to get the job ids:: + + >>> [x['key'] for x in jobs] + ['123/1/1', '123/1/2', '123/1/3'] + +Job dictionary object itself looks like:: + + >>> job + { + 'key': '123/1/2', + 'spider': 'myspider', + 'version': 'some-version' + 'state': 'finished', + 'close_reason': 'success', + 'errors': 0, + 'logs': 8, + 'pending_time': 1482852737072, + 'running_time': 1482852737848, + 'finished_time': 1482852774356, + 'ts': 1482852755902, + 'elapsed': 207609, + } + +Dict entries returned by ``iter`` method contain some additional meta, but can be +easily converted to ``Job`` instances with:: + + >>> [Job(x['key']) for x in jobs] + [ + , + , + , + ] + +To check jobs summary:: + + >>> spider.jobs.summary() + + [{'count': 0, 'name': 'pending', 'summary': []}, + {'count': 0, 'name': 'running', 'summary': []}, + {'count': 5, + 'name': 'finished', + 'summary': [.., + +It's also possible to get last job summary (for each spider):: + + >>> list(sp.jobs.lastjobsummary()) + [{'close_reason': 'success', + 'elapsed': 3062444, + 'errors': 1, + 'finished_time': 1482911633089, + 'key': '123/1/3', + 'logs': 8, + 'pending_time': 1482911596566, + 'running_time': 1482911598909, + 'spider': 'spider1', + 'state': 'finished', + 'ts': 1482911615830, + 'version': 'some-version'}] + +(Note that there can be a lot of spiders, so the method above returns an iterator.) + +Job +--- + + + +To delete a job:: + + >>> job.delete() + + + +To get job metadata:: + + >>> job.metadata['spider'] + 'myspider' + >>> job.metadata['started_time'] + '2010-09-28T15:09:57.629000' + >>> job.metadata['tags'] + [] + >>> j.metadata['scrapystats']['memusage/max'] + 53628928 + +Items +----- + +To retrieve all scraped items from a job:: + + >>> for item in job.items.iter(): + ... # do something with item (it's just a dict) + +Logs +---- + +To retrieve all log entries from a job:: + + >>> for logitem in job.logs.iter(): + ... # logitem is a dict with level, message, time + >>> logitem + { + 'level': 20, + 'message': '[scrapy.core.engine] Closing spider (finished)', + 'time': 1482233733976}, + } + +Requests +-------- + +To retrieve all requests from a job:: + + >>> for reqitem in job.requests.iter(): + ... # reqitem is a dict + >>> reqitem + [{ + 'duration': 354, + 'fp': '6d748741a927b10454c83ac285b002cd239964ea', + 'method': 'GET', + 'rs': 1270, + 'status': 200, + 'time': 1482233733870, + 'url': 'https://example.com' + }] + + +Tags +---- + +Tags is a convenient way to mark specific jobs (for better search, postprocessing etc). + +To mark a job with tag ``consumed``:: + + >>> job.update_tags(add=['consumed']) + +To mark all spider jobs with tag ``consumed``:: + + >>> spider.update_tags(add=['consumed']) + +To remove existing tag ``existing`` for all spider jobs:: + + >>> spider.update_tags(remove=['existing']) + + +.. _count endpoint: https://doc.scrapinghub.com/api/jobq.html#jobq-project-id-count +.. _list endpoint: https://doc.scrapinghub.com/api/jobq.html#jobq-project-id-list +.. _Collections: http://doc.scrapinghub.com/api/collections.html +.. 
_Frontier: http://doc.scrapinghub.com/api/frontier.html From 4d859c5f451d1742ec2e5e1f1dee3043444dcbc1 Mon Sep 17 00:00:00 2001 From: Viktor Shlapakov Date: Wed, 28 Dec 2016 12:24:48 +0300 Subject: [PATCH 2/8] Extending the doc --- README_client.rst | 175 +++++++++++++++++++++++----------------------- 1 file changed, 88 insertions(+), 87 deletions(-) diff --git a/README_client.rst b/README_client.rst index 5739cf8d..44d6efe2 100644 --- a/README_client.rst +++ b/README_client.rst @@ -8,15 +8,12 @@ with the `Scrapinghub API`_. It takes best from ``scrapinghub.Connection`` and ``scrapinghub.HubstorageClient`` and combines it under single interface. -.. contents:: :depth: 1 +.. contents:: :depth: 3 Usage ===== -Client ------- - First, you connect to Scrapinghub:: >>> from scrapinghub import ScrapinghubClient @@ -26,8 +23,8 @@ First, you connect to Scrapinghub:: Client instance has ``projects`` field for access to client projects collection. -Projects --------- +Projects (client level) +----------------------- You can list the projects available to your account:: @@ -58,8 +55,8 @@ And select a particular project to work with:: (The above is a shortcut for ``client.projects.get(123)``.) -Project -------- +Project (projects level) +------------------------ Project instance has ``jobs`` field to work with the project jobs. @@ -72,70 +69,16 @@ To schedule a spider run (it returns a job object):: Project instance also has the following fields: -- activity -- collections -- frontier -- reports -- settings -- spiders - -Collections ------------ +- activity - access to project activity records +- collections - work with project collections (see ``Collections`` section) +- frontier - using project frontier (see ``Frontier`` section) +- reports - work with project reports +- settings - interface to project settings +- spiders - access to spiders collection (see ``Spiders`` section) -Let's store hash and timestamp pair for foo spider. 
Usual workflow with `Collections`_ would be:: - >>> collections = project.collections - >>> foo_store = collections.new_store('foo_store') - >>> foo_store.set({'_key': '002d050ee3ff6192dcbecc4e4b4457d7', 'value': '1447221694537'}) - >>> foo_store.count() - 1 - >>> foo_store.get('002d050ee3ff6192dcbecc4e4b4457d7') - '1447221694537' - >>> for result in foo_store.iter_values(): - # do something with _key & value pair - >>> foo_store.delete('002d050ee3ff6192dcbecc4e4b4457d7') - >>> foo_store.count() - 0 - -Frontier --------- - -Typical workflow with `Frontier`_:: - - >>> frontier = project.frontier - -Add a request to the frontier:: - - >>> frontier.add('test', 'example.com', [{'fp': '/some/path.html'}]) - >>> frontier.flush() - >>> frontier.newcount - 1 - -Add requests with additional parameters:: - - >>> frontier.add('test', 'example.com', [{'fp': '/'}, {'fp': 'page1.html', 'p': 1, 'qdata': {'depth': 1}}]) - >>> frontier.flush() - >>> frontier.newcount - 2 - -To delete the slot ``example.com`` from the frontier:: - - >>> frontier.delete_slot('test', 'example.com') - -To retrieve requests for a given slot:: - - >>> reqs = frontier.read('test', 'example.com') - -To delete a batch of requests:: - - >>> frontier.delete('test', 'example.com', '00013967d8af7b0001') - -To retrieve fingerprints for a given slot:: - - >>> fps = [req['requests'] for req in frontier.read('test', 'example.com')] - -Spiders -------- +Spiders (project level) +----------------------- To get the list of spiders in the project:: @@ -155,8 +98,8 @@ To select a particular spider to work with:: >>> spider.name spider2 -Spider ------- +Spider (spiders level) +---------------------- Like project instance, spider instance has ``jobs`` field to work with the spider's jobs. @@ -165,8 +108,8 @@ To schedule a spider run (you don't need to specify spider name explicitly):: >>> spider.jobs.schedule(arg1='val1') > -Jobs ----- +Jobs (project/spider level) +--------------------------- To select a specific job for a project:: @@ -266,10 +209,8 @@ It's also possible to get last job summary (for each spider):: (Note that there can be a lot of spiders, so the method above returns an iterator.) -Job ---- - - +Job (jobs level) +---------------- To delete a job:: @@ -288,16 +229,16 @@ To get job metadata:: >>> j.metadata['scrapystats']['memusage/max'] 53628928 -Items ------ +Items (job level) +----------------- To retrieve all scraped items from a job:: >>> for item in job.items.iter(): ... # do something with item (it's just a dict) -Logs ----- +Logs (job level) +---------------- To retrieve all log entries from a job:: @@ -310,8 +251,8 @@ To retrieve all log entries from a job:: 'time': 1482233733976}, } -Requests --------- +Requests (job level) +-------------------- To retrieve all requests from a job:: @@ -329,8 +270,69 @@ To retrieve all requests from a job:: }] -Tags ----- +Additional features +=================== + +Collections (project level) +--------------------------- + +As an example, let's store hash and timestamp pair for foo spider. 
+ +Usual workflow with `Collections`_ would be:: + + >>> collections = project.collections + >>> foo_store = collections.new_store('foo_store') + >>> foo_store.set({'_key': '002d050ee3ff6192dcbecc4e4b4457d7', 'value': '1447221694537'}) + >>> foo_store.count() + 1 + >>> foo_store.get('002d050ee3ff6192dcbecc4e4b4457d7') + '1447221694537' + >>> for result in foo_store.iter_values(): + # do something with _key & value pair + >>> foo_store.delete('002d050ee3ff6192dcbecc4e4b4457d7') + >>> foo_store.count() + 0 + +Frontier (project level) +------------------------ + +Typical workflow with `Frontier`_:: + + >>> frontier = project.frontier + +Add a request to the frontier:: + + >>> frontier.add('test', 'example.com', [{'fp': '/some/path.html'}]) + >>> frontier.flush() + >>> frontier.newcount + 1 + +Add requests with additional parameters:: + + >>> frontier.add('test', 'example.com', [{'fp': '/'}, {'fp': 'page1.html', 'p': 1, 'qdata': {'depth': 1}}]) + >>> frontier.flush() + >>> frontier.newcount + 2 + +To delete the slot ``example.com`` from the frontier:: + + >>> frontier.delete_slot('test', 'example.com') + +To retrieve requests for a given slot:: + + >>> reqs = frontier.read('test', 'example.com') + +To delete a batch of requests:: + + >>> frontier.delete('test', 'example.com', '00013967d8af7b0001') + +To retrieve fingerprints for a given slot:: + + >>> fps = [req['requests'] for req in frontier.read('test', 'example.com')] + + +Tags (spider/job level) +----------------------- Tags is a convenient way to mark specific jobs (for better search, postprocessing etc). @@ -346,7 +348,6 @@ To remove existing tag ``existing`` for all spider jobs:: >>> spider.update_tags(remove=['existing']) - .. _count endpoint: https://doc.scrapinghub.com/api/jobq.html#jobq-project-id-count .. _list endpoint: https://doc.scrapinghub.com/api/jobq.html#jobq-project-id-list .. _Collections: http://doc.scrapinghub.com/api/collections.html From b9d792f0d558429f20c91de41c62468bcf4b2dc7 Mon Sep 17 00:00:00 2001 From: Viktor Shlapakov Date: Wed, 28 Dec 2016 12:49:57 +0300 Subject: [PATCH 3/8] Trying to make the doc more readable --- README_client.rst | 86 ++++++++++++++++++++++++++++++++++------------- 1 file changed, 63 insertions(+), 23 deletions(-) diff --git a/README_client.rst b/README_client.rst index 44d6efe2..c9332766 100644 --- a/README_client.rst +++ b/README_client.rst @@ -209,36 +209,61 @@ It's also possible to get last job summary (for each spider):: (Note that there can be a lot of spiders, so the method above returns an iterator.) -Job (jobs level) ----------------- +Job +--- + +Job instance provides access to job data: + +- items +- logs +- requests +- samples +- metadata + +Request to cancel a job:: + + >>> job.cancel() To delete a job:: >>> job.delete() +Metadata +~~~~~~~~ -To get job metadata:: +Job details can be found in jobs metadata and it's scrapystats:: - >>> job.metadata['spider'] - 'myspider' - >>> job.metadata['started_time'] - '2010-09-28T15:09:57.629000' - >>> job.metadata['tags'] - [] - >>> j.metadata['scrapystats']['memusage/max'] - 53628928 + >>> job.metadata['version'] + '5123a86-master' + >>> job.metadata['scrapystats'] + ... + 'downloader/response_count': 104, + 'downloader/response_status_count/200': 104, + 'finish_reason': 'finished', + 'finish_time': 1447160494937, + 'item_scraped_count': 50, + 'log_count/DEBUG': 157, + 'log_count/INFO': 1365, + 'log_count/WARNING': 3, + 'memusage/max': 182988800, + 'memusage/startup': 62439424, + ... 
-Items (job level) ------------------ +Anything can be stored in metadata, here is example how to add tags:: + + >>> job.update_metadata({'tags': 'obsolete'}) + +Items +~~~~~ To retrieve all scraped items from a job:: >>> for item in job.items.iter(): ... # do something with item (it's just a dict) -Logs (job level) ----------------- +Logs +~~~~ To retrieve all log entries from a job:: @@ -251,8 +276,8 @@ To retrieve all log entries from a job:: 'time': 1482233733976}, } -Requests (job level) --------------------- +Requests +~~~~~~~~ To retrieve all requests from a job:: @@ -269,12 +294,21 @@ To retrieve all requests from a job:: 'url': 'https://example.com' }] +Samples +~~~~~~~ + +To retrieve all samples for a job:: + + >>> for sample in job.samples.iter(): + ... # sample is a list with a timestamp and data + >>> sample + [1482233732452, 0, 0, 0, 0, 0] Additional features =================== -Collections (project level) ---------------------------- +Collections +----------- As an example, let's store hash and timestamp pair for foo spider. @@ -293,8 +327,10 @@ Usual workflow with `Collections`_ would be:: >>> foo_store.count() 0 -Frontier (project level) ------------------------- +Collections are available on project level only. + +Frontier +-------- Typical workflow with `Frontier`_:: @@ -330,9 +366,10 @@ To retrieve fingerprints for a given slot:: >>> fps = [req['requests'] for req in frontier.read('test', 'example.com')] +Frontier is available on project level only. -Tags (spider/job level) ------------------------ +Tags +---- Tags is a convenient way to mark specific jobs (for better search, postprocessing etc). @@ -348,6 +385,9 @@ To remove existing tag ``existing`` for all spider jobs:: >>> spider.update_tags(remove=['existing']) +Modifying tags is available on spider/job levels. + +.. _Scrapinghub API: http://doc.scrapinghub.com/api.html .. _count endpoint: https://doc.scrapinghub.com/api/jobq.html#jobq-project-id-count .. _list endpoint: https://doc.scrapinghub.com/api/jobq.html#jobq-project-id-list .. _Collections: http://doc.scrapinghub.com/api/collections.html From 3c07ba56121af7e7fa1e9bf4e0c985e6368e15b1 Mon Sep 17 00:00:00 2001 From: Viktor Shlapakov Date: Wed, 28 Dec 2016 12:58:44 +0300 Subject: [PATCH 4/8] Another attempt --- README_client.rst | 38 ++++++++++++++++++++------------------ 1 file changed, 20 insertions(+), 18 deletions(-) diff --git a/README_client.rst b/README_client.rst index c9332766..7ccddbe4 100644 --- a/README_client.rst +++ b/README_client.rst @@ -11,8 +11,8 @@ with the `Scrapinghub API`_. It takes best from ``scrapinghub.Connection`` and .. contents:: :depth: 3 -Usage -===== +Basic usage +=========== First, you connect to Scrapinghub:: @@ -23,8 +23,8 @@ First, you connect to Scrapinghub:: Client instance has ``projects`` field for access to client projects collection. -Projects (client level) ------------------------ +Projects +-------- You can list the projects available to your account:: @@ -55,8 +55,8 @@ And select a particular project to work with:: (The above is a shortcut for ``client.projects.get(123)``.) -Project (projects level) ------------------------- +Project +~~~~~~~ Project instance has ``jobs`` field to work with the project jobs. 
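For instance, a minimal sketch of using that field (assuming the filters described for the `count endpoint`_ can be passed as keyword arguments, and using a placeholder spider name)::

    >>> project.jobs.count(spider='spider1', state='finished')
    2
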
@@ -77,8 +77,8 @@ Project instance also has the following fields: - spiders - access to spiders collection (see ``Spiders`` section) -Spiders (project level) ------------------------ +Spiders +------- To get the list of spiders in the project:: @@ -98,8 +98,8 @@ To select a particular spider to work with:: >>> spider.name spider2 -Spider (spiders level) ----------------------- +Spider +~~~~~~ Like project instance, spider instance has ``jobs`` field to work with the spider's jobs. @@ -108,8 +108,10 @@ To schedule a spider run (you don't need to specify spider name explicitly):: >>> spider.jobs.schedule(arg1='val1') > -Jobs (project/spider level) ---------------------------- +Jobs +---- + +Jobs collection is available on project/spider level. To select a specific job for a project:: @@ -210,7 +212,7 @@ It's also possible to get last job summary (for each spider):: (Note that there can be a lot of spiders, so the method above returns an iterator.) Job ---- +~~~ Job instance provides access to job data: @@ -230,7 +232,7 @@ To delete a job:: Metadata -~~~~~~~~ +^^^^^^^^ Job details can be found in jobs metadata and it's scrapystats:: @@ -255,7 +257,7 @@ Anything can be stored in metadata, here is example how to add tags:: >>> job.update_metadata({'tags': 'obsolete'}) Items -~~~~~ +^^^^^ To retrieve all scraped items from a job:: @@ -263,7 +265,7 @@ To retrieve all scraped items from a job:: ... # do something with item (it's just a dict) Logs -~~~~ +^^^^ To retrieve all log entries from a job:: @@ -277,7 +279,7 @@ To retrieve all log entries from a job:: } Requests -~~~~~~~~ +^^^^^^^^ To retrieve all requests from a job:: @@ -295,7 +297,7 @@ To retrieve all requests from a job:: }] Samples -~~~~~~~ +^^^^^^^ To retrieve all samples for a job:: From 19e9bbb5dd78e3fa7e785654d603fd9e5618d4ee Mon Sep 17 00:00:00 2001 From: Viktor Shlapakov Date: Wed, 28 Dec 2016 13:03:58 +0300 Subject: [PATCH 5/8] Lets keep it simple --- README_client.rst | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/README_client.rst b/README_client.rst index 7ccddbe4..7e29e339 100644 --- a/README_client.rst +++ b/README_client.rst @@ -56,7 +56,7 @@ And select a particular project to work with:: (The above is a shortcut for ``client.projects.get(123)``.) Project -~~~~~~~ +------- Project instance has ``jobs`` field to work with the project jobs. @@ -99,7 +99,7 @@ To select a particular spider to work with:: spider2 Spider -~~~~~~ +------ Like project instance, spider instance has ``jobs`` field to work with the spider's jobs. @@ -212,7 +212,7 @@ It's also possible to get last job summary (for each spider):: (Note that there can be a lot of spiders, so the method above returns an iterator.) Job -~~~ +--- Job instance provides access to job data: From 3f3355badd8256a227699a0ab3bf21c3aaf36952 Mon Sep 17 00:00:00 2001 From: Viktor Shlapakov Date: Wed, 28 Dec 2016 13:57:26 +0300 Subject: [PATCH 6/8] Minor fixes for the doc --- README_client.rst | 109 +++++++++++++++++++++++++++++----------------- 1 file changed, 70 insertions(+), 39 deletions(-) diff --git a/README_client.rst b/README_client.rst index 7e29e339..f0aad963 100644 --- a/README_client.rst +++ b/README_client.rst @@ -21,7 +21,7 @@ First, you connect to Scrapinghub:: >>> client -Client instance has ``projects`` field for access to client projects collection. +Client instance has ``projects`` field for access to client projects. 
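For example, a small sketch (using only the calls described in the sections below) that builds a project object for every project id available to the account::

    >>> projects = [client.get_project(p) for p in client.projects.list()]
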
Projects -------- @@ -53,20 +53,20 @@ And select a particular project to work with:: >>> project.id 123 -(The above is a shortcut for ``client.projects.get(123)``.) +The above is a shortcut for ``client.projects.get(123)``. Project ------- Project instance has ``jobs`` field to work with the project jobs. -To schedule a spider run (it returns a job object):: +Jobs instance is described well in ``Jobs`` section below. + +For example, to schedule a spider run (it returns a job object):: >>> project.jobs.schedule('spider1', arg1='val1') > -(Check ``Jobs`` section below for other features.) - Project instance also has the following fields: - activity - access to project activity records @@ -80,7 +80,7 @@ Project instance also has the following fields: Spiders ------- -To get the list of spiders in the project:: +To get the list of spiders of the project:: >>> project.spiders.list() [ @@ -103,11 +103,13 @@ Spider Like project instance, spider instance has ``jobs`` field to work with the spider's jobs. -To schedule a spider run (you don't need to specify spider name explicitly):: +To schedule a spider run:: >>> spider.jobs.schedule(arg1='val1') > +Note that you don't need to specify spider name explicitly. + Jobs ---- @@ -127,27 +129,27 @@ Use ``schedule`` method to schedule a new job for project/spider:: >>> job = spider.jobs.schedule() -It's possible to count jobs for a given project/spider:: +It's also possible to count jobs for a given project/spider:: >> spider.jobs.count() 5 Count logic supports different filters, as described for `count endpoint`_. -To get a list of jobs for a spider:: - >>> jobs = spider.jobs.iter() +List jobs +^^^^^^^^^ -Iter logic also supports different filters, as described for `list endpoint`_. +To iterate through the spider jobs (descending order):: -For example, to get all finished jobs:: + >>> jobs_metadata = spider.jobs.iter() + >>> [j['key'] for j in jobs_metadata] + ['1111111/1/3', '1111111/1/2', '1111111/1/1'] - >>> jobs = spider.jobs.iter(state='finished') +``jobs_metadata`` is an iterator and, when iterated, returns an iterable +of dict objects, so you typically use it like this:: -``jobs`` is an iterator and, when iterated, return an iterable of dict objects, -so you typically use it like this:: - - >>> for job in jobs: + >>> for job in jobs_metadata: ... # do something with job data Or, if you just want to get the job ids:: @@ -155,26 +157,51 @@ Or, if you just want to get the job ids:: >>> [x['key'] for x in jobs] ['123/1/1', '123/1/2', '123/1/3'] -Job dictionary object itself looks like:: +Job metadata fieldset from ``iter()`` is less detailed than ``job.metadata``, +but contains few new fields as well. Additional fields can be requested using +the ``jobmeta`` parameter. 
If it used, then it's up to the user to list all the +required fields, so only few default fields would be added except requested +ones:: - >>> job - { - 'key': '123/1/2', - 'spider': 'myspider', - 'version': 'some-version' - 'state': 'finished', - 'close_reason': 'success', - 'errors': 0, - 'logs': 8, - 'pending_time': 1482852737072, - 'running_time': 1482852737848, - 'finished_time': 1482852774356, - 'ts': 1482852755902, - 'elapsed': 207609, - } + >>> metadata = next(project.jobs.iter()) + >>> metadata.get('spider', 'missing') + 'foo' + >>> jobs_metadata = project.jobs.iter(jobmeta=['scheduled_by', ]) + >>> metadata = next(jobs_metadata) + >>> metadata.get('scheduled_by', 'missing') + 'John' + >>> metadata.get('spider', 'missing') + missing + +By default ``jobs.iter()`` returns maximum last 1000 results. +Pagination is available using the ``start`` parameter:: + + >>> jobs_metadata = spider.jobs.iter(start=1000) + +There are several filters like spider, state, has_tag, lacks_tag, +startts and endts (check `list endpoint`_ for more details). + +To get jobs filtered by tags:: -Dict entries returned by ``iter`` method contain some additional meta, but can be -easily converted to ``Job`` instances with:: + >>> jobs_metadata = project.jobs.iter(has_tag=['new', 'verified'], lacks_tag='obsolete') + +List of tags has ``OR`` power, so in the case above jobs with 'new' or +'verified' tag are expected. + +To get certain number of last finished jobs per some spider:: + + >>> jobs_metadata = project.jobs.iter(spider='foo', state='finished', count=3) + +There are 4 possible job states, which can be used as values +for filtering by state: + +- pending +- running +- finished +- deleted + +Dict entries returned by ``iter`` method contain some additional meta, +but can be easily converted to ``Job`` instances with:: >>> [Job(x['key']) for x in jobs] [ @@ -183,6 +210,9 @@ easily converted to ``Job`` instances with:: , ] +Show summaries +^^^^^^^^^^^^^^ + To check jobs summary:: >>> spider.jobs.summary() @@ -209,18 +239,18 @@ It's also possible to get last job summary (for each spider):: 'ts': 1482911615830, 'version': 'some-version'}] -(Note that there can be a lot of spiders, so the method above returns an iterator.) +Note that there can be a lot of spiders, so the method above returns an iterator. Job --- -Job instance provides access to job data: +Job instance provides access to a job data with the following fields: +- metadata - items - logs - requests - samples -- metadata Request to cancel a job:: @@ -230,7 +260,6 @@ To delete a job:: >>> job.delete() - Metadata ^^^^^^^^ @@ -306,6 +335,7 @@ To retrieve all samples for a job:: >>> sample [1482233732452, 0, 0, 0, 0, 0] + Additional features =================== @@ -389,6 +419,7 @@ To remove existing tag ``existing`` for all spider jobs:: Modifying tags is available on spider/job levels. + .. _Scrapinghub API: http://doc.scrapinghub.com/api.html .. _count endpoint: https://doc.scrapinghub.com/api/jobq.html#jobq-project-id-count .. 
_list endpoint: https://doc.scrapinghub.com/api/jobq.html#jobq-project-id-list From b51f1aa237f504ccd51aab9649afc74e6a4a3c44 Mon Sep 17 00:00:00 2001 From: Viktor Shlapakov Date: Wed, 28 Dec 2016 14:18:41 +0300 Subject: [PATCH 7/8] Extend jobs section --- README_client.rst | 40 +++++++++++++++++++++++++++++++++------- 1 file changed, 33 insertions(+), 7 deletions(-) diff --git a/README_client.rst b/README_client.rst index f0aad963..d0eec24c 100644 --- a/README_client.rst +++ b/README_client.rst @@ -115,6 +115,9 @@ Jobs Jobs collection is available on project/spider level. +get +^^^ + To select a specific job for a project:: >>> job = project.jobs.get('123/1/2') @@ -125,10 +128,33 @@ Also there's a shortcut to get same job with client instance:: >>> job = client.get_job('123/1/2') +schedule +^^^^^^^^ + Use ``schedule`` method to schedule a new job for project/spider:: >>> job = spider.jobs.schedule() +Scheduling logic supports different options, like + +- units to specify amount of units to schedule the job +- job_settings to pass additional settings for the job +- priority to set higher/lower priority of the job +- add_tag to create a job with a set of initial tags +- meta to pass additional custom metadata + +For example, to schedule a new job for a given spider with custom params:: + + >>> job = spider.jobs.schedule(units=2, job_settings={'SETTING': 'VALUE'}, + priority=1, add_tag=['tagA','tagB'], meta={'custom-data': 'val1'}) + +Note that if you schedule a job on project level, spider name is required:: + + >>> job = project.jobs.schedule('spider1') + +count +^^^^^ + It's also possible to count jobs for a given project/spider:: >> spider.jobs.count() @@ -137,14 +163,14 @@ It's also possible to count jobs for a given project/spider:: Count logic supports different filters, as described for `count endpoint`_. -List jobs -^^^^^^^^^ +iter +^^^^ To iterate through the spider jobs (descending order):: >>> jobs_metadata = spider.jobs.iter() >>> [j['key'] for j in jobs_metadata] - ['1111111/1/3', '1111111/1/2', '1111111/1/1'] + ['123/1/3', '123/1/2', '123/1/1'] ``jobs_metadata`` is an iterator and, when iterated, returns an iterable of dict objects, so you typically use it like this:: @@ -154,8 +180,8 @@ of dict objects, so you typically use it like this:: Or, if you just want to get the job ids:: - >>> [x['key'] for x in jobs] - ['123/1/1', '123/1/2', '123/1/3'] + >>> [x['key'] for x in jobs_metadata] + ['123/1/3', '123/1/2', '123/1/1'] Job metadata fieldset from ``iter()`` is less detailed than ``job.metadata``, but contains few new fields as well. Additional fields can be requested using @@ -210,8 +236,8 @@ but can be easily converted to ``Job`` instances with:: , ] -Show summaries -^^^^^^^^^^^^^^ +summary +^^^^^^^ To check jobs summary:: From ba91a3a57042c0513abdad8aa5a68d5f544288bd Mon Sep 17 00:00:00 2001 From: Viktor Shlapakov Date: Wed, 28 Dec 2016 14:20:40 +0300 Subject: [PATCH 8/8] Decrease content depth for simplicity --- README_client.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README_client.rst b/README_client.rst index d0eec24c..a3e06b99 100644 --- a/README_client.rst +++ b/README_client.rst @@ -8,7 +8,7 @@ with the `Scrapinghub API`_. It takes best from ``scrapinghub.Connection`` and ``scrapinghub.HubstorageClient`` and combines it under single interface. -.. contents:: :depth: 3 +.. contents:: :depth: 2 Basic usage
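
Putting it all together, a minimal end-to-end sketch (the project id, spider name and ``count`` value below are placeholders) could look like::

    >>> from scrapinghub import ScrapinghubClient
    >>> client = ScrapinghubClient('APIKEY')
    >>> project = client.get_project(123)
    >>> spider = project.spiders.get('spider1')
    >>> for job_meta in spider.jobs.iter(state='finished', count=2):
    ...     job = client.get_job(job_meta['key'])
    ...     for item in job.items.iter():
    ...         pass  # each item is a plain dict of scraped fields

Every call above is covered in the sections of this README; only the concrete ids and values are made up for illustration.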