Skip to content

Commit

Permalink
Merge pull request #230 from valgur/check-query-length
Browse files Browse the repository at this point in the history
Update check_query_length(), Unicode support
  • Loading branch information
valgur committed Sep 7, 2018
2 parents 6e193d8 + eb89e3e commit 91a9c03
Show file tree
Hide file tree
Showing 44 changed files with 8,200 additions and 6,230 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@ Changed
* Replaced ``[test]`` and ``[docs]`` with a single ``[dev]`` installation extras target. (#208)
* Adapted ``.travis.yml`` to build ``fiona`` and ``pyproj`` from source for Python 3.7
* Minimum pytest version is now ``pytest >= 3.6.3``, as required by ``pytest-socket``
* Updated ``check_query_length()`` logic. (#230)
* Added support for Unicode symbols in search queries. (#230)

Deprecated
~~~~~~~~~~
Expand Down
31 changes: 9 additions & 22 deletions sentinelsat/sentinel.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
import html2text
import requests
from six import string_types
from six.moves.urllib.parse import urljoin
from six.moves.urllib.parse import urljoin, quote_plus
from tqdm import tqdm

from . import __version__ as sentinelsat_version
Expand Down Expand Up @@ -290,7 +290,8 @@ def _load_subquery(self, query, order_by=None, limit=None, offset=0):

# load query results
url = self._format_url(order_by, limit, offset)
response = self.session.post(url, {'q': query}, auth=self.session.auth)
response = self.session.post(url, {'q': query}, auth=self.session.auth,
headers={'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8'})
_check_scihub_response(response)

# store last status code (for testing)
Expand Down Expand Up @@ -406,7 +407,7 @@ def get_product_odata(self, id, full=False):
https://github.com/SentinelDataHub/DataHubSystem/blob/master/addon/sentinel-2/src/main/resources/META-INF/sentinel-2.owl
https://github.com/SentinelDataHub/DataHubSystem/blob/master/addon/sentinel-3/src/main/resources/META-INF/sentinel-3.owl
"""
url = urljoin(self.api_url, "odata/v1/Products('{}')?$format=json".format(id))
url = urljoin(self.api_url, u"odata/v1/Products('{}')?$format=json".format(id))
if full:
url += '&$expand=Attributes'
response = self.session.get(url, auth=self.session.auth)
Expand Down Expand Up @@ -580,9 +581,8 @@ def get_products_size(products):
def check_query_length(query):
"""Determine whether a query to the OpenSearch API is too long.
The length of a query string is limited to approximately 3893 characters but
any special characters (that is, not alphanumeric or -_.~) are counted twice
towards that limit.
The length of a query string is limited to approximately 3938 characters but
any special characters (that is, not alphanumeric or -_.*) will take up more space.
Parameters
----------
Expand All @@ -593,23 +593,10 @@ def check_query_length(query):
-------
float
Ratio of the query length to the maximum length
Notes
-----
The query size limit arises from a limit on the length of the server's internal query,
which looks like
http://localhost:30333//solr/dhus/select?q=...
&wt=xslt&tr=opensearch_atom.xsl&dhusLongName=Sentinels+Scientific+Data+Hub
&dhusServer=https%3A%2F%2Fscihub.copernicus.eu%2Fapihub%2F&originalQuery=...
&rows=100&start=0&sort=ingestiondate+desc
This function will estimate the length of the "q" and "originalQuery" parameters to
determine whether the query will fail. Their combined length can be at most about
7786 bytes.
"""
effective_length = len(query) + 2 * len(re.findall('[^-_.* 0-9A-Za-z]', query))
return effective_length / 3950
# The server uses Java's URLEncoder implementation internally, which we replicate here
effective_length = len(quote_plus(query, safe="-_.*").replace('~', '%7E'))
return effective_length / 3938

def _query_names(self, names):
"""Find products by their names, e.g.
Expand Down
1,221 changes: 741 additions & 480 deletions tests/fixtures/vcr_cassettes/products_fixture.yaml

Large diffs are not rendered by default.

6 changes: 3 additions & 3 deletions tests/fixtures/vcr_cassettes/test_SentinelAPI_connection.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,15 +6,15 @@ interactions:
Accept-Encoding: ['gzip, deflate']
Connection: [keep-alive]
Content-Length: ['151']
Content-Type: [application/x-www-form-urlencoded]
User-Agent: [sentinelsat/0.12.1]
Content-Type: [application/x-www-form-urlencoded; charset=UTF-8]
User-Agent: [sentinelsat/0.12.2]
method: POST
uri: https://scihub.copernicus.eu/apihub/search?format=json&rows=100&start=0
response:
body: {string: '{"feed":{"xmlns:opensearch":"http://a9.com/-/spec/opensearch/1.1/","xmlns":"http://www.w3.org/2005/Atom","title":"Sentinels
Scientific Data Hub search results for: beginPosition:[2015-01-01T00:00:00Z
TO 2015-01-02T00:00:00Z] footprint:\"Intersects(POLYGON((0 0,1 1,0 1,0 0)))\"","subtitle":"Displaying
0 results. Request done in 0.018 seconds.","updated":"2017-10-24T19:58:28.794Z","author":{"name":"Sentinels
0 results. Request done in 0.067 seconds.","updated":"2018-09-06T13:23:14.756Z","author":{"name":"Sentinels
Scientific Data Hub"},"id":"https://scihub.copernicus.eu/apihub/search?q=beginPosition:[2015-01-01T00:00:00Z
TO 2015-01-02T00:00:00Z] footprint:\"Intersects(POLYGON((0 0,1 1,0 1,0 0)))\"","opensearch:totalResults":"0","opensearch:startIndex":"0","opensearch:itemsPerPage":"100","opensearch:Query":{"role":"request","searchTerms":"beginPosition:[2015-01-01T00:00:00Z
TO 2015-01-02T00:00:00Z] footprint:\"Intersects(POLYGON((0 0,1 1,0 1,0 0)))\"","startPage":"1"},"link":[{"rel":"self","type":"application/atom+xml","href":"https://scihub.copernicus.eu/apihub/search?q=beginPosition:[2015-01-01T00:00:00Z
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@ interactions:
Accept-Encoding: ['gzip, deflate']
Connection: [keep-alive]
Content-Length: ['151']
Content-Type: [application/x-www-form-urlencoded]
User-Agent: [sentinelsat/0.12.1]
Content-Type: [application/x-www-form-urlencoded; charset=UTF-8]
User-Agent: [sentinelsat/0.12.2]
method: POST
uri: https://scihub.copernicus.eu/apihub/search?format=json&rows=100&start=0
response:
Expand Down Expand Up @@ -38,7 +38,7 @@ interactions:
Accept: ['*/*']
Accept-Encoding: ['gzip, deflate']
Connection: [keep-alive]
User-Agent: [sentinelsat/0.12.1]
User-Agent: [sentinelsat/0.12.2]
method: GET
uri: https://scihub.copernicus.eu/apihub/odata/v1/Products('8df46c9e-a20c-43db-a19a-4240c2ed3b8b')?$format=json
response:
Expand Down Expand Up @@ -69,7 +69,7 @@ interactions:
Accept: ['*/*']
Accept-Encoding: ['gzip, deflate']
Connection: [keep-alive]
User-Agent: [sentinelsat/0.12.1]
User-Agent: [sentinelsat/0.12.2]
method: GET
uri: https://scihub.copernicus.eu/apihub/odata/v1/Products('8df46c9e-a20c-43db-a19a-4240c2ed3b8b')?$format=json
response:
Expand Down Expand Up @@ -100,7 +100,7 @@ interactions:
Accept: ['*/*']
Accept-Encoding: ['gzip, deflate']
Connection: [keep-alive]
User-Agent: [sentinelsat/0.12.1]
User-Agent: [sentinelsat/0.12.2]
method: GET
uri: https://scihub.copernicus.eu/apihub/odata/v1/Products('8df46c9e-a20c-43db-a19a-4240c2ed3b8b')?$format=json
response:
Expand Down Expand Up @@ -131,7 +131,7 @@ interactions:
Accept: ['*/*']
Accept-Encoding: ['gzip, deflate']
Connection: [keep-alive]
User-Agent: [sentinelsat/0.12.1]
User-Agent: [sentinelsat/0.12.2]
method: GET
uri: https://scihub.copernicus.eu/apihub/odata/v1/Products('8df46c9e-a20c-43db-a19a-4240c2ed3b8b')?$format=json
response:
Expand Down Expand Up @@ -162,7 +162,7 @@ interactions:
Accept: ['*/*']
Accept-Encoding: ['gzip, deflate']
Connection: [keep-alive]
User-Agent: [sentinelsat/0.12.1]
User-Agent: [sentinelsat/0.12.2]
method: GET
uri: https://scihub.copernicus.eu/apihub/odata/v1/Products('8df46c9e-a20c-43db-a19a-4240c2ed3b8b')?$format=json
response:
Expand Down Expand Up @@ -193,7 +193,7 @@ interactions:
Accept: ['*/*']
Accept-Encoding: ['gzip, deflate']
Connection: [keep-alive]
User-Agent: [sentinelsat/0.12.1]
User-Agent: [sentinelsat/0.12.2]
method: GET
uri: https://scihub.copernicus.eu/apihub/odata/v1/Products('8df46c9e-a20c-43db-a19a-4240c2ed3b8b')?$format=json
response:
Expand Down Expand Up @@ -224,7 +224,7 @@ interactions:
Accept: ['*/*']
Accept-Encoding: ['gzip, deflate']
Connection: [keep-alive]
User-Agent: [sentinelsat/0.12.1]
User-Agent: [sentinelsat/0.12.2]
method: GET
uri: https://scihub.copernicus.eu/apihub/odata/v1/Products('8df46c9e-a20c-43db-a19a-4240c2ed3b8b')?$format=json
response:
Expand Down Expand Up @@ -255,7 +255,7 @@ interactions:
Accept: ['*/*']
Accept-Encoding: ['gzip, deflate']
Connection: [keep-alive]
User-Agent: [sentinelsat/0.12.1]
User-Agent: [sentinelsat/0.12.2]
method: GET
uri: https://scihub.copernicus.eu/apihub/odata/v1/Products('8df46c9e-a20c-43db-a19a-4240c2ed3b8b')?$format=json
response:
Expand Down Expand Up @@ -286,7 +286,7 @@ interactions:
Accept: ['*/*']
Accept-Encoding: ['gzip, deflate']
Connection: [keep-alive]
User-Agent: [sentinelsat/0.12.1]
User-Agent: [sentinelsat/0.12.2]
method: GET
uri: https://scihub.copernicus.eu/apihub/odata/v1/Products('8df46c9e-a20c-43db-a19a-4240c2ed3b8b')?$format=json
response:
Expand Down Expand Up @@ -317,7 +317,7 @@ interactions:
Accept: ['*/*']
Accept-Encoding: ['gzip, deflate']
Connection: [keep-alive]
User-Agent: [sentinelsat/0.12.1]
User-Agent: [sentinelsat/0.12.2]
method: GET
uri: https://scihub.copernicus.eu/apihub/odata/v1/Products('8df46c9e-a20c-43db-a19a-4240c2ed3b8b')?$format=json
response:
Expand Down Expand Up @@ -348,7 +348,7 @@ interactions:
Accept: ['*/*']
Accept-Encoding: ['gzip, deflate']
Connection: [keep-alive]
User-Agent: [sentinelsat/0.12.1]
User-Agent: [sentinelsat/0.12.2]
method: GET
uri: https://scihub.copernicus.eu/apihub/odata/v1/Products('8df46c9e-a20c-43db-a19a-4240c2ed3b8b')?$format=json
response:
Expand Down Expand Up @@ -379,7 +379,7 @@ interactions:
Accept: ['*/*']
Accept-Encoding: ['gzip, deflate']
Connection: [keep-alive]
User-Agent: [sentinelsat/0.12.1]
User-Agent: [sentinelsat/0.12.2]
method: GET
uri: https://scihub.copernicus.eu/apihub/odata/v1/Products('8df46c9e-a20c-43db-a19a-4240c2ed3b8b')?$format=json
response:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,21 +6,21 @@ interactions:
Accept-Encoding: ['gzip, deflate']
Connection: [keep-alive]
Content-Length: ['38']
Content-Type: [application/x-www-form-urlencoded]
Content-Type: [application/x-www-form-urlencoded; charset=UTF-8]
User-Agent: [sentinelsat/0.12.2]
method: POST
uri: https://scihub.copernicus.eu/apihub/search?format=json&rows=0&start=0
response:
body: {string: '{"feed":{"xmlns:opensearch":"http://a9.com/-/spec/opensearch/1.1/","xmlns":"http://www.w3.org/2005/Atom","title":"Sentinels
Scientific Data Hub search results for: timeliness:Non\\ Time\\ Critical","subtitle":"Displaying
0 to -1 of 82287 total results. Request done in 0.018 seconds.","updated":"2018-06-25T16:33:57.401Z","author":{"name":"Sentinels
0 to -1 of 90480 total results. Request done in 0.014 seconds.","updated":"2018-09-06T13:23:17.408Z","author":{"name":"Sentinels
Scientific Data Hub"},"id":"https://scihub.copernicus.eu/apihub/search?q=timeliness:Non\\
Time\\ Critical","opensearch:totalResults":"82287","opensearch:startIndex":"0","opensearch:itemsPerPage":"0","opensearch:Query":{"role":"request","searchTerms":"timeliness:Non\\
Time\\ Critical","opensearch:totalResults":"90480","opensearch:startIndex":"0","opensearch:itemsPerPage":"0","opensearch:Query":{"role":"request","searchTerms":"timeliness:Non\\
Time\\ Critical","startPage":"1"},"link":[{"rel":"self","type":"application/atom+xml","href":"https://scihub.copernicus.eu/apihub/search?q=timeliness:Non\\
Time\\ Critical&start=0&rows=0"},{"rel":"first","type":"application/atom+xml","href":"https://scihub.copernicus.eu/apihub/search?q=timeliness:Non\\
Time\\ Critical&start=0&rows=0"},{"rel":"next","type":"application/atom+xml","href":"https://scihub.copernicus.eu/apihub/search?q=timeliness:Non\\
Time\\ Critical&start=NaN&rows=0"},{"rel":"last","type":"application/atom+xml","href":"https://scihub.copernicus.eu/apihub/search?q=timeliness:Non\\
Time\\ Critical&start=82286&rows=0"},{"rel":"search","type":"application/opensearchdescription+xml","href":"opensearch_description.xml"}]}}'}
Time\\ Critical&start=90479&rows=0"},{"rel":"search","type":"application/opensearchdescription+xml","href":"opensearch_description.xml"}]}}'}
headers:
Content-Type: [application/json]
Pragma: [no-cache]
Expand All @@ -35,21 +35,21 @@ interactions:
Accept-Encoding: ['gzip, deflate']
Connection: [keep-alive]
Content-Length: ['31']
Content-Type: [application/x-www-form-urlencoded]
Content-Type: [application/x-www-form-urlencoded; charset=UTF-8]
User-Agent: [sentinelsat/0.12.2]
method: POST
uri: https://scihub.copernicus.eu/apihub/search?format=json&rows=0&start=0
response:
body: {string: '{"feed":{"xmlns:opensearch":"http://a9.com/-/spec/opensearch/1.1/","xmlns":"http://www.w3.org/2005/Atom","title":"Sentinels
Scientific Data Hub search results for: timeliness:.+\\ Critical","subtitle":"Displaying
0 to -1 of 85720 total results. Request done in 0.007 seconds.","updated":"2018-06-25T16:33:57.715Z","author":{"name":"Sentinels
0 to -1 of 93887 total results. Request done in 0.009 seconds.","updated":"2018-09-06T13:23:17.584Z","author":{"name":"Sentinels
Scientific Data Hub"},"id":"https://scihub.copernicus.eu/apihub/search?q=timeliness:.+\\
Critical","opensearch:totalResults":"85720","opensearch:startIndex":"0","opensearch:itemsPerPage":"0","opensearch:Query":{"role":"request","searchTerms":"timeliness:.+\\
Critical","opensearch:totalResults":"93887","opensearch:startIndex":"0","opensearch:itemsPerPage":"0","opensearch:Query":{"role":"request","searchTerms":"timeliness:.+\\
Critical","startPage":"1"},"link":[{"rel":"self","type":"application/atom+xml","href":"https://scihub.copernicus.eu/apihub/search?q=timeliness:.+\\
Critical&start=0&rows=0"},{"rel":"first","type":"application/atom+xml","href":"https://scihub.copernicus.eu/apihub/search?q=timeliness:.+\\
Critical&start=0&rows=0"},{"rel":"next","type":"application/atom+xml","href":"https://scihub.copernicus.eu/apihub/search?q=timeliness:.+\\
Critical&start=NaN&rows=0"},{"rel":"last","type":"application/atom+xml","href":"https://scihub.copernicus.eu/apihub/search?q=timeliness:.+\\
Critical&start=85719&rows=0"},{"rel":"search","type":"application/opensearchdescription+xml","href":"opensearch_description.xml"}]}}'}
Critical&start=93886&rows=0"},{"rel":"search","type":"application/opensearchdescription+xml","href":"opensearch_description.xml"}]}}'}
headers:
Content-Type: [application/json]
Pragma: [no-cache]
Expand All @@ -64,26 +64,26 @@ interactions:
Accept-Encoding: ['gzip, deflate']
Connection: [keep-alive]
Content-Length: ['37']
Content-Type: [application/x-www-form-urlencoded]
Content-Type: [application/x-www-form-urlencoded; charset=UTF-8]
User-Agent: [sentinelsat/0.12.2]
method: POST
uri: https://scihub.copernicus.eu/apihub/search?format=json&rows=0&start=0
response:
body: {string: '{"feed":{"xmlns:opensearch":"http://a9.com/-/spec/opensearch/1.1/","xmlns":"http://www.w3.org/2005/Atom","title":"Sentinels
Scientific Data Hub search results for: identifier:/S[123 ]A.*/","subtitle":"Displaying
0 to -1 of 4791500 total results. Request done in 2.191 seconds.","updated":"2018-06-25T16:34:00.520Z","author":{"name":"Sentinels
0 to -1 of 5575647 total results. Request done in 2.62 seconds.","updated":"2018-09-06T13:23:20.325Z","author":{"name":"Sentinels
Scientific Data Hub"},"id":"https://scihub.copernicus.eu/apihub/search?q=identifier:/S[123
]A.*/","opensearch:totalResults":"4791500","opensearch:startIndex":"0","opensearch:itemsPerPage":"0","opensearch:Query":{"role":"request","searchTerms":"identifier:/S[123
]A.*/","opensearch:totalResults":"5575647","opensearch:startIndex":"0","opensearch:itemsPerPage":"0","opensearch:Query":{"role":"request","searchTerms":"identifier:/S[123
]A.*/","startPage":"1"},"link":[{"rel":"self","type":"application/atom+xml","href":"https://scihub.copernicus.eu/apihub/search?q=identifier:/S[123
]A.*/&start=0&rows=0"},{"rel":"first","type":"application/atom+xml","href":"https://scihub.copernicus.eu/apihub/search?q=identifier:/S[123
]A.*/&start=0&rows=0"},{"rel":"next","type":"application/atom+xml","href":"https://scihub.copernicus.eu/apihub/search?q=identifier:/S[123
]A.*/&start=NaN&rows=0"},{"rel":"last","type":"application/atom+xml","href":"https://scihub.copernicus.eu/apihub/search?q=identifier:/S[123
]A.*/&start=4791499&rows=0"},{"rel":"search","type":"application/opensearchdescription+xml","href":"opensearch_description.xml"}]}}'}
]A.*/&start=5575646&rows=0"},{"rel":"search","type":"application/opensearchdescription+xml","href":"opensearch_description.xml"}]}}'}
headers:
Content-Type: [application/json]
Pragma: [no-cache]
Server: [Apache-Coyote/1.1]
Vary: [Accept-Encoding]
content-length: ['1300']
content-length: ['1299']
status: {code: 200, message: OK}
version: 1

0 comments on commit 91a9c03

Please sign in to comment.