removing future aliases bug when backporting url-related functions with threads, fixing tests in py2
rizac committed Sep 18, 2018
1 parent dcc458a commit 1f61ba9
Showing 14 changed files with 146 additions and 80 deletions.
9 changes: 2 additions & 7 deletions stream2segment/download/modules/channels.py
@@ -9,6 +9,7 @@
# (http://python-future.org/imports.html#explicit-imports):
from builtins import map, next, zip, range, object

import re
import logging
from itertools import cycle

@@ -22,13 +23,7 @@
from stream2segment.utils import get_progressbar, strconvert
from stream2segment.io.db.pdsql import dbquery2df, shared_colnames, mergeupdate

# make the following(s) behave like python3 counterparts if running from python2.7.x
# (http://python-future.org/imports.html#aliased-imports):
from future import standard_library
import re
standard_library.install_aliases()
from urllib.parse import urlparse # @IgnorePep8
from urllib.request import Request # @IgnorePep8
from stream2segment.utils.url import Request  # this handles py2 and py3 compatibility


# logger: do not use logging.getLogger(__name__) but point to stream2segment.download.logger:
8 changes: 1 addition & 7 deletions stream2segment/download/modules/segments.py
@@ -24,13 +24,7 @@
from stream2segment.utils import get_progressbar
from stream2segment.io.db.pdsql import dbquery2df, mergeupdate, DbManager

# make the following(s) behave like python3 counterparts if running from python2.7.x
# (http://python-future.org/imports.html#aliased-imports):
from future import standard_library
standard_library.install_aliases()
from urllib.parse import urlparse # @IgnorePep8
from urllib.request import Request # @IgnorePep8

from stream2segment.utils.url import Request  # this handles py2 and py3 compatibility

# logger: do not use logging.getLogger(__name__) but point to stream2segment.download.logger:
# this way we preserve the logging namespace hierarchy
8 changes: 1 addition & 7 deletions stream2segment/download/modules/stations.py
@@ -20,13 +20,7 @@
from stream2segment.io.db.pdsql import DbManager
from stream2segment.io.utils import dumps_inv

# make the following(s) behave like python3 counterparts if running from python2.7.x
# (http://python-future.org/imports.html#aliased-imports):
from future import standard_library
standard_library.install_aliases()
from urllib.parse import urlparse # @IgnorePep8
from urllib.request import Request # @IgnorePep8

from stream2segment.utils.url import Request  # this handles py2 and py3 compatibility

# logger: do not use logging.getLogger(__name__) but point to stream2segment.download.logger:
# this way we preserve the logging namespace hierarchy
37 changes: 29 additions & 8 deletions stream2segment/utils/url.py
@@ -12,11 +12,30 @@
from multiprocessing.pool import ThreadPool
import os

from future.utils import PY2

# make the following(s) behave like python3 counterparts if running from python2.7.x
# (http://python-future.org/imports.html#aliased-imports):
from future import standard_library
standard_library.install_aliases()
import urllib.request, urllib.error # @IgnorePep8
# from future import standard_library
# standard_library.install_aliases()
# import urllib.request, urllib.error # @IgnorePep8


# Python 2 and 3: future's aliased-import backports (http://python-future.org/imports.html#aliased-imports)
# are buggy on python2 when used with ThreadPools (as here). Since the relevant functions differ
# only in import location, not in signature, we do it the old way with try/except imports.
# ALSO, ALL IMPORTS REQUIRING ANY OF THE MODULES/CLASSES BELOW SHOULD IMPORT FROM HERE
# TO GUARANTEE PY2+3 COMPATIBILITY
try:
from urllib.parse import urlparse, urlencode
from urllib.request import urlopen, Request
from urllib.error import HTTPError, URLError
from http.client import HTTPException
except ImportError:
from urlparse import urlparse
from urllib import urlencode
from urllib2 import urlopen, Request, HTTPError, URLError
from httplib import HTTPException
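
For illustration, a minimal sketch (not part of the commit) of how a consumer module is expected to import from this shim, mirroring the channels.py, segments.py and stations.py changes above; the URL and payload are made up:

from stream2segment.utils.url import Request  # urlopen, HTTPError, urlparse, ... are importable here too

# build a request the same way on py2 and py3 (a bytes body makes urlopen issue a POST):
req = Request('http://example.org/fdsnws/station/1/query', data=b'level=channel')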


def urlread(url, blocksize=-1, decode=None, wrap_exceptions=True,
@@ -64,11 +83,13 @@ def urlread(url, blocksize=-1, decode=None, wrap_exceptions=True,
# normalize it first to None. If None, don't pass it to urlopen
if timeout is None or timeout <= 0:
timeout = None
# if PY2 and hasattr(url, 'data'):
# kwargs.setdefault('method', 'POST')
# urlib2 does not support with statement in py2. See:
# http://stackoverflow.com/questions/3880750/closing-files-properly-opened-with-urllib2-urlopen
# https://docs.python.org/2.7/library/contextlib.html#contextlib.closing
with closing(urllib.request.urlopen(url, **kwargs) if timeout is None else
urllib.request.urlopen(url, timeout=timeout, **kwargs)) as conn:
with closing(urlopen(url, **kwargs) if timeout is None else
urlopen(url, timeout=timeout, **kwargs)) as conn:
if blocksize < 0: # https://docs.python.org/2.4/lib/bltin-file-objects.html
ret = conn.read() # pylint: disable=no-member
else:
@@ -80,16 +101,16 @@ def urlread(url, blocksize=-1, decode=None, wrap_exceptions=True,
if decode:
ret = ret.decode(decode)
return ret, conn.code, conn.msg # pylint: disable=no-member
except urllib.error.HTTPError as exc:
except HTTPError as exc:
if not raise_http_err:
return None, exc.code, exc.msg
else:
if wrap_exceptions:
raise URLException(exc)
else:
raise exc
except (http.client.HTTPException, # @UndefinedVariable
urllib.error.URLError, socket.error) as exc:
except (HTTPException, # @UndefinedVariable
URLError, socket.error) as exc:
if wrap_exceptions:
raise URLException(exc)
else:
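
For context, a hedged usage sketch of urlread after this change. The returned (data, code, msg) triple and the raise_http_err behaviour are taken from the hunks above; the signature is truncated in this view, so the exact keyword placement is an assumption:

from stream2segment.utils.url import urlread

# fetch in 1024-byte blocks and decode to str; with raise_http_err=False an
# HTTP error is returned as (None, code, msg) instead of being raised:
data, code, msg = urlread('http://example.org', blocksize=1024, decode='utf8',
                          raise_http_err=False, timeout=30)
if data is None:
    print('HTTP error %d: %s' % (code, msg))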
4 changes: 2 additions & 2 deletions tests/functional/test_dinfo.py
@@ -23,8 +23,8 @@
Channel, Download, DataCenter
from stream2segment.download.utils import custom_download_codes
from future.utils import PY2
from future import standard_library
standard_library.install_aliases()
# from future import standard_library
# standard_library.install_aliases()


class Test(object):
22 changes: 14 additions & 8 deletions tests/functional/test_download.py
@@ -50,7 +50,7 @@
from stream2segment.io.db.models import DataCenter, Segment, Download, Station, Channel, WebService,\
withdata
from itertools import cycle, repeat, count, product
from urllib.error import URLError

import socket
from obspy.taup.helper_classes import TauModelError
# import logging
@@ -62,17 +62,23 @@
from logging import StreamHandler
import logging
from io import BytesIO
import urllib.request, urllib.error, urllib.parse
# import urllib.request, urllib.error, urllib.parse
from stream2segment.download.utils import custom_download_codes
from stream2segment.download.modules.mseedlite import MSeedError, unpack
import threading
from stream2segment.utils.url import read_async
# from urllib.error import URLError
from stream2segment.utils.url import read_async, URLError, HTTPError
from stream2segment.utils.resources import get_templates_fpath, yaml_load
from stream2segment.utils.log import configlog4download

from future.standard_library import install_aliases
install_aliases()
from http.client import responses # @UnresolvedImport @IgnorePep8
# from future.standard_library import install_aliases
# install_aliases()
from future.utils import PY2
if PY2:
from BaseHTTPServer import BaseHTTPRequestHandler
responses = BaseHTTPRequestHandler.responses
else:
from http.client import responses
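
A side note on the block above (it also appears in test_download2.py): in py2, BaseHTTPRequestHandler.responses maps status codes to (short message, long message) tuples, whereas py3's http.client.responses maps codes to plain strings. The tests below only pass responses[k] to a mocked HTTPError, so the mismatch is harmless; a hedged sketch, should uniform string values ever be needed:

from future.utils import PY2
if PY2:
    from BaseHTTPServer import BaseHTTPRequestHandler
    # py2 values are (shortmsg, longmsg) tuples: keep the short message only
    responses = dict((code, msgs[0])
                     for code, msgs in BaseHTTPRequestHandler.responses.items())
else:
    from http.client import responses  # already maps code -> short message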

# when debugging, I want the full dataframe with to_string(), not truncated
pd.set_option('display.max_colwidth', -1)
@@ -158,7 +164,7 @@ def init(self, request, db, data, pytestdir):
# self._logout_cache = ""

# class-level patchers:
with patch('stream2segment.utils.url.urllib.request.urlopen') as mock_urlopen:
with patch('stream2segment.utils.url.urlopen') as mock_urlopen:
self.mock_urlopen = mock_urlopen
with patch('stream2segment.utils.inputargs.get_session', return_value=db.session):
# this mocks yaml_load and sets inventory to False, as tests rely on that
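
Note the patch target above: after this commit, urlopen must be patched where it is looked up (stream2segment.utils.url), not at urllib.request.urlopen. A minimal sketch of the pattern, loosely based on setup_urlopen below (the Mock wiring is illustrative):

from mock import patch, Mock  # unittest.mock on py3

with patch('stream2segment.utils.url.urlopen') as mock_urlopen:
    conn = Mock()
    conn.read.side_effect = [b'some data', b'']  # blockwise reads, then EOF
    conn.code, conn.msg = 200, 'OK'
    mock_urlopen.return_value = conn
    # any code path through stream2segment.utils.url.urlread now hits the mock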
@@ -235,7 +241,7 @@ def setup_urlopen(self, urlread_side_effect):
for k in urlread_side_effect:
a = Mock()
if type(k) == int:
a.read.side_effect = urllib.error.HTTPError('url', int(k), responses[k], None, None)
a.read.side_effect = HTTPError('url', int(k), responses[k], None, None)
elif type(k) in (bytes, str):
def func(k):
b = BytesIO(k.encode('utf8') if type(k) == str else k) # py2to3 compatible
27 changes: 16 additions & 11 deletions tests/functional/test_download2.py
@@ -9,8 +9,9 @@
# from utils import date
# assert sys.path[0] == os.path.realpath(myPath + '/../../')

from future import standard_library
standard_library.install_aliases()
# from future import standard_library
# standard_library.install_aliases()

from builtins import str
import re
import numpy as np
@@ -46,7 +47,7 @@
from stream2segment.io.db.models import DataCenter, Segment, Download, Station, Channel, WebService,\
withdata
from itertools import cycle, repeat, count, product
from urllib.error import URLError
# from urllib.error import URLError
import socket
from obspy.taup.helper_classes import TauModelError
# import logging
@@ -58,18 +59,22 @@
from logging import StreamHandler
import logging
from io import BytesIO
import urllib.request, urllib.error, urllib.parse
# import urllib.request, urllib.error, urllib.parse
from stream2segment.download.utils import custom_download_codes
from stream2segment.download.modules.mseedlite import MSeedError, unpack
import threading
from stream2segment.utils.url import read_async
from stream2segment.utils.url import read_async, URLError, HTTPError
from stream2segment.utils.resources import get_templates_fpath, yaml_load
from stream2segment.utils.log import configlog4download

from future.standard_library import install_aliases
install_aliases()
from http.client import responses # @UnresolvedImport @IgnorePep8

# from future.standard_library import install_aliases
# install_aliases()
from future.utils import PY2
if PY2:
from BaseHTTPServer import BaseHTTPRequestHandler
responses = BaseHTTPRequestHandler.responses
else:
from http.client import responses
# when debugging, I want the full dataframe with to_string(), not truncated
pd.set_option('display.max_colwidth', -1)

@@ -234,7 +239,7 @@ def init(self, request, db, data, pytestdir):
# self._logout_cache = ""

# class-level patchers:
with patch('stream2segment.utils.url.urllib.request.urlopen') as mock_urlopen:
with patch('stream2segment.utils.url.urlopen') as mock_urlopen:
self.mock_urlopen = mock_urlopen
with patch('stream2segment.utils.inputargs.get_session', return_value=db.session):
# this mocks yaml_load and sets inventory to False, as tests rely on that
@@ -316,7 +321,7 @@ def setup_urlopen(self, urlread_side_effect):
for k in urlread_side_effect:
a = Mock()
if type(k) == int:
a.read.side_effect = urllib.error.HTTPError('url', int(k), responses[k], None, None)
a.read.side_effect = HTTPError('url', int(k), responses[k], None, None)
elif type(k) in (bytes, str):
def func(k):
b = BytesIO(k.encode('utf8') if type(k) == str else k) # py2to3 compatible
14 changes: 12 additions & 2 deletions tests/functional/test_processing.py
@@ -50,6 +50,7 @@ def func(*a, **v):

def readcsv(filename, header=True):
return pd.read_csv(filename, header=None) if not header else pd.read_csv(filename)


class Test(object):

@@ -207,6 +208,13 @@ def logfilecontent(self):
assert os.path.isfile(self._logfilename)
with open(self._logfilename) as opn:
return opn.read()


# def inlogtext(self, string):
# logtext = self.logfilecontent
# for i in range(1 + len(logtext)-len(string)):
# if (sum(ord(a)-ord(b) for a, b in zip(string, logtext[i:])))


# ## ======== ACTUAL TESTS: ================================

@@ -487,14 +495,16 @@ def test_simple_run_retDict_saveinv_complex_select(self, mock_yaml_load,
assert len(csv1) == 1
assert csv1.loc[0, csv1.columns[0]] == expected_first_row_seg_id
logtext = self.logfilecontent
assert """3 segment(s) found to process
cmpstr = """3 segment(s) found to process
segment (id=3): 4 traces (probably gaps/overlaps)
segment (id=2): Station inventory (xml) error: <urlopen error error>
station inventories saved: 1
1 of 3 segment(s) successfully processed
2 of 3 segment(s) skipped with error message (check log or details)""" in logtext
2 of 3 segment(s) skipped with error message (check log or details)"""

assert cmpstr in logtext

# save_downloaded_inventory True, test that we did save any:
assert len(db.session.query(Station).filter(Station.has_inventory).all()) > 0
41 changes: 41 additions & 0 deletions tests/test_request.py
@@ -0,0 +1,41 @@
'''
Created on Sep 18, 2018
@author: rizac
'''


import re


from stream2segment.utils.url import read_async, Request

def test_request():
'''This test performs a REAL connection to test a real case. It should be removed
(or skipped) when no internet connection is available'''

post_data_str = """* * * HH?,HL?,HN? 2017-01-01T00:00:00 2017-06-01T00:00:00
format=text
level=channel"""
urls = ["http://geofon.gfz-potsdam.de/fdsnws/station/1/query",
"http://geofon.gfz-potsdam.de/fdsnws/station/1/query2"]
ids = [1, 2]  # one id per url: zip(urls, ids) below would otherwise drop the second url
iterable = ((id_, Request(url,
data=('format=text\nlevel=channel\n'+post_data_str).encode('utf8')))
for url, id_ in zip(urls, ids))
for obj, result, exc, url in read_async(iterable, urlkey=lambda obj: obj[-1],
blocksize=1048576,
max_workers=None,
decode='utf8', timeout=120):

pass  # just consume the generator: success means no exception is raised
# r = Request("http://geofon.gfz-potsdam.de/fdsnws/station/1/query",
# data="""* * * HH?,HL?,HN? 2017-01-01T00:00:00 2017-06-01T00:00:00
# format=text
# level=channel""".encode('utf8'))
#
# urlread(r)
# h = 9
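
For reference, a hedged sketch of how the yielded tuples could be checked; this is an illustration only, and it assumes read_async yields, per request, the same (data, code, msg) triple that urlread returns as result, with exc set instead on failure:

for obj, result, exc, url in read_async(iterable, urlkey=lambda obj: obj[-1],
                                        blocksize=1048576, max_workers=None,
                                        decode='utf8', timeout=120):
    req_id = obj[0]  # the id_ paired with each Request above
    if exc is not None:
        print('request %s failed: %s' % (req_id, exc))
    else:
        data, code, msg = result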
4 changes: 2 additions & 2 deletions tests/unit/test_mseeds.py
@@ -5,9 +5,9 @@
@author: riccardo
'''

from future import standard_library
standard_library.install_aliases()
# from future import standard_library
# standard_library.install_aliases()

import mock, os, sys
import pytest
import re
