Skip to content

Commit

Permalink
test client request with live server
Browse files Browse the repository at this point in the history
  • Loading branch information
rizac committed Apr 17, 2024
1 parent cbcd0a0 commit 195b3af
Show file tree
Hide file tree
Showing 12 changed files with 125 additions and 31 deletions.
17 changes: 8 additions & 9 deletions egsim/api/data/client/snippets/get_egsim_predictions.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,8 @@ def get_egsim_predictions(
distances: list[float],
rupture_params: Optional[dict] = None,
site_params: Optional[dict] = None,
data_format="hdf"
data_format="hdf",
base_url="https://egsim.gfz-potsdam.de/query/predictions"
) -> pd.DataFrame:
"""Retrieve the ground motion predictions for the selected set of ground motion
models and intensity measure types. Each prediction will be the result of a given
Expand Down Expand Up @@ -65,21 +66,19 @@ def get_egsim_predictions(
'dist': distances
}

# POST request for eGSIM
response = requests.post(
"https://egsim.gfz-potsdam.de/query/predictions", # the base request URL
json=parameters
)
# POST request to eGSIM
response = requests.post(base_url, json=parameters)

# eGSIM might return response denoting an error. Treat these response as
# Python exceptions outputting the original eGSIM message (more meaningful)
# Python exceptions and output the original eGSIM message
try:
response.raise_for_status()
except requests.exceptions.HTTPError as exc:
msg = exc.response.json()['message'] # eGSIM detailed error message
raise ValueError(f"eGSIM error: {msg} ({exc.response.url}) ") from None

# `response.content` is the computation result, as bytes sequence in CSV or HDF
# format. Read it into a pandas.DataFrame:
# `response.content` is the computed data, as in-memory file (bytes sequence)
# in CSV or HDF format. Read it into a pandas.DataFrame:
if parameters['format'] == 'hdf':
# `pd.read_hdf` works for HDF files on disk. Workaround:
with pd.HDFStore(
Expand Down
17 changes: 8 additions & 9 deletions egsim/api/data/client/snippets/get_egsim_residuals.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,8 @@ def get_egsim_residuals(
flatfile: Union[io.IOBase, str],
query_string=None,
likelihood=False,
data_format="hdf"
data_format="hdf",
base_url="https://egsim.gfz-potsdam.de/query/residuals"
) -> pd.DataFrame:
"""Retrieve the residuals for the flatfile and the selected
set of ground motion models and intensity measure types. Examples:
Expand Down Expand Up @@ -75,21 +76,19 @@ def get_egsim_residuals(
# uploaded flatfile:
args = {'data': parameters, 'files': {'flatfile': flatfile}}

# POST request for eGSIM. Return a response object (the server/eGSIM response)
response = requests.post(
"https://egsim.gfz-potsdam.de/query/residuals", # the base request URL
**args
)
# POST request to eGSIM
response = requests.post(base_url, **args)

# eGSIM might return response denoting an error. Treat these response as
# Python exceptions outputting the original eGSIM message (more meaningful)
# Python exceptions and output the original eGSIM message
try:
response.raise_for_status()
except requests.exceptions.HTTPError as exc:
msg = exc.response.json()['message'] # eGSIM detailed error message
raise ValueError(f"eGSIM error: {msg} ({exc.response.url}) ") from None

# `response.content` is the computation result, as bytes sequence in CSV or HDF
# format. Read it into a pandas.DataFrame:
# `response.content` is the computed data, as in-memory file (bytes sequence)
# in CSV or HDF format. Read it into a pandas.DataFrame:
if parameters['format'] == 'hdf':
# `pd.read_hdf` works for HDF files on disk. Workaround:
with pd.HDFStore(
Expand Down
Binary file added tests/data/predictions.hdf
Binary file not shown.
Binary file added tests/data/residuals.hdf
Binary file not shown.
File renamed without changes.
2 changes: 1 addition & 1 deletion tests/django/test_api_residuals.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ class Test:
# flatfile 2 (in parent data dir because it is shared with egsim.smtk):
flatfile_tk_content: bytes
data_dir = join(dirname(dirname(data_dir)), 'data')
with open(join(data_dir, 'tk_20230206_flatfile_geometric_mean.csv'), 'rb') as _:
with open(join(data_dir, 'test_flatfile.csv'), 'rb') as _:
flatfile_tk_content = _.read()


Expand Down
2 changes: 1 addition & 1 deletion tests/django/test_app_views.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@
@pytest.mark.django_db
class Test:
with open(abspath(join(dirname(dirname(__file__)), 'data',
'tk_20230206_flatfile_geometric_mean.csv')), 'rb') as _:
'test_flatfile.csv')), 'rb') as _:
flatfile_tk_content = _.read()
del _

Expand Down
52 changes: 52 additions & 0 deletions tests/django/test_client_snippets.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
from os.path import dirname, abspath, join, isdir, isfile
import pandas as pd

from egsim.api.data.client.snippets.get_egsim_predictions import get_egsim_predictions
from egsim.api.data.client.snippets.get_egsim_residuals import get_egsim_residuals
from egsim.api.urls import PREDICTIONS_URL_PATH, RESIDUALS_URL_PATH

# Directory holding the test data files, shared across test sub-packages
# (one level above this test package):
_tests_dir = dirname(dirname(abspath(__file__)))
test_data_dir = join(_tests_dir, 'data')

assert isdir(test_data_dir)

# Keep these in sync with the values in tests.smtk.test_create_data: both
# test modules compare their results against the same stored HDF files:
models = ['CauzziEtAl2014', 'BindiEtAl2014Rjb']
imts = ['PGA', 'SA(0.032)', 'SA(0.034)']


def test_server_requests(live_server):
    """Integration test: run the client snippets against the live Django test
    server (pytest-django `live_server` fixture) and compare the returned
    dataframes with the stored test data, where available."""
    url = live_server.url
    create_predictions(url)
    create_residuals(url)


def create_residuals(base_url):
    """Request residuals from the server at the given URL via the client
    snippet `get_egsim_residuals` and, if a reference HDF file exists in the
    test data directory, check that the returned dataframe matches it.

    :param base_url: the server base URL (scheme + host, no path)
    """
    ffile_path = join(test_data_dir, 'test_flatfile.csv')
    # Open in binary mode: the file is uploaded as-is in the POST request, and
    # binary mode avoids any platform-dependent newline translation:
    with open(ffile_path, 'rb') as fpt:
        dfr = get_egsim_residuals(
            models, imts, fpt, likelihood=False,
            base_url=f"{base_url}/{RESIDUALS_URL_PATH}"
        )
    file = join(test_data_dir, 'residuals.hdf')
    if isfile(file):
        dfr2: pd.DataFrame = pd.read_hdf(file)  # noqa
        # dfr2 has only required columns for performance reasons,
        # so check those are the same:
        dfr = dfr[[c for c in dfr.columns if c in dfr2.columns]]
        # now test equality:
        pd.testing.assert_frame_equal(
            dfr, dfr2, check_exact=False, atol=0, rtol=1e-8
        )


def create_predictions(base_url):
    """Request predictions from the server at the given URL via the client
    snippet `get_egsim_predictions` and, if a reference HDF file exists in the
    test data directory, check that the returned dataframe matches it.

    :param base_url: the server base URL (scheme + host, no path)
    """
    dfr = get_egsim_predictions(
        models, imts, [4, 5], [1, 10, 100],
        base_url=f"{base_url}/{PREDICTIONS_URL_PATH}"
    )
    ref_file = join(test_data_dir, 'predictions.hdf')
    if not isfile(ref_file):
        return
    expected = pd.read_hdf(ref_file)
    pd.testing.assert_frame_equal(
        dfr, expected, check_exact=False, atol=0, rtol=1e-3
    )
2 changes: 1 addition & 1 deletion tests/django/test_forms.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
GSIM, IMT = 'gsim', 'imt'

flatfile_tk_path = abspath(join(dirname(dirname(__file__)), 'data',
'tk_20230206_flatfile_geometric_mean.csv'))
'test_flatfile.csv'))

@pytest.mark.django_db
class Test:
Expand Down
2 changes: 1 addition & 1 deletion tests/smtk/flatfile/test_flatfile_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ def test_read_flatifle_yaml():

def test_flatfile_turkey():
fpath = abspath(join(dirname(dirname(dirname(__file__))),
'data', 'tk_20230206_flatfile_geometric_mean.csv'))
'data', 'test_flatfile.csv'))
dfr = read_flatfile(fpath)

# tst with file-like object
Expand Down
25 changes: 16 additions & 9 deletions tests/smtk/flatfile/test_flatfile_metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -234,18 +234,20 @@ def test_get_dtype():
# np.array:
assert get_dtype_of(pd.Series(val).values[0]) == ctype
assert get_dtype_of(pd.Series([val]).values) == ctype
if ctype != ColumnDtype.datetime:
# skip np.array(datetime) and use to_datetime (see below):
assert get_dtype_of(np.array(val)) == ctype
assert get_dtype_of(np.array([val])) == ctype
# pd.numeric and pd.to_datetime
if ctype in (ColumnDtype.float, ColumnDtype.bool, ColumnDtype.int):
assert get_dtype_of(pd.to_numeric(val)) == ctype
assert get_dtype_of(pd.to_numeric([val])) == ctype
elif ctype == ColumnDtype.datetime:
# to_datetime returns a Timestamp so it is not datetime dtype:
# to_datetime returns a Timestamp, so it is not datetime dtype:
assert get_dtype_of(pd.to_datetime(val)) == ctype
assert get_dtype_of(pd.to_datetime([val])) == ctype
# NOTE: NUMPY IS ACTUALLY NOT SUPPORTED, THE CODE BELOW IS LEGACY CODE:
# IF IT FAILS AND THE FIX IS A PAIN, YOU CAN REMOVE THE TEST
if ctype != ColumnDtype.datetime:
# skip np.array(datetime) and use to_datetime (see above):
assert get_dtype_of(np.array(val)) == ctype
assert get_dtype_of(np.array([val])) == ctype

# cases of mixed types that return None as dtype (by default they return string
# but this is a behaviour of pandas that we do not want to mimic):
Expand Down Expand Up @@ -284,8 +286,10 @@ def test_get_dtype_mixed_categories():
"""test that get_dtypoe_of mixed categorical returns None and not
ColumnDtype.category"""
assert get_dtype_of(pd.Series([2, True]).astype('category')) is None
assert get_dtype_of(pd.Series([False, True]).astype('category')) is ColumnDtype.category
assert get_dtype_of(pd.Series([False, None]).astype('category')) is ColumnDtype.category
assert (get_dtype_of(pd.Series([False, True]).astype('category'))
is ColumnDtype.category)
assert (get_dtype_of(pd.Series([False, None]).astype('category'))
is ColumnDtype.category)
assert get_dtype_of(pd.Series([datetime.utcnow(), pd.NaT]).astype(
'category')) is ColumnDtype.category
assert get_dtype_of(pd.Series([2, 3]).astype(
Expand Down Expand Up @@ -320,7 +324,9 @@ def test_mixed_arrays_are_mostly_null_dtype():
if set(val) == {2, 2.2} and cdtype == ColumnDtype.float:
pass

# test n umpy arrays
# test numpy arrays
# NOTE: NUMPY IS ACTUALLY NOT SUPPORTED, THE CODE BELOW IS LEGACY CODE:
# IF IT FAILS AND THE FIX IS A PAIN, YOU CAN REMOVE THE TEST
cdtype = None
try:
cdtype = get_dtype_of(np.array(val))
Expand Down Expand Up @@ -355,7 +361,8 @@ def test_dtypes_with_null():
assert cdtype in (ColumnDtype.int, ColumnDtype.bool)

# test numpy array:
# assert cdtype == get_dtype_of(np.array(vals))
# NOTE: NUMPY IS ACTUALLY NOT SUPPORTED, THE CODE BELOW IS LEGACY CODE:
# IF IT FAILS AND THE FIX IS A PAIN, YOU CAN REMOVE THE TEST
try:
assert cdtype != get_dtype_of(np.array(vals))
except AssertionError:
Expand Down
37 changes: 37 additions & 0 deletions tests/smtk/test_create_data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
from os.path import dirname, abspath, join, isdir, isfile
import pandas as pd
from egsim.smtk import get_scenarios_predictions, get_residuals, read_flatfile

# Directory holding the test data files, shared across test sub-packages
# (one level above this test package):
_parent_dir = dirname(dirname(abspath(__file__)))
test_data_dir = join(_parent_dir, 'data')

assert isdir(test_data_dir)

# NOTE: tests.django.test_client_snippets relies on these same values:
models = ['CauzziEtAl2014', 'BindiEtAl2014Rjb']
imts = ['PGA', 'SA(0.032)', 'SA(0.034)']


def test_create_predictions():
    """Compute scenario predictions and compare them with the stored
    reference HDF file, creating that file first if it does not exist yet."""
    dfr = get_scenarios_predictions(models, imts, [4, 5], [1, 10, 100])
    ref_file = join(test_data_dir, 'predictions.hdf')
    if isfile(ref_file):
        pd.testing.assert_frame_equal(dfr, pd.read_hdf(ref_file))
    else:
        dfr.to_hdf(ref_file, key='data')


def test_create_residuals():
    """Compute residuals for the test flatfile and compare them with the
    stored reference HDF file, creating that file first if it does not
    exist yet."""
    flatfile = read_flatfile(join(test_data_dir, 'test_flatfile.csv'))
    # keep only the relevant columns, otherwise the stored file is too big.
    # NOTE(review): this set lists 'SA(0.035)' whereas `imts` has 'SA(0.034)'
    # — presumably the flatfile provides observations at 0.035 from which
    # SA(0.034) residuals are derived; confirm the mismatch is intentional:
    relevant_cols = {'event_id', 'rjb', 'rrup', 'rake', 'magnitude', 'vs30',
                     'SA(0.032)', 'SA(0.035)', 'PGA'}
    flatfile = flatfile[[c for c in flatfile.columns if c in relevant_cols]]

    dfr = get_residuals(models, imts, flatfile, likelihood=False)
    ref_file = join(test_data_dir, 'residuals.hdf')
    if isfile(ref_file):
        pd.testing.assert_frame_equal(dfr, pd.read_hdf(ref_file))
    else:
        dfr.to_hdf(ref_file, key='data')

0 comments on commit 195b3af

Please sign in to comment.