Feature/new test (#80)
* updated test notebook

* update notebook test to not use notebooks

* Update uat_associations.txt with new collections

* changes to drop variables

* update tests

* update tests

* update python libraries

* update notebook test

* update changelog

* update test names

* fix venue check to compare lower-case values

* debugging tests

* testing larger runners

* debug tests

* debug test

* debug test

* debug test

* debug test

* updated concise tests

---------

Co-authored-by: jonathansmolenski <jonathansmolenski@users.noreply.github.com>
sliu008 and jonathansmolenski authored Aug 28, 2023
1 parent a5dbcb9 commit 6d8415f
Showing 3 changed files with 151 additions and 77 deletions.
6 changes: 4 additions & 2 deletions .github/workflows/build-pipeline.yml
@@ -141,17 +141,19 @@ jobs:
git tag -a "${{ env.software_version }}" -m "Version ${{ env.software_version }}"
git push origin "${{ env.software_version }}"
- name: Publish UMM-S with new version
uses: podaac/cmr-umm-updater@0.2.3
uses: podaac/cmr-umm-updater@0.5.0
if: |
github.ref == 'refs/heads/main' ||
startsWith(github.ref, 'refs/heads/release')
with:
umm-s-json: 'cmr/concise_cmr_umm_s.json'
umm-json: 'cmr/concise_cmr_umm_s.json'
provider: 'POCLOUD'
env: ${{ env.venue }}
version: ${{ env.software_version }}
timeout: 60
disable_removal: 'true'
umm_type: 'umm-s'
use_associations: 'false'
env:
cmr_user: ${{secrets.CMR_USER}}
cmr_pass: ${{secrets.CMR_PASS}}
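
Read as before/after pairs, the hunk above bumps cmr-umm-updater from 0.2.3 to 0.5.0, renames the umm-s-json input to umm-json, and adds the umm_type and use_associations inputs. Reassembled from those lines (indentation assumed, not taken from the full file), the step after this change would read roughly:

      - name: Publish UMM-S with new version
        uses: podaac/cmr-umm-updater@0.5.0
        if: |
          github.ref == 'refs/heads/main' ||
          startsWith(github.ref, 'refs/heads/release')
        with:
          umm-json: 'cmr/concise_cmr_umm_s.json'
          provider: 'POCLOUD'
          env: ${{ env.venue }}
          version: ${{ env.software_version }}
          timeout: 60
          disable_removal: 'true'
          umm_type: 'umm-s'
          use_associations: 'false'
        env:
          cmr_user: ${{secrets.CMR_USER}}
          cmr_pass: ${{secrets.CMR_PASS}}
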
6 changes: 2 additions & 4 deletions .github/workflows/jupyter_test.yml
@@ -40,9 +40,7 @@ jobs:
- name: Install dependencies
run: |
pip3 install --upgrade pip
pip3 install xarray
pip3 install black
pip3 install matplotlib
pip3 install netCDF4
pip3 install git+https://github.com/nasa/harmony-py.git
pip3 install git+https://github.com/podaac/cmr-umm-updater.git
@@ -54,8 +52,8 @@ jobs:
cmr_association_diff -e ops -t service -a "cmr/ops_associations.txt" -p POCLOUD -n 'PODAAC Concise' -o ${{ env.OPS_OUTPUT_FILE }} --token ${{ secrets.LAUNCHPAD_TOKEN_OPS }}
- name: Run Add Collection Test
run: |
python3 "add_collection_test.py" -e uat -i ${{ env.UAT_OUTPUT_FILE }} -o ${{ env.OUTPUT_DIR }}
python3 "add_collection_test.py" -e ops -i ${{ env.OPS_OUTPUT_FILE }} -o ${{ env.OUTPUT_DIR }}
python3 add_collection_test.py -e uat -i ${{ env.UAT_OUTPUT_FILE }} -o ${{ env.OUTPUT_DIR }}
python3 add_collection_test.py -e ops -i ${{ env.OPS_OUTPUT_FILE }} -o ${{ env.OUTPUT_DIR }}
- name: Check UAT files
id: check_UAT_output_files
run: |
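
The two python3 invocations above drop the quotes around the script name but are otherwise unchanged. To reproduce that test step outside CI, a minimal local sketch in Python (the UAT_USERNAME/UAT_PASSWORD variable names come from get_username_and_password in the script below; the collection list and output directory are hypothetical placeholders):

import os
import subprocess

# Credentials read by get_username_and_password() in add_collection_test.py
os.environ["UAT_USERNAME"] = "<edl-username>"  # placeholder
os.environ["UAT_PASSWORD"] = "<edl-password>"  # placeholder

# Mirrors the workflow step: python3 add_collection_test.py -e uat -i <file> -o <dir>
subprocess.run(
    ["python3", "add_collection_test.py",
     "-e", "uat",
     "-i", "uat_collections.txt",  # hypothetical collection list
     "-o", "output"],              # hypothetical output directory
    check=True,
)
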
216 changes: 145 additions & 71 deletions add_collection_test.py
@@ -1,15 +1,15 @@
import os
import matplotlib.pyplot as plt
from os import path
from urllib.parse import urlparse
import itertools
import unittest
import numpy as np
import netCDF4 as nc
import xarray as xr
import requests
from harmony import BBox, Client, Collection, Request, Environment
import argparse

from os import path

from utils import FileHandler
from utils.enums import Venue
import itertools


def parse_args():
@@ -51,9 +51,9 @@ def parse_args():


def get_username_and_password(venue):
if venue == "UAT":
if venue.lower() == "uat":
return os.environ.get("UAT_USERNAME"), os.environ.get("UAT_PASSWORD")
elif venue == "OPS":
elif venue.lower() == "ops":
return os.environ.get('OPS_USERNAME'), os.environ.get('OPS_PASSWORD')
else:
raise ValueError("Invalid venue")
@@ -75,12 +75,115 @@ def get_x_y_variables(variables):
return x_var, y_var


def verify_dims(merged_group, origin_group, both_merged):
for dim in origin_group.dimensions:
if both_merged:
unittest.TestCase().assertEqual(merged_group.dimensions[dim].size, origin_group.dimensions[dim].size)
else:
unittest.TestCase().assertGreaterEqual(merged_group.dimensions[dim].size, origin_group.dimensions[dim].size)


def verify_attrs(merged_obj, origin_obj, both_merged):
ignore_attributes = [
'request-bounding-box', 'request-bounding-box-description', 'PODAAC-dataset-shortname',
'PODAAC-persistent-ID', 'time_coverage_end', 'time_coverage_start'
]

merged_attrs = merged_obj.ncattrs()
origin_attrs = origin_obj.ncattrs()

for attr in origin_attrs:
if attr in ignore_attributes:
# Skip attributes which are present in the Java implementation,
# but not (currently) present in the Python implementation
continue

if not both_merged and attr not in merged_attrs:
# Skip attributes which are not present in both merged and origin.
# This is normal operation, as some attributes may be omitted because
# they're inconsistent between granules
continue

merged_attr = merged_obj.getncattr(attr)
if both_merged and isinstance(merged_attr, int):
# Skip integer values - the Java implementation seems to omit
# these values due to its internal handling of all values as
# Strings
continue

origin_attr = origin_obj.getncattr(attr)
if isinstance(origin_attr, np.ndarray):
unittest.TestCase().assertTrue(np.array_equal(merged_attr, origin_attr))
else:
if attr != "history_json":
unittest.TestCase().assertEqual(merged_attr, origin_attr)


def verify_variables(merged_group, origin_group, subset_index, both_merged):
for var in origin_group.variables:
merged_var = merged_group.variables[var]
origin_var = origin_group.variables[var]

verify_attrs(merged_var, origin_var, both_merged)

if both_merged:
# both groups require subset indexes
merged_data = merged_var[subset_index[0]]
origin_data = origin_var[subset_index[1]]
else:
# merged group requires a subset index
merged_data = np.resize(merged_var[subset_index], origin_var.shape)
origin_data = origin_var

# verify variable data
if isinstance(origin_data, str):
unittest.TestCase().assertEqual(merged_data, origin_data)
else:
unittest.TestCase().assertTrue(np.array_equal(merged_data, origin_data, equal_nan=True))


def verify_groups(merged_group, origin_group, subset_index, both_merged=False):
verify_dims(merged_group, origin_group, both_merged)
verify_attrs(merged_group, origin_group, both_merged)
verify_variables(merged_group, origin_group, subset_index, both_merged)

for child_group in origin_group.groups:
merged_subgroup = merged_group[child_group]
origin_subgroup = origin_group[child_group]
verify_groups(merged_subgroup, origin_subgroup, subset_index, both_merged)


# GET TOKEN FROM CMR
def get_token(cmr_root, username, password):
token_api = "https://{}/api/users/tokens".format(cmr_root)
response = requests.get(token_api, auth=(username, password))
content = response.json()
if len(content) > 0:
return content[0].get('access_token')
else:
create_token_api = "https://{}/api/users/token".format(cmr_root)
response = requests.post(create_token_api, auth=(username, password))
content = response.json()
return content.get('access_token')


def download_file(url, local_path, headers):
response = requests.get(url, stream=True, headers=headers)
if response.status_code == 200:
with open(local_path, 'wb') as file:
for chunk in response.iter_content(chunk_size=8192):
file.write(chunk)
print("Original File downloaded successfully.")
else:
print(f"Failed to download the file. Status code: {response.status_code}")


def test(collection_id, venue):

max_results = 2

username, password = get_username_and_password(venue)
environment = Environment.UAT if venue == "UAT" else Environment.PROD
environment = Environment.UAT if venue.lower() == "uat" else Environment.PROD
harmony_client = Client(auth=(username, password), env=environment)

collection = Collection(id=collection_id)
@@ -115,74 +218,44 @@ def test(collection_id, venue):

filename = file_names[0]
# Handle time dimension and variables dropping
dt = nc.Dataset(filename, 'r')
groups = list(dt.groups)
dt.close()

drop_variables = [
'time',
'sample',
'meas_ind',
'wvf_ind',
'ddm',
'averaged_l1'
]
if not groups:
groups = [None]
merge_dataset = nc.Dataset(filename, 'r')

for group in groups:
cmr_base_url = "https://cmr.earthdata.nasa.gov/search/granules.umm_json?readable_granule_name="
edl_root = 'urs.earthdata.nasa.gov'

ds = xr.open_dataset(filename, group=group, decode_times=False, drop_variables=drop_variables)
if venue.lower() == 'uat':
cmr_base_url = "https://cmr.uat.earthdata.nasa.gov/search/granules.umm_json?readable_granule_name="
edl_root = 'uat.urs.earthdata.nasa.gov'

token = get_token(edl_root, username, password)
headers = {
"Authorization": f"Bearer {token}"
}

assert len(ds.coords['subset_index']) == max_results
variables = list(ds.variables)
x_var, y_var = get_x_y_variables(variables)
original_files = merge_dataset.variables['subset_files']
assert len(original_files) == max_results

for v in variables:
if v not in ['subset_files', 'lat', 'lon', 'latitude', 'longitude', 'beam_clat', 'beam_clon']:
variable = v
break
for file in original_files:

if x_var is not None and y_var is not None:
break
file_name = file.rsplit(".", 1)[0]
print(file_name)
cmr_query = f"{cmr_base_url}{file_name}&collection_concept_id={collection_id}"
print(cmr_query)

ds.close()

if x_var is None or y_var is None:
raise Exception("Lon and Lat variables are not found")

for index in range(0, max_results):
ax = ds.isel(subset_index=index).plot.scatter(
y=y_var,
x=x_var,
hue=variable,
s=1,
levels=9,
cmap="jet",
aspect=2.5,
size=9
)
plt.xlim(0., 360.)
plt.ylim(-90., 90.)
plt.show(block=False)
plt.close(ax.figure)

ax = ds.plot.scatter(
y=y_var,
x=x_var,
hue=variable,
s=1,
levels=9,
cmap="jet",
aspect=2.5,
size=9
)
plt.xlim(0., 360.)
plt.ylim(-90., 90.)
plt.show(block=False)
plt.close(ax.figure)
response = requests.get(cmr_query, headers=headers)

result = response.json()
links = result.get('items')[0].get('umm').get('RelatedUrls')
for link in links:
if link.get('Type') == 'GET DATA':
data_url = link.get('URL')
parsed_url = urlparse(data_url)
local_file_name = os.path.basename(parsed_url.path)
download_file(data_url, local_file_name, headers)

ds.close()
for i, file in enumerate(original_files):
origin_dataset = nc.Dataset(file)
verify_groups(merge_dataset, origin_dataset, i)


def run():
@@ -220,7 +293,7 @@ def run():
fails.append(collection)

# Create output files
if output_location:
if output_location:
success_outfile = path.realpath(f'{output_location}/{_args.env}_success.txt')
fail_outfile = path.realpath(f'{output_location}/{_args.env}_fail.txt')

Expand All @@ -234,4 +307,5 @@ def run():


if __name__ == '__main__':
print("Start running test .......")
run()
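
The commit replaces the matplotlib plotting check with structural verification: test() now looks up each original granule in CMR, downloads it, and asserts the merged file against it. A minimal usage sketch of that verification entry point, assuming add_collection_test imports cleanly as a module and using hypothetical file names:

import netCDF4 as nc
from add_collection_test import verify_groups  # assumes the script is importable

merged = nc.Dataset("merged_subset.nc4")  # hypothetical merged CONCISE output
original = nc.Dataset("granule_0.nc4")    # hypothetical first original granule

# Recursively compares dimensions, attributes, and variable data for every
# group; subset index 0 selects the first granule's slice of the merged file.
verify_groups(merged, original, 0)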
