Feature/new test (#80)
* updated test notebook

* update notebook test to not use notebooks

* Update uat_associations.txt with new collections

* changes to drop variables

* update tests

* update tests

* update python libraries

* update notebook test

* update changelog

* update test names

* fix venue check to compare lower-case values

* debugging tests

* testing larger runners

* debug tests

* debug test

* debug test

* debug test

* debug test

* updated concise tests

---------

Co-authored-by: jonathansmolenski <jonathansmolenski@users.noreply.github.com>
sliu008 and jonathansmolenski authored Aug 28, 2023
1 parent a5dbcb9 commit 6d8415f
Showing 3 changed files with 151 additions and 77 deletions.
6 changes: 4 additions & 2 deletions .github/workflows/build-pipeline.yml
@@ -141,17 +141,19 @@ jobs:
git tag -a "${{ env.software_version }}" -m "Version ${{ env.software_version }}"
git push origin "${{ env.software_version }}"
- name: Publish UMM-S with new version
uses: podaac/cmr-umm-updater@0.2.3
uses: podaac/cmr-umm-updater@0.5.0
if: |
github.ref == 'refs/heads/main' ||
startsWith(github.ref, 'refs/heads/release')
with:
umm-s-json: 'cmr/concise_cmr_umm_s.json'
umm-json: 'cmr/concise_cmr_umm_s.json'
provider: 'POCLOUD'
env: ${{ env.venue }}
version: ${{ env.software_version }}
timeout: 60
disable_removal: 'true'
umm_type: 'umm-s'
use_associations: 'false'
env:
cmr_user: ${{secrets.CMR_USER}}
cmr_pass: ${{secrets.CMR_PASS}}
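
Read as before/after pairs, the hunk above bumps cmr-umm-updater from 0.2.3 to 0.5.0, renames the umm-s-json input to umm-json, and adds the umm_type and use_associations inputs. Reassembled from those lines (indentation assumed, not taken from the full file), the step after this change would read roughly:

      - name: Publish UMM-S with new version
        uses: podaac/cmr-umm-updater@0.5.0
        if: |
          github.ref == 'refs/heads/main' ||
          startsWith(github.ref, 'refs/heads/release')
        with:
          umm-json: 'cmr/concise_cmr_umm_s.json'
          provider: 'POCLOUD'
          env: ${{ env.venue }}
          version: ${{ env.software_version }}
          timeout: 60
          disable_removal: 'true'
          umm_type: 'umm-s'
          use_associations: 'false'
        env:
          cmr_user: ${{secrets.CMR_USER}}
          cmr_pass: ${{secrets.CMR_PASS}}
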
6 changes: 2 additions & 4 deletions .github/workflows/jupyter_test.yml
@@ -40,9 +40,7 @@ jobs:
- name: Install dependencies
run: |
pip3 install --upgrade pip
pip3 install xarray
pip3 install black
pip3 install matplotlib
pip3 install netCDF4
pip3 install git+https://github.com/nasa/harmony-py.git
pip3 install git+https://github.com/podaac/cmr-umm-updater.git
@@ -54,8 +52,8 @@ jobs:
cmr_association_diff -e ops -t service -a "cmr/ops_associations.txt" -p POCLOUD -n 'PODAAC Concise' -o ${{ env.OPS_OUTPUT_FILE }} --token ${{ secrets.LAUNCHPAD_TOKEN_OPS }}
- name: Run Add Collection Test
run: |
python3 "add_collection_test.py" -e uat -i ${{ env.UAT_OUTPUT_FILE }} -o ${{ env.OUTPUT_DIR }}
python3 "add_collection_test.py" -e ops -i ${{ env.OPS_OUTPUT_FILE }} -o ${{ env.OUTPUT_DIR }}
python3 add_collection_test.py -e uat -i ${{ env.UAT_OUTPUT_FILE }} -o ${{ env.OUTPUT_DIR }}
python3 add_collection_test.py -e ops -i ${{ env.OPS_OUTPUT_FILE }} -o ${{ env.OUTPUT_DIR }}
- name: Check UAT files
id: check_UAT_output_files
run: |
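
The two python3 invocations above drop the quotes around the script name but are otherwise unchanged. To reproduce that test step outside CI, a minimal local sketch in Python (the UAT_USERNAME/UAT_PASSWORD variable names come from get_username_and_password in the script below; the collection list and output directory are hypothetical placeholders):

import os
import subprocess

# Credentials read by get_username_and_password() in add_collection_test.py
os.environ["UAT_USERNAME"] = "<edl-username>"  # placeholder
os.environ["UAT_PASSWORD"] = "<edl-password>"  # placeholder

# Mirrors the workflow step: python3 add_collection_test.py -e uat -i <file> -o <dir>
subprocess.run(
    ["python3", "add_collection_test.py",
     "-e", "uat",
     "-i", "uat_collections.txt",  # hypothetical collection list
     "-o", "output"],              # hypothetical output directory
    check=True,
)
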
216 changes: 145 additions & 71 deletions add_collection_test.py
@@ -1,15 +1,15 @@
import os
import matplotlib.pyplot as plt
from os import path
from urllib.parse import urlparse
import itertools
import unittest
import numpy as np
import netCDF4 as nc
import xarray as xr
import requests
from harmony import BBox, Client, Collection, Request, Environment
import argparse

from os import path

from utils import FileHandler
from utils.enums import Venue
import itertools


def parse_args():
@@ -51,9 +51,9 @@ def parse_args():


def get_username_and_password(venue):
if venue == "UAT":
if venue.lower() == "uat":
return os.environ.get("UAT_USERNAME"), os.environ.get("UAT_PASSWORD")
elif venue == "OPS":
elif venue.lower() == "ops":
return os.environ.get('OPS_USERNAME'), os.environ.get('OPS_PASSWORD')
else:
raise ValueError("Invalid venue")
@@ -75,12 +75,115 @@ def get_x_y_variables(variables):
return x_var, y_var


def verify_dims(merged_group, origin_group, both_merged):
for dim in origin_group.dimensions:
if both_merged:
unittest.TestCase().assertEqual(merged_group.dimensions[dim].size, origin_group.dimensions[dim].size)
else:
unittest.TestCase().assertGreaterEqual(merged_group.dimensions[dim].size, origin_group.dimensions[dim].size)


def verify_attrs(merged_obj, origin_obj, both_merged):
ignore_attributes = [
'request-bounding-box', 'request-bounding-box-description', 'PODAAC-dataset-shortname',
'PODAAC-persistent-ID', 'time_coverage_end', 'time_coverage_start'
]

merged_attrs = merged_obj.ncattrs()
origin_attrs = origin_obj.ncattrs()

for attr in origin_attrs:
if attr in ignore_attributes:
# Skip attributes which are present in the Java implementation,
# but not (currently) present in the Python implementation
continue

if not both_merged and attr not in merged_attrs:
# Skip attributes which are not present in both merged and origin.
# This is normal operation, as some attributes may be omitted because
# they're inconsistent between granules
continue

merged_attr = merged_obj.getncattr(attr)
if both_merged and isinstance(merged_attr, int):
# Skip integer values - the Java implementation seems to omit
# these values due to its internal handling of all values as
# Strings
continue

origin_attr = origin_obj.getncattr(attr)
if isinstance(origin_attr, np.ndarray):
unittest.TestCase().assertTrue(np.array_equal(merged_attr, origin_attr))
else:
if attr != "history_json":
unittest.TestCase().assertEqual(merged_attr, origin_attr)


def verify_variables(merged_group, origin_group, subset_index, both_merged):
for var in origin_group.variables:
merged_var = merged_group.variables[var]
origin_var = origin_group.variables[var]

verify_attrs(merged_var, origin_var, both_merged)

if both_merged:
# both groups require subset indexes
merged_data = merged_var[subset_index[0]]
origin_data = origin_var[subset_index[1]]
else:
# merged group requires a subset index
merged_data = np.resize(merged_var[subset_index], origin_var.shape)
origin_data = origin_var

# verify variable data
if isinstance(origin_data, str):
unittest.TestCase().assertEqual(merged_data, origin_data)
else:
unittest.TestCase().assertTrue(np.array_equal(merged_data, origin_data, equal_nan=True))


def verify_groups(merged_group, origin_group, subset_index, both_merged=False):
verify_dims(merged_group, origin_group, both_merged)
verify_attrs(merged_group, origin_group, both_merged)
verify_variables(merged_group, origin_group, subset_index, both_merged)

for child_group in origin_group.groups:
merged_subgroup = merged_group[child_group]
origin_subgroup = origin_group[child_group]
verify_groups(merged_subgroup, origin_subgroup, subset_index, both_merged)


# GET TOKEN FROM CMR
def get_token(cmr_root, username, password):
token_api = "https://{}/api/users/tokens".format(cmr_root)
response = requests.get(token_api, auth=(username, password))
content = response.json()
if len(content) > 0:
return content[0].get('access_token')
else:
create_token_api = "https://{}/api/users/token".format(cmr_root)
response = requests.post(create_token_api, auth=(username, password))
content = response.json()
return content.get('access_token')


def download_file(url, local_path, headers):
response = requests.get(url, stream=True, headers=headers)
if response.status_code == 200:
with open(local_path, 'wb') as file:
for chunk in response.iter_content(chunk_size=8192):
file.write(chunk)
print("Original File downloaded successfully.")
else:
print(f"Failed to download the file. Status code: {response.status_code}")


def test(collection_id, venue):

max_results = 2

username, password = get_username_and_password(venue)
environment = Environment.UAT if venue == "UAT" else Environment.PROD
environment = Environment.UAT if venue.lower() == "uat" else Environment.PROD
harmony_client = Client(auth=(username, password), env=environment)

collection = Collection(id=collection_id)
@@ -115,74 +218,44 @@ def test(collection_id, venue):

filename = file_names[0]
# Handle time dimension and variables dropping
dt = nc.Dataset(filename, 'r')
groups = list(dt.groups)
dt.close()

drop_variables = [
'time',
'sample',
'meas_ind',
'wvf_ind',
'ddm',
'averaged_l1'
]
if not groups:
groups = [None]
merge_dataset = nc.Dataset(filename, 'r')

for group in groups:
cmr_base_url = "https://cmr.earthdata.nasa.gov/search/granules.umm_json?readable_granule_name="
edl_root = 'urs.earthdata.nasa.gov'

ds = xr.open_dataset(filename, group=group, decode_times=False, drop_variables=drop_variables)
if venue.lower() == 'uat':
cmr_base_url = "https://cmr.uat.earthdata.nasa.gov/search/granules.umm_json?readable_granule_name="
edl_root = 'uat.urs.earthdata.nasa.gov'

token = get_token(edl_root, username, password)
headers = {
"Authorization": f"Bearer {token}"
}

assert len(ds.coords['subset_index']) == max_results
variables = list(ds.variables)
x_var, y_var = get_x_y_variables(variables)
original_files = merge_dataset.variables['subset_files']
assert len(original_files) == max_results

for v in variables:
if v not in ['subset_files', 'lat', 'lon', 'latitude', 'longitude', 'beam_clat', 'beam_clon']:
variable = v
break
for file in original_files:

if x_var is not None and y_var is not None:
break
file_name = file.rsplit(".", 1)[0]
print(file_name)
cmr_query = f"{cmr_base_url}{file_name}&collection_concept_id={collection_id}"
print(cmr_query)

ds.close()

if x_var is None or y_var is None:
raise Exception("Lon and Lat variables are not found")

for index in range(0, max_results):
ax = ds.isel(subset_index=index).plot.scatter(
y=y_var,
x=x_var,
hue=variable,
s=1,
levels=9,
cmap="jet",
aspect=2.5,
size=9
)
plt.xlim(0., 360.)
plt.ylim(-90., 90.)
plt.show(block=False)
plt.close(ax.figure)

ax = ds.plot.scatter(
y=y_var,
x=x_var,
hue=variable,
s=1,
levels=9,
cmap="jet",
aspect=2.5,
size=9
)
plt.xlim(0., 360.)
plt.ylim(-90., 90.)
plt.show(block=False)
plt.close(ax.figure)
response = requests.get(cmr_query, headers=headers)

result = response.json()
links = result.get('items')[0].get('umm').get('RelatedUrls')
for link in links:
if link.get('Type') == 'GET DATA':
data_url = link.get('URL')
parsed_url = urlparse(data_url)
local_file_name = os.path.basename(parsed_url.path)
download_file(data_url, local_file_name, headers)

ds.close()
for i, file in enumerate(original_files):
origin_dataset = nc.Dataset(file)
verify_groups(merge_dataset, origin_dataset, i)


def run():
@@ -220,7 +293,7 @@ def run():
fails.append(collection)

# Create output files
if output_location:
if output_location:
success_outfile = path.realpath(f'{output_location}/{_args.env}_success.txt')
fail_outfile = path.realpath(f'{output_location}/{_args.env}_fail.txt')

Expand All @@ -234,4 +307,5 @@ def run():


if __name__ == '__main__':
print("Start running test .......")
run()
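
The commit replaces the matplotlib plotting check with structural verification: test() now looks up each original granule in CMR, downloads it, and asserts the merged file against it. A minimal usage sketch of that verification entry point, assuming add_collection_test imports cleanly as a module and using hypothetical file names:

import netCDF4 as nc
from add_collection_test import verify_groups  # assumes the script is importable

merged = nc.Dataset("merged_subset.nc4")  # hypothetical merged CONCISE output
original = nc.Dataset("granule_0.nc4")    # hypothetical first original granule

# Recursively compares dimensions, attributes, and variable data for every
# group; subset index 0 selects the first granule's slice of the merged file.
verify_groups(merged, original, 0)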
