Feature/new test #80

Merged · 21 commits · Aug 28, 2023

6 changes: 4 additions & 2 deletions .github/workflows/build-pipeline.yml
@@ -141,17 +141,19 @@ jobs:
           git tag -a "${{ env.software_version }}" -m "Version ${{ env.software_version }}"
           git push origin "${{ env.software_version }}"
       - name: Publish UMM-S with new version
-        uses: podaac/cmr-umm-updater@0.2.3
+        uses: podaac/cmr-umm-updater@0.5.0
         if: |
           github.ref == 'refs/heads/main' ||
           startsWith(github.ref, 'refs/heads/release')
         with:
-          umm-s-json: 'cmr/concise_cmr_umm_s.json'
+          umm-json: 'cmr/concise_cmr_umm_s.json'
           provider: 'POCLOUD'
           env: ${{ env.venue }}
           version: ${{ env.software_version }}
           timeout: 60
           disable_removal: 'true'
+          umm_type: 'umm-s'
+          use_associations: 'false'
         env:
           cmr_user: ${{secrets.CMR_USER}}
           cmr_pass: ${{secrets.CMR_PASS}}
6 changes: 2 additions & 4 deletions .github/workflows/jupyter_test.yml
@@ -40,9 +40,7 @@ jobs:
- name: Install dependencies
run: |
pip3 install --upgrade pip
pip3 install xarray
pip3 install black
pip3 install matplotlib
pip3 install netCDF4
pip3 install git+https://github.com/nasa/harmony-py.git
pip3 install git+https://github.com/podaac/cmr-umm-updater.git
@@ -54,8 +52,8 @@
cmr_association_diff -e ops -t service -a "cmr/ops_associations.txt" -p POCLOUD -n 'PODAAC Concise' -o ${{ env.OPS_OUTPUT_FILE }} --token ${{ secrets.LAUNCHPAD_TOKEN_OPS }}
- name: Run Add Collection Test
run: |
python3 "add_collection_test.py" -e uat -i ${{ env.UAT_OUTPUT_FILE }} -o ${{ env.OUTPUT_DIR }}
python3 "add_collection_test.py" -e ops -i ${{ env.OPS_OUTPUT_FILE }} -o ${{ env.OUTPUT_DIR }}
python3 add_collection_test.py -e uat -i ${{ env.UAT_OUTPUT_FILE }} -o ${{ env.OUTPUT_DIR }}
python3 add_collection_test.py -e ops -i ${{ env.OPS_OUTPUT_FILE }} -o ${{ env.OUTPUT_DIR }}
- name: Check UAT files
id: check_UAT_output_files
run: |
216 changes: 145 additions & 71 deletions add_collection_test.py
@@ -1,15 +1,15 @@
import os
import matplotlib.pyplot as plt
from os import path
from urllib.parse import urlparse
import itertools
import unittest
import numpy as np
import netCDF4 as nc
import xarray as xr
import requests
from harmony import BBox, Client, Collection, Request, Environment
import argparse

from os import path

from utils import FileHandler
from utils.enums import Venue
import itertools


def parse_args():
@@ -51,9 +51,9 @@ def parse_args():


def get_username_and_password(venue):
if venue == "UAT":
if venue.lower() == "uat":
return os.environ.get("UAT_USERNAME"), os.environ.get("UAT_PASSWORD")
elif venue == "OPS":
elif venue.lower() == "ops":
return os.environ.get('OPS_USERNAME'), os.environ.get('OPS_PASSWORD')
else:
raise ValueError("Invalid venue")
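
Note: the venue checks above are now case-insensitive, so the -e uat / -e ops flags from the workflow pass straight through without upper-casing. A quick sketch of the behavior (assumes the UAT_* environment variables are set):

    username, password = get_username_and_password("uat")   # now equivalent to "UAT"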
@@ -75,12 +75,115 @@ def get_x_y_variables(variables):
return x_var, y_var


def verify_dims(merged_group, origin_group, both_merged):
for dim in origin_group.dimensions:
if both_merged:
unittest.TestCase().assertEqual(merged_group.dimensions[dim].size, origin_group.dimensions[dim].size)
else:
unittest.TestCase().assertGreaterEqual(merged_group.dimensions[dim].size, origin_group.dimensions[dim].size)

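
Note: these helpers instantiate unittest.TestCase() on the fly purely to borrow its assert* methods outside a test class; the pattern works because failed assertions raise AssertionError directly. A minimal illustration:

    import unittest

    case = unittest.TestCase()
    case.assertEqual(2 + 2, 4)        # passes silently
    case.assertGreaterEqual(5, 3)     # same method verify_dims uses for unmerged dims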

def verify_attrs(merged_obj, origin_obj, both_merged):
ignore_attributes = [
'request-bounding-box', 'request-bounding-box-description', 'PODAAC-dataset-shortname',
'PODAAC-persistent-ID', 'time_coverage_end', 'time_coverage_start'
]

merged_attrs = merged_obj.ncattrs()
origin_attrs = origin_obj.ncattrs()

for attr in origin_attrs:
if attr in ignore_attributes:
# Skip attributes which are present in the Java implementation,
# but not (currently) present in the Python implementation
continue

if not both_merged and attr not in merged_attrs:
# Skip attributes which are not present in both merged and origin.
            # This is normal operation, as some attributes may be omitted because
            # they're inconsistent between granules
continue

merged_attr = merged_obj.getncattr(attr)
if both_merged and isinstance(merged_attr, int):
# Skip integer values - the Java implementation seems to omit
# these values due to its internal handling of all values as
# Strings
continue

origin_attr = origin_obj.getncattr(attr)
if isinstance(origin_attr, np.ndarray):
unittest.TestCase().assertTrue(np.array_equal(merged_attr, origin_attr))
else:
if attr != "history_json":
unittest.TestCase().assertEqual(merged_attr, origin_attr)


def verify_variables(merged_group, origin_group, subset_index, both_merged):
for var in origin_group.variables:
merged_var = merged_group.variables[var]
origin_var = origin_group.variables[var]

verify_attrs(merged_var, origin_var, both_merged)

if both_merged:
# both groups require subset indexes
merged_data = merged_var[subset_index[0]]
origin_data = origin_var[subset_index[1]]
else:
# merged group requires a subset index
merged_data = np.resize(merged_var[subset_index], origin_var.shape)
origin_data = origin_var

# verify variable data
if isinstance(origin_data, str):
unittest.TestCase().assertEqual(merged_data, origin_data)
else:
unittest.TestCase().assertTrue(np.array_equal(merged_data, origin_data, equal_nan=True))

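
Note: the both_merged=False branch compares one slice of the merged product against a whole original granule. np.resize truncates the merged slice down to the granule's shape, which works on the assumption that the merge step pads shorter granules with fill values at the tail. A self-contained sketch with toy values (not real data):

    import numpy as np

    merged_slice = np.array([1.0, 2.0, 3.0, np.nan])   # granule row padded to merged width
    origin = np.array([1.0, 2.0, 3.0])                 # original granule data
    resized = np.resize(merged_slice, origin.shape)    # -> [1.0, 2.0, 3.0]
    assert np.array_equal(resized, origin, equal_nan=True)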

def verify_groups(merged_group, origin_group, subset_index, both_merged=False):
verify_dims(merged_group, origin_group, both_merged)
verify_attrs(merged_group, origin_group, both_merged)
verify_variables(merged_group, origin_group, subset_index, both_merged)

for child_group in origin_group.groups:
merged_subgroup = merged_group[child_group]
origin_subgroup = origin_group[child_group]
verify_groups(merged_subgroup, origin_subgroup, subset_index, both_merged)

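
Note: verify_groups ties the three checks together and recurses into every subgroup, so a single call covers the whole file hierarchy. A minimal driver, assuming merged.nc was built with granule_0.nc as its first input (file names illustrative):

    merged = nc.Dataset("merged.nc")
    origin = nc.Dataset("granule_0.nc")
    verify_groups(merged, origin, 0)   # 0 = this granule's slot along subset_index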

# GET TOKEN FROM CMR
def get_token(cmr_root, username, password):
token_api = "https://{}/api/users/tokens".format(cmr_root)
response = requests.get(token_api, auth=(username, password))
content = response.json()
if len(content) > 0:
return content[0].get('access_token')
else:
create_token_api = "https://{}/api/users/token".format(cmr_root)
response = requests.post(create_token_api, auth=(username, password))
content = response.json()
return content.get('access_token')

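
Note: get_token follows the Earthdata Login token API: GET /api/users/tokens returns any existing tokens, and only if none exist does it POST to /api/users/token to mint one. Typical use, mirroring the call made later in test() (credentials assumed to be in the environment):

    token = get_token("uat.urs.earthdata.nasa.gov", username, password)
    headers = {"Authorization": f"Bearer {token}"}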

def download_file(url, local_path, headers):
response = requests.get(url, stream=True, headers=headers)
if response.status_code == 200:
with open(local_path, 'wb') as file:
for chunk in response.iter_content(chunk_size=8192):
file.write(chunk)
print("Original File downloaded successfully.")
else:
print(f"Failed to download the file. Status code: {response.status_code}")

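
Note: download_file streams the granule in 8 KiB chunks, so large files never sit fully in memory; it needs the bearer-token headers built above because PO.DAAC data URLs sit behind Earthdata Login. For example (URL hypothetical):

    url = "https://archive.podaac.earthdata.nasa.gov/some/granule.nc"
    download_file(url, os.path.basename(urlparse(url).path), headers)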

def test(collection_id, venue):

max_results = 2

username, password = get_username_and_password(venue)
environment = Environment.UAT if venue == "UAT" else Environment.PROD
environment = Environment.UAT if venue.lower() == "uat" else Environment.PROD
harmony_client = Client(auth=(username, password), env=environment)

collection = Collection(id=collection_id)
@@ -115,74 +218,44 @@ def test(collection_id, venue):

filename = file_names[0]
# Handle time dimension and variables dropping
dt = nc.Dataset(filename, 'r')
groups = list(dt.groups)
dt.close()

drop_variables = [
'time',
'sample',
'meas_ind',
'wvf_ind',
'ddm',
'averaged_l1'
]
if not groups:
groups = [None]
merge_dataset = nc.Dataset(filename, 'r')

for group in groups:
cmr_base_url = "https://cmr.earthdata.nasa.gov/search/granules.umm_json?readable_granule_name="
edl_root = 'urs.earthdata.nasa.gov'

ds = xr.open_dataset(filename, group=group, decode_times=False, drop_variables=drop_variables)
if venue.lower() == 'uat':
cmr_base_url = "https://cmr.uat.earthdata.nasa.gov/search/granules.umm_json?readable_granule_name="
edl_root = 'uat.urs.earthdata.nasa.gov'

token = get_token(edl_root, username, password)
headers = {
"Authorization": f"Bearer {token}"
}

assert len(ds.coords['subset_index']) == max_results
variables = list(ds.variables)
x_var, y_var = get_x_y_variables(variables)
original_files = merge_dataset.variables['subset_files']
assert len(original_files) == max_results

for v in variables:
if v not in ['subset_files', 'lat', 'lon', 'latitude', 'longitude', 'beam_clat', 'beam_clon']:
variable = v
break
for file in original_files:

if x_var is not None and y_var is not None:
break
file_name = file.rsplit(".", 1)[0]
print(file_name)
cmr_query = f"{cmr_base_url}{file_name}&collection_concept_id={collection_id}"
print(cmr_query)

ds.close()

if x_var is None or y_var is None:
raise Exception("Lon and Lat variables are not found")

for index in range(0, max_results):
ax = ds.isel(subset_index=index).plot.scatter(
y=y_var,
x=x_var,
hue=variable,
s=1,
levels=9,
cmap="jet",
aspect=2.5,
size=9
)
plt.xlim(0., 360.)
plt.ylim(-90., 90.)
plt.show(block=False)
plt.close(ax.figure)

ax = ds.plot.scatter(
y=y_var,
x=x_var,
hue=variable,
s=1,
levels=9,
cmap="jet",
aspect=2.5,
size=9
)
plt.xlim(0., 360.)
plt.ylim(-90., 90.)
plt.show(block=False)
plt.close(ax.figure)
response = requests.get(cmr_query, headers=headers)

result = response.json()
links = result.get('items')[0].get('umm').get('RelatedUrls')
for link in links:
if link.get('Type') == 'GET DATA':
data_url = link.get('URL')
parsed_url = urlparse(data_url)
local_file_name = os.path.basename(parsed_url.path)
download_file(data_url, local_file_name, headers)

ds.close()
for i, file in enumerate(original_files):
origin_dataset = nc.Dataset(file)
verify_groups(merge_dataset, origin_dataset, i)

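One fragile spot in the download loop above: result.get('items')[0] raises a TypeError or IndexError when the CMR query comes back empty, which surfaces as a confusing failure. A slightly more defensive variant of that lookup (same behavior on success) might be:

    items = response.json().get("items") or []
    if not items:
        raise Exception(f"No CMR granule found for {file_name}")
    links = items[0]["umm"]["RelatedUrls"]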

def run():
@@ -220,7 +293,7 @@ def run():
fails.append(collection)

# Create output files
    if output_location:
success_outfile = path.realpath(f'{output_location}/{_args.env}_success.txt')
fail_outfile = path.realpath(f'{output_location}/{_args.env}_fail.txt')

@@ -234,4 +307,5 @@


if __name__ == '__main__':
print("Start running test .......")
run()
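
For a quick local run outside the workflow, the test can also be driven directly; credentials come from UAT_USERNAME / UAT_PASSWORD, and the concept ID below is a placeholder:

    # roughly equivalent to: python3 add_collection_test.py -e uat -i <collections.txt> -o <out_dir>
    test("C1234567890-POCLOUD", "uat")   # hypothetical collection concept ID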