Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature/podaac 4653: Introducing new test pipeline #35

Merged
merged 4 commits into from
Jul 18, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .github/workflows/build-pipeline.yml
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,9 @@ jobs:
run: |
poetry run pylint podaac
poetry run flake8 podaac
- name: Install dependencies
run: |
poetry add --dev papermill
- name: Test and coverage
run: |
poetry run pytest --junitxml=build/reports/pytest.xml --cov=podaac/ --cov-report=xml:build/reports/coverage.xml -m "not aws and not integration" tests/
Expand Down
29 changes: 29 additions & 0 deletions .github/workflows/jupyter_test.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# This workflow manually runs the Jupyter notebook test pipeline
name: Jupyter Test
# Controls when the workflow will run
on:
# Allows you to run this workflow manually from the Actions tab
workflow_dispatch:

jobs:
# First job in the workflow installs and verifies the software
build:
name: Test Execution
# The type of runner that the job will run on
runs-on: ubuntu-latest
steps:
# Checks-out your repository under $GITHUB_WORKSPACE, so your job can access it
- uses: actions/checkout@v2
- uses: actions/setup-python@v2
with:
python-version: 3.9
- name: Create prerequisites
run: |
mkdir current_project
touch current_project/test_in.txt
echo "ASCATC-L2-Coastal" > current_project/test_in.txt
mkdir tests/jupyter/notebooks/output
- name: Run Jupyter notebook
run: |
python3 "./tests/jupyter/notebook_test.py" -n "./tests/jupyter/notebooks/harmony_concise_api_test.ipynb" -e uat -i ./current_project/test_in.txt -o ./tests/jupyter/notebooks/output

116 changes: 116 additions & 0 deletions tests/jupyter/notebook_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
import papermill as pm
import json
import os
import argparse

def parse_args():
    """
    Parse the command-line arguments for the notebook test runner.

    Returns
    -------
    argparse.Namespace
        Parsed arguments: collections, env, notebook, input_file, output_path.
    """

    parser = argparse.ArgumentParser(
        # NOTE: description previously said "Update CMR with latest profile",
        # a copy-paste error from another tool; this script executes Jupyter
        # notebooks against a list of collections.
        description='Run a Jupyter notebook test against a list of collections',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter
    )

    parser.add_argument('-c', '--collections',
                        help='JSON list of collections to test',
                        required=False,
                        metavar='',
                        type=str)

    parser.add_argument('-e', '--env',
                        help='CMR environment used to pull results from.',
                        required=True,
                        choices=["uat", "ops"],
                        metavar='uat or ops')

    parser.add_argument('-n', '--notebook',
                        help='Notebook to run',
                        required=True,
                        metavar='')

    parser.add_argument('-i', '--input_file',
                        help='File of JSON (or newline-delimited) collections',
                        required=False,
                        metavar='')

    parser.add_argument('-o', '--output_path',
                        help='Output path for success and fail lists',
                        required=False,
                        metavar='')

    args = parser.parse_args()
    return args

def run():
    """
    Run the configured notebook once per collection, recording results.

    Reads collections either from the ``-c`` JSON argument or from the
    ``-i`` input file (JSON, falling back to one collection id per line),
    executes the notebook given by ``-n`` via papermill with
    ``collection``/``venue`` parameters, and optionally writes
    ``<env>_success.txt`` / ``<env>_fail.txt`` to the ``-o`` path.

    Returns
    -------
    None
    """

    _args = parse_args()

    environment = _args.env
    notebook = _args.notebook
    input_file = _args.input_file

    # Start empty so we don't hit a NameError when neither -c nor -i is given.
    collections = []
    if _args.collections:
        collections = json.loads(_args.collections)
    if input_file:
        with open(input_file) as json_data:
            try:
                collections = json.load(json_data)
            except ValueError:
                # Not JSON: treat the file as newline-delimited collection ids.
                collections = []
                json_data.seek(0)
                for line in json_data.readlines():
                    collections.append(line.strip())

    # BUG FIX: the notebook path was previously hardcoded here, silently
    # ignoring the required -n argument. Use the argument as intended.
    notebook_path = os.path.dirname(notebook)
    notebook_name = os.path.basename(notebook)

    success = []
    fails = []

    # papermill notebooks expect the venue parameter as "uat" or "prod".
    venue = "prod"
    if environment == "uat":
        venue = "uat"

    for collection in collections:

        try:
            print(collection)
            pm.execute_notebook(
                notebook,
                "{}/output/{}_{}_output_{}".format(notebook_path, collection, environment, notebook_name),
                parameters=dict(collection=collection, venue=venue)
            )
            success.append(collection)
        except Exception as ex:  # best-effort: record the failure and continue
            print(ex)
            fails.append(collection)

    if _args.output_path:
        success_outfile = f'{_args.output_path}/{_args.env}_success.txt'
        fail_outfile = f'{_args.output_path}/{_args.env}_fail.txt'

        if success:
            with open(success_outfile, 'w') as the_file:
                the_file.writelines(x + '\n' for x in success)

        if fails:
            with open(fail_outfile, 'w') as the_file:
                the_file.writelines(x + '\n' for x in fails)

if __name__ == '__main__':
    run()
241 changes: 241 additions & 0 deletions tests/jupyter/notebooks/harmony_concise_api_test.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,241 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {
"tags": []
},
"source": [
"# Harmony EOSS Concise API Tutorial"
]
},
{
"cell_type": "markdown",
"metadata": {
"tags": []
},
"source": [
"## Before you start\n",
"Before you beginning this tutorial, make sure you have an account in the Earthdata Login UAT or Production environment, which \n",
"will be used for this notebook by visiting [https://uat.urs.earthdata.nasa.gov](https://uat.urs.earthdata.nasa.gov).\n",
"These accounts, as all Earthdata Login accounts, are free to create and only take a moment to set up."
]
},
{
"cell_type": "markdown",
"metadata": {
"jp-MarkdownHeadingCollapsed": true,
"tags": []
},
"source": [
"### Set Up Authentication\n",
"\n",
"We need some boilerplate up front to log in to Earthdata Login. The function below will allow Python\n",
"scripts to log into any Earthdata Login application programmatically. To avoid being prompted for\n",
"credentials every time you run and also allow clients such as curl to log in, you can add the following\n",
"to a `.netrc` (`_netrc` on Windows) file in your home directory:\n",
"\n",
"```\n",
"machine uat.urs.earthdata.nasa.gov\n",
" login <your username>\n",
" password <your password>\n",
" \n",
"machine urs.earthdata.nasa.gov\n",
" login <your username>\n",
" password <your password>\n",
"```\n",
"\n",
"Make sure that this file is only readable by the current user or you will receive an error stating\n",
"\"netrc access too permissive.\"\n",
"\n",
"`$ chmod 0600 ~/.netrc` \n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from urllib import request\n",
"from http.cookiejar import CookieJar\n",
"import getpass\n",
"import netrc\n",
"import json\n",
"import requests\n",
"import sys\n",
"import shutil\n",
"import xarray as xr\n",
"import cmr\n",
"import numpy as np\n",
"from podaac.subsetter import subset\n",
"\n",
"def setup_earthdata_login_auth(endpoint):\n",
" \"\"\"\n",
" Set up the request library so that it authenticates against the given Earthdata Login\n",
" endpoint and is able to track cookies between requests. This looks in the .netrc file \n",
" first and if no credentials are found, it prompts for them.\n",
"\n",
" Valid endpoints include:\n",
" uat.urs.earthdata.nasa.gov - Earthdata Login UAT (Harmony's current default)\n",
" urs.earthdata.nasa.gov - Earthdata Login production\n",
" \"\"\"\n",
" try:\n",
" username, _, password = netrc.netrc().authenticators(endpoint)\n",
" except (FileNotFoundError, TypeError):\n",
" # FileNotFound = There's no .netrc file\n",
" # TypeError = The endpoint isn't in the netrc file, causing the above to try unpacking None\n",
" print('Please provide your Earthdata Login credentials to allow data access')\n",
" print('Your credentials will only be passed to %s and will not be exposed in Jupyter' % (endpoint))\n",
" username = input('Username:')\n",
" password = getpass.getpass()\n",
"\n",
" manager = request.HTTPPasswordMgrWithDefaultRealm()\n",
" manager.add_password(None, endpoint, username, password)\n",
" auth = request.HTTPBasicAuthHandler(manager)\n",
"\n",
" jar = CookieJar()\n",
" processor = request.HTTPCookieProcessor(jar)\n",
" opener = request.build_opener(auth, processor)\n",
" request.install_opener(opener)\n",
"\n",
"\n",
"# GET TOKEN FROM CMR \n",
"def get_token( url: str,client_id: str, user_ip: str,endpoint: str) -> str:\n",
" try:\n",
" token: str = ''\n",
" username, _, password = netrc.netrc().authenticators(endpoint)\n",
" xml: str = \"\"\"<?xml version='1.0' encoding='utf-8'?>\n",
" <token><username>{}</username><password>{}</password><client_id>{}</client_id>\n",
" <user_ip_address>{}</user_ip_address></token>\"\"\".format(username, password, client_id, user_ip)\n",
" headers: Dict = {'Content-Type': 'application/xml','Accept': 'application/json'}\n",
" resp = requests.post(url, headers=headers, data=xml)\n",
" \n",
" response_content: Dict = json.loads(resp.content)\n",
" token = response_content['token']['id']\n",
" except:\n",
" print(\"Error getting the token - check user name and password\", sys.exc_info()[0])\n",
" return token"
]
},
{
"cell_type": "markdown",
"metadata": {
"tags": []
},
"source": [
"### Set up Environment and Collection data\n",
"\n",
"Below we set a default collection (C1940473819-POCLOUD) and environment (prod) which we will use for the podaac Concise test. As the field is tagged with parameters tag, when executing the notebook, they can be provided as inputs from command line, which will overwrite these default values.\n",
"Finding this information would complicate the tutorial- but po.daac has a tutorial available for using the CMR API to find collections and granules of interest. Please see the following tutorial for that information:\n",
"\n",
"PODAAC_CMR.ipynb\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"tags": [
"parameters"
]
},
"outputs": [],
"source": [
"# This cell is tagged with parameters\n",
"\n",
"collection = 'C1940473819-POCLOUD'\n",
"venue = 'prod'"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"# Prod Defaults\n",
"cmr_root = 'cmr.earthdata.nasa.gov'\n",
"harmony_root = 'https://harmony.earthdata.nasa.gov'\n",
"edl_root = 'urs.earthdata.nasa.gov'\n",
"mode = cmr.queries.CMR_OPS\n",
"\n",
"# UAT Defaults\n",
"if venue == 'uat':\n",
" cmr_root = 'cmr.uat.earthdata.nasa.gov'\n",
" harmony_root = 'https://harmony.uat.earthdata.nasa.gov'\n",
" edl_root = 'uat.urs.earthdata.nasa.gov'\n",
" mode = cmr.queries.CMR_UAT\n",
"\n",
"print (\"Environments: \")\n",
"print (\"\\t\" + cmr_root)\n",
"print (\"\\t\" + harmony_root)\n",
"print (\"\\t\" + edl_root)"
]
},
{
"cell_type": "markdown",
"metadata": {
"tags": []
},
"source": [
"## Test execution\n",
" "
]
},
{
"cell_type": "markdown",
"metadata": {
"tags": []
},
"source": [
"Now call the above function to set up Earthdata Login for subsequent requests"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"setup_earthdata_login_auth(edl_root)\n",
"token_url=\"https://\"+cmr_root+\"/legacy-services/rest/tokens\"\n",
"token=get_token(token_url,'jupyter', '127.0.0.1',edl_root)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Verify test results\n"
]
}
],
"metadata": {
"celltoolbar": "Tags",
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.7"
},
"toc-autonumbering": true
},
"nbformat": 4,
"nbformat_minor": 4
}