
Cryo 184 #60

Merged
46 commits merged on Sep 26, 2023
Commits
dc580b8
outline for uwg demo notebook
Sep 8, 2023
2940919
add geopandas function
Sep 8, 2023
6207f93
new coiled notebook
Sep 14, 2023
88bcfee
troubleshoot coiled env
asteiker Sep 14, 2023
53fc07d
adding h5coro notebook
betolink Sep 18, 2023
c6d8fb7
add gridding workflow, refactor atl reader
Sep 20, 2023
1b86ece
refactor reader and add resample code
Sep 20, 2023
c5d60c5
add affine and resampling
Sep 20, 2023
2dabceb
add geojson for ross sea
Sep 21, 2023
fea1bd3
add explanation for reader
Sep 21, 2023
99b9301
fix typos and grammar
Sep 21, 2023
803a6cd
extract readers to module
Sep 21, 2023
6fd8d98
add aggregation code
Sep 22, 2023
9bb4917
add plotting
Sep 22, 2023
8f6cf5a
adding h5cloud methods for ATL10
betolink Sep 22, 2023
e91e27f
Merge remote-tracking branch 'origin/cryo-184' into cryo-184
betolink Sep 22, 2023
58f2c40
delete unnecessary imports
Sep 22, 2023
0fe084c
Merge branch 'cryo-184' of https://github.com/nsidc/NSIDC-Data-Tutori…
Sep 22, 2023
148497e
fix transform
Sep 23, 2023
de7b3f3
update demo intro text and file renames
asteiker Sep 23, 2023
e12a076
updated text and sample image
asteiker Sep 24, 2023
6c4ed54
added readme and included h5coro with pip install
betolink Sep 24, 2023
7a302c0
fixed transform from x,y to col, row
Sep 24, 2023
856e5a0
fix plotting and gridding
Sep 24, 2023
6012e22
add notebook to work out gridding issue
Sep 24, 2023
7a795dc
add new final plot for ATL10_h5coro
Sep 24, 2023
5717ccd
fix path to plot
Sep 24, 2023
96810b6
fix merge conflicts
Sep 24, 2023
24c56c6
remove geopandas memory usage
Sep 24, 2023
7c86e12
remove warning about not needing to open files with h5coro
Sep 24, 2023
cc61a35
change text to reflect use of pqdm
Sep 24, 2023
e0027d2
add note to explain why parquet step not needed
Sep 24, 2023
5cd8b02
remove write to parquet
Sep 24, 2023
bbc409b
respond to question about grids
Sep 24, 2023
caee183
make equations nice
Sep 24, 2023
e2ddc46
troubleshooting read_atl10_local
Sep 24, 2023
08b7ef2
add error to readers
Sep 24, 2023
210922a
updated notebook, now it uses the same function from the script
betolink Sep 25, 2023
453f7d7
minor cleanup, added questions
asteiker Sep 25, 2023
3824416
comment coiled in script and remove todos
asteiker Sep 25, 2023
afcb25c
change final product plot
Sep 25, 2023
84235ae
fix regridding
Sep 25, 2023
f637072
remove check on nrow type
Sep 25, 2023
a56dbec
rm old img and copy rendered notebook
asteiker Sep 26, 2023
ae922e6
deleted unused file
betolink Sep 26, 2023
ebc848c
ensure coiled is >=0.9.30
betolink Sep 26, 2023
4 changes: 3 additions & 1 deletion binder/environment.yml
@@ -16,7 +16,7 @@ dependencies:
- netcdf4=1.6.2
- geopy
- pyresample
- earthaccess>=0.5.2
- earthaccess>=0.6.1
- fiona
- zarr
- ipympl
@@ -35,9 +35,11 @@ dependencies:
- pipreqsnb
- conda-lock>=1.2.1
- mamba>=1.0
- coiled>=0.9.30
- pip
- pip:
- git+https://github.com/icesat2py/icepyx.git
- git+https://github.com/ICESat2-SlideRule/h5coro.git@main
- awscliv2
platforms:
- linux-64
893 changes: 893 additions & 0 deletions notebooks/ICESat-2_Cloud_Access/ATL10-h5coro.ipynb

Large diffs are not rendered by default.

1,733 changes: 1,733 additions & 0 deletions notebooks/ICESat-2_Cloud_Access/ATL10-h5coro_rendered.ipynb

Large diffs are not rendered by default.

115 changes: 115 additions & 0 deletions notebooks/ICESat-2_Cloud_Access/h5cloud/read_atl10.py
@@ -0,0 +1,115 @@
#!/usr/bin/env python

# import coiled

import geopandas as gpd
import numpy as np
import pandas as pd
from rich import print as rprint
from itertools import product
from pqdm.threads import pqdm

import earthaccess
from h5coro import s3driver, webdriver
import h5coro


def get_strong_beams(f):
    """Returns the ground tracks of the strong beams, based on the ICESat-2 spacecraft orientation.

    sc_orient is 0 when the spacecraft flies backward (strong beams on the left side),
    1 when it flies forward (strong beams on the right side), and 2 during transitions.
    """
    orient = f['orbit_info/sc_orient'][0]

    if orient == 0:
        return [f"gt{i}l" for i in [1, 2, 3]]
    elif orient == 1:
        return [f"gt{i}r" for i in [1, 2, 3]]
    else:
        raise KeyError("Spacecraft orientation is neither forward nor backward")


def read_atl10(files, bounding_box=None, executors=4, environment="local", credentials=None):
    """Returns a consolidated GeoPandas dataframe for a set of ATL10 file pointers.

    Parameters:
        files (list): ATL10 granule locations, HTTPS URLs for environment="local"
            or S3 paths (without the "s3://" prefix) for environment="cloud"
        bounding_box (str): optional "min_lon, min_lat, max_lon, max_lat" string used to clip the tracks
        executors (int): number of threads for the parallel reads
        environment (str): "local" to read over HTTPS, anything else to read directly from S3
        credentials: an Earthdata bearer token (local) or a dict of AWS credentials (cloud)
    """
    if environment == "local":
        driver = webdriver.HTTPDriver
    else:
        driver = s3driver.S3Driver

    # delta_time values are referenced to the GPS epoch via atlas_sdp_gps_epoch
    GPS_EPOCH = pd.to_datetime('1980-01-06 00:00:00')

    def read_h5coro(file):
        """Reads the datasets required for creating gridded freeboard from a single ATL10 file.

        file: the location of a single ATL10 granule (HTTPS URL or S3 path)

        returns: a geopandas dataframe with the strong-beam freeboard segments
        """
        # Open the file object
        h5 = h5coro.H5Coro(file, driver, credentials=credentials)

        # Get the strong beams based on the spacecraft orientation
        ancillary_datasets = ["orbit_info/sc_orient", "ancillary_data/atlas_sdp_gps_epoch"]
        f = h5.readDatasets(datasets=ancillary_datasets, block=True)
        strong_beams = get_strong_beams(f)
        atlas_sdp_gps_epoch = f["ancillary_data/atlas_sdp_gps_epoch"][:]

        # Create the list of datasets to load
        datasets = ["freeboard_segment/latitude",
                    "freeboard_segment/longitude",
                    "freeboard_segment/delta_time",
                    "freeboard_segment/seg_dist_x",
                    "freeboard_segment/heights/height_segment_length_seg",
                    "freeboard_segment/beam_fb_height",
                    "freeboard_segment/heights/height_segment_type"]
        ds_list = ["/".join(p) for p in product(strong_beams, datasets)]
        # Load the datasets
        f = h5.readDatasets(datasets=ds_list, block=True)
        # rprint(f["gt2l/freeboard_segment/latitude"], type(f["gt2l/freeboard_segment/latitude"]))

        # Create a list of geopandas.GeoDataFrames, one per strong beam
        tracks = []
        for beam in strong_beams:
            ds = {dataset.split("/")[-1]: f[dataset][:] for dataset in ds_list if dataset.startswith(beam)}

            # Convert delta_time to datetime
            ds["delta_time"] = GPS_EPOCH + pd.to_timedelta(ds["delta_time"] + atlas_sdp_gps_epoch, unit='s')
            # we don't need nanoseconds to grid daily, let alone weekly
            ds["delta_time"] = ds["delta_time"].astype('datetime64[s]')

            # Add the beam identifier
            ds["beam"] = beam

            # Set fill values to NaN, using 100 m as the threshold
            ds["beam_fb_height"] = np.where(ds["beam_fb_height"] > 100, np.nan, ds["beam_fb_height"])

            geometry = gpd.points_from_xy(ds["longitude"], ds["latitude"])
            del ds["longitude"]
            del ds["latitude"]

            gdf = gpd.GeoDataFrame(ds, geometry=geometry, crs="EPSG:4326")
            gdf.dropna(axis=0, inplace=True)
            if bounding_box is not None:
                bbox = [float(coord) for coord in bounding_box.split(",")]
                gdf = gdf.cx[bbox[0]:bbox[2], bbox[1]:bbox[3]]
            tracks.append(gdf)

        df = pd.concat(tracks)
        return df

    dfs = pqdm(files, read_h5coro, n_jobs=executors)
    combined = pd.concat(dfs)

    return combined
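
# Example usage (a sketch; assumes an authenticated earthaccess session and
# mirrors how workflow.py builds the credentials for the cloud environment):
#
#   import earthaccess
#   earthaccess.login()
#   granules = earthaccess.search_data(short_name="ATL10", version="006",
#                                      bounding_box=(-180, -90, 180, -60),
#                                      temporal=("2023-06-01", "2023-09-30"))
#   files = [g.data_links(access="direct")[0].replace("s3://", "") for g in granules]
#   aws_creds = earthaccess.get_s3_credentials("NSIDC")
#   credentials = {"aws_access_key_id": aws_creds["accessKeyId"],
#                  "aws_secret_access_key": aws_creds["secretAccessKey"],
#                  "aws_session_token": aws_creds["sessionToken"]}
#   gdf = read_atl10(files, bounding_box="-180, -90, 180, -60",
#                    environment="cloud", credentials=credentials)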


30 changes: 30 additions & 0 deletions notebooks/ICESat-2_Cloud_Access/h5cloud/readme.md
@@ -0,0 +1,30 @@
## Running and scaling Python with [Coiled serverless functions](https://docs.coiled.io/user_guide/usage/functions/index.html)

This script contains the same ATL10-reading code as the notebook; the one difference is that we use a function decorator from Coiled that lets us execute the function in the cloud with no modifications whatsoever.
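
For a sense of the pattern, here is a minimal sketch (the `process_granule` function, its body, and the bucket URL are hypothetical, and it assumes `coiled login` has already succeeded):

```python
import coiled

@coiled.function(region="us-west-2", memory="4 GB")
def process_granule(url):
    # An ordinary Python function body; Coiled runs it on a cloud VM
    # with a copy of our local software environment.
    return url.upper()

# Calling the decorated function dispatches it to the cloud transparently.
print(process_granule("s3://example-bucket/atl10-granule.h5"))
```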

The only requirement for this workflow is an active Coiled account; then we execute this from our terminal:

```bash
coiled login
```

This will open a browser tab so we can authenticate with the Coiled API.

> Note: If you would like to test this, ask us to include you in Openscapes!


Our functions can be parallelized, scaling the computation to hundreds of nodes if needed, much as we would with AWS Lambda functions. Once we install and activate [`nsidc-tutorials`](../../binder/environment.yml), we can run the script with the following command:

```bash
python workflow.py --bbox="-180, -90, 180, -60" --year=2023 --out="test-2023-local" --env=local
```

This will run the code locally. If we want to run the code in the cloud, we set `--env=cloud`:

```bash
python workflow.py --bbox="-180, -90, 180, -60" --year=2023 --out="test-2023-cloud" --env=cloud
```

The first time we execute this function, provisioning will take a couple of minutes while Coiled syncs our current Python environment with the cloud instances that will execute our code.
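
Coiled functions can also fan out over many inputs at once. A sketch, again assuming the hypothetical `process_granule` function above and Coiled's `.map` helper:

```python
# Process many granules in parallel; Coiled distributes the calls
# across cloud workers and collects the results.
urls = [f"s3://example-bucket/granule_{i}.h5" for i in range(100)]
results = list(process_granule.map(urls))
```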
74 changes: 74 additions & 0 deletions notebooks/ICESat-2_Cloud_Access/h5cloud/workflow.py
@@ -0,0 +1,74 @@
#!/usr/bin/env python

import coiled

import geopandas as gpd
import numpy as np
import pandas as pd
from rich import print as rprint
from itertools import product
import argparse

import earthaccess
from h5coro import h5coro, s3driver

from read_atl10 import read_atl10

if __name__ == "__main__":

    parser = argparse.ArgumentParser()
    parser.add_argument('--bbox', help='bounding box as "min_lon, min_lat, max_lon, max_lat"')
    parser.add_argument('--year', help='year to process')
    parser.add_argument('--env', default='local', help='execute in the cloud or locally, default: local')
    parser.add_argument('--out', help='output file name')
    args = parser.parse_args()

    rprint(f"executing in {args.env} mode")

    auth = earthaccess.login()

    # ross_sea = (-180, -78, -160, -74)
    # antarctic = (-180, -90, 180, -60)

    year = int(args.year)
    bbox = tuple(float(c) for c in args.bbox.split(","))

    print(f"Searching ATL10 data for year {year} ...")
    granules = earthaccess.search_data(
        short_name='ATL10',
        version='006',
        cloud_hosted=True,
        bounding_box=bbox,
        temporal=(f'{args.year}-06-01', f'{args.year}-09-30'),
        count=4,
        debug=True
    )

    if args.env == "local":
        files = [g.data_links(access="out_of_region")[0] for g in granules]
        credentials = earthaccess.__auth__.token["access_token"]

        df = read_atl10(files, bounding_box=args.bbox, environment="local", credentials=credentials)
    else:
        files = [g.data_links(access="direct")[0].replace("s3://", "") for g in granules]
        aws_credentials = earthaccess.get_s3_credentials("NSIDC")
        credentials = {
            "aws_access_key_id": aws_credentials["accessKeyId"],
            "aws_secret_access_key": aws_credentials["secretAccessKey"],
            "aws_session_token": aws_credentials["sessionToken"]
        }

        @coiled.function(region="us-west-2",
                         memory="4 GB",
                         keepalive="1 HOUR")
        def cloud_runner(files, bounding_box, credentials):
            df = read_atl10(files, bounding_box=bounding_box, environment="cloud", credentials=credentials)
            return df

        df = cloud_runner(files, args.bbox, credentials=credentials)

    df.to_parquet(f"{args.out}.parquet")
    rprint(df)

81 changes: 81 additions & 0 deletions notebooks/ICESat-2_Cloud_Access/ross_sea.json
@@ -0,0 +1,81 @@
{
"type": "FeatureCollection",
"features": [
{
"type": "Feature",
"properties": {},
"geometry": {
"coordinates": [
[
[
-191.09608556432593,
-73.84648052492354
],
[
-196.32896148365322,
-76.01486891873441
],
[
-201.3333475059275,
-79.45419249238772
],
[
-195.62738051734928,
-82.20681871096693
],
[
-189.41756278781764,
-84.1511348270979
],
[
-167.0795373869447,
-84.71222453066771
],
[
-154.94650971884352,
-84.47077199426083
],
[
-147.87987772139172,
-83.76551904624706
],
[
-138.89031336546202,
-83.16126208208007
],
[
-139.89760391715487,
-81.81509135152459
],
[
-145.07462138020958,
-75.8454713912678
],
[
-145.2859453568654,
-73.60545521193768
],
[
-155.7529050321871,
-71.77435794070743
],
[
-173.60352774698885,
-71.50777786832501
],
[
-187.08441940129651,
-71.32576778967325
],
[
-191.09608556432593,
-73.84648052492354
]
]
],
"type": "Polygon"
},
"id": 0
}
]
}