Accessing the data via the API, as in the suggested Jupyter notebook, was complex, so this notebook tries the `earthaccess` Python package instead.


In [None]:
import datetime
import math

import earthaccess
import fsspec
import rioxarray as rxr
import xarray as xr

In [None]:
# Log in to NASA Earthdata through earthaccess using the "netrc" strategy.
# Calling earthaccess.login() with no arguments is the alternative that was tried first.
LOGIN_STRATEGY = "netrc"
auth = earthaccess.login(strategy=LOGIN_STRATEGY)


Using date + time in the format returned by NASA Earthdata, truncated to the hour (`%Y-%m-%dT%H`). The returned timestamps are more precise than that, but I think hourly resolution is sufficient for the search.

In [None]:
# Build the temporal search window: the last LOOKBACK_DAYS days, truncated to the hour.
# The window is computed in UTC rather than local time because the strings carry
# no timezone and the temporal search treats such timestamps as UTC; using the
# local clock would shift the window by the machine's UTC offset.
LOOKBACK_DAYS = 4
SEARCH_TIME_FORMAT = "%Y-%m-%dT%H"  # If needed to the second, their format is T%H:%M:%SZ

today = datetime.datetime.now(datetime.timezone.utc)
window_start = today - datetime.timedelta(days=LOOKBACK_DAYS)

# Keep the datetime and its string form under separate names so re-running the
# cell never depends on what type start_date currently holds.
start_date = window_start.strftime(SEARCH_TIME_FORMAT)
end_date = today.strftime(SEARCH_TIME_FORMAT)
print(start_date)
print(end_date)

In [None]:


# Search parameters for the SPL4SMGP (version 008) collection at NSIDC,
# limited to a lon/lat bounding box and the time window built above.
# Results are requested sorted by descending end date.
search_params = dict(
    # count=100,
    short_name="SPL4SMGP",
    version="008",
    daac="NSIDC",
    provider="NSIDC_ECS",
    doi="10.5067/T5RUATAQREF8",
    bounding_box=(-126, 24, -65, 50),
    temporal=(start_date, end_date),
    sort_key="-end_date",
)
granules = earthaccess.search_data(**search_params)

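This search returns a list of data granules. I originally planned to sort them and pull out the latest manually; because the search already uses `sort_key="-end_date"`, the most recent granule should come first, so the first item is used below.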

In [None]:
# Take the first search result. The search asks for results sorted by
# descending end date (sort_key="-end_date"), so index 0 is expected to be the
# most recent granule.
if not granules:
    # Fail with an explanation instead of a bare "list index out of range".
    raise IndexError(
        "The earthaccess search returned no granules; "
        "widen the temporal window or check the search parameters."
    )
item = granules[0]
print(item)

In [None]:

# Untested sketch for picking the latest granule by hand instead of relying on sort order:
# latest = max(granules, key=lambda g: datetime.datetime.fromisoformat(g.temporal_coverage["end"]))

# Use the first data link of the selected granule as the file URL.
data_urls = item.data_links()
url = data_urls[0]
print(url)


In [None]:
# Can download directly this way if desired
# earthaccess.download(granules, "./data/smap/", provider='NSIDC_ECS')

Using earthaccess I can get a file-like object (an HTTPFileSystem file), but it can't be opened directly with xarray: h5netcdf gives an "unhashable type list" error, and netcdf4 cannot read bytes or file-like objects. I can get the URL by calling `.data_links()[0]` on a data search result.

I tried using: 

```python
fs = fsspec.filesystem("https")
with fs.open(url, mode="rb") as f:
    ds_smap = xr.open_dataset(f, engine="h5netcdf")
```

But this gave a 401 unauthorized error. 

`earthaccess` provides `earthaccess.get_fsspec_https_session()`, which returns an authenticated fsspec HTTPS session that can open the URL.

In [None]:
# Open the granule over HTTPS with the session provided by earthaccess and read
# three variables from the 'Geophysical_Data' group into memory.
# `url` comes from the earlier cell (equivalently: granules[0].data_links()[0]).
fs = earthaccess.get_fsspec_https_session()
selected_vars = ["sm_surface_wetness", "vegetation_greenness_fraction", "surface_temp"]

with fs.open(url, mode="rb") as remote_file:
    ds_smap = xr.open_dataset(remote_file, engine="h5netcdf", group="Geophysical_Data")
    ds_vars_smap = ds_smap[selected_vars]
    # Load while the remote file is still open.
    ds_vars_smap.load()

In [None]:
# Show which file was read, then the mean of surface wetness as a quick check.
print(url)
surface_wetness = ds_vars_smap["sm_surface_wetness"]
surface_wetness.mean()

In [None]:
# Same three-variable selection and load as in the remote-open cell,
# repeated here outside the `with` block.
subset_names = ["sm_surface_wetness", "vegetation_greenness_fraction", "surface_temp"]
ds_vars_smap = ds_smap[subset_names]
ds_vars_smap.load()

In [None]:
# Pull the surface wetness variable out of the loaded subset for plotting.
ds_sw = ds_vars_smap["sm_surface_wetness"]

In [None]:
# Quick-look plot of surface wetness using xarray's default plotting.
ds_sw.plot()

## Workflow when downloading the file

In [None]:
# smap_20250604 = xr.open_mfdataset(httpfile, engine='h5netcdf') #need to have dask installed to use mfdataset. Couldn't get "open_dataset" to work
# Open the downloaded granule twice: once for the 'Geophysical_Data' group and
# once for the file's root group.
# NOTE: this rebinds ds_smap, which an earlier cell used for the remotely opened dataset.
smap_h5_path = "data/smap/SMAP_L4_SM_gph_20250604T223000_Vv8011_001.h5"
ds_smap = xr.open_dataset(smap_h5_path, engine="h5netcdf", group="Geophysical_Data")
ds_smap_all = xr.open_dataset(smap_h5_path, engine="h5netcdf")

In [None]:
# Display the dataset opened from the 'Geophysical_Data' group.
ds_smap

In [None]:
# The data is on a projected grid (EASE 2.0), so I select the row and column rather than lat/long.
# Take cell_column / cell_row from the root-group dataset and merge them into
# the Geophysical_Data dataset.
grid_index_vars = ds_smap_all[["cell_column", "cell_row"]]
ds_smap_combined = ds_smap.merge(grid_index_vars)

In [None]:
# Display the merged dataset (Geophysical_Data variables plus cell_column / cell_row).
ds_smap_combined

In [None]:
# ds_smap_us = ds_smap.sel(y=slice(5556000, 2667000), x=slice(-14010000, -7223000)) Worked for original, but I think it needs to be by row/column for the geophysical_data
# Convert the projected-coordinate bounds (metres) into cell counts by dividing
# by the pixel size and rounding up. `math` is imported in the imports cell at
# the top of the notebook.
# NOTE(review): the only visible use of these values is commented-out code in
# the next cell — confirm they are still needed.
# NOTE(review): the 9 km global EASE-Grid 2.0 cell size is nominally about
# 9008.06 m — confirm where 9024.13 comes from.
pixel_size = 9024.13
y_min = math.ceil(2667000 / pixel_size)
y_max = math.ceil(5556000 / pixel_size)
x_min = math.ceil(-14010000 / pixel_size)
x_max = math.ceil(-7223000 / pixel_size)



In [None]:
# ds_smap_us = ds_smap.sel(y=slice(y_min, y_max), x=slice(x_min, x_max)) # slicing by cells. Worked before based on ds without coordinates.
# ds_smap_combined_us = ds_smap_combined.sel(y=slice(y_min, y_max), x=slice(x_min, x_max)) #Didn't work, because it now has coordinates.
# Subset by projected x/y coordinate values instead. The y slice runs from the
# larger value to the smaller one, and x from the more negative to the less negative.
us_y_top, us_y_bottom = 5656000, 2667000
us_x_left, us_x_right = -12310000, -6323000
ds_smap_combined_us = ds_smap_combined.sel(
    y=slice(us_y_top, us_y_bottom),
    x=slice(us_x_left, us_x_right),
)

In [None]:
# Plot surface wetness for the spatial subset to check the slice visually.
ds_smap_combined_us['sm_surface_wetness'].plot()

In [None]:
# Attach the CRS to the subset through the rioxarray accessor.
# inplace=True means ds_smap_combined_us itself is modified as well; later cells
# that reproject data derived from ds_smap_combined_us depend on that.
# NOTE: the variable name says 6931, but the CRS written is EPSG:6933.
SOURCE_CRS = "EPSG:6933"
ds_combined_6931 = ds_smap_combined_us.rio.write_crs(SOURCE_CRS, inplace=True)

In [None]:
# Plot surface wetness again from the dataset returned by write_crs.
ds_combined_6931['sm_surface_wetness'].plot()

In [None]:
# Keep only the variables used for the analysis.
analysis_vars = [
    "sm_surface",
    "sm_surface_wetness",
    "precipitation_total_surface_flux",
    "overland_runoff_flux",
]
ds_smap_analysis = ds_smap_combined_us[analysis_vars]

In [None]:
# Inspect the surface wetness variable of the spatial subset.
ds_smap_combined_us['sm_surface_wetness']

In [None]:
# Surface wetness of the spatial subset as its own variable.
ds_smap_sw = ds_smap_combined_us['sm_surface_wetness']
# Attempted to mask the missing-value marker explicitly; it had no visible effect:
# ds_smap_sw_masked = ds_smap_sw.where(ds_smap_sw != ds_smap_sw.attrs['fmissing_value']) Didn't seem to do anything.
# NOTE(review): possibly because xarray already masks fill values when decoding the file — confirm.

In [None]:
# Reproject the analysis variables to EPSG:5070.
# This relies on the CRS written (in place) onto ds_smap_combined_us in an earlier cell.
TARGET_CRS = "EPSG:5070"
ds_smap_5070 = ds_smap_analysis.rio.reproject(TARGET_CRS)

In [None]:
# Plot surface wetness after reprojection to check the result visually.
ds_smap_5070['sm_surface_wetness'].plot()

In [None]:
# ds_smap_5070.to_netcdf("data/smap/ds_smap_5070.nc")