Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Functionality to client to download new GOES 16/17 data and reprocessed 13/14/15 #4394

Merged
merged 34 commits into from Nov 21, 2020
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
Show all changes
34 commits
Select commit Hold shift + click to select a range
bbfd98d
adding functionality to client to download new GOES 16/17 data and al…
hayesla Jul 27, 2020
49fe2c6
fixing tests
hayesla Jul 28, 2020
e5878b5
adding detail to register_values
hayesla Jul 28, 2020
c7d7cf8
fixing source
hayesla Jul 28, 2020
883bb96
fixing database fido search
hayesla Jul 28, 2020
1b6a0cb
updating the register_values
hayesla Jul 28, 2020
9af4f20
adding changelog
hayesla Jul 28, 2020
486c155
merging master
hayesla Sep 17, 2020
8e1a0bd
updating to new generic client
hayesla Sep 17, 2020
c308057
pre-commit fixes
hayesla Sep 17, 2020
39d2fc1
Merge branch 'master' into new_goes_fido
hayesla Sep 24, 2020
bc44a59
removing VersionData attrs - now search old by Provider
hayesla Sep 24, 2020
2625734
Merge branch 'master' into new_goes_fido
hayesla Oct 19, 2020
fd6ec76
Merge branch 'master' into new_goes_fido
hayesla Oct 19, 2020
299bcac
incl overlapping providers
hayesla Oct 19, 2020
1a8e52b
fixing mistake in test
hayesla Oct 20, 2020
67108d8
Merge branch 'new_goes_fido' of https://github.com/hayesla/sunpy into…
hayesla Oct 20, 2020
51db86c
wildcard for file namechanges (already happened!)
hayesla Oct 20, 2020
4e1f5d7
Merge branch 'master' into new_goes_fido
hayesla Oct 23, 2020
2854954
Merge branch 'master' of github.com:sunpy/sunpy into pr/4394
nabobalis Nov 15, 2020
dfdf7a3
some minor style changes
nabobalis Nov 15, 2020
43cd3bb
fixing test
hayesla Nov 15, 2020
af3d45c
Merge branch 'master' into new_goes_fido
hayesla Nov 18, 2020
df69aa2
Add some HTTP 429 backoff code in scraper
Cadair Nov 18, 2020
9fc43f6
Fix extern.parse logging bug
Cadair Nov 18, 2020
640cfed
updating from comments
hayesla Nov 19, 2020
f8ec6e3
updating doc string
hayesla Nov 19, 2020
ddc048c
cleaning up with function added
hayesla Nov 20, 2020
1959192
Merge branch 'master' into new_goes_fido
hayesla Nov 20, 2020
3a5d94e
tidying up
hayesla Nov 20, 2020
cdc8891
fixing format input
hayesla Nov 20, 2020
a8a5128
incl. sat number keyword
hayesla Nov 20, 2020
1aed943
trying shorter timerange to see if time for search/download issue
hayesla Nov 20, 2020
8e813f0
Merge branch 'master' into new_goes_fido
hayesla Nov 20, 2020
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
1 change: 1 addition & 0 deletions changelog/4394.feature.rst
@@ -0,0 +1 @@
Add support to the GOES XRS Fido client to download new GOES 16 & 17 data and reprocessed GOES 13, 14 and 15 data.
5 changes: 3 additions & 2 deletions sunpy/database/tests/test_tables.py
Expand Up @@ -144,8 +144,9 @@ def test_entries_from_fido_search_result(fido_search_result):
wavemin=0.1, wavemax=30.4)
# 2 entries from goes
assert entries[56] == DatabaseEntry(
source='nasa', provider='sdac', physobs='irradiance',
fileid='https://umbra.nascom.nasa.gov/goes/fits/2012/go1520120101.fits',
source='noaa', provider='sdac', physobs='irradiance',
hayesla marked this conversation as resolved.
Show resolved Hide resolved
fileid='https://satdat.ngdc.noaa.gov/sem/goes/data/science/xrs/goes15/'
'gxrs-l2-irrad_science/2012/01/sci_gxrs-l2-irrad_g15_d20120101_v0-0-0.nc',
observation_time_start=datetime(2012, 1, 1, 0, 0),
observation_time_end=datetime(2012, 1, 1, 23, 59, 59, 999000),
wavemin=np.nan, wavemax=np.nan,
Expand Down
11 changes: 10 additions & 1 deletion sunpy/net/dataretriever/attrs/goes.py
@@ -1,9 +1,18 @@
from sunpy.net.attr import SimpleAttr

__all__ = ['SatelliteNumber']
__all__ = ["SatelliteNumber", "VersionData"]


class SatelliteNumber(SimpleAttr):
"""
The GOES Satellite Number
"""


class VersionData(SimpleAttr):
hayesla marked this conversation as resolved.
Show resolved Hide resolved
"""
The version of the data. The GOES 13, 14 and
15 data has now been reprocessed but we still
want the availability of the old data for
reproducibility.
"""
49 changes: 32 additions & 17 deletions sunpy/net/dataretriever/sources/goes.py
Expand Up @@ -5,7 +5,6 @@
import os
from datetime import datetime
from itertools import compress
from urllib.parse import urlsplit

import astropy.units as u
from astropy.time import Time, TimeDelta
Expand Down Expand Up @@ -68,7 +67,8 @@ def _get_goes_sat_num(self, date):
12: TimeRange("2002-12-13", "2007-05-08"),
13: TimeRange("2006-08-01", "2006-08-01"),
14: TimeRange("2009-12-02", "2010-10-04"),
15: TimeRange("2010-09-01", parse_time("now")),
15: TimeRange("2010-09-01", "2020-03-04"),
16: TimeRange("2020-03-04", parse_time("now"))
}

results = []
Expand All @@ -91,10 +91,10 @@ def _get_goes_sat_num(self, date):
def _get_time_for_url(self, urls):
times = []
for uri in urls:
uripath = urlsplit(uri).path

# Extract the yymmdd or yyyymmdd timestamp
datestamp = os.path.splitext(os.path.split(uripath)[1])[0][4:]
if uri.endswith('fits'):
datestamp = os.path.splitext(os.path.basename(uri))[0][4:]
else:
datestamp = os.path.basename(uri).split('_')[3][1:]

# 1999-01-15 as an integer.
if int(datestamp) <= 990115:
Expand All @@ -121,16 +121,30 @@ def _get_url_for_timerange(self, timerange, **kwargs):
`list`
The URL(s) for the corresponding timerange.
"""
timerange = TimeRange(timerange.start.strftime('%Y-%m-%d'), timerange.end)
if timerange.end < parse_time("1999/01/15"):
goes_file = "%Y/go{satellitenumber:02d}%y%m%d.fits"
elif timerange.start < parse_time("1999/01/15") and timerange.end >= parse_time("1999/01/15"):
return self._get_overlap_urls(timerange, **kwargs)
else:
goes_file = "%Y/go{satellitenumber}%Y%m%d.fits"

goes_pattern = f"https://umbra.nascom.nasa.gov/goes/fits/{goes_file}"
satellitenumber = kwargs.get("satellitenumber", self._get_goes_sat_num(timerange.start))
versiondata = kwargs.get("versiondata", None) # this is for the old data before it was re-processed by NOAA

if satellitenumber < 13 or (satellitenumber in [13, 14, 15] and versiondata == 'old'):
timerange = TimeRange(timerange.start.strftime('%Y-%m-%d'), timerange.end)
if timerange.end < parse_time("1999/01/15"):
goes_file = "%Y/go{satellitenumber:02d}%y%m%d.fits"
elif timerange.start < parse_time("1999/01/15") and timerange.end >= parse_time("1999/01/15"):
return self._get_overlap_urls(timerange, **kwargs)
else:
goes_file = "%Y/go{satellitenumber}%Y%m%d.fits"

goes_pattern = f"https://umbra.nascom.nasa.gov/goes/fits/{goes_file}"

elif satellitenumber in [13, 14, 15]:
goes_file = f"sci_gxrs-l2-irrad_g{satellitenumber}_d%Y%m%d_v0-0-0.nc"
goes_pattern = f"https://satdat.ngdc.noaa.gov/sem/goes/data/science/xrs/goes{satellitenumber}/gxrs-l2-irrad_science/%Y/%m/{goes_file}"

else:
goes_file = "sci_xrsf-l2-flx1s_g{satellitenumber}_d%Y%m%d_v2-0-1.nc"
goes_pattern = f"https://data.ngdc.noaa.gov/platforms/solar-space-observing-satellites/goes/goes{satellitenumber}" \
f"/l2/data/xrsf-l2-flx1s_science/%Y/%m/{goes_file}"

goes_files = Scraper(goes_pattern, satellitenumber=satellitenumber)

return goes_files.filelist(timerange)
Expand Down Expand Up @@ -159,7 +173,7 @@ def _makeimap(self):
"""
Helper function used to hold information about source.
"""
self.map_["source"] = "nasa"
self.map_["source"] = "noaa"
hayesla marked this conversation as resolved.
Show resolved Hide resolved
self.map_["instrument"] = "goes"
self.map_["physobs"] = "irradiance"
self.map_["provider"] = "sdac"
Expand All @@ -178,7 +192,7 @@ def _can_handle_query(cls, *query):
boolean
answer as to whether client can service the query
"""
chkattr = ["Time", "Instrument", "SatelliteNumber"]
chkattr = ["Time", "Instrument", "SatelliteNumber", "VersionData"]
chklist = [x.__class__.__name__ in chkattr for x in query]
for x in query:
if x.__class__.__name__ == "Instrument" and x.value.lower() in (
Expand All @@ -199,7 +213,8 @@ def register_values(cls):
adict = {attrs.Instrument: [
("GOES", "The Geostationary Operational Environmental Satellite Program."),
("XRS", "GOES X-ray Flux")],
attrs.goes.SatelliteNumber: [(str(x), f"GOES Satellite Number {x}") for x in goes_number]}
attrs.goes.SatelliteNumber: [(str(x), f"GOES Satellite Number {x}") for x in goes_number],
attrs.goes.VersionData: [('old', "old calibrated XRS data before NOAA re-processed it")]}
return adict


Expand Down
29 changes: 26 additions & 3 deletions sunpy/net/dataretriever/sources/tests/test_goes_ud.py
Expand Up @@ -28,7 +28,12 @@ def LCClient():
'https://umbra.nascom.nasa.gov/goes/fits/1995/go07950605.fits'),
(TimeRange('2008/06/02', '2008/06/04'),
'https://umbra.nascom.nasa.gov/goes/fits/2008/go1020080602.fits',
'https://umbra.nascom.nasa.gov/goes/fits/2008/go1020080604.fits')])
'https://umbra.nascom.nasa.gov/goes/fits/2008/go1020080604.fits'),
(TimeRange('2020/02/02', '2020/02/04'),
'https://satdat.ngdc.noaa.gov/sem/goes/data/science/xrs/goes15/gxrs-l2-irrad_science/2020/02/'
'sci_gxrs-l2-irrad_g15_d20200202_v0-0-0.nc',
'https://satdat.ngdc.noaa.gov/sem/goes/data/science/xrs/goes15/gxrs-l2-irrad_science/2020/02/'
'sci_gxrs-l2-irrad_g15_d20200204_v0-0-0.nc')])
def test_get_url_for_time_range(LCClient, timerange, url_start, url_end):
urls = LCClient._get_url_for_timerange(timerange)
assert isinstance(urls, list)
Expand All @@ -48,6 +53,24 @@ def test_get_overlap_urls(LCClient, timerange, url_start, url_end):
assert urls[-1] == url_end


@pytest.mark.remote_data
@pytest.mark.parametrize("timerange, url_old, url_new",
[(Time('2013/10/28', '2013/10/29'),
"https://umbra.nascom.nasa.gov/goes/fits/2013/go1520131028.fits",
"https://satdat.ngdc.noaa.gov/sem/goes/data/science/xrs/goes15/gxrs-l2-irrad_science/"
"2013/10/sci_gxrs-l2-irrad_g15_d20131028_v0-0-0.nc")])
def test_old_data_access(timerange, url_old, url_new):
# test first for old data
qr = Fido.search(timerange, a.Instrument("XRS"), a.goes.VersionData("old"))
urls = [r.url for r in qr.get_response(0)]
assert urls[0] == url_old

# now test for new data
qr = Fido.search(timerange, a.Instrument("XRS"))
urls = [r.url for r in qr.get_response(0)]
assert urls[0] == url_new


@given(goes_time())
def test_can_handle_query(time):
ans1 = goes.XRSClient._can_handle_query(time, Instrument('XRS'))
Expand All @@ -70,14 +93,14 @@ def test_fixed_satellite(LCClient):
a.Instrument.xrs)

for resp in ans1:
assert "go15" in resp.url
assert "g15" in resp.url

ans1 = LCClient.search(a.Time("2017/01/01", "2017/01/02"),
a.Instrument.xrs,
a.goes.SatelliteNumber(13))

for resp in ans1:
assert "go13" in resp.url
assert "g13" in resp.url

ans1 = LCClient.search(a.Time("1999/1/13", "1999/1/16"),
a.Instrument.xrs,
Expand Down