-
-
Notifications
You must be signed in to change notification settings - Fork 585
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Download from fido search result implemented #1877
Changes from all commits
e64af01
88d8696
ac31163
f51ab0f
bfc4507
119f0c7
b84e54b
70e0fb8
5332702
cd02451
123ec3a
970021c
f9cf9d3
10d2204
cc69d10
9d22610
eb0be02
9fc7613
b785e2b
f0c4e58
df2640e
5c7eacb
52e9713
87b99d5
ebceee1
05da5f0
8319f51
785d43c
95bb4b0
017668a
16fb15c
829809d
7d87eef
999b4cd
b623a57
818f5ae
29c9fd1
8825bc4
930ac97
46fbd46
a1617cc
b5f124d
19857d8
2e3b393
98d169c
8fb9a64
6cf48ba
8240ae1
da6183f
c872b46
98309b1
eae1c13
4097c4b
048702d
945985a
417c181
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -765,3 +765,52 @@ to 10 and therefore removes the 5 entries that been used least recently. | |
21 2011-06-07 06:33:29 ... 17.400000000000002 17.400000000000002 | ||
22 2014-04-09 06:00:12 ... 17.1 17.1 | ||
58 2011-06-06 00:00:00 ... N/A N/A | ||
|
||
|
||
9. Adding entries using the Fido interface | ||
------------------------------------------ | ||
|
||
9.1 Adding entries from a Fido search result | ||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | ||
A Fido search result can also be used to add new entries to the database. | ||
This is similar to adding entries from a VSO query result as shown previously. | ||
The method :meth:`Database.add_from_fido_search_result()` does not download | ||
any files. | ||
|
||
Let's clear the database first. | ||
|
||
>>> database.clear() | ||
|
||
Now get the Fido search result and pass it into the | ||
:meth:`Database.add_from_fido_search_result()` method. | ||
|
||
>>> from sunpy.net import Fido, attrs as a # doctest: +REMOTE_DATA | ||
>>> search_result = Fido.search(a.Time("2012/1/1", "2012/1/2"), | ||
... a.Instrument('lyra')) # doctest: +REMOTE_DATA | ||
>>> database.add_from_fido_search_result(search_result) # doctest: +REMOTE_DATA | ||
>>> len(database) # doctest: +REMOTE_DATA | ||
2 | ||
|
||
9.2 Downloading | ||
~~~~~~~~~~~~~~~ | ||
The method :meth:`Database.download_from_fido_search_result()` downloads the | ||
files from a Fido search result and adds the corresponding entries to the | ||
database. Again, as with VSO downloading, the number of entries that will be | ||
added to the database is not determined by the number of records in the | ||
search result. Instead, it depends on the total number of FITS headers. | ||
The :meth:`Database.download_from_fido_search_result()` | ||
method also accepts an optional keyword argument ``path`` which determines the | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. should be two backticks as it's not a code reference |
||
download path of each file. Here, the first 2 entries are from the ``add`` operation | ||
shown above, and the next 4 entries are from the ``download`` operation. | ||
|
||
>>> database.download_from_fido_search_result(search_result) # doctest: +REMOTE_DATA | ||
>>> print(display_entries(database, ['id', 'observation_time_start', | ||
... 'observation_time_end', 'instrument', 'source'])) # doctest: +REMOTE_DATA | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. indentation |
||
id observation_time_start observation_time_end instrument source | ||
--- ---------------------- -------------------- ---------- ------ | ||
1 2012-01-01 00:00:00 2012-01-02 00:00:00 lyra Proba2 | ||
2 2012-01-01 00:00:00 2012-01-02 00:00:00 lyra Proba2 | ||
3 2012-01-01 00:00:00 2012-01-02 00:00:00 lyra Proba2 | ||
4 2012-01-01 00:00:00 2012-01-02 00:00:00 lyra Proba2 | ||
5 2012-01-01 00:00:00 2012-01-02 00:00:00 lyra Proba2 | ||
6 2012-01-01 00:00:00 2012-01-02 00:00:00 lyra Proba2 |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -19,12 +19,15 @@ | |
import sunpy | ||
from sunpy.database import commands, tables | ||
from sunpy.database.tables import _create_display_table | ||
from sunpy.io import read_file_header | ||
from sunpy.io.file_tools import UnrecognizedFileTypeError | ||
from sunpy.database.caching import LRUCache | ||
from sunpy.database.commands import CompositeOperation | ||
from sunpy.database.attrs import walker | ||
from sunpy.net.hek2vso import H2VClient | ||
from sunpy.net.attr import and_ | ||
from sunpy.net.vso import VSOClient | ||
from sunpy.net import Fido | ||
from sunpy.extern.six.moves import range | ||
from sunpy.util import deprecated | ||
|
||
|
@@ -446,6 +449,72 @@ def _download_and_collect_entries(self, query_result, **kwargs): | |
entry.download_time = datetime.utcnow() | ||
yield entry | ||
|
||
def _download_and_collect_fido_entries(self, search_result, **kwargs): | ||
|
||
client = kwargs.pop('client', None) | ||
path = kwargs.pop('path', None) | ||
progress = kwargs.pop('progress', False) | ||
methods = kwargs.pop('methods', ('URL-FILE_Rice', 'URL-FILE')) | ||
overwrite = kwargs.pop('overwrite', False) | ||
|
||
if kwargs: | ||
k, v = kwargs.popitem() | ||
raise TypeError('unexpected keyword argument {0!r}'.format(k)) | ||
|
||
entries_list = tables.entries_from_fido_search_result(search_result, | ||
default_waveunit=self.default_waveunit) | ||
entries_list = list(entries_list) | ||
|
||
remove_list = [] | ||
delete_entries = [] | ||
for sr_entry, temp in zip(search_result, entries_list): | ||
for database_entry in self: | ||
if database_entry.path is not None and sr_entry._compare_attributes(database_entry, | ||
["source", "provider", "physobs", "fileid", "observation_time_start", | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I would indent this a but more so it dosen't align to the following lines. |
||
"observation_time_end", "instrument", "size", "wavemin", "wavemax"]): | ||
if not overwrite: | ||
remove_list.append(qr) | ||
else: | ||
delete_entries.append(database_entry) | ||
|
||
for temp in remove_list: | ||
search_result = [x for x in search_result if x != temp] | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. You could re-write this using sets rather than the for loop. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I remembered the reason not to use set difference for this. It will not maintain the ordering of the elements. Should I leave it as it is ? |
||
|
||
for temp in delete_entries: | ||
self.remove(temp) | ||
|
||
paths = Fido.fetch(search_result, progress=progress, path=path) | ||
|
||
for (path, sr_entry) in zip(paths, entries_list): | ||
|
||
try: | ||
read_file_header(path) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. As discussed this probably shouldn't be here, as There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. My commit means you don't need this line anymore, I think |
||
if os.path.isfile(path): | ||
entries = tables.entries_from_file(path, self.default_waveunit) | ||
elif os.path.isdir(path): | ||
entries = tables.entries_from_dir(path, self.default_waveunit) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We should modify There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ok, so path should never actually be a directory, so we can probably just drop this line or replace it with an exception. |
||
else: | ||
raise ValueError('The path is neither a file nor directory') | ||
|
||
for entry in entries: | ||
entry.source = sr_entry.source | ||
entry.provider = sr_entry.provider | ||
entry.physobs = sr_entry.physobs | ||
entry.fileid = sr_entry.fileid | ||
entry.observation_time_start = sr_entry.observation_time_start | ||
entry.observation_time_end = sr_entry.observation_time_end | ||
entry.instrument = sr_entry.instrument | ||
entry.size = sr_entry.size | ||
entry.wavemin = sr_entry.wavemin | ||
entry.wavemax = sr_entry.wavemax | ||
entry.path = path | ||
entry.download_time = datetime.utcnow() | ||
yield entry | ||
except UnrecognizedFileTypeError: | ||
entry = sr_entry | ||
entry.path = path | ||
yield entry | ||
|
||
@deprecated('0.8', alternative='database.fetch()') | ||
def download(self, *query, **kwargs): | ||
""" | ||
|
@@ -845,6 +914,17 @@ def download_from_vso_query_result(self, query_result, client=None, | |
self.add_many(self._download_and_collect_entries( | ||
query_result, client=client, path=path, progress=progress, overwrite=overwrite)) | ||
|
||
def download_from_fido_search_result(self, search_result,
                                     path=None, wait=True, progress=False,
                                     ignore_already_added=False, overwrite=False):
    """Download the files of a Fido search result and add them to the database.

    Nothing happens when ``search_result`` is empty (falsy). Otherwise the
    entries produced by ``_download_and_collect_fido_entries`` are handed
    to ``add_many``.

    Parameters
    ----------
    search_result
        The Fido search result whose files should be downloaded.
    path : optional
        Forwarded to ``_download_and_collect_fido_entries``.
    wait : optional
        Accepted but not used by this method.
    progress : optional
        Forwarded to ``_download_and_collect_fido_entries``.
    ignore_already_added : optional
        Forwarded to ``add_many``.
    overwrite : optional
        Forwarded to ``_download_and_collect_fido_entries``.

    """
    if search_result:
        downloaded_entries = self._download_and_collect_fido_entries(
            search_result=search_result,
            path=path,
            progress=progress,
            overwrite=overwrite)
        self.add_many(downloaded_entries,
                      ignore_already_added=ignore_already_added)
|
||
def add_from_vso_query_result(self, query_result, | ||
ignore_already_added=False): | ||
"""Generate database entries from a VSO query result and add all the | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -18,7 +18,8 @@ | |
import numpy as np | ||
|
||
from sunpy.time import parse_time, TimeRange | ||
from sunpy.io import fits, file_tools as sunpy_filetools | ||
import sunpy.io | ||
from sunpy.io import file_tools as sunpy_filetools | ||
from sunpy.util import print_table | ||
from sunpy.extern.six.moves import map | ||
from sunpy.extern import six | ||
|
@@ -353,20 +354,41 @@ def _from_query_result_block(cls, qr_block, default_waveunit=None): | |
|
||
@classmethod | ||
def _from_fido_search_result_block(cls, sr_block, default_waveunit=None): | ||
""" | ||
Make a new :class:`DatabaseEntry` instance from a Fido search | ||
"""Make a new :class:`DatabaseEntry` instance from a Fido search | ||
result block. | ||
|
||
Parameters | ||
---------- | ||
sr_block : `sunpy.net.dataretriever.client.QueryResponseBlock` | ||
sr_block : sunpy.net.dataretriever.client.QueryResponseBlock | ||
A query result block is usually not created directly; instead, | ||
one gets instances of | ||
``sunpy.net.dataretriever.client.QueryResponseBlock`` by iterating | ||
over each element of a Fido search result. | ||
default_waveunit : `str`, optional | ||
default_waveunit : str, optional | ||
The wavelength unit that is used if it cannot be found in the | ||
`sr_block`. | ||
|
||
Examples | ||
-------- | ||
>>> from sunpy.net import Fido, attrs | ||
>>> from sunpy.database.tables import DatabaseEntry | ||
>>> sr = Fido.search(attrs.Time("2012/1/1", "2012/1/2"), | ||
... attrs.Instrument('lyra')) # doctest: +REMOTE_DATA | ||
>>> qrbs = list(sr.get_response(0)) # doctest: +REMOTE_DATA | ||
>>> entry = DatabaseEntry._from_fido_search_result_block(qrbs[0]) # doctest: +REMOTE_DATA | ||
>>> entry.source # doctest: +REMOTE_DATA | ||
'Proba2' | ||
>>> entry.provider # doctest: +REMOTE_DATA | ||
'esa' | ||
>>> entry.physobs # doctest: +REMOTE_DATA | ||
'irradiance' | ||
>>> entry.fileid # doctest: +REMOTE_DATA | ||
'http://proba2.oma.be/lyra/data/bsd/2012/01/01/lyra_20120101-000000_lev2_std.fits' | ||
>>> entry.observation_time_start, entry.observation_time_end # doctest: +REMOTE_DATA | ||
(datetime.datetime(2012, 1, 1, 0, 0), datetime.datetime(2012, 1, 2, 0, 0)) | ||
>>> entry.instrument # doctest: +REMOTE_DATA | ||
'lyra' | ||
|
||
""" | ||
# All attributes of DatabaseEntry that are not in QueryResponseBlock | ||
# are set as None for now. | ||
|
@@ -405,8 +427,9 @@ def _from_fido_search_result_block(cls, sr_block, default_waveunit=None): | |
wavemin = final_values['wavemin'] | ||
wavemax = final_values['wavemax'] | ||
|
||
# sr_block.url of a QueryResponseBlock attribute is stored in fileid | ||
fileid = str(sr_block.url) if sr_block.url is not None else None | ||
#sr_block.url of a QueryResponseBlock attribute is stored in fileid | ||
fileid = getattr(sr_block, 'url', None) | ||
#fileid = str(url_name) if url_name is not None else None | ||
size = None | ||
return cls( | ||
source=source, provider=provider, physobs=physobs, fileid=fileid, | ||
|
@@ -595,8 +618,7 @@ def entries_from_fido_search_result(sr, default_waveunit=None): | |
yield DatabaseEntry._from_fido_search_result_block(block, default_waveunit) | ||
|
||
|
||
def entries_from_file(file, default_waveunit=None, | ||
time_string_parse_format=None): | ||
def entries_from_file(file, default_waveunit=None, time_string_parse_format=None): | ||
"""Use the headers of a FITS file to generate an iterator of | ||
:class:`sunpy.database.tables.DatabaseEntry` instances. Gathered | ||
information will be saved in the attribute `fits_header_entries`. If the | ||
|
@@ -653,11 +675,12 @@ def entries_from_file(file, default_waveunit=None, | |
111 | ||
|
||
""" | ||
headers = fits.get_header(file) | ||
headers = sunpy.io.read_file_header(file) | ||
if isinstance(file, (str, six.text_type)): | ||
filename = file | ||
else: | ||
filename = getattr(file, 'name', None) | ||
|
||
for header in headers: | ||
entry = DatabaseEntry(path=filename) | ||
for key, value in six.iteritems(header): | ||
|
@@ -671,7 +694,7 @@ def entries_from_file(file, default_waveunit=None, | |
entry.fits_key_comments.append(FitsKeyComment(k, v)) | ||
continue | ||
entry.fits_header_entries.append(FitsHeaderEntry(key, value)) | ||
waveunit = fits.extract_waveunit(header) | ||
waveunit = sunpy.io.fits.extract_waveunit(header) | ||
entry.hdu_index = headers.index(header) | ||
if waveunit is None: | ||
waveunit = default_waveunit | ||
|
@@ -681,7 +704,13 @@ def entries_from_file(file, default_waveunit=None, | |
unit = Unit(waveunit) | ||
except ValueError: | ||
raise WaveunitNotConvertibleError(waveunit) | ||
try: | ||
instrument_name = next(x for x in entry.fits_header_entries if x.key == 'TELESCOP').value | ||
except Exception: | ||
pass | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. this should be |
||
|
||
for header_entry in entry.fits_header_entries: | ||
|
||
key, value = header_entry.key, header_entry.value | ||
if key == 'INSTRUME': | ||
entry.instrument = value | ||
|
@@ -694,15 +723,23 @@ def entries_from_file(file, default_waveunit=None, | |
# NOTE: the key DATE-END or DATE_END is not part of the official | ||
# FITS standard, but many FITS files use it in their header | ||
elif key in ('DATE-END', 'DATE_END'): | ||
entry.observation_time_end = parse_time( | ||
value, | ||
_time_string_parse_format=time_string_parse_format | ||
) | ||
try: | ||
entry.observation_time_end = parse_time(value, _time_string_parse_format=time_string_parse_format) | ||
except ValueError: | ||
if 'goes' in instrument_name.lower(): | ||
entry.observation_time_end = datetime.strptime(value, | ||
'%d/%m/%Y') | ||
# else: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. uncomment this |
||
# raise | ||
elif key in ('DATE-OBS', 'DATE_OBS'): | ||
entry.observation_time_start = parse_time( | ||
value, | ||
_time_string_parse_format=time_string_parse_format | ||
) | ||
try: | ||
entry.observation_time_start = parse_time(value, _time_string_parse_format=time_string_parse_format) | ||
except ValueError: | ||
if 'goes' in instrument_name.lower(): | ||
entry.observation_time_start = datetime.strptime(value, | ||
'%d/%m/%Y') | ||
# else: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. and this |
||
# raise | ||
yield entry | ||
|
||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
indentation error