Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adds support for file-handlers and file-type in Map and TimeSeries #5193

Closed
wants to merge 6 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions changelog/5193.feature.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Added support for passing file objects and an explicit ``filetype`` to `~sunpy.map.Map` and `~sunpy.timeseries.TimeSeries`.
4 changes: 4 additions & 0 deletions sunpy/io/ana.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@

The reading and writing of ana files is not supported under Windows.
"""
import io
import os
import collections

Expand Down Expand Up @@ -44,6 +45,9 @@ def read(filename, debug=False, **kwargs):
--------
>>> data = sunpy.io.ana.read(filename) # doctest: +SKIP
"""
if isinstance(filename, io.IOBase):
raise TypeError("Reader does not support file-handler")
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
raise TypeError("Reader does not support file-handler")
raise TypeError("The ANA reader does not support file handles.")


if not os.path.isfile(filename):
raise OSError("File does not exist!")

Expand Down
19 changes: 13 additions & 6 deletions sunpy/io/file_tools.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
"""
This module provides a generic file reader.
"""
import io
import os
import re
import pathlib
from itertools import chain

try:
from . import fits
Expand Down Expand Up @@ -59,7 +61,7 @@ def read_file(filepath, filetype=None, **kwargs):

Parameters
----------
filepath : path-like
filepath : `str` or file-like
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does a pathlib.Path not work?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No, it does work with a `pathlib.Path`;
I will update the docstring. 👍

The file to be read.
filetype : `str`, optional
Supported reader or extension to manually specify the filetype.
Expand All @@ -77,19 +79,24 @@ def read_file(filepath, filetype=None, **kwargs):
-----
Other keyword arguments are passed to the reader used.
"""
filepath = str(pathlib.Path(filepath))
_filepath = filepath.name if isinstance(filepath, io.IOBase) else filepath
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not all objects which implement io.IOBase have a .name attribute:

>>> import io
>>> isinstance(io.BytesIO(), io.IOBase)
True
>>> io.BytesIO().name
AttributeError: '_io.BytesIO' object has no attribute 'name'

I don't think we should do this at all when dealing with io objects. It would be better to work out the type from the magic bytes at the start of the file rather than from any reference to the filename (which, I think, already happens if the filename doesn't match anything).

_filepath = pathlib.Path(_filepath)

# Use the explicitly passed filetype
if filetype is not None:
if filetype:
if filetype not in chain(*_known_extensions):
raise UnrecognizedFileTypeError(
"The requested filetype is not currently supported by SunPy.")
return _readers[filetype].read(filepath, **kwargs)

# Go through the known extensions
for extension, readername in _known_extensions.items():
if filepath.endswith(extension) or filetype in extension:
if _filepath.suffix.endswith(extension):
return _readers[readername].read(filepath, **kwargs)

# If filetype is not apparent from the extension, attempt to detect it
readername = _detect_filetype(filepath)
return _readers[readername].read(filepath, **kwargs)
readername = _detect_filetype(_filepath)
return _readers[readername].read(_filepath, **kwargs)


def read_file_header(filepath, filetype=None, **kwargs):
Expand Down
4 changes: 4 additions & 0 deletions sunpy/io/jp2.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""
This module provides a JPEG 2000 file reader.
"""
import io
import collections
from xml.etree import cElementTree as ET

Expand All @@ -26,6 +27,9 @@ def read(filepath, **kwargs):
pairs : `list`
A list of (data, header) tuples.
"""
if isinstance(filepath, io.IOBase):
raise TypeError("Reader does not support file-handler")
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
raise TypeError("Reader does not support file-handler")
raise TypeError("The JPEG 2000 reader does not support file handles.")


# Put import here to speed up sunpy.io import time
from glymur import Jp2k
header = get_header(filepath)
Expand Down
46 changes: 42 additions & 4 deletions sunpy/io/tests/test_filetools.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,15 @@ def test_read_file_fits(self):
assert all([isinstance(p[1],
sunpy.io.header.FileHeader) for p in pairs])

# Test file-object
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We should add a test here to check it works as expected with other file objects, something like this:

with open(AIA_171_IMAGE, 'rb') as fd:
    fits_bytes = fd.read()

sunpy.io.read_file(io.BytesIO(fits_bytes))
...

with open(AIA_171_IMAGE, 'rb') as fd:
aiapair = sunpy.io.read_file(fd)
assert isinstance(aiapair, list)
assert len(aiapair) == 1
assert len(aiapair[0]) == 2
assert isinstance(aiapair[0][0], np.ndarray)
assert isinstance(aiapair[0][1], sunpy.io.header.FileHeader)

def test_read_file_fits_gzip(self):
# Test read gzipped fits file
for fits_extension in [".fts", ".fit", ".fits"]:
Expand All @@ -59,22 +68,43 @@ def test_read_file_fits_gzip(self):
@skip_glymur
def test_read_file_jp2(self):
# Test read jp2
pair = sunpy.io.read_file(os.path.join(sunpy.data.test.rootdir,
"2013_06_24__17_31_30_84__SDO_AIA_AIA_193.jp2"))

sdo_aia_jp2 = os.path.join(sunpy.data.test.rootdir, "2013_06_24__17_31_30_84__SDO_AIA_AIA_193.jp2")
# Test filepath
pair = sunpy.io.read_file(sdo_aia_jp2)
assert isinstance(pair, list)
assert len(pair) == 1
assert len(pair[0]) == 2
assert isinstance(pair[0][0], np.ndarray)
assert isinstance(pair[0][1], sunpy.io.header.FileHeader)

# Test exception for file object
with pytest.raises(TypeError, match="Reader does not support file-handler"):
with open(sdo_aia_jp2, 'rb') as fd:
pair = sunpy.io.read_file(fd)

def test_read_file_header_fits(self):
    """Reading only the header of the AIA 171 FITS sample gives a one-element list of FileHeader."""
    headers = sunpy.io.read_file_header(AIA_171_IMAGE)
    assert isinstance(headers, list)
    assert len(headers) == 1
    first_header = headers[0]
    assert isinstance(first_header, sunpy.io.header.FileHeader)

def test_read_file_exceptions(self):
    """Passing a filetype that is not a known reader or extension must raise UnrecognizedFileTypeError."""
    expected_error = sunpy.io.file_tools.UnrecognizedFileTypeError
    expected_message = "The requested filetype is not currently supported by SunPy"
    with pytest.raises(expected_error, match=expected_message):
        sunpy.io.read_file(AIA_171_IMAGE, "invalid_extension")

def test_detect_filetype(self):
    """Check ``detect_filetype`` against one sample file per expected reader name."""
    test_dir = sunpy.data.test.rootdir
    # (sample file, reader name that detect_filetype is expected to report)
    cases = [
        (AIA_171_IMAGE, "fits"),
        (os.path.join(test_dir, "2013_06_24__17_31_30_84__SDO_AIA_AIA_193.jp2"), "jp2"),
        (os.path.join(test_dir, 'goes_truncated_test_goes15.nc'), "hdf5"),
    ]
    for sample_path, expected_reader in cases:
        assert sunpy.io.detect_filetype(sample_path) == expected_reader

@skip_glymur
def test_read_file_header_jp2(self):
# Test jp2
Expand Down Expand Up @@ -110,13 +140,21 @@ def test_write_file_fits_bytes(self):

@skip_ana
def test_read_file_ana(self):
ana_data = sunpy.io.read_file(os.path.join(sunpy.data.test.rootdir, "test_ana.fz"))
# Test read ana
ana_test_file = os.path.join(sunpy.data.test.rootdir, "test_ana.fz")
# Test filepath
ana_data = sunpy.io.read_file(ana_test_file)
assert isinstance(ana_data, list)
assert len(ana_data) == 1
assert len(ana_data[0]) == 2
assert isinstance(ana_data[0][0], np.ndarray)
assert isinstance(ana_data[0][1], sunpy.io.header.FileHeader)

# Test file object
with pytest.raises(TypeError, match="Reader does not support file-handler"):
with open(ana_test_file, 'rb') as fd:
ana_data = sunpy.io.read_file(fd)

@skip_ana
def test_read_file__header_ana(self):
ana_data = sunpy.io.read_file_header(os.path.join(sunpy.data.test.rootdir, "test_ana.fz"))
Expand Down
19 changes: 15 additions & 4 deletions sunpy/map/map_factory.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import os
import io
import pathlib
from collections import OrderedDict
from urllib.request import Request
Expand Down Expand Up @@ -115,6 +115,11 @@ class MapFactory(BasicRegistrationFactory):

>>> mymap = sunpy.map.Map('file1.fits') # doctest: +SKIP

* File handlers

>>> with open('file1.fits', 'rb') as fd: # doctest: +SKIP
... mymap = sunpy.map.Map(fd) # doctest: +SKIP

* All fits files in a directory by giving a directory

>>> mymap = sunpy.map.Map('local_dir/sub_dir') # doctest: +SKIP
Expand Down Expand Up @@ -146,6 +151,10 @@ class MapFactory(BasicRegistrationFactory):
* Any mixture of the above not in a list

>>> mymap = sunpy.map.Map(((data, header), data2, header2, 'file1.fits', url_str, 'eit_*.fits')) # doctest: +SKIP

* Manually specify the file-type

>>> mymap = sunpy.map.Map('filename.fits', filetype='fits') # doctest: +SKIP
"""

def _read_file(self, fname, **kwargs):
Expand All @@ -154,11 +163,9 @@ def _read_file(self, fname, **kwargs):
"""
# File gets read here. This needs to be generic enough to seamlessly
# call a fits file or a jpeg2k file, etc
# NOTE: use os.fspath so that fname can be either a str or pathlib.Path
# This can be removed once read_file supports pathlib.Path
log.debug(f'Reading {fname}')
try:
pairs = read_file(os.fspath(fname), **kwargs)
pairs = read_file(fname, **kwargs)
except Exception as e:
msg = f"Failed to read {fname}."
raise IOError(msg) from e
Expand Down Expand Up @@ -289,6 +296,10 @@ def _parse_url(self, arg, **kwargs):
def _parse_path(self, arg, **kwargs):
return parse_path(arg, self._read_file, **kwargs)

@_parse_arg.register(io.IOBase)
def _parse_fileobj(self, arg, **kwargs):
    """Handle an open file object by passing it, with any keyword arguments, to ``_read_file``."""
    parsed = self._read_file(arg, **kwargs)
    return parsed

def __call__(self, *args, composite=False, sequence=False, silence_errors=False, **kwargs):
""" Method for running the factory. Takes arbitrary arguments and
keyword arguments and passes them to a sequence of pre-registered types
Expand Down
9 changes: 9 additions & 0 deletions sunpy/map/tests/test_map_factory.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,10 @@ def test_patterns(self):
eitmap = sunpy.map.Map(a_fname)
assert isinstance(eitmap, sunpy.map.GenericMap)

# File name with file-type
eitmap = sunpy.map.Map(a_fname, filetype=pathlib.Path(a_fname).suffix[1:])
assert isinstance(eitmap, sunpy.map.GenericMap)
Comment on lines +84 to +85
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You aren't really testing this properly here as you are giving it a filename which it can detect the type for. You would be better off giving it a random filename and the type.

(Although it will also use magic bytes to detect file type if the filename doesn't pan out so I am not sure exactly how to make this test fail.)


# Directory
directory = pathlib.Path(filepath, "EIT")
maps = sunpy.map.Map(os.fspath(directory))
Expand All @@ -98,6 +102,11 @@ def test_patterns(self):
assert isinstance(maps, list)
assert ([isinstance(amap, sunpy.map.GenericMap) for amap in maps])

# File-like object
with open(a_fname, "rb") as fd:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What happens if you pass random binary data to map?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It raises a `ValueError`.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is there a test for this in this file?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not currently, should I add one?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If you could.

fdmap = sunpy.map.Map(fd)
assert isinstance(fdmap, sunpy.map.GenericMap)

# Glob
pattern = os.path.join(filepath, "EIT", "*")
maps = sunpy.map.Map(pattern)
Expand Down
17 changes: 17 additions & 0 deletions sunpy/timeseries/tests/test_timeseries_factory.py
Original file line number Diff line number Diff line change
Expand Up @@ -430,6 +430,18 @@ def test_table_to_ts(self):
# Test some other options
# =============================================================================

def test_file_handler(self):
    """A binary file object opened on the Fermi GBM sample must build a GBMSummaryTimeSeries."""
    expected_cls = sunpy.timeseries.sources.fermi_gbm.GBMSummaryTimeSeries
    with open(fermi_gbm_filepath, 'rb') as handle:
        series = sunpy.timeseries.TimeSeries(handle)
        assert isinstance(series, expected_cls)

def test_file_type(self):
    """Passing the file's own extension as ``filetype`` must still build a GBMSummaryTimeSeries."""
    _, dotted_suffix = os.path.splitext(fermi_gbm_filepath)
    # Drop the leading "." so only the bare extension is passed as the filetype.
    bare_suffix = dotted_suffix[1:]
    series = sunpy.timeseries.TimeSeries(fermi_gbm_filepath, filetype=bare_suffix)
    expected_cls = sunpy.timeseries.sources.fermi_gbm.GBMSummaryTimeSeries
    assert isinstance(series, expected_cls)

def test_passed_ts(self):
# Test an EVE TimeSeries
with pytest.warns(SunpyUserWarning, match='Unknown units'):
Expand All @@ -456,6 +468,11 @@ def test_invalid_filepath(self):
with pytest.raises(ValueError, match='Did not find any files'):
sunpy.timeseries.TimeSeries(invalid_filepath, silence_errors=True)

def test_invalid_filetype(self):
    """A file object combined with an unsupported ``filetype`` must raise TypeError.

    The file object cannot be read with the requested type, and the factory
    refuses to treat a file object as a path, raising
    "file-handler is not supported for unreadable files".
    """
    # ``match`` is checked with ``re.search`` against the exception text, so the
    # pattern has to be an exact substring of the message the factory raises
    # (the previous pattern dropped the word "is" and could never match).
    with pytest.raises(TypeError, match="file-handler is not supported for unreadable files"):
        with open(fermi_gbm_filepath, 'rb') as fd:
            sunpy.timeseries.TimeSeries(fd, filetype="invalid_extension")

def test_invalid_file(self):
invalid_filepath = os.path.join(filepath, 'annotation_ppt.db')
with pytest.raises(NoMatchError):
Expand Down
20 changes: 17 additions & 3 deletions sunpy/timeseries/timeseries_factory.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
"""
This module provides the `~sunpy.timeseries.TimeSeriesFactory` class.
"""
import os
import io
import copy
import pathlib
from collections import OrderedDict
Expand Down Expand Up @@ -94,6 +94,11 @@ class TimeSeriesFactory(BasicRegistrationFactory):
>>> my_timeseries = sunpy.timeseries.TimeSeries('filename.fits') # doctest: +SKIP
>>> my_timeseries = sunpy.timeseries.TimeSeries('filename.fits', source='lyra') # doctest: +SKIP

* File handlers

>>> with open('filename.fits', 'rb') as fd: # doctest: +SKIP
... my_timeseries = sunpy.timeseries.TimeSeries(fd) # doctest: +SKIP

* Multiple files can be combined into one TimeSeries, as long as they are the same source

>>> my_timeseries = sunpy.timeseries.TimeSeries(['goesfile1.fits', 'goesfile2.fits'],
Expand All @@ -120,6 +125,10 @@ class TimeSeriesFactory(BasicRegistrationFactory):

>>> my_timeseries = sunpy.timeseries.TimeSeries((data, header), data2, header2,
... 'file1.fits', url, 'eit_*.fits') # doctest: +SKIP

* Manually specify the file-type

>>> my_timeseries = sunpy.timeseries.TimeSeries('filename.fits', filetype='fits') # doctest: +SKIP
"""

@staticmethod
Expand All @@ -129,8 +138,10 @@ def _read_file(fname, **kwargs):

Parameters
----------
fname : `str`
fname : `str` or file-like
The file path to parse.
filetype : `str`
Manually specify supported reader or extension.

Returns
-------
Expand All @@ -139,7 +150,7 @@ def _read_file(fname, **kwargs):
"""
if 'source' not in kwargs.keys() or not kwargs['source']:
try:
pairs = read_file(os.fspath(fname), **kwargs)
pairs = read_file(fname, **kwargs)

new_pairs = []
for pair in pairs:
Expand Down Expand Up @@ -489,6 +500,9 @@ def _apply_result(data_header_pairs, filepaths, result):
if read:
data_header_pairs.append(result)
else:
if isinstance(result, io.IOBase):
raise TypeError("file-handler is not supported for unreadable files")

filepaths.append(result)

return data_header_pairs, filepaths
Expand Down