sunpy · Punyaslok · Jun 14, 2016 · Jun 14, 2016 · Jun 15, 2016 · Jun 15, 2016
diff --git a/docs/guide/acquiring_data/database.rst b/docs/guide/acquiring_data/database.rst
@@ -765,3 +765,52 @@ to 10 and therefore removes the 5 entries that been used least recently.
      21    2011-06-07 06:33:29 ... 17.400000000000002 17.400000000000002
      22    2014-04-09 06:00:12 ...               17.1               17.1
      58    2011-06-06 00:00:00 ...                N/A                N/A
+
+
+9. Adding entries using the Fido interface
+------------------------------------------
+
+9.1 Adding entries from a Fido search result
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+A Fido search result can also be used to add new entries to the database.
+This is similar to adding entries from a VSO query result, as shown previously.
+Note that the method :meth:`Database.add_from_fido_search_result()` does not
+download any files.
+
+Let's clear the database first.
+
+    >>> database.clear()
+
+Now get the Fido search result and pass it into the 
+:meth:`Database.add_from_fido_search_result()` method.
+
+    >>> from sunpy.net import Fido, attrs as a   # doctest:  +REMOTE_DATA
+    >>> search_result = Fido.search(a.Time("2012/1/1", "2012/1/2"),
+    ...                              a.Instrument('lyra'))   # doctest:  +REMOTE_DATA
+    >>> database.add_from_fido_search_result(search_result)   # doctest:  +REMOTE_DATA
+    >>> len(database)   # doctest:  +REMOTE_DATA
+    2
+
+9.2 Downloading
+~~~~~~~~~~~~~~~
+The method :meth:`Database.download_from_fido_search_result()` downloads the
+files from a Fido search result and adds the corresponding entries to the
+database. Again, as with VSO downloading, the number of entries that will be
+added to the database is not determined by the number of records in the
+search result; it depends on the total number of FITS headers. The
+:meth:`Database.download_from_fido_search_result()` method also accepts an
+optional keyword argument ``path`` which determines the download path of each
+file. Here, the first 2 entries are from the ``add`` operation shown above,
+and the next 4 entries are from the ``download`` operation.
+
+    >>> database.download_from_fido_search_result(search_result)   # doctest:  +REMOTE_DATA
+    >>> print(display_entries(database, ['id', 'observation_time_start', 
+    ...                     'observation_time_end', 'instrument', 'source']))   # doctest:  +REMOTE_DATA
+     id observation_time_start observation_time_end instrument source
+    --- ---------------------- -------------------- ---------- ------
+      1    2012-01-01 00:00:00  2012-01-02 00:00:00       lyra Proba2
+      2    2012-01-01 00:00:00  2012-01-02 00:00:00       lyra Proba2
+      3    2012-01-01 00:00:00  2012-01-02 00:00:00       lyra Proba2
+      4    2012-01-01 00:00:00  2012-01-02 00:00:00       lyra Proba2
+      5    2012-01-01 00:00:00  2012-01-02 00:00:00       lyra Proba2
+      6    2012-01-01 00:00:00  2012-01-02 00:00:00       lyra Proba2
diff --git a/sunpy/database/database.py b/sunpy/database/database.py
@@ -19,12 +19,15 @@
 import sunpy
 from sunpy.database import commands, tables
 from sunpy.database.tables import _create_display_table
+from sunpy.io import read_file_header
+from sunpy.io.file_tools import UnrecognizedFileTypeError
 from sunpy.database.caching import LRUCache
 from sunpy.database.commands import CompositeOperation
 from sunpy.database.attrs import walker
 from sunpy.net.hek2vso import H2VClient
 from sunpy.net.attr import and_
 from sunpy.net.vso import VSOClient
+from sunpy.net import Fido
 from sunpy.extern.six.moves import range
 from sunpy.util import deprecated
 
@@ -446,6 +449,72 @@ def _download_and_collect_entries(self, query_result, **kwargs):
                 entry.download_time = datetime.utcnow()
                 yield entry
 
+    def _download_and_collect_fido_entries(self, search_result, **kwargs):
+        """Fetch the files of a Fido search result and yield their entries.
+
+        Accepts the keywords ``client``, ``path``, ``progress``, ``methods``
+        and ``overwrite``; ``client`` and ``methods`` are accepted but not
+        used. Raises ``TypeError`` on any other keyword. Results that match
+        an already-downloaded entry are skipped or, when ``overwrite`` is
+        true, the matching entries are removed and the files fetched again.
+        """
+        kwargs.pop('client', None)
+        path = kwargs.pop('path', None)
+        progress = kwargs.pop('progress', False)
+        kwargs.pop('methods', ('URL-FILE_Rice', 'URL-FILE'))
+        overwrite = kwargs.pop('overwrite', False)
+        if kwargs:
+            raise TypeError(
+                'unexpected keyword argument {0!r}'.format(kwargs.popitem()[0]))
+
+        shared_attrs = ["source", "provider", "physobs", "fileid",
+                        "observation_time_start", "observation_time_end",
+                        "instrument", "size", "wavemin", "wavemax"]
+        candidates = list(tables.entries_from_fido_search_result(
+            search_result, default_waveunit=self.default_waveunit))
+
+        # NOTE(review): assumes iterating ``search_result`` yields one item per
+        # generated entry, in the same order -- confirm against Fido.
+        kept, skipped, delete_entries = [], [], []
+        for result, candidate in zip(search_result, candidates):
+            # Compare through the entry built from the result; presumably only
+            # DatabaseEntry provides _compare_attributes -- verify.
+            matches = [e for e in self if e.path is not None and
+                       candidate._compare_attributes(e, shared_attrs)]
+            if matches and not overwrite:
+                skipped.append(result)
+                continue
+            delete_entries.extend(matches)
+            kept.append(candidate)
+        if skipped:
+            search_result = [x for x in search_result
+                             if all(x != s for s in skipped)]
+        for stale in delete_entries:
+            self.remove(stale)
+
+        paths = Fido.fetch(search_result, progress=progress, path=path)
+        # Pair paths with the kept entries only so metadata stays aligned.
+        for file_path, candidate in zip(paths, kept):
+            try:
+                read_file_header(file_path)
+            except UnrecognizedFileTypeError:
+                # Unreadable file type: fall back to the search-result entry.
+                candidate.path = file_path
+                yield candidate
+                continue
+            if os.path.isfile(file_path):
+                entries = tables.entries_from_file(file_path, self.default_waveunit)
+            elif os.path.isdir(file_path):
+                entries = tables.entries_from_dir(file_path, self.default_waveunit)
+            else:
+                raise ValueError('The path is neither a file nor directory')
+            for entry in entries:
+                for name in shared_attrs:
+                    setattr(entry, name, getattr(candidate, name))
+                entry.path = file_path
+                entry.download_time = datetime.utcnow()
+                yield entry
+
     @deprecated('0.8', alternative='database.fetch()')
     def download(self, *query, **kwargs):
         """
@@ -845,6 +914,17 @@ def download_from_vso_query_result(self, query_result, client=None,
         self.add_many(self._download_and_collect_entries(
             query_result, client=client, path=path, progress=progress, overwrite=overwrite))
 
+    def download_from_fido_search_result(self, search_result,
+                                         path=None, wait=True, progress=False,
+                                         ignore_already_added=False, overwrite=False):
+        """Download a Fido search result's files and add the resulting entries."""
+        if search_result:
+            # An empty result is a no-op; ``wait`` is accepted but unused here.
+            downloaded = self._download_and_collect_fido_entries(
+                search_result=search_result, path=path,
+                progress=progress, overwrite=overwrite)
+            self.add_many(downloaded, ignore_already_added=ignore_already_added)
+
     def add_from_vso_query_result(self, query_result,
                                   ignore_already_added=False):
         """Generate database entries from a VSO query result and add all the

diff --git a/sunpy/database/tables.py b/sunpy/database/tables.py
@@ -18,7 +18,8 @@
 import numpy as np
 
 from sunpy.time import parse_time, TimeRange
-from sunpy.io import fits, file_tools as sunpy_filetools
+import sunpy.io
+from sunpy.io import file_tools as sunpy_filetools
 from sunpy.util import print_table
 from sunpy.extern.six.moves import map
 from sunpy.extern import six
@@ -353,20 +354,41 @@ def _from_query_result_block(cls, qr_block, default_waveunit=None):
 
     @classmethod
     def _from_fido_search_result_block(cls, sr_block, default_waveunit=None):
-        """
-        Make a new :class:`DatabaseEntry` instance from a Fido search
+        """Make a new :class:`DatabaseEntry` instance from a Fido search
         result block.
 
         Parameters
         ----------
-        sr_block : `sunpy.net.dataretriever.client.QueryResponseBlock`
+        sr_block : sunpy.net.dataretriever.client.QueryResponseBlock
             A query result block is usually not created directly; instead,
             one gets instances of
             ``sunpy.net.dataretriever.client.QueryResponseBlock`` by iterating
             over each element of a Fido search result.
-        default_waveunit : `str`, optional
+        default_waveunit : str, optional
             The wavelength unit that is used if it cannot be found in the
             `sr_block`.
+
+        Examples
+        --------
+        >>> from sunpy.net import Fido, attrs
+        >>> from sunpy.database.tables import DatabaseEntry
+        >>> sr = Fido.search(attrs.Time("2012/1/1", "2012/1/2"),
+        ...    attrs.Instrument('lyra'))  # doctest: +REMOTE_DATA
+        >>> qrbs = list(sr.get_response(0))  # doctest: +REMOTE_DATA
+        >>> entry = DatabaseEntry._from_fido_search_result_block(qrbs[0])  # doctest: +REMOTE_DATA
+        >>> entry.source  # doctest: +REMOTE_DATA
+        'Proba2'
+        >>> entry.provider  # doctest: +REMOTE_DATA
+        'esa'
+        >>> entry.physobs  # doctest: +REMOTE_DATA
+        'irradiance'
+        >>> entry.fileid  # doctest: +REMOTE_DATA
+        'http://proba2.oma.be/lyra/data/bsd/2012/01/01/lyra_20120101-000000_lev2_std.fits'
+        >>> entry.observation_time_start, entry.observation_time_end  # doctest: +REMOTE_DATA
+        (datetime.datetime(2012, 1, 1, 0, 0), datetime.datetime(2012, 1, 2, 0, 0))
+        >>> entry.instrument  # doctest: +REMOTE_DATA
+        'lyra'
+
         """
         # All attributes of DatabaseEntry that are not in QueryResponseBlock
         # are set as None for now.
@@ -405,8 +427,9 @@ def _from_fido_search_result_block(cls, sr_block, default_waveunit=None):
         wavemin = final_values['wavemin']
         wavemax = final_values['wavemax']
 
-        # sr_block.url of a QueryResponseBlock attribute is stored in fileid
-        fileid = str(sr_block.url) if sr_block.url is not None else None
+        #sr_block.url of a QueryResponseBlock attribute is stored in fileid
+        fileid = getattr(sr_block, 'url', None)
+        #fileid = str(url_name) if url_name is not None else None
         size = None
         return cls(
             source=source, provider=provider, physobs=physobs, fileid=fileid,
@@ -595,8 +618,7 @@ def entries_from_fido_search_result(sr, default_waveunit=None):
                 yield DatabaseEntry._from_fido_search_result_block(block, default_waveunit)
 
 
-def entries_from_file(file, default_waveunit=None,
-                      time_string_parse_format=None):
+def entries_from_file(file, default_waveunit=None, time_string_parse_format=None):
     """Use the headers of a FITS file to generate an iterator of
     :class:`sunpy.database.tables.DatabaseEntry` instances. Gathered
     information will be saved in the attribute `fits_header_entries`. If the
@@ -653,11 +675,12 @@ def entries_from_file(file, default_waveunit=None,
     111
 
     """
-    headers = fits.get_header(file)
+    headers = sunpy.io.read_file_header(file)
     if isinstance(file, (str, six.text_type)):
         filename = file
     else:
         filename = getattr(file, 'name', None)
+
     for header in headers:
         entry = DatabaseEntry(path=filename)
         for key, value in six.iteritems(header):
@@ -671,7 +694,7 @@ def entries_from_file(file, default_waveunit=None,
                     entry.fits_key_comments.append(FitsKeyComment(k, v))
                 continue
             entry.fits_header_entries.append(FitsHeaderEntry(key, value))
-        waveunit = fits.extract_waveunit(header)
+        waveunit = sunpy.io.fits.extract_waveunit(header)
         entry.hdu_index = headers.index(header)
         if waveunit is None:
             waveunit = default_waveunit
@@ -681,7 +704,13 @@ def entries_from_file(file, default_waveunit=None,
                 unit = Unit(waveunit)
             except ValueError:
                 raise WaveunitNotConvertibleError(waveunit)
+        try:
+            instrument_name = next(x for x in entry.fits_header_entries if x.key == 'TELESCOP').value
+        except Exception:
+            pass
+
         for header_entry in entry.fits_header_entries:
+
             key, value = header_entry.key, header_entry.value
             if key == 'INSTRUME':
                 entry.instrument = value
@@ -694,15 +723,23 @@ def entries_from_file(file, default_waveunit=None,
             # NOTE: the key DATE-END or DATE_END is not part of the official
             # FITS standard, but many FITS files use it in their header
             elif key in ('DATE-END', 'DATE_END'):
-                entry.observation_time_end = parse_time(
-                    value,
-                    _time_string_parse_format=time_string_parse_format
-                )
+                try:
+                    entry.observation_time_end = parse_time(value, _time_string_parse_format=time_string_parse_format)
+                except ValueError:
+                    if 'goes' in instrument_name.lower():
+                        entry.observation_time_end = datetime.strptime(value,
+                            '%d/%m/%Y')
+             #       else:
+             #          raise
             elif key in ('DATE-OBS', 'DATE_OBS'):
-                entry.observation_time_start = parse_time(
-                    value,
-                    _time_string_parse_format=time_string_parse_format
-                )
+                try:
+                    entry.observation_time_start = parse_time(value, _time_string_parse_format=time_string_parse_format)
+                except ValueError:
+                        if 'goes' in instrument_name.lower():
+                            entry.observation_time_start = datetime.strptime(value,
+                                '%d/%m/%Y')
+            #            else:
+            #                raise
         yield entry