Fix two bugs in scraper code. #3063

Merged 3 commits on May 15, 2019
1 change: 1 addition & 0 deletions changelog/3063.bugfix.1.rst
@@ -0,0 +1 @@
Fix `sunpy.util.scraper.Scraper` failing if a directory is not found on a remote server.
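For context, a minimal sketch of the approach this PR takes (the helper name here is illustrative and not part of the change): a directory missing on the remote server raises an HTTP 404, which can be treated as "no files for that date" rather than a fatal error.

import urllib.request
from urllib.error import HTTPError

def read_directory_or_skip(url):
    # Hypothetical helper, for illustration only.
    # A missing remote directory raises HTTPError with code 404;
    # returning None lets the caller skip it and keep scraping other dates.
    try:
        return urllib.request.urlopen(url).read()
    except HTTPError as err:
        if err.code == 404:
            return None
        raise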
1 change: 1 addition & 0 deletions changelog/3063.bugfix.2.rst
@@ -0,0 +1 @@
Correctly zero pad milliseconds in the `sunpy.util.scraper.Scraper` formatting to prevent errors when the millisecond value was less than 100.
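A minimal sketch of the underlying problem, using made-up timestamp values: when the millisecond value is below 100, converting it with str() produces fewer than the three characters the %e placeholder stands for, so the generated filename no longer matches the pattern.

import datetime

now = datetime.datetime(2019, 4, 19, 0, 0, 0, 4009)  # 4009 microseconds = 4 ms
milliseconds = int(now.microsecond / 1000.)

print(str(milliseconds))              # '4'   -> too short, URL pattern match fails
print('{:03d}'.format(milliseconds))  # '004' -> zero-padded to the expected width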
14 changes: 11 additions & 3 deletions sunpy/util/scraper.py
@@ -5,6 +5,7 @@
import re
import datetime
from ftplib import FTP
from urllib.error import HTTPError
from urllib.request import urlopen

from bs4 import BeautifulSoup
@@ -67,9 +68,11 @@ def __init__(self, pattern, **kwargs):
else:
now = datetime.datetime.now()
milliseconds_ = int(now.microsecond / 1000.)
self.now = now.strftime(self.pattern[0:milliseconds.start()] +
str(milliseconds_) +
self.pattern[milliseconds.end():])
self.now = now.strftime('{start}{milli:03d}{end}'.format(
start=self.pattern[0:milliseconds.start()],
milli=milliseconds_,
end=self.pattern[milliseconds.end():]
))

def matches(self, filepath, date):
return date.strftime(self.pattern) == filepath
@@ -234,6 +237,11 @@ def filelist(self, timerange):
filesurls.append(fullpath)
finally:
opn.close()
except HTTPError as http_err:
# Ignore missing directories (issue #2684).
if http_err.code == 404:
continue
raise
except Exception:
raise
return filesurls
32 changes: 26 additions & 6 deletions sunpy/util/tests/test_scraper.py
@@ -1,3 +1,6 @@
import datetime
from unittest.mock import patch, Mock

import pytest

import astropy.units as u
@@ -140,22 +143,29 @@ def testURL_pattern():
assert not s._URL_followsPattern('fd_20130410_ar_231211.fts.gz')


@pytest.mark.xfail
def testURL_patternMilliseconds():
def testURL_patternMillisecondsGeneric():
s = Scraper('fd_%Y%m%d_%H%M%S_%e.fts')
# NOTE: Seems that if below fails randomly - not understood why
# with `== True` fails a bit less...
assert s._URL_followsPattern('fd_20130410_231211_119.fts')
assert not s._URL_followsPattern('fd_20130410_231211.fts.gz')
assert not s._URL_followsPattern('fd_20130410_ar_231211.fts.gz')


def testURL_patternMillisecondsZeroPadded():
# Asserts solution to ticket #1954.
# Milliseconds must be zero-padded in order to match URL lengths.
now_mock = Mock(return_value=datetime.datetime(2019, 4, 19, 0, 0, 0, 4009))
with patch('datetime.datetime', now=now_mock):
s = Scraper('fd_%Y%m%d_%H%M%S_%e.fts')
now_mock.assert_called_once()
assert s.now == 'fd_20190419_000000_004.fts'


@pytest.mark.xfail
def testFilesRange_sameDirectory_local():
# Fails due to an IsADirectoryError, wrapped in a URLError, after `requests`
# tries to open a directory as a binary file.
Contributor:
So is this something we can fix, or should this test actually always fail?

Author:

Excellent question. I don't know if scanning directories was a functioning feature at some point or not. It's definitely fixable/implementable, but it'd take some effort.

Contributor:

Yeah, that does seem like an undertaking. I wonder if we want to support that in the future or just drop it. What do you think @Cadair?

Member:

I honestly have no idea. If there isn't an issue for this already we should probably open one. Also I am guessing @dpshelio wrote this test.

Contributor:

There is already a check for ftp-type URLs (URIs? I never know which); maybe add a check for file-type URLs that raises a not-implemented exception, and alter the test to match? I mean, it can't work as it is, can it?
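For illustration, a rough sketch of that suggestion (hypothetical, not part of this PR): detect file-type patterns up front and raise NotImplementedError, rather than failing later when a local directory is opened as a file.

from urllib.parse import urlparse

def _reject_local_patterns(pattern):
    # Hypothetical check based on the review suggestion above; not in this PR.
    if urlparse(pattern).scheme == 'file':
        raise NotImplementedError(
            'Scraping local file:// patterns is not currently supported.')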

s = Scraper('/'.join(['file:/', rootdir,
'EIT', 'efz%Y%m%d.%H%M%S_s.fits']))
print(s.pattern)
print(s.now)
startdate = parse_time((2004, 3, 1, 4, 0))
enddate = parse_time((2004, 3, 1, 6, 30))
assert len(s.filelist(TimeRange(startdate, enddate))) == 3
@@ -199,3 +209,13 @@ def test_ftp():
s = Scraper(pattern)
timerange = TimeRange('2016/5/18 15:28:00', '2016/5/20 16:30:50')
assert len(s.filelist(timerange)) == 2


@pytest.mark.remote_data
def test_filelist_url_missing_directory():
# Asserts solution to ticket #2684.
# Attempting to access data for the year 1960 results in a 404, so no files are returned.
pattern = 'http://lasp.colorado.edu/eve/data_access/evewebdataproducts/level2/%Y/%j/'
s = Scraper(pattern)
timerange = TimeRange('1960/01/01 00:00:00', '1960/01/02 00:00:00')
assert len(s.filelist(timerange)) == 0