Skip to content

Commit

Permalink
More scraper range fixes
Browse files Browse the repository at this point in the history
  • Loading branch information
Shane Maloney authored and Cadair committed Feb 16, 2021
1 parent 703f56e commit 3d210cf
Show file tree
Hide file tree
Showing 3 changed files with 53 additions and 12 deletions.
1 change: 1 addition & 0 deletions sunpy/net/dataretriever/sources/tests/test_noaa.py
Original file line number Diff line number Diff line change
Expand Up @@ -187,6 +187,7 @@ def test_srs_missing_tarball(mock_ftp_nlst):
assert urls[1].endswith('20200102SRS.txt')


@no_vso
@pytest.mark.remote_data
def test_srs_current_year():
year = datetime.date.today().year
Expand Down
57 changes: 49 additions & 8 deletions sunpy/util/scraper.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,10 @@
import os
import re
import calendar
import datetime
import warnings
from time import sleep
from ftplib import FTP
from datetime import datetime
from urllib.error import HTTPError
from urllib.parse import urlsplit
from urllib.request import urlopen
Expand Down Expand Up @@ -87,9 +87,9 @@ def __init__(self, pattern, regex=False, **kwargs):
self.domain = "{0.scheme}://{0.netloc}/".format(urlsplit(self.pattern))
milliseconds = re.search(r'\%e', self.pattern)
if not milliseconds:
self.now = datetime.datetime.now().strftime(self.pattern)
self.now = datetime.now().strftime(self.pattern)
else:
now = datetime.datetime.now()
now = datetime.now()
milliseconds_ = int(now.microsecond / 1000.)
self.now = now.strftime('{start}{milli:03d}{end}'.format(
start=self.pattern[0:milliseconds.start()],
Expand Down Expand Up @@ -123,13 +123,54 @@ def range(self, timerange):
return [directorypattern]
else:
directories = []
end = timerange.start.datetime
while end < timerange.end.datetime + timestep:
directories.append(end.strftime(directorypattern))
end = end + timestep
cur = self._date_floor(timerange.start, timestep)
end = self._date_floor(timerange.end, timestep) + timestep
while cur < end:
directories.append(cur.strftime(directorypattern))
cur = cur + timestep

return directories

@staticmethod
def _date_floor(date, timestep):
    """
    Return the "floor" of the given date at the given time step.

    Parameters
    ----------
    date : `datetime.datetime` or `astropy.time.Time`
        The date to floor. Only its ``strftime`` method is used.
    timestep : `dateutil.relativedelta.relativedelta`
        The smallest time step to floor to. Must be equal to exactly one
        second, minute, hour, day, month or year.

    Returns
    -------
    `datetime.datetime`
        The time floored at the given time step. Sub-second precision is
        always discarded.

    Raises
    ------
    ValueError
        If ``timestep`` is not one of the supported single-unit steps.
    """
    # Going through ``strftime`` gives one code path for every input type
    # that implements it.
    year, month, day, hour, minute, second = (
        int(part) for part in date.strftime('%Y,%m,%d,%H,%M,%S').split(','))
    # `datetime` only accepts seconds in 0-59; a formatted value of 60
    # (presumably a leap second) is wrapped to 0.
    second %= 60

    # Each supported step paired with the fields that survive flooring to it.
    floors = (
        (relativedelta(seconds=1), (year, month, day, hour, minute, second)),
        (relativedelta(minutes=1), (year, month, day, hour, minute, 0)),
        (relativedelta(hours=1), (year, month, day, hour, 0, 0)),
        (relativedelta(days=1), (year, month, day, 0, 0, 0)),
        (relativedelta(months=1), (year, month, 1, 0, 0, 0)),
        (relativedelta(years=1), (year, 1, 1, 0, 0, 0)),
    )
    for step, fields in floors:
        if timestep == step:
            return datetime(*fields)

    # Previously this fell through to an UnboundLocalError on `new_time_tup`.
    raise ValueError(
        f"Unsupported timestep {timestep!r}: expected a relativedelta of exactly one "
        "second, minute, hour, day, month or year."
    )

def _URL_followsPattern(self, url):
"""
Check whether the url provided follows the pattern.
Expand Down Expand Up @@ -432,7 +473,7 @@ def get_timerange_from_exdict(exdict):
timetypes = ['hour', 'minute', 'second', 'millisecond']
dtlist = [int(exdict.get(d, 1)) for d in datetypes]
dtlist.extend([int(exdict.get(t, 0)) for t in timetypes])
startTime = Time(datetime.datetime(*dtlist))
startTime = Time(datetime(*dtlist))

tdelta = 1*u.millisecond
if "year" in exdict:
Expand Down
7 changes: 3 additions & 4 deletions sunpy/util/tests/test_scraper.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,9 +74,9 @@ def testDirectoryRangeHours():
# Exercise `Scraper.range` on a time range inside 2010-10-10 and assert on the
# number of entries it returns.
# NOTE(review): this listing appears to be a rendered diff with its +/- markers
# stripped, so the repeated `enddate` assignment and the two contradictory
# asserts below are presumably a removed line followed by its replacement --
# confirm against the repository before relying on either version.
def testDirectoryRange_single():
s = Scraper('%Y%m%d/%H_%M.csv')
startdate = parse_time((2010, 10, 10, 5, 0))
enddate = parse_time((2010, 10, 10, 6, 0))
# presumably the replacement for the assignment above (as written, this later value wins)
enddate = parse_time((2010, 10, 10, 7, 0))
timerange = TimeRange(startdate, enddate)
assert len(s.range(timerange)) == 2
# presumably the replacement for the assert above; both cannot hold for the same range
assert len(s.range(timerange)) == 1


def testDirectoryRange_Month():
Expand Down Expand Up @@ -152,7 +152,7 @@ def testURL_patternMillisecondsZeroPadded():
# Asserts solution to ticket #1954.
# Milliseconds must be zero-padded in order to match URL lengths.
now_mock = Mock(return_value=datetime.datetime(2019, 4, 19, 0, 0, 0, 4009))
with patch('datetime.datetime', now=now_mock):
with patch('sunpy.util.scraper.datetime', now=now_mock):
s = Scraper('fd_%Y%m%d_%H%M%S_%e.fts')
now_mock.assert_called_once()
assert s.now == 'fd_20190419_000000_004.fts'
Expand Down Expand Up @@ -207,7 +207,6 @@ def test_ftp():
assert len(urls) == 2


@pytest.mark.xfail
@pytest.mark.remote_data
def test_filelist_url_missing_directory():
# Asserts solution to ticket #2684.
Expand Down

0 comments on commit 3d210cf

Please sign in to comment.