-
Notifications
You must be signed in to change notification settings - Fork 29
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Unify download for antenna models / proposal tables / shower library with additional fallback servers #673
Changes from 4 commits
92e6f89
1092199
d17c2c4
b14225f
5d2bec0
6c0772a
91637f1
9f5065c
e2c17b4
d5ac37e
c13a13c
d00d205
d837dce
0dcc5e1
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,93 @@ | ||
import requests | ||
import os | ||
|
||
import logging | ||
logger = logging.getLogger('NuRadioReco.dataservers')

# Base URLs of the NuRadio data servers. The order matters: unless a caller asks
# for a ranking by response time, download_from_dataserver tries the servers
# front to back and stops at the first one that delivers the file.
dataservers = [
    "https://rnog-data.zeuthen.desy.de",
    "https://rno-g.uchicago.edu/data/desy-mirror",
]
|
||
def get_available_dataservers_by_responsetime(dataservers=dataservers):
    """
    Return the responsive data servers, fastest first.

    A short directory index is requested from every server. Servers that raise
    a ``requests`` error (connection problem, timeout, HTTP error status) are
    left out of the result.

    Parameters
    ----------
    dataservers : list of str
        Base URLs of the servers to probe.

    Returns
    -------
    list of str
        The servers that answered successfully, sorted by ascending
        ``response.elapsed``. Empty if no server answered.
    """
    response_times = []
    available_dataservers = []

    # NOTE(review): the servers are probed one after another. Parallelising the
    # probes was discussed and judged unnecessary for the two configured servers.
    for dataserver in dataservers:
        # get the index of the shower_library directory, because it is short
        testdir = f"{dataserver}/shower_library/"
        try:
            response = requests.get(testdir, timeout=5)
            response.raise_for_status()
        except requests.exceptions.RequestException:
            # Only network/HTTP failures mark a server as unavailable; anything
            # else (e.g. KeyboardInterrupt) must propagate to the caller.
            continue
        response_times.append(response.elapsed)
        available_dataservers.append(dataserver)

    ranked_dataservers = [x for _, x in sorted(zip(response_times, available_dataservers))]
    return ranked_dataservers
|
||
def get_available_dataservers_by_timezone(dataservers=dataservers):
    """
    Return the data servers ordered by time-zone proximity to this machine.

    Each server's host is resolved to an IP address, the IP is looked up in the
    GeoLite2 database to obtain a time-zone name, and the servers are sorted by
    the difference between that zone's UTC offset and the local UTC offset.

    Parameters
    ----------
    dataservers : list of str
        Base URLs (or bare host names) of the servers to rank.

    Returns
    -------
    list of str
        All given servers, the one with the smallest UTC-offset distance first.
    """
    import socket
    import pytz
    from datetime import datetime
    from urllib.parse import urlparse
    from geolite2 import geolite2

    # NOTE(review): reviewers suggested storing each server's time zone next to
    # its URL instead of geolocating it, and considered the response-time
    # ranking the more robust choice.
    geo = geolite2.reader()

    naive = datetime.utcnow()
    utcoffset_local = naive.astimezone().utcoffset().total_seconds() / 3600
    server_offsets = []
    for dataserver in dataservers:
        # The entries are URLs (scheme + host + optional path), but the DNS
        # lookup needs the bare host name. Fall back to the raw entry so that
        # plain host names keep working.
        hostname = urlparse(dataserver).hostname or dataserver
        dataserver_ip = socket.gethostbyname(hostname)
        dataserver_timezone = geo.get(dataserver_ip)["location"]["time_zone"]
        timezone = pytz.timezone(dataserver_timezone)
        utcoffset_server = timezone.localize(naive).utcoffset().total_seconds() / 3600

        # Circular distance on a 24 h clock: symmetric in both directions and
        # never larger than 12 h.
        difference = abs(utcoffset_local - utcoffset_server) % 24
        server_offsets.append(min(difference, 24 - difference))

    ranked_dataservers = [x for _, x in sorted(zip(server_offsets, dataservers))]
    return ranked_dataservers
|
||
def download_from_dataserver(remote_path, target_path, dataservers=dataservers, try_ordered=False):
    """
    Download ``remote_path`` from the NuRadio data servers to ``target_path``.

    The servers are tried one after another until one of them delivers the
    file. The directory of ``target_path`` is created if it does not exist.

    Parameters
    ----------
    remote_path : str
        Path of the file relative to the server base URL.
    target_path : str
        Local path the downloaded content is written to.
    dataservers : list of str
        Base URLs of the servers to try, in order.
    try_ordered : bool
        If True, first rank the servers with
        get_available_dataservers_by_responsetime and only try the responsive
        ones, fastest first.

    Raises
    ------
    IOError
        If no server returned the file with status "ok".
    """
    if try_ordered:
        dataservers = get_available_dataservers_by_responsetime(dataservers)
        # alternatively:
        # dataservers = get_available_dataservers_by_timezone(dataservers)

    # stays "not_found" unless one of the servers answers successfully
    requests_status = requests.codes["not_found"]
    for dataserver in dataservers:
        URL = f'{dataserver}/{remote_path}'

        logger.warning(
            "downloading file {} from {}. This can take a while...".format(target_path, URL))

        try:
            r = requests.get(URL)
            r.raise_for_status()
            requests_status = r.status_code
            break
        except requests.exceptions.HTTPError:
            logger.warning(f"HTTP Error for {dataserver}. Does the file {remote_path} exist on the server?")
        except requests.exceptions.ConnectionError:
            logger.warning(f"Error Connecting to {dataserver}. Maybe you don't have internet... or the server is down?")
        except requests.exceptions.Timeout:
            logger.warning(f"Timeout Error for {dataserver}.")
        except requests.exceptions.RequestException as err:
            # The error is part of the message itself: handing it over as an
            # extra logging argument without a placeholder breaks formatting.
            logger.warning(f"An unusual error for {dataserver} occurred: {err}")

        # only reached when the attempt above failed (success leaves via break)
        logger.warning("problem downloading file {} from {}. Let's see if there is another server.".format(target_path, URL))

    if requests_status != requests.codes["ok"]:
        message = f"error in download of file {target_path}. Tried all servers in {dataservers} without success."
        logger.error(message)
        raise IOError(message)

    folder = os.path.dirname(target_path)
    # A bare file name has no directory part; os.makedirs("") would raise.
    if folder:
        os.makedirs(folder, exist_ok=True)

    with open(target_path, "wb") as code:
        code.write(r.content)
    logger.warning("...download finished.")
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Did you forget to delete this line?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I intended to leave it here as is. The logger message command starts the line above, so just this line starting with a string looks odd here.