Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Unify download for antenna models / proposal tables / shower library with additional fallback servers #673

Merged
merged 14 commits into from
Jun 14, 2024
Merged
18 changes: 5 additions & 13 deletions NuRadioMC/EvtGen/proposal_table_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,23 +89,15 @@ def download_proposal_tables(config_file, tables_path=None):
tables_path = proposal_func._ProposalFunctions__tables_path

# does not exist yet -> download file
import requests
from NuRadioReco.utilities.dataservers import download_from_dataserver
proposal_version = proposal.__version__
URL = f'https://rnog-data.zeuthen.desy.de/proposal_tables/v{proposal_version}/{get_compiler()}/{config_file}.tar.gz'
remote_path = f'proposal_tables/v{proposal_version}/{get_compiler()}/{config_file}.tar.gz'
target_path = f"{tables_path}/{config_file}.tar.gz"

folder = tables_path #os.path.dirname(tables_path)
if not os.path.exists(folder):
os.makedirs(folder)
logger.warning(
"downloading pre-calculated proposal tables for {} from {}. This can take a while...".format(config_file, URL))
r = requests.get(URL)
if r.status_code != requests.codes.ok:
logger.error("error in download of proposal tables")
raise IOError
"downloading pre-calculated proposal tables for {}. This can take a while...".format(config_file))
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

did you forget to delete this line?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I intended to leave it here as is. The logger message command starts the line above, so just this line starting with a string looks odd here.

download_from_dataserver(remote_path, target_path)

with open(f"{tables_path}/{config_file}.tar.gz", "wb") as code:
code.write(r.content)
logger.warning("...download finished.")
logger.warning(f"...unpacking archive to {tables_path}")
shutil.unpack_archive(f"{tables_path}/{config_file}.tar.gz", tables_path)
os.remove(f"{tables_path}/{config_file}.tar.gz")
Expand Down
13 changes: 3 additions & 10 deletions NuRadioMC/SignalGen/ARZ/ARZ.py
Original file line number Diff line number Diff line change
Expand Up @@ -381,17 +381,10 @@ def __check_and_get_library(self):
if not download_file:
return True
else:
import requests
URL = 'https://rnog-data.zeuthen.desy.de/shower_library/library_v{:d}.{:d}.pkl'.format(*self._version)
from NuRadioReco.utilities.dataservers import download_from_dataserver

logger.info("downloading shower library {} from {}. This can take a while...".format(self._version, URL))
r = requests.get(URL)
if (r.status_code != requests.codes.ok):
logger.error("error in download of antenna model")
raise IOError("error in download of antenna model")
with open(path, "wb") as code:
code.write(r.content)
logger.info("...download finished.")
remote_path = 'shower_library/library_v{:d}.{:d}.pkl'.format(*self._version)
download_from_dataserver(remote_path, path)

def __set_model_parameters(self, arz_version='ARZ2020'):
"""
Expand Down
22 changes: 5 additions & 17 deletions NuRadioReco/detector/antennapattern.py
Original file line number Diff line number Diff line change
Expand Up @@ -536,7 +536,6 @@ def save_preprocessed_WIPLD_forARA(path):
np.angle(H_theta[mask][i]) / units.deg,
np.angle(H_phi[mask][i]) / units.deg))


def get_pickle_antenna_response(path):
"""
opens and return the pickle file containing the preprocessed WIPL-D antenna simulation
Expand Down Expand Up @@ -583,23 +582,12 @@ def get_pickle_antenna_response(path):

if download_file:
# does not exist yet -> download file
import requests
from NuRadioReco.utilities.dataservers import download_from_dataserver

antenna_pattern_name = os.path.splitext(os.path.basename(path))[0]
URL = 'https://rnog-data.zeuthen.desy.de/AntennaModels/{name}/{name}.pkl'.format(
name=antenna_pattern_name)

folder = os.path.dirname(path)
if not os.path.exists(folder):
os.makedirs(folder)
logger.info(
"downloading antenna pattern {} from {}. This can take a while...".format(antenna_pattern_name, URL))
r = requests.get(URL)
if r.status_code != requests.codes.ok:
logger.error("error in download of antenna model")
raise IOError
with open(path, "wb") as code:
code.write(r.content)
logger.warning("...download finished.")
remote_path = 'AntennaModels/{name}/{name}.pkl'.format(name=antenna_pattern_name)

download_from_dataserver(remote_path, path)

# # does not exist yet -> precalculating WIPLD simulations from raw WIPLD output
# preprocess_WIPLD(path)
Expand Down
93 changes: 93 additions & 0 deletions NuRadioReco/utilities/dataservers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
import requests
import os

import logging
logger = logging.getLogger('NuRadioReco.dataservers')

dataservers = ["https://rnog-data.zeuthen.desy.de", "https://rno-g.uchicago.edu/data/desy-mirror"]

def get_available_dataservers_by_responsetime(dataservers=dataservers):
    """
    Request a small index file from each dataserver and return the responsive ones,
    ordered by response time (fastest first).

    Parameters
    ----------
    dataservers: list of str
        Base URLs of the candidate dataservers (default: module-level `dataservers`).

    Returns
    -------
    ranked_dataservers: list of str
        The subset of reachable dataservers, ordered by increasing response time.
    """
    response_times = []
    available_dataservers = []

    for dataserver in dataservers:
        # get the index of the shower_library directory, because it is short
        testdir = f"{dataserver}/shower_library/"
        try:
            response = requests.get(testdir, timeout=5)
            response.raise_for_status()
        except requests.exceptions.RequestException:
            # server unreachable or returned an error status -> skip it
            # (a bare `except:` here would also swallow KeyboardInterrupt/SystemExit)
            continue
        response_times.append(response.elapsed)
        available_dataservers.append(dataserver)
    ranked_dataservers = [x for _, x in sorted(zip(response_times, available_dataservers))]
    return ranked_dataservers

def get_available_dataservers_by_timezone(dataservers=dataservers):
    """
    Order the list of dataservers by timezone proximity to the local machine.

    The timezone of each server is determined via a GeoIP lookup of its IP
    address; servers are ranked by the circular difference between their UTC
    offset and the local UTC offset (closest first).

    Parameters
    ----------
    dataservers: list of str
        Base URLs of the candidate dataservers (default: module-level `dataservers`).

    Returns
    -------
    ranked_dataservers: list of str
        The dataservers ordered by increasing timezone distance.
    """
    import socket
    import pytz
    from datetime import datetime
    from urllib.parse import urlparse
    from geolite2 import geolite2

    geo = geolite2.reader()

    naive = datetime.utcnow()
    utcoffset_local = naive.astimezone().utcoffset().total_seconds() / 3600
    server_offsets = []
    for dataserver in dataservers:
        # gethostbyname() needs a bare hostname; passing the full URL
        # (e.g. "https://...") would raise socket.gaierror.
        hostname = urlparse(dataserver).hostname
        dataserver_ip = socket.gethostbyname(hostname)
        dataserver_timezone = geo.get(dataserver_ip)["location"]["time_zone"]
        timezone = pytz.timezone(dataserver_timezone)
        utcoffset_server = timezone.localize(naive).utcoffset().total_seconds() / 3600

        # circular distance between UTC offsets: offsets wrap around a 24h day,
        # so e.g. +13 and -11 are 0h apart, not 24h
        diff = (utcoffset_local - utcoffset_server) % 24
        server_offsets.append(min(diff, 24 - diff))

    ranked_dataservers = [x for _, x in sorted(zip(server_offsets, dataservers))]
    return ranked_dataservers

def download_from_dataserver(remote_path, target_path, dataservers=dataservers, try_ordered=False):
    """
    Download remote_path to target_path from the list of NuRadio dataservers.

    Each server is tried in turn until one succeeds; the first successful
    response is written to `target_path` (parent directories are created).

    Parameters
    ----------
    remote_path: str
        Path of the file on the server, relative to the server root.
    target_path: str
        Local path the downloaded file is written to.
    dataservers: list of str
        Base URLs of the servers to try (default: module-level `dataservers`).
    try_ordered: bool
        If True, rank the servers by response time before downloading.

    Raises
    ------
    IOError
        If the file could not be downloaded from any of the servers.
    """
    if try_ordered:
        dataservers = get_available_dataservers_by_responsetime(dataservers)

    requests_status = requests.codes["not_found"]
    for dataserver in dataservers:
        URL = f'{dataserver}/{remote_path}'

        logger.warning(
            "downloading file {} from {}. This can take a while...".format(target_path, URL))

        try:
            r = requests.get(URL)
            r.raise_for_status()
            requests_status = r.status_code
            break
        except requests.exceptions.HTTPError:
            logger.warning(f"HTTP Error for {dataserver}. Does the file {remote_path} exist on the server?")
        except requests.exceptions.ConnectionError:
            logger.warning(f"Error Connecting to {dataserver}. Maybe you don't have internet... or the server is down?")
        except requests.exceptions.Timeout:
            logger.warning(f"Timeout Error for {dataserver}.")
        except requests.exceptions.RequestException as err:
            # interpolate the exception into the message: passing it as a lazy
            # %-argument without a placeholder raises a logging format error
            logger.warning(f"An unusual error for {dataserver} occurred: {err}")

        logger.warning("problem downloading file {} from {}. Let's see if there is another server.".format(target_path, URL))

    if requests_status != requests.codes["ok"]:
        logger.error(f"error in download of file {target_path}. Tried all servers in {dataservers} without success.")
        raise IOError(f"Could not download {remote_path} from any of the dataservers {dataservers}")

    folder = os.path.dirname(target_path)
    if folder:
        # exist_ok avoids a race between the existence check and the creation
        os.makedirs(folder, exist_ok=True)

    with open(target_path, "wb") as code:
        code.write(r.content)
    logger.warning("...download finished.")
3 changes: 3 additions & 0 deletions changelog.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,9 @@ please update the categories "new features" and "bugfixes" before a pull request

version 2.3.0-dev
new features:
- Added download utility unifying download of antenna models/proposal tables/shower
library with possibility to add/change fallback-server(s). Added Chicago
server as fallback.
- Added new detector class for RNO-G (+db interface) which uses a mongo-db as source.
The new class allows import/export via compressed json files and has a buffer machinery.
It comes with a class to handle responses of the different signal chain components easily.
Expand Down