diff --git a/NuRadioMC/EvtGen/proposal_table_manager.py b/NuRadioMC/EvtGen/proposal_table_manager.py index e50c64a2f..c18caa12c 100755 --- a/NuRadioMC/EvtGen/proposal_table_manager.py +++ b/NuRadioMC/EvtGen/proposal_table_manager.py @@ -89,27 +89,14 @@ def download_proposal_tables(config_file, tables_path=None): tables_path = proposal_func._ProposalFunctions__tables_path # does not exist yet -> download file - import requests + from NuRadioReco.utilities.dataservers import download_from_dataserver proposal_version = proposal.__version__ - URL = f'https://rnog-data.zeuthen.desy.de/proposal_tables/v{proposal_version}/{get_compiler()}/{config_file}.tar.gz' + remote_path = f'proposal_tables/v{proposal_version}/{get_compiler()}/{config_file}.tar.gz' + target_path = f"{tables_path}/{config_file}.tar.gz" - folder = tables_path #os.path.dirname(tables_path) - if not os.path.exists(folder): - os.makedirs(folder) logger.warning( - "downloading pre-calculated proposal tables for {} from {}. This can take a while...".format(config_file, URL)) - r = requests.get(URL) - if r.status_code != requests.codes.ok: - logger.error("error in download of proposal tables") - raise IOError - - with open(f"{tables_path}/{config_file}.tar.gz", "wb") as code: - code.write(r.content) - logger.warning("...download finished.") - logger.warning(f"...unpacking archive to {tables_path}") - shutil.unpack_archive(f"{tables_path}/{config_file}.tar.gz", tables_path) - os.remove(f"{tables_path}/{config_file}.tar.gz") - + "downloading pre-calculated proposal tables for {}. 
This can take a while...".format(config_file)) + download_from_dataserver(remote_path, target_path, unpack_tarball=True) if __name__ == "__main__": diff --git a/NuRadioMC/SignalGen/ARZ/ARZ.py b/NuRadioMC/SignalGen/ARZ/ARZ.py index d4db40849..f11c64c1d 100644 --- a/NuRadioMC/SignalGen/ARZ/ARZ.py +++ b/NuRadioMC/SignalGen/ARZ/ARZ.py @@ -375,23 +375,17 @@ def __check_and_get_library(self): if("{:d}.{:d}".format(*self._version) in lib_hashs.keys()): if(sha1.hexdigest() != lib_hashs["{:d}.{:d}".format(*self._version)]): logger.warning("shower library {} has changed on the server. downloading newest version...".format(self._version)) + os.remove(path) download_file = True else: logger.warning("no hash sum of {} available, skipping up-to-date check".format(os.path.basename(path))) if not download_file: return True else: - import requests - URL = 'https://rnog-data.zeuthen.desy.de/shower_library/library_v{:d}.{:d}.pkl'.format(*self._version) + from NuRadioReco.utilities.dataservers import download_from_dataserver - logger.info("downloading shower library {} from {}. 
This can take a while...".format(self._version, URL)) - r = requests.get(URL) - if (r.status_code != requests.codes.ok): - logger.error("error in download of antenna model") - raise IOError("error in download of antenna model") - with open(path, "wb") as code: - code.write(r.content) - logger.info("...download finished.") + remote_path = 'shower_library/library_v{:d}.{:d}.pkl'.format(*self._version) + download_from_dataserver(remote_path, path) def __set_model_parameters(self, arz_version='ARZ2020'): """ diff --git a/NuRadioReco/detector/antennapattern.py b/NuRadioReco/detector/antennapattern.py index 2b0a6dd56..2962c51f4 100644 --- a/NuRadioReco/detector/antennapattern.py +++ b/NuRadioReco/detector/antennapattern.py @@ -536,7 +536,6 @@ def save_preprocessed_WIPLD_forARA(path): np.angle(H_theta[mask][i]) / units.deg, np.angle(H_phi[mask][i]) / units.deg)) - def get_pickle_antenna_response(path): """ opens and return the pickle file containing the preprocessed WIPL-D antenna simulation @@ -577,29 +576,19 @@ def get_pickle_antenna_response(path): if sha1.hexdigest() != antenna_hashs[os.path.basename(path)]: logger.warning("antenna model {} has changed on the server. downloading newest version...".format( os.path.basename(path))) + os.remove(path) # remove outdated file download_file = True else: logger.warning("no hash sum of {} available, skipping up-to-date check".format(os.path.basename(path))) if download_file: # does not exist yet -> download file - import requests + from NuRadioReco.utilities.dataservers import download_from_dataserver + antenna_pattern_name = os.path.splitext(os.path.basename(path))[0] - URL = 'https://rnog-data.zeuthen.desy.de/AntennaModels/{name}/{name}.pkl'.format( - name=antenna_pattern_name) - - folder = os.path.dirname(path) - if not os.path.exists(folder): - os.makedirs(folder) - logger.info( - "downloading antenna pattern {} from {}. 
import requests
import os
import filelock
import logging
import shutil
from glob import glob

logger = logging.getLogger('NuRadioReco.dataservers')

# Default download order: primary DESY server first, Chicago mirror second.
dataservers = ["https://rnog-data.zeuthen.desy.de", "https://rno-g.uchicago.edu/data/desy-mirror"]


def get_available_dataservers_by_responsetime(dataservers=dataservers):
    """
    Rank the dataservers by response time.

    Requests a small index page from each server in `dataservers` and returns
    only the servers that responded successfully, ordered fastest first.

    Parameters
    ----------
    dataservers: list of str
        server base URLs to probe (default: module-level `dataservers`)

    Returns
    -------
    list of str
        responsive dataservers, ordered by ascending response time
    """
    response_times = []
    available_dataservers = []

    for dataserver in dataservers:
        # get the index of the shower_library directory, because it is short
        testdir = f"{dataserver}/shower_library/"
        try:
            response = requests.get(testdir, timeout=5)
            response.raise_for_status()
        except requests.exceptions.RequestException:
            # FIX: was a bare `except:`, which also swallowed
            # KeyboardInterrupt/SystemExit; only network/HTTP errors mean
            # "server unavailable, skip it".
            continue
        response_times.append(response.elapsed)
        available_dataservers.append(dataserver)
    ranked_dataservers = [x for _, x in sorted(zip(response_times, available_dataservers))]
    return ranked_dataservers


def download_from_dataserver(remote_path, target_path, unpack_tarball=True, dataservers=dataservers, try_ordered=False):
    """
    Download `remote_path` to `target_path` from the list of NuRadio dataservers.

    A file lock next to `target_path` prevents concurrent processes from
    downloading the same file twice; servers are tried in order until one
    delivers the file.

    Parameters
    ----------
    remote_path: str
        path of the requested file relative to the server root
    target_path: str
        local path the downloaded file is written to
    unpack_tarball: bool (default: True)
        if True and the downloaded file is a ".tar.gz" archive, unpack it into
        the target directory and remove the archive afterwards
    dataservers: list of str
        servers to try, in order (default: module-level `dataservers`)
    try_ordered: bool (default: False)
        if True, probe the servers first and try them fastest-first

    Raises
    ------
    IOError
        if the file could not be downloaded from any of the servers
    """
    folder = os.path.dirname(target_path)
    if not os.path.exists(folder):
        os.makedirs(folder)

    lockfile = target_path + ".lock"
    lock = filelock.FileLock(lockfile)

    logger.warning(f"Assuring no other process is downloading. Will wait until {lockfile} is unlocked.")
    with lock:
        if os.path.isfile(target_path):
            logger.warning(f"{target_path} already exists. Maybe download was already completed by another instance?")
            return
        elif unpack_tarball and (len(glob(os.path.dirname(target_path) + "/*.dat")) > 0):
            # just check if any .dat files are present (similar to NuRadioProposal.py)
            logger.warning(f"{os.path.dirname(target_path)} contains .dat files. Maybe download was already completed by another instance?")
            return

        if try_ordered:
            dataservers = get_available_dataservers_by_responsetime(dataservers)

        # sentinel: stays "not found" unless some server succeeds
        requests_status = requests.codes["not_found"]
        for dataserver in dataservers:
            URL = f'{dataserver}/{remote_path}'

            logger.warning(
                "downloading file {} from {}. This can take a while...".format(target_path, URL))

            try:
                r = requests.get(URL)
                r.raise_for_status()
                requests_status = r.status_code
                break
            except requests.exceptions.HTTPError:
                logger.warning(f"HTTP Error for {dataserver}. Does the file {remote_path} exist on the server?")
            except requests.exceptions.ConnectionError:
                logger.warning(f"Error Connecting to {dataserver}. Maybe you don't have internet... or the server is down?")
            except requests.exceptions.Timeout:
                logger.warning(f"Timeout Error for {dataserver}.")
            except requests.exceptions.RequestException as err:
                # FIX: the original passed `err` as a stray positional argument to
                # logger.warning ("...occurred:", err); with no %-placeholder in the
                # message the exception was never rendered. Include it in the message.
                logger.warning(f"An unusual error for {dataserver} occurred: {err}")

            logger.warning("problem downloading file {} from {}. Let's see if there is another server.".format(target_path, URL))

        if requests_status != requests.codes["ok"]:
            # FIX: raise IOError with a message instead of a bare `raise IOError`,
            # so callers see why the download failed. The exception type is kept
            # because existing call sites expect IOError.
            msg = f"error in download of file {target_path}. Tried all servers in {dataservers} without success."
            logger.error(msg)
            raise IOError(msg)

        with open(target_path, "wb") as code:
            code.write(r.content)
        logger.warning("...download finished.")

        if unpack_tarball and target_path.endswith(".tar.gz"):
            target_dir = os.path.dirname(target_path)
            logger.warning(f"...unpacking archive to {target_dir}")
            shutil.unpack_archive(target_path, target_dir)
            os.remove(target_path)