diff --git a/satip/app.py b/satip/app.py index 88448ffc..5404049c 100644 --- a/satip/app.py +++ b/satip/app.py @@ -14,7 +14,7 @@ import satip from satip import utils -from satip.eumetsat import DownloadManager +from satip.eumetsat import EUMETSATDownloadManager log = structlog.stdlib.get_logger() @@ -137,7 +137,7 @@ def run( ) # 1. Get data from API, download native files with tempfile.TemporaryDirectory() as tmpdir: - download_manager = DownloadManager( + download_manager = EUMETSATDownloadManager( user_key=api_key, user_secret=api_secret, data_dir=tmpdir, diff --git a/satip/download.py b/satip/download.py index 1c7b67a2..d1057ed8 100644 --- a/satip/download.py +++ b/satip/download.py @@ -28,6 +28,7 @@ import yaml from satip import eumetsat +from satip.eumetsat import EUMETSATDownloadManager from satip.utils import format_dt_str log = structlog.stdlib.get_logger() @@ -110,7 +111,7 @@ def download_eumetsat_data( end_date = datetime.now() # Download the data - dm = eumetsat.DownloadManager(user_key, user_secret, download_directory, download_directory) + dm = EUMETSATDownloadManager(user_key, user_secret, download_directory, download_directory) products_to_use = [] if "rss" in product: products_to_use.append(RSS_ID) @@ -159,7 +160,7 @@ def download_eumetsat_data( def _download_time_range( - x: Tuple[Tuple[datetime, datetime], str, eumetsat.DownloadManager] + x: Tuple[Tuple[datetime, datetime], str, EUMETSATDownloadManager] ) -> None: time_range, product_id, download_manager = x start_time, end_time = time_range diff --git a/satip/download_manager.py b/satip/download_manager.py new file mode 100644 index 00000000..26f2e6be --- /dev/null +++ b/satip/download_manager.py @@ -0,0 +1,90 @@ +"""Satip Download Manager + +This module provides a unified interface for downloading EUMETSAT and GOES +satellite data via the `DownloadManager` class. 
class DownloadManager:
    """Provider-agnostic front end for the satellite download managers.

    Depending on ``provider`` the instance wraps either an
    ``EUMETSATDownloadManager`` or a ``GOESDownloadManager`` and exposes it
    as ``self.download_manager``.

    Example usage::

        import datetime

        manager = DownloadManager(
            "GOES",
            data_dir="path/to/data",
            log_directory="path/to/logs",
        )
        manager.download_data(
            datetime.datetime(2024, 3, 1, 0, 0),
            datetime.datetime(2024, 3, 1, 6, 0),
        )
    """

    # Provider names accepted by ``__init__`` (compared after upper-casing).
    _SUPPORTED_PROVIDERS = ("EUMETSAT", "GOES")

    def __init__(self, provider, user_key=None,
                 user_secret=None, data_dir=None,
                 log_directory=None):
        """
        Initialize the DownloadManager.

        Args:
            provider (str): Provider name, 'EUMETSAT' or 'GOES'. Matching is
                case-insensitive and ignores surrounding whitespace; the
                normalised (upper-case) name is stored in ``self.provider``.
            user_key (str): User key for accessing data (EUMETSAT only).
            user_secret (str): User secret for accessing data (EUMETSAT only).
            data_dir (str): Directory to save downloaded data.
            log_directory (str): Directory to save logs.

        Raises:
            ValueError: If ``provider`` is not one of the supported providers.
        """
        normalised = provider.strip().upper() if isinstance(provider, str) else provider

        # Validate before touching any backend so a bad provider never
        # leaves a half-initialised instance behind.
        if normalised not in self._SUPPORTED_PROVIDERS:
            raise ValueError("Invalid provider. Supported providers are 'EUMETSAT' and 'GOES'.")

        self.provider = normalised

        if normalised == "EUMETSAT":
            self.download_manager = EUMETSATDownloadManager(user_key, user_secret,
                                                            data_dir, log_directory)
        else:
            self.download_manager = GOESDownloadManager(data_dir, log_directory)

    def download_data(self, start_time, end_time):
        """
        Download data for the specified time range.

        Only the GOES backend is wired up here; it is driven through
        ``download_goes_data(start_time, end_time)``.

        Args:
            start_time (datetime): Start of the download period.
            end_time (datetime): End of the download period.

        Raises:
            NotImplementedError: If the provider is not 'GOES'. Previously
                this case returned silently without downloading anything.
        """
        if self.provider == "GOES":
            self.download_manager.download_goes_data(start_time, end_time)
            return

        # Fail loudly instead of pretending the download happened.
        raise NotImplementedError(
            f"download_data is not implemented for provider '{self.provider}'; "
            "call the wrapped manager in `download_manager` directly."
        )
def eumetsat_cloud_name_to_datetime(filename: str):
    """Extract the timestamp from an EUMETSAT cloud-mask filename.

    The timestamp is the text that follows the last ``0100-0100-`` marker
    (or the whole name if the marker is absent), cut at the first ``.``,
    and parsed with the format ``%Y%m%d%H%M%S``.

    Args:
        filename: Cloud-mask filename containing the timestamp.

    Returns:
        The parsed ``datetime.datetime`` (no timezone attached).

    Raises:
        ValueError: If the extracted text does not match ``%Y%m%d%H%M%S``.
    """
    # Everything after the final marker; the full name when there is none.
    _, _, after_marker = filename.rpartition("0100-0100-")
    # Drop the fractional part / extension that follows the first dot.
    timestamp_text, _, _ = after_marker.partition(".")
    return datetime.datetime.strptime(timestamp_text, "%Y%m%d%H%M%S")
class GOESDownloadManager:
    """
    Manager class for downloading GOES data.

    Each time step is fetched with ``GOES.nearesttime`` and written to
    ``data_dir`` as ``goes_data_<YYYY-mm-dd_HH-MM-SS>.nc``.
    """

    # Reference instant for integer timestamps (see ``_acquisition_datetime``).
    _EPOCH = datetime.datetime(1970, 1, 1)

    def __init__(self, data_dir, log_directory=None):
        """
        Initialize the GOESDownloadManager.

        Args:
            data_dir (str): Directory to save downloaded GOES data.
            log_directory (str, optional): Directory to save logs.
                If None, logging is printed to STDOUT.
        """
        self.data_dir = data_dir
        self.ensure_directory_exists(self.data_dir)

        if log_directory:
            self.ensure_directory_exists(log_directory)
            logging.basicConfig(
                filename=os.path.join(log_directory, 'goes_download.log'),
                level=logging.INFO)
        else:
            logging.basicConfig(level=logging.INFO)

        logging.info(f"GOESDownloadManager initialized. Data will be saved to: {data_dir}")

    @staticmethod
    def ensure_directory_exists(directory):
        """Ensures the specified directory exists, creating it if necessary.

        Raises:
            OSError: If the directory cannot be created (logged, then re-raised).
        """
        already_there = os.path.isdir(directory)
        try:
            # exist_ok avoids failing when another process creates the
            # directory between the check above and this call.
            os.makedirs(directory, exist_ok=True)
        except OSError as e:
            logging.error(f"Error creating directory {directory}: {e}")
            raise
        if not already_there:
            logging.info(f"Created directory: {directory}")

    @classmethod
    def _acquisition_datetime(cls, raw_time):
        """Convert the dataset's scalar time value into a ``datetime``.

        ``numpy.datetime64.item()`` returns a plain ``int`` when the value's
        resolution is finer than microseconds, which has no ``strftime``.

        Args:
            raw_time: Value returned by ``ds.time.data.item()``.

        Returns:
            A ``datetime``/``date`` object usable with ``strftime``.

        Raises:
            TypeError: If ``raw_time`` is neither date-like nor an ``int``.
        """
        if hasattr(raw_time, "strftime"):
            return raw_time
        if isinstance(raw_time, int):
            # assumes nanoseconds since the Unix epoch (xarray's default
            # datetime64[ns]) — TODO confirm against goes2go output
            return cls._EPOCH + datetime.timedelta(microseconds=raw_time // 1000)
        raise TypeError(f"Unsupported acquisition time value: {raw_time!r}")

    def _download_timestep(self, goes, current_time):
        """Fetch the scan nearest to ``current_time`` and save it unless already on disk."""
        ds = goes.nearesttime(current_time)

        # Name the file after the acquisition time reported by the dataset,
        # not the requested time.
        acquisition_time = self._acquisition_datetime(ds.time.data.item())
        date_string = acquisition_time.strftime("%Y-%m-%d_%H-%M-%S")
        filename = f"goes_data_{date_string}.nc"
        filepath = os.path.join(self.data_dir, filename)

        if os.path.exists(filepath):
            logging.info(f"Data for {date_string} already exists. Skipping.")
            return

        ds.to_netcdf(filepath)
        logging.info(f"Downloaded and saved GOES data to: {filename}")

    def download_goes_data(self, start_time, end_time, product='ABI-L1b-RadC',
                           domain='F', satellite=16):
        """
        Download GOES data for a specified time range and product.

        Time steps run from ``start_time`` to ``end_time`` inclusive. A failure
        at one step is logged and the loop moves on to the next step.

        Args:
            start_time (datetime): Start of the download period.
            end_time (datetime): End of the download period.
            product (str): GOES product identifier. Default is 'ABI-L1b-RadC'.
            domain (str): Domain for the product. Default is 'F' (Full Disk).
            satellite (int): GOES satellite number. Default is 16.
        """
        G = GOES(satellite=satellite, product=product, domain=domain)

        # Determine time increment based on product/domain.
        # NOTE(review): the 'RadC' product name paired with domain 'F' looks
        # inconsistent — confirm which combination is intended.
        time_increment = 1  # Default time increment (minutes)
        if product == 'ABI-L1b-RadC' and domain == 'F':
            time_increment = 10
        step = datetime.timedelta(minutes=time_increment)

        current_time = start_time
        while current_time <= end_time:
            try:
                self._download_timestep(G, current_time)
            except Exception as e:
                # Best effort: one bad time step must not abort the whole range.
                logging.error(f"Error downloading GOES data for {current_time}: {e}")
            current_time += step

        logging.info("Completed GOES data download.")
user_secret = os.environ.get("EUMETSAT_USER_SECRET") - download_manager = eumetsat.DownloadManager( + download_manager = EUMETSATDownloadManager( user_key=user_key, user_secret=user_secret, data_dir=os.getcwd(), diff --git a/scripts/process_monthly_zarrs.py b/scripts/process_monthly_zarrs.py index 01ae1cfa..0e4d23c8 100644 --- a/scripts/process_monthly_zarrs.py +++ b/scripts/process_monthly_zarrs.py @@ -12,7 +12,7 @@ from satpy import Scene from tqdm import tqdm -from satip.eumetsat import DownloadManager, eumetsat_filename_to_datetime +from satip.eumetsat import EUMETSATDownloadManager, eumetsat_filename_to_datetime from satip.jpeg_xl_float_with_nans import JpegXlFloatWithNaNs from satip.scale_to_zero_to_one import ScaleToZeroToOne from satip.serialize import serialize_attrs @@ -32,7 +32,7 @@ def func(datasets_and_tuples_and_return_data): datasets = [datasets] api_key = os.environ["SAT_API_KEY"] api_secret = os.environ["SAT_API_SECRET"] - download_manager = DownloadManager( + download_manager = EUMETSATDownloadManager( user_key=api_key, user_secret=api_secret, data_dir=tmpdir ) download_manager.download_datasets(datasets) @@ -279,7 +279,7 @@ def create_dummy_zarr(datasets, base_path): date_range = pd.date_range(start="2011-01-01 00:00", end="2019-01-01 00:00", freq="1M") api_key = os.environ["SAT_API_KEY"] api_secret = os.environ["SAT_API_SECRET"] - download_manager = DownloadManager(user_key=api_key, user_secret=api_secret, data_dir="./") + download_manager = EUMETSATDownloadManager(user_key=api_key, user_secret=api_secret, data_dir="./") first = True for date in date_range[::-1]: start_date = pd.Timestamp(date) - pd.Timedelta("1M") diff --git a/tests/test_eumetsat.py b/tests/test_eumetsat.py index a210cb79..a6ecfe4d 100644 --- a/tests/test_eumetsat.py +++ b/tests/test_eumetsat.py @@ -5,7 +5,7 @@ from datetime import datetime, timezone, timedelta import pandas as pd -from satip.eumetsat import DownloadManager, eumetsat_filename_to_datetime +from satip.eumetsat 
import EUMETSATDownloadManager, eumetsat_filename_to_datetime def test_download_manager_setup(): @@ -14,7 +14,7 @@ def test_download_manager_setup(): user_secret = os.environ.get("EUMETSAT_USER_SECRET") with tempfile.TemporaryDirectory() as tmpdirname: - _ = DownloadManager( + _ = EUMETSATDownloadManager( user_key=user_key, user_secret=user_secret, data_dir=tmpdirname, @@ -43,7 +43,7 @@ def test_data_tailor_identify_available_datasets(): end_date = datetime.now(tz=timezone.utc) with tempfile.TemporaryDirectory() as tmpdirname: - download_manager = DownloadManager( + download_manager = EUMETSATDownloadManager( user_key=user_key, user_secret=user_secret, data_dir=tmpdirname, @@ -69,7 +69,7 @@ def test_data_tailor(): end_date = datetime.now(tz=timezone.utc) with tempfile.TemporaryDirectory() as tmpdirname: - download_manager = DownloadManager( + download_manager = EUMETSATDownloadManager( user_key=user_key, user_secret=user_secret, data_dir=tmpdirname, diff --git a/tests/test_utils.py b/tests/test_utils.py index ca98d3e1..347f028a 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -24,6 +24,7 @@ LATEST_DIR_NAME, ) +from satip.eumetsat import EUMETSATDownloadManager USER_KEY = os.environ.get("EUMETSAT_USER_KEY") USER_SECRET = os.environ.get("EUMETSAT_USER_SECRET") RSS_ID = "EO:EUM:DAT:MSG:MSG15-RSS" @@ -37,7 +38,7 @@ def setUp() -> None: # noqa D102 if len(list(glob.glob(os.path.join(os.getcwd(), "*.nat")))) == 0: from satip import eumetsat - download_manager = eumetsat.DownloadManager( + download_manager = EUMETSATDownloadManager( user_key=USER_KEY, user_secret=USER_SECRET, data_dir=os.getcwd(), @@ -63,7 +64,7 @@ def setUp() -> None: # noqa D102 cloud_mask_filename = list(glob.glob(os.path.join(os.getcwd(), "*.grb")))[0] return rss_filename, cloud_mask_filename -@pytest.mark.usefixtures("setup") +@pytest.mark.usefixtures("setUp") class TestSatipUtils: """Tests for satip.utils.""" @@ -75,21 +76,23 @@ def test_load_cloudmask_to_dataarray(self): # noqa D102 ) 
assert isinstance(cloudmask_dataarray, xarray.DataArray) - def test_load_native_to_dataarray(self): # noqa D102 + def test_load_native_to_dataarray(self, setUp): # noqa D102 + rss_filename, _ = setUp for area in ["UK", "RSS"]: rss_dataarray, hrv_dataarray = load_native_to_dataarray( - Path(self.rss_filename), temp_directory=Path(os.getcwd()), area=area + Path(rss_filename), temp_directory=Path(os.getcwd()), area=area ) assert isinstance(rss_dataarray, xarray.DataArray) assert isinstance(hrv_dataarray, xarray.DataArray) - def test_save_dataarray_to_zarr(self): # noqa D102 + def test_save_dataarray_to_zarr(self, setUp): # noqa D102 + rss_filename, _ = setUp # The following is a bit ugly, but since we do not want to lump two tests into one # test function but save_dataarray_to_zarr depends on a dataarray being loaded, # we have to reload the dataarray here. This means that this test can theoretically # fail for two reasons: Either the data-loading failed, or the data-saving failed. rss_dataarray, _ = load_native_to_dataarray( - Path(self.rss_filename), temp_directory=Path(os.getcwd()), area="UK" + Path(rss_filename), temp_directory=Path(os.getcwd()), area="UK" ) zarr_path = os.path.join(os.getcwd(), "tmp.zarr")