Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add GOES Data Download Manager Script #240

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions satip/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@

import satip
from satip import utils
from satip.eumetsat import DownloadManager
from satip.eumetsat import EUMETSATDownloadManager

log = structlog.stdlib.get_logger()

Expand Down Expand Up @@ -137,7 +137,7 @@ def run(
)
# 1. Get data from API, download native files
with tempfile.TemporaryDirectory() as tmpdir:
download_manager = DownloadManager(
download_manager = EUMETSATDownloadManager(
user_key=api_key,
user_secret=api_secret,
data_dir=tmpdir,
Expand Down
5 changes: 3 additions & 2 deletions satip/download.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
import yaml

from satip import eumetsat
from satip.eumetsat import EUMETSATDownloadManager
from satip.utils import format_dt_str

log = structlog.stdlib.get_logger()
Expand Down Expand Up @@ -110,7 +111,7 @@ def download_eumetsat_data(
end_date = datetime.now()

# Download the data
dm = eumetsat.DownloadManager(user_key, user_secret, download_directory, download_directory)
dm = EUMETSATDownloadManager(user_key, user_secret, download_directory, download_directory)
products_to_use = []
if "rss" in product:
products_to_use.append(RSS_ID)
Expand Down Expand Up @@ -159,7 +160,7 @@ def download_eumetsat_data(


def _download_time_range(
x: Tuple[Tuple[datetime, datetime], str, eumetsat.DownloadManager]
x: Tuple[Tuple[datetime, datetime], str, EUMETSATDownloadManager]
) -> None:
time_range, product_id, download_manager = x
start_time, end_time = time_range
Expand Down
90 changes: 90 additions & 0 deletions satip/download_manager.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
"""Satip Download Manager

This module provides a unified interface for downloading EUMETSAT and GOES
satellite data via the `DownloadManager` class. Users specify the provider
('EUMETSAT' or 'GOES'), and the manager delegates tasks to dedicated
sub-modules for retrieval, storage, and logging.

Key functionalities:

* Download data for a specified time range (currently implemented for GOES only).
* Handle user authentication (for EUMETSAT data).
* Manage data retrieval, storage, and logging for both providers.
"""

import warnings

import structlog

from satip.eumetsat import EUMETSATDownloadManager
from satip.goes_download_manager import GOESDownloadManager

log = structlog.stdlib.get_logger()

# Suppress FutureWarning related to 'H' argument
warnings.filterwarnings('ignore', category=FutureWarning)
# constants for different data sources
EUMETSAT_PROVIDER = "EUMETSAT"
GOES_PROVIDER = "GOES"


class DownloadManager:
    """Dispatch satellite data downloads to a provider-specific manager.

    The constructor builds either an `EUMETSATDownloadManager` or a
    `GOESDownloadManager`, depending on `provider`, and stores it on
    `self.download_manager`.

    Example usage:

        import datetime

        provider = "GOES"
        data_dir = "path to data directory"
        log_directory = "path to log directory"

        start_time = datetime.datetime(2024, 3, 1, 0, 0)
        end_time = datetime.datetime(2024, 3, 1, 6, 0)

        manager = DownloadManager(provider, None, None, data_dir, log_directory)
        manager.download_data(start_time, end_time)
    """

    def __init__(self, provider, user_key=None,
                 user_secret=None, data_dir=None,
                 log_directory=None):
        """Initialize the DownloadManager.

        Args:
            provider (str): Provider name ('EUMETSAT' or 'GOES'); the comparison
                is case-sensitive.
            user_key (str): User key for accessing data (for EUMETSAT).
            user_secret (str): User secret for accessing data (for EUMETSAT).
            data_dir (str): Directory to save downloaded data.
            log_directory (str): Directory to save logs.

        Raises:
            ValueError: If `provider` is not one of the supported providers.
        """
        self.provider = provider

        if self.provider == EUMETSAT_PROVIDER:
            self.download_manager = EUMETSATDownloadManager(
                user_key, user_secret, data_dir, log_directory
            )
        elif self.provider == GOES_PROVIDER:
            self.download_manager = GOESDownloadManager(data_dir, log_directory)
        else:
            raise ValueError("Invalid provider. Supported providers are 'EUMETSAT' and 'GOES'.")

    def download_data(self, start_time, end_time):
        """Download data for the specified time range.

        Only the GOES provider is wired up here. For any other provider no
        download is attempted and a warning is logged, so the call is not a
        silent no-op.

        Args:
            start_time (datetime): Start of the download period.
            end_time (datetime): End of the download period.
        """
        if self.provider == GOES_PROVIDER:
            self.download_manager.download_goes_data(start_time, end_time)
        else:
            # Previously this path returned silently, which made a missing
            # download indistinguishable from a successful one.
            log.warning(
                f"download_data is not implemented for provider {self.provider!r}; "
                "no data was downloaded. Use the provider-specific manager in "
                "'download_manager' directly."
            )
68 changes: 33 additions & 35 deletions satip/eumetsat.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@

log = structlog.stdlib.get_logger()


API_ENDPOINT = "https://api.eumetsat.int"

# Data Store searching endpoint
Expand Down Expand Up @@ -195,13 +196,41 @@ def dataset_id_to_link(collection_id, data_id, access_token):
+ access_token
)

def get_filesize_megabytes(filename):
    """Return the size of `filename` in megabytes (bytes divided by 1e6)."""
    return os.stat(filename).st_size / 1e6


def eumetsat_filename_to_datetime(inner_tar_name):
    """Extract the datetime from an EUMETSAT filename.

    The name must start with `MSG<1-4>-SEVI-MSG15-0<0|1>00-NA-` followed by a
    `YYYYmmddHHMMSS` timestamp and a dot.

    Args:
        inner_tar_name: Filename part which contains the datetime information.

    Returns:
        A naive `datetime.datetime` parsed from the timestamp in the filename.

    Raises:
        ValueError: If the filename does not match the expected pattern or the
            timestamp digits cannot be parsed.

    Usage example:
        eumetsat_filename_to_datetime(filename)
    """
    pattern = re.compile(r"^MSG[1234]-SEVI-MSG15-0[01]00-NA-(\d*)\.")
    title_match = pattern.match(inner_tar_name)
    if title_match is None:
        # Without this guard a non-matching name fails with an opaque
        # AttributeError on None; strptime already raises ValueError for bad
        # timestamps, so use the same exception type here.
        raise ValueError(
            f"Cannot extract datetime from unrecognised EUMETSAT filename: {inner_tar_name!r}"
        )
    return datetime.datetime.strptime(title_match.group(1), "%Y%m%d%H%M%S")


def eumetsat_cloud_name_to_datetime(filename: str):
    """Return the datetime part of an EUMETSAT cloud mask filename.

    The timestamp is the text after the last "0100-0100-" marker, up to the
    first following dot, parsed as `YYYYmmddHHMMSS`.
    """
    _, _, after_marker = filename.rpartition("0100-0100-")
    timestamp, _, _ = after_marker.partition(".")
    return datetime.datetime.strptime(timestamp, "%Y%m%d%H%M%S")



class EUMETSATDownloadManager:
"""
Manager class for downloading EUMETSAT data.
"""

def __init__(
Expand Down Expand Up @@ -648,34 +677,3 @@ def create_and_download_datatailor_data(

except Exception as e:
log.warn(f"Failed deleting customization {jobID}: {e}", exc_info=True)


def get_filesize_megabytes(filename):
    """Return the size of `filename` in megabytes (bytes divided by 1e6)."""
    return os.stat(filename).st_size / 1e6


def eumetsat_filename_to_datetime(inner_tar_name):
    """Extract the datetime from an EUMETSAT filename.

    The name must start with `MSG<1-4>-SEVI-MSG15-0<0|1>00-NA-` followed by a
    `YYYYmmddHHMMSS` timestamp and a dot.

    Args:
        inner_tar_name: Filename part which contains the datetime information.

    Returns:
        A naive `datetime.datetime` parsed from the timestamp in the filename.

    Raises:
        ValueError: If the filename does not match the expected pattern or the
            timestamp digits cannot be parsed.

    Usage example:
        eumetsat_filename_to_datetime(filename)
    """
    pattern = re.compile(r"^MSG[1234]-SEVI-MSG15-0[01]00-NA-(\d*)\.")
    title_match = pattern.match(inner_tar_name)
    if title_match is None:
        # Without this guard a non-matching name fails with an opaque
        # AttributeError on None; strptime already raises ValueError for bad
        # timestamps, so use the same exception type here.
        raise ValueError(
            f"Cannot extract datetime from unrecognised EUMETSAT filename: {inner_tar_name!r}"
        )
    return datetime.datetime.strptime(title_match.group(1), "%Y%m%d%H%M%S")


def eumetsat_cloud_name_to_datetime(filename: str):
    """Return the datetime part of an EUMETSAT cloud mask filename.

    The timestamp is the text after the last "0100-0100-" marker, up to the
    first following dot, parsed as `YYYYmmddHHMMSS`.
    """
    _, _, after_marker = filename.rpartition("0100-0100-")
    timestamp, _, _ = after_marker.partition(".")
    return datetime.datetime.strptime(timestamp, "%Y%m%d%H%M%S")
96 changes: 96 additions & 0 deletions satip/goes_download_manager.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
"""
Script for downloading GOES data.
"""

import datetime
import logging
import os

from goes2go import GOES


class GOESDownloadManager:
    """Manager class for downloading GOES data."""

    def __init__(self, data_dir, log_directory=None):
        """Initialize the GOESDownloadManager.

        Creates `data_dir` (and `log_directory`, when given) if missing and
        configures the root logger via `logging.basicConfig`.

        Args:
            data_dir (str): Directory to save downloaded GOES data.
            log_directory (str, optional): Directory to save logs. When given,
                logs go to 'goes_download.log' inside it; otherwise
                `logging.basicConfig` is called without a file.
        """
        self.data_dir = data_dir
        self.ensure_directory_exists(self.data_dir)

        if log_directory:
            self.ensure_directory_exists(log_directory)
            logging.basicConfig(
                filename=os.path.join(log_directory, 'goes_download.log'),
                level=logging.INFO)
        else:
            logging.basicConfig(level=logging.INFO)

        logging.info("GOESDownloadManager initialized. Data will be saved to: %s", data_dir)

    @staticmethod
    def ensure_directory_exists(directory):
        """Ensure the specified directory exists, creating it if necessary.

        Raises:
            OSError: If the directory cannot be created (logged, then re-raised).
        """
        if os.path.exists(directory):
            return
        try:
            # exist_ok=True tolerates another process creating the directory
            # between the existence check above and this call.
            os.makedirs(directory, exist_ok=True)
        except OSError as e:
            logging.error("Error creating directory %s: %s", directory, e)
            raise
        logging.info("Created directory: %s", directory)

    @staticmethod
    def _acquisition_datetime(value):
        """Convert a dataset time value to a `datetime.datetime`.

        Accepts anything exposing `.item()` (e.g. a 0-d NumPy array) or an
        already-converted value. NumPy returns a plain `int` of nanoseconds
        since the epoch from `.item()` on nanosecond-precision `datetime64`,
        which has no `strftime`; that case is converted explicitly.

        Raises:
            TypeError: If the value cannot be interpreted as a datetime.
        """
        raw = value.item() if hasattr(value, "item") else value
        if isinstance(raw, datetime.datetime):
            return raw
        if isinstance(raw, datetime.date):
            return datetime.datetime.combine(raw, datetime.time.min)
        if isinstance(raw, int):
            # Nanoseconds since 1970-01-01; datetime only resolves microseconds.
            return datetime.datetime(1970, 1, 1) + datetime.timedelta(microseconds=raw // 1000)
        raise TypeError(f"Unsupported acquisition time value: {raw!r}")

    def download_goes_data(self, start_time, end_time, product='ABI-L1b-RadC',
                           domain='F', satellite=16):
        """Download GOES data for a specified time range and product.

        Steps from `start_time` to `end_time` (inclusive), fetches the dataset
        nearest to each step and writes it to
        `<data_dir>/goes_data_<acquisition time>.nc`, skipping files that
        already exist. Errors for an individual step are logged and the loop
        continues with the next step.

        Args:
            start_time (datetime): Start of the download period.
            end_time (datetime): End of the download period.
            product (str): GOES product identifier. Default is 'ABI-L1b-RadC'.
            domain (str): Domain for the product. Default is 'F'.
                NOTE(review): the default product name ends in 'C' while the
                default domain is 'F' - confirm the intended combination.
            satellite (int): GOES satellite number. Default is 16.
        """
        G = GOES(satellite=satellite, product=product, domain=domain)

        # Determine time increment based on product/domain
        minutes = 10 if (product == 'ABI-L1b-RadC' and domain == 'F') else 1
        step = datetime.timedelta(minutes=minutes)

        current_time = start_time
        while current_time <= end_time:
            try:
                ds = G.nearesttime(current_time)

                # assumes the dataset exposes a scalar `time` - TODO confirm
                acquisition_time = self._acquisition_datetime(ds.time.data)

                date_string = acquisition_time.strftime("%Y-%m-%d_%H-%M-%S")
                filename = f"goes_data_{date_string}.nc"
                filepath = os.path.join(self.data_dir, filename)

                if os.path.exists(filepath):
                    logging.info("Data for %s already exists. Skipping.", date_string)
                else:
                    ds.to_netcdf(filepath)
                    logging.info("Downloaded and saved GOES data to: %s", filename)
            except Exception as e:
                # Best effort: one failed time step must not abort the whole range.
                logging.error("Error downloading GOES data for %s: %s", current_time, e)

            current_time += step

        logging.info("Completed GOES data download.")
4 changes: 2 additions & 2 deletions scripts/extend_gcp_zarr.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import xarray as xr
import satpy
from satpy import Scene
from satip.eumetsat import DownloadManager
from satip.eumetsat import EUMETSATDownloadManager
from satip.scale_to_zero_to_one import ScaleToZeroToOne
from satip.serialize import serialize_attrs
from satip.utils import convert_scene_to_dataarray
Expand All @@ -17,7 +17,7 @@
def download_data(last_zarr_time):
api_key = os.environ["SAT_API_KEY"]
api_secret = os.environ["SAT_API_SECRET"]
download_manager = DownloadManager(user_key=api_key, user_secret=api_secret, data_dir="/mnt/disks/data/native_files/")
download_manager = EUMETSATDownloadManager(user_key=api_key, user_secret=api_secret, data_dir="/mnt/disks/data/native_files/")
start_date = pd.Timestamp.utcnow().tz_convert('UTC').to_pydatetime().replace(tzinfo=None)
last_zarr_time = pd.Timestamp(last_zarr_time).to_pydatetime().replace(tzinfo=None)
start_str = last_zarr_time.strftime("%Y-%m-%d")
Expand Down
4 changes: 2 additions & 2 deletions scripts/generate_test_plots.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
import matplotlib.pyplot as plt
import xarray as xr

from satip import eumetsat
from satip import EUMETSATDownloadManager
from satip.utils import (
load_cloudmask_to_dataarray,
load_native_to_dataarray,
Expand All @@ -34,7 +34,7 @@ def generate_test_plots():
user_key = os.environ.get("EUMETSAT_USER_KEY")
user_secret = os.environ.get("EUMETSAT_USER_SECRET")

download_manager = eumetsat.DownloadManager(
download_manager = EUMETSATDownloadManager(
user_key=user_key,
user_secret=user_secret,
data_dir=os.getcwd(),
Expand Down
6 changes: 3 additions & 3 deletions scripts/process_monthly_zarrs.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
from satpy import Scene
from tqdm import tqdm

from satip.eumetsat import DownloadManager, eumetsat_filename_to_datetime
from satip.eumetsat import EUMETSATDownloadManager, eumetsat_filename_to_datetime
from satip.jpeg_xl_float_with_nans import JpegXlFloatWithNaNs
from satip.scale_to_zero_to_one import ScaleToZeroToOne
from satip.serialize import serialize_attrs
Expand All @@ -32,7 +32,7 @@ def func(datasets_and_tuples_and_return_data):
datasets = [datasets]
api_key = os.environ["SAT_API_KEY"]
api_secret = os.environ["SAT_API_SECRET"]
download_manager = DownloadManager(
download_manager = EUMETSATDownloadManager(
user_key=api_key, user_secret=api_secret, data_dir=tmpdir
)
download_manager.download_datasets(datasets)
Expand Down Expand Up @@ -279,7 +279,7 @@ def create_dummy_zarr(datasets, base_path):
date_range = pd.date_range(start="2011-01-01 00:00", end="2019-01-01 00:00", freq="1M")
api_key = os.environ["SAT_API_KEY"]
api_secret = os.environ["SAT_API_SECRET"]
download_manager = DownloadManager(user_key=api_key, user_secret=api_secret, data_dir="./")
download_manager = EUMETSATDownloadManager(user_key=api_key, user_secret=api_secret, data_dir="./")
first = True
for date in date_range[::-1]:
start_date = pd.Timestamp(date) - pd.Timedelta("1M")
Expand Down
Loading