<a href="https://colab.research.google.com/github/sanAkel/ocean-hurricane/blob/main/prep_data/download_SAT_retr.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Purpose: download the following data
-  Hurricane data (track, surface pressure, wind) from [tropycal](https://tropycal.github.io/tropycal/install.html).
- Ocean data are gathered from [CMEMS](https://data.marine.copernicus.eu/products). Dataset-specific information may change from one dataset to another, so look it up on CMEMS and fill it in carefully.
- Satellite (retrieved) data:
  - [AVISO Sea Surface Height (SSH).](https://data.marine.copernicus.eu/product/SEALEVEL_GLO_PHY_L4_NRT_008_046/services)
  - [OSTIA Sea Surface Temperature (SST).](https://data.marine.copernicus.eu/product/SST_GLO_SST_L4_NRT_OBSERVATIONS_010_001/services)
  - [CNS Sea Surface Salinity (SSS).](https://data.marine.copernicus.eu/product/MULTIOBS_GLO_PHY_S_SURFACE_MYNRT_015_013/description)
  - [GlobCurrent surface currents.](https://data.marine.copernicus.eu/product/MULTIOBS_GLO_PHY_MYNRT_015_003/description)
  - [Glob color chlorophyll concentration.](https://data.marine.copernicus.eu/product/OCEANCOLOUR_GLO_BGC_L4_MY_009_104/description)

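**Needed inputs**:
1. basin (e.g., `north_atlantic`)
2. year (e.g., `2024`)
3. cat_thr: hurricanes below this threshold are ignored (e.g., `4`)
4. time_delta: number of days before/after the storm (e.g., `5`)
5. dLon, dLat: extra margin, in degrees, outside the track bounds (e.g., `5, 5`)
6. List of _priority_ storms, see below.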

### Install modules

In [None]:
# Install the third-party packages used by this notebook (shell magics).
# tropycal and copernicusmarine are imported in the next cell.
# NOTE(review): cartopy is not imported directly in the cells shown here;
# presumably it is needed by tropycal — confirm before removing.
!pip install tropycal
!pip install cartopy
!pip install copernicusmarine

In [None]:
import copernicusmarine
from tropycal import tracks
import xarray as xr
import numpy as np
import pandas as pd

In [None]:
def get_cmems_data(dsetID, vNames, lon_start, lon_end, lat_start, lat_end, time_start, time_end):
  """Open a space/time subset of a CMEMS dataset.

  Thin wrapper that forwards its arguments to
  ``copernicusmarine.open_dataset`` under that function's keyword names.

  Args:
    dsetID: CMEMS dataset identifier (passed as ``dataset_id``).
    vNames: variable names to request (passed as ``variables``).
    lon_start, lon_end: minimum and maximum longitude of the subset.
    lat_start, lat_end: minimum and maximum latitude of the subset.
    time_start, time_end: start and end datetime of the subset.

  Returns:
    Whatever ``copernicusmarine.open_dataset`` returns for this request.
  """
  return copernicusmarine.open_dataset(
    dataset_id=dsetID,
    variables=vNames,
    minimum_longitude=lon_start,
    maximum_longitude=lon_end,
    minimum_latitude=lat_start,
    maximum_latitude=lat_end,
    start_datetime=time_start,
    end_datetime=time_end,
  )

In [None]:
# CMEMS dataset IDs

# AVISO L4 SSH (ADT)/SLA/Geo currents
def aviso_l4_ssh(year):
  """Return the CMEMS dataset ID and variable names for L4 sea-level data.

  Years before 2022 map to the ``my_twosat`` dataset ID; 2022 and later map
  to the ``nrt_allsat`` one. The requested variables are the same for both.

  Args:
    year: year of interest (compared numerically against 2022).

  Returns:
    Tuple ``(dsetID, vNames)``; ``vNames`` is a fresh list on every call.
  """
  dsetID = (
    "c3s_obs-sl_glo_phy-ssh_my_twosat-l4-duacs-0.25deg_P1D"
    if year < 2022
    else "cmems_obs-sl_glo_phy-ssh_nrt_allsat-l4-duacs-0.25deg_P1D"
  )
  return dsetID, ["adt", "sla", "err_sla", "ugos", "vgos"]

# Each descriptor below holds three entries:
#   dsetID  - CMEMS dataset identifier,
#   vNames  - variables requested from that dataset,
#   varName - short label used in output file names and log messages.

# OSTIA SST
sst = dict(
  dsetID='METOFFICE-GLO-SST-L4-NRT-OBS-SST-V2',
  vNames=['analysed_sst'],
  varName='SST',
)

# CNS SSS NRT
sss = dict(
  dsetID='cmems_obs-mob_glo_phy-sss_nrt_multi_P1D',
  vNames=['sos', 'dos'],
  varName='SSS',
)

# GlobCurrent NRT
curr = dict(
  dsetID='cmems_obs-mob_glo_phy-cur_nrt_0.25deg_P1D-m',
  vNames=['uo', 'vo'],
  varName='CURRENT',
)

# GlobColor
chla = dict(
  dsetID='cmems_obs-oc_glo_bgc-plankton_my_l4-gapfree-multi-4km_P1D',
  vNames=['CHL'],
  varName='CHLa',
)

### Inputs

In [None]:
# --- User inputs for this run ---

# Basin name.
myBasin = 'north_atlantic'

# Season (year) to process.
year = 2024

# Ignore hurricanes below this threshold.
# NOTE(review): cat_thr is not referenced by the cells visible here — confirm where it is applied.
cat_thr = 4

# Number of days before/after the storm.
time_delta = 5

# Extra margin outside the track bounds, in degrees (longitude, latitude).
dLon, dLat = 5, 5

In [None]:
# The list of storms (highlighted ones are _priority_) comes from:
# https://docs.google.com/spreadsheets/d/1iXmzHsz0liWpJKxDz_KR1-hwW-FPzLLYF4hFvw7m1t4/edit?usp=sharing
# Thanks to zhan.zhang@noaa.gov
# Added a few extra ones (open ocean). Kirk (2024); Franklin (2023), Lee (2023)

# pr_st holds lower-case storm names; later cells compare against name.lower().
if year == 2024:
  pr_st = ["oscar", "milton", "helene", "francine", "ernesto", "debby", "beryl", "kirk"]
elif year == 2023:
  pr_st = ["philippe", "ophelia", "nigel", "idalia", "gert p2", "gert p1", "don p2", "don p1", "bret", "franklin", "lee"]
elif year == 2022:
  pr_st = ["nicole", "ian", "gaston", "fiona", "earl", "danielle"]
else:
  # Keep pr_st defined so the following cells find no priority storms
  # instead of failing on an undefined name.
  pr_st = []
  # str.format fills the "{}" placeholder; the previous "%"-based call raised
  # TypeError because the string contains no %-style specifier.
  print("List of priority storms not yet coded for year:\t {}".format(year))

In [None]:
# Path to save data:
# Local output directory laid out as data/<basin>/<year>/.
# The trailing slash matters: later cells build file paths as dPath + file name.
dPath = f'data/{myBasin}/{year}/'
# Shell magic: create the directory (and any missing parents).
!mkdir -p $dPath
print(dPath)

### Download hurricane track data (entire chosen `year`)

In [None]:
# Initialize
# https://tropycal.github.io/tropycal/api/generated/tropycal.tracks.TrackDataset.html#tropycal.tracks.TrackDataset

# Build the track dataset for the chosen basin from the 'hurdat' source.
# NOTE(review): include_btk=True presumably adds best-track data for recent
# storms — confirm against the tropycal documentation linked above.
basin = tracks.TrackDataset(basin=myBasin, source='hurdat', include_btk=True)
# Select the single season (year) of interest; later cells reuse `basin` to fetch individual storms.
season = basin.get_season(year)
print(f'Downloading data for...{season}')

# save data
# Convert the season to a DataFrame; later cells read its 'name' and 'id' columns.
season_data=season.to_dataframe()
# CSV file name: hurdat2_<basin>_<year>.csv inside dPath.
fName = dPath + 'hurdat2_{}_{}.csv'.format(myBasin, year)
season_data.to_csv(fName)
print(f'Saved to hurricane track data:\t{fName}')

In [None]:
# Set Copernicus credentials
# Credentials must never be stored in the notebook itself: anything committed
# here is visible to everyone with access to the repository. Any password that
# was previously committed should be treated as compromised and changed.
# The values are read from the environment variables used by the
# copernicusmarine toolbox when they are set; otherwise they are asked for
# interactively (the password prompt does not echo the typed characters).
import os
from getpass import getpass

CMEMS_username = os.environ.get("COPERNICUSMARINE_SERVICE_USERNAME") or input("CMEMS username: ")
CMEMS_passwd = os.environ.get("COPERNICUSMARINE_SERVICE_PASSWORD") or getpass("CMEMS password: ")
copernicusmarine.login(username=CMEMS_username, password=CMEMS_passwd)

In [None]:
hurr_names = season_data['name']
hurr_ids = season_data['id']

# Report which storms of the season are on the priority list.
# Names are lower-cased before the lookup because pr_st holds lower-case names.
for idx, (hurr_id, storm_name) in enumerate(zip(hurr_ids, hurr_names)):
  if storm_name.lower() not in pr_st:
    continue
  print(f'{storm_name} was listed as a priority storm.')

### Focus on _priority_ storms (as defined above) only.

In [None]:
hurr_names = season_data['name']
hurr_ids = season_data['id']

# For every priority storm of the season:
#   1. save its track to NetCDF,
#   2. save the SSH subset (plus a derived surface-current speed),
#   3. save the SST, SSS, current and chlorophyll subsets.
# All subsets share one bounding box (track extent padded by dLon/dLat) and
# one time window (storm lifetime padded by time_delta days).
print("\n\nGathering details of following storms:\n")
for idx, hurr_id in enumerate(hurr_ids):
  hurr_name = hurr_names.iloc[idx]
  # pr_st holds lower-case names; skip everything that is not a priority storm.
  if hurr_name.lower() not in pr_st:
    continue

  print("\nDownload and saving...\t{}, [{}]".format(hurr_name, hurr_id))
  hurr = basin.get_storm(hurr_id)

  # Hurricane formation and dissipation dates (yyyy-mm-dd)
  print("Formed on: {},\t dissipated on: {}".format(
    hurr.time[0].strftime('%Y-%m-%d'), hurr.time[-1].strftime('%Y-%m-%d')))

  # Time window of the requests, computed once per storm.
  t0 = hurr.time[0] - pd.Timedelta(days=time_delta)
  t1 = hurr.time[-1] + pd.Timedelta(days=time_delta)
  time_start = t0.strftime('%Y-%m-%d')
  time_end = t1.strftime('%Y-%m-%d')

  storm_fName = str(year) + "_" + myBasin + "_" + hurr_name + '.nc'
  track_file = dPath + storm_fName
  hurr.to_xarray().to_netcdf(track_file)
  print("Saved track info to:\n{}".format(track_file))

  # Read back the saved data (for formatting) and use it from here onward.
  # load_dataset reads the file into memory and closes it, so no file handle
  # is left open for each processed storm.
  hurr = xr.load_dataset(track_file)

  # Bounding box of the requests, computed once per storm and reused for
  # every product below.
  lon_start = hurr.lon.values.min() - dLon
  lon_end = hurr.lon.values.max() + dLon
  lat_start = hurr.lat.values.min() - dLat
  lat_end = hurr.lat.values.max() + dLat
  request_box = (lon_start, lon_end, lat_start, lat_end, time_start, time_end)

  # SSH needs special treatment: its dataset ID depends on the year.
  ssh_data = get_cmems_data(*aviso_l4_ssh(year), *request_box)

  # Add the speed of the geostrophic current (from ugos, vgos) to the dataset.
  ssh_data['surf_curr'] = xr.DataArray(np.sqrt(ssh_data.ugos**2 + ssh_data.vgos**2),
                                       coords=ssh_data.ugos.coords,
                                       dims=ssh_data.ugos.dims,
                                       name='surf_curr',
                                       attrs={'units':'m/s'})

  ssh_data_file = dPath + 'AVISO_' + str(year) + '_' + hurr_name + '.nc'
  ssh_data.to_netcdf(ssh_data_file)
  print("\nSaved SSH data to:\n{}".format(ssh_data_file))

  # download other datasets
  for dtype in [sst, sss, curr, chla]:
    sat_data = get_cmems_data(dtype['dsetID'], dtype['vNames'], *request_box)

    sat_data_file = dPath + dtype['varName'] + '_' + str(year) + '_' + hurr_name + '.nc'
    sat_data.to_netcdf(sat_data_file)
    print("\nSaved {} data to:\n{}".format(dtype['varName'],sat_data_file))

### Save to drive

In [None]:
# mount drive
# This cell only works inside Google Colab (it imports google.colab).
from google.colab import drive
drive.mount('/content/drive')

# Shell magics: create the per-basin/per-year folder on the mounted drive,
# then move everything found under dPath into it.
!mkdir -p /content/drive/MyDrive/datasets/hurr/$myBasin/$year/
!mv $dPath/* /content/drive/MyDrive/datasets/hurr/$myBasin/$year/