<a href="https://colab.research.google.com/github/sanAkel/ocean-hurricane/blob/main/download_along_track.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Download the following data along the TC track:
- Mean sea level pressure (MSLP) and wind speed from the [NHC HURDAT](https://www.nhc.noaa.gov/data/#hurdat).
- From: [AVISO L4](https://data.marine.copernicus.eu/products?facets=mainVariables%7ESea+surface+height--areas%7EGlobal+Ocean--tempResolutions%7EDaily--sources%7ESatellite+observations):
  - Geostrophic currents.
  - ADT.
  - SLA and error.
- From [Mercator GLORYS12](https://data.marine.copernicus.eu/product/GLOBAL_ANALYSISFORECAST_PHY_001_024/description):
  - Currents.
  - Potential temperature.
  - Salinity.
  - SSH.

Save to a `.nc` file.

## User inputs

In [None]:
# Root directory that holds the processed track data.
# The default points at a mounted Google Drive; when not using Colab/Google
# Drive, point data_path_root at your own copy of the processed track data,
# or download it on the fly (see the note further below).
data_path_root = '/content/drive/MyDrive/datasets/hurr/'

# Storm selection: season (year), basin and minimum category
year = 2017
myBasin = 'north_atlantic'
cat_threshold = 4

### The AVISO L4 dataset ID changes starting with year 2022.

In [None]:
# AVISO L4: choose the dataset id that matches the requested season.
# The requested variables are the same for both datasets.
vNames_aviso = ["adt", "sla", "err_sla", "ugos", "vgos"]
dsetID_aviso = (
  "c3s_obs-sl_glo_phy-ssh_my_twosat-l4-duacs-0.25deg_P1D"
  if year < 2022
  else "cmems_obs-sl_glo_phy-ssh_nrt_allsat-l4-duacs-0.25deg_P1D"
)

### The GLORYS12 dataset does not provide hourly data for every year; it changes from daily means to hourly fields.
See [these notes for details.](https://github.com/sanAkel/ocean-hurricane/blob/main/cmems-api.md)

In [None]:
# GLORYS12: choose dataset id(s) and variable names for the requested season.
if year <= 2021:
  # Up to and including 2021 one dataset carries every requested field.
  vNames_glorys12 = ["thetao", "so", "uo", "vo", "zos"]
  if year == 2021: # Handling cat >=4 storms, 2021/08 onwards ok.
    dsetID_glorys12 = "cmems_mod_glo_phy_myint_0.083deg_P1D-m"
  else:
    dsetID_glorys12 = "cmems_mod_glo_phy_my_0.083deg_P1D-m"
else: # 2022/06 onward, hourly and fields are split across datasets.

  # Surface
  # NOTE(review): these variable names overlap with the 3D datasets below;
  # confirm that combining the subsets downstream does not conflict.
  dsetID_glorys12_surf, vNames_glorys12_surf = (
    "cmems_mod_glo_phy_anfc_0.083deg_PT1H-m",
    ["thetao", "so", "uo", "vo", "zos"],
  )

  # 3D: one dataset per field (currents, potential temperature, salinity)
  dsetID_glorys12_curr, vNames_glorys12_curr = (
    "cmems_mod_glo_phy-cur_anfc_0.083deg_PT6H-i",
    ["uo", "vo"],
  )
  dsetID_glorys12_thetao, vNames_glorys12_thetao = (
    "cmems_mod_glo_phy-thetao_anfc_0.083deg_PT6H-i",
    ["thetao"],
  )
  dsetID_glorys12_so, vNames_glorys12_so = (
    "cmems_mod_glo_phy-so_anfc_0.083deg_PT6H-i",
    ["so"],
  )

### Install [Copernicus Marine Toolbox](https://help.marine.copernicus.eu/en/articles/7949409-copernicus-marine-toolbox-introduction)

In [None]:
!pip install copernicusmarine

In [None]:
import copernicusmarine

import pandas as pd
import numpy as np
import xarray as xr

import warnings
warnings.filterwarnings("ignore")

In [None]:
def get_hurr_season_data(data_path_root, year, myBasin, cat_threshold):
  """Read the per-season summary CSV and pick out the strongest storms.

  The file `<data_path_root>/<year>/hurdat2_<myBasin>_<year>.csv` is read
  with pandas; it must provide the columns `name`, `id` and `category`.

  Returns a tuple `(names, ids)` of pandas Series with the `name` and `id`
  of every row whose `category` is >= `cat_threshold`.
  """
  season_dir = data_path_root + '/{}/'.format(year)
  track_fName = season_dir + 'hurdat2_{}_{}.csv'.format(myBasin, year)
  print("Reading {} summary data from:\n{}".format(year, track_fName))

  season_df = pd.read_csv(track_fName)
  # One boolean mask, shared by both selections
  is_major = season_df['category'] >= cat_threshold
  return season_df.loc[is_major, 'name'], season_df.loc[is_major, 'id']

def get_hurr_data(data_path_root, year, myBasin, hurr_name):
  """Open the processed track file of a single storm.

  The file `<data_path_root>/<year>/<year>_<myBasin>_<hurr_name>.nc` is
  opened with xarray. Its path and its first and last `time` values are
  printed.

  Returns the opened xarray dataset.
  """
  storm_dir = data_path_root + '/{}/'.format(year)
  storm_path = storm_dir + "_".join([str(year), myBasin, hurr_name]) + '.nc'
  print("Processed file name:\t{}".format(storm_path))

  track_ds = xr.open_dataset(storm_path)
  first_time = track_ds.time[0].values
  last_time = track_ds.time[-1].values
  print("Start and end days:\n{} --> {}.\n".format(first_time, last_time))
  return track_ds

In [None]:
def get_cmems_dataset(dsetID, vNames):
  """Lazily open a CMEMS dataset through the Copernicus Marine Toolbox.

  dsetID: id of the dataset to open.
  vNames: list of variable names to request.

  The request always spans longitudes -180..180 and latitudes -80..90.
  Returns whatever `copernicusmarine.open_dataset` returns.
  """
  # Fixed geographic extent of the request
  lon_west, lon_east = -180, 180
  lat_south, lat_north = -80, 90

  return copernicusmarine.open_dataset(
    dataset_id=dsetID,
    minimum_longitude=lon_west,
    maximum_longitude=lon_east,
    minimum_latitude=lat_south,
    maximum_latitude=lat_north,
    variables=vNames,
  )

def subset_dataset(cmems_ds, track_ds):
  """Select from `cmems_ds` the entries nearest to the storm track.

  `track_ds.time`, `track_ds.lat` and `track_ds.lon` are used as the
  `time`, `latitude` and `longitude` indexers, with `method="nearest"`.

  Returns the result of `cmems_ds.sel(...)`.
  """
  track_indexers = {
    "time": track_ds.time,
    "latitude": track_ds.lat,
    "longitude": track_ds.lon,
  }
  return cmems_ds.sel(method="nearest", **track_indexers)

### Set up API env

In [None]:
# Log in to the Copernicus Marine service.
# SECURITY: credentials must never be committed with the notebook. They are
# taken from the environment when available and prompted for otherwise.
# NOTE(review): a password was previously hard-coded in this cell; it should
# be considered leaked and changed.
import os
from getpass import getpass

print(copernicusmarine.__version__)
_cmems_username = (os.environ.get("COPERNICUSMARINE_SERVICE_USERNAME")
                   or input("Copernicus Marine username: "))
_cmems_password = (os.environ.get("COPERNICUSMARINE_SERVICE_PASSWORD")
                   or getpass("Copernicus Marine password: "))  # not echoed
copernicusmarine.login(username=_cmems_username, password=_cmems_password)

### Read the processed/downloaded hurricane data.
- It was generated using [this notebook.](https://github.com/sanAkel/ocean-hurricane/blob/main/get_track.ipynb)
- Or fetch it on the fly: follow the instructions in the above notebook.

In [None]:
# Mount Google Drive - to read/save files once done.
# Outside Colab the `google.colab` package cannot be imported; the mount is
# then skipped, and data_path_root must point at a directory that is
# reachable without Google Drive.
try:
  from google.colab import drive
except ImportError:
  print("google.colab is not available; skipping the Google Drive mount.")
else:
  drive.mount('/content/drive')

### Sequence of steps:
1. Read track data for the year/season.
2. Download AVISO L4 and GLORYS12 data along the track (date and location).

In [None]:
# Read downloaded track data
major_hurr_names, major_hurr_ids = get_hurr_season_data(data_path_root, year, myBasin, cat_threshold)

print("\n\n{} {} storms that had a category >= {} are following:\n".format(myBasin, year, cat_threshold))
for hurr in major_hurr_names:
    print(hurr)

# Open the remote CMEMS datasets once, before the storm loop: the dataset ids
# and variable lists do not depend on the storm, so re-opening them for every
# storm only repeats the same remote request. Nothing is opened when no storm
# was selected.
if len(major_hurr_ids) > 0:
    aviso_ds = get_cmems_dataset(dsetID_aviso, vNames_aviso)

    if year <= 2021:
        glorys12_ds = get_cmems_dataset(dsetID_glorys12, vNames_glorys12)
    else:
        glorys12_ds_surf = get_cmems_dataset(dsetID_glorys12_surf, vNames_glorys12_surf) # surface fields
        glorys12_ds_curr = get_cmems_dataset(dsetID_glorys12_curr, vNames_glorys12_curr) # currents
        glorys12_ds_thetao = get_cmems_dataset(dsetID_glorys12_thetao, vNames_glorys12_thetao) # potential temperature
        glorys12_ds_so = get_cmems_dataset(dsetID_glorys12_so, vNames_glorys12_so) # salinity

# Output files are written into the same per-year directory the track files
# are read from.
out_dir = data_path_root + '/{}/'.format(year)

# For each storm, fetch CMEMS data along its track and save it
for hurr_name, hurr_id in zip(major_hurr_names, major_hurr_ids):
    print("\n{}, ID: [{}]".format(hurr_name, hurr_id))
    track_ds = get_hurr_data(data_path_root, year, myBasin, hurr_name)

    aviso_track_ds = subset_dataset(aviso_ds, track_ds)

    if year <= 2021:
        glorys12_track_ds = subset_dataset(glorys12_ds, track_ds)
    else:
        glorys12_track_ds_surf = subset_dataset(glorys12_ds_surf, track_ds)
        glorys12_track_ds_curr = subset_dataset(glorys12_ds_curr, track_ds)
        glorys12_track_ds_thetao = subset_dataset(glorys12_ds_thetao, track_ds)
        glorys12_track_ds_so = subset_dataset(glorys12_ds_so, track_ds)

        # merge subset datasets
        glorys12_track_ds = xr.merge([glorys12_track_ds_surf, glorys12_track_ds_curr, glorys12_track_ds_thetao, glorys12_track_ds_so])

    # Save data
    file_stem = str(year) + "_" + myBasin + "_" + hurr_name
    aviso_hurr_file = file_stem + '_AVISO.nc'
    glorys12_hurr_file = file_stem + '_GLORYS12.nc'
    print("Saving output files:\nAVISO:\t{}\nGLORYS12:\t{}\n".format(aviso_hurr_file, glorys12_hurr_file))
    aviso_track_ds.to_netcdf(out_dir + aviso_hurr_file)
    glorys12_track_ds.to_netcdf(out_dir + glorys12_hurr_file)