<a href="https://colab.research.google.com/github/sanAkel/ocean-hurricane/blob/main/prep_data/download_subSurf_GLORYS12.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Purpose: download
- [Ocean reanalysis data from GLORYS12](https://data.marine.copernicus.eu/product/GLOBAL_ANALYSISFORECAST_PHY_001_024/services). Dataset-specific details (e.g., dataset IDs and variable names) differ from one dataset to another, so fill them in carefully after [looking up the details](https://data.marine.copernicus.eu/product/GLOBAL_ANALYSISFORECAST_PHY_001_024/description).

### Needed inputs:
- basin (e.g., `north_atlantic`)
- year (e.g., `2024`)
- time_delta (e.g., `10`): number of days before/after the storm
- dLon, dLat (e.g., `5, 5`): extra margin, in degrees, outside the track bounds
- path to hurricane track data (see below for details)

## Install modules

In [None]:
!pip install tropycal
!pip install cartopy
!pip install copernicusmarine

In [None]:
import glob as glob
import os
from getpass import getpass

import copernicusmarine
import numpy as np
import pandas as pd
import xarray as xr
from tropycal import tracks

import warnings
warnings.filterwarnings("ignore")

In [None]:
# Read individual storm track data
def get_hurr_data(hurr_file):
  """Open a storm-track file and report the storm name and time span.

  Parameters
  ----------
  hurr_file : path of the track file, handed to `xr.open_dataset`.

  Returns
  -------
  The opened dataset. The caller is responsible for closing it.
  """
  track_ds = xr.open_dataset(hurr_file)

  storm_name = track_ds.attrs['name']
  first_time = track_ds.time[0].values
  last_time = track_ds.time[-1].values
  print(f"{storm_name}, start and end days:\n{first_time} --> {last_time}.\n")

  return track_ds

In [None]:
def get_subSetDataset(track_file, dtype, time_delta):
  track_ds = get_hurr_data(track_file)
  lat_s, lat_e = [track_ds.lat.min().values, track_ds.lat.max().values]
  lon_s, lon_e = [track_ds.lon.min().values, track_ds.lon.max().values]
  t_s= (track_ds.time[0] - pd.Timedelta(days=time_delta)).values
  t_e= (track_ds.time[-1] + pd.Timedelta(days=time_delta)).values

  dsetID, vNames, varNames = [ dtype['dsetID'], dtype['vNames'], dtype['varName']]
  tmp = f"{year}_{track_ds.attrs['name']}_{varNames}"
  fName = tmp + '_full.nc'
  !copernicusmarine subset --dataset-id $dsetID --username $CMEMS_username --password $CMEMS_passwd -o $tmp --variable $vNames -x $lon_s -X $lon_e -y $lat_s -Y $lat_e -t $t_s -T $t_e --service arco-geo-series
  !mv $tmp/* .config/$fName

  dsFull = xr.open_dataset('.config/'+fName)
  ds_track=subset_dataset(dsFull, track_ds, time_delta)
  ds_track.to_netcdf(f"{tmp}_track.nc")
  !rm -f .config/$fName

In [None]:
# Subset data along track
def subset_dataset(cmems_ds, track_ds, time_delta=0):
  """Pick, from `cmems_ds`, the entries nearest to the track positions and times.

  Parameters
  ----------
  cmems_ds   : object exposing `.sel(time=..., latitude=..., longitude=..., method=...)`.
  track_ds   : track data providing `time`, `lat` and `lon`.
  time_delta : offset, in days, added to the track times before the lookup.

  Returns
  -------
  Whatever `cmems_ds.sel` returns for the nearest-neighbour selection.
  """
  shifted_times = track_ds.time + np.timedelta64(time_delta, 'D')
  return cmems_ds.sel(
      time=shifted_times,
      latitude=track_ds.lat,
      longitude=track_ds.lon,
      method="nearest",
  )

### User Inputs

In [None]:
# Path to data
# If not using colab/google drive, point `data_path` below at the directory that
# holds the processed track data, or download it on the fly, see [this example](https://github.com/sanAkel/ocean-hurricane/blob/main/prep_data/download_SAT_retr.ipynb)

# Basin, year and time window
myBasin = 'north_atlantic'
year = 2024
time_delta = 0 # days before/after storm
dLon, dLat = [1, 1] # plot extra data outside track bounds (in degrees)

data_path = '/content/drive/MyDrive/' + f'datasets/hurr/{myBasin}/{year}/'

# Mount drive - to read/save files once done.
# `google.colab` only exists inside Colab, so the mount is skipped elsewhere
# instead of aborting the cell with an ImportError.
try:
  from google.colab import drive
except ImportError:
  print("google.colab not available - skipping Google Drive mount; set data_path to a local directory.")
else:
  drive.mount('/content/drive/')

In [None]:
# Set Copernicus credentials
# Secrets must not be stored in the notebook: read them from the environment
# variables used by the Copernicus Marine toolbox and prompt only when unset.
# NOTE: any password previously committed in this cell should be considered
# compromised and rotated.
CMEMS_username = os.environ.get("COPERNICUSMARINE_SERVICE_USERNAME") or input("Copernicus Marine username: ")
CMEMS_passwd = os.environ.get("COPERNICUSMARINE_SERVICE_PASSWORD") or getpass("Copernicus Marine password: ")

# CMEMS dataset descriptors, one per variable to download.
# Keys as consumed by get_subSetDataset:
#   dsetID  -> value of --dataset-id
#   vNames  -> value of --variable
#   varName -> label used in the output file names
# NOTE(review): the original header reads "Reanalysis GLORYS12", but every ID
# below contains "anfc" - confirm these are the intended datasets.

# Currents
uCurr = dict(
    dsetID='cmems_mod_glo_phy-cur_anfc_0.083deg_PT6H-i',
    vNames='uo',
    varName='SSU',
)

vCurr = dict(
    dsetID='cmems_mod_glo_phy-cur_anfc_0.083deg_PT6H-i',
    vNames='vo',
    varName='SSV',
)

# potential temperature
potT = dict(
    dsetID='cmems_mod_glo_phy-thetao_anfc_0.083deg_PT6H-i',
    vNames='thetao',
    varName='potT',
)

# salinity
sal = dict(
    dsetID='cmems_mod_glo_phy-so_anfc_0.083deg_PT6H-i',
    vNames='so',
    varName='sal',
)

## Read (already downloaded) track dataset and download profiles

In [None]:
# Collect the track files for the chosen basin/year, then fetch every
# variable for each storm in turn.
track_files = sorted(glob.glob(data_path + f'{year}_{myBasin}*.nc'))
print(f"Found {len(track_files)} track files:")

dtypes_to_fetch = [potT, sal, uCurr, vCurr]

for i, track_file in enumerate(track_files, start=1):
  print(f"{i}. {track_file}")

  for dtype in dtypes_to_fetch:
    get_subSetDataset(track_file, dtype, time_delta)
  print("\n")