<a href="https://colab.research.google.com/github/sanAkel/ocean-hurricane/blob/main/mercator_glorys12_cmems.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Download ocean data from the [Mercator Ocean 1/12° global analysis and forecast product (GLOBAL_ANALYSISFORECAST_PHY_001_024)](https://data.marine.copernicus.eu/product/GLOBAL_ANALYSISFORECAST_PHY_001_024/description)



In [None]:
# ---------------------------------------------------------------------------
# User-tunable settings. Keep them in sync with the track notebook:
# https://github.com/sanAkel/ocean-hurricane/blob/main/get_track.ipynb
# ---------------------------------------------------------------------------

# Storm selection: basin name, season (year) and minimum category to keep.
myBasin = 'north_atlantic'
year = 2024
cat_threshold = 4

# Temporal padding: number of days subtracted from the first track time and
# added to the last track time when extracting CMEMS data.
day_buffer = 2

# Spatial padding subtracted from / added to the track's latitude and
# longitude bounds (presumably in degrees, like the track coordinates -- confirm).
lat_buffer = 10
lon_buffer = 10

## Install API

In [None]:
!pip install copernicusmarine

In [None]:
import getpass
import os
import warnings

import copernicusmarine
import numpy as np
import pandas as pd
import xarray as xr

warnings.filterwarnings("ignore")

## Set up the environment for the [Copernicus Marine Toolbox](https://help.marine.copernicus.eu/en/articles/7949409-copernicus-marine-toolbox-introduction)

In [None]:
# Show which toolbox version is installed.
print(copernicusmarine.__version__)

# Credentials must never be stored in the notebook. Read them from the
# environment (e.g. set via Colab secrets or `%env`) and fall back to an
# interactive prompt; getpass keeps the password out of the cell output.
# SECURITY NOTE: a password was previously hard-coded in this cell and is
# therefore exposed in the repository history -- it should be rotated.
cmems_username = (
    os.environ.get("COPERNICUSMARINE_SERVICE_USERNAME")
    or input("Copernicus Marine username: ")
)
cmems_password = (
    os.environ.get("COPERNICUSMARINE_SERVICE_PASSWORD")
    or getpass.getpass("Copernicus Marine password: ")
)
copernicusmarine.login(username=cmems_username, password=cmems_password)

In [None]:
# Copernicus Marine dataset selection.
# Product user manual:
# https://catalogue.marine.copernicus.eu/documents/PUM/CMEMS-GLO-PUM-001-024.pdf
#
# Candidate dataset IDs:
#   cmems_mod_glo_phy_anfc_0.083deg_PT1H-m          hourly-mean surface (2D) fields
#   cmems_mod_glo_phy-thetao_anfc_0.083deg_PT6H-i   instantaneous 6-hourly 3D potential temperature
#   cmems_mod_glo_phy-so_anfc_0.083deg_PT6H-i       instantaneous 6-hourly 3D salinity
#   cmems_mod_glo_phy-cur_anfc_0.083deg_PT6H-i      instantaneous 6-hourly 3D currents

# Request parameters used when opening the dataset.
data_request = dict(
    dataset_id="cmems_mod_glo_phy_anfc_0.083deg_PT1H-m",
    longitude=[-180, 180],  # [minimum, maximum]
    latitude=[-80, 90],     # [minimum, maximum]
    # The available variable names change with the chosen dataset_id.
    variables=["thetao", "so", "uo", "vo", "zos"],
)

In [None]:
# Open the requested dataset, restricted to the longitude/latitude box and
# the variables listed in `data_request`.
lon_bounds = data_request["longitude"]
lat_bounds = data_request["latitude"]

cms_data = copernicusmarine.open_dataset(
    dataset_id=data_request["dataset_id"],
    variables=data_request["variables"],
    minimum_longitude=lon_bounds[0],
    maximum_longitude=lon_bounds[1],
    minimum_latitude=lat_bounds[0],
    maximum_latitude=lat_bounds[1],
)

In [None]:
# Report the first time stamp of the dataset, keeping only the text before
# the 'T' separator (i.e. the date part of the time string).
first_time = cms_data.isel(time=0).time.values
first_date = first_time.astype(str).split('T')[0]
print("\nFirst available date for this dataset:\t{}".format(first_date))

## Read the processed hurricane track data generated by the [get_track notebook](https://github.com/sanAkel/ocean-hurricane/blob/main/get_track.ipynb)

In [None]:
# Mount drive - to save files once done
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Locate and load the season summary CSV stored on the mounted Drive.
input_data_path = '/content/drive/MyDrive/datasets/hurr/{}/'.format(year)
input_fName = input_data_path + 'hurdat2_{}_{}.csv'.format(myBasin, year)
print("Reading {} summary data from:\n{}".format(year, input_fName))

season_data = pd.read_csv(input_fName)

# Select the storms whose category reaches the threshold. Names and ids are
# taken with the same boolean mask, so the two Series stay aligned.
is_major = season_data['category'] >= cat_threshold
major_hurr_names = season_data.loc[is_major, 'name']
major_hurr_ids = season_data.loc[is_major, 'id']

print("\n\nStorms that had a category >= {} are following:\n".format(cat_threshold))
for storm_name in major_hurr_names:
    print(storm_name)

In [None]:
# For every selected storm: open its processed track file, build a padded
# time/latitude/longitude window around the track, subset the CMEMS dataset
# to that window and save the subset as a netCDF file next to the track file.
for hurr_name, hurr_id in zip(major_hurr_names, major_hurr_ids):
    print("{}, ID: [{}]".format(hurr_name, hurr_id))
    input_hurr_file = str(year) + "_" + myBasin + "_" + hurr_name + '.nc'
    print("Processed file name:\t{}".format(input_data_path + input_hurr_file))

    # Read what is needed from the track file inside a context manager so the
    # file handle is released before moving on to the next storm.
    with xr.open_dataset(input_data_path + input_hurr_file) as track_ds:
        t_first, t_last = track_ds.time[0].values, track_ds.time[-1].values
        # Use the true extent of the track (min/max), not its first and last
        # points: a storm that recurves or moves east/south would otherwise
        # yield an inverted (empty) or truncated bounding box.
        lat_min, lat_max = float(track_ds.lat.min()), float(track_ds.lat.max())
        lon_min, lon_max = float(track_ds.lon.min()), float(track_ds.lon.max())
    print("Start and end days:\n{}--\t{}.\n".format(t_first, t_last))

    # Pad the window in time and space.
    ts, te = t_first - np.timedelta64(day_buffer, 'D'), t_last + np.timedelta64(day_buffer, 'D')
    lat_s, lat_e = lat_min - lat_buffer, lat_max + lat_buffer
    lon_s, lon_e = lon_min - lon_buffer, lon_max + lon_buffer
    hurr_subset = cms_data.sel(time=slice(ts, te), latitude=slice(lat_s, lat_e), longitude=slice(lon_s, lon_e))

    # An empty selection (e.g. storm dates outside the dataset's time range)
    # would only produce a useless file; report it and move on.
    if 0 in hurr_subset.sizes.values():
        print("No CMEMS data found for {} in the requested window; skipping.\n\n".format(hurr_name))
        continue

    output_hurr_file = str(year) + "_" + myBasin + "_" + hurr_name + '_GLORYS12.nc'
    print("Output file name:\t{}\n\n".format(input_data_path + output_hurr_file))
    hurr_subset.to_netcdf(input_data_path + output_hurr_file)