# Download data from the [Mercator 1/12-deg global ocean analysis and forecast](https://data.marine.copernicus.eu/product/GLOBAL_ANALYSISFORECAST_PHY_001_024/description)

In [None]:
import copernicusmarine

import numpy as np
import xarray as xr
import pandas as pd

import cartopy.crs as ccrs
import cartopy.feature as cfeature

import glob as glob

import matplotlib.pyplot as plt
%matplotlib inline

import warnings
warnings.filterwarnings("ignore")

# User inputs
## To do: parse the following user inputs, including which ocean dataset(s) to read

In [None]:
# --- Tropical cyclone (TC) selection ---
tc_name = 'franklin'  # storm name; forms part of every input/output file name
year = 2023  # year of the storm; also part of the file names

# Directory holding the saved TC track data. File names are appended to this
# string by plain concatenation, so the trailing '/' is required.
# NOTE(review): absolute, machine-specific path; adjust when running elsewhere.
data_path_tc = '/Users/sakella/10june2024/intern-summer2024/get_track_and_vars/saved_data/'

# --- Sampling window ---
nDays = 30  # number of days before/after storm passage

In [None]:
# Request sent to the Copernicus Marine service.
# Product user manual:
# https://catalogue.marine.copernicus.eu/documents/PUM/CMEMS-GLO-PUM-001-024.pdf
#
# Dataset ids to choose from:
#   cmems_mod_glo_phy_anfc_0.083deg_PT1H-m         hourly mean surface (2d) fields
#   cmems_mod_glo_phy-thetao_anfc_0.083deg_PT6H-i  instantaneous 6-hourly 3d potential temperature
#   cmems_mod_glo_phy-so_anfc_0.083deg_PT6H-i      instantaneous 6-hourly 3d salinity
#   cmems_mod_glo_phy-cur_anfc_0.083deg_PT6H-i     instantaneous 6-hourly 3d currents
#
# "longitude" and "latitude" are [minimum, maximum] bounds.
# "variables" must be changed to match whichever dataset_id is selected.
data_request = dict(
    dataset_id="cmems_mod_glo_phy_anfc_0.083deg_PT1H-m",
    longitude=[-180, 180],
    latitude=[-80, 90],
    variables=["thetao", "so", "uo", "vo", "zos"],
)

# Gather datasets:

## Storm dataset, already prepared using [this notebook](https://github.com/sanAkel/intern-summer2024/blob/main/get_track_and_vars/get_track.ipynb).

In [None]:
# Open the saved storm track file: <data_path_tc><tc_name>_<year>.nc
tc_track_file = f"{data_path_tc}{tc_name}_{year}.nc"
tc_data = xr.open_dataset(tc_track_file)

## Load the ocean analysis/forecast dataset (hourly surface fields).

In [None]:
# Open the requested Copernicus Marine dataset, restricted to the bounding box
# and variables listed in data_request.
lon_min, lon_max = data_request["longitude"]
lat_min, lat_max = data_request["latitude"]

glo12_hrly_sfc = copernicusmarine.open_dataset(
    dataset_id=data_request["dataset_id"],
    minimum_longitude=lon_min,
    maximum_longitude=lon_max,
    minimum_latitude=lat_min,
    maximum_latitude=lat_max,
    variables=data_request["variables"],
)

## Subset data along path of the tropical cyclone (Lagrangian sampling)

In [None]:
# Lagrangian sampling: for every time step of the storm track, pick the ocean
# grid point nearest (in time, longitude and latitude) to the storm position
# and write it to its own NetCDF file. The per-step files are combined later.

# Loop-invariant work is done once, outside the loop:
#   - file-name time stamps for the whole track,
#   - selection of the first depth level of the ocean dataset,
#   - the common prefix of the output file names.
time_labels = tc_data.time.dt.strftime("%Y%m%d_%H%M").values
sfc_top = glo12_hrly_sfc.isel(depth=0)
fName_prefix = data_path_tc + tc_name + '_' + str(year) + '_' + data_request['dataset_id'] + '_'

for tk, timeStr in enumerate(time_labels):
    tc_point = tc_data.isel(time=tk)  # storm record at this time step
    xk = tc_point['lon'].values
    yk = tc_point['lat'].values
    Tk = tc_point['time'].values

    # method='nearest' applies to all three coordinates and has no tolerance,
    # so a track point outside the dataset's coverage is silently matched to
    # the closest available value.
    # NOTE(review): assumes tc_data lon/lat use the same convention as the
    # ocean dataset's longitude/latitude; confirm.
    ds_subset = sfc_top.sel(time=Tk, longitude=xk, latitude=yk, method='nearest')

    # Save each of the subset files and concatenate later
    fName_save = fName_prefix + timeStr + '.nc'
    ds_subset.to_netcdf(fName_save)
    print("Saved data for:\t{}; lon, lat=[{}, {}]".format(timeStr, xk, yk))

In [None]:
# Re-open the per-time-step files written by the sampling loop and stack them
# along a new "time" dimension. The glob pattern is built from the same
# settings used when the files were written, so it keeps matching if the data
# directory, storm, year or dataset id is changed.
sampled_files_pattern = (
    data_path_tc + tc_name + '_' + str(year) + '_' + data_request['dataset_id'] + '_*.nc'
)
dss = xr.open_mfdataset(sampled_files_pattern, combine='nested', concat_dim="time")

In [None]:
# Scatter of the sampled positions, coloured by the sampled thetao value.
fig, ax = plt.subplots()
track_scatter = ax.scatter(
    dss.longitude.values,
    dss.latitude.values,
    s=4,
    c=dss.thetao.values,
    marker='o',
    cmap=plt.cm.Spectral,
)

# Label the colorbar with the variable's units when the file provides them.
thetao_units = dss.thetao.attrs.get("units", "")
cbar = fig.colorbar(track_scatter, ax=ax)
cbar.set_label("thetao [{}]".format(thetao_units) if thetao_units else "thetao")

ax.set_xlabel("longitude")
ax.set_ylabel("latitude")
ax.set_title("{} {}: thetao sampled along the storm track".format(tc_name, year));

# Alternative quick look:
#dss.thetao.plot()

## Sanity check to make sure all output files were saved and combine them

In [None]:
# Check that one file was saved per storm time step, then merge them into a
# single NetCDF file.
fStr = data_path_tc + tc_name + '_' + str(year) + '_' + data_request['dataset_id'] + '_*' + '.nc'

# The time stamp in each file name is formatted %Y%m%d_%H%M, so sorting the
# names puts the files in chronological order.
saved_files = sorted(glob.glob(fStr))
nFiles_saved = len(saved_files)
nTimes_expected = len(tc_data.time)

if nFiles_saved > 0 and nFiles_saved == nTimes_expected:
    # Each per-step file holds a single point with scalar coordinates, so the
    # files are stacked explicitly along a new "time" dimension in list order
    # (the same way the files are re-opened for the quick-look plot).
    ds = xr.open_mfdataset(saved_files, combine='nested', concat_dim='time')
    fName_save = data_path_tc + tc_name + '_' + str(year) + '_' + data_request['dataset_id'] + '.nc'
    ds.to_netcdf(fName_save)
    print("Saved combined output to:\n{}".format(fName_save))
else:
    # Report the mismatch instead of silently skipping the combine step.
    print("Expected {} file(s) but found {} matching:\n{}\nCombined output was NOT saved.".format(
        nTimes_expected, nFiles_saved, fStr))

## Subset data for +/- `nDays` before and after the storm (Eulerian sampling) - to do later.

In [None]:
# Disabled draft of the Eulerian sampling step (every statement below is commented out).
# As drafted it would: read the storm start/end dates from tc_data, widen them
# with pd.date_range using nDays, slice glo12_hrly_sfc over that time window
# and write the result to a NetCDF file.
# NOTE(review): tc_date_e is taken from values[0], the same element as
# tc_date_s; presumably the end date should come from values[-1]. Confirm
# before enabling.
# NOTE(review): fName_save below is built exactly like the combined
# Lagrangian output file name, so enabling this as-is would overwrite that
# file. Confirm the intended output name.
#tc_date_s, tc_date_e = [tc_data["time"].dt.strftime("%Y-%m-%d").values[0], tc_data["time"].dt.strftime("%Y-%m-%d").values[0]]

#sample_date_s,  sample_date_e = [pd.date_range(end=tc_date_s, periods=nDays)[0], 
#                pd.date_range(start=tc_date_e, periods=nDays)[-1]]
#print("\nSubsetting ocean reanalysis data between:\n{}\t-\t{}.\n".format(sample_date_s,  sample_date_e))
#glo12_hrly_sfc_subset = glo12_hrly_sfc.sel(time=slice(sample_date_s, sample_date_e))
#fName_save = data_path_tc + tc_name + '_' + str(year) + '_' + data_request['dataset_id'] + '.nc'
#glo12_hrly_sfc_subset.to_netcdf(fName_save)
#print("Saved data to:\n{}".format(fName_save))