# **Code to retrieve data from Marine Copernicus**

#### Product: Arctic Ocean Wave Hindcast
#### Dataset: cmems_mod_arc_wav_my_3km_PT1H-i
##### Check link: https://data.marine.copernicus.eu/product/ARCTIC_MULTIYEAR_WAV_002_013/files?subdataset=cmems_mod_arc_wav_my_3km_PT1H-i_202012

In [1]:
import copernicusmarine 
import xarray as xr
from pprint import pprint
import os
import time
import pandas as pd
import numpy as np
import pyproj
import shutil
from datetime import datetime, timedelta
import glob
import matplotlib.pyplot as plt
import cartopy.crs as ccrs
import cartopy.feature as cfeature
import folium
import ipywidgets as widgets
from IPython.display import display

In [2]:
# Set the root of your project.
# The location can be overridden without editing the notebook by setting the
# COPERNICUS_PROJECT_ROOT environment variable; when it is not set, the
# original hard-coded folder is used.
PROJECT_ROOT = os.environ.get(
    "COPERNICUS_PROJECT_ROOT",
    r"C:\Users\LuisEnriqueBarrenoSu\OneDrive - CorPower Ocean AB\Desktop\Copernicus_Project",
)
# All relative paths used later in the notebook are resolved from this folder.
os.chdir(PROJECT_ROOT)

In [6]:
from dotenv import load_dotenv

# Read the Copernicus Marine credentials from a local ".env" file so that
# they are never written in the notebook itself.
load_dotenv(".env")

Copernicus_username = os.getenv("COPERNICUS_USERNAME")
Copernicus_password = os.getenv("COPERNICUS_PASSWORD")

# os.getenv returns None for a variable that is not set. Report it here,
# where the cause is obvious, instead of letting it surface later as a
# download error. This is only a warning: execution continues.
_missing_credentials = [
    key
    for key, value in (
        ("COPERNICUS_USERNAME", Copernicus_username),
        ("COPERNICUS_PASSWORD", Copernicus_password),
    )
    if not value
]
if _missing_credentials:
    print(
        "Warning: "
        + ", ".join(_missing_credentials)
        + " not found in the environment or in the .env file."
    )

## First, we define a window of data

In [7]:
# Open the file of any single day to explore the structure of the dataset.
# Set file_name to a file that you have already downloaded and placed in the
# "data/raw_data" folder (relative to the project root).

file_name = "20170101_MyWam3km_hindcast-cmems.nc"
relative_path = os.path.join("data", "raw_data", file_name)
file_path = os.path.abspath(relative_path)
DS = xr.open_dataset(file_path)
# Last expression of the cell: shows the dataset summary in the notebook.
DS

### Input the coordinates and define the area:

In [8]:
# Define a window of data that contains the coordinates that we want.
# point_coord is the point of interest as (latitude, longitude); it is drawn
# in red on the verification map.
point_coord = 58.97230640333324, -3.370402302654759         # coordinates of interest
# Geographic bounds of the window (latitude / longitude).
min_lat = 58.9
max_lat = 59
min_lon = -4
max_lon = -3


In [9]:
# Define the polar stereographic projection: origin at lat_0=90, central
# meridian lon_0=-45, scale factor k=1, sphere of radius R=6371000.
proj_stere = pyproj.Proj("+proj=stere +lon_0=-45 +lat_0=90 +k=1 +R=6371000 +no_defs")

# Transform geographic coordinates into polar stereographic grid coordinates.
# proj_stere is called with (longitude, latitude) and returns projected (x, y).
# NOTE(review): only two opposite corners of the geographic window are
# projected, so the resulting x/y box is not guaranteed to contain the whole
# window -- check the result on the map below.
# NOTE(review): assumes the dataset's rlon/rlat axes are expressed in this
# projection and are ascending, so that slice(min, max) is non-empty -- TODO confirm.

rlon_min, rlat_min = proj_stere(min_lon, max_lat)
rlon_max, rlat_max = proj_stere(max_lon, min_lat)

reg_lon = slice(rlon_min, rlon_max)     # projected x range, used to select along "rlon"
reg_lat = slice(rlat_min, rlat_max)    # projected y range, used to select along "rlat"

In [10]:
# Select the significant wave height (VHM0) inside the window defined above.
# Check in the displayed summary that the date matches the downloaded file.
VHM0_example = DS["VHM0"].sel(rlon = reg_lon, rlat = reg_lat)
# Last expression of the cell: shows the selection in the notebook.
VHM0_example

In [11]:
# Latitude / longitude of every grid point inside the window (2-D arrays).
lat_ = VHM0_example.lat.values
lon_ = VHM0_example.lon.values
row, col = lat_.shape

# Flatten both grids row by row and pair them up as (latitude, longitude).
lst_coord = list(zip(lat_.ravel(), lon_.ravel()))

In [12]:
# Confirm on the map that these are the grid points that you want. Otherwise,
# adjust the window defined by min_lat, max_lat, min_lon and max_lon.


def _add_circle(target_map, lat, lon, color):
    """Add a filled circle marker at (lat, lon) with a readable popup."""
    # Cast to built-in floats first: the grid values are NumPy scalars and a
    # popup built from the raw tuple shows their repr (for example
    # "np.float32(58.9)" with NumPy 2) instead of plain numbers.
    lat, lon = float(lat), float(lon)
    folium.CircleMarker(
        location=(lat, lon),
        radius=4,  # Size of the circle
        color=color,
        popup=f"({lat:.5f}, {lon:.5f})",
        fill=True,
        fill_color=color,
        fill_opacity=0.7,
    ).add_to(target_map)


# Without grid points the average below would fail with a ZeroDivisionError
# that gives no hint about the cause.
if not lst_coord:
    raise ValueError(
        "No grid points fall inside the selected window; "
        "adjust min_lat, max_lat, min_lon and max_lon."
    )

# Create a map centered on the average of the coordinates
center_lat = sum(lat for lat, lon in lst_coord) / len(lst_coord)
center_lon = sum(lon for lat, lon in lst_coord) / len(lst_coord)
m = folium.Map(location=[float(center_lat), float(center_lon)], zoom_start=10)

# Blue points: grid points available in the marine weather model
for lat, lon in lst_coord:
    _add_circle(m, lat, lon, 'blue')

# Red point: the point of interest
lat, lon = point_coord
_add_circle(m, lat, lon, 'red')

# Last expression of the cell: displays the map in the notebook
m

##### Confirm that this is the area of data that you want; otherwise, change min_lat, max_lat, min_lon and max_lon and re-run the cells above.

## Data Retrieval

### Variables

1.   **VHM0** [m]:
Total significant wave height (Hm0)
2.   **VTM10** [s]:
Total mean period
3.   **VTM02** [s]:
Total m2-period
4.   **VTPK** [s]:
Total peak period
5.   **VMDR** [degree]:
Total mean wave direction
6.   **VPED** [degree]:
peak direction
7.   **VSDX** [m s-1]:
x-comp. Stokes drift
8.   **VSDY** [m s-1]:
y-comp. Stokes drift
9.   **VHM0_WW** [m]:
Sea significant wave height
10.   **VTM01_WW** [s]:
Sea m1-period
11.   **VMDR_WW** [degree]:
Sea mean wind direction
12.   **VHM0_SW1** [m]:
first swell significant wave height
13.   **VTM01_SW1** [s]:
first swell mean period
14.   **VMDR_SW1** [degree]:
first swell direction
15.   **VHM0_SW2** [m]:
second swell significant wave height
16.   **VTM01_SW2** [s]:
second swell mean period
17.   **VMDR_SW2** [degree]:
second swell direction
18.   **SIC** [1]:
sea ice concentration
19.   **SIT** [m]:
sea_ice_area_thickness




### Select the variables:

In [13]:
# Names of the variables that can be selected (described in the list above)
variables = [
    'VHM0', 'VTM10', 'VTM02', 'VTPK', 'VMDR',
    'VPED', 'VSDX', 'VSDY',
    'VHM0_WW', 'VTM01_WW', 'VMDR_WW',
    'VHM0_SW1', 'VTM01_SW1', 'VMDR_SW1',
    'VHM0_SW2', 'VTM01_SW2', 'VMDR_SW2',
    'SIC', 'SIT',
]


def on_change(change):
    """Print the current selection each time the widget value changes."""
    print(f"Selected variables: {list(multi_select.value)}")


# Multiple-selection list that starts with nothing selected
multi_select = widgets.SelectMultiple(
    options=variables,
    value=[],
    description='Variables:',
    disabled=False,
)

# React to changes of the selected value, then show the widget
multi_select.observe(on_change, names='value')
display(multi_select)

# Select several variables with Control + click on the list

SelectMultiple(description='Variables:', options=('VHM0', 'VTM10', 'VTM02', 'VTPK', 'VMDR', 'VPED', 'VSDX', 'V…

In [14]:
# Copy the current widget selection into a plain list. This is a snapshot:
# re-run this cell after changing the selection in the widget.
lst_variables = list(multi_select.value)
# Last expression of the cell: shows the selected variable names.
lst_variables

['VHM0', 'VTM10']

#### Data available from **January 1977** to **June 2024**.
### Input time range:

In [16]:
# There is one file per day.
# Input the first and the last day to retrieve as "YYYY-MM-DD" strings;
# both days are included in the range.
start_day = "2021-10-03"
end_day = "2021-10-04"

In [17]:
# One row per calendar day from start_day to end_day (both included).
# "D" is the documented pandas alias for calendar-day frequency; the
# lowercase spelling is deprecated in recent pandas releases.
a = pd.DataFrame(pd.date_range(start=start_day, end=end_day, freq="D"))

# series_dates: one ["YYYY", "MM", "DD"] list per day, used to build the
# year / month folders of the downloaded files.
series_dates = a[0].apply(lambda day: day.strftime("%Y-%m-%d").split("-"))

# b: the same days as "YYYYMMDD" strings, used in the file names.
b = ["".join(parts) for parts in series_dates]
days_ = len(b)

### Download and filter the data (the credentials loaded from the `.env` file are used):

In [18]:
# For every requested day: download the daily file, keep only the selected
# variables inside the window, store the result in ./data/filtered_data and
# delete the raw download.
for i, day in enumerate(b):

    try:

        # NOTE(review): `force_download` is not accepted by every release of
        # the copernicusmarine package -- confirm against the installed version.
        copernicusmarine.get(
            dataset_id = "cmems_mod_arc_wav_my_3km_PT1H-i",
            username = Copernicus_username,
            password = Copernicus_password,
            force_download = True,
            filter = f"*{day}*"
        )

        file_name = f"{day}_MyWam3km_hindcast-cmems.nc"

        # Files are downloaded into <product>/<dataset_version>/<year>/<month>/.
        # The year and the month are taken from the date of THIS file, so that
        # a range crossing a year boundary looks in the right folder.
        relative_path = os.path.join(
            "ARCTIC_MULTIYEAR_WAV_002_013",
            "cmems_mod_arc_wav_my_3km_PT1H-i_202012", # latest version is used 202012
            series_dates[i][0],
            series_dates[i][1],
            file_name
        )

        file_path = os.path.abspath(relative_path)
        DS = xr.open_dataset(file_path)

    except Exception as e:
        # Any failure while downloading or opening the file is reported and
        # the loop moves on to the next day.
        print("File is not available in the cmems_mod_arc_wav_my_3km_PT1H-i dataset, check the date (before June 2024 after January 1977).\n")
        print(f"Error: {e}")

    else:
        try:
            # Keep only the selected variables, inside the region defined earlier.
            subset = DS[lst_variables]
            filtered_dataset = subset.sel(rlon = reg_lon, rlat = reg_lat)

            # Store the filtered data in NetCDF format
            folder_path = "./data/filtered_data"
            os.makedirs(folder_path, exist_ok=True)
            file_name = os.path.join(folder_path, day + ".nc")
            filtered_dataset.to_netcdf(file_name)
        finally:
            # Always close the dataset, even if filtering or writing failed:
            # an open handle would keep the raw file locked.
            DS.close()

        # Delete the raw download so that unwanted data is not kept on disk
        if os.path.exists(file_path):
            os.remove(file_path)
            print(f"{file_path} was removed.")
        else:
            print(f"{file_path} not found.")


Fetching catalog: 100%|██████████████████████████████████████████████████████████████████| 3/3 [00:18<00:00,  6.07s/it]
INFO - 2025-01-10T06:26:13Z - Dataset version was not specified, the latest one was selected: "202012"
INFO - 2025-01-10T06:26:13Z - Dataset part was not specified, the first one was selected: "default"
INFO - 2025-01-10T06:26:13Z - Service was not specified, the default one was selected: "original-files"
INFO - 2025-01-10T06:26:13Z - Downloading using service original-files...
100%|████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:58<00:00, 58.93s/it]


C:\Users\LuisEnriqueBarrenoSu\OneDrive - CorPower Ocean AB\Desktop\Copernicus_Project\ARCTIC_MULTIYEAR_WAV_002_013\cmems_mod_arc_wav_my_3km_PT1H-i_202012\2021\10\20211003_MyWam3km_hindcast-cmems.nc was removed.


INFO - 2025-01-10T06:27:25Z - Dataset version was not specified, the latest one was selected: "202012"
INFO - 2025-01-10T06:27:25Z - Dataset part was not specified, the first one was selected: "default"
INFO - 2025-01-10T06:27:25Z - Service was not specified, the default one was selected: "original-files"
INFO - 2025-01-10T06:27:25Z - Downloading using service original-files...
100%|████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:57<00:00, 57.32s/it]


C:\Users\LuisEnriqueBarrenoSu\OneDrive - CorPower Ocean AB\Desktop\Copernicus_Project\ARCTIC_MULTIYEAR_WAV_002_013\cmems_mod_arc_wav_my_3km_PT1H-i_202012\2021\10\20211004_MyWam3km_hindcast-cmems.nc was removed.
