# **Processing downloaded files**

#### Product: Arctic Ocean Wave Hindcast
#### Dataset: cmems_mod_arc_wav_my_3km_PT1H-i
##### Check link: https://data.marine.copernicus.eu/product/ARCTIC_MULTIYEAR_WAV_002_013/files?subdataset=cmems_mod_arc_wav_my_3km_PT1H-i_202012

In [1]:
import copernicusmarine 
import xarray as xr
from pprint import pprint
import os
import time
import pandas as pd
import numpy as np
import pyproj
import shutil
from datetime import datetime, timedelta
import glob
import matplotlib.pyplot as plt
import cartopy.crs as ccrs
import cartopy.feature as cfeature
import folium
import ipywidgets as widgets
from IPython.display import display

In [3]:
# Work from the project root so that the relative "data/filtered_data"
# paths used further down resolve correctly.
project_root = r"C:\Users\Usuario\Projects\artic_wave_model_data_retriever"
os.chdir(project_root)

In [4]:
class FileProcessor:
    """
    Merge daily NetCDF files for a date range and expose variables as tables.

    Intended call order: ``load_files()`` -> ``concat_files()`` ->
    ``get_variables(name)``.
    """

    def __init__(self, start_day, end_day):
        """
        Initialize the FileProcessor class.

        :param start_day: Begin of the dataset in str format (e.g., "2015-07-04").
        :param end_day: End of the dataset in str format (e.g., "2015-08-04").
        """
        self.start_day = start_day
        self.end_day = end_day
        # Names of the data variables in the merged Dataset; set by concat_files().
        self.variables = None
        # Daily file names ("YYYYMMDD.nc"); set by load_files().
        self.file_names = []
        # Merged Dataset; set by concat_files().
        self.data = None

    def load_files(self):
        """
        Build the list of daily file names to process.

        One ``YYYYMMDD.nc`` name is generated for every day from ``start_day``
        to ``end_day`` (both included). Nothing is read from disk here; the
        names are stored in ``self.file_names`` and printed.
        """
        days = pd.date_range(start=self.start_day, end=self.end_day, freq="D")
        self.file_names = [day.strftime("%Y%m%d") + ".nc" for day in days]
        print(f"Files to concat: {self.file_names}")

    def concat_files(self):
        """
        Merge all listed files into a single Dataset stored in ``self.data``.

        Files are looked up in ``data/filtered_data`` relative to the current
        working directory. A file that cannot be opened is reported and
        skipped, so the merged Dataset may cover fewer days than requested.
        ``self.variables`` is set to the data variable names of the result.

        :raises ValueError: If ``load_files()`` has not produced any file name.
        """
        if not self.file_names:
            raise ValueError("No files to process. Please run load_files() first.")
        ds_merge = xr.Dataset()
        for file_name in self.file_names:
            path = os.path.abspath(os.path.join("data", "filtered_data", file_name))
            try:
                ds = xr.open_dataset(path)
            except Exception as e:
                # Deliberate best effort: a missing/unreadable day must not
                # abort the merge of the remaining days.
                print(f"File is not available. Error: {e}")
                continue
            ds_merge = xr.merge([ds_merge, ds])
        self.data = ds_merge
        # Read from this instance (not from a notebook-level global).
        self.variables = list(self.data.data_vars)
        print("Files concatenated successfully.")

    def get_variables(self, var_):
        """
        Return one variable as a table of time series, one column per grid point.

        :param var_: variable name in str format (e.g., 'VHM0').
        :return: DataFrame indexed by the Dataset time values, whose columns
            are ``(lat, lon)`` tuples taken in row-major order of the grid.
        :raises ValueError: If ``concat_files()`` has not been run, or if the
            variable does not have one value per time step and grid point.
        """
        if self.data is None:
            raise ValueError("No data available. Please run concat_files() first.")

        values = self.data[var_].values

        # Flatten the coordinate grids in row-major order; the reshape below
        # uses the same order, so column k always pairs with coords[k].
        lat = np.asarray(self.data.lat.values).flatten()
        lon = np.asarray(self.data.lon.values).flatten()
        coords = list(zip(lat, lon))

        # Assumes the variable is laid out as (time, row, col) -- the same
        # assumption the per-cell indexing var[:, row, col] relied on.
        # A single reshape replaces one DataFrame concat per grid cell.
        flat = values.reshape(values.shape[0], len(coords))

        # copy=True keeps the returned table independent of the Dataset buffer.
        df = pd.DataFrame(
            flat, index=pd.to_datetime(self.data.time.values), copy=True
        )
        df.columns = coords
        return df

### Set the date range of the daily files we want to process

In [5]:
# First and last day (both included) of the daily files to process.
start_day, end_day = "2021-10-03", "2021-10-04"
file_ = FileProcessor(start_day=start_day, end_day=end_day)

In [6]:
# Build the list of daily file names (YYYYMMDD.nc) for the chosen date range.
file_.load_files()

Files to concat: ['20211003.nc', '20211004.nc']


In [7]:
# Open every listed file under data/filtered_data and merge them into file_.data.
file_.concat_files()

Files concatenated successfully.


In [8]:
# Inspect the merged Dataset.
file_.data

In [9]:
# List the names of the data variables available in the merged Dataset.
list(file_.data.data_vars)

['VHM0', 'VTM10']

In [10]:
# Extract 'VHM0' as a DataFrame: one row per time step, one column per
# (lat, lon) grid point.
VHM0 = file_.get_variables('VHM0')
VHM0

Unnamed: 0,"(59.00807532400741, -3.971224742186061)","(58.99162363678341, -3.934540047252207)","(58.97516117412991, -3.897896236738876)","(58.95868795965881, -3.86129327226354)","(58.94220401694944, -3.8247311153958443)","(58.925709369548585, -3.7882097276581375)","(58.90920404097047, -3.7517290705259887)","(58.89268805469672, -3.7152891054287807)","(58.87616143417641, -3.6788897937502085)","(58.85962420282604, -3.6425310968288174)",...,"(59.06382168388977, -3.3570145989215123)","(59.04716396481478, -3.3206057076233266)","(59.03049570125254, -3.2842379671425754)","(59.013816916763155, -3.2479113370079844)","(58.99712763487293, -3.211625776707494)","(58.98042787907466, -3.1753812456888597)","(58.963717672827386, -3.139177703360112)","(58.9469970395566, -3.10301510909013)","(58.93026600265408, -3.0668934222091604)","(58.913524585477994, -3.0308126020093415)"
2021-10-03 00:00:00,1.596015,1.554973,1.515626,1.476385,1.433619,1.482739,1.482739,1.464469,1.453067,1.449237,...,1.130216,,,,,,,,0.372861,0.294203
2021-10-03 01:00:00,1.439327,1.401836,1.367349,1.333672,1.297113,1.344235,1.344235,1.328519,1.318983,1.316782,...,1.063631,,,,,,,,0.219474,0.169379
2021-10-03 02:00:00,1.323691,1.292456,1.263741,1.23542,1.204308,1.241406,1.241406,1.226075,1.216195,1.212865,...,1.002697,,,,,,,,0.116553,0.095351
2021-10-03 03:00:00,1.260976,1.23636,1.213906,1.192244,1.167498,1.197389,1.197389,1.174728,1.158574,1.151007,...,0.953209,,,,,,,,0.077676,0.075067
2021-10-03 04:00:00,1.229525,1.210613,1.195442,1.187782,1.173942,1.209639,1.209639,1.199511,1.195728,1.184647,...,0.923641,,,,,,,,0.110459,0.178031
2021-10-03 05:00:00,1.307501,1.296053,1.286791,1.279256,1.269924,1.300906,1.300906,1.293964,1.291469,1.294591,...,0.99092,,,,,,,,0.3448,0.363622
2021-10-03 06:00:00,1.481881,1.477032,1.473994,1.473686,1.472541,1.504785,1.504785,1.522374,1.541256,1.562181,...,1.23437,,,,,,,,0.41469,0.468749
2021-10-03 07:00:00,1.785567,1.792661,1.798585,1.803468,1.806874,1.802698,1.802698,1.802624,1.800272,1.802897,...,1.731721,,,,,,,,0.514338,0.59509
2021-10-03 08:00:00,2.086766,2.084356,2.081857,2.070845,2.058556,2.026905,2.026905,2.016697,1.998457,1.981619,...,2.030934,,,,,,,,0.664811,0.710953
2021-10-03 09:00:00,2.374278,2.352397,2.324003,2.280247,2.24279,2.173449,2.173449,2.130054,2.077386,2.02779,...,2.176679,,,,,,,,0.708042,0.685632


In [11]:
# Extract 'VTM10' as a DataFrame: one row per time step, one column per
# (lat, lon) grid point.
VTM10 = file_.get_variables('VTM10')
VTM10

Unnamed: 0,"(59.00807532400741, -3.971224742186061)","(58.99162363678341, -3.934540047252207)","(58.97516117412991, -3.897896236738876)","(58.95868795965881, -3.86129327226354)","(58.94220401694944, -3.8247311153958443)","(58.925709369548585, -3.7882097276581375)","(58.90920404097047, -3.7517290705259887)","(58.89268805469672, -3.7152891054287807)","(58.87616143417641, -3.6788897937502085)","(58.85962420282604, -3.6425310968288174)",...,"(59.06382168388977, -3.3570145989215123)","(59.04716396481478, -3.3206057076233266)","(59.03049570125254, -3.2842379671425754)","(59.013816916763155, -3.2479113370079844)","(58.99712763487293, -3.211625776707494)","(58.98042787907466, -3.1753812456888597)","(58.963717672827386, -3.139177703360112)","(58.9469970395566, -3.10301510909013)","(58.93026600265408, -3.0668934222091604)","(58.913524585477994, -3.0308126020093415)"
2021-10-03 00:00:00,7.557685,7.63255,7.72419,7.822552,7.916584,7.991724,7.991724,8.043538,8.096572,8.147543,...,9.013878,,,,,,,,2.71335,2.62429
2021-10-03 01:00:00,7.783062,7.87768,7.983575,8.093501,8.196687,8.233541,8.233541,8.281526,8.325846,8.359483,...,8.99369,,,,,,,,2.764838,2.817108
2021-10-03 02:00:00,7.932912,8.027678,8.130668,8.234233,8.328894,8.32559,8.32559,8.36644,8.400282,8.420898,...,8.987943,,,,,,,,3.226628,3.513907
2021-10-03 03:00:00,7.830904,7.888777,7.948225,7.996599,8.034964,7.971189,7.971189,8.074048,8.158908,8.208648,...,8.904863,,,,,,,,3.92987,3.773921
2021-10-03 04:00:00,7.62946,7.64674,7.64741,7.580526,7.532952,7.390951,7.390951,7.369312,7.327556,7.387359,...,8.660188,,,,,,,,2.434951,1.853307
2021-10-03 05:00:00,6.899475,6.870988,6.841574,6.802554,6.752551,6.703015,6.703015,6.677056,6.651811,6.62243,...,7.468988,,,,,,,,2.209787,2.320894
2021-10-03 06:00:00,6.212608,6.175075,6.137016,6.086462,6.02468,5.970464,5.970464,5.864546,5.785298,5.723865,...,6.182804,,,,,,,,2.437872,2.521598
2021-10-03 07:00:00,5.638175,5.609641,5.585284,5.560485,5.529508,5.538452,5.538452,5.523015,5.518322,5.514637,...,5.396562,,,,,,,,2.60198,2.682933
2021-10-03 08:00:00,5.521145,5.506173,5.491102,5.479487,5.461992,5.448322,5.448322,5.437823,5.445506,5.457989,...,5.454087,,,,,,,,2.754695,2.845783
2021-10-03 09:00:00,5.608267,5.591861,5.577915,5.568995,5.549715,5.528897,5.528897,5.529287,5.543168,5.548315,...,5.626083,,,,,,,,2.911693,2.934298
