# **Processing downloaded files**

#### Product: Arctic Ocean Wave Hindcast
#### Dataset: cmems_mod_arc_wav_my_3km_PT1H-i
##### Check link: https://data.marine.copernicus.eu/product/ARCTIC_MULTIYEAR_WAV_002_013/files?subdataset=cmems_mod_arc_wav_my_3km_PT1H-i_202012

In [1]:
import copernicusmarine 
import xarray as xr
from pprint import pprint
import os
import time
import pandas as pd
import numpy as np
import pyproj
import shutil
from datetime import datetime, timedelta
import glob
import matplotlib.pyplot as plt
import cartopy.crs as ccrs
import cartopy.feature as cfeature
import folium
import ipywidgets as widgets
from IPython.display import display

In [2]:
# Set the root of your project.
# The COPERNICUS_PROJECT_ROOT environment variable, when set, overrides the
# default below so the notebook can run on another machine without editing
# this cell. All relative paths used later (e.g. data/filtered_data) are
# resolved against this directory.
PROJECT_ROOT = os.environ.get(
    "COPERNICUS_PROJECT_ROOT",
    r"C:\Users\LuisEnriqueBarrenoSu\OneDrive - CorPower Ocean AB\Desktop\Copernicus_Project",
)
os.chdir(PROJECT_ROOT)

In [3]:
class FileProcessor:
    """
    Locate, merge and reshape daily NetCDF files for a date range.

    Intended call order: ``load_files()`` -> ``concat_files()`` ->
    ``get_variables(name)``.
    """

    def __init__(self, start_day, end_day, data_dir=None):
        """
        Initialize the FileProcessor class.

        :param start_day: Begin of the dataset in str format (e.g., "2015-07-04").
        :param end_day: End of the dataset in str format (e.g., "2015-08-04").
        :param data_dir: Folder that holds the daily ``YYYYMMDD.nc`` files.
            Defaults to ``data/filtered_data`` relative to the current
            working directory.
        """
        self.start_day = start_day
        self.end_day = end_day
        self.data_dir = os.path.join("data", "filtered_data") if data_dir is None else data_dir
        # Filled by concat_files() with the data variable names of the merged dataset.
        self.variables = None
        # Filled by load_files() with one "YYYYMMDD.nc" name per day.
        self.file_names = []
        # Filled by concat_files() with the merged xarray Dataset.
        self.data = None

    def load_files(self):
        """
        Build the list of daily file names covering start_day..end_day.

        One ``YYYYMMDD.nc`` name is generated per calendar day (both ends
        included) and stored in ``self.file_names``. No file is opened here.
        """
        days = pd.date_range(start=self.start_day, end=self.end_day, freq="D")
        self.file_names = [f"{stamp}.nc" for stamp in days.strftime("%Y%m%d")]
        print(f"Files to concat: {self.file_names}")

    def concat_files(self):
        """
        Merge all listed files into a single Dataset.

        Files that cannot be opened are reported and skipped. The merged
        dataset is stored in ``self.data`` and its data variable names in
        ``self.variables``.

        :raises ValueError: if load_files() has not produced any file name.
        """
        if not self.file_names:
            raise ValueError("No files to process. Please run load_files() first.")

        ds_merge = xr.Dataset()
        for file_name in self.file_names:
            path = os.path.abspath(os.path.join(self.data_dir, file_name))
            try:
                ds = xr.open_dataset(path)
            except Exception as e:
                # Best effort: a missing/corrupt day must not abort the whole run.
                print(f"File is not available. Error: {e}")
                continue
            ds_merge = xr.merge([ds_merge, ds])

        self.data = ds_merge
        # Read from this instance, not from a notebook-level variable.
        self.variables = list(self.data.data_vars)
        print("Files concatenated successfully.")

    def get_variables(self, var_):
        """
        Extract one variable as a time-indexed table of grid points.

        :param var_: variable name in str format (e.g., 'VHM0').
        :return: DataFrame indexed by the dataset's time values, with one
            column per grid cell labelled by its ``(lat, lon)`` tuple.
            Columns follow row-major order of the lat/lon grid.
        :raises ValueError: if concat_files() has not been run, or if the
            lat/lon coordinates are not 2-D arrays of the same shape.
        """
        if self.data is None:
            raise ValueError("No data available. Please run concat_files() first.")

        values = np.asarray(self.data[var_].values)
        lat_grid = np.asarray(self.data.lat.values)
        lon_grid = np.asarray(self.data.lon.values)
        if lat_grid.ndim != 2 or lat_grid.shape != lon_grid.shape:
            raise ValueError(
                "lat and lon must be 2-D arrays of identical shape; "
                f"got {lat_grid.shape} and {lon_grid.shape}."
            )

        n_rows, n_cols = lat_grid.shape
        time_index = pd.to_datetime(self.data.time.values)

        # Indexing as [time, row, col] and flattening the two grid axes in
        # C order yields the same column order as a nested row/col loop,
        # without growing a DataFrame one column at a time.
        block = values[:, :n_rows, :n_cols]
        flat = block.reshape(block.shape[0], n_rows * n_cols)

        df = pd.DataFrame(flat, index=time_index)
        df.columns = list(zip(lat_grid.ravel(), lon_grid.ravel()))
        return df

### Select the date range of the files to process

In [4]:
# Date window to process, as ISO "YYYY-MM-DD" strings (both days included).
start_day, end_day = "2023-07-18", "2023-07-20"
file_ = FileProcessor(start_day=start_day, end_day=end_day)

In [5]:
# Build the list of daily file names (YYYYMMDD.nc) for the selected date range.
file_.load_files()

Files to concat: ['20230718.nc', '20230719.nc', '20230720.nc']


In [6]:
# Open every listed file under data/filtered_data and merge them into file_.data.
file_.concat_files()

Files concatenated successfully.


In [7]:
# Display the merged dataset.
file_.data

In [8]:
# List the names of the data variables available in the merged dataset.
list(file_.data.data_vars)

['VHM0', 'VTM10']

In [None]:
# Extract VHM0 as a DataFrame: rows indexed by time, one column per (lat, lon) grid point.
VHM0 = file_.get_variables('VHM0')
VHM0

Unnamed: 0,"(67.82002401780537, 11.596249921698863)","(67.79833048824537, 11.634100831654624)","(67.77662914111656, 11.671875950675812)","(67.75492000819499, 11.70957547343371)","(67.73320312115125, 11.747199594083042)","(67.71147851155068, 11.784748506262789)","(67.68974621085398, 11.822222403096958)","(67.66800625041746, 11.859621477195402)","(67.64625866149346, 11.896945920654622)","(67.62450347523078, 11.934195925058571)",...,"(67.94444461900373, 12.615520222746914)","(67.92249100323113, 12.652556500557955)","(67.9005300490677, 12.689517331015502)","(67.87856178737093, 12.72640291376011)","(67.85658624889335, 12.76321344786976)","(67.83460346428299, 12.79994913186097)","(67.81261346408365, 12.836610163689926)","(67.79061627873547, 12.873196740753581)","(67.76861193857516, 12.909709059890805)","(67.74660047383658, 12.946147317383483)"
2023-07-18 00:00:00,2.615168,2.622952,2.627368,2.622814,2.622814,2.610756,2.617378,2.595221,2.554720,2.486232,...,2.288572,2.222660,2.092506,2.092506,1.758326,,1.473068,1.442969,1.708094,2.150317
2023-07-18 01:00:00,2.616325,2.613712,2.609567,2.601732,2.601732,2.590388,2.592201,2.569077,2.523785,2.449682,...,2.268365,2.201735,2.080913,2.080913,1.756021,,1.465225,1.431695,1.686991,2.119074
2023-07-18 02:00:00,2.580246,2.571202,2.561779,2.551995,2.551995,2.537822,2.528987,2.501410,2.457763,2.391636,...,2.237016,2.164135,2.036739,2.036739,1.721806,,1.425509,1.388045,1.639783,2.079247
2023-07-18 03:00:00,2.477655,2.470436,2.459048,2.445237,2.445237,2.427436,2.411520,2.384743,2.345636,2.285725,...,2.202765,2.132472,2.006058,2.006058,1.702696,,1.344742,1.308498,1.520229,1.942551
2023-07-18 04:00:00,2.321154,2.317076,2.309932,2.299633,2.299633,2.285333,2.266659,2.241788,2.205777,2.152813,...,2.126645,2.058323,1.942776,1.942776,1.662092,,1.256250,1.218585,1.382363,1.768140
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-07-20 19:00:00,1.918520,1.919748,1.919763,1.917300,1.917300,1.911677,1.908753,1.896107,1.877315,1.854043,...,1.757398,1.658532,1.502440,1.502440,1.230544,,0.304474,0.692873,0.629571,0.647461
2023-07-20 20:00:00,1.872662,1.873768,1.872915,1.870895,1.870895,1.866686,1.861001,1.850972,1.833950,1.812432,...,1.734664,1.639024,1.486042,1.486042,1.216470,,0.294698,0.692953,0.630648,0.645879
2023-07-20 21:00:00,1.828584,1.828551,1.828424,1.827640,1.827640,1.824532,1.819689,1.811327,1.796044,1.776539,...,1.705053,1.611660,1.462903,1.462903,1.197318,,0.287054,0.687053,0.629081,0.647035
2023-07-20 22:00:00,1.773743,1.772628,1.770367,1.767591,1.767591,1.765360,1.763649,1.759379,1.749053,1.734400,...,1.646075,1.556006,1.413744,1.413744,1.163056,,0.281915,0.679897,0.631122,0.658638


In [None]:
# Extract VTM10 as a DataFrame: rows indexed by time, one column per (lat, lon) grid point.
VTM10 = file_.get_variables('VTM10')
VTM10

Unnamed: 0,"(67.82002401780537, 11.596249921698863)","(67.79833048824537, 11.634100831654624)","(67.77662914111656, 11.671875950675812)","(67.75492000819499, 11.70957547343371)","(67.73320312115125, 11.747199594083042)","(67.71147851155068, 11.784748506262789)","(67.68974621085398, 11.822222403096958)","(67.66800625041746, 11.859621477195402)","(67.64625866149346, 11.896945920654622)","(67.62450347523078, 11.934195925058571)",...,"(67.94444461900373, 12.615520222746914)","(67.92249100323113, 12.652556500557955)","(67.9005300490677, 12.689517331015502)","(67.87856178737093, 12.72640291376011)","(67.85658624889335, 12.76321344786976)","(67.83460346428299, 12.79994913186097)","(67.81261346408365, 12.836610163689926)","(67.79061627873547, 12.873196740753581)","(67.76861193857516, 12.909709059890805)","(67.74660047383658, 12.946147317383483)"
2023-07-18 00:00:00,6.905824,6.913879,6.913346,6.917279,6.917279,6.921254,6.951661,6.952192,6.945463,6.923929,...,6.639269,6.613203,6.577817,6.577817,6.495595,,5.686031,5.615003,6.016979,6.529229
2023-07-18 01:00:00,6.990593,6.996905,7.001951,7.004814,7.004814,7.002323,7.030142,7.024226,7.023811,7.016047,...,6.754084,6.732090,6.680487,6.680487,6.598019,,5.768083,5.662688,6.039974,6.549832
2023-07-18 02:00:00,7.021675,7.032774,7.040187,7.038907,7.038907,7.037067,7.062613,7.068048,7.073586,7.065328,...,6.842959,6.831786,6.799483,6.799483,6.716395,,5.852685,5.716874,6.051912,6.540352
2023-07-18 03:00:00,6.972935,6.983125,6.999479,7.015279,7.015279,7.030815,7.049826,7.062039,7.071271,7.070439,...,6.927399,6.910674,6.888073,6.888073,6.799206,,6.023452,5.823945,6.126927,6.611423
2023-07-18 04:00:00,6.939055,6.945759,6.957383,6.971176,6.971176,6.985401,7.000437,7.020175,7.038277,7.043854,...,7.024140,7.012159,6.983110,6.983110,6.886019,,6.160702,5.871530,6.140556,6.648013
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-07-20 19:00:00,5.911673,5.915331,5.914666,5.910637,5.910637,5.904454,5.906828,5.893962,5.876389,5.856171,...,5.725721,5.667870,5.576345,5.576345,5.457417,,5.667077,4.720312,4.450857,4.504470
2023-07-20 20:00:00,5.844525,5.846999,5.846968,5.842049,5.842049,5.834516,5.837545,5.824404,5.808540,5.789388,...,5.687479,5.628290,5.538143,5.538143,5.426962,,5.709198,4.693627,4.429438,4.472822
2023-07-20 21:00:00,5.800584,5.804669,5.803648,5.797329,5.797329,5.789660,5.790232,5.777354,5.761841,5.742310,...,5.666687,5.608418,5.516812,5.516812,5.412615,,5.723362,4.678860,4.402325,4.416543
2023-07-20 22:00:00,5.778098,5.781297,5.781219,5.778125,5.778125,5.769115,5.764488,5.747372,5.727373,5.704070,...,5.661788,5.604997,5.514461,5.514461,5.411837,,5.696846,4.651954,4.344083,4.300489


In [11]:
# Optional export (disabled): uncomment to save both tables as pickle files
# in the current working directory.
# VHM0.to_pickle("VHM0.pkl")
# VTM10.to_pickle("VTM10.pkl")