In [1]:
# NB: change the output file name in `saving_cmems_grid` (the CSV to which dsi_forprediction_cmems
# is saved) depending on whether the all-sat or the two-sat solution is chosen.

import pandas as pd
import xarray as xr
import glob
import os
import netCDF4
import scipy
from scipy import stats
import numpy as np
# import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib.dates as dates
import cartopy as cart
from cartopy.mpl.ticker import LongitudeFormatter, LatitudeFormatter
import cartopy.crs as ccrs
from datetime import datetime, timedelta
import math

In [2]:
# Configuration: study region and time window of the prediction grid.
#   region     : 'BALTIC' or 'NORTHSEA'
#   start_time : first day of the daily time axis (month/day/year)
#   end_time   : last day of the daily time axis (month/day/year)
# The sampling frequency of the time axis is not set here; it is fixed where
# the time axis is built with pd.date_range.

region = 'NORTHSEA'

start_time = '01/01/2004'
end_time = '12/31/2004'

# Each region defines its bounding box (degrees) and the CSV file to which the
# CMEMS grid assembled for prediction is saved.
if region == 'BALTIC':
    max_lat = 66.0
    min_lat = 53.0
    max_lon = 31.0
    min_lon = 9.0

    saving_cmems_grid = '/DGFI8/H/work_marcello/machine_learning_altimetry/test_prediction_newpoints_surge_cmems_allsat.csv'

elif region == 'NORTHSEA':
    max_lat = 61.0
    min_lat = 50.0
    max_lon = 12.2
    min_lon = -4.0

    saving_cmems_grid = '/DGFI8/H/work_marcello/machine_learning_altimetry/test_prediction_newpoints_surge_cmems_NORTHSEA_allsat.csv'

else:
    # Fail here rather than with a NameError on max_lon / saving_cmems_grid
    # several cells further down.
    raise ValueError(
        "Unknown region {!r}: expected 'BALTIC' or 'NORTHSEA'".format(region)
    )
    

In [3]:
# Configuration: where the input data live ('work' or 'laptop').
#
# For a user manual on DUACS products see
# http://apdrc.soest.hawaii.edu/doc/CMEMS-SL-QUID-008-032-051.pdf
# and notice the difference between filtered and unfiltered SLA.

location = 'work'

if location == 'work':
    main_path = r"/DGFI8/H/work_marcello/giussani_machinelearning_data/"
elif location == 'laptop':
    main_path = r"C:\Users\ne62rut\Documents\giussani_machinelearning_data"
else:
    raise ValueError(
        "Unknown location {!r}: expected 'work' or 'laptop'".format(location)
    )

# The sub-directories are derived from main_path for every location, so that
# the cells below never hit an undefined directory_* name. os.path.join copes
# with main_path having or lacking a trailing separator.
# NOTE(review): assumes the laptop copy mirrors the folder layout used at work - confirm.
#directory = os.path.join(main_path, 'DMI_HBM')
directory = os.path.join(main_path, 'BALTICSEA_REANALYSIS_PHY_003_011')

# All-sat solution (contains GFO and TOPEX Interleaved).
directory_cmems = os.path.join(main_path, 'SEALEVEL_GLO_PHY_L4_REP_OBSERVATIONS_008_047', '2004')
# Two-sat solution:
#directory_cmems = os.path.join(main_path, 'SEALEVEL_GLO_PHY_CLIMATE_L4_REP_OBSERVATIONS_008_057', '2004')

directory_dac = os.path.join(main_path, 'DAC')
directory_grid = os.path.join(main_path, 'grid')

In [4]:
# Load the global Copernicus (CMEMS) grids found under directory_cmems, crop
# each one to the region bounding box and stack them along the time axis.

cur_dir = os.getcwd()
parent_dir = os.path.dirname(cur_dir)

# Variables of the CMEMS files that are discarded after loading.
cmems_unused_vars = ['crs', 'lon_bnds', 'lat_bnds', 'adt', 'err_sla', 'ugos', 'vgos',
                     'ugosa', 'vgosa', 'err_ugosa', 'err_vgosa', 'tpa_correction', 'flag_ice']


def _open_cmems_subset(path):
    """Open one CMEMS NetCDF file and return its regional subset, loaded in memory.

    The unused variables are dropped first (names missing from the file are
    ignored, so products with a slightly different variable set still load),
    then only the grid nodes strictly inside the
    (min_lon, max_lon) x (min_lat, max_lat) box are kept. The file is closed
    before returning.
    """
    with xr.open_dataset(path) as ds_full:
        ds_kept = ds_full.drop_vars(cmems_unused_vars, errors="ignore")
        inside_box = ((ds_kept.longitude < max_lon) & (ds_kept.longitude > min_lon) &
                      (ds_kept.latitude < max_lat) & (ds_kept.latitude > min_lat))
        # .load() pulls the cropped data into memory so that it stays valid
        # once the file handle is closed.
        return ds_kept.where(inside_box, drop=True).load()


# os.walk yields files in arbitrary order; sort the paths so that repeated runs
# process the files in the same order.
file_list = sorted(
    os.path.join(root, name)
    for root, _subdirs, names in os.walk(directory_cmems)
    for name in names
    if name.endswith(".nc")
)

if not file_list:
    raise FileNotFoundError(
        "No .nc files found under {!r}".format(directory_cmems)
    )

# Concatenate once (instead of growing the dataset file by file) and make sure
# the time axis is increasing before it is used for interpolation in time.
ds_cmems = xr.concat([_open_cmems_subset(path) for path in file_list], dim='time').sortby('time')

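Use the following block to transform the CMEMS grid into an unstructured list of (longitude, latitude) points. To obtain a downsampled version of the grid, set a step in the longitude/latitude slices; as written, every grid node is kept.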

In [5]:
# Flatten the CMEMS longitude/latitude axes into a two-column array holding one
# (longitude, latitude) pair per grid node.
lon_mesh, lat_mesh = np.meshgrid(ds_cmems.longitude[::], ds_cmems.latitude[::])
unstruc_coord = np.stack((lon_mesh, lat_mesh), -1).reshape(-1, 2)

# Daily time axis covering the configured window.
time_surge_cmems = pd.date_range(start=start_time, end=end_time, freq='1D')

n_points = np.size(unstruc_coord[:, 0])
n_times = np.size(time_surge_cmems)

# One sample per (grid node, day): each node is repeated for every day and the
# time axis is tiled once per node, so the three arrays line up element-wise.
lon_per_sample = np.repeat(unstruc_coord[:, 0], n_times)
lat_per_sample = np.repeat(unstruc_coord[:, 1], n_times)
time_per_sample = np.tile(time_surge_cmems, n_points)

# "sla" is a placeholder of ones with one entry per sample.
sla_placeholder = np.tile(np.ones(n_times), n_points)

ds_forprediction_cmems = xr.Dataset(
    data_vars=dict(
        sla=(["times"], sla_placeholder),
        lon=(["times"], lon_per_sample),
        lat=(["times"], lat_per_sample),
        time_model=(["times"], time_per_sample),
    ),
    coords=dict(
        # Copies keep the coordinates independent of the data variables.
        longitude=(["times"], lon_per_sample.copy()),
        latitude=(["times"], lat_per_sample.copy()),
        time=(["times"], time_per_sample.copy()),
    ),
)


In [6]:
# Interpolate the CMEMS dataset at the (longitude, latitude, time) samples of
# the prediction grid. All three indexers are defined on the same "times"
# dimension of ds_forprediction_cmems.
target_samples = {
    "longitude": ds_forprediction_cmems.lon,
    "latitude": ds_forprediction_cmems.lat,
    "time": ds_forprediction_cmems.time_model,
}
dsi_forprediction_cmems = ds_cmems.interp(**target_samples)

In [7]:
# Convert the interpolated dataset to a pandas DataFrame and discard every row
# that contains a NaN.
dsi_forprediction_cmems = dsi_forprediction_cmems.to_dataframe().dropna()

In [8]:
# Rename the columns to the names used in the saved file, then write the table
# to the CSV path chosen in the region configuration.
saved_column_names = {
    "sla": "sla_predicted",
    "longitude": "lon",
    "latitude": "lat",
}
dsi_forprediction_cmems = dsi_forprediction_cmems.rename(columns=saved_column_names)

dsi_forprediction_cmems.to_csv(saving_cmems_grid)

In [9]:
# Show the assembled, region-cropped CMEMS dataset as the cell output.
ds_cmems