# NCEP

## Fill Missing

In [1]:
import os
import gc
import sys
import glob
import xarray as xr
import pandas as pd
import xesmf as xe
import numpy as np
sys.path.append("/home/brayan/mnsun/")
from utils import check_dir

In [2]:
def dates_download(init_, end_):
    
    """
    Esta funcion permite listar las fechas que seran utilizadas para la descarga.
    
    Input: 
            init_: fecha de inicio [string]
            end_ : fecha final [string]
    Output: 
            : fechas listadas [list]    
    """
    
    if init_ != end_:
        dates_ = pd.date_range(init_, end_, freq="D")
    else:
        dates_ = pd.date_range(init_, periods=1, freq="D")
        
    return [str(i)[:10] for i in dates_.values]   

In [3]:
def create_xarray(data_, dim1, dim2, dim3, dim1_name, dim2_name, dim3_name):
    """
    Se crea un xarray que tiene la siguiente estructura estandar:
    'time, latitud, longitud'
    
    Output: Informacion de data_Xarray en formato float_32 [Xarray] 
    """
    Array  =  xr.DataArray( np.float32(data_) ,
                            coords=[ dim1, dim2, dim3],
                            dims=[ dim1_name, dim2_name, dim3_name])     
    return Array

In [4]:
INPUT_DIR        = "/home/brayan/DATA/Modelo-Multimodal-de-Ondas/raw_data/NCEP/"
OUTPUT_DIR       = "/home/brayan/DATA/Modelo-Multimodal-de-Ondas/process/NCEP/"
date_update      = "2000-06-01"

In [5]:
dates_for_update = dates_download(date_update.split("-")[0]+"-01-01",date_update )   #pd.date_range( date_update.split("-")[0]+"-01-01", date_update, freq="D")  #.shift(-lag, freq="D").values[0])[:10]

In [6]:
file_ncep = []
for date_ in dates_for_update:
    file_ncep += glob.glob(INPUT_DIR+f"TAU/"+date_update.split("-")[0]+"/"+"".join(date_.split("-"))+".nc")
files_exists = [asc_[-11:-7] +"-"+ asc_[-7:-5] +"-"+ asc_[-5:-3] for asc_ in file_ncep]

In [7]:
files_to_update   = list(set(dates_for_update) - set(files_exists))

In [8]:
if file_ncep[-1][-11:] !=  "".join(dates_for_update[-1].split("-")) + ".nc":
    print("Nada para actualizar")
    os.environ["pass"] = "false"
else:
    print("pass")
    os.environ["pass"] = "true"
    DS_NCEP    = xr.open_mfdataset(file_ncep,  concat_dim = 'time', parallel=True)
    dates_fill = pd.date_range(date_update.split("-")[0]+"-01-01", date_update, freq="D")
    TAUX_MISS  = create_xarray(DS_NCEP.taux.values, DS_NCEP.time, DS_NCEP.lat, DS_NCEP.lon, "time", "lat", "lon")
    TAUX_FILL  = TAUX_MISS.interp(time=dates_fill, method="linear").interpolate_na(dim="time")
    ds_MOM     = xr.Dataset(
                            {"lon": np.arange( 90, 299, 1), "lat":np.arange( -30, 30.5, 0.5)}
                            )        
    regridder  = xe.Regridder( TAUX_FILL, ds_MOM, "bilinear")
    regridder.clean_weight_file()
    TAUX_FILL_MOM    = regridder(TAUX_FILL)
    TAUX_FILL_32_MOM = create_xarray(TAUX_FILL_MOM.values, TAUX_FILL_MOM.time, np.float32(ds_MOM.lat), np.float32(ds_MOM.lon), "time", "lat", "lon")    

    TAUY_MISS  = create_xarray(DS_NCEP.tauy.values, DS_NCEP.time, DS_NCEP.lat, DS_NCEP.lon, "time", "lat", "lon")
    TAUY_FILL  = TAUY_MISS.interp(time=dates_fill, method="linear").interpolate_na(dim="time")  
    TAUY_FILL_MOM    = regridder(TAUY_FILL)
    TAUY_FILL_32_MOM = create_xarray(TAUY_FILL_MOM.values, TAUY_FILL_MOM.time, np.float32(ds_MOM.lat), np.float32(ds_MOM.lon), "time", "lat", "lon")        
    DATASET          = xr.Dataset({"taux": TAUX_FILL_32_MOM, "tauy": TAUY_FILL_32_MOM})
    check_dir(OUTPUT_DIR+ "TAU/"+str(date_update)[:4]+"/")
    
    for t, date_2 in enumerate(DATASET.time.values):
        date_compact = "".join(str(date_2)[:10].split("-"))
        DATASET.isel(time=t).to_netcdf(OUTPUT_DIR + "TAU/" + date_compact[:4] + "/" + date_compact + ".nc")    
    
    

pass


will change. To retain the existing behavior, pass
combine='nested'. To use future default behavior, pass
combine='by_coords'. See
http://xarray.pydata.org/en/stable/combining.html#combining-multi

  import sys
To get equivalent behaviour from now on please use the new
`combine_nested` function instead (or the `combine='nested'` option to
`open_mfdataset`).The datasets supplied do not have global dimension coordinates. In
future, to continue concatenating without supplying dimension
coordinates, please use the new `combine_nested` function (or the
`combine='nested'` option to open_mfdataset.
  from_openmfds=True,


Create weight file: bilinear_34x113_121x209.nc
Remove file bilinear_34x113_121x209.nc




### Climatologia y Anomalias

In [2]:
def dates_download(init_, end_):
    
    """
    Esta funcion permite listar las fechas que seran utilizadas para la descarga.
    
    Input: 
            init_: fecha de inicio [string]
            end_ : fecha final [string]
    Output: 
            : fechas listadas [list]    
    """
    
    if init_ != end_:
        dates_ = pd.date_range(init_, end_, freq="D")
    else:
        dates_ = pd.date_range(init_, periods=1, freq="D")
        
    return [str(i)[:10] for i in dates_.values]  

In [3]:
import os
import gc
import sys
import glob
import xarray as xr
import pandas as pd
import xesmf as xe
import numpy as np
sys.path.append("/home/brayan/mnsun/")
from utils import check_dir
import datetime 

In [4]:
def create_xarray(data_, dim1, dim2, dim3, dim1_name, dim2_name, dim3_name):
    """
    Se crea un xarray que tiene la siguiente estructura estandar:
    'time, latitud, longitud'
    
    Output: Informacion de data_Xarray en formato float_32 [Xarray] 
    """
    Array  =  xr.DataArray( np.float32(data_) ,
                            coords=[ dim1, dim2, dim3],
                            dims=[ dim1_name, dim2_name, dim3_name])     
    return Array

In [5]:
DIR_             = "/home/brayan/DATA/Modelo-Multimodal-de-Ondas/process/NCEP/" 

In [8]:
file_dates_t = dates_download("2022-01-01", "2022-07-31")
file_dates_t.sort()
# date_update      = "2022-01-01"

In [10]:
file_dates_t[-5:]

['2022-07-27', '2022-07-28', '2022-07-29', '2022-07-30', '2022-07-31']

In [11]:
for date_update in file_dates_t:
    print(date_update)
    file_tau         = glob.glob(DIR_+f"TAU/{date_update[:4]}/"+"".join(date_update.split("-"))+".nc")[0]
    DS_ASCAT         = xr.open_dataset(file_tau).interpolate_na(dim="lat", method="nearest", fill_value="extrapolate")
    DS_CLIM          = xr.open_dataset(DIR_+f"CLIMATOLOGY/TAU_CLIM.nc")
    if date_update[5:] == "02-29":
        number_day_of_year = 58
    else:
        number_day_of_year  = datetime.datetime.strptime(date_update, "%Y-%m-%d").date().timetuple().tm_yday-1
    TAUX_ANOM        = DS_ASCAT.taux-DS_CLIM.clim_taux.isel(dayofyear=number_day_of_year)
    TAUY_ANOM        = DS_ASCAT.tauy-DS_CLIM.clim_tauy.isel(dayofyear=number_day_of_year)
    ATAUX_CLEAN      = create_xarray(TAUX_ANOM.values.reshape(1, TAUX_ANOM.lat.shape[0], TAUX_ANOM.lon.shape[0]), pd.date_range(date_update, periods=1, freq="D"), TAUX_ANOM.lat, TAUX_ANOM.lon, "time", "lat", "lon")
    ATAUY_CLEAN      = create_xarray(TAUY_ANOM.values.reshape(1, TAUY_ANOM.lat.shape[0], TAUY_ANOM.lon.shape[0]), pd.date_range(date_update, periods=1, freq="D"), TAUY_ANOM.lat, TAUY_ANOM.lon, "time", "lat", "lon")
    DATASET_ANOM     = xr.Dataset({"ataux": ATAUX_CLEAN, "tauy": ATAUY_CLEAN})
    
    check_dir(DIR_+"ANOMALY/" +str(date_update)[:4]+"/")
    DATASET_ANOM.to_netcdf(DIR_+"ANOMALY/"+str(date_update)[:4]+"/"+"".join(str(date_update)[:10].split("-"))+".nc")    

2022-01-01
2022-01-02
2022-01-03
2022-01-04
2022-01-05
2022-01-06
2022-01-07
2022-01-08
2022-01-09
2022-01-10
2022-01-11
2022-01-12
2022-01-13
2022-01-14
2022-01-15
2022-01-16
2022-01-17
2022-01-18
2022-01-19
2022-01-20
2022-01-21
2022-01-22
2022-01-23
2022-01-24
2022-01-25
2022-01-26
2022-01-27
2022-01-28
2022-01-29
2022-01-30
2022-01-31
2022-02-01
2022-02-02
2022-02-03
2022-02-04
2022-02-05
2022-02-06
2022-02-07
2022-02-08
2022-02-09
2022-02-10
2022-02-11
2022-02-12
2022-02-13
2022-02-14
2022-02-15
2022-02-16
2022-02-17
2022-02-18
2022-02-19
2022-02-20
2022-02-21
2022-02-22
2022-02-23
2022-02-24
2022-02-25
2022-02-26
2022-02-27
2022-02-28
2022-03-01
2022-03-02
2022-03-03
2022-03-04
2022-03-05
2022-03-06
2022-03-07
2022-03-08
2022-03-09
2022-03-10
2022-03-11
2022-03-12
2022-03-13
2022-03-14
2022-03-15
2022-03-16
2022-03-17
2022-03-18
2022-03-19
2022-03-20
2022-03-21
2022-03-22
2022-03-23
2022-03-24
2022-03-25
2022-03-26
2022-03-27
2022-03-28
2022-03-29
2022-03-30
2022-03-31
2022-04-01

In [None]:
file_tau         = glob.glob(DIR_+f"TAU/{date_update[:4]}/"+"".join(date_update.split("-"))+".nc")[0]
DS_NCEP          = xr.open_dataset(file_tau).interpolate_na(dim="lat", method="nearest", fill_value="extrapolate")
DS_CLIM          = xr.open_dataset(DIR_+f"CLIMATOLOGY/TAU_CLIM.nc")
if date_update[5:] == "02-29":
    number_day_of_year = 58
else:
    number_day_of_year  = datetime.datetime.strptime(date_update, "%Y-%m-%d").date().timetuple().tm_yday-1
TAUX_ANOM        = DS_NCEP.taux-DS_CLIM.clim_taux.isel(dayofyear=number_day_of_year)
TAUY_ANOM        = DS_NCEP.tauy-DS_CLIM.clim_tauy.isel(dayofyear=number_day_of_year)

ATAUX_CLEAN      = create_xarray(TAUX_ANOM.values.reshape(1, TAUX_ANOM.lat.shape[0], TAUX_ANOM.lon.shape[0]), pd.date_range(date_update, periods=1, freq="D"), TAUX_ANOM.lat, TAUX_ANOM.lon, "time", "lat", "lon")
ATAUY_CLEAN      = create_xarray(TAUY_ANOM.values.reshape(1, TAUY_ANOM.lat.shape[0], TAUY_ANOM.lon.shape[0]), pd.date_range(date_update, periods=1, freq="D"), TAUY_ANOM.lat, TAUY_ANOM.lon, "time", "lat", "lon")
check_dir(DIR_+"ANOMALY/" +str(date_update)[:4]+"/")
DATASET_ANOM.to_netcdf(DIR_+"ANOMALY/"+str(date_update)[:4]+"/"+"".join(str(date_update)[:10].split("-"))+".nc")    

### Climatologia y Anomalias

In [1]:
import os
import gc
import sys
import glob
import xarray as xr
import pandas as pd
import xesmf as xe
import numpy as np
sys.path.append("/home/brayan/mnsun/")
from utils import check_dir

In [2]:
INPUT_DIR        = "/home/brayan/DATA/Modelo-Multimodal-de-Ondas/process/NCEP/"
init_date_update = "2000-01-01"
end_date_update  = "2021-12-31"   

In [3]:
file_ncep = []
for year in range(int(init_date_update.split("-")[0]), int(end_date_update.split("-")[0])+1):
    file_ncep += glob.glob(INPUT_DIR+f"TAU/{str(year)}/*.nc")

In [4]:
file_ncep.sort()

In [5]:
file_ncep[-5:]

['/home/brayan/DATA/Modelo-Multimodal-de-Ondas/process/NCEP/TAU/2021/20211227.nc',
 '/home/brayan/DATA/Modelo-Multimodal-de-Ondas/process/NCEP/TAU/2021/20211228.nc',
 '/home/brayan/DATA/Modelo-Multimodal-de-Ondas/process/NCEP/TAU/2021/20211229.nc',
 '/home/brayan/DATA/Modelo-Multimodal-de-Ondas/process/NCEP/TAU/2021/20211230.nc',
 '/home/brayan/DATA/Modelo-Multimodal-de-Ondas/process/NCEP/TAU/2021/20211231.nc']

In [6]:
DS_NCEP = xr.open_mfdataset(file_ncep, concat_dim="time", parallel=True)

will change. To retain the existing behavior, pass
combine='nested'. To use future default behavior, pass
combine='by_coords'. See
http://xarray.pydata.org/en/stable/combining.html#combining-multi

  """Entry point for launching an IPython kernel.
To get equivalent behaviour from now on please use the new
`combine_nested` function instead (or the `combine='nested'` option to
`open_mfdataset`).The datasets supplied do not have global dimension coordinates. In
future, to continue concatenating without supplying dimension
coordinates, please use the new `combine_nested` function (or the
`combine='nested'` option to open_mfdataset.
  from_openmfds=True,


In [7]:
def Climatology_ANnomaly(data_, init_, end_):
    interv_date     = f"{init_}-01-01",f"{end_}-12-31"
    DATA_0          = data_.sel(time=slice(interv_date[0], interv_date[1])).resample(time="1MS").mean(dim="time")
    DATA_0["time"]  = pd.date_range( interv_date[0], interv_date[1], freq="MS").shift(14, freq="D")
    DATA_1          = DATA_0-DATA_0.mean(dim=("time"))
    DATA_2          = DATA_1.groupby("time.month").sum(dim="time")/(DATA_0.time.shape[0]/12)
    DATA_2_forward  = DATA_2.copy()
    DATA_2_back     = DATA_2.copy()    
    DATA_2_back["month"]    = pd.date_range("1903-01-01", "1903-12-31", freq="MS").shift(14, freq="D")
    DATA_2["month"]         = pd.date_range("1904-01-01", "1904-12-31", freq="MS").shift(14, freq="D")
    DATA_2_forward["month"] = pd.date_range("1905-01-01", "1905-12-31", freq="MS").shift(14, freq="D")
    DATA_3          = xr.concat([DATA_2_back, DATA_2, DATA_2_forward], dim="month").chunk(chunks={"month":36})
    DATA_4          = DATA_3.interp( month=pd.date_range("1903-01-01", "1905-12-31", freq="D"), method="cubic").sel(month=slice("1904-01-01", "1904-12-31"))
    DATA_4          = DATA_4.rename({"month":"dayofyear"})
    DATA_4["dayofyear"]     = np.arange(1, 366+1, 1, dtype=int)
    DATA_5          = DATA_4.chunk(chunks={"dayofyear":1}) + DATA_0.mean(dim=("time"))
    ANOMALIAS       = data_.groupby("time.dayofyear") - DATA_5
    return ANOMALIAS, DATA_5

In [8]:
TAUX_ANOM, TAUX_CLIM        = Climatology_ANnomaly( DS_NCEP.taux, "2000", "2013")

In [9]:
TAUY_ANOM, TAUY_CLIM        = Climatology_ANnomaly( DS_NCEP.tauy, "2000", "2013")

In [10]:
def create_xarray(data_, dim1, dim2, dim3, dim1_name, dim2_name, dim3_name):
    """
    Se crea un xarray que tiene la siguiente estructura estandar:
    'time, latitud, longitud'
    
    Output: Informacion de data_Xarray en formato float_32 [Xarray] 
    """
    Array  =  xr.DataArray( np.float32(data_) ,
                            coords=[ dim1, dim2, dim3],
                            dims=[ dim1_name, dim2_name, dim3_name])     
    return Array

In [11]:
TAUX_ANOM_32 = create_xarray( TAUX_ANOM.interpolate_na(dim="lat", method="nearest", fill_value="extrapolate").values, TAUX_ANOM.time, np.float32(TAUX_ANOM.lat), np.float32(TAUX_ANOM.lon), "time", "lat", "lon")
print("pass")
TAUY_ANOM_32 = create_xarray( TAUY_ANOM.interpolate_na(dim="lat", method="nearest", fill_value="extrapolate").values, TAUY_ANOM.time, np.float32(TAUY_ANOM.lat), np.float32(TAUY_ANOM.lon), "time", "lat", "lon")

pass


In [23]:
TAUX_CLIM_32 = create_xarray( TAUX_CLIM.interpolate_na(dim="lat", method="nearest", fill_value="extrapolate").values, TAUX_CLIM.dayofyear, np.float32(TAUX_ANOM.lat), np.float32(TAUX_ANOM.lon), "dayofyear", "lat", "lon")
print("pass")
TAUY_CLIM_32 = create_xarray( TAUY_CLIM.interpolate_na(dim="lat", method="nearest", fill_value="extrapolate").values, TAUY_CLIM.dayofyear, np.float32(TAUY_ANOM.lat), np.float32(TAUY_ANOM.lon), "dayofyear", "lat", "lon")

pass


In [24]:
DATASET_ANOM = xr.Dataset({"ataux": TAUX_ANOM_32, "tauy": TAUY_ANOM_32})
DATASET_CLIM = xr.Dataset({"clim_taux": TAUX_CLIM_32, "clim_tauy": TAUY_CLIM_32})

In [26]:
for enum, date_val in enumerate(DATASET_ANOM.time.values):
    check_dir(INPUT_DIR+"ANOMALY/" +str(date_val)[:4]+"/")
    DATASET_ANOM.isel(time=enum).to_netcdf(INPUT_DIR+"ANOMALY/" +str(date_val)[:4]+"/"+"".join(str(date_val)[:10].split("-"))+".nc")

In [25]:
check_dir(INPUT_DIR+"CLIMATOLOGY/")
DATASET_CLIM.to_netcdf(INPUT_DIR+"CLIMATOLOGY/" +"TAU_CLIM.nc")

# ASCAT

## Fill Missing

In [1]:
import os
import gc
import sys
import glob
import xarray as xr
import pandas as pd
import xesmf as xe
import numpy as np
sys.path.append("/home/brayan/mnsun/")
from utils import check_dir

In [2]:
def create_xarray(data_, dim1, dim2, dim3, dim1_name, dim2_name, dim3_name):
    """
    Se crea un xarray que tiene la siguiente estructura estandar:
    'time, latitud, longitud'
    
    Output: Informacion de data_Xarray en formato float_32 [Xarray] 
    """
    Array  =  xr.DataArray( np.float32(data_) ,
                            coords=[ dim1, dim2, dim3],
                            dims=[ dim1_name, dim2_name, dim3_name])     
    return Array

In [3]:
def dates_download(init_, end_):
    
    """
    Esta funcion permite listar las fechas que seran utilizadas para la descarga.
    
    Input: 
            init_: fecha de inicio [string]
            end_ : fecha final [string]
    Output: 
            : fechas listadas [list]    
    """
    
    if init_ != end_:
        dates_ = pd.date_range(init_, end_, freq="D")
    else:
        dates_ = pd.date_range(init_, periods=1, freq="D")
        
    return [str(i)[:10] for i in dates_.values]   

In [8]:
INPUT_DIR        = "/home/brayan/DATA/Modelo-Multimodal-de-Ondas/raw_data/"
OUTPUT_DIR       = "/home/brayan/DATA/Modelo-Multimodal-de-Ondas/process/ASCAT/"
date_update      = "2022-08-01"
lag              = 30

In [9]:
date_update_back = str(pd.date_range( date_update, periods=1, freq="D").shift(-14, freq="D").values[0])[:10]

In [10]:
dates_update     = dates_download( date_update_back, date_update)

In [11]:
file_ascat = []
for date_ in dates_update:
    file_ascat += glob.glob(INPUT_DIR+f"TAU/"+date_.split("-")[0]+"/"+"".join(date_.split("-"))+".nc")

In [12]:
files_exists = [asc_[-11:-7] +"-"+ asc_[-7:-5] +"-"+ asc_[-5:-3] for asc_ in file_ascat]

In [13]:
files_fill   = list(set(dates_update) - set(files_exists))

In [13]:
if file_ascat[-1][-11:] !=  "".join(dates_update[-1].split("-")) + ".nc":
    print("Aqui se acaba el proceso de actualizacion")
    os.environ["pass"] = "false"
else:
    print("pass")
    DS_ASCAT   = xr.open_mfdataset(file_ascat, parallel=True)
    dates_fill = pd.date_range(date_update_back, date_update, freq="D")
    TAUX_MISS  = create_xarray(DS_ASCAT.taux.values, DS_ASCAT.time, DS_ASCAT.lat, DS_ASCAT.lon, "time", "lat", "lon")
    TAUX_FILL  = TAUX_MISS.interp(time=dates_fill, method="linear").interpolate_na(dim="time")
    ds_MOM     = xr.Dataset(
                            {"lon": np.arange( 90, 299, 1), "lat":np.arange( -30, 30.5, 0.5)}
                           )    
    regridder  = xe.Regridder( TAUX_FILL, ds_MOM, "bilinear")
    regridder.clean_weight_file()
    TAUX_FILL_MOM    = regridder(TAUX_FILL)
    TAUX_FILL_32_MOM = create_xarray(TAUX_FILL_MOM.values, TAUX_FILL_MOM.time, np.float32(ds_MOM.lat), np.float32(ds_MOM.lon), "time", "lat", "lon")
    
    TAUY_MISS        = create_xarray(DS_ASCAT.tauy.values, DS_ASCAT.time, DS_ASCAT.lat, DS_ASCAT.lon, "time", "lat", "lon")
    TAUY_FILL        = TAUY_MISS.interp(time=dates_fill, method="linear").interpolate_na(dim="time")
    TAUY_FILL_MOM    = regridder(TAUY_FILL)
    TAUY_FILL_32_MOM = create_xarray(TAUY_FILL_MOM.values, TAUY_FILL_MOM.time, np.float32(ds_MOM.lat), np.float32(ds_MOM.lon), "time", "lat", "lon")
    
    DATASET = xr.Dataset({"taux": TAUX_FILL_32_MOM, "tauy": TAUY_FILL_32_MOM})
    check_dir(OUTPUT_DIR+ "TAU/"+str(date_)[:4]+"/")
    DATASET.isel(time=-1).to_netcdf(OUTPUT_DIR+"TAU/"+str(date_)[:4]+"/"+"".join(str(date_)[:10].split("-"))+".nc")
    
    for date_fill in files_fill:
        DATASET.sel(time=date_fill).to_netcdf(OUTPUT_DIR+"TAU/"+str(date_fill)[:4]+"/"+"".join(str(date_fill)[:10].split("-"))+".nc")    
    

pass


will change. To retain the existing behavior, pass
combine='nested'. To use future default behavior, pass
combine='by_coords'. See
http://xarray.pydata.org/en/stable/combining.html#combining-multi

  
to use the new `combine_by_coords` function (or the
`combine='by_coords'` option to `open_mfdataset`) to order the datasets
before concatenation. Alternatively, to continue concatenating based
on the order the datasets are supplied in future, please use the new
`combine_nested` function (or the `combine='nested'` option to
open_mfdataset).
  from_openmfds=True,


Create weight file: bilinear_244x844_121x209.nc
Remove file bilinear_244x844_121x209.nc




### Climatologia y Anomalias

In [1]:
def dates_download(init_, end_):
    
    """
    Esta funcion permite listar las fechas que seran utilizadas para la descarga.
    
    Input: 
            init_: fecha de inicio [string]
            end_ : fecha final [string]
    Output: 
            : fechas listadas [list]    
    """
    
    if init_ != end_:
        dates_ = pd.date_range(init_, end_, freq="D")
    else:
        dates_ = pd.date_range(init_, periods=1, freq="D")
        
    return [str(i)[:10] for i in dates_.values]  

In [2]:
import os
import gc
import sys
import glob
import xarray as xr
import pandas as pd
import xesmf as xe
import numpy as np
sys.path.append("/home/brayan/mnsun/")
from utils import check_dir
import datetime 

In [3]:
def create_xarray(data_, dim1, dim2, dim3, dim1_name, dim2_name, dim3_name):
    """
    Se crea un xarray que tiene la siguiente estructura estandar:
    'time, latitud, longitud'
    
    Output: Informacion de data_Xarray en formato float_32 [Xarray] 
    """
    Array  =  xr.DataArray( np.float32(data_) ,
                            coords=[ dim1, dim2, dim3],
                            dims=[ dim1_name, dim2_name, dim3_name])     
    return Array

In [4]:
DIR_             = "/home/brayan/DATA/Modelo-Multimodal-de-Ondas/process/ASCAT/" 

In [5]:
file_dates_t = dates_download("2018-01-01", "2022-07-31")
file_dates_t.sort()

In [7]:
for date_update in file_dates_t:
    print(date_update)
    file_tau         = glob.glob(DIR_+f"TAU/{date_update[:4]}/"+"".join(date_update.split("-"))+".nc")[0]
    DS_ASCAT         = xr.open_dataset(file_tau).interpolate_na(dim="lat", method="nearest", fill_value="extrapolate")
    DS_CLIM          = xr.open_dataset(DIR_+f"CLIMATOLOGY/TAU_CLIM.nc")
    if date_update[5:] == "02-29":
        number_day_of_year = 58
    else:
        number_day_of_year  = datetime.datetime.strptime(date_update, "%Y-%m-%d").date().timetuple().tm_yday-1
    TAUX_ANOM        = DS_ASCAT.taux-DS_CLIM.clim_taux.isel(dayofyear=number_day_of_year)
    TAUY_ANOM        = DS_ASCAT.tauy-DS_CLIM.clim_tauy.isel(dayofyear=number_day_of_year)
    ATAUX_CLEAN      = create_xarray(TAUX_ANOM.values.reshape(1, TAUX_ANOM.lat.shape[0], TAUX_ANOM.lon.shape[0]), pd.date_range(date_update, periods=1, freq="D"), TAUX_ANOM.lat, TAUX_ANOM.lon, "time", "lat", "lon")
    ATAUY_CLEAN      = create_xarray(TAUY_ANOM.values.reshape(1, TAUY_ANOM.lat.shape[0], TAUY_ANOM.lon.shape[0]), pd.date_range(date_update, periods=1, freq="D"), TAUY_ANOM.lat, TAUY_ANOM.lon, "time", "lat", "lon")
    DATASET_ANOM     = xr.Dataset({"ataux": ATAUX_CLEAN, "tauy": ATAUY_CLEAN})
    
    check_dir(DIR_+"ANOMALY/" +str(date_update)[:4]+"/")
    DATASET_ANOM.to_netcdf(DIR_+"ANOMALY/"+str(date_update)[:4]+"/"+"".join(str(date_update)[:10].split("-"))+".nc")    

2018-01-01
2018-01-02
2018-01-03
2018-01-04
2018-01-05
2018-01-06
2018-01-07
2018-01-08
2018-01-09
2018-01-10
2018-01-11
2018-01-12
2018-01-13
2018-01-14
2018-01-15
2018-01-16
2018-01-17
2018-01-18
2018-01-19
2018-01-20
2018-01-21
2018-01-22
2018-01-23
2018-01-24
2018-01-25
2018-01-26
2018-01-27
2018-01-28
2018-01-29
2018-01-30
2018-01-31
2018-02-01
2018-02-02
2018-02-03
2018-02-04
2018-02-05
2018-02-06
2018-02-07
2018-02-08
2018-02-09
2018-02-10
2018-02-11
2018-02-12
2018-02-13
2018-02-14
2018-02-15
2018-02-16
2018-02-17
2018-02-18
2018-02-19
2018-02-20
2018-02-21
2018-02-22
2018-02-23
2018-02-24
2018-02-25
2018-02-26
2018-02-27
2018-02-28
2018-03-01
2018-03-02
2018-03-03
2018-03-04
2018-03-05
2018-03-06
2018-03-07
2018-03-08
2018-03-09
2018-03-10
2018-03-11
2018-03-12
2018-03-13
2018-03-14
2018-03-15
2018-03-16
2018-03-17
2018-03-18
2018-03-19
2018-03-20
2018-03-21
2018-03-22
2018-03-23
2018-03-24
2018-03-25
2018-03-26
2018-03-27
2018-03-28
2018-03-29
2018-03-30
2018-03-31
2018-04-01

In [11]:
number_day_of_year

366

In [12]:
date_update

'2020-12-31'

In [15]:
datetime.datetime.strptime("2020-01-01", "%Y-%m-%d").date().timetuple().tm_yday

1

In [14]:
DS_CLIM.clim_taux

In [3]:
DIR_             = "/home/brayan/DATA/Modelo-Multimodal-de-Ondas/process/ASCAT/" #"/home/brayan/DATA/Modelo-Multimodal-de-Ondas/raw_data/"
date_update      = "2018-12-31"

In [4]:
file_tau         = glob.glob(DIR_+f"TAU/{date_update[:4]}/"+"".join(date_update.split("-"))+".nc")[0]

In [5]:
DS_ASCAT         = xr.open_dataset(file_tau).interpolate_na(dim="lat", method="nearest", fill_value="extrapolate")

In [6]:
DS_CLIM          = xr.open_dataset(DIR_+f"CLIMATOLOGY/TAU_CLIM.nc")

In [7]:
if date_update[5:] == "02-29":
    number_day_of_year = 59
else:
    number_day_of_year  = datetime.datetime.strptime(date_update, "%Y-%m-%d").date().timetuple().tm_yday

In [8]:
TAUX_ANOM        = DS_ASCAT.taux-DS_CLIM.clim_taux.isel(dayofyear=number_day_of_year)
TAUY_ANOM        = DS_ASCAT.tauy-DS_CLIM.clim_tauy.isel(dayofyear=number_day_of_year)

In [9]:
ATAUX_CLEAN      = create_xarray(TAUX_ANOM.values.reshape(1, TAUX_ANOM.lat.shape[0], TAUX_ANOM.lon.shape[0]), pd.date_range(date_update, periods=1, freq="D"), TAUX_ANOM.lat, TAUX_ANOM.lon, "time", "lat", "lon")
ATAUY_CLEAN      = create_xarray(TAUY_ANOM.values.reshape(1, TAUY_ANOM.lat.shape[0], TAUY_ANOM.lon.shape[0]), pd.date_range(date_update, periods=1, freq="D"), TAUY_ANOM.lat, TAUY_ANOM.lon, "time", "lat", "lon")

In [10]:
DATASET_ANOM = xr.Dataset({"ataux": ATAUX_CLEAN, "tauy": ATAUY_CLEAN})

In [11]:
check_dir(DIR_+"ANOMALY/" +str(date_update)[:4]+"/")
DATASET_ANOM.to_netcdf(DIR_+"ANOMALY/"+str(date_update)[:4]+"/"+"".join(str(date_update)[:10].split("-"))+".nc")    

### Climatologia y Anomalias

In [1]:
import os
import gc
import sys
import glob
import xarray as xr
import pandas as pd
import xesmf as xe
import numpy as np
sys.path.append("/home/brayan/mnsun/")
from utils import check_dir

In [2]:
INPUT_DIR        = "/home/brayan/DATA/Modelo-Multimodal-de-Ondas/process/ASCAT/"
init_date_update = "2007-03-21"
end_date_update  = "2018-12-31"   

In [3]:
file_ascat = []
for year in range(int(init_date_update.split("-")[0]), int(end_date_update.split("-")[0])+1):
    file_ascat += glob.glob(INPUT_DIR+f"TAU/{str(year)}/*.nc")

In [4]:
file_ascat.sort()

In [5]:
file_ascat[-5:]

['/home/brayan/DATA/Modelo-Multimodal-de-Ondas/process/ASCAT/TAU/2018/20181227.nc',
 '/home/brayan/DATA/Modelo-Multimodal-de-Ondas/process/ASCAT/TAU/2018/20181228.nc',
 '/home/brayan/DATA/Modelo-Multimodal-de-Ondas/process/ASCAT/TAU/2018/20181229.nc',
 '/home/brayan/DATA/Modelo-Multimodal-de-Ondas/process/ASCAT/TAU/2018/20181230.nc',
 '/home/brayan/DATA/Modelo-Multimodal-de-Ondas/process/ASCAT/TAU/2018/20181231.nc']

In [6]:
DS_ASCAT = xr.open_mfdataset(file_ascat, concat_dim="time", parallel=True)

will change. To retain the existing behavior, pass
combine='nested'. To use future default behavior, pass
combine='by_coords'. See
http://xarray.pydata.org/en/stable/combining.html#combining-multi

  """Entry point for launching an IPython kernel.
To get equivalent behaviour from now on please use the new
`combine_nested` function instead (or the `combine='nested'` option to
`open_mfdataset`).The datasets supplied do not have global dimension coordinates. In
future, to continue concatenating without supplying dimension
coordinates, please use the new `combine_nested` function (or the
`combine='nested'` option to open_mfdataset.
  from_openmfds=True,


In [7]:
def Climatology_ANnomaly(data_, init_, end_):
    interv_date     = f"{init_}-01-01",f"{end_}-12-31"
    DATA_0          = data_.sel(time=slice(interv_date[0], interv_date[1])).resample(time="1MS").mean(dim="time")
    DATA_0["time"]  = pd.date_range( interv_date[0], interv_date[1], freq="MS").shift(14, freq="D")
    DATA_1          = DATA_0-DATA_0.mean(dim=("time"))
    DATA_2          = DATA_1.groupby("time.month").sum(dim="time")/(DATA_0.time.shape[0]/12)
    DATA_2_forward  = DATA_2.copy()
    DATA_2_back     = DATA_2.copy()    
    DATA_2_back["month"]    = pd.date_range("1903-01-01", "1903-12-31", freq="MS").shift(14, freq="D")
    DATA_2["month"]         = pd.date_range("1904-01-01", "1904-12-31", freq="MS").shift(14, freq="D")
    DATA_2_forward["month"] = pd.date_range("1905-01-01", "1905-12-31", freq="MS").shift(14, freq="D")
    DATA_3          = xr.concat([DATA_2_back, DATA_2, DATA_2_forward], dim="month").chunk(chunks={"month":36})
    DATA_4          = DATA_3.interp( month=pd.date_range("1903-01-01", "1905-12-31", freq="D"), method="cubic").sel(month=slice("1904-01-01", "1904-12-31"))
    DATA_4          = DATA_4.rename({"month":"dayofyear"})
    DATA_4["dayofyear"]     = np.arange(1, 366+1, 1, dtype=int)
    DATA_5          = DATA_4.chunk(chunks={"dayofyear":1}) + DATA_0.mean(dim=("time"))
    ANOMALIAS       = data_.groupby("time.dayofyear") - DATA_5
    return ANOMALIAS, DATA_5

In [8]:
TAUX_ANOM, TAUX_CLIM        = Climatology_ANnomaly( DS_ASCAT.taux, "2008", "2014")

  ret = umr_sum(arr, axis, dtype, out, keepdims)
  ret = umr_sum(arr, axis, dtype, out, keepdims)
  ret = umr_sum(arr, axis, dtype, out, keepdims)
  ret = umr_sum(arr, axis, dtype, out, keepdims)
  ret = umr_sum(arr, axis, dtype, out, keepdims)


In [9]:
TAUY_ANOM, TAUY_CLIM        = Climatology_ANnomaly( DS_ASCAT.tauy, "2008", "2014")

In [14]:
def create_xarray(data_, dim1, dim2, dim3, dim1_name, dim2_name, dim3_name):
    """
    Se crea un xarray que tiene la siguiente estructura estandar:
    'time, latitud, longitud'
    
    Output: Informacion de data_Xarray en formato float_32 [Xarray] 
    """
    Array  =  xr.DataArray( np.float32(data_) ,
                            coords=[ dim1, dim2, dim3],
                            dims=[ dim1_name, dim2_name, dim3_name])     
    return Array

In [23]:
TAUX_ANOM_32 = create_xarray( TAUX_ANOM.interpolate_na(dim="lat", method="nearest", fill_value="extrapolate").values, TAUX_ANOM.time, np.float32(TAUX_ANOM.lat), np.float32(TAUX_ANOM.lon), "time", "lat", "lon")
print("pass")
TAUY_ANOM_32 = create_xarray( TAUY_ANOM.interpolate_na(dim="lat", method="nearest", fill_value="extrapolate").values, TAUY_ANOM.time, np.float32(TAUY_ANOM.lat), np.float32(TAUY_ANOM.lon), "time", "lat", "lon")

  x = np.divide(x1, x2, out)


pass


  x = np.divide(x1, x2, out)


In [25]:
TAUX_CLIM_32 = create_xarray( TAUX_CLIM.interpolate_na(dim="lat", method="nearest", fill_value="extrapolate").values, TAUX_CLIM.dayofyear, np.float32(TAUX_ANOM.lat), np.float32(TAUX_ANOM.lon), "dayofyear", "lat", "lon")
print("pass")
TAUY_CLIM_32 = create_xarray( TAUY_CLIM.interpolate_na(dim="lat", method="nearest", fill_value="extrapolate").values, TAUY_CLIM.dayofyear, np.float32(TAUY_ANOM.lat), np.float32(TAUY_ANOM.lon), "dayofyear", "lat", "lon")

  x = np.divide(x1, x2, out)


pass


  x = np.divide(x1, x2, out)


In [28]:
DATASET_ANOM = xr.Dataset({"ataux": TAUX_ANOM_32, "tauy": TAUY_ANOM_32})
DATASET_CLIM = xr.Dataset({"clim_taux": TAUX_CLIM_32, "clim_tauy": TAUY_CLIM_32})

In [30]:
for enum, date_val in enumerate(DATASET_ANOM.time.values):
    check_dir(INPUT_DIR+"ANOMALY/" +str(date_val)[:4]+"/")
    DATASET_ANOM.isel(time=enum).to_netcdf(INPUT_DIR+"ANOMALY/" +str(date_val)[:4]+"/"+"".join(str(date_val)[:10].split("-"))+".nc")

In [29]:
check_dir(INPUT_DIR+"CLIMATOLOGY/")
DATASET_CLIM.to_netcdf(INPUT_DIR+"CLIMATOLOGY/" +"TAU_CLIM.nc")