In [1]:
import xarray as xr
import pandas as pd
import os
import dask
import numpy as np
import cftime
from datetime import datetime
import matplotlib.pyplot as plt
import proplot as pplt
import warnings
warnings.filterwarnings('ignore')

In [2]:
# Folder that holds the Eta-BESM RCP4.5 .nc files
pasta_dados_besm = 'TP2M_Eta-BESM_RCP4.5'

# Build the full path of every .nc file found in that folder
arquivos_besm_rcp45 = [
    os.path.join(pasta_dados_besm, nome_arquivo)
    for nome_arquivo in os.listdir(pasta_dados_besm)
    if nome_arquivo.endswith('.nc')
]

# Open all files lazily as one dataset (dask chunks of 100 time steps),
# letting xarray order/concatenate them by their coordinate values
dados_besm_rcp45 = xr.open_mfdataset(
    arquivos_besm_rcp45,
    combine='by_coords',
    engine='netcdf4',
    chunks={'time': 100},
)
print(dados_besm_rcp45)

<xarray.Dataset> Size: 19GB
Dimensions:  (time: 34333, lat: 390, lon: 355)
Coordinates:
  * lon      (lon) float64 3kB -100.0 -99.8 -99.6 -99.4 ... -29.6 -29.4 -29.2
  * lat      (lat) float64 3kB -50.0 -49.8 -49.6 -49.4 ... 27.2 27.4 27.6 27.8
  * time     (time) datetime64[ns] 275kB 2006-01-01 2006-01-02 ... 2099-12-31
Data variables:
    tp2m     (time, lat, lon) float32 19GB dask.array<chunksize=(100, 390, 355), meta=np.ndarray>
Attributes: (12/15)
    history:        Created by Eta Model Group
    institute:      National Institute for Space Research
    institute_id:   INPE
    contact:        projeta@inpe.br
    rcm:            Regional Climate Model (RCM): Eta Model
    driving_model:  Brazilian Earth System Model (BESM)
    ...             ...
    south_degrees:  -50.0
    east_degrees:   -29.2
    west_degrees:   -100.0
    experiment:     RCP4.5
    ensemble:       r1i1p1
    frequency:      day


In [3]:
# Folder that holds the Eta-CanESM2 RCP4.5 .nc files
pasta_dados_CanESM2 = 'TP2M_Eta-CanESM2_RCP4.5'

# Build the full path of every .nc file found in that folder
arquivos_canesm2_rcp45 = [
    os.path.join(pasta_dados_CanESM2, nome_arquivo)
    for nome_arquivo in os.listdir(pasta_dados_CanESM2)
    if nome_arquivo.endswith('.nc')
]

# Open all files lazily as one dataset (dask chunks of 100 time steps),
# letting xarray order/concatenate them by their coordinate values
dados_canesm2_rcp45 = xr.open_mfdataset(
    arquivos_canesm2_rcp45,
    combine='by_coords',
    engine='netcdf4',
    chunks={'time': 100},
)
print(dados_canesm2_rcp45)

<xarray.Dataset> Size: 16GB
Dimensions:  (time: 28854, lat: 390, lon: 355)
Coordinates:
  * lon      (lon) float64 3kB -100.0 -99.8 -99.6 -99.4 ... -29.6 -29.4 -29.2
  * lat      (lat) float64 3kB -50.0 -49.8 -49.6 -49.4 ... 27.2 27.4 27.6 27.8
  * time     (time) datetime64[ns] 231kB 2021-01-01 2021-01-02 ... 2099-12-31
Data variables:
    tp2m     (time, lat, lon) float32 16GB dask.array<chunksize=(100, 390, 355), meta=np.ndarray>
Attributes: (12/15)
    history:        Created by Eta Model Group
    institute:      National Institute for Space Research
    institute_id:   INPE
    contact:        projeta@inpe.br
    rcm:            Regional Climate Model (RCM): Eta Model
    driving_model:  Canadian Earth System Model, version 2 (CanESM2)
    ...             ...
    south_degrees:  -50.0
    east_degrees:   -29.2
    west_degrees:   -100.0
    experiment:     RCP4.5
    ensemble:       r1i1p1
    frequency:      day


In [4]:
# Folder that holds the Eta-MIROC5 RCP4.5 .nc files
pasta_dados_miroc5 = 'TP2M_Eta-MIROC_RCP4.5'

# Build the full path of every .nc file found in that folder
arquivos_miroc5_rcp45 = [
    os.path.join(pasta_dados_miroc5, nome_arquivo)
    for nome_arquivo in os.listdir(pasta_dados_miroc5)
    if nome_arquivo.endswith('.nc')
]

# Open all files lazily as one dataset (dask chunks of 100 time steps),
# letting xarray order/concatenate them by their coordinate values
dados_miroc5_rcp45 = xr.open_mfdataset(
    arquivos_miroc5_rcp45,
    combine='by_coords',
    engine='netcdf4',
    chunks={'time': 100},
)
print(dados_miroc5_rcp45)

<xarray.Dataset> Size: 16GB
Dimensions:  (time: 28854, lat: 390, lon: 355)
Coordinates:
  * lon      (lon) float64 3kB -100.0 -99.8 -99.6 -99.4 ... -29.6 -29.4 -29.2
  * lat      (lat) float64 3kB -50.0 -49.8 -49.6 -49.4 ... 27.2 27.4 27.6 27.8
  * time     (time) datetime64[ns] 231kB 2021-01-01 2021-01-02 ... 2099-12-31
Data variables:
    tp2m     (time, lat, lon) float32 16GB dask.array<chunksize=(100, 390, 355), meta=np.ndarray>
Attributes: (12/15)
    history:        Created by Eta Model Group
    institute:      National Institute for Space Research
    institute_id:   INPE
    contact:        projeta@inpe.br
    rcm:            Regional Climate Model (RCM): Eta Model
    driving_model:  Model for Interdisciplinary Research, version 5 (MIROC5)
    ...             ...
    south_degrees:  -50.0
    east_degrees:   -29.2
    west_degrees:   -100.0
    experiment:     RCP4.5
    ensemble:       r1i1p1
    frequency:      day


In [5]:
# Folder that holds the Eta-HadGEM2-ES RCP4.5 .nc files
pasta_dados_hadgem = 'TP2M_Eta-HadGEM2-ES_RCP4.5'

# Build the full path of every .nc file found in that folder
arquivos_hadgem_rcp45 = [os.path.join(pasta_dados_hadgem, f) for f in os.listdir(pasta_dados_hadgem) if f.endswith('.nc')]

# Open all files lazily as one dataset (dask chunks of 100 time steps).
# The dataset is not displayed here: a bare expression in the middle of a cell
# produces no output, and the dataset is printed at the end of this cell once
# its time axis has been converted.
dados_hadgem_rcp45 = xr.open_mfdataset(arquivos_hadgem_rcp45, combine='by_coords', engine='netcdf4', chunks={'time': 100})

# Função para corrigir datas inválidas
def fix_invalid_dates(cftime_dates):
    """Convert cftime.Datetime360Day dates to ``datetime.datetime`` objects.

    Only the ``year``, ``month`` and ``day`` attributes of each element are
    read. A date that ``datetime`` rejects (for instance 30 February) is
    replaced instead of raising:

    * month greater than 12 -> 1 January of the same year;
    * any other invalid date -> day 1 of the same month and year.

    A replaced date can therefore coincide with a genuine first-of-month
    entry, so the result may contain duplicates.

    Parameters
    ----------
    cftime_dates : iterable
        Date-like objects exposing ``year``, ``month`` and ``day``.

    Returns
    -------
    list of datetime.datetime
        One entry per input date, in the same order.
    """
    def _as_datetime(stamp):
        try:
            # Direct conversion works for every date the Gregorian calendar accepts
            return datetime(year=stamp.year, month=stamp.month, day=stamp.day)
        except ValueError:
            # Out-of-range month falls back to January; every other invalid
            # combination keeps its month and falls back to that month's day 1.
            fallback_month = 1 if stamp.month > 12 else stamp.month
            return datetime(year=stamp.year, month=fallback_month, day=1)

    return [_as_datetime(stamp) for stamp in cftime_dates]

# Pull the raw time values out of the dataset (presumably
# cftime.Datetime360Day objects, as the helper's docstring states) and convert
# them to datetime.datetime; dates that datetime rejects come back as the
# first day of their month.
cftime_dates = dados_hadgem_rcp45['time'].values
fixed_dates = fix_invalid_dates(cftime_dates)

# Convert the list of datetime.datetime objects into pandas timestamps
timestamp_array = pd.to_datetime(fixed_dates)

# Replace the dataset's time coordinate with the converted timestamps
dados_hadgem_rcp45['time'] = timestamp_array
# NOTE(review): this re-assigns the time values that were set on the line
# above, so it looks redundant — confirm before removing.
dados_hadgem_rcp45 = dados_hadgem_rcp45.assign_coords(time=dados_hadgem_rcp45['time'].values)

# Drop duplicated timestamps and enforce time ordering: np.unique returns the
# sorted unique values plus the index of each value's first occurrence, and
# isel keeps only those positions. Entries that the conversion collapsed onto
# a first-of-month date are removed here (presumably leaving the genuine
# day-1 record, assuming the time axis was already in ascending order).
_, index = np.unique(dados_hadgem_rcp45['time'], return_index=True)
dados_hadgem_rcp45 = dados_hadgem_rcp45.isel(time=index)

# Inspect the structure of the resulting dataset
print(dados_hadgem_rcp45)

<xarray.Dataset> Size: 16GB
Dimensions:  (time: 28301, lat: 390, lon: 355)
Coordinates:
  * lon      (lon) float64 3kB -100.0 -99.8 -99.6 -99.4 ... -29.6 -29.4 -29.2
  * lat      (lat) float64 3kB -50.0 -49.8 -49.6 -49.4 ... 27.2 27.4 27.6 27.8
  * time     (time) datetime64[ns] 226kB 2021-01-01 2021-01-02 ... 2099-12-30
Data variables:
    tp2m     (time, lat, lon) float32 16GB dask.array<chunksize=(98, 390, 355), meta=np.ndarray>
Attributes: (12/15)
    history:        Created by Eta Model Group
    institute:      National Institute for Space Research
    institute_id:   INPE
    contact:        projeta@inpe.br
    rcm:            Regional Climate Model (RCM): Eta Model
    driving_model:  Hadley Center Global Environmental Model, version 2 Earth...
    ...             ...
    south_degrees:  -50.0
    east_degrees:   -29.2
    west_degrees:   -100.0
    experiment:     RCP4.5
    ensemble:       r1i1p1
    frequency:      day


In [6]:
# Equal-weight mean of the four Eta downscalings. xarray aligns the operands
# on their shared coordinates before adding, so only the time steps present in
# every dataset survive in the result.
ensemble = (
    dados_besm_rcp45
    + dados_canesm2_rcp45
    + dados_miroc5_rcp45
    + dados_hadgem_rcp45
) / 4
ensemble

Unnamed: 0,Array,Chunk
Bytes,14.60 GiB,52.29 MiB
Shape,"(28301, 390, 355)","(99, 390, 355)"
Count,7239 Tasks,493 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 14.60 GiB 52.29 MiB Shape (28301, 390, 355) (99, 390, 355) Count 7239 Tasks 493 Chunks Type float32 numpy.ndarray",355  390  28301,

Unnamed: 0,Array,Chunk
Bytes,14.60 GiB,52.29 MiB
Shape,"(28301, 390, 355)","(99, 390, 355)"
Count,7239 Tasks,493 Chunks
Type,float32,numpy.ndarray


In [7]:
# Target grid for the regridded ensemble: 0.1 steps in latitude and longitude
new_lat = pplt.arange(-21.3, -17.8, 0.1)
new_lon = pplt.arange(-41.9, -39.7, 0.1)

# Interpolate the ensemble onto the grid points given by new_lat and new_lon
regrid_ensemble = ensemble.interp(coords={'lat': new_lat, 'lon': new_lon})
regrid_ensemble

Unnamed: 0,Array,Chunk
Bytes,89.39 MiB,320.20 kiB
Shape,"(28301, 36, 23)","(99, 36, 23)"
Count,11191 Tasks,493 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 89.39 MiB 320.20 kiB Shape (28301, 36, 23) (99, 36, 23) Count 11191 Tasks 493 Chunks Type float32 numpy.ndarray",23  36  28301,

Unnamed: 0,Array,Chunk
Bytes,89.39 MiB,320.20 kiB
Shape,"(28301, 36, 23)","(99, 36, 23)"
Count,11191 Tasks,493 Chunks
Type,float32,numpy.ndarray


In [8]:
# Write the regridded ensemble to a new NetCDF file
regrid_ensemble.to_netcdf(path='ensemble_rcp45_temp.nc')