In [1]:
import geopandas as gpd
import xarray as xr
import numpy as np
import pandas as pd
from shapely.geometry import box

In [2]:
# Open the ERA5 single-level dataset.
# The path is a raw string with forward slashes (same convention as the
# pressure-level cell): the previous non-raw "..\..\Data\..." literal contained
# invalid escape sequences and only resolved on Windows.
ds = xr.open_dataset(r"../../Data/Interim/Meteorological_data/ERA5_NetCDF/ERA5_meteo_SL_c.nc")
print(ds)

<xarray.Dataset> Size: 1GB
Dimensions:      (valid_time: 2486, latitude: 61, longitude: 41)
Coordinates:
  * valid_time   (valid_time) datetime64[ns] 20kB 2015-08-03T14:00:00 ... 202...
  * latitude     (latitude) float32 244B 43.0 42.9 42.8 42.7 ... 37.2 37.1 37.0
  * longitude    (longitude) float32 164B -10.0 -9.9 -9.8 ... -6.2 -6.1 -6.0
Data variables: (12/29)
    d2m          (valid_time, latitude, longitude) float64 50MB ...
    t2m          (valid_time, latitude, longitude) float64 50MB ...
    u10          (valid_time, latitude, longitude) float64 50MB ...
    v10          (valid_time, latitude, longitude) float64 50MB ...
    sp           (valid_time, latitude, longitude) float64 50MB ...
    z            (valid_time, latitude, longitude) float32 25MB ...
    ...           ...
    v10_Fb       (valid_time, latitude, longitude) float64 50MB ...
    sp_Fb        (valid_time, latitude, longitude) float64 50MB ...
    drtcode      (valid_time, latitude, longitude) float64 50MB ...

In [3]:
# Open the ERA5 pressure-level dataset with the netCDF4 backend and show its summary.
pl_source_path = r"../../Data/Interim/Meteorological_data/ERA5_NetCDF/ERA5_meteo_PL_c.nc"
ds_PL = xr.open_dataset(pl_source_path, engine="netcdf4")
print(ds_PL)

<xarray.Dataset> Size: 1GB
Dimensions:         (valid_time: 2486, pressure_level: 5, latitude: 61,
                     longitude: 41)
Coordinates:
  * valid_time      (valid_time) datetime64[ns] 20kB 2015-08-03T14:00:00 ... ...
  * pressure_level  (pressure_level) float64 40B 950.0 850.0 700.0 500.0 300.0
  * latitude        (latitude) float64 488B 43.0 42.9 42.8 ... 37.2 37.1 37.0
  * longitude       (longitude) float64 328B -10.0 -9.9 -9.8 ... -6.2 -6.1 -6.0
Data variables:
    z               (valid_time, pressure_level, latitude, longitude) float64 249MB ...
    r               (valid_time, pressure_level, latitude, longitude) float64 249MB ...
    t               (valid_time, pressure_level, latitude, longitude) float64 249MB ...
    u               (valid_time, pressure_level, latitude, longitude) float64 249MB ...
    v               (valid_time, pressure_level, latitude, longitude) float64 249MB ...
    w               (valid_time, pressure_level, latitude, longitude) float64 

In [4]:
# ============================================================
# CONVERT ds_PL TO THE SAME DATA TYPES AS ds
# ============================================================
# The latitude/longitude/valid_time coordinates of ds_PL are cast to the dtypes
# used by ds, and every ds_PL data variable is cast to the dtype of the
# same-named variable in ds (matching is by name only) or, when ds has no such
# variable, to float32.

print("üîÑ Convertendo ds_PL para os mesmos data types do ds...")

# Report the current dtypes of both datasets
print("üìä TIPOS ATUAIS:")
print("ds:")
print(f"  latitude: {ds.latitude.dtype}")
print(f"  longitude: {ds.longitude.dtype}")
print(f"  valid_time: {ds.valid_time.dtype}")
print("  Vari√°veis de dados:")
for var in list(ds.data_vars)[:3]:  # first 3 variables only
    print(f"    {var}: {ds[var].dtype}")

print("\nds_PL (antes da convers√£o):")
print(f"  latitude: {ds_PL.latitude.dtype}")
print(f"  longitude: {ds_PL.longitude.dtype}")
print(f"  valid_time: {ds_PL.valid_time.dtype}")
print(f"  pressure_level: {ds_PL.pressure_level.dtype}")
print("  Vari√°veis de dados:")
for var in list(ds_PL.data_vars)[:3]:
    print(f"    {var}: {ds_PL[var].dtype}")

# ============================================================
# COORDINATE CONVERSION
# ============================================================

# Cast the shared coordinates to the dtypes used by ds
ds_PL = ds_PL.assign_coords({
    'latitude': ds_PL.latitude.astype(ds.latitude.dtype),
    'longitude': ds_PL.longitude.astype(ds.longitude.dtype),
    'valid_time': ds_PL.valid_time.astype(ds.valid_time.dtype)
})

# pressure_level only exists in ds_PL, so it keeps its own dtype
print(f"  pressure_level mantido como: {ds_PL.pressure_level.dtype}")

# ============================================================
# DATA VARIABLE CONVERSION
# ============================================================

print("\nüîÑ Convertendo vari√°veis de dados...")

# Reference dtypes taken from ds, keyed by variable name
ds_dtypes = {var: ds[var].dtype for var in ds.data_vars}

# Iterate over a snapshot of the names: the loop body assigns into ds_PL.
for var in list(ds_PL.data_vars):
    current_dtype = ds_PL[var].dtype
    if var in ds_dtypes:
        # Same-named variable exists in ds: reuse its dtype
        target_dtype = np.dtype(ds_dtypes[var])
    elif np.issubdtype(current_dtype, np.floating):
        # No counterpart in ds: fall back to float32
        target_dtype = np.dtype(np.float32)
    else:
        # Non-float variable without a counterpart in ds: leave untouched
        # rather than silently turning it into float32.
        continue

    # Only cast when needed. target_dtype is always an np.dtype, so the log
    # prints e.g. "float32" consistently instead of "<class 'numpy.float32'>".
    if current_dtype != target_dtype:
        print(f"  Convertendo {var}: {current_dtype} -> {target_dtype}")
        ds_PL[var] = ds_PL[var].astype(target_dtype)

# ============================================================
# FINAL CHECK
# ============================================================

print("\n‚úÖ CONVERS√ÉO CONCLU√çDA!")
print("üìä TIPOS AP√ìS CONVERS√ÉO:")
print("ds_PL (ap√≥s convers√£o):")
print(f"  latitude: {ds_PL.latitude.dtype}")
print(f"  longitude: {ds_PL.longitude.dtype}")
print(f"  valid_time: {ds_PL.valid_time.dtype}")
print("  Vari√°veis de dados:")
for var in list(ds_PL.data_vars)[:3]:
    print(f"    {var}: {ds_PL[var].dtype}")

# Check whether the coordinates of both datasets are now identical
print(f"\nüîç VERIFICA√á√ÉO DE COORDENADAS:")
print(f"ds latitude == ds_PL latitude: {np.array_equal(ds.latitude.values, ds_PL.latitude.values)}")
print(f"ds longitude == ds_PL longitude: {np.array_equal(ds.longitude.values, ds_PL.longitude.values)}")

# Spot-check a few values
print(f"\nüìê COMPARA√á√ÉO DE VALORES:")
print(f"ds latitude[:3]: {ds.latitude.values[:3]}")
print(f"ds_PL latitude[:3]: {ds_PL.latitude.values[:3]}")
print(f"Diferen√ßa m√°xima: {np.max(np.abs(ds.latitude.values - ds_PL.latitude.values))}")

üîÑ Convertendo ds_PL para os mesmos data types do ds...
üìä TIPOS ATUAIS:
ds:
  latitude: float32
  longitude: float32
  valid_time: datetime64[ns]
  Vari√°veis de dados:
    d2m: float64
    t2m: float64
    u10: float64

ds_PL (antes da convers√£o):
  latitude: float64
  longitude: float64
  valid_time: datetime64[ns]
  pressure_level: float64
  Vari√°veis de dados:
    z: float64
    r: float64
    t: float64
  pressure_level mantido como: float64

üîÑ Convertendo vari√°veis de dados...
  Convertendo z: float64 -> float32
  Convertendo r: float64 -> <class 'numpy.float32'>
  Convertendo t: float64 -> <class 'numpy.float32'>
  Convertendo u: float64 -> <class 'numpy.float32'>
  Convertendo v: float64 -> <class 'numpy.float32'>
  Convertendo w: float64 -> <class 'numpy.float32'>

‚úÖ CONVERS√ÉO CONCLU√çDA!
üìä TIPOS AP√ìS CONVERS√ÉO:
ds_PL (ap√≥s convers√£o):
  latitude: float32
  longitude: float32
  valid_time: datetime64[ns]
  Vari√°veis de dados:
    z: float32
    r: float32

In [5]:
# ============================================================
# 1. Read the fire-spread shapefile and parse its date columns
# ============================================================
shp = gpd.read_file(r"../../Data/Interim/PT-FireSprd_v2.1/L2_FireBehavior/PT-FireSprd_v2.1_L2_valid.shp")
# Alternative input kept from the original notebook:
# shp = gpd.read_file(r"..\..\PT-FireSprd_v2.1_L2_p_meteo_short.shp")

# Unparseable dates become NaT and the corresponding polygons are dropped.
for date_col in ("sdate", "edate"):
    shp[date_col] = pd.to_datetime(shp[date_col], errors="coerce")
shp = shp.dropna(subset=["sdate", "edate"])

# ============================================================
# 2. Build one square polygon per grid cell of ds
# ============================================================
shp_4326 = shp.to_crs("EPSG:4326")
lats = ds.latitude.values
lons = ds.longitude.values
# Cell size in degrees; hard-coded to match the 0.1 spacing of the ds coordinates.
lat_res = 0.1
lon_res = 0.1

# Cells are centred on the grid points and listed latitude-major, which matches
# the np.repeat / np.tile layout of the coordinate columns below.
cell_polys = [
    box(lon - lon_res/2, lat - lat_res/2,
        lon + lon_res/2, lat + lat_res/2)
    for lat in lats
    for lon in lons
]

cells_gdf = gpd.GeoDataFrame({
    "latitude": np.repeat(lats, len(lons)),
    "longitude": np.tile(lons, len(lats)),
    "geometry": cell_polys
}, crs="EPSG:4326")

# ============================================================
# 3. Generate (cell, time) combinations for every polygon, plus its extent
# ============================================================
all_times = pd.to_datetime(ds.valid_time.values)
# Only timestamps exactly on the hour are used; filter once instead of per polygon.
hourly_times = all_times[all_times.minute == 0]
all_combinations = []

for polygon_id, polygon_row in shp_4326.iterrows():
    polygon_geom = polygon_row.geometry

    # Bounding box of the polygon
    minx, miny, maxx, maxy = polygon_geom.bounds

    # Grid cells touched by the polygon; if there are none, retry with a
    # 0.05-degree buffer around the polygon.
    intersecting_cells = cells_gdf[cells_gdf.intersects(polygon_geom)]
    if len(intersecting_cells) == 0:
        buffered_poly = polygon_geom.buffer(0.05)
        intersecting_cells = cells_gdf[cells_gdf.intersects(buffered_poly)]

    # On-the-hour dataset timestamps inside [sdate, edate] (inclusive)
    sdate = polygon_row["sdate"]
    edate = polygon_row["edate"]
    polygon_times = hourly_times[(hourly_times >= sdate) & (hourly_times <= edate)]

    # One record per (intersecting cell, timestamp)
    all_combinations.extend(
        {
            'latitude': lat,
            'longitude': lon,
            'time': time,
            'polygon_id': polygon_id,
            'minx': minx,
            'miny': miny,
            'maxx': maxx,
            'maxy': maxy
        }
        for lat, lon in zip(intersecting_cells["latitude"], intersecting_cells["longitude"])
        for time in polygon_times
    )

# ============================================================
# 4. Summary
# ============================================================
print(f"Total de combina√ß√µes geradas: {len(all_combinations)}")
print(f"N√∫mero de pol√≠gonos processados: {len(shp_4326)}")

# Optional debugging / export snippets. They used to live in triple-quoted
# strings, and the last one was echoed as the cell's output; as comments they
# produce no output.
#
# Preview the first combinations:
# for i, combo in enumerate(all_combinations[:5]):
#     print(
#         f"{i+1}: Polygon {combo['polygon_id']} - "
#         f"lat={combo['latitude']:.3f}, lon={combo['longitude']:.3f}, "
#         f"time={combo['time'].strftime('%Y-%m-%d %H:%M')}, "
#         f"extent=({combo['minx']:.3f}, {combo['miny']:.3f}, {combo['maxx']:.3f}, {combo['maxy']:.3f})"
#     )
#
# Save to CSV:
# df_combinations = pd.DataFrame(all_combinations)
# df_combinations.to_csv(r"PT-FireSprd_v2.1_L2_combinations_with_extent.csv", index=False)


  shp["edate"] = pd.to_datetime(shp["edate"], errors="coerce")


Total de combina√ß√µes geradas: 15890
N√∫mero de pol√≠gonos processados: 1715


'# Salvar em CSV\ndf_combinations = pd.DataFrame(all_combinations)\ndf_combinations.to_csv(r"PT-FireSprd_v2.1_L2_combinations_with_extent.csv", index=False)'

In [6]:
# ============================================================
# BUILD THE MASK OF COMBINATIONS TO KEEP
# ============================================================
# A (time, lat, lon) entry is kept when its grid cell intersects a fire polygon
# and its on-the-hour timestamp lies inside that polygon's [sdate, edate] window.

# Boolean buffer with the shape of u100 (used only as a template). It is filled
# with NumPy indexing and wrapped as a DataArray afterwards, which avoids one
# DataArray assignment per element.
# Positional indexing below assumes u100 is ordered (valid_time, latitude, longitude).
mask_values = np.zeros(ds.u100.shape, dtype=bool)

# Dataset timestamps as a pandas DatetimeIndex
ds_times = pd.to_datetime(ds.valid_time.values)
on_the_hour = ds_times.minute == 0

# Position of every grid cell on the ds latitude/longitude axes, resolved once
# by exact coordinate match (-1 means "not on the grid").
cell_lat_idx = pd.Index(ds.latitude.values).get_indexer(cells_gdf["latitude"].to_numpy())
cell_lon_idx = pd.Index(ds.longitude.values).get_indexer(cells_gdf["longitude"].to_numpy())
if (cell_lat_idx < 0).any() or (cell_lon_idx < 0).any():
    raise ValueError("cells_gdf has coordinates that do not exist in ds.latitude / ds.longitude")

print(f"Processando {len(shp_4326)} pol√≠gonos...")

for polygon_id, polygon_row in shp_4326.iterrows():
    # Indices of the on-the-hour timestamps inside the polygon's time window.
    # They are taken from ds_times itself, so every one of them exists in the
    # dataset and no nearest-time fallback is needed. If ds_times held
    # duplicated timestamps, all occurrences would be marked.
    in_window = on_the_hour & (ds_times >= polygon_row["sdate"]) & (ds_times <= polygon_row["edate"])
    time_idx = np.flatnonzero(in_window)
    if time_idx.size == 0:
        continue  # nothing to mark, skip the spatial test

    # Grid cells touched by the polygon; if there are none, retry with a
    # 0.05-degree buffer around the polygon.
    hits = cells_gdf.intersects(polygon_row.geometry).to_numpy()
    if not hits.any():
        buffered_poly = polygon_row.geometry.buffer(0.05)
        hits = cells_gdf.intersects(buffered_poly).to_numpy()

    # Broadcast (n_times, 1) against (n_cells,) to mark every time/cell pair.
    mask_values[time_idx[:, None], cell_lat_idx[hits], cell_lon_idx[hits]] = True

keep_mask = xr.DataArray(mask_values, coords=ds.u100.coords, dims=ds.u100.dims, name="keep_mask")

print(f"Combina√ß√µes a manter: {keep_mask.sum().values}")
print(f"Combina√ß√µes a descartar: {(~keep_mask).sum().values}")

# ============================================================
# SET THE COMBINATIONS WE DO NOT USE TO NaN
# ============================================================

# Work on a copy so ds itself is left untouched
ds_filtered = ds.copy()

# Every data variable except the spatial_ref entry
data_vars = [var for var in ds.data_vars if var not in ['spatial_ref']]

print(f"Aplicando NaN a {len(data_vars)} vari√°veis...")

# where() keeps values where the mask is True and puts NaN elsewhere
for var_name in data_vars:
    print(f"Processando {var_name}...")
    ds_filtered[var_name] = ds[var_name].where(keep_mask)

# errors="ignore": do not fail if the dataset carries no spatial_ref variable
ds_filtered = ds_filtered.drop_vars('spatial_ref', errors="ignore")

# ============================================================
# CHECK THE RESULT
# ============================================================

print(f"\n‚úÖ PROCESSO CONCLU√çDO!")
print(f"Dataset original: {ds.nbytes / 1024 / 1024:.1f} MB")
print(f"Dataset filtrado: {ds_filtered.nbytes / 1024 / 1024:.1f} MB")

# Count the non-NaN values left (first 3 variables only)
for var_name in data_vars[:3]:
    non_nan_count = int(ds_filtered[var_name].count())
    total_count = ds_filtered[var_name].size
    print(f"{var_name}: {non_nan_count}/{total_count} valores n√£o-NaN ({non_nan_count/total_count*100:.2f}%)")

# Show the resulting dataset
print(f"\nüìä DATASET FILTRADO:")
print(ds_filtered)


Processando 1715 pol√≠gonos...
Combina√ß√µes a manter: 11091
Combina√ß√µes a descartar: 6206395
Aplicando NaN a 28 vari√°veis...
Processando d2m...
Processando t2m...
Processando u10...
Processando v10...
Processando sp...
Processando z...
Processando u100...
Processando v100...
Processando cbh...
Processando hcc...
Processando lcc...
Processando mcc...
Processando tcc...
Processando blh...
Processando cape...
Processando cin...
Processando swvl1...
Processando swvl2...
Processando swvl3...
Processando swvl4...
Processando d2m_Fb...
Processando t2m_Fb...
Processando u10_Fb...
Processando v10_Fb...
Processando sp_Fb...
Processando drtcode...
Processando ffmcode...
Processando fwinx...

‚úÖ PROCESSO CONCLU√çDO!
Dataset original: 1304.5 MB
Dataset filtrado: 1304.5 MB
d2m: 11091/6217486 valores n√£o-NaN (0.18%)
t2m: 11091/6217486 valores n√£o-NaN (0.18%)
u10: 11091/6217486 valores n√£o-NaN (0.18%)

üìä DATASET FILTRADO:
<xarray.Dataset> Size: 1GB
Dimensions:     (valid_time: 2486, latitud

In [7]:
# ============================================================
# SAVE THE DATASET WITHOUT DROPPING EMPTY DIMENSIONS
# ============================================================

output_path = "../../Data/Interim/Meteorological_data/ERA5_NetCDF/ERA5_meteo_SL_c_short.nc"

# Written as-is: the full grid is kept, masked entries stay as NaN
ds_filtered.to_netcdf(output_path, engine="netcdf4")

print(f"\nüíæ Dataset salvo em: {output_path}")
print(f"\nüéØ DATASET FINAL (DIMENS√ïES INTACTAS):")
print(ds_filtered)

# True where ALL variables of the filtered dataset are NaN at that point.
# isnull()/notnull() are DataArray methods, so this does not depend on the
# xr.ufuncs module, which some xarray releases do not ship.
all_nan = ds_filtered.to_array().isnull().all(dim="variable")
# True where ALL variables have a value at that point.
# NOTE(review): this is computed on the ORIGINAL ds, not on ds_filtered —
# confirm that is the intended comparison.
all_value = ds.to_array().notnull().all(dim="variable")

num_all_value = all_value.sum().item()
print("Pontos com todas as vari√°veis com valor:", num_all_value)

# Count the all-NaN points
num_all_nan = all_nan.sum().item()

print("Pontos com todas as vari√°veis NaN:", num_all_nan)


üíæ Dataset salvo em: ../../Data/Interim/Meteorological_data/ERA5_NetCDF/ERA5_meteo_SL_c_short.nc

üéØ DATASET FINAL (DIMENS√ïES INTACTAS):
<xarray.Dataset> Size: 1GB
Dimensions:     (valid_time: 2486, latitude: 61, longitude: 41)
Coordinates:
  * valid_time  (valid_time) datetime64[ns] 20kB 2015-08-03T14:00:00 ... 2025...
  * latitude    (latitude) float32 244B 43.0 42.9 42.8 42.7 ... 37.2 37.1 37.0
  * longitude   (longitude) float32 164B -10.0 -9.9 -9.8 -9.7 ... -6.2 -6.1 -6.0
Data variables: (12/28)
    d2m         (valid_time, latitude, longitude) float64 50MB nan nan ... nan
    t2m         (valid_time, latitude, longitude) float64 50MB nan nan ... nan
    u10         (valid_time, latitude, longitude) float64 50MB nan nan ... nan
    v10         (valid_time, latitude, longitude) float64 50MB nan nan ... nan
    sp          (valid_time, latitude, longitude) float64 50MB nan nan ... nan
    z           (valid_time, latitude, longitude) float32 25MB nan nan ... nan
    ...       

In [8]:
# ============================================================
# 1. BUILD THE MASK FOR ds_PL (same method as for ds)
# ============================================================
# A (time, lat, lon) column is kept, for ALL pressure levels, when its grid cell
# intersects a fire polygon and its on-the-hour timestamp lies inside that
# polygon's [sdate, edate] window.

print("üîç VERIFICANDO DIMENS√ïES DO ds_PL:")
print(f"Dimens√µes: {ds_PL.dims}")
print(f"Tamanho valid_time: {len(ds_PL.valid_time)}")
print(f"Tamanho pressure_level: {len(ds_PL.pressure_level)}")
print(f"Tamanho latitude: {len(ds_PL.latitude)}")
print(f"Tamanho longitude: {len(ds_PL.longitude)}")

# Boolean buffer with the shape of u (used only as a template). It is filled
# with NumPy indexing and wrapped as a DataArray afterwards, which avoids one
# DataArray assignment per element.
# Positional indexing below assumes u is ordered
# (valid_time, pressure_level, latitude, longitude).
mask_values_PL = np.zeros(ds_PL.u.shape, dtype=bool)

# Dataset timestamps as a pandas DatetimeIndex
ds_PL_times = pd.to_datetime(ds_PL.valid_time.values)
on_the_hour_PL = ds_PL_times.minute == 0

# Position of every grid cell on the ds_PL latitude/longitude axes, resolved
# once by exact coordinate match (-1 means "not on the grid"). The match is
# exact, so the ds_PL coordinates must hold the same values as the ones
# cells_gdf was built from.
cell_lat_idx_PL = pd.Index(ds_PL.latitude.values).get_indexer(cells_gdf["latitude"].to_numpy())
cell_lon_idx_PL = pd.Index(ds_PL.longitude.values).get_indexer(cells_gdf["longitude"].to_numpy())
if (cell_lat_idx_PL < 0).any() or (cell_lon_idx_PL < 0).any():
    raise ValueError("cells_gdf has coordinates that do not exist in ds_PL.latitude / ds_PL.longitude")

print(f"üîÑ Processando {len(shp_4326)} pol√≠gonos...")

for polygon_id, polygon_row in shp_4326.iterrows():
    # Indices of the on-the-hour timestamps inside the polygon's time window.
    # They are taken from ds_PL_times itself, so every one of them exists in
    # the dataset and no nearest-time fallback is needed. If ds_PL_times held
    # duplicated timestamps, all occurrences would be marked.
    in_window = on_the_hour_PL & (ds_PL_times >= polygon_row["sdate"]) & (ds_PL_times <= polygon_row["edate"])
    time_idx = np.flatnonzero(in_window)
    if time_idx.size == 0:
        continue  # nothing to mark, skip the spatial test

    # Grid cells touched by the polygon; if there are none, retry with a
    # 0.05-degree buffer around the polygon.
    hits = cells_gdf.intersects(polygon_row.geometry).to_numpy()
    if not hits.any():
        buffered_poly = polygon_row.geometry.buffer(0.05)
        hits = cells_gdf.intersects(buffered_poly).to_numpy()

    # Broadcast (n_times, 1) against (n_cells,); the ":" marks every pressure level.
    mask_values_PL[time_idx[:, None], :, cell_lat_idx_PL[hits], cell_lon_idx_PL[hits]] = True

keep_mask_PL = xr.DataArray(mask_values_PL, coords=ds_PL.u.coords, dims=ds_PL.u.dims, name="keep_mask_PL")

print(f"‚úÖ Combina√ß√µes a manter: {keep_mask_PL.sum().values}")
print(f"üìç Combina√ß√µes a descartar: {(~keep_mask_PL).sum().values}")

# ============================================================
# 2. SET THE COMBINATIONS WE DO NOT USE TO NaN
# ============================================================

# Work on a copy so ds_PL itself is left untouched
ds_PL_filtered = ds_PL.copy()

# Every data variable except the spatial_ref entry
data_vars_PL = [var for var in ds_PL.data_vars if var not in ['spatial_ref']]

print(f"üéØ Aplicando NaN a {len(data_vars_PL)} vari√°veis...")

# where() keeps values where the mask is True and puts NaN elsewhere
for var_name in data_vars_PL:
    print(f"   Processando {var_name}...")
    ds_PL_filtered[var_name] = ds_PL[var_name].where(keep_mask_PL)

# errors="ignore": do not fail if the dataset carries no spatial_ref variable
ds_PL_filtered = ds_PL_filtered.drop_vars('spatial_ref', errors="ignore")

# ============================================================
# 3. CHECK THE RESULT
# ============================================================

print(f"\n‚úÖ PROCESSO CONCLU√çDO!")
print(f"Dataset PL original: {ds_PL.nbytes / 1024 / 1024:.1f} MB")
print(f"Dataset PL filtrado: {ds_PL_filtered.nbytes / 1024 / 1024:.1f} MB")

# Count the non-NaN values left (first 3 variables only)
for var_name in data_vars_PL[:3]:
    non_nan_count = int(ds_PL_filtered[var_name].count())
    total_count = ds_PL_filtered[var_name].size
    print(f"   {var_name}: {non_nan_count}/{total_count} valores n√£o-NaN ({non_nan_count/total_count*100:.2f}%)")

# Show the resulting dataset
print(f"\nüìä DATASET PL FILTRADO:")
print(ds_PL_filtered)


üîç VERIFICANDO DIMENS√ïES DO ds_PL:
Tamanho valid_time: 2486
Tamanho pressure_level: 5
Tamanho latitude: 61
Tamanho longitude: 41
üîÑ Processando 1715 pol√≠gonos...
‚úÖ Combina√ß√µes a manter: 55455
üìç Combina√ß√µes a descartar: 31031975
üéØ Aplicando NaN a 6 vari√°veis...
   Processando z...
   Processando r...
   Processando t...
   Processando u...
   Processando v...
   Processando w...

‚úÖ PROCESSO CONCLU√çDO!
Dataset PL original: 711.6 MB
Dataset PL filtrado: 711.6 MB
   z: 55455/31087430 valores n√£o-NaN (0.18%)
   r: 55455/31087430 valores n√£o-NaN (0.18%)
   t: 55455/31087430 valores n√£o-NaN (0.18%)

üìä DATASET PL FILTRADO:
<xarray.Dataset> Size: 746MB
Dimensions:         (valid_time: 2486, pressure_level: 5, latitude: 61,
                     longitude: 41)
Coordinates:
  * valid_time      (valid_time) datetime64[ns] 20kB 2015-08-03T14:00:00 ... ...
  * pressure_level  (pressure_level) float64 40B 950.0 850.0 700.0 500.0 300.0
  * latitude        (latitude) float32 

In [9]:

# ============================================================
# 6. WRITE THE FILTERED ds_PL TO DISK
# ============================================================

# Destination of the masked pressure-level dataset
output_path_PL = "../../Data/Interim/Meteorological_data/ERA5_NetCDF/ERA5_meteo_PL_c_short.nc"

# Written with the netCDF4 backend; the dataset is passed through unchanged
ds_PL_filtered.to_netcdf(path=output_path_PL, engine="netcdf4")

# Confirmation messages, followed by the dataset summary
for message in (
    f"\nüíæ Dataset PL salvo em: {output_path_PL}",
    f"\nüéØ DATASET PL FINAL (DIMENS√ïES INTACTAS):",
):
    print(message)
print(ds_PL_filtered)


üíæ Dataset PL salvo em: ../../Data/Interim/Meteorological_data/ERA5_NetCDF/ERA5_meteo_PL_c_short.nc

üéØ DATASET PL FINAL (DIMENS√ïES INTACTAS):
<xarray.Dataset> Size: 746MB
Dimensions:         (valid_time: 2486, pressure_level: 5, latitude: 61,
                     longitude: 41)
Coordinates:
  * valid_time      (valid_time) datetime64[ns] 20kB 2015-08-03T14:00:00 ... ...
  * pressure_level  (pressure_level) float64 40B 950.0 850.0 700.0 500.0 300.0
  * latitude        (latitude) float32 244B 43.0 42.9 42.8 ... 37.2 37.1 37.0
  * longitude       (longitude) float32 164B -10.0 -9.9 -9.8 ... -6.2 -6.1 -6.0
Data variables:
    z               (valid_time, pressure_level, latitude, longitude) float32 124MB ...
    r               (valid_time, pressure_level, latitude, longitude) float32 124MB ...
    t               (valid_time, pressure_level, latitude, longitude) float32 124MB ...
    u               (valid_time, pressure_level, latitude, longitude) float32 124MB ...
    v          