In [1]:
import numpy as np
import pandas as pd
from netCDF4 import Dataset
from datetime import datetime, timedelta

# Load one daily OISST NetCDF file and flatten its SST grid into a long-format
# DataFrame with one row per (time, zlev, lat, lon) cell.
file_path = '.\\data_from_noaa\\202405\\oisst-avhrr-v02r01.20240502.nc'
# NOTE(review): the handle is left open, exactly as before, in case later cells
# read further variables; call dataset.close() once it is no longer needed.
dataset = Dataset(file_path)

# List the variables stored in the file
print("Variáveis disponíveis no dataset:")
print(dataset.variables.keys())

# Read SST and its coordinate axes.
# netCDF4 masks _FillValue and applies scale_factor/add_offset on read by
# default; the setting is made explicit here so that the values returned by
# sst_var[:] are already in physical units.
sst_var = dataset.variables['sst']
sst_var.set_auto_maskandscale(True)
sst_data = sst_var[:]
times = dataset.variables['time'][:]
latitudes = dataset.variables['lat'][:]
longitudes = dataset.variables['lon'][:]
zlev = dataset.variables['zlev'][:]

# Convert time to datetimes (assuming the axis is in days since 1978-01-01 12:00:00)
reference_date = datetime(1978, 1, 1, 12, 0, 0)
times = np.array([reference_date + timedelta(days=float(t)) for t in times])

# Packing metadata, kept for reference only.
# It must NOT be applied by hand: sst_data was already unpacked and masked on
# read, so multiplying by scale_factor again would shrink every SST value.
sst_fill_value = sst_var._FillValue
sst_scale_factor = sst_var.scale_factor
sst_add_offset = sst_var.add_offset

# Report the dimensions of the SST array
print("Dimensões do SST:", sst_data.shape)

# Only the 4-D layout (time, zlev, lat, lon) is handled below
data_shape = sst_data.shape

if len(data_shape) == 4:
    time_dim, zlev_dim, lat_dim, lon_dim = data_shape
    # Flatten in C order: lon varies fastest, then lat, then zlev, then time
    sst_flattened = sst_data.flatten()

    # Expand each coordinate axis so it lines up with the flattened SST values
    time_repeated = np.repeat(times, lat_dim * lon_dim * zlev_dim)
    lat_repeated = np.tile(np.repeat(latitudes, lon_dim), time_dim * zlev_dim)
    lon_repeated = np.tile(longitudes, time_dim * zlev_dim * lat_dim)
    zlev_repeated = np.tile(np.repeat(zlev, lat_dim * lon_dim), time_dim)

    # Assemble the long-format table; masked SST cells become NaN
    sst_df = pd.DataFrame({
        'time': time_repeated,
        'lat': lat_repeated,
        'lon': lon_repeated,
        'zlev': zlev_repeated,
        'sst': sst_flattened
    })

    # Show the first rows of the DataFrame
    print(sst_df.head())

else:
    print(f"Forma dos dados não suportada: {data_shape}")

Variáveis disponíveis no dataset:
dict_keys(['time', 'zlev', 'lat', 'lon', 'sst', 'anom', 'err', 'ice'])
Dimensões do SST: (1, 1, 720, 1440)
                 time     lat    lon  zlev  sst
0 2024-05-02 12:00:00 -89.875  0.125   0.0  NaN
1 2024-05-02 12:00:00 -89.875  0.375   0.0  NaN
2 2024-05-02 12:00:00 -89.875  0.625   0.0  NaN
3 2024-05-02 12:00:00 -89.875  0.875   0.0  NaN
4 2024-05-02 12:00:00 -89.875  1.125   0.0  NaN


In [2]:
import xarray as xr

# Open the second NetCDF file (the `ct5km_dhw` product, going by its file name)
file_path = '.\\data_from_noaa\\202405\\ct5km_dhw_v3.1_20240502.nc'
ds = xr.open_dataset(file_path)

# Flatten the dataset into a pandas DataFrame, then turn the coordinate index
# levels into ordinary columns so they can be used as join keys
df = ds.to_dataframe()
df = df.reset_index()

In [3]:
# Join the two gridded products on exact (time, lat, lon) matches.
# An inner join on raw longitudes silently drops every cell whose longitude is
# expressed in a different convention, so when `df` uses -180..180 and `sst_df`
# uses 0..360 the SST longitudes are wrapped into -180..180 first.
# NOTE(review): presumably `df` (ct5km product) is -180..180 and `sst_df`
# (OISST) is 0..360 - confirm against the files. When both frames already
# share a convention the guard is False and the join is exactly as before.
sst_for_merge = sst_df
if df['lon'].min() < 0 and sst_df['lon'].max() > 180:
    sst_for_merge = sst_df.assign(lon=((sst_df['lon'] + 180) % 360) - 180)

merged = pd.merge(df, sst_for_merge, how='inner', on=['time', 'lat', 'lon'])