In [1]:
import netCDF4 as nc
import h5py
import xarray as xr

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

import torch
from torch.utils.data import Dataset, IterableDataset
import torch.nn.functional as F

import os
from pathlib import Path
import re
from datetime import datetime


# SST

In [6]:
# Re-read the raw OISST file and convert longitude from 0..360 to -180..180.
path = '/home/data2/pengguohang/My_Ocean/challenge/1993_2019_data/raw/oisst_199301_201901_monthly.nc'
ds = xr.open_dataset(path)
print(ds)

# Wrap longitude from [0, 360) into [-180, 180).
# Only the coordinate labels change; the data are not reordered, so after this
# step lon runs 0.125 .. 179.875 followed by -179.875 .. -0.125.
ds = ds.assign_coords(lon=((ds.lon - 180) % 360) - 180)

# Remove the length-1 zlev dimension together with its coordinate in one step
# (replaces the deprecated `ds.drop('zlev')` followed by a separate squeeze).
ds = ds.squeeze(dim='zlev', drop=True)

# The file has a time dimension without a coordinate; attach month-start
# timestamps covering 1993-01 .. 2019-01.
time_coords = pd.date_range(start='1993-01-01', end='2019-01-01', freq='MS')
assert ds.sizes['time'] == len(time_coords), (
    f"time axis has {ds.sizes['time']} steps but {len(time_coords)} monthly "
    "timestamps were generated"
)
ds = ds.assign_coords(time=time_coords)

print(ds.coords)
print(ds)


<xarray.Dataset>
Dimensions:  (lat: 720, lon: 1440, zlev: 1, time: 313)
Coordinates:
  * lat      (lat) float32 -89.88 -89.62 -89.38 -89.12 ... 89.38 89.62 89.88
  * lon      (lon) float32 0.125 0.375 0.625 0.875 ... 359.1 359.4 359.6 359.9
  * zlev     (zlev) float32 0.0
Dimensions without coordinates: time
Data variables:
    sst      (time, zlev, lat, lon) float32 ...
    anom     (time, zlev, lat, lon) float32 ...
    err      (time, zlev, lat, lon) float32 ...
Coordinates:
  * lat      (lat) float32 -89.88 -89.62 -89.38 -89.12 ... 89.38 89.62 89.88
  * lon      (lon) float32 0.125 0.375 0.625 0.875 ... -0.625 -0.375 -0.125
  * time     (time) datetime64[ns] 1993-01-01 1993-02-01 ... 2019-01-01
<xarray.Dataset>
Dimensions:  (lat: 720, lon: 1440, time: 313)
Coordinates:
  * lat      (lat) float32 -89.88 -89.62 -89.38 -89.12 ... 89.38 89.62 89.88
  * lon      (lon) float32 0.125 0.375 0.625 0.875 ... -0.625 -0.375 -0.125
  * time     (time) datetime64[ns] 1993-01-01 1993-02-01 ... 20

In [3]:
# Masked cells are stored as NaN, so a plain (non-NaN-aware) maximum
# over the whole SST array comes out as NaN.
sst = ds.sst.values
print(sst.max())

nan


In [8]:
# Downsample: interpolate the dataset onto a grid with half as many points
# along both latitude and longitude.
DOWNSAMPLE_FACTOR = 2  # 720 x 1440 source grid -> 360 x 720 target grid

original_lat = ds['lat']
original_lon = ds['lon']

# Build the coarser target axes: evenly spaced points spanning the outermost
# source cell centres, with the point count derived from the source size.
# NOTE: because both end points are kept, the target spacing is
# (max - min) / (n - 1), i.e. slightly more than DOWNSAMPLE_FACTOR times the
# source spacing (about 0.5007 deg for the 0.25 deg grid loaded above).
new_lat = np.linspace(
    float(original_lat.min()),
    float(original_lat.max()),
    original_lat.size // DOWNSAMPLE_FACTOR,
)
new_lon = np.linspace(
    float(original_lon.min()),
    float(original_lon.max()),
    original_lon.size // DOWNSAMPLE_FACTOR,
)

# Regrid every data variable onto the new axes using xarray's default
# interpolation method.
ds_resampled = ds.interp(lat=new_lat, lon=new_lon)

print(ds_resampled)

<xarray.Dataset>
Dimensions:  (time: 313, lat: 360, lon: 720)
Coordinates:
  * time     (time) datetime64[ns] 1993-01-01 1993-02-01 ... 2019-01-01
  * lat      (lat) float64 -89.88 -89.37 -88.87 -88.37 ... 88.87 89.37 89.88
  * lon      (lon) float64 -179.9 -179.4 -178.9 -178.4 ... 178.9 179.4 179.9
Data variables:
    sst      (time, lat, lon) float64 nan nan nan nan ... -1.66 -1.6 -1.8 -1.8
    anom     (time, lat, lon) float64 nan nan nan nan ... 0.1398 0.1998 0.0 0.0
    err      (time, lat, lon) float64 nan nan nan nan nan ... 0.3 0.3 0.3 0.3


In [7]:
# Extract SST as a DataArray named 'data' and put longitude in ascending order.
sst_ds = ds['sst'].rename('data')

# Sort by the lon coordinate (data and coordinate move together).
# This replaces a roll by len(lon) // 2, which yields ascending longitudes only
# when the wrap point sits exactly at the midpoint of the axis and the dataset
# has not already been sorted; sortby gives the same result without relying on
# either assumption.
sst_ds = sst_ds.sortby('lon')

# Inspect the result
print(sst_ds)

# output_file = '/home/data2/pengguohang/My_Ocean/challenge/1993_2019_data/input/oisst_199301_201901_monthly.nc'
# sst_ds.to_netcdf(output_file)


<xarray.DataArray 'data' (time: 313, lat: 720, lon: 1440)>
array([[[       nan,        nan,        nan, ...,        nan,
                nan,        nan],
        [       nan,        nan,        nan, ...,        nan,
                nan,        nan],
        [       nan,        nan,        nan, ...,        nan,
                nan,        nan],
        ...,
        [-1.7958064, -1.7958064, -1.7958064, ..., -1.7958064,
         -1.7958064, -1.7958064],
        [-1.7954838, -1.7954838, -1.7954838, ..., -1.7954838,
         -1.7954838, -1.7954838],
        [-1.7954838, -1.7954838, -1.7954838, ..., -1.7954838,
         -1.7954838, -1.7954838]],

       [[       nan,        nan,        nan, ...,        nan,
                nan,        nan],
        [       nan,        nan,        nan, ...,        nan,
                nan,        nan],
        [       nan,        nan,        nan, ...,        nan,
                nan,        nan],
...
        [-1.6877421, -1.7083873, -1.6709673, ..., -1.69806