In [1]:
import xarray_beam as xbeam
import xarray
import apache_beam as beam
import pandas as pd
import numpy as np 

ds, _ = xbeam.open_zarr('/workspace/Haea_dev/resource/1440x720/2016-12-31_2021-12-31.zarr')
print(ds.nbytes / (10**9))

36.287454736


In [2]:
import dask.array as da

AIR_VARIABLE = ['geopotential', 'specific_humidity', 'temperature', 'u_component_of_wind', 'v_component_of_wind', 'vertical_velocity']

SURFACE_VARIABLE = ['2m_temperature', '10m_u_component_of_wind', '10m_v_component_of_wind', 'mean_sea_level_pressure', 'total_cloud_cover', 'total_precipitation']

VARIABLES = AIR_VARIABLE + SURFACE_VARIABLE


def standardize(data):
    mean = data.mean(dim=['time', 'latitude', 'longitude'], keep_attrs=True)
    std = data.std(dim=['time', 'latitude', 'longitude'], keep_attrs=True)
    standardized_data = (data - mean) / std
    return standardized_data, mean, std

for val in VARIABLES:
    standardized_data, mean, std = standardize(ds[val])
    ds[val + "_st"] = standardized_data
    ds[val + "_mean"] = mean
    ds[val + "_std"] = std


ds_stacked = ds.stack(spatial=('latitude', 'longitude'))

variables_with_level = [var for var in ds_stacked.data_vars if 'level' in ds_stacked[var].dims]

level_vars = [var for var in ds_stacked.variables if 'level' in ds_stacked[var].dims]

# 각 level 차원마다 새 변수 생성
for var_name in level_vars:
    for level in ds_stacked.level:
        # 새 변수 이름 형식: 원본변수명_level값
        new_var_name = f"{var_name}_level_{level.values}"
        
        # 선택한 level에 대한 데이터를 새 변수로 할당
        ds_stacked[new_var_name] = ds_stacked[var_name].sel(level=level)
        
        # 필요하다면, 새로운 변수에서 level 차원을 제거
        ds_stacked[new_var_name] = ds_stacked[new_var_name].drop_vars('level', errors='ignore')
    

ds_stacked = ds_stacked.drop_vars(level_vars)

print(ds_stacked.keys)



In [3]:
print(ds_stacked['vertical_velocity_level_875'])

<xarray.DataArray 'vertical_velocity_level_875' (time: 43825, spatial: 900)>
array([[ 0.07863677,  0.07172513,  0.06135774, ..., -1.0056858 ,
        -0.6670177 , -0.19835865],
       [ 0.08953583,  0.07783926,  0.09352326, ..., -1.1457784 ,
        -0.6832334 , -0.07953238],
       [ 0.04195213,  0.04381299,  0.08023179, ..., -1.0083442 ,
        -0.71194315, -0.19915605],
       ...,
       [ 0.3654706 ,  0.39827365,  0.39087296, ..., -0.36599767,
        -0.22158419, -0.22118415],
       [ 0.42067575,  0.40707445,  0.3428685 , ..., -0.32299364,
        -0.23358531, -0.05516866],
       [ 0.22757697,  0.27604812,  0.30767095, ..., -0.2579121 ,
         0.06894416,  0.2560894 ]], dtype=float32)
Coordinates:
  * time       (time) datetime64[ns] 2016-12-31 ... 2021-12-31
  * spatial    (spatial) object MultiIndex
  * latitude   (spatial) float32 32.0 32.0 32.0 32.0 ... 39.25 39.25 39.25 39.25
  * longitude  (spatial) float32 124.0 124.2 124.5 124.8 ... 130.8 131.0 131.2
Attributes:
    

In [4]:
print(ds_stacked)

dataset = []
for val in ds_stacked.keys():
    dataset.append(ds_stacked[val].values)

<xarray.Dataset>
Dimensions:                              (time: 43825, spatial: 900)
Coordinates:
  * time                                 (time) datetime64[ns] 2016-12-31 ......
  * spatial                              (spatial) object MultiIndex
  * latitude                             (spatial) float32 32.0 32.0 ... 39.25
  * longitude                            (spatial) float32 124.0 124.2 ... 131.2
Data variables: (12/952)
    10m_u_component_of_wind              (time, spatial) float32 -3.736 ... 0...
    10m_v_component_of_wind              (time, spatial) float32 -1.479 ... -...
    2m_temperature                       (time, spatial) float32 283.8 ... 269.6
    land_sea_mask                        (spatial) float32 0.0 0.0 ... 0.0 0.0
    mean_sea_level_pressure              (time, spatial) float32 1.031e+05 .....
    sea_surface_temperature              (time, spatial) float32 287.3 ... 284.6
    ...                                   ...
    vertical_velocity_std_level_875 

In [11]:
print(dataset[0].shape)

(43825, 900)


: 

In [None]:
import matplotlib.pyplot as plt
import numpy as np

# 2차원 넘파이 배열 생성

# 히트맵 생성
plt.imshow(sea_data, cmap='hot', interpolation='nearest')
plt.colorbar()  # 색상 바 추가
plt.show()

In [None]:
HAS_LEVEL_VARIABLE = [ 'geopotential', 'specific_humidity', 'temperature', 'u_component_of_wind', 'v_component_of_wind', 'vertical_velocity']

# 새 차원을 추가하고 데이터 변수를 결합
data_arrays = [ds[var].expand_dims('variable').assign_coords(variable=[var]) for var in HAS_LEVEL_VARIABLE]
combined_ds = xarray.concat(data_arrays, dim='variable')
# 결과 확인
stacked_ds = combined_ds.stack(variable_level=('variable', 'level'))
stacked_ds = stacked_ds.stack(hidden_dim=('latitude', 'longitude'))
print(stacked_ds.values.shape)

In [None]:
HAS_LEVEL_VARIABLE = [ 'geopotential', 'specific_humidity', 'temperature', 'u_component_of_wind', 'v_component_of_wind', 'vertical_velocity']

# 새 차원을 추가하고 데이터 변수를 결합
data_arrays = [ds[var].expand_dims('variable').assign_coords(variable=[var]) for var in HAS_LEVEL_VARIABLE]
combined_ds = xarray.concat(data_arrays, dim='variable')
# 결과 확인
stacked_ds = combined_ds.stack(variable_level=('variable', 'level'))

In [None]:
print(stacked_ds.values)

In [None]:
new_dataset = ds.isel(latitude=lat_indices, longitude=lon_indices)

In [None]:
print(new_dataset)

In [None]:
variable = ['geopotential', 'specific_humidity', 'temperature', 'u_component_of_wind', 'v_component_of_wind', 'vertical_velocity']
arr = ds[variable]

In [None]:
print(arr)

In [None]:
# 새 차원을 추가하고 데이터 변수를 결합
data_arrays = [arr[var].expand_dims('variable').assign_coords(variable=[var]) for var in variable]
combined_ds = xarray.concat(data_arrays, dim='variable')
# 결과 확인
stacked_ds = combined_ds.stack(variable_level=('variable', 'level'))
stacked_ds = stacked_ds.stack(hidden_dim=('latitude', 'longitude'))


In [None]:
print(stacked_ds)

In [None]:
arr = stacked_ds.compute()
print(arr)

In [None]:
import numpy as np

lat_min, lat_max = 32.2, 39.0
lon_min, lon_max = 124.2, 131

# 해당 범위에 속하는 위도와 경도의 인덱스 찾기
lat_indices = np.where((ds.latitude >= lat_min) & (ds.latitude <= lat_max))[0]
lon_indices = np.where((ds.longitude >= lon_min) & (ds.longitude <= lon_max))[0]

print("Latitude indices:", len(lat_indices))
print("Longitude indices:", len(lon_indices))

In [None]:
ds_kor = arr.isel(latitude=lat_indices, longitude=lon_indices)
print(ds_kor)

In [None]:
dar = ds_kor['2m_temperature']
dar = dar.sel(level=100)

data = dar.to_numpy()
print(data.shape)

In [None]:
print(data)

In [None]:
import pandas as pd
dar = ds['2m_temperature']
i2 = pd.to_datetime(str(2018) + '-03-01T00:00:00.000000000') 
dar = dar.sel(time=i2)
print(dar)

In [None]:
ds_kor = dar.isel(latitude=lat_indices, longitude=lon_indices)
print(ds_kor.latitude)
print(ds_kor.longitude)


In [None]:
import matplotlib.font_manager as fm
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings(action='ignore')

%matplotlib inline
%config InlineBackend.figure_format = 'retina'
plt.rcParams["figure.figsize"] = (6,6) #차트 사이즈
from mpl_toolkits.basemap import Basemap, cm
m = Basemap(llcrnrlon=min(ds_kor.longitude),llcrnrlat=min(ds_kor.latitude),urcrnrlon=max(ds_kor.longitude),urcrnrlat=max(ds_kor.latitude), resolution='i',projection='cyl',lon_0=(max(ds_kor.longitude)-min(ds_kor.longitude)) / 2,lat_0=(max(ds_kor.latitude)-min(ds_kor.latitude)) / 2)
ds_kor.plot()
m.drawcoastlines()