# imports

In [2]:
# Initialise the whole experiment environment.
import sys
# NOTE(review): absolute, machine-specific path; the notebook only imports the
# project package on a machine that has this exact directory.
sys.path.append("C:/Users/GAI/Desktop/Scott/NCA_Research")

# Star import: presumably the source of `torch` and `set_global_seed` used
# below -- verify against E4_PI_NCA/init_notebook_imports.py.
from E4_PI_NCA.init_notebook_imports import *

# Use the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    DEVICE = "cuda"
else:
    DEVICE = "cpu"
print("Using device:", DEVICE)
set_global_seed(1234)

✅ Environment initialized. Use show_env_info() to check details.
Using device: cuda
[INFO] Global seed set to 1234


# Process urbantales cases into an .npz file

In [3]:
"""
將 dataset 資料夾下所有 *ped.nc 轉換為 dict 結構的 .npz 檔案。
每一個 case 皆為獨立 (C,H,W) 陣列。

輸出格式：
  - case_dict: { case_name: np.ndarray(C,H,W), ... }
  - case_names: list[str]
  - channel_names: list[str]
"""

from pathlib import Path
import numpy as np
import xarray as xr
import torch
import torch.nn.functional as F
from tqdm import tqdm

# ---------------------- Helper functions ----------------------

def nan_to_masked_CHW(arr_CHW: np.ndarray):
    """Zero out NaNs and report which spatial positions were NaN-free.

    Args:
        arr_CHW: Array whose first axis is reduced to build the mask.

    Returns:
        Tuple ``(arr_clean, geo_mask)``:
          - ``arr_clean``: copy of ``arr_CHW`` with every NaN replaced by 0.0
            (the input array is left untouched).
          - ``geo_mask``: float32 array of shape ``arr_CHW.shape[1:]``; 1.0 where
            no entry along axis 0 is NaN, 0.0 where at least one is.
    """
    is_nan = np.isnan(arr_CHW)
    # A position is valid only if it is NaN-free along the whole first axis.
    geo_mask = np.logical_not(is_nan.any(axis=0)).astype(np.float32)
    arr_clean = np.copy(arr_CHW)
    arr_clean[is_nan] = 0.0
    return arr_clean, geo_mask


def add_coord_channels(arr_CHW: np.ndarray, geo_mask: np.ndarray):
    """Prepend normalised coordinate planes and the mask to a (C, H, W) array.

    Args:
        arr_CHW: 3-D array; its last two axes give the grid size (H, W).
        geo_mask: 2-D (H, W) array inserted as the third leading channel.

    Returns:
        Array of shape (C + 3, H, W) whose channels are, in order:
        ``coord_y`` (-1 at row 0 to +1 at the last row), ``coord_x``
        (-1 at column 0 to +1 at the last column), ``geo_mask``, then the
        original channels of ``arr_CHW``.
    """
    _, height, width = arr_CHW.shape
    row_axis = np.linspace(-1.0, 1.0, height, dtype=np.float32)
    col_axis = np.linspace(-1.0, 1.0, width, dtype=np.float32)
    # "ij" indexing: coord_y varies along rows, coord_x along columns.
    coord_y, coord_x = np.meshgrid(row_axis, col_axis, indexing="ij")
    leading = [plane[np.newaxis, ...] for plane in (coord_y, coord_x, geo_mask)]
    return np.concatenate(leading + [arr_CHW], axis=0)


# ---------------------- Main script ----------------------
# Walk ../dataset, turn every "*ped.nc" file into one (C, H, W) float32 array
# and store all cases in a single compressed .npz archive.

folder = Path("../dataset")
output_path = folder / "all_cases_dict.npz"

case_dict = {}            # case name -> (C, H, W) float32 array
case_names = []           # case names in processing order
channel_names_ref = None  # channel names of the first processed case

# Sorted so the case order does not depend on the filesystem's listing order.
ped_files = sorted(folder.rglob("*ped.nc"))

for ped_file in tqdm(ped_files, desc="Processing ped files"):
    # A case is named after the directory that holds its ped file.
    case_name = ped_file.parent.name

    # The wind direction (degrees) is the text after the last "_d" in the case
    # name. Fall back to 0.0 when it cannot be parsed, but say so instead of
    # silently hiding the problem.
    try:
        wind_dir = float(case_name.split("_d")[-1])
    except ValueError:
        wind_dir = 0.0
        tqdm.write(f"[WARN] {case_name}: cannot parse wind direction, using 0.0 degrees")

    with xr.open_dataset(ped_file) as ds:
        arrays = [ds[var].values for var in ds.data_vars]
        # Stack the variables on a trailing channel axis and reverse axis 0.
        ped_np = np.stack(arrays, axis=-1)[::-1, :, :]
        vars_names = list(ds.data_vars.keys())

    # Two constant channels holding sin / cos of the wind direction. The shape is
    # built explicitly so there are always two channels, even when the file
    # contains fewer than two data variables.
    wind_np = np.zeros(ped_np.shape[:-1] + (2,), dtype=ped_np.dtype)
    wind_np[..., 0] = np.sin(np.deg2rad(wind_dir))  # labelled "windInitX"
    wind_np[..., 1] = np.cos(np.deg2rad(wind_dir))  # labelled "windInitY"

    # Optional topography: first "*_topo" entry next to the ped file, if any.
    topo_file = next(ped_file.parent.glob("*_topo"), None)
    if topo_file:
        topo = np.loadtxt(topo_file)
        if topo.ndim == 2:
            topo = topo[:, :, np.newaxis]
        # NOTE(review): ped_np was reversed along axis 0 above, topo is used as
        # loaded -- confirm both share the same row orientation.
        ped_np = np.concatenate([topo, wind_np, ped_np], axis=-1)
        channel_names = ["topo", "windInitX", "windInitY"] + vars_names
    else:
        ped_np = np.concatenate([wind_np, ped_np], axis=-1)
        channel_names = ["windInitX", "windInitY"] + vars_names

    # (H, W, C) -> (C, H, W), then replace NaNs and prepend coord/mask channels.
    ped_np = np.transpose(ped_np, (2, 0, 1))
    ped_np, geo_mask = nan_to_masked_CHW(ped_np)
    ped_np = add_coord_channels(ped_np, geo_mask)
    channel_names = ["coord_y", "coord_x", "geo_mask"] + channel_names

    case_dict[case_name] = ped_np.astype(np.float32)
    case_names.append(case_name)

    # Only one channel list is saved, so flag any case whose layout differs
    # from the first one (e.g. a case without a topo file).
    if channel_names_ref is None:
        channel_names_ref = channel_names
    elif channel_names != channel_names_ref:
        tqdm.write(
            f"[WARN] {case_name}: channels {channel_names} differ from "
            f"the saved reference {channel_names_ref}"
        )

# Fail with a clear message, and before overwriting an existing archive, when
# nothing was found (the summary print below would otherwise raise IndexError).
if not case_names:
    raise FileNotFoundError(f"No '*ped.nc' files found under {folder.resolve()}")

# ---------------------- Save ----------------------
# case_dict is a Python dict, so NumPy stores it as a pickled 0-d object array:
# read it back with np.load(path, allow_pickle=True)["case_dict"].item().
np.savez_compressed(
    output_path,
    case_dict=case_dict,
    case_names=np.array(case_names),
    channel_names=np.array(channel_names_ref)
)

print(f"✅ 已儲存：{output_path}")
print(f"共 {len(case_names)} 個 case")
print(f"範例 case：{case_names[0]}  shape={case_dict[case_names[0]].shape}")



Processing ped files: 100%|██████████| 54/54 [00:07<00:00,  6.82it/s]


✅ 已儲存：..\dataset\all_cases_dict.npz
共 54 個 case
範例 case：CN-BE-V1_d00  shape=(11, 816, 1056)


變數說明：`uped`、`vped`、`vel_ped`：平均風速（不包含湍流）；`Uped`：平均風速（包含湍流）；`TKEped`：湍流項。



In [4]:
# Print a one-line summary per case in case_dict. print_dict_stats is not
# defined in the visible cells -- presumably it comes from the star import in
# the first cell (verify).
print_dict_stats(case_dict)

CN-BE-V1_d00: np.ndarray, shape=(11, 816, 1056), dtype=float32
CN-BE-V1_d105: np.ndarray, shape=(11, 816, 1056), dtype=float32
CN-BE-V1_d120: np.ndarray, shape=(11, 816, 1056), dtype=float32
CN-BE-V1_d135: np.ndarray, shape=(11, 816, 1056), dtype=float32
CN-BE-V1_d15: np.ndarray, shape=(11, 816, 1056), dtype=float32
CN-BE-V1_d150: np.ndarray, shape=(11, 816, 1056), dtype=float32
CN-BE-V1_d165: np.ndarray, shape=(11, 816, 1056), dtype=float32
CN-BE-V1_d180: np.ndarray, shape=(11, 816, 1056), dtype=float32
CN-BE-V1_d30: np.ndarray, shape=(11, 816, 1056), dtype=float32
CN-BE-V1_d45: np.ndarray, shape=(11, 816, 1056), dtype=float32
CN-BE-V1_d60: np.ndarray, shape=(11, 816, 1056), dtype=float32
CN-BE-V1_d75: np.ndarray, shape=(11, 816, 1056), dtype=float32
CN-BE-V1_d90: np.ndarray, shape=(11, 816, 1056), dtype=float32
CN-BE-V2_d00: np.ndarray, shape=(11, 768, 960), dtype=float32
CN-CD-V1_d00: np.ndarray, shape=(11, 576, 1200), dtype=float32
CN-SH-V1_d00: np.ndarray, shape=(11, 528, 720), dt

In [5]:

# Debug listing: print the case name (parent directory name) of every
# "*ped.nc" file found under the dataset folder.
# NOTE(review): this cell re-initialises case_dict, case_names and
# channel_names_ref, so running it discards whatever an earlier cell stored
# in those names.
folder = Path("../dataset")
output_path = folder / "all_cases_dict.npz"

case_dict, case_names, channel_names_ref = {}, [], None

for ped_file in tqdm(list(folder.rglob("*ped.nc")), desc="Processing ped files"):
    case_name = ped_file.parent.name
    print(case_name)

Processing ped files: 100%|██████████| 54/54 [00:00<?, ?it/s]

CN-BE-V1_d00
CN-BE-V1_d105
CN-BE-V1_d120
CN-BE-V1_d135
CN-BE-V1_d15
CN-BE-V1_d150
CN-BE-V1_d165
CN-BE-V1_d180
CN-BE-V1_d30
CN-BE-V1_d45
CN-BE-V1_d60
CN-BE-V1_d75
CN-BE-V1_d90
CN-BE-V2_d00
CN-CD-V1_d00
CN-SH-V1_d00
JP-Tok-U2_d00
JP-Tok-V1_d00
JP-Tok-V3_d00
JP-Tok-V4_d00
KO-SE-U1_d00
KO-SE-U2_d00
KO-SE-U3_d00
KO-SE-U4_d00
KO-SE-V10_d00
KO-SE-V11_d00
KO-SE-V11_d15
KO-SE-V11_d30
KO-SE-V11_d45
KO-SE-V11_d90
KO-SE-V1_d00
KO-SE-V1_d15
KO-SE-V1_d30
KO-SE-V1_d45
KO-SE-V2_d00
KO-SE-V3_d00
KO-SE-V3_d15
KO-SE-V3_d30
KO-SE-V3_d45
KO-SE-V3_d90
KO-SE-V4_d00
KO-SE-V4_d15
KO-SE-V4_d30
KO-SE-V4_d45
KO-SE-V4_d90
KO-SE-V5_d00
KO-SE-V6_d00
KO-SE-V7_d00
KO-SE-V8_d00
KO-SE-V8_d15
KO-SE-V8_d30
KO-SE-V8_d45
KO-SE-V8_d90
KO-SE-V9_d00



