In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from datetime import datetime, timedelta
import os
from pathlib import Path
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import tensorflow as tf

# Chú ý thêm import MaxPooling2D
from tensorflow.keras.models import Model
from tensorflow.keras.layers import (LSTM, Dense, TimeDistributed, 
                                    ConvLSTM2D, BatchNormalization, Conv3D, 
                                    Input, Flatten, RepeatVector, Reshape, Dropout,
                                    MaxPooling2D)
from tensorflow.keras.layers import Conv2D, UpSampling2D, Conv2DTranspose, Lambda, Permute
from tensorflow.keras.callbacks import EarlyStopping

# Thiết lập seed để đảm bảo tính tái lập
np.random.seed(42)
tf.random.set_seed(42)

# Các tham số chính
HEIGHT, WIDTH = 128, 256
INPUT_STEPS = 3   # Sử dụng 3 giờ đầu làm input
OUTPUT_STEPS = 6  # Dự báo 6 giờ sau


In [2]:

from tqdm import tqdm
from skimage.transform import resize

def get_timestamp_list(start, end, step_hours=1):
    return [start + timedelta(hours=i) for i in range(0, int((end - start).total_seconds() // 3600) + 1, step_hours)]

def read_radar_image(timestamp, normalize=True, target_shape=(128, 256)):
    file_path = f"DATA_SV/Precipitation/AWS/{timestamp.year}/{timestamp.month:02}/{timestamp.day:02}/AWS_{timestamp.strftime('%Y%m%d%H%M%S')}.tif"
    path = Path(file_path)
    if not path.exists():
        return None
    try:
        dataset = rioxarray.open_rasterio(file_path)
        data = dataset[0].values.astype(np.float32)
        data[~np.isfinite(data)] = 0.0  # Thay các giá trị không hợp lệ bằng 0
        if target_shape is not None:
            data = resize(data, target_shape, mode='reflect', anti_aliasing=True)
        if normalize:
            max_val = np.max(data)
            if max_val > 0:
                data /= max_val
        return data
    except Exception as e:
        print(f"Error reading {file_path}: {e}")
        return None


In [3]:
def build_dataset(start_time, end_time, input_steps=INPUT_STEPS, output_steps=OUTPUT_STEPS, target_shape=(128, 256)):
    X, y = [], []
    timestamps = get_timestamp_list(start_time, end_time)
    
    for i in tqdm(range(len(timestamps) - input_steps - output_steps + 1)):
        input_seq, output_seq = [], []
        valid = True
        for j in range(input_steps):
            img = read_radar_image(timestamps[i + j], target_shape=target_shape)
            if img is None:
                valid = False
                break
            input_seq.append(img)
        for j in range(output_steps):
            img = read_radar_image(timestamps[i + input_steps + j], target_shape=target_shape)
            if img is None:
                valid = False
                break
            output_seq.append(img)
        if valid:
            # Thêm dimension channel (ở đây là 1) cho phù hợp với các lớp ConvLSTM2D
            X.append(np.expand_dims(input_seq, axis=-1))
            y.append(np.expand_dims(output_seq, axis=-1))
    return np.array(X, dtype=np.float32), np.array(y, dtype=np.float32)


In [4]:
import rioxarray
start_time = datetime(2020, 10, 1, 0)
end_time = datetime(2020, 10, 31, 23)

X_radar, y_radar = build_dataset(start_time, end_time, input_steps=INPUT_STEPS, output_steps=OUTPUT_STEPS)
print("Radar Dataset shapes:", X_radar.shape, y_radar.shape)


100%|██████████| 736/736 [00:57<00:00, 12.69it/s]


Radar Dataset shapes: (726, 3, 128, 256, 1) (726, 6, 128, 256, 1)


In [5]:
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import (
    Input,
    ConvLSTM2D,
    BatchNormalization,
    Dropout,
    MaxPooling2D,
    Conv2D,
    Flatten,
    Dense,
    Reshape,
    UpSampling2D,
    Conv2DTranspose,
    Lambda
)

# Ví dụ các tham số (tùy chỉnh theo bài toán):
INPUT_STEPS = 3
HEIGHT = 128
WIDTH = 256
OUTPUT_STEPS = 6

def build_improved_seq2seq_model(input_shape, output_steps):
    inputs = Input(shape=input_shape)
    
    # --- ENCODER ---
    # 1) ConvLSTM2D chồng liên tiếp
    x = ConvLSTM2D(
        filters=64, kernel_size=(2,2), 
        padding='same', return_sequences=True, 
        activation='relu'
    )(inputs)
    x = BatchNormalization()(x)
    x = Dropout(0.2)(x)
    
    x = ConvLSTM2D(
        filters=64, kernel_size=(2,2),
        padding='same', return_sequences=False,
        activation='relu'
    )(x)
    x = BatchNormalization()(x)
    
    # 2) MaxPooling2D giảm từ 128x256 -> 64x128
    x = MaxPooling2D(pool_size=(2,2))(x)
    
    # 3) Thêm một lớp Conv2D để học thêm đặc trưng
    x = Conv2D(filters=128, kernel_size=(3,3), 
               activation='relu', padding='same')(x)
    x = BatchNormalization()(x)
    
    # --- TRÍCH XUẤT ĐẶC TRƯNG ---
    x_flat = Flatten()(x)
    reduced_units = 1024
    x_dense = Dense(reduced_units, activation='relu')(x_flat)
    x_dense = Dropout(0.2)(x_dense)
    
    # 4) Reshape về kích thước (16, 32, 128) -> "không gian thấp"
    target_h, target_w, target_ch = 16, 32, 128
    x_reshaped = Dense(target_h * target_w * target_ch, activation='relu')(x_dense)
    x_reshaped = Reshape((target_h, target_w, target_ch))(x_reshaped)
    
    # --- DECODER ---
    # Lặp lại quá trình giải mã cho mỗi bước thời gian
    decoded = []
    for _ in range(output_steps):
        # Từ (16,32) upsample -> (128,256) bằng factor (8,8)
        x_up = UpSampling2D(size=(8,8))(x_reshaped)
        
        # Conv2DTranspose để refine ảnh
        x_up = Conv2DTranspose(filters=64, kernel_size=(3,3), 
                               padding='same', activation='relu')(x_up)
        x_up = BatchNormalization()(x_up)
        
        output_img = Conv2DTranspose(filters=1, kernel_size=(3,3),
                                     padding='same', activation='relu')(x_up)
        decoded.append(output_img)
    
    # Stack theo axis=1 => (batch_size, output_steps, 128, 256, 1)
    outputs = Lambda(lambda t: tf.stack(t, axis=1))(decoded)
    
    # Tạo Model
    model = Model(inputs, outputs)
    optimizer = tf.keras.optimizers.Adam(learning_rate=1e-4)
    model.compile(optimizer=optimizer, loss='mse')
    
    return model

# Tạo model
model = build_improved_seq2seq_model(
    input_shape=(INPUT_STEPS, HEIGHT, WIDTH, 1),
    output_steps=OUTPUT_STEPS
)

model.summary()





In [6]:
from sklearn.model_selection import train_test_split

X_train_r, X_val_r, y_train_r, y_val_r = train_test_split(
    X_radar, y_radar, test_size=0.2, random_state=42
)

early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=5,
    restore_best_weights=True
)

history = model.fit(
    X_train_r, y_train_r,
    epochs=50,
    batch_size=32, 
    validation_data=(X_val_r, y_val_r),
    callbacks=[early_stopping]
)


Epoch 1/50


ResourceExhaustedError: Graph execution error:

Detected at node StatefulPartitionedCall/gradient_tape/functional_1/dense_1/MatMul/MatMul_1 defined at (most recent call last):
<stack traces unavailable>
OOM when allocating tensor with shape[1048576,1024] and type float on /job:localhost/replica:0/task:0/device:CPU:0 by allocator mklcpu
	 [[{{node StatefulPartitionedCall/gradient_tape/functional_1/dense_1/MatMul/MatMul_1}}]]
Hint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info. This isn't available when running in Eager mode.
 [Op:__inference_multi_step_on_iterator_15040]

In [None]:
y_val_pred_r = model.predict(X_val_r)
mse_r = mean_squared_error(y_val_r.flatten(), y_val_pred_r.flatten())
mae_r = mean_absolute_error(y_val_r.flatten(), y_val_pred_r.flatten())
r2_r = r2_score(y_val_r.flatten(), y_val_pred_r.flatten())

print("Improved Seq2Seq Model - Validation MSE:", mse_r)
print("Improved Seq2Seq Model - Validation MAE:", mae_r)
print("Improved Seq2Seq Model - Validation R2:", r2_r)


In [None]:
sample_index = 0 
actual_maps = y_val_r[sample_index] 
predicted_maps = model.predict(X_val_r[[sample_index]])[0] 

cmap = plt.cm.YlGnBu.copy()
cmap.set_bad(color='white')

fig, axes = plt.subplots(nrows=OUTPUT_STEPS, ncols=2, figsize=(20, OUTPUT_STEPS * 4))

for i in range(OUTPUT_STEPS):
    # Bản đồ thực tế
    ax_actual = axes[i, 0]
    data_actual = actual_maps[i, :, :, 0]
    im_actual = ax_actual.imshow(data_actual, cmap=cmap, aspect='auto')
    ax_actual.set_title(f"Actual Rainfall t+{i+1}", fontsize=14)
    fig.colorbar(im_actual, ax=ax_actual, fraction=0.046, pad=0.04)
    
    # Bản đồ dự đoán
    ax_pred = axes[i, 1]
    data_pred = predicted_maps[i, :, :, 0]
    im_pred = ax_pred.imshow(data_pred, cmap=cmap, aspect='auto')
    ax_pred.set_title(f"Predicted Rainfall t+{i+1}", fontsize=14)
    fig.colorbar(im_pred, ax=ax_pred, fraction=0.046, pad=0.04)
for ax, label in zip(axes[0], ["Actual Rainfall", "Predicted Rainfall"]):
    ax.annotate(label, xy=(0.5, 1.05), xytext=(0, 5),
                xycoords='axes fraction', textcoords='offset points',
                ha='center', va='baseline', fontsize=16, fontweight='bold')

plt.tight_layout(rect=[0, 0, 1, 0.97])
plt.suptitle("Radar Rainfall Prediction vs Actual (Improved Seq2Seq ConvLSTM)", fontsize=18)
plt.show()
