将 U-Net 换成 CNN

data部分

In [3]:
# Mount Google Drive inside Colab and add the project's code folder to
# sys.path so that custom packages stored there can be imported.
import sys
from google.colab import drive
drive.mount('/content/drive')
# NOTE(review): this path spells the Drive root as 'My Drive', while the data
# paths used later in this notebook spell it 'MyDrive' - confirm both resolve.
sys.path.append('/content/drive/My Drive/UTCI_prediction_all/model/Code-Yifan')

Mounted at /content/drive


In [None]:
import torchvision.transforms.functional as TF
# Custom Dataset Class
class CustomDataset(Dataset):
    """Sliding-window dataset of static rasters + past UTCI maps -> future UTCI maps.

    Each item is a tuple ``(spatial_data_seq, output_data, temporal_data)``:

    * ``spatial_data_seq`` - the four static spatial rasters repeated over
      ``T_in`` steps, concatenated with the ``T_in`` past UTCI rasters as a
      fifth channel; laid out as ``[H, W, 5, T_in]`` (assuming 2-D rasters).
    * ``output_data`` - the next ``T_out`` UTCI rasters, ``[H, W, 1, T_out]``.
    * ``temporal_data`` - the matching ``T_in`` rows of the temporal table.

    All rasters are min-max scaled and cropped to a fixed top-left
    ``CROP_SIZE x CROP_SIZE`` patch (the crop is deterministic, not random).

    Args:
        spatial_data_dir: Directory of static rasters; the first four files
            (sorted by name) are used.
        temporal_data_csv: CSV whose first column is skipped and whose
            remaining numeric columns form the temporal features.
        output_data_dir: Directory of UTCI rasters, used in sorted-name order.
        T_in: Number of input time steps per window.
        T_out: Number of predicted time steps per window.
        device: Device every returned tensor is moved to.

    Raises:
        ValueError: If no complete ``T_in + T_out`` window can be formed.
    """

    # Number of consecutive rows of the temporal table forming one sequence.
    SEQ_LEN = 336
    # Side length (pixels) of the top-left patch kept from every raster.
    CROP_SIZE = 64

    def __init__(self, spatial_data_dir, temporal_data_csv, output_data_dir, T_in, T_out, device='cuda'):
        self.device = device
        self.T_in = T_in
        self.T_out = T_out

        # NOTE: these hold the scaling bounds of the LAST spatial raster only,
        # because every raster is normalised with its own min/max.
        self.spatial_data_max = None
        self.spatial_data_min = None

        # Load the same four static spatial images used by every sample.
        spatial_files = sorted(os.listdir(spatial_data_dir))[:4]
        spatial_layers = []
        for spatial_file in spatial_files:
            spatial_image = tifffile.imread(os.path.join(spatial_data_dir, spatial_file))
            spatial_image = torch.tensor(spatial_image).unsqueeze(0).to(device)  # add channel dimension
            spatial_image, self.spatial_data_max, self.spatial_data_min = self.maxminscaler_3d(spatial_image)
            spatial_layers.append(self._crop(spatial_image))

        # One tensor of shape [4, H, W] holding the static spatial information.
        self.spatial_data = torch.cat(spatial_layers, dim=0)

        # Temporal (e.g. weather) table: skip the leading timestamp column and
        # keep numeric columns only, with missing values replaced by 0.
        temporal_data_df = pd.read_csv(temporal_data_csv).iloc[:, 1:]
        temporal = temporal_data_df.select_dtypes(include=[np.number]).fillna(0).astype(np.float32).values

        # Split into as many complete SEQ_LEN-step sequences as possible. The
        # feature count comes from the table itself rather than a literal 7.
        num_sequences = temporal.shape[0] // self.SEQ_LEN
        self.temporal_data = temporal[:num_sequences * self.SEQ_LEN].reshape(
            num_sequences, self.SEQ_LEN, temporal.shape[1])

        self.output_data_paths = [os.path.join(output_data_dir, f) for f in sorted(os.listdir(output_data_dir))]
        self.utci_max = None  # global UTCI maximum, filled in below
        self.utci_min = None  # global UTCI minimum, filled in below

        # Debugging prints in __init__
        print("CustomDataset initialized successfully.")
        print(f"Number of spatial images: {len(spatial_files)}")
        print(f"Temporal data shape after reshape: {self.temporal_data.shape}")
        print(f"Number of output images (UTCI): {len(self.output_data_paths)}")
        print(f"T_in: {T_in}, T_out: {T_out}")

        # Global bounds so that every UTCI raster shares one normalisation.
        self.compute_utci_global_max_min()

        # Total number of (input, output) windows over all sequences.
        self.num_samples = self.temporal_data.shape[0] * self._windows_per_sequence()
        print(f"Calculated num_samples: {self.num_samples}")

        if self.num_samples <= 0:
            raise ValueError("Not enough time steps to generate input and output sequences")

    def _windows_per_sequence(self):
        """Return how many T_in + T_out windows fit in one SEQ_LEN-step sequence."""
        return self.SEQ_LEN - (self.T_in + self.T_out - 1)

    def _crop(self, image):
        """Keep the fixed top-left CROP_SIZE x CROP_SIZE patch of ``image``."""
        return TF.crop(image, top=0, left=0, height=self.CROP_SIZE, width=self.CROP_SIZE)

    def _load_utci_frame(self, path):
        """Read one UTCI raster, scale it with the global bounds and crop it."""
        frame = torch.tensor(tifffile.imread(path)).unsqueeze(0).to(self.device)  # add channel dimension
        frame, _, _ = self.maxminscaler_3d(frame, self.utci_max, self.utci_min)
        return self._crop(frame)

    def _load_utci_window(self, start, length):
        """Stack ``length`` consecutive UTCI frames from ``start`` as [H, W, 1, length]."""
        frames = [self._load_utci_frame(self.output_data_paths[start + t]) for t in range(length)]
        # [length, 1, H, W] -> [H, W, 1, length]
        return torch.stack(frames).permute(2, 3, 1, 0)

    def compute_utci_global_max_min(self):
        """Scan every UTCI raster and record the dataset-wide max and min.

        The values are read straight from the loaded array; no device copy is
        needed just to take a max/min.
        """
        for output_file in self.output_data_paths:
            output_data = tifffile.imread(output_file)
            current_max = output_data.max().item()
            current_min = output_data.min().item()

            if self.utci_max is None or current_max > self.utci_max:
                self.utci_max = current_max
            if self.utci_min is None or current_min < self.utci_min:
                self.utci_min = current_min

        print(f"全局 UTCI 最大值: {self.utci_max}, 最小值: {self.utci_min}")  # confirmation print

    def __len__(self):
        return self.num_samples

    def __getitem__(self, idx):
        # Map the flat index to (sequence, window start inside the sequence).
        sample_idx, time_idx = divmod(idx, self._windows_per_sequence())

        # Static rasters repeated along a trailing time axis: [4, H, W, T_in] -> [H, W, 4, T_in]
        spatial_data_seq = self.spatial_data.unsqueeze(-1).repeat(1, 1, 1, self.T_in)
        spatial_data_seq = spatial_data_seq.permute(1, 2, 0, 3)

        # Temporal features for the T_in input steps.
        temporal_data = torch.tensor(
            self.temporal_data[sample_idx, time_idx:time_idx + self.T_in], dtype=torch.float32
        ).to(self.device)

        # NOTE(review): the UTCI frames below are addressed by time_idx alone, so
        # every sequence (sample_idx > 0) is paired with the first SEQ_LEN frames.
        # If the UTCI directory covers all sequences, an offset of
        # sample_idx * SEQ_LEN is probably intended - confirm against the data.

        # Targets: the T_out frames that follow the input window, [H, W, 1, T_out].
        output_data = self._load_utci_window(time_idx + self.T_in, self.T_out)

        # Past UTCI frames become the 5th input channel, [H, W, 1, T_in].
        utci_input = self._load_utci_window(time_idx, self.T_in)
        spatial_data_seq = torch.cat([spatial_data_seq, utci_input], dim=2)  # [H, W, 5, T_in]

        # Debug output kept from the original implementation (printed per item).
        print("Combined input data shape (expected [H, W, 5, T_in]):", spatial_data_seq.shape)
        print("Output data shape (expected [H, W, 1, T_out]):", output_data.shape)

        return spatial_data_seq, output_data, temporal_data

    def maxminscaler_3d(self, tensor_3d, scaler_max=None, scaler_min=None, range=(0, 1)):
        """Min-max scale ``tensor_3d`` into ``range``.

        Args:
            tensor_3d: Tensor to scale.
            scaler_max: Upper bound to scale with; defaults to the tensor's max.
            scaler_min: Lower bound to scale with; defaults to the tensor's min.
            range: ``(low, high)`` target interval. (The name shadows the
                builtin but is kept for caller compatibility.)

        Returns:
            ``(scaled_tensor, scaler_max, scaler_min)``.
        """
        if scaler_max is None:
            scaler_max = tensor_3d.max()
        if scaler_min is None:
            scaler_min = tensor_3d.min()

        span = scaler_max - scaler_min
        shifted = tensor_3d - scaler_min
        if span == 0:
            # Constant raster: avoid 0/0 -> NaN and map everything to range[0].
            X_std = shifted * 0.0
        else:
            X_std = shifted / span
        X_scaled = X_std * (range[1] - range[0]) + range[0]
        return X_scaled, scaler_max, scaler_min

# Paths to directories and files
spatial_data_dir = r'/content/drive/MyDrive/UTCI_prediction_all/data/data_test9/spatial_images_256'
temporal_data_csv = r'/content/drive/MyDrive/UTCI_prediction_all/data/data_test9/Yifan_updated_data_with_timestamps.csv'
output_data_dir = r'/content/drive/MyDrive/UTCI_prediction_all/data/data_test9/output_images_256'

# Define T_in and T_out
T_in = 24   # Input time steps
T_out = 24  # Output time steps

# Build the Dataset and DataLoader. The dataset crops every raster to a fixed
# 64x64 top-left patch (TF.crop with top=0, left=0), i.e. the crop is not random.
batch_size = 4  # Example batch size
dataset = CustomDataset(spatial_data_dir, temporal_data_csv, output_data_dir, T_in, T_out, device='cuda')
dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

In [None]:
# Test data loader and visualize
import matplotlib.pyplot as plt

def show_image(image, title=""):
    """Display ``image`` as a grayscale matplotlib figure with the axes hidden.

    :param image: array-like passed straight to ``plt.imshow``
    :param title: text shown above the image
    """
    plt.imshow(image, cmap='gray')
    plt.title(title)
    plt.axis('off')
    plt.show()

# Take one batch from the loader, report its tensor shapes and plot sample 0.
for spatial_data, output_data, temporal_data in dataloader:
    print("Loaded spatial data shape:", spatial_data.shape)  # [batch_size, H, W, C, T_in]
    print("Loaded output data shape:", output_data.shape)    # [batch_size, H, W, C, T_out]
    print("Loaded temporal data shape:", temporal_data.shape)

    # Input rasters of the first sample: every channel at every time step.
    num_input_steps = spatial_data.shape[4]
    num_channels = spatial_data.shape[3]
    for step in range(num_input_steps):
        for channel in range(num_channels):
            frame = spatial_data[0, :, :, channel, step].cpu().numpy()
            show_image(frame, title=f"Spatial Data Channel {channel} - Time Step {step}")

    # Target rasters of the first sample (single channel).
    num_output_steps = output_data.shape[4]
    for step in range(num_output_steps):
        target = output_data[0, :, :, 0, step].cpu().numpy()
        show_image(target, title=f"Output Data - Time Step {step}")

    break  # Only process one batch for visualization

将卷积网络修改成 U-Net 网络

In [None]:
import torch
import torch.nn as nn
from torch.autograd import Variable

In [None]:
import torch
import torch.nn as nn

class UNetSpatialFeatures(nn.Module):
    """Encoder/decoder CNN applied independently to every time step.

    Each frame goes through four convolutions separated by three 2x max-pools,
    then three learned 2x upsamplings interleaved with convolutions, and ends
    as a single-channel map. Despite the name there are no skip connections
    between encoder and decoder.

    :param kernel_size: convolution kernel size (odd sizes keep the spatial size)
    :param batch_size: stored for reference; not used in the computation
    :param seq_len: number of time steps processed from the input
    :param height: nominal grid height (used only for an empty output)
    :param width: nominal grid width (used only for an empty output)
    :param channels: number of input channels per frame

    Height and width should be divisible by 8 so that three poolings followed
    by three upsamplings restore the original size.
    """

    def __init__(self, kernel_size=3, batch_size=4, seq_len=24, height=64, width=64, channels=5):
        super(UNetSpatialFeatures, self).__init__()
        self.batch_size = batch_size
        self.seq_len = seq_len
        self.height = height
        self.width = width
        self.channels = channels
        self.kernel_size = kernel_size
        self.relu = nn.ReLU(inplace=True)

        # "Same" padding, so only pooling / upsampling change the spatial size
        # (equals the previous hard-coded padding=1 for kernel_size=3).
        padding = kernel_size // 2

        # Encoder (down-sampling path)
        self.encoder_conv1 = nn.Conv2d(self.channels, 64, kernel_size=kernel_size, padding=padding)
        self.encoder_conv2 = nn.Conv2d(64, 128, kernel_size=kernel_size, padding=padding)
        self.encoder_conv3 = nn.Conv2d(128, 256, kernel_size=kernel_size, padding=padding)
        self.encoder_conv4 = nn.Conv2d(256, 512, kernel_size=kernel_size, padding=padding)

        # Max pooling used for down-sampling
        self.maxpool = nn.MaxPool2d(kernel_size=2, stride=2)

        # Decoder (up-sampling path)
        self.decoder_conv4 = nn.Conv2d(512, 256, kernel_size=kernel_size, padding=padding)
        self.decoder_conv3 = nn.Conv2d(256, 128, kernel_size=kernel_size, padding=padding)
        self.decoder_conv2 = nn.Conv2d(128, 64, kernel_size=kernel_size, padding=padding)
        self.decoder_conv1 = nn.Conv2d(64, 1, kernel_size=kernel_size, padding=padding)

        # One transposed convolution per decoder stage. A single 512-channel
        # layer cannot be reused: the later stages carry 256 and 128 channels.
        self.upsample = nn.ConvTranspose2d(512, 512, kernel_size=2, stride=2)
        self.upsample3 = nn.ConvTranspose2d(256, 256, kernel_size=2, stride=2)
        self.upsample2 = nn.ConvTranspose2d(128, 128, kernel_size=2, stride=2)

    def forward(self, input):
        """
        Input shape:  [batch_size, height, width, channels, seq_len]
        Output shape: [batch_size, seq_len, 1, height, width]
        """
        frames = []
        for index in range(self.seq_len):
            # Select time step `index` and move channels first: [B, C, H, W]
            x_input = input[:, :, :, :, index].permute(0, 3, 1, 2)

            # Encoder
            enc1 = self.relu(self.encoder_conv1(x_input))
            enc2 = self.relu(self.encoder_conv2(self.maxpool(enc1)))
            enc3 = self.relu(self.encoder_conv3(self.maxpool(enc2)))
            enc4 = self.relu(self.encoder_conv4(self.maxpool(enc3)))

            # Decoder: upsample with the layer matching the channel count
            dec4 = self.relu(self.decoder_conv4(self.upsample(enc4)))
            dec3 = self.relu(self.decoder_conv3(self.upsample3(dec4)))
            dec2 = self.relu(self.decoder_conv2(self.upsample2(dec3)))
            frames.append(self.decoder_conv1(dec2))  # single-channel output

        if not frames:
            return input.new_empty((input.shape[0], 0, 1, self.height, self.width))
        # Stacking keeps the result on the input's device and inside autograd.
        return torch.stack(frames, dim=1)

输入数据的形状：U-Net 模块将提取空间特征，输出形状为 [batch_size, seq_len, 1, height, width]，而 LSTM 处理时间序列的输入通常是 [batch_size, seq_len, channels * height * width]。因此你需要在 U-Net 输出和 LSTM 输入之间进行形状调整。

集成结构：一般来说，U-Net 用于空间特征提取，LSTM 用于时间特征建模。所以可以考虑将每个时间步的空间特征通过 U-Net 处理后，再输入到 LSTM 中进行时间依赖建模。

调整数据格式：在将空间特征传递给 LSTM 之前，需要将 U-Net 的输出从 [batch_size, seq_len, 1, height, width] 转换为 LSTM 可以处理的格式 [batch_size, seq_len, height * width]。

In [None]:
import torch
import torch.nn as nn

class UNetSpatialFeatures(nn.Module):
    """Encoder/decoder CNN applied independently to every time step.

    Each frame goes through four convolutions separated by three 2x max-pools,
    then three learned 2x upsamplings interleaved with convolutions, and ends
    as a single-channel map. There are no encoder-to-decoder skip connections.

    Input:  [batch_size, height, width, channels, seq_len]
    Output: [batch_size, seq_len, 1, height, width]

    Height and width should be divisible by 8 so that three poolings followed
    by three upsamplings restore the original size.
    """

    def __init__(self, kernel_size=3, batch_size=4, seq_len=24, height=64, width=64, channels=5):
        super(UNetSpatialFeatures, self).__init__()
        self.batch_size = batch_size
        self.seq_len = seq_len
        self.height = height
        self.width = width
        self.channels = channels
        self.kernel_size = kernel_size
        self.relu = nn.ReLU(inplace=True)

        # "Same" padding (identical to the former padding=1 when kernel_size=3).
        padding = kernel_size // 2

        # Encoder
        self.encoder_conv1 = nn.Conv2d(self.channels, 64, kernel_size=kernel_size, padding=padding)
        self.encoder_conv2 = nn.Conv2d(64, 128, kernel_size=kernel_size, padding=padding)
        self.encoder_conv3 = nn.Conv2d(128, 256, kernel_size=kernel_size, padding=padding)
        self.encoder_conv4 = nn.Conv2d(256, 512, kernel_size=kernel_size, padding=padding)

        # Max pooling
        self.maxpool = nn.MaxPool2d(kernel_size=2, stride=2)

        # Decoder
        self.decoder_conv4 = nn.Conv2d(512, 256, kernel_size=kernel_size, padding=padding)
        self.decoder_conv3 = nn.Conv2d(256, 128, kernel_size=kernel_size, padding=padding)
        self.decoder_conv2 = nn.Conv2d(128, 64, kernel_size=kernel_size, padding=padding)
        self.decoder_conv1 = nn.Conv2d(64, 1, kernel_size=kernel_size, padding=padding)

        # One transposed convolution per decoder stage; the 512-channel layer
        # alone would reject the 256- and 128-channel tensors of later stages.
        self.upsample = nn.ConvTranspose2d(512, 512, kernel_size=2, stride=2)
        self.upsample3 = nn.ConvTranspose2d(256, 256, kernel_size=2, stride=2)
        self.upsample2 = nn.ConvTranspose2d(128, 128, kernel_size=2, stride=2)

    def forward(self, input):
        """Run the network on each of the first ``seq_len`` time steps of ``input``."""
        frames = []
        for index in range(self.seq_len):
            # Frame at this time step, channels first: [batch_size, channels, height, width]
            x_input = input[:, :, :, :, index].permute(0, 3, 1, 2)

            # Encoder
            enc1 = self.relu(self.encoder_conv1(x_input))
            enc2 = self.relu(self.encoder_conv2(self.maxpool(enc1)))
            enc3 = self.relu(self.encoder_conv3(self.maxpool(enc2)))
            enc4 = self.relu(self.encoder_conv4(self.maxpool(enc3)))

            # Decoder
            dec4 = self.relu(self.decoder_conv4(self.upsample(enc4)))
            dec3 = self.relu(self.decoder_conv3(self.upsample3(dec4)))
            dec2 = self.relu(self.decoder_conv2(self.upsample2(dec3)))
            frames.append(self.decoder_conv1(dec2))  # single-channel output

        if not frames:
            return input.new_empty((input.shape[0], 0, 1, self.height, self.width))
        # Stacking keeps the result on the input's device and inside autograd.
        return torch.stack(frames, dim=1)


class Convolution_LSTM(nn.Module):
    """LSTM over flattened spatial maps, followed by a linear read-out and tanh.

    Every time step is flattened to ``input_channels * height * width``
    features, passed through a stacked ``nn.LSTM``, and the last ``T_out``
    hidden outputs are projected back to the flattened map size. Despite the
    class name, no convolution is performed here.

    :param input_channels: channels per input map
    :param hidden_channels: hidden size expressed in "channels" (multiplied by height * width)
    :param num_layer: number of stacked LSTM layers
    :param batch_size: stored for reference; the actual batch size is read from the input
    :param height: map height
    :param width: map width
    :param T_out: number of trailing time steps returned
    """

    def __init__(self, input_channels, hidden_channels, num_layer=2, batch_size=4, height=64, width=64, T_out=24):
        super(Convolution_LSTM, self).__init__()
        self.input_channels = input_channels
        self.hidden_channels = hidden_channels
        self.num_layer = num_layer
        self.batch_size = batch_size
        self.height = height
        self.width = width
        self.T_out = T_out

        # LSTM over flattened maps
        self.lstm = nn.LSTM(input_size=self.input_channels * self.height * self.width,
                            hidden_size=self.hidden_channels * self.height * self.width,
                            num_layers=self.num_layer, batch_first=True)

        self.tanh = nn.Tanh()
        self.hidden2out_1 = nn.Linear(self.hidden_channels * self.height * self.width,
                                      self.input_channels * self.height * self.width)

    def initialize_parameters(self, batch_size):
        """Return zero ``(h0, c0)`` states of shape [num_layer, batch_size, hidden].

        The states are created on the device and dtype of the LSTM weights, so
        the module works on whichever device it was moved to.
        """
        reference = self.lstm.weight_ih_l0
        shape = (self.num_layer, batch_size, self.hidden_channels * self.height * self.width)
        h0 = torch.zeros(shape, device=reference.device, dtype=reference.dtype)
        c0 = torch.zeros(shape, device=reference.device, dtype=reference.dtype)
        return h0, c0

    def forward(self, input):
        """Map [batch, seq_len, ...] to [batch, T_out, input_channels, height, width].

        :raises ValueError: if the input holds fewer than ``T_out`` time steps
        """
        batch, seq_len = input.shape[0], input.shape[1]
        if seq_len < self.T_out:
            raise ValueError(
                f"Input sequence length {seq_len} is shorter than T_out={self.T_out}")

        # Flatten each step; reshape (unlike view) also accepts non-contiguous input.
        flat = input.reshape(batch, seq_len, -1)

        # Zero initial hidden / cell states
        h0, c0 = self.initialize_parameters(batch)

        outputs, _ = self.lstm(flat, (h0, c0))

        # Keep only the last T_out time steps
        outputs = outputs[:, -self.T_out:, :]
        outputs = self.hidden2out_1(outputs)
        outputs = self.tanh(outputs)

        # The read-out width is input_channels * height * width, so the channel
        # axis must be input_channels (identical to 1 in the default setup).
        return outputs.reshape(batch, self.T_out, self.input_channels, self.height, self.width)


class UNetLSTM(nn.Module):
    """Chain a spatial feature extractor with a temporal model.

    ``unet`` is applied first; its output is flattened to
    [batch, seq_len, features] and handed to ``conv_lstm``.
    """

    def __init__(self, unet, conv_lstm):
        super().__init__()
        self.unet = unet
        self.conv_lstm = conv_lstm

    def forward(self, input):
        # Spatial features for every time step
        features = self.unet(input)

        # Collapse everything after (batch, time) into one feature axis
        n_batch, n_steps = features.size(0), features.size(1)
        flattened = features.view(n_batch, n_steps, -1)

        # Temporal modelling on the flattened features
        return self.conv_lstm(flattened)


In [None]:
import torch
import torch.nn as nn

class Regular_Convolution_LSTM(nn.Module):
    def __init__(self, kernel_size, batch_size=4, T_in=24, T_out=24, height=64, width=64, channels=5):
        """
        Combine a spatial convolution module with an LSTM module to capture
        spatio-temporal dependencies: takes a time series of maps and produces
        a multi-step prediction.

        :param kernel_size: convolution kernel size
        :param batch_size: batch size
        :param T_in: number of input time steps
        :param T_out: number of output time steps
        :param height: height of the spatial grid
        :param width: width of the spatial grid
        :param channels: number of input channels
        """
        super(Regular_Convolution_LSTM, self).__init__()
        self.kernel_size = kernel_size
        self.batch_size = batch_size
        self.T_in = T_in
        self.T_out = T_out
        self.height = height
        self.width = width
        self.channels = channels

        # Convolutional part: extracts spatial features.
        # NOTE(review): Extraction_spatial_features is not defined in the visible
        # part of this notebook - presumably imported from the project package;
        # confirm it exists and which input layout it expects.
        self.conv_module = Extraction_spatial_features(kernel_size=self.kernel_size, batch_size=self.batch_size,
                                                       seq_len=self.T_in, height=self.height, width=self.width,
                                                       channels=self.channels)

        # LSTM part: captures temporal dependencies and produces T_out steps.
        self.convlstm = Convolution_LSTM(input_channels=1, hidden_channels=1,
                                         num_layer=2, batch_size=self.batch_size, height=self.height,
                                         width=self.width, T_out=self.T_out)

        # Keep the former behaviour of placing the sub-modules on the GPU, but
        # do not crash at construction time on a machine without CUDA.
        if torch.cuda.is_available():
            self.conv_module = self.conv_module.cuda()
            self.convlstm = self.convlstm.cuda()

        # Retained as an attribute for compatibility; no longer applied in
        # forward() because Convolution_LSTM already ends with tanh.
        self.tanh = nn.Tanh()

    def forward(self, x):
        """Return predictions shaped [batch_size, height, width, 1, T_out]."""
        # Spatial features, expected as [batch_size, T_in, 1, height, width]
        output_spatial = self.conv_module(x)

        # Flatten to [batch_size, T_in, height * width] for the LSTM;
        # reshape also handles non-contiguous tensors.
        batch_size, seq_len = output_spatial.size(0), output_spatial.size(1)
        output_spatial_flat = output_spatial.reshape(batch_size, seq_len, -1)

        # Temporal modelling. The result is already tanh-activated inside
        # Convolution_LSTM.forward; a second tanh would cap the output at
        # about +/-0.76, below the top of the [0, 1] range the targets are scaled to.
        output = self.convlstm(output_spatial_flat)

        # [batch_size, T_out, 1, height, width] -> [batch_size, height, width, 1, T_out]
        output = output.permute(0, 3, 4, 2, 1)

        return output

In [None]:
for spatial_data, output_data, temporal_data in dataloader:
    # Print the shapes of the loaded batch
    print("Loaded spatial data shape:", spatial_data.shape)  # [batch_size, h, w, channels, T_in]
    print("Loaded output data shape:", output_data.shape)    # [batch_size, h, w, 1, T_out]
    print("Loaded temporal data shape:", temporal_data.shape)  # temporal_data is not used for now

    # Instantiate the model
    model = Regular_Convolution_LSTM(kernel_size=3, batch_size=4, T_in=24, T_out=24, height=64, width=64, channels=5).cuda()

    # Reorder the input to [batch_size, channels, height, width, T_in]
    # NOTE(review): UNetSpatialFeatures.forward in this notebook documents its
    # input as [batch_size, height, width, channels, seq_len]; confirm that the
    # spatial module actually used by the model expects the channel-first
    # layout produced here.
    spatial_data = spatial_data.permute(0, 3, 1, 2, 4).cuda()  # now [batch_size, channels, height, width, T_in]

    # Forward-pass smoke test
    output = model(spatial_data)

    # Print the output shape to check it matches [batch_size, height, width, channels, T_out]
    print("Model output shape:", output.shape)  # expected [batch_size, height, width, channels, T_out]

    break  # run a single batch only, as a test