In [4]:
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import r2_score as sklearn_r2_score
import numpy as np
import os

# Device configuration
# GPU index 2 is preferred; hosts with fewer GPUs fall back to the first GPU,
# and hosts without CUDA fall back to the CPU, instead of failing later with
# an invalid device ordinal.
_PREFERRED_GPU_INDEX = 2
if torch.cuda.is_available():
    _gpu_index = _PREFERRED_GPU_INDEX if torch.cuda.device_count() > _PREFERRED_GPU_INDEX else 0
    device = torch.device(f'cuda:{_gpu_index}')
else:
    device = torch.device('cpu')
print(f"Using device: {device}")

# ConvLSTM Cell
class ConvLSTMCell(nn.Module):
    """Single convolutional LSTM cell operating on 2D feature maps.

    One convolution over the channel-wise concatenation of the input and the
    previous hidden state produces the pre-activations of all four gates.

    Args:
        input_dim: Number of channels of the input tensor.
        hidden_dim: Number of channels of the hidden and cell states.
        kernel_size: Integer side length of the square kernel. Padding is
            ``kernel_size // 2``, which keeps the spatial size for odd kernels.
        bias: Whether the gate convolution has a bias term.
        dropout: Dropout probability applied to the gate pre-activations;
            values ``<= 0`` disable dropout.
    """

    def __init__(self, input_dim, hidden_dim, kernel_size, bias=True, dropout=0.3):
        super(ConvLSTMCell, self).__init__()
        self.input_dim = input_dim
        self.hidden_dim = hidden_dim
        self.kernel_size = kernel_size
        self.padding = kernel_size // 2
        self.bias = bias
        self.conv = nn.Conv2d(
            in_channels=input_dim + hidden_dim,
            out_channels=4 * hidden_dim,
            kernel_size=kernel_size,
            padding=self.padding,
            bias=bias
        )
        self.dropout = nn.Dropout(dropout) if dropout > 0 else nn.Identity()

    def forward(self, input_tensor, cur_state):
        """Advance the cell by one time step.

        Args:
            input_tensor: Input for this step; concatenated with the hidden
                state along dim 1, so it must match it in every other dim.
            cur_state: Pair ``(h_cur, c_cur)`` of current hidden and cell state.

        Returns:
            Tuple ``(h_next, c_next)``.
        """
        h_cur, c_cur = cur_state
        combined = torch.cat([input_tensor, h_cur], dim=1)
        combined_conv = self.conv(combined)
        combined_conv = self.dropout(combined_conv)
        # The 4 * hidden_dim output channels are split into the input, forget,
        # output and candidate gates, in that order.
        cc_i, cc_f, cc_o, cc_g = torch.split(combined_conv, self.hidden_dim, dim=1)
        i = torch.sigmoid(cc_i)
        f = torch.sigmoid(cc_f)
        o = torch.sigmoid(cc_o)
        g = torch.tanh(cc_g)
        c_next = f * c_cur + i * g
        h_next = o * torch.tanh(c_next)
        return h_next, c_next

    def init_hidden(self, batch_size, image_size):
        """Return zero-initialized ``(h, c)`` states.

        The states are created on the same device and with the same dtype as
        the cell's convolution weights, so the cell keeps working after
        ``.to(...)`` / ``.cpu()`` without relying on a module-level global.

        Args:
            batch_size: Size of the leading batch dimension.
            image_size: ``(height, width)`` of the feature maps.
        """
        height, width = image_size
        weight = self.conv.weight
        shape = (batch_size, self.hidden_dim, height, width)
        return (torch.zeros(shape, device=weight.device, dtype=weight.dtype),
                torch.zeros(shape, device=weight.device, dtype=weight.dtype))

# ConvLSTM Module
class ConvLSTM(nn.Module):
    """Stack of ConvLSTMCell layers unrolled over the time dimension.

    Args:
        input_dim: Channel count of the input sequence.
        hidden_dim: Hidden channel count; an int is repeated for every layer,
            otherwise it is indexed per layer.
        kernel_size: Kernel size; an int is repeated for every layer,
            otherwise it is indexed per layer.
        num_layers: Number of stacked cells.
        batch_first: If False, the first two input dims are swapped before
            processing.
        dropout: Dropout probability handed to every cell except the last
            one, which always receives 0.
    """

    def __init__(self, input_dim, hidden_dim, kernel_size, num_layers, batch_first=True, dropout=0.3):
        super().__init__()
        if isinstance(kernel_size, int):
            kernel_size = [kernel_size] * num_layers
        if isinstance(hidden_dim, int):
            hidden_dim = [hidden_dim] * num_layers

        self.input_dim = input_dim
        self.hidden_dim = hidden_dim
        self.kernel_size = kernel_size
        self.num_layers = num_layers
        self.batch_first = batch_first

        cells = []
        for layer in range(self.num_layers):
            # The first layer consumes the raw input; deeper layers consume
            # the hidden output of the layer below.
            in_channels = self.input_dim if layer == 0 else self.hidden_dim[layer - 1]
            layer_dropout = dropout if layer < num_layers - 1 else 0
            cells.append(
                ConvLSTMCell(
                    input_dim=in_channels,
                    hidden_dim=self.hidden_dim[layer],
                    kernel_size=self.kernel_size[layer],
                    bias=True,
                    dropout=layer_dropout,
                )
            )
        self.cell_list = nn.ModuleList(cells)

    def forward(self, input_tensor, hidden_state=None):
        """Run the whole sequence through every layer.

        Args:
            input_tensor: 5-D sequence tensor; the time axis is dim 1 after
                the optional ``batch_first`` permutation.
            hidden_state: Optional per-layer list of ``(h, c)`` pairs; zero
                states are created when omitted.

        Returns:
            Tuple of the last layer's stacked hidden outputs (time on dim 1)
            and that layer's final ``[h, c]`` state.
        """
        if not self.batch_first:
            input_tensor = input_tensor.permute(1, 0, 2, 3, 4)

        batch, steps, _, height, width = input_tensor.size()
        if hidden_state is None:
            hidden_state = self._init_hidden(batch_size=batch, image_size=(height, width))

        outputs_per_layer = []
        states_per_layer = []
        layer_input = input_tensor
        for layer_idx, cell in enumerate(self.cell_list):
            hidden, memory = hidden_state[layer_idx]
            step_outputs = []
            for step in range(steps):
                frame = layer_input[:, step, :, :, :]
                hidden, memory = cell(frame, cur_state=[hidden, memory])
                step_outputs.append(hidden)
            # Re-assemble the time axis; this becomes the next layer's input.
            layer_input = torch.stack(step_outputs, dim=1)
            outputs_per_layer.append(layer_input)
            states_per_layer.append([hidden, memory])

        return outputs_per_layer[-1], states_per_layer[-1]

    def _init_hidden(self, batch_size, image_size):
        """Create the initial state of every layer via its cell's init_hidden."""
        initial_states = []
        for layer in range(self.num_layers):
            initial_states.append(self.cell_list[layer].init_hidden(batch_size, image_size))
        return initial_states

# Hydraulic head model - 16 input channels by default
class HeadCNN(nn.Module):
    """ConvLSTM followed by a 1x1 convolution and ReLU, one output channel per step.

    Args:
        input_dim: Channel count of the input sequence (default 16).
        hidden_dim: Hidden channel count of the ConvLSTM.
        dropout: Forwarded to ConvLSTM. The ConvLSTM here has a single layer,
            and ConvLSTM passes dropout 0 to its last layer, so this value
            currently has no effect.
    """

    def __init__(self, input_dim=16, hidden_dim=32, dropout=0.3):
        super().__init__()
        self.convlstm = ConvLSTM(
            input_dim=input_dim,
            hidden_dim=hidden_dim,
            kernel_size=3,
            num_layers=1,
            dropout=dropout,
        )
        self.out_conv = nn.Conv2d(hidden_dim, 1, kernel_size=1)
        self.activation = nn.ReLU()

    def forward(self, x):
        """Map an input sequence to a non-negative single-channel sequence."""
        sequence, _ = self.convlstm(x)
        batch, steps, channels, height, width = sequence.size()
        # Fold time into the batch axis so the 2D output conv sees every frame.
        frames = sequence.view(batch * steps, channels, height, width)
        prediction = self.activation(self.out_conv(frames))
        return prediction.view(batch, steps, 1, height, width)

# Concentration model - 20 input channels by default; the training code appends
# the predicted head as the last input channel
class ConcCNN(nn.Module):
    """ConvLSTM followed by a 1x1 convolution and ReLU, one output channel per step.

    Args:
        input_dim: Channel count of the input sequence (default 20).
        hidden_dim: Hidden channel count of the ConvLSTM.
        dropout: Forwarded to ConvLSTM. The ConvLSTM here has a single layer,
            and ConvLSTM passes dropout 0 to its last layer, so this value
            currently has no effect.
    """

    def __init__(self, input_dim=20, hidden_dim=32, dropout=0.3):
        super().__init__()
        self.convlstm = ConvLSTM(
            input_dim=input_dim,
            hidden_dim=hidden_dim,
            kernel_size=3,
            num_layers=1,
            dropout=dropout,
        )
        self.out_conv = nn.Conv2d(hidden_dim, 1, kernel_size=1)
        self.activation = nn.ReLU()

    def forward(self, x):
        """Map an input sequence to a non-negative single-channel sequence."""
        sequence, _ = self.convlstm(x)
        batch, steps, channels, height, width = sequence.size()
        # Fold time into the batch axis so the 2D output conv sees every frame.
        frames = sequence.view(batch * steps, channels, height, width)
        prediction = self.activation(self.out_conv(frames))
        return prediction.view(batch, steps, 1, height, width)

# Dataset class - one dense, pre-computed sample per simulation model
class HydroCNNDataset(Dataset):
    """Grid dataset that caches one tensor bundle per ``model_name``.

    Each item is a dict with:
        ``X_head``       (T, 16, M, N): 14 base features + prev_head, prev2_head
        ``X_conc_base``  (T, 19, M, N): 14 base features + conc_mask + prev_head,
                         prev2_head, prev_conc, prev2_conc
        ``Y_head``       (T, 1, M, N): target head
        ``Y_conc``       (T, 1, M, N): target concentration
        ``mask``         (M, N): 1 where the model has at least one record
        ``model_name``   the model identifier

    Args:
        data: DataFrame holding the feature columns plus ``row``, ``col``,
            ``time_step``, ``head``, ``concentration`` and ``model_name``.
        grid_size: ``(M, N)`` grid dimensions.
        max_time_steps: Number of time steps ``T`` allocated per sample.
    """

    def __init__(self, data, grid_size, max_time_steps):
        # Work on a copy with a fresh 0..n-1 index so the caller's frame is
        # never modified and label-based writes below are unambiguous.
        self.data = data.reset_index(drop=True).copy()
        self.grid_size = grid_size
        self.max_time_steps = max_time_steps
        self.models = self.data['model_name'].unique()
        self.cached_data = {}

        # Base feature columns (14 channels)
        self.base_feature_cols = [
            'x', 'y', 'top', 'bottom', 'K', 'recharge', 'ET',
            'river_stage', 'river_cond', 'river_rbot', 'well_rate', 'well_mask',
            'chd_mask', 'lytyp'
        ]

        # The concentration model additionally receives conc_mask (15 channels)
        self.conc_base_feature_cols = self.base_feature_cols + ['conc_mask']

        self._normalize_features()
        self._preprocess_data()

    def _normalize_features(self):
        """Standardize the continuous base features separately for every model."""
        print("开始特征标准化...")

        # Mask / categorical columns are left untouched.
        float_cols = [col for col in self.base_feature_cols
                      if col not in ('well_mask', 'chd_mask', 'lytyp')]

        # Cast once up front: writing scaled floats into integer-typed columns
        # relies on implicit upcasting, which recent pandas versions deprecate.
        self.data[float_cols] = self.data[float_cols].astype(np.float64)

        for model_name in self.models:
            selector = self.data['model_name'] == model_name
            values = self.data.loc[selector, float_cols].to_numpy()
            if values.size == 0:
                continue
            self.data.loc[selector, float_cols] = StandardScaler().fit_transform(values)

    @staticmethod
    def _add_history_features(model_df):
        """Add lag-1 and lag-2 head/concentration columns in place.

        ``model_df`` must be sorted by ``(row, col, time_step)`` and carry a
        0..n-1 index. For the first record(s) of a cell the lag falls back to
        that cell's first value.
        """
        n_rows = len(model_df)
        positions = np.arange(n_rows)
        cell_keys = model_df[['row', 'col']].to_numpy()

        # A record starts a new cell when its (row, col) differs from the
        # previous record's.
        starts_cell = np.ones(n_rows, dtype=bool)
        starts_cell[1:] = (cell_keys[1:] != cell_keys[:-1]).any(axis=1)

        # Position of the first record of the cell each record belongs to.
        first_pos = np.maximum.accumulate(np.where(starts_cell, positions, 0))
        # Clamping at first_pos keeps a lag from reaching into the previous cell.
        lag1 = np.maximum(positions - 1, first_pos)
        lag2 = np.maximum(positions - 2, first_pos)

        for source, suffix in (('head', 'head'), ('concentration', 'conc')):
            values = model_df[source].to_numpy()
            model_df[f'prev_{suffix}'] = values[lag1]
            model_df[f'prev2_{suffix}'] = values[lag2]
        return model_df

    def _preprocess_data(self):
        """Rasterize every model's records into dense tensors and cache them."""
        print("预处理CNN数据...")
        M, N = self.grid_size
        T = self.max_time_steps

        head_feature_cols = self.base_feature_cols + ['prev_head', 'prev2_head']
        # All listed history columns are kept; the predicted head is NOT part
        # of this list and is concatenated later by the training code.
        conc_feature_cols = self.conc_base_feature_cols + [
            'prev_head', 'prev2_head', 'prev_conc', 'prev2_conc'
        ]

        for model_idx, model_name in enumerate(self.models):
            print(f"处理模型 {model_idx+1}/{len(self.models)}: {model_name}")

            model_df = self.data[self.data['model_name'] == model_name].copy().reset_index(drop=True)

            if len(model_df) == 0:
                print(f"警告: 模型 {model_name} 没有数据，跳过")
                continue

            # Shift time steps so that each model starts at 0.
            model_df['time_step'] = model_df['time_step'] - model_df['time_step'].min()

            model_df = model_df.sort_values(['row', 'col', 'time_step']).reset_index(drop=True)

            print(f"  计算历史特征...")
            model_df = self._add_history_features(model_df)

            X_head = np.zeros((T, len(head_feature_cols), M, N), dtype=np.float32)
            X_conc_base = np.zeros((T, len(conc_feature_cols), M, N), dtype=np.float32)
            Y_head = np.zeros((T, 1, M, N), dtype=np.float32)
            Y_conc = np.zeros((T, 1, M, N), dtype=np.float32)
            mask = np.zeros((M, N), dtype=np.float32)

            print(f"  填充网格数据...")

            # int(): time_step may be a float column, and range() needs an int.
            max_t = int(min(T, model_df['time_step'].max() + 1))
            for t in range(max_t):
                if t % 50 == 0:
                    print(f"    处理时间步 {t}/{max_t}")

                t_df = model_df[model_df['time_step'] == t]
                if len(t_df) == 0:
                    continue

                rows = t_df['row'].values.astype(int)
                cols = t_df['col'].values.astype(int)

                # Drop records that fall outside the grid.
                valid_mask = (rows >= 0) & (rows < M) & (cols >= 0) & (cols < N)
                if not np.any(valid_mask):
                    continue

                rows = rows[valid_mask]
                cols = cols[valid_mask]
                t_df_valid = t_df.iloc[valid_mask]

                # X[t] is a (channels, M, N) view; indexing it with (rows, cols)
                # selects (channels, n_points), hence the transpose.
                X_head[t][:, rows, cols] = t_df_valid[head_feature_cols].values.T
                X_conc_base[t][:, rows, cols] = t_df_valid[conc_feature_cols].values.T

                Y_head[t, 0, rows, cols] = t_df_valid['head'].values
                Y_conc[t, 0, rows, cols] = t_df_valid['concentration'].values

                mask[rows, cols] = 1

            self.cached_data[model_name] = {
                'X_head': torch.from_numpy(X_head),
                'X_conc_base': torch.from_numpy(X_conc_base),
                'Y_head': torch.from_numpy(Y_head),
                'Y_conc': torch.from_numpy(Y_conc),
                'mask': torch.from_numpy(mask),
                'model_name': model_name
            }

            print(f"  模型 {model_name} 处理完成")

        print(f"预处理完成！处理了 {len(self.cached_data)} 个模型")
        if len(self.cached_data) > 0:
            sample_data = list(self.cached_data.values())[0]
            print(f"特征维度 - 水头: {sample_data['X_head'].shape[1]}, 浓度基础: {sample_data['X_conc_base'].shape[1]}")

    def __len__(self):
        """Number of cached models."""
        return len(self.cached_data)

    def __getitem__(self, idx):
        """Return the cached tensor bundle of the ``idx``-th model (insertion order)."""
        model_name = list(self.cached_data.keys())[idx]
        return self.cached_data[model_name]

# Custom Collate Function
def custom_collate_fn(batch):
    """Collate dataset items: stack the tensor fields, list the model names.

    Args:
        batch: Sequence of item dicts as produced by the dataset.

    Returns:
        Dict whose tensor entries gain a leading batch dimension and whose
        ``model_name`` entry is a plain list with one name per item.
    """
    tensor_fields = ('X_head', 'X_conc_base', 'Y_head', 'Y_conc', 'mask')
    merged = {
        field: torch.stack([sample[field] for sample in batch])
        for field in tensor_fields
    }
    # Names are not tensors, so they are gathered as-is.
    merged['model_name'] = [sample['model_name'] for sample in batch]
    return merged

# Metrics Computation
def compute_metrics(y_true, y_pred, mask, T):
    """Compute MSE, RMSE, MAE and R2 over the masked, finite grid cells.

    Args:
        y_true: Targets shaped ``[B, T, C, M, N]`` (tensor or ndarray).
        y_pred: Predictions with the same shape as ``y_true``.
        mask: ``[B, M, N]`` array; cells with a value ``> 0`` are evaluated.
        T: Number of time steps. Kept for backward compatibility; the mask is
            broadcast to the shape of ``y_true``, so a value that disagrees
            with the data no longer breaks the indexing.

    Returns:
        Dict with float entries ``mse``, ``rmse``, ``mae`` and ``r2``; every
        entry is NaN when no valid cell remains.
    """
    if isinstance(y_true, torch.Tensor):
        y_true = y_true.detach().cpu().numpy()
    if isinstance(y_pred, torch.Tensor):
        y_pred = y_pred.detach().cpu().numpy()
    if isinstance(mask, torch.Tensor):
        mask = mask.detach().cpu().numpy()

    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)
    mask = np.asarray(mask)

    # [B, M, N] -> [B, 1, 1, M, N], then broadcast over time and channels.
    selected = np.broadcast_to(mask[:, np.newaxis, np.newaxis, :, :] > 0, y_true.shape)

    y_true = y_true[selected]
    y_pred = y_pred[selected]

    # Ignore NaN / inf on either side.
    finite = np.isfinite(y_true) & np.isfinite(y_pred)
    y_true = y_true[finite]
    y_pred = y_pred[finite]

    if len(y_true) == 0:
        return {'mse': np.nan, 'rmse': np.nan, 'mae': np.nan, 'r2': np.nan}

    residual = y_true - y_pred
    mse = np.mean(residual ** 2)
    rmse = np.sqrt(mse)
    mae = np.mean(np.abs(residual))

    ss_res = np.sum(residual ** 2)
    ss_tot = np.sum((y_true - np.mean(y_true)) ** 2)
    # The small epsilon avoids a division by zero for constant targets.
    r2 = 1 - (ss_res / (ss_tot + 1e-8))

    return {'mse': float(mse), 'rmse': float(rmse), 'mae': float(mae), 'r2': float(r2)}

# Training - fit the head model first, then the concentration model
def _masked_mse(prediction, target, mask):
    """MSE between prediction and target after zeroing cells where mask is 0."""
    return F.mse_loss(prediction * mask, target * mask)


def _as_builtin(value):
    """Convert a NumPy scalar to the matching Python scalar; pass others through."""
    return value.item() if isinstance(value, np.generic) else value


def train_dual_cnn(train_loader, val_loader, config):
    """Train the head model, freeze it, then train the concentration model.

    Stage 1 trains ``HeadCNN`` with early stopping on the validation loss and
    restores its best checkpoint. Stage 2 trains ``ConcCNN`` on the base
    concentration features concatenated with the head prediction, with early
    stopping on the validation R2.

    Args:
        train_loader: DataLoader yielding batches with ``X_head``,
            ``X_conc_base``, ``Y_head``, ``Y_conc`` and ``mask``. Its
            ``dataset`` must support ``dataset[0]``; the model input widths
            are read from that sample.
        val_loader: DataLoader with the same batch layout.
        config: Dict with ``save_path``, ``hidden_dim``, ``lr``,
            ``weight_decay``, ``num_epochs``, ``patience`` and
            ``max_time_steps``.

    Returns:
        Tuple ``(head_model, conc_model)``.

    Side effects:
        Writes ``best_head_model.pth``, ``best_conc_model_loss.pth`` and
        ``best_conc_model_r2.pth`` below ``config['save_path']``.
    """
    os.makedirs(config['save_path'], exist_ok=True)
    head_ckpt_path = os.path.join(config['save_path'], 'best_head_model.pth')
    conc_loss_ckpt_path = os.path.join(config['save_path'], 'best_conc_model_loss.pth')
    conc_r2_ckpt_path = os.path.join(config['save_path'], 'best_conc_model_r2.pth')

    # Checkpoints only hold tensors and plain Python values, so they stay
    # loadable without NumPy scalar types having to be unpickled.
    plain_config = {key: _as_builtin(value) for key, value in config.items()}

    # Read the channel counts from the data instead of hard-coding them; the
    # concentration model gets one extra channel for the predicted head.
    sample = train_loader.dataset[0]
    head_input_dim = int(sample['X_head'].shape[1])
    conc_input_dim = int(sample['X_conc_base'].shape[1]) + 1

    print("🔹 开始训练水头模型...")

    # ==================== Stage 1: head model ====================
    head_model = HeadCNN(input_dim=head_input_dim, hidden_dim=config['hidden_dim']).to(device)
    head_optimizer = torch.optim.AdamW(
        head_model.parameters(),
        lr=config['lr'],
        weight_decay=config['weight_decay']
    )
    head_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(head_optimizer, T_max=config['num_epochs'])

    best_head_val_loss = float('inf')
    head_early_stop_counter = 0
    head_checkpoint_saved = False

    for epoch in range(config['num_epochs']):
        head_model.train()
        total_head_loss = 0

        for batch in train_loader:
            X_head = batch['X_head'].to(device)
            Y_head = batch['Y_head'].to(device)
            mask = batch['mask'].to(device).unsqueeze(1).unsqueeze(1)  # [B, 1, 1, M, N]

            head_optimizer.zero_grad()
            pred_head = head_model(X_head)

            loss_head = _masked_mse(pred_head, Y_head, mask)
            loss_head.backward()

            torch.nn.utils.clip_grad_norm_(head_model.parameters(), max_norm=1.0)
            head_optimizer.step()

            total_head_loss += loss_head.item()

        head_model.eval()
        total_head_val_loss = 0
        all_head_metrics = []

        with torch.no_grad():
            for batch in val_loader:
                X_head = batch['X_head'].to(device)
                Y_head = batch['Y_head'].to(device)
                mask = batch['mask'].to(device).unsqueeze(1).unsqueeze(1)

                pred_head = head_model(X_head)
                total_head_val_loss += _masked_mse(pred_head, Y_head, mask).item()

                all_head_metrics.append(
                    compute_metrics(Y_head, pred_head, batch['mask'], config['max_time_steps'])
                )

        avg_head_train_loss = total_head_loss / len(train_loader)
        avg_head_val_loss = total_head_val_loss / len(val_loader)

        avg_head_r2 = float(np.nanmean([m['r2'] for m in all_head_metrics]))
        avg_head_rmse = float(np.nanmean([m['rmse'] for m in all_head_metrics]))

        head_scheduler.step()
        current_lr = head_scheduler.get_last_lr()[0]

        print(f"水头模型 Epoch {epoch+1:03d}/{config['num_epochs']} | "
              f"训练损失: {avg_head_train_loss:.4f} | 验证损失: {avg_head_val_loss:.4f} | "
              f"R2: {avg_head_r2:.4f} | RMSE: {avg_head_rmse:.4f} | LR: {current_lr:.6f}")

        if avg_head_val_loss < best_head_val_loss:
            best_head_val_loss = avg_head_val_loss
            head_early_stop_counter = 0
            torch.save({
                'model_state_dict': head_model.state_dict(),
                'epoch': epoch,
                'train_loss': avg_head_train_loss,
                'val_loss': avg_head_val_loss,
                'r2': avg_head_r2,
                'config': plain_config
            }, head_ckpt_path)
            head_checkpoint_saved = True
            print(f"保存最佳水头模型，验证损失: {best_head_val_loss:.4f}")
        else:
            head_early_stop_counter += 1

        if head_early_stop_counter >= config['patience']:
            print(f"水头模型早停触发! 在第{epoch+1}个epoch停止训练")
            break

    if head_checkpoint_saved:
        # The file was written by this run, so full unpickling is acceptable;
        # map_location keeps the load independent of the saving device.
        best_head_checkpoint = torch.load(head_ckpt_path, map_location=device, weights_only=False)
        head_model.load_state_dict(best_head_checkpoint['model_state_dict'])
    else:
        # Nothing was saved (e.g. the validation loss was NaN throughout): do
        # not pick up a stale checkpoint left behind by an earlier run.
        print("警告: 本次训练未保存水头模型检查点，使用最后一个epoch的权重")
    head_model.eval()

    print(f"\n🔹 水头模型训练完成！最佳验证损失: {best_head_val_loss:.4f}")
    print("🔹 开始训练浓度模型...")

    # ==================== Stage 2: concentration model ====================
    conc_model = ConcCNN(input_dim=conc_input_dim, hidden_dim=config['hidden_dim']).to(device)
    conc_optimizer = torch.optim.AdamW(
        conc_model.parameters(),
        lr=config['lr'],
        weight_decay=config['weight_decay']
    )
    conc_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(conc_optimizer, T_max=config['num_epochs'])

    best_conc_val_loss = float('inf')
    best_conc_r2 = float('-inf')
    conc_early_stop_counter = 0

    for epoch in range(config['num_epochs']):
        conc_model.train()
        total_conc_loss = 0

        for batch in train_loader:
            X_head = batch['X_head'].to(device)
            X_conc_base = batch['X_conc_base'].to(device)
            Y_conc = batch['Y_conc'].to(device)
            mask = batch['mask'].to(device).unsqueeze(1).unsqueeze(1)

            # The head model is fixed during this stage.
            with torch.no_grad():
                pred_head = head_model(X_head)

            # Append the predicted head as the last input channel.
            X_conc = torch.cat([X_conc_base, pred_head], dim=2)

            conc_optimizer.zero_grad()
            pred_conc = conc_model(X_conc)

            loss_conc = _masked_mse(pred_conc, Y_conc, mask)
            loss_conc.backward()

            torch.nn.utils.clip_grad_norm_(conc_model.parameters(), max_norm=1.0)
            conc_optimizer.step()

            total_conc_loss += loss_conc.item()

        conc_model.eval()
        total_conc_val_loss = 0
        all_conc_metrics = []

        with torch.no_grad():
            for batch in val_loader:
                X_head = batch['X_head'].to(device)
                X_conc_base = batch['X_conc_base'].to(device)
                Y_conc = batch['Y_conc'].to(device)
                mask = batch['mask'].to(device).unsqueeze(1).unsqueeze(1)

                pred_head = head_model(X_head)
                X_conc = torch.cat([X_conc_base, pred_head], dim=2)
                pred_conc = conc_model(X_conc)

                total_conc_val_loss += _masked_mse(pred_conc, Y_conc, mask).item()

                all_conc_metrics.append(
                    compute_metrics(Y_conc, pred_conc, batch['mask'], config['max_time_steps'])
                )

        avg_conc_train_loss = total_conc_loss / len(train_loader)
        avg_conc_val_loss = total_conc_val_loss / len(val_loader)

        avg_conc_r2 = float(np.nanmean([m['r2'] for m in all_conc_metrics]))
        avg_conc_rmse = float(np.nanmean([m['rmse'] for m in all_conc_metrics]))

        conc_scheduler.step()
        current_lr = conc_scheduler.get_last_lr()[0]

        print(f"浓度模型 Epoch {epoch+1:03d}/{config['num_epochs']} | "
              f"训练损失: {avg_conc_train_loss:.4f} | 验证损失: {avg_conc_val_loss:.4f} | "
              f"R2: {avg_conc_r2:.4f} | RMSE: {avg_conc_rmse:.4f} | LR: {current_lr:.6f}")

        # Shared payload of both "best" checkpoints for this epoch.
        snapshot = {
            'head_model_state_dict': head_model.state_dict(),
            'conc_model_state_dict': conc_model.state_dict(),
            'epoch': epoch,
            'train_loss': avg_conc_train_loss,
            'val_loss': avg_conc_val_loss,
            'r2': avg_conc_r2,
            'config': plain_config,
        }

        # Best by validation loss
        if avg_conc_val_loss < best_conc_val_loss:
            best_conc_val_loss = avg_conc_val_loss
            torch.save({**snapshot, 'criterion': 'loss'}, conc_loss_ckpt_path)

        # Best by R2; this criterion also drives early stopping
        if avg_conc_r2 > best_conc_r2:
            best_conc_r2 = avg_conc_r2
            conc_early_stop_counter = 0
            torch.save({**snapshot, 'criterion': 'r2'}, conc_r2_ckpt_path)
            print(f"保存基于R2的最佳浓度模型，R2: {best_conc_r2:.4f}")
        else:
            conc_early_stop_counter += 1

        if conc_early_stop_counter >= config['patience']:
            print(f"浓度模型早停触发! 在第{epoch+1}个epoch停止训练")
            break

    print(f"\n🔹 浓度模型训练完成！")
    print(f"最佳验证损失: {best_conc_val_loss:.4f}")
    print(f"最佳R2: {best_conc_r2:.4f}")

    return head_model, conc_model

# Evaluation
def _infer_convlstm_input_dim(state_dict, hidden_dim, fallback):
    """Recover a model's input channel count from its saved ConvLSTM weights.

    The first cell's convolution takes ``input_dim + hidden_dim`` channels, so
    the input width is the weight's second dimension minus ``hidden_dim``.
    ``fallback`` is returned when the expected key is absent.
    """
    weight = state_dict.get('convlstm.cell_list.0.conv.weight')
    if weight is None:
        return fallback
    return int(weight.shape[1]) - hidden_dim


def evaluate_dual_cnn(data_loader, config):
    """Evaluate the saved head/concentration model pair on ``data_loader``.

    Loads ``best_conc_model_r2.pth`` from ``config['save_path']``, computes
    per-sample metrics, prints their averages and writes the per-sample
    R2/RMSE values to ``val_predictions_cnn.csv`` in the same directory.

    Args:
        data_loader: DataLoader yielding batches with ``X_head``,
            ``X_conc_base``, ``Y_head``, ``Y_conc``, ``mask`` and ``model_name``.
        config: Dict with ``save_path``, ``hidden_dim`` and ``max_time_steps``.

    Returns:
        Tuple ``(avg_head_metrics, avg_conc_metrics)`` of dicts with the keys
        ``mse``, ``rmse``, ``mae`` and ``r2``.
    """
    # The checkpoint is produced by this project's own training run, so full
    # unpickling is acceptable; map_location makes the load independent of the
    # device the checkpoint was saved from.
    checkpoint = torch.load(
        os.path.join(config['save_path'], 'best_conc_model_r2.pth'),
        map_location=device,
        weights_only=False,
    )

    # Build the models with the widths they were trained with rather than
    # hard-coded values that can drift from the training code.
    hidden_dim = config['hidden_dim']
    head_input_dim = _infer_convlstm_input_dim(checkpoint['head_model_state_dict'], hidden_dim, 16)
    conc_input_dim = _infer_convlstm_input_dim(checkpoint['conc_model_state_dict'], hidden_dim, 19)

    head_model = HeadCNN(input_dim=head_input_dim, hidden_dim=hidden_dim).to(device)
    conc_model = ConcCNN(input_dim=conc_input_dim, hidden_dim=hidden_dim).to(device)

    head_model.load_state_dict(checkpoint['head_model_state_dict'])
    conc_model.load_state_dict(checkpoint['conc_model_state_dict'])

    head_model.eval()
    conc_model.eval()

    all_head_metrics = []
    all_conc_metrics = []
    all_predictions = []

    print("开始模型评估...")

    with torch.no_grad():
        for batch in data_loader:
            X_head = batch['X_head'].to(device)
            X_conc_base = batch['X_conc_base'].to(device)
            Y_head = batch['Y_head'].to(device)
            Y_conc = batch['Y_conc'].to(device)
            mask = batch['mask'].to(device)
            model_names = batch['model_name']

            pred_head = head_model(X_head)

            # The concentration model sees the predicted head as an extra channel.
            X_conc = torch.cat([X_conc_base, pred_head], dim=2)
            pred_conc = conc_model(X_conc)

            # Metrics are computed per sample, keeping the batch dimension.
            for i, model_name in enumerate(model_names):
                sample = slice(i, i + 1)
                head_metrics = compute_metrics(Y_head[sample], pred_head[sample], mask[sample], config['max_time_steps'])
                conc_metrics = compute_metrics(Y_conc[sample], pred_conc[sample], mask[sample], config['max_time_steps'])

                all_head_metrics.append(head_metrics)
                all_conc_metrics.append(conc_metrics)

                all_predictions.append({
                    'model_name': model_name,
                    'head_r2': head_metrics['r2'],
                    'head_rmse': head_metrics['rmse'],
                    'conc_r2': conc_metrics['r2'],
                    'conc_rmse': conc_metrics['rmse']
                })

    metric_names = ('mse', 'rmse', 'mae', 'r2')
    avg_head_metrics = {
        name: np.nanmean([m[name] for m in all_head_metrics]) for name in metric_names
    }
    avg_conc_metrics = {
        name: np.nanmean([m[name] for m in all_conc_metrics]) for name in metric_names
    }

    print("\n📊 CNN模型验证结果:")
    print("\n🔹 水头指标:")
    for k, v in avg_head_metrics.items():
        print(f"{k.upper():<5}: {v:.4f}")
    print("\n🔹 浓度指标:")
    for k, v in avg_conc_metrics.items():
        print(f"{k.upper():<5}: {v:.4f}")

    predictions_path = os.path.join(config['save_path'], 'val_predictions_cnn.csv')
    predictions_df = pd.DataFrame(all_predictions)
    predictions_df.to_csv(predictions_path, index=False)
    print(f"\n预测结果已保存到: {predictions_path}")

    return avg_head_metrics, avg_conc_metrics

# Main Execution
if __name__ == "__main__":
    # Load the data
    cleaned_data = pd.read_csv('conc_dual_guass.csv')
    print("数据统计:")
    print(cleaned_data[['head', 'concentration']].describe())

    # Fail early when a required column is missing
    required_cols = [
        'row', 'col', 'time_step', 'x', 'y', 'top', 'bottom', 'K', 'recharge', 'ET',
        'river_stage', 'river_cond', 'river_rbot', 'well_rate', 'well_mask',
        'chd_mask', 'lytyp', 'conc_mask', 'head', 'concentration', 'model_name'
    ]

    missing_cols = [col for col in required_cols if col not in cleaned_data.columns]
    if missing_cols:
        raise KeyError(f"缺少必要的列: {missing_cols}")

    # Dataset dimensions. Cast to built-in int: pandas returns NumPy scalars,
    # which would otherwise end up in `config` and in the saved checkpoints.
    M = int(cleaned_data['row'].max()) + 1
    N = int(cleaned_data['col'].max()) + 1
    T = int(cleaned_data['time_step'].max() - cleaned_data['time_step'].min()) + 1

    print(f"网格大小: {M} x {N}, 时间步数: {T}")

    # Split by model name so a model never appears in both sets (70/30)
    unique_models = cleaned_data['model_name'].unique()
    print(f"总模型数: {len(unique_models)}")

    train_models, val_models = train_test_split(unique_models, test_size=0.3, random_state=42)

    train_data = cleaned_data[cleaned_data['model_name'].isin(train_models)]
    val_data = cleaned_data[cleaned_data['model_name'].isin(val_models)]

    print(f"训练集: {len(train_models)} 个模型 ({len(train_models)/len(unique_models)*100:.1f}%)")
    print(f"验证集: {len(val_models)} 个模型 ({len(val_models)/len(unique_models)*100:.1f}%)")

    # Build the datasets
    train_dataset = HydroCNNDataset(train_data, (M, N), T)
    val_dataset = HydroCNNDataset(val_data, (M, N), T)

    # Abort when either dataset is empty
    if len(train_dataset) == 0 or len(val_dataset) == 0:
        print("错误: 某个数据集为空！")
        print(f"训练集大小: {len(train_dataset)}")
        print(f"验证集大小: {len(val_dataset)}")
        # SystemExit instead of exit(): exit() is only injected by the `site`
        # module and is not guaranteed to exist.
        raise SystemExit(1)

    # Data loaders
    train_loader = DataLoader(train_dataset, batch_size=4, shuffle=True, collate_fn=custom_collate_fn)
    val_loader = DataLoader(val_dataset, batch_size=4, collate_fn=custom_collate_fn)

    # Training configuration
    config = {
        'hidden_dim': 96,
        'num_epochs': 500,
        'lr': 1e-3,
        'weight_decay': 1e-4,
        'patience': 30,
        'save_path': './saved_models/cnn_dual_sequential',
        'max_time_steps': T
    }

    print("开始训练CNN模型...")
    print(f"配置: {config}")

    # Train
    head_model, conc_model = train_dual_cnn(train_loader, val_loader, config)

    # Final evaluation on the validation set
    print("\n开始最终评估（使用验证集）...")
    head_metrics, conc_metrics = evaluate_dual_cnn(val_loader, config)

    print("\n🎉 CNN模型训练和评估完成！")
    print("\n📊 最终结果总结:")
    print(f"📈 水头模型 - R2: {head_metrics['r2']:.4f}, RMSE: {head_metrics['rmse']:.4f}")
    print(f"📈 浓度模型 - R2: {conc_metrics['r2']:.4f}, RMSE: {conc_metrics['rmse']:.4f}")

Using device: cuda:2


In [5]:
# Train both models (relies on train_loader, val_loader and config created by an earlier cell)
head_model, conc_model = train_dual_cnn(train_loader, val_loader, config)

# Final evaluation on the validation set
print("\n开始最终评估（使用验证集）...")
head_metrics, conc_metrics = evaluate_dual_cnn(val_loader, config)

# Summary: banner lines first, then one line per model
summary_lines = (
    "\n🎉 CNN模型训练和评估完成！",
    "\n📊 最终结果总结:",
    f"📈 水头模型 - R2: {head_metrics['r2']:.4f}, RMSE: {head_metrics['rmse']:.4f}",
    f"📈 浓度模型 - R2: {conc_metrics['r2']:.4f}, RMSE: {conc_metrics['rmse']:.4f}",
)
for summary_line in summary_lines:
    print(summary_line)

🔹 开始训练水头模型...
水头模型 Epoch 001/500 | 训练损失: 5373.6399 | 验证损失: 5198.3797 | R2: -267.7979 | RMSE: 93.7048 | LR: 0.001000
保存最佳水头模型，验证损失: 5198.3797
水头模型 Epoch 002/500 | 训练损失: 5109.2276 | 验证损失: 4989.8930 | R2: -257.0076 | RMSE: 91.8065 | LR: 0.001000
保存最佳水头模型，验证损失: 4989.8930
水头模型 Epoch 003/500 | 训练损失: 4909.7512 | 验证损失: 4798.4414 | R2: -247.0991 | RMSE: 90.0279 | LR: 0.001000
保存最佳水头模型，验证损失: 4798.4414
水头模型 Epoch 004/500 | 训练损失: 4721.7006 | 验证损失: 4615.4959 | R2: -237.6307 | RMSE: 88.2950 | LR: 0.001000
保存最佳水头模型，验证损失: 4615.4959
水头模型 Epoch 005/500 | 训练损失: 4536.7331 | 验证损失: 4431.2030 | R2: -228.0930 | RMSE: 86.5141 | LR: 0.001000
保存最佳水头模型，验证损失: 4431.2030
水头模型 Epoch 006/500 | 训练损失: 4364.7646 | 验证损失: 4265.3370 | R2: -219.5089 | RMSE: 84.8794 | LR: 0.001000
保存最佳水头模型，验证损失: 4265.3370
水头模型 Epoch 007/500 | 训练损失: 4201.4710 | 验证损失: 4099.9967 | R2: -210.9523 | RMSE: 83.2179 | LR: 0.001000
保存最佳水头模型，验证损失: 4099.9967
水头模型 Epoch 008/500 | 训练损失: 4035.5202 | 验证损失: 3932.0897 | R2: -202.2630 | RMSE: 81.4960 | LR: 0.00

KeyboardInterrupt: 

In [None]:
# Main Execution
if __name__ == "__main__":
    # Load the data
    cleaned_data = pd.read_csv('conc_dual_guass.csv')
    print("数据统计:")
    print(cleaned_data[['head', 'concentration']].describe())

    # Fail early when a required column is missing
    required_cols = [
        'row', 'col', 'time_step', 'x', 'y', 'top', 'bottom', 'K', 'recharge', 'ET',
        'river_stage', 'river_cond', 'river_rbot', 'well_rate', 'well_mask',
        'chd_mask', 'lytyp', 'conc_mask', 'head', 'concentration', 'model_name'
    ]

    missing_cols = [col for col in required_cols if col not in cleaned_data.columns]
    if missing_cols:
        raise KeyError(f"缺少必要的列: {missing_cols}")

    # Dataset dimensions. Cast to built-in int: pandas returns NumPy scalars,
    # which would otherwise end up in `config` and in the saved checkpoints.
    M = int(cleaned_data['row'].max()) + 1
    N = int(cleaned_data['col'].max()) + 1
    T = int(cleaned_data['time_step'].max() - cleaned_data['time_step'].min()) + 1

    print(f"网格大小: {M} x {N}, 时间步数: {T}")

    # Split by model name so a model never appears in both sets (70/30)
    unique_models = cleaned_data['model_name'].unique()
    print(f"总模型数: {len(unique_models)}")

    train_models, val_models = train_test_split(unique_models, test_size=0.3, random_state=42)

    train_data = cleaned_data[cleaned_data['model_name'].isin(train_models)]
    val_data = cleaned_data[cleaned_data['model_name'].isin(val_models)]

    print(f"训练集: {len(train_models)} 个模型 ({len(train_models)/len(unique_models)*100:.1f}%)")
    print(f"验证集: {len(val_models)} 个模型 ({len(val_models)/len(unique_models)*100:.1f}%)")

    # Build the datasets
    train_dataset = HydroCNNDataset(train_data, (M, N), T)
    val_dataset = HydroCNNDataset(val_data, (M, N), T)

    # Abort when either dataset is empty
    if len(train_dataset) == 0 or len(val_dataset) == 0:
        print("错误: 某个数据集为空！")
        print(f"训练集大小: {len(train_dataset)}")
        print(f"验证集大小: {len(val_dataset)}")
        # SystemExit instead of exit(): exit() is only injected by the `site`
        # module and is not guaranteed to exist.
        raise SystemExit(1)

    # Data loaders
    train_loader = DataLoader(train_dataset, batch_size=4, shuffle=True, collate_fn=custom_collate_fn)
    val_loader = DataLoader(val_dataset, batch_size=4, collate_fn=custom_collate_fn)

    # Training configuration
    config = {
        'hidden_dim': 96,
        'num_epochs': 500,
        'lr': 1e-3,
        'weight_decay': 1e-4,
        'patience': 30,
        'save_path': './saved_models/cnn_dual_sequential',
        'max_time_steps': T
    }

    print("开始训练CNN模型...")
    print(f"配置: {config}")

    # Train
    head_model, conc_model = train_dual_cnn(train_loader, val_loader, config)

    # Final evaluation on the validation set
    print("\n开始最终评估（使用验证集）...")
    head_metrics, conc_metrics = evaluate_dual_cnn(val_loader, config)

    print("\n🎉 CNN模型训练和评估完成！")
    print("\n📊 最终结果总结:")
    print(f"📈 水头模型 - R2: {head_metrics['r2']:.4f}, RMSE: {head_metrics['rmse']:.4f}")
    print(f"📈 浓度模型 - R2: {conc_metrics['r2']:.4f}, RMSE: {conc_metrics['rmse']:.4f}")

In [22]:
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import r2_score as sklearn_r2_score
import numpy as np
import os

# Device configuration: prefer GPU index 2, but fall back to GPU 0 when the
# host exposes fewer GPUs, and to the CPU when CUDA is unavailable. Selecting a
# non-existent ordinal would otherwise only fail later, on the first .to(device).
_PREFERRED_GPU_INDEX = 2
if torch.cuda.is_available():
    _gpu_index = _PREFERRED_GPU_INDEX if torch.cuda.device_count() > _PREFERRED_GPU_INDEX else 0
    device = torch.device(f'cuda:{_gpu_index}')
else:
    device = torch.device('cpu')
print(f"Using device: {device}")

# 经典2D CNN模型
class Classic2DCNN(nn.Module):
    """Frame-wise 2D CNN applied independently to every time step of a sequence.

    The network folds the time axis into the batch axis, runs four
    conv -> batch-norm -> ReLU -> channel-dropout stages (the first one
    down-samples by a factor of two), up-samples back to the input
    resolution and projects to a single non-negative output channel.

    Args:
        input_channels: Number of feature channels per grid cell.
        hidden_dim: Number of channels used by every hidden convolution.
        dropout: Drop probability of the ``Dropout2d`` layers.

    Shape:
        Input ``(B, T, C, H, W)`` -> output ``(B, T, 1, H, W)``.
    """

    def __init__(self, input_channels, hidden_dim=64, dropout=0.3):
        super().__init__()

        # Stage 1: larger 5x5 kernel with stride 2, i.e. spatial down-sampling.
        self.conv1 = nn.Conv2d(input_channels, hidden_dim, kernel_size=5, padding=2, stride=2)
        self.bn1 = nn.BatchNorm2d(hidden_dim)
        self.dropout1 = nn.Dropout2d(dropout)

        # Stages 2-4: 3x3 convolutions that keep both resolution and width.
        self.conv2 = nn.Conv2d(hidden_dim, hidden_dim, kernel_size=3, padding=1)
        self.bn2 = nn.BatchNorm2d(hidden_dim)
        self.dropout2 = nn.Dropout2d(dropout)

        self.conv3 = nn.Conv2d(hidden_dim, hidden_dim, kernel_size=3, padding=1)
        self.bn3 = nn.BatchNorm2d(hidden_dim)
        self.dropout3 = nn.Dropout2d(dropout)

        self.conv4 = nn.Conv2d(hidden_dim, hidden_dim, kernel_size=3, padding=1)
        self.bn4 = nn.BatchNorm2d(hidden_dim)
        self.dropout4 = nn.Dropout2d(dropout)

        # Plain nearest-neighbour up-sampling that undoes the stride-2 stage.
        self.upsample = nn.Upsample(scale_factor=2, mode='nearest')

        # 1x1 projection to the single predicted quantity.
        self.output_conv = nn.Conv2d(hidden_dim, 1, kernel_size=1)

    def forward(self, x):
        """Predict one value per grid cell and time step.

        Args:
            x: Tensor of shape ``(B, T, C, H, W)``.

        Returns:
            Tensor of shape ``(B, T, 1, H, W)``; values are >= 0 because the
            output layer is followed by a ReLU.
        """
        B, T, C, H, W = x.shape

        # Fold time into the batch axis. reshape (not view) so that
        # non-contiguous inputs, e.g. transposed tensors, are accepted too.
        x = x.reshape(B * T, C, H, W)

        x = self.dropout1(F.relu(self.bn1(self.conv1(x))))  # down-sample
        x = self.dropout2(F.relu(self.bn2(self.conv2(x))))
        x = self.dropout3(F.relu(self.bn3(self.conv3(x))))
        x = self.dropout4(F.relu(self.bn4(self.conv4(x))))

        # The stride-2 stage rounds odd sizes up (ceil(H / 2)), so doubling
        # yields H + 1 for odd H. Crop the surplus row/column so the output
        # always matches the input grid; for even sizes this is a no-op.
        x = self.upsample(x)[..., :H, :W]

        x = F.relu(self.output_conv(x))

        # Restore the time axis.
        return x.reshape(B, T, 1, H, W)

# 水头模型 - 16维输入
class HeadCNN2D(nn.Module):
    """Hydraulic-head predictor: a thin named wrapper around ``Classic2DCNN``.

    Args:
        input_dim: Number of input channels per grid cell (default 16).
        hidden_dim: Hidden channel width forwarded to the backbone.
        dropout: Dropout probability forwarded to the backbone.
    """

    def __init__(self, input_dim=16, hidden_dim=64, dropout=0.3):
        super().__init__()
        # The backbone is stored under ``cnn``; checkpoints depend on that name.
        backbone = Classic2DCNN(input_dim, hidden_dim, dropout)
        self.cnn = backbone

    def forward(self, x):
        """Run the wrapped backbone on ``x`` and return its output unchanged."""
        prediction = self.cnn(x)
        return prediction

# Concentration model - 20 input channels (19 base/history channels + 1 predicted-head channel)
class ConcCNN2D(nn.Module):
    """Concentration predictor: a thin named wrapper around ``Classic2DCNN``.

    Args:
        input_dim: Number of input channels per grid cell (default 20).
        hidden_dim: Hidden channel width forwarded to the backbone.
        dropout: Dropout probability forwarded to the backbone.
    """

    def __init__(self, input_dim=20, hidden_dim=64, dropout=0.3):
        super().__init__()
        # The backbone is stored under ``cnn``; checkpoints depend on that name.
        backbone = Classic2DCNN(input_dim, hidden_dim, dropout)
        self.cnn = backbone

    def forward(self, x):
        """Run the wrapped backbone on ``x`` and return its output unchanged."""
        prediction = self.cnn(x)
        return prediction

# Dataset Class - 修改预处理部分，使用您原来的历史特征计算方式
class HydroCNNDataset(Dataset):
    """Dataset that rasterises tabular cell records into dense per-simulation grids.

    Every distinct ``model_name`` in ``data`` becomes one sample. All samples
    are built eagerly in the constructor and kept in ``cached_data``.

    Each sample is a dict with:
        ``X_head``       float32 ``(T, 16, M, N)``: 14 base features, then
                         head at t-1 and head at t-2.
        ``X_conc_base``  float32 ``(T, 19, M, N)``: 15 base features
                         (base + ``conc_mask``), then head at t-1, head at
                         t-2, concentration at t-1, concentration at t-2.
        ``Y_head``       float32 ``(T, 1, M, N)`` target head.
        ``Y_conc``       float32 ``(T, 1, M, N)`` target concentration.
        ``mask``         float32 ``(M, N)``, 1 where a cell was seen at any
                         processed time step.
        ``model_name``   the simulation identifier.

    Args:
        data: DataFrame with ``model_name``, ``time_step``, ``row``, ``col``,
            ``head``, ``concentration`` and all feature columns.
        grid_size: ``(M, N)`` grid dimensions.
        max_time_steps: Number of time steps ``T`` stored per sample.
    """

    def __init__(self, data, grid_size, max_time_steps):
        self.data = data.reset_index(drop=True).copy()
        self.grid_size = grid_size
        self.max_time_steps = max_time_steps
        self.models = self.data['model_name'].unique()
        self.cached_data = {}

        # Base feature columns shared by both models (14 channels).
        self.base_feature_cols = [
            'x', 'y', 'top', 'bottom', 'K', 'recharge', 'ET',
            'river_stage', 'river_cond', 'river_rbot', 'well_rate', 'well_mask',
            'chd_mask', 'lytyp'
        ]

        # The concentration model additionally sees conc_mask (15 channels).
        self.conc_base_feature_cols = self.base_feature_cols + ['conc_mask']

        self._normalize_features()
        self._preprocess_data()

    def _normalize_features(self):
        """Standardise the continuous base features separately per simulation.

        The indicator / categorical columns ``well_mask``, ``chd_mask`` and
        ``lytyp`` are left untouched. ``self.data`` is modified in place.
        """
        print("开始特征标准化...")

        excluded = {'well_mask', 'chd_mask', 'lytyp'}
        float_cols = [col for col in self.base_feature_cols if col not in excluded]

        # Integer-typed columns cannot hold the scaled values; writing floats
        # into them through .loc is deprecated in recent pandas, so upcast
        # those columns once, up front.
        non_float_cols = [
            col for col in float_cols
            if not pd.api.types.is_float_dtype(self.data[col])
        ]
        if non_float_cols:
            self.data[non_float_cols] = self.data[non_float_cols].astype(np.float64)

        for model_name in self.models:
            model_rows = self.data['model_name'] == model_name
            float_data = self.data.loc[model_rows, float_cols].values
            if float_data.size > 0:
                self.data.loc[model_rows, float_cols] = StandardScaler().fit_transform(float_data)

    def _preprocess_data(self):
        """Rasterise every simulation onto dense grids and fill ``cached_data``.

        Lagged head / concentration channels are read from the target grids
        of the previous time steps, so a cell that was absent at t-1 (or t-2)
        gets 0. At t = 0 both lags use the current value; at t = 1 the t-2 lag
        repeats the t-1 lag.
        """
        print("预处理CNN数据...")
        M, N = self.grid_size
        T = self.max_time_steps

        n_base = len(self.base_feature_cols)
        n_conc_base = len(self.conc_base_feature_cols)
        # Channel positions of the history features.
        head_lag1, head_lag2 = n_base, n_base + 1
        conc_head_lag1, conc_head_lag2 = n_conc_base, n_conc_base + 1
        conc_conc_lag1, conc_conc_lag2 = n_conc_base + 2, n_conc_base + 3

        for model_idx, model_name in enumerate(self.models):
            print(f"处理模型 {model_idx+1}/{len(self.models)}: {model_name}")

            model_df = self.data[self.data['model_name'] == model_name].copy().reset_index(drop=True)
            if len(model_df) == 0:
                print(f"警告: 模型 {model_name} 没有数据，跳过")
                continue

            # Shift time steps so every simulation starts at 0.
            model_df['time_step'] = model_df['time_step'] - model_df['time_step'].min()

            X_head = np.zeros((T, n_base + 2, M, N), dtype=np.float32)
            X_conc_base = np.zeros((T, n_conc_base + 4, M, N), dtype=np.float32)
            Y_head = np.zeros((T, 1, M, N), dtype=np.float32)
            Y_conc = np.zeros((T, 1, M, N), dtype=np.float32)
            active_cells = np.zeros((M, N), dtype=np.float32)

            # int() keeps range() working when time_step is stored as float.
            max_t = min(T, int(model_df['time_step'].max()) + 1)
            for t in range(max_t):
                if t % 50 == 0:  # progress message every 50 time steps
                    print(f"    处理时间步 {t}/{max_t}")

                t_df = model_df[model_df['time_step'] == t]
                if len(t_df) == 0:
                    continue

                rows = t_df['row'].values.astype(int)
                cols = t_df['col'].values.astype(int)

                # Drop records that fall outside the grid.
                in_grid = (rows >= 0) & (rows < M) & (cols >= 0) & (cols < N)
                if not np.any(in_grid):
                    continue

                rows = rows[in_grid]
                cols = cols[in_grid]
                t_df_valid = t_df.iloc[in_grid]

                # Static / forcing features.
                for feat_idx, feat_name in enumerate(self.base_feature_cols):
                    X_head[t, feat_idx, rows, cols] = t_df_valid[feat_name].values
                for feat_idx, feat_name in enumerate(self.conc_base_feature_cols):
                    X_conc_base[t, feat_idx, rows, cols] = t_df_valid[feat_name].values

                # Targets.
                head_now = t_df_valid['head'].values
                conc_now = t_df_valid['concentration'].values
                Y_head[t, 0, rows, cols] = head_now
                Y_conc[t, 0, rows, cols] = conc_now

                active_cells[rows, cols] = 1

                # History features. The target grids of earlier steps already
                # hold the values keyed by (row, col), so they are gathered
                # with a vectorised lookup rather than per-cell dict probing.
                if t == 0:
                    prev_head = prev2_head = head_now
                    prev_conc = prev2_conc = conc_now
                else:
                    prev_head = Y_head[t - 1, 0, rows, cols]
                    prev_conc = Y_conc[t - 1, 0, rows, cols]
                    if t == 1:
                        prev2_head, prev2_conc = prev_head, prev_conc
                    else:
                        prev2_head = Y_head[t - 2, 0, rows, cols]
                        prev2_conc = Y_conc[t - 2, 0, rows, cols]

                X_head[t, head_lag1, rows, cols] = prev_head
                X_head[t, head_lag2, rows, cols] = prev2_head
                X_conc_base[t, conc_head_lag1, rows, cols] = prev_head
                X_conc_base[t, conc_head_lag2, rows, cols] = prev2_head
                X_conc_base[t, conc_conc_lag1, rows, cols] = prev_conc
                X_conc_base[t, conc_conc_lag2, rows, cols] = prev2_conc

            # Cache the finished sample as tensors.
            self.cached_data[model_name] = {
                'X_head': torch.from_numpy(X_head),
                'X_conc_base': torch.from_numpy(X_conc_base),
                'Y_head': torch.from_numpy(Y_head),
                'Y_conc': torch.from_numpy(Y_conc),
                'mask': torch.from_numpy(active_cells),
                'model_name': model_name
            }

            print(f"  模型 {model_name} 处理完成")

        print(f"预处理完成！处理了 {len(self.cached_data)} 个模型")
        if len(self.cached_data) > 0:
            sample_data = next(iter(self.cached_data.values()))
            print(f"特征维度 - 水头: {sample_data['X_head'].shape[1]}, 浓度基础: {sample_data['X_conc_base'].shape[1]}")

    def __len__(self):
        """Return the number of cached simulations."""
        return len(self.cached_data)

    def __getitem__(self, idx):
        """Return the cached sample dict of the ``idx``-th simulation."""
        model_name = list(self.cached_data)[idx]
        return self.cached_data[model_name]

# Custom Collate Function
def custom_collate_fn(batch):
    """Merge a list of per-simulation sample dicts into one batch dict.

    The tensor entries are stacked along a new leading batch axis, while
    ``model_name`` is gathered into a plain Python list. Any other key present
    in the samples is ignored.
    """
    tensor_keys = ('X_head', 'X_conc_base', 'Y_head', 'Y_conc', 'mask')
    batched = {
        name: torch.stack([sample[name] for sample in batch])
        for name in tensor_keys
    }
    batched['model_name'] = [sample['model_name'] for sample in batch]
    return batched

# Metrics Computation
def compute_metrics(y_true, y_pred, mask, T):
    """Compute masked regression metrics (MSE, RMSE, MAE, R2).

    Args:
        y_true: Targets of shape ``(B, T, 1, M, N)`` (tensor or ndarray).
        y_pred: Predictions with the same shape as ``y_true``.
        mask: Cell mask of shape ``(B, M, N)``; cells with a value > 0 count.
        T: Number of time steps the mask is repeated over.

    Returns:
        Dict with keys ``mse``, ``rmse``, ``mae`` and ``r2``. All four are NaN
        when no finite masked value remains.
    """
    def _as_numpy(value):
        # Tensors are detached and moved to host memory; arrays pass through.
        if isinstance(value, torch.Tensor):
            return value.detach().cpu().numpy()
        return value

    y_true, y_pred, mask = (_as_numpy(v) for v in (y_true, y_pred, mask))

    # Expand the (B, M, N) mask to (B, T, 1, M, N) so it lines up with the data.
    cell_mask = np.tile(mask[:, None, None, :, :], (1, T, 1, 1, 1))
    selected = cell_mask > 0
    y_true = y_true[selected]
    y_pred = y_pred[selected]

    # Discard NaN / inf entries on either side.
    finite = np.isfinite(y_true) & np.isfinite(y_pred)
    y_true = y_true[finite]
    y_pred = y_pred[finite]

    if y_true.size == 0:
        return dict.fromkeys(('mse', 'rmse', 'mae', 'r2'), np.nan)

    residual = y_true - y_pred
    mse = np.mean(residual ** 2)
    rmse = np.sqrt(mse)
    mae = np.mean(np.abs(residual))

    # Coefficient of determination; the epsilon guards constant targets.
    ss_res = np.sum(residual ** 2)
    ss_tot = np.sum((y_true - np.mean(y_true)) ** 2)
    r2 = 1 - (ss_res / (ss_tot + 1e-8))

    return {'mse': mse, 'rmse': rmse, 'mae': mae, 'r2': r2}

# 训练函数 - 分步训练
def train_dual_cnn_2d(train_loader, val_loader, config):
    """Train the head CNN, then the concentration CNN on top of the frozen head CNN.

    Stage 1 trains ``HeadCNN2D`` with early stopping on the validation loss
    and reloads the best checkpoint. Stage 2 trains ``ConcCNN2D`` on the
    dataset's concentration features concatenated with the (frozen) head
    prediction; early stopping there is driven by the validation R2.

    Args:
        train_loader: Loader yielding batches produced by ``custom_collate_fn``.
        val_loader: Loader used for validation, same batch format.
        config: Dict with ``save_path``, ``hidden_dim``, ``lr``,
            ``weight_decay``, ``num_epochs``, ``patience`` and
            ``max_time_steps``.

    Returns:
        ``(head_model, conc_model)``. ``head_model`` carries its best
        checkpoint weights; ``conc_model`` carries the weights of the last
        epoch that ran (the best ones are only written to disk).

    Raises:
        RuntimeError: If stage 1 never produced a checkpoint, i.e. the
            validation loss never improved on +inf (NaN/inf losses or zero
            epochs).

    Side effects:
        Writes ``best_head_model_2d.pth``, ``best_conc_model_2d_loss.pth`` and
        ``best_conc_model_2d_r2.pth`` into ``config['save_path']``.
    """

    # Make sure the checkpoint directory exists.
    os.makedirs(config['save_path'], exist_ok=True)
    head_checkpoint_path = os.path.join(config['save_path'], 'best_head_model_2d.pth')

    print("🔹 开始训练经典2D CNN水头模型...")

    # ==================== Stage 1: train the head model ====================
    head_model = HeadCNN2D(input_dim=16, hidden_dim=config['hidden_dim']).to(device)
    head_optimizer = torch.optim.AdamW(
        head_model.parameters(),
        lr=config['lr'],
        weight_decay=config['weight_decay']
    )
    head_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(head_optimizer, T_max=config['num_epochs'])

    best_head_val_loss = float('inf')
    head_early_stop_counter = 0
    # Tracks whether THIS run wrote a head checkpoint, so that a stale file
    # left in save_path by an earlier run is never loaded by accident.
    head_checkpoint_saved = False

    for epoch in range(config['num_epochs']):
        # ---- training pass ----
        head_model.train()
        total_head_loss = 0

        for batch in train_loader:
            X_head = batch['X_head'].to(device)
            Y_head = batch['Y_head'].to(device)
            mask = batch['mask'].to(device).unsqueeze(1).unsqueeze(1)  # [B, 1, 1, M, N]

            head_optimizer.zero_grad()
            pred_head = head_model(X_head)

            # Masked loss: inactive cells contribute zero error.
            loss_head = F.mse_loss(pred_head * mask, Y_head * mask)
            loss_head.backward()

            # Gradient clipping.
            torch.nn.utils.clip_grad_norm_(head_model.parameters(), max_norm=1.0)
            head_optimizer.step()

            total_head_loss += loss_head.item()

        # ---- validation pass ----
        head_model.eval()
        total_head_val_loss = 0
        all_head_metrics = []

        with torch.no_grad():
            for batch in val_loader:
                X_head = batch['X_head'].to(device)
                Y_head = batch['Y_head'].to(device)
                mask = batch['mask'].to(device).unsqueeze(1).unsqueeze(1)

                pred_head = head_model(X_head)
                loss_head = F.mse_loss(pred_head * mask, Y_head * mask)
                total_head_val_loss += loss_head.item()

                # Per-batch metrics.
                head_metrics = compute_metrics(Y_head, pred_head, batch['mask'], config['max_time_steps'])
                all_head_metrics.append(head_metrics)

        avg_head_train_loss = total_head_loss / len(train_loader)
        avg_head_val_loss = total_head_val_loss / len(val_loader)

        # Average the per-batch metrics, ignoring NaN batches.
        avg_head_r2 = np.nanmean([m['r2'] for m in all_head_metrics])
        avg_head_rmse = np.nanmean([m['rmse'] for m in all_head_metrics])

        head_scheduler.step()
        current_lr = head_scheduler.get_last_lr()[0]

        print(f"水头2D CNN Epoch {epoch+1:03d}/{config['num_epochs']} | "
              f"训练损失: {avg_head_train_loss:.4f} | 验证损失: {avg_head_val_loss:.4f} | "
              f"R2: {avg_head_r2:.4f} | RMSE: {avg_head_rmse:.4f} | LR: {current_lr:.6f}")

        # Keep the best head model (lowest validation loss).
        if avg_head_val_loss < best_head_val_loss:
            best_head_val_loss = avg_head_val_loss
            head_early_stop_counter = 0
            torch.save({
                'model_state_dict': head_model.state_dict(),
                'epoch': epoch,
                'train_loss': avg_head_train_loss,
                'val_loss': avg_head_val_loss,
                'r2': avg_head_r2,
                'config': config
            }, head_checkpoint_path)
            head_checkpoint_saved = True
            print(f"保存最佳2D CNN水头模型，验证损失: {best_head_val_loss:.4f}")
        else:
            head_early_stop_counter += 1

        # Early stopping.
        if head_early_stop_counter >= config['patience']:
            print(f"水头模型早停触发! 在第{epoch+1}个epoch停止训练")
            break

    if not head_checkpoint_saved:
        raise RuntimeError(
            "水头模型的验证损失从未得到有效改善，本次运行没有保存任何检查点，无法继续训练浓度模型"
        )

    # Reload the best head model. weights_only=False is needed because the
    # checkpoint also stores the config dict and metric scalars; the file was
    # written by this very run, so it is trusted.
    best_head_checkpoint = torch.load(head_checkpoint_path, map_location=device, weights_only=False)
    head_model.load_state_dict(best_head_checkpoint['model_state_dict'])
    head_model.eval()

    print(f"\n🔹 2D CNN水头模型训练完成！最佳验证损失: {best_head_val_loss:.4f}")
    print("🔹 开始训练2D CNN浓度模型...")

    # ==================== Stage 2: train the concentration model ====================
    conc_model = ConcCNN2D(input_dim=20, hidden_dim=config['hidden_dim']).to(device)
    conc_optimizer = torch.optim.AdamW(
        conc_model.parameters(),
        lr=config['lr'],
        weight_decay=config['weight_decay']
    )
    conc_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(conc_optimizer, T_max=config['num_epochs'])

    best_conc_val_loss = float('inf')
    best_conc_r2 = float('-inf')
    conc_early_stop_counter = 0

    for epoch in range(config['num_epochs']):
        # ---- training pass ----
        conc_model.train()
        total_conc_loss = 0

        for batch in train_loader:
            X_head = batch['X_head'].to(device)
            X_conc_base = batch['X_conc_base'].to(device)
            Y_conc = batch['Y_conc'].to(device)
            mask = batch['mask'].to(device).unsqueeze(1).unsqueeze(1)

            # The head model is frozen: predict without tracking gradients.
            with torch.no_grad():
                pred_head = head_model(X_head)

            # Concentration input = dataset channels + predicted head channel
            # (one extra channel, matching ConcCNN2D's input_dim of 20).
            X_conc = torch.cat([X_conc_base, pred_head], dim=2)

            conc_optimizer.zero_grad()
            pred_conc = conc_model(X_conc)

            # Masked loss: inactive cells contribute zero error.
            loss_conc = F.mse_loss(pred_conc * mask, Y_conc * mask)
            loss_conc.backward()

            # Gradient clipping.
            torch.nn.utils.clip_grad_norm_(conc_model.parameters(), max_norm=1.0)
            conc_optimizer.step()

            total_conc_loss += loss_conc.item()

        # ---- validation pass ----
        conc_model.eval()
        total_conc_val_loss = 0
        all_conc_metrics = []

        with torch.no_grad():
            for batch in val_loader:
                X_head = batch['X_head'].to(device)
                X_conc_base = batch['X_conc_base'].to(device)
                Y_conc = batch['Y_conc'].to(device)
                mask = batch['mask'].to(device).unsqueeze(1).unsqueeze(1)

                pred_head = head_model(X_head)
                X_conc = torch.cat([X_conc_base, pred_head], dim=2)
                pred_conc = conc_model(X_conc)

                loss_conc = F.mse_loss(pred_conc * mask, Y_conc * mask)
                total_conc_val_loss += loss_conc.item()

                # Per-batch metrics.
                conc_metrics = compute_metrics(Y_conc, pred_conc, batch['mask'], config['max_time_steps'])
                all_conc_metrics.append(conc_metrics)

        avg_conc_train_loss = total_conc_loss / len(train_loader)
        avg_conc_val_loss = total_conc_val_loss / len(val_loader)

        # Average the per-batch metrics, ignoring NaN batches.
        avg_conc_r2 = np.nanmean([m['r2'] for m in all_conc_metrics])
        avg_conc_rmse = np.nanmean([m['rmse'] for m in all_conc_metrics])

        conc_scheduler.step()
        current_lr = conc_scheduler.get_last_lr()[0]

        print(f"浓度2D CNN Epoch {epoch+1:03d}/{config['num_epochs']} | "
              f"训练损失: {avg_conc_train_loss:.4f} | 验证损失: {avg_conc_val_loss:.4f} | "
              f"R2: {avg_conc_r2:.4f} | RMSE: {avg_conc_rmse:.4f} | LR: {current_lr:.6f}")

        # Checkpoint selected by validation loss.
        if avg_conc_val_loss < best_conc_val_loss:
            best_conc_val_loss = avg_conc_val_loss
            torch.save({
                'head_model_state_dict': head_model.state_dict(),
                'conc_model_state_dict': conc_model.state_dict(),
                'epoch': epoch,
                'train_loss': avg_conc_train_loss,
                'val_loss': avg_conc_val_loss,
                'r2': avg_conc_r2,
                'config': config,
                'criterion': 'loss'
            }, os.path.join(config['save_path'], 'best_conc_model_2d_loss.pth'))

        # Checkpoint selected by R2; this criterion also drives early stopping.
        if avg_conc_r2 > best_conc_r2:
            best_conc_r2 = avg_conc_r2
            conc_early_stop_counter = 0
            torch.save({
                'head_model_state_dict': head_model.state_dict(),
                'conc_model_state_dict': conc_model.state_dict(),
                'epoch': epoch,
                'train_loss': avg_conc_train_loss,
                'val_loss': avg_conc_val_loss,
                'r2': avg_conc_r2,
                'config': config,
                'criterion': 'r2'
            }, os.path.join(config['save_path'], 'best_conc_model_2d_r2.pth'))
            print(f"保存基于R2的最佳2D CNN浓度模型，R2: {best_conc_r2:.4f}")
        else:
            conc_early_stop_counter += 1

        # Early stopping.
        if conc_early_stop_counter >= config['patience']:
            print(f"浓度模型早停触发! 在第{epoch+1}个epoch停止训练")
            break

    print(f"\n🔹 2D CNN浓度模型训练完成！")
    print(f"最佳验证损失: {best_conc_val_loss:.4f}")
    print(f"最佳R2: {best_conc_r2:.4f}")

    return head_model, conc_model

# 评估函数
def evaluate_dual_cnn_2d(data_loader, config):
    """Evaluate the trained head / concentration CNN pair on ``data_loader``.

    Loads ``best_conc_model_2d_r2.pth`` from ``config['save_path']``, computes
    metrics separately for every simulation in the loader, prints their
    averages and writes the per-simulation R2 / RMSE values to
    ``val_predictions_2d_cnn.csv`` in the same directory.

    Args:
        data_loader: Loader yielding batches produced by ``custom_collate_fn``.
        config: Dict with ``save_path``, ``hidden_dim`` and ``max_time_steps``.

    Returns:
        ``(avg_head_metrics, avg_conc_metrics)``: two dicts with the keys
        ``mse``, ``rmse``, ``mae`` and ``r2``, averaged over simulations while
        ignoring NaN entries.
    """
    checkpoint_path = os.path.join(config['save_path'], 'best_conc_model_2d_r2.pth')
    predictions_path = os.path.join(config['save_path'], 'val_predictions_2d_cnn.csv')

    # map_location lets a checkpoint written on one GPU be evaluated on any
    # device. NOTE: weights_only=False unpickles arbitrary Python objects (the
    # checkpoint also stores the config dict and metric scalars), so only load
    # checkpoint files from a trusted source.
    checkpoint = torch.load(checkpoint_path, map_location=device, weights_only=False)

    head_model = HeadCNN2D(input_dim=16, hidden_dim=config['hidden_dim']).to(device)
    conc_model = ConcCNN2D(input_dim=20, hidden_dim=config['hidden_dim']).to(device)

    head_model.load_state_dict(checkpoint['head_model_state_dict'])
    conc_model.load_state_dict(checkpoint['conc_model_state_dict'])

    head_model.eval()
    conc_model.eval()

    all_head_metrics = []
    all_conc_metrics = []
    all_predictions = []

    print("开始2D CNN模型评估...")

    with torch.no_grad():
        for batch in data_loader:
            X_head = batch['X_head'].to(device)
            X_conc_base = batch['X_conc_base'].to(device)
            Y_head = batch['Y_head'].to(device)
            Y_conc = batch['Y_conc'].to(device)
            # The mask is only consumed by compute_metrics, which works on the
            # host, so there is no point in moving it to the device first.
            mask = batch['mask']
            model_names = batch['model_name']

            # Head prediction, then concentration prediction on top of it.
            pred_head = head_model(X_head)
            X_conc = torch.cat([X_conc_base, pred_head], dim=2)
            pred_conc = conc_model(X_conc)

            # Metrics per simulation (slices keep the batch axis).
            for i, model_name in enumerate(model_names):
                one = slice(i, i + 1)
                head_metrics = compute_metrics(Y_head[one], pred_head[one], mask[one], config['max_time_steps'])
                conc_metrics = compute_metrics(Y_conc[one], pred_conc[one], mask[one], config['max_time_steps'])

                all_head_metrics.append(head_metrics)
                all_conc_metrics.append(conc_metrics)

                all_predictions.append({
                    'model_name': model_name,
                    'head_r2': head_metrics['r2'],
                    'head_rmse': head_metrics['rmse'],
                    'conc_r2': conc_metrics['r2'],
                    'conc_rmse': conc_metrics['rmse']
                })

    # Average over simulations, ignoring NaN entries.
    metric_names = ('mse', 'rmse', 'mae', 'r2')
    avg_head_metrics = {
        name: np.nanmean([m[name] for m in all_head_metrics]) for name in metric_names
    }
    avg_conc_metrics = {
        name: np.nanmean([m[name] for m in all_conc_metrics]) for name in metric_names
    }

    # Report.
    print("\n📊 经典2D CNN模型验证结果:")
    print("\n🔹 水头指标:")
    for k, v in avg_head_metrics.items():
        print(f"{k.upper():<5}: {v:.4f}")
    print("\n🔹 浓度指标:")
    for k, v in avg_conc_metrics.items():
        print(f"{k.upper():<5}: {v:.4f}")

    # Persist the per-simulation scores.
    predictions_df = pd.DataFrame(all_predictions)
    predictions_df.to_csv(predictions_path, index=False)
    print(f"\n预测结果已保存到: {predictions_path}")

    return avg_head_metrics, avg_conc_metrics

# Main Execution
if __name__ == "__main__":
    # Load the tabular simulation records.
    cleaned_data = pd.read_csv('conc_dual_guass.csv')
    print("数据统计:")
    print(cleaned_data[['head', 'concentration']].describe())

    # Fail early if any column used downstream is missing.
    required_cols = [
        'row', 'col', 'time_step', 'x', 'y', 'top', 'bottom', 'K', 'recharge', 'ET',
        'river_stage', 'river_cond', 'river_rbot', 'well_rate', 'well_mask',
        'chd_mask', 'lytyp', 'conc_mask', 'head', 'concentration', 'model_name'
    ]

    missing_cols = [col for col in required_cols if col not in cleaned_data.columns]
    if missing_cols:
        raise KeyError(f"缺少必要的列: {missing_cols}")

    # Grid and sequence dimensions. Cast to plain int so they are valid array
    # dimensions even for float-typed columns and print cleanly inside config.
    M = int(cleaned_data['row'].max()) + 1
    N = int(cleaned_data['col'].max()) + 1
    T = int(cleaned_data['time_step'].max() - cleaned_data['time_step'].min()) + 1

    print(f"网格大小: {M} x {N}, 时间步数: {T}")

    # 70/30 split by simulation, so no simulation appears in both subsets.
    unique_models = cleaned_data['model_name'].unique()
    print(f"总模型数: {len(unique_models)}")

    train_models, val_models = train_test_split(unique_models, test_size=0.3, random_state=42)

    train_data = cleaned_data[cleaned_data['model_name'].isin(train_models)]
    val_data = cleaned_data[cleaned_data['model_name'].isin(val_models)]

    print(f"训练集: {len(train_models)} 个模型 ({len(train_models)/len(unique_models)*100:.1f}%)")
    print(f"验证集: {len(val_models)} 个模型 ({len(val_models)/len(unique_models)*100:.1f}%)")

    # Build the datasets (all preprocessing happens in the constructor).
    train_dataset = HydroCNNDataset(train_data, (M, N), T)
    val_dataset = HydroCNNDataset(val_data, (M, N), T)

    # Abort when either subset ended up empty.
    if len(train_dataset) == 0 or len(val_dataset) == 0:
        print("错误: 某个数据集为空！")
        print(f"训练集大小: {len(train_dataset)}")
        print(f"验证集大小: {len(val_dataset)}")
        # SystemExit instead of the interactive exit() helper, which is not
        # guaranteed to exist and may shut the kernel down inside a notebook.
        raise SystemExit(1)

    # Data loaders.
    train_loader = DataLoader(train_dataset, batch_size=4, shuffle=True, collate_fn=custom_collate_fn)
    val_loader = DataLoader(val_dataset, batch_size=4, collate_fn=custom_collate_fn)

    # Training configuration.
    config = {
        'hidden_dim': 96,
        'num_epochs': 300,
        'lr': 1e-3,
        'weight_decay': 1e-4,
        'patience': 30,
        'save_path': './saved_models/classic_2d_cnn_dual_original_preprocess',
        'max_time_steps': T
    }

    print("开始训练经典2D CNN模型...")
    print(f"配置: {config}")

    # Train both models.
    head_model, conc_model = train_dual_cnn_2d(train_loader, val_loader, config)

    # Final evaluation on the validation subset.
    print("\n开始最终评估（使用验证集）...")
    head_metrics, conc_metrics = evaluate_dual_cnn_2d(val_loader, config)

    print("\n🎉 经典2D CNN模型训练和评估完成！")
    print("\n📊 最终结果总结:")
    print(f"📈 水头模型 - R2: {head_metrics['r2']:.4f}, RMSE: {head_metrics['rmse']:.4f}")
    print(f"📈 浓度模型 - R2: {conc_metrics['r2']:.4f}, RMSE: {conc_metrics['rmse']:.4f}")

Using device: cuda:2
数据统计:
               head  concentration
count  5.328000e+06   5.328000e+06
mean   9.752326e+01   3.256942e-01
std    5.914362e+00   3.125932e+00
min    7.999980e+01   0.000000e+00
25%    9.476521e+01   0.000000e+00
50%    9.840869e+01   1.826313e-13
75%    1.010800e+02   7.200300e-06
max    1.325345e+02   6.359028e+02
网格大小: 30 x 50, 时间步数: 30
总模型数: 100
训练集: 70 个模型 (70.0%)
验证集: 30 个模型 (30.0%)
开始特征标准化...
预处理CNN数据...
处理模型 1/70: dual_93
    处理时间步 0/30
  模型 dual_93 处理完成
处理模型 2/70: dual_71
    处理时间步 0/30
  模型 dual_71 处理完成
处理模型 3/70: dual_31
    处理时间步 0/30
  模型 dual_31 处理完成
处理模型 4/70: dual_15
    处理时间步 0/30
  模型 dual_15 处理完成
处理模型 5/70: dual_88
    处理时间步 0/30
  模型 dual_88 处理完成
处理模型 6/70: dual_40
    处理时间步 0/30
  模型 dual_40 处理完成
处理模型 7/70: dual_20
    处理时间步 0/30
  模型 dual_20 处理完成
处理模型 8/70: dual_84
    处理时间步 0/30
  模型 dual_84 处理完成
处理模型 9/70: dual_10
    处理时间步 0/30
  模型 dual_10 处理完成
处理模型 10/70: dual_21
    处理时间步 0/30
  模型 dual_21 处理完成
处理模型 11/70: dual_29
    处理时间步 0/30
  模型 