# 1. Comparative Analysis of Different Transfer Strategies

In [None]:
#(1)CNN-LSTM-Transformer-N

In [None]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import (
    Dense, Conv1D, MaxPooling1D, LSTM, Flatten, Dropout,
    Input, MultiHeadAttention, LayerNormalization, Add, Lambda
)
import matplotlib.pyplot as plt

# Seed NumPy and TensorFlow so repeated runs draw the same random numbers
tf.random.set_seed(42)
np.random.seed(42)

# Target-domain data
# NOTE(review): no statement in this cell creates `data2`; it has to exist
# already (e.g. loaded by an earlier cell) before the lines below run.

# Column 0 is used as the regression target, columns 1.. as the features
y_data2 = data2.iloc[:, 0].values
X_data2 = data2.iloc[:, 1:].values

# Standardise the features with statistics estimated on this same data
scaler = StandardScaler().fit(X_data2)
X_data2_scaled = scaler.transform(X_data2)

# Append a channel axis of size 1 so the array matches the Conv1D input shape
input_shape = (X_data2_scaled.shape[1], 1)
X_data2_reshaped = X_data2_scaled.reshape(-1, *input_shape)

# Transformer block used to recalibrate the extracted features
def transformer_block(inputs, head_size, num_heads, ff_dim, dropout=0.1):
    """Apply self-attention and a feed-forward layer, each with a residual.

    Args:
        inputs: Keras tensor fed to the block; also used as the first residual.
        head_size: value passed as `key_dim` to `MultiHeadAttention`.
        num_heads: number of attention heads.
        ff_dim: accepted for interface compatibility but never read here; the
            feed-forward width is taken from `inputs.shape[-1]` instead.
        dropout: rate shared by both Dropout layers.

    Returns:
        The layer-normalised output tensor of the block.
    """
    # Self-attention: the same tensor is supplied as query and value
    attn = MultiHeadAttention(num_heads=num_heads, key_dim=head_size)(inputs, inputs)
    attn = Dropout(dropout)(attn)
    first_residual = Add()([inputs, attn])
    first_norm = LayerNormalization(epsilon=1e-6)(first_residual)

    # Feed-forward layer sized like the input's last axis so the residual adds up
    width = inputs.shape[-1]
    ffn = Dense(width, activation='relu')(first_norm)
    ffn = Dropout(dropout)(ffn)
    second_residual = Add()([first_norm, ffn])
    return LayerNormalization(epsilon=1e-6)(second_residual)

# Feature extractor: CNN -> LSTM -> Transformer block
def build_feature_extractor(input_shape):
    """Build the feature-extraction sub-model.

    Args:
        input_shape: per-sample shape (no batch axis) handed to `Input`.

    Returns:
        A Keras `Model` named "feature_extractor" whose output is the final
        256-unit Dense projection.
    """
    inputs = Input(shape=input_shape)

    # Convolutional stack; only the first two convolutions are followed by pooling
    h = inputs
    for n_filters in (256, 128):
        h = Conv1D(filters=n_filters, kernel_size=2, activation='relu', padding='same')(h)
        h = MaxPooling1D(pool_size=2)(h)
    h = Conv1D(filters=64, kernel_size=2, activation='relu', padding='same')(h)

    # Recurrent part: keep only the last LSTM output, then apply dropout
    h = LSTM(50, activation='relu', return_sequences=False)(h)
    h = Dropout(0.5)(h)

    # Project to 50 units, insert a length-1 axis, and recalibrate with the
    # transformer block (the Lambda layer wraps tf.expand_dims)
    h = Dense(50, activation='relu')(h)
    h = Lambda(lambda t: tf.expand_dims(t, axis=1))(h)
    h = transformer_block(h, head_size=64, num_heads=4, ff_dim=128)

    # Flatten the block output and map it to a fixed 256-unit feature vector
    h = Flatten()(h)
    feature_vector = Dense(256, activation='relu')(h)

    return Model(inputs, feature_vector, name="feature_extractor")

# Regression head (corrosion-rate prediction)
def build_regression_head():
    """Build the single-output linear head placed on top of the features.

    Returns:
        A Keras `Model` named "regression_head" mapping a 256-element input
        to one unit with no activation.
    """
    head_input = Input(shape=(256,))  # same width as the feature extractor's last Dense layer
    head_output = Dense(1)(head_input)
    return Model(head_input, head_output, name="regression_head")

# Assemble the end-to-end model: feature extractor feeding the regression head
feature_extractor = build_feature_extractor(input_shape)
regression_head = build_regression_head()

# Wire both sub-models behind a single input tensor
input_tensor = Input(shape=input_shape)
features = feature_extractor(input_tensor)
prediction = regression_head(features)
model = Model(inputs=input_tensor, outputs=prediction)

# Adam with learning rate 1e-4 and mean-squared-error loss
model.compile(
    loss='mse',
    optimizer=tf.keras.optimizers.Adam(learning_rate=1e-4),
)

# Train on the target-domain data only (this cell has no source-domain step)
model.fit(
    X_data2_reshaped,
    y_data2,
    batch_size=32,
    epochs=50,
    verbose=1,
)

# Predict on the same samples the model was just trained on
y_pred_data2 = model.predict(X_data2_reshaped)

# MAE, RMSE and R² of those predictions
mae_data2 = mean_absolute_error(y_true=y_data2, y_pred=y_pred_data2)
rmse_data2 = np.sqrt(mean_squared_error(y_true=y_data2, y_pred=y_pred_data2))
r2_data2 = r2_score(y_true=y_data2, y_pred=y_pred_data2)

# Report the three metrics
print(f'Mean Absolute Error on data2: {mae_data2}')
print(f'Root Mean Squared Error on data2: {rmse_data2}')
print(f'R² on data2: {r2_data2}')

# Line plot of actual versus predicted values, indexed by sample position
plt.figure(figsize=(10, 6))
plt.plot(
    range(len(y_data2)), y_data2,
    label='Actual Max Corrosion Rate', color='b', marker='o',
)
plt.plot(
    range(len(y_pred_data2)), y_pred_data2,
    label='Predicted Max Corrosion Rate', color='r', marker='x',
)
plt.title('Actual vs Predicted Max Corrosion Rate for data2')
plt.xlabel('Sample Index')
plt.ylabel('Max Corrosion Rate')
plt.legend()
plt.show()


In [None]:
#(2)CNN-LSTM-Transformer-S

In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import KFold
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import (
    Dense, Conv1D, MaxPooling1D, LSTM, Flatten, Dropout,
    Input, MultiHeadAttention, LayerNormalization, Add, Lambda
)
import matplotlib.pyplot as plt

# Seed NumPy and TensorFlow so repeated runs draw the same random numbers
tf.random.set_seed(42)
np.random.seed(42)

# Data loading
# NOTE(review): the statements that create `data1` (source domain) and
# `data2` (target domain) are not in this cell; both must already exist.

# Column 0 is used as the regression target, columns 1.. as the features
y_data1 = data1.iloc[:, 0].values
X_data1 = data1.iloc[:, 1:].values
y_data2 = data2.iloc[:, 0].values
X_data2 = data2.iloc[:, 1:].values

# Fit the scaler on the source domain only and reuse it for the target domain
scaler = StandardScaler().fit(X_data1)
X_data1_scaled = scaler.transform(X_data1)
X_data2_scaled = scaler.transform(X_data2)

# Append a channel axis of size 1 so the arrays match the Conv1D input shape
input_shape = (X_data1_scaled.shape[1], 1)
X_data1_reshaped = X_data1_scaled.reshape(-1, *input_shape)
X_data2_reshaped = X_data2_scaled.reshape(-1, *input_shape)

# Transformer block used to recalibrate the extracted features
def transformer_block(inputs, head_size, num_heads, ff_dim, dropout=0.1):
    """Apply self-attention and a feed-forward layer, each with a residual.

    Args:
        inputs: Keras tensor fed to the block; also used as the first residual.
        head_size: value passed as `key_dim` to `MultiHeadAttention`.
        num_heads: number of attention heads.
        ff_dim: accepted for interface compatibility but never read here; the
            feed-forward width is taken from `inputs.shape[-1]` instead.
        dropout: rate shared by both Dropout layers.

    Returns:
        The layer-normalised output tensor of the block.
    """
    # Self-attention: the same tensor is supplied as query and value
    attn = MultiHeadAttention(num_heads=num_heads, key_dim=head_size)(inputs, inputs)
    attn = Dropout(dropout)(attn)
    first_residual = Add()([inputs, attn])
    first_norm = LayerNormalization(epsilon=1e-6)(first_residual)

    # Feed-forward layer sized like the input's last axis so the residual adds up
    width = inputs.shape[-1]
    ffn = Dense(width, activation='relu')(first_norm)
    ffn = Dropout(dropout)(ffn)
    second_residual = Add()([first_norm, ffn])
    return LayerNormalization(epsilon=1e-6)(second_residual)

# Feature extractor: CNN -> LSTM -> Transformer block
def build_feature_extractor(input_shape):
    """Build the feature-extraction sub-model.

    Args:
        input_shape: per-sample shape (no batch axis) handed to `Input`.

    Returns:
        A Keras `Model` named "feature_extractor" whose output is the final
        256-unit Dense projection.
    """
    inputs = Input(shape=input_shape)

    # Convolutional stack; only the first two convolutions are followed by pooling
    h = inputs
    for n_filters in (256, 128):
        h = Conv1D(filters=n_filters, kernel_size=2, activation='relu', padding='same')(h)
        h = MaxPooling1D(pool_size=2)(h)
    h = Conv1D(filters=64, kernel_size=2, activation='relu', padding='same')(h)

    # Recurrent part: keep only the last LSTM output, then apply dropout
    h = LSTM(50, activation='relu', return_sequences=False)(h)
    h = Dropout(0.5)(h)

    # Project to 50 units, insert a length-1 axis, and recalibrate with the
    # transformer block (the Lambda layer wraps tf.expand_dims)
    h = Dense(50, activation='relu')(h)
    h = Lambda(lambda t: tf.expand_dims(t, axis=1))(h)
    h = transformer_block(h, head_size=64, num_heads=4, ff_dim=128)

    # Flatten the block output and map it to a fixed 256-unit feature vector
    h = Flatten()(h)
    feature_vector = Dense(256, activation='relu')(h)

    return Model(inputs, feature_vector, name="feature_extractor")

# Regression head (corrosion-rate prediction)
def build_regression_head():
    """Build the single-output linear head placed on top of the features.

    Returns:
        A Keras `Model` named "regression_head" mapping a 256-element input
        to one unit with no activation.
    """
    head_input = Input(shape=(256,))  # same width as the feature extractor's last Dense layer
    head_output = Dense(1)(head_input)
    return Model(head_input, head_output, name="regression_head")

# Assemble the end-to-end model: feature extractor feeding the regression head
feature_extractor = build_feature_extractor(input_shape)
regression_head = build_regression_head()

# Wire both sub-models behind a single input tensor
input_tensor = Input(shape=input_shape)
features = feature_extractor(input_tensor)
prediction = regression_head(features)
model = Model(inputs=input_tensor, outputs=prediction)

# Adam with learning rate 1e-4 and mean-squared-error loss
model.compile(
    loss='mse',
    optimizer=tf.keras.optimizers.Adam(learning_rate=1e-4),
)

# 5-fold split of the source-domain samples
kf = KFold(n_splits=5)
mse_list = []

# Pre-train on the source domain, one fit per fold.
# NOTE(review): the same `model` instance is fitted in every fold, so its
# weights carry over and later validation folds were already trained on in
# earlier folds; the per-fold MSE is therefore not an independent estimate.
for train_index, val_index in kf.split(X_data1_scaled):
    X_train = X_data1_reshaped[train_index]
    y_train = y_data1[train_index]
    X_val = X_data1_reshaped[val_index]
    y_val = y_data1[val_index]

    model.fit(
        X_train, y_train,
        validation_data=(X_val, y_val),
        batch_size=32, epochs=50, verbose=1,
    )
    y_pred = model.predict(X_val)
    mse = mean_squared_error(y_val, y_pred)
    mse_list.append(mse)

# Mean MSE over the folds, and the square root of that mean as RMSE
avg_mse = np.mean(mse_list)
avg_rmse = np.sqrt(avg_mse)
print(f'Cross-Validation MSE: {avg_mse}')
print(f'Cross-Validation RMSE: {avg_rmse}')

# Fine-tune every layer at once. Nothing was frozen earlier in this cell,
# so this loop only restates the flags.
for layer in model.layers:
    layer.trainable = True

# Fine-tune on the target-domain data
model.fit(
    X_data2_reshaped,
    y_data2,
    batch_size=32,
    epochs=100,
    verbose=1,
)

# Predict on the same target-domain samples used for fine-tuning
y_pred_data2 = model.predict(X_data2_reshaped)

# MAE, RMSE and R² of those predictions
mae_data2 = mean_absolute_error(y_true=y_data2, y_pred=y_pred_data2)
rmse_data2 = np.sqrt(mean_squared_error(y_true=y_data2, y_pred=y_pred_data2))
r2_data2 = r2_score(y_true=y_data2, y_pred=y_pred_data2)

# Report the three metrics
print(f'Mean Absolute Error on data2: {mae_data2}')
print(f'Root Mean Squared Error on data2: {rmse_data2}')
print(f'R² on data2: {r2_data2}')

# Line plot of actual versus predicted values, indexed by sample position
plt.figure(figsize=(10, 6))
plt.plot(
    range(len(y_data2)), y_data2,
    label='Actual Max Corrosion Rate', color='b', marker='o',
)
plt.plot(
    range(len(y_pred_data2)), y_pred_data2,
    label='Predicted Max Corrosion Rate', color='r', marker='x',
)
plt.title('Actual vs Predicted Max Corrosion Rate for data2')
plt.xlabel('Sample Index')
plt.ylabel('Max Corrosion Rate')
plt.legend()
plt.show()


In [None]:
#(3)CNN-LSTM-Transformer-Y

In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import KFold
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, r2_score
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import (
    Dense, Conv1D, MaxPooling1D, LSTM, Flatten, Dropout,
    Input, MultiHeadAttention, LayerNormalization, Add, Lambda
)
import matplotlib.pyplot as plt

# Seed NumPy and TensorFlow so repeated runs draw the same random numbers
tf.random.set_seed(42)
np.random.seed(42)

# Data loading
# NOTE(review): the statements that create `data1` (source domain) and
# `data2` (target domain) are not in this cell; both must already exist.

# Column 0 is used as the regression target, columns 1.. as the features
y_data1 = data1.iloc[:, 0].values
X_data1 = data1.iloc[:, 1:].values
y_data2 = data2.iloc[:, 0].values
X_data2 = data2.iloc[:, 1:].values

# Fit the scaler on the source domain only and reuse it for the target domain
scaler = StandardScaler().fit(X_data1)
X_data1_scaled = scaler.transform(X_data1)
X_data2_scaled = scaler.transform(X_data2)

# Append a channel axis of size 1 so the arrays match the Conv1D input shape
input_shape = (X_data1_scaled.shape[1], 1)
X_data1_reshaped = X_data1_scaled.reshape(-1, *input_shape)
X_data2_reshaped = X_data2_scaled.reshape(-1, *input_shape)

# Transformer block used to recalibrate the extracted features
def transformer_block(inputs, head_size, num_heads, ff_dim, dropout=0.1):
    """Apply self-attention and a feed-forward layer, each with a residual.

    Args:
        inputs: Keras tensor fed to the block; also used as the first residual.
        head_size: value passed as `key_dim` to `MultiHeadAttention`.
        num_heads: number of attention heads.
        ff_dim: accepted for interface compatibility but never read here; the
            feed-forward width is taken from `inputs.shape[-1]` instead.
        dropout: rate shared by both Dropout layers.

    Returns:
        The layer-normalised output tensor of the block.
    """
    # Self-attention: the same tensor is supplied as query and value
    attn = MultiHeadAttention(num_heads=num_heads, key_dim=head_size)(inputs, inputs)
    attn = Dropout(dropout)(attn)
    first_residual = Add()([inputs, attn])
    first_norm = LayerNormalization(epsilon=1e-6)(first_residual)

    # Feed-forward layer sized like the input's last axis so the residual adds up
    width = inputs.shape[-1]
    ffn = Dense(width, activation='relu')(first_norm)
    ffn = Dropout(dropout)(ffn)
    second_residual = Add()([first_norm, ffn])
    return LayerNormalization(epsilon=1e-6)(second_residual)

# Feature extractor: CNN -> LSTM -> Transformer block
def build_feature_extractor(input_shape):
    """Build the feature-extraction sub-model.

    Args:
        input_shape: per-sample shape (no batch axis) handed to `Input`.

    Returns:
        A Keras `Model` named "feature_extractor" whose output is the final
        256-unit Dense projection.
    """
    inputs = Input(shape=input_shape)

    # Convolutional stack; only the first two convolutions are followed by pooling
    h = inputs
    for n_filters in (256, 128):
        h = Conv1D(filters=n_filters, kernel_size=2, activation='relu', padding='same')(h)
        h = MaxPooling1D(pool_size=2)(h)
    h = Conv1D(filters=64, kernel_size=2, activation='relu', padding='same')(h)

    # Recurrent part: keep only the last LSTM output, then apply dropout
    h = LSTM(50, activation='relu', return_sequences=False)(h)
    h = Dropout(0.5)(h)

    # Project to 50 units, insert a length-1 axis, and recalibrate with the
    # transformer block (the Lambda layer wraps tf.expand_dims)
    h = Dense(50, activation='relu')(h)
    h = Lambda(lambda t: tf.expand_dims(t, axis=1))(h)
    h = transformer_block(h, head_size=64, num_heads=4, ff_dim=128)

    # Flatten the block output and map it to a fixed 256-unit feature vector
    h = Flatten()(h)
    feature_vector = Dense(256, activation='relu')(h)

    return Model(inputs, feature_vector, name="feature_extractor")

# Regression head (corrosion-rate prediction)
def build_regression_head():
    """Build the single-output linear head placed on top of the features.

    Returns:
        A Keras `Model` named "regression_head" mapping a 256-element input
        to one unit with no activation.
    """
    head_input = Input(shape=(256,))  # same width as the feature extractor's last Dense layer
    head_output = Dense(1)(head_input)
    return Model(head_input, head_output, name="regression_head")

# Assemble the end-to-end model: feature extractor feeding the regression head
feature_extractor = build_feature_extractor(input_shape)
regression_head = build_regression_head()

# Wire both sub-models behind a single input tensor
input_tensor = Input(shape=input_shape)
features = feature_extractor(input_tensor)
prediction = regression_head(features)
model = Model(inputs=input_tensor, outputs=prediction)

# Adam with learning rate 1e-4 and mean-squared-error loss
model.compile(
    loss='mse',
    optimizer=tf.keras.optimizers.Adam(learning_rate=1e-4),
)

# 5-fold split of the source-domain samples
kf = KFold(n_splits=5)
mse_list = []

# Pre-train on the source domain, one fit per fold.
# NOTE(review): the same `model` instance is fitted in every fold, so its
# weights carry over and later validation folds were already trained on in
# earlier folds; the per-fold MSE is therefore not an independent estimate.
for train_index, val_index in kf.split(X_data1_scaled):
    X_train = X_data1_reshaped[train_index]
    y_train = y_data1[train_index]
    X_val = X_data1_reshaped[val_index]
    y_val = y_data1[val_index]

    model.fit(
        X_train, y_train,
        validation_data=(X_val, y_val),
        batch_size=32, epochs=50, verbose=1,
    )
    y_pred = model.predict(X_val)
    mse = mean_squared_error(y_val, y_pred)
    mse_list.append(mse)

# Mean MSE over the folds, and the square root of that mean as RMSE
avg_mse = np.mean(mse_list)
avg_rmse = np.sqrt(avg_mse)
print(f'Cross-Validation MSE: {avg_mse}')
print(f'Cross-Validation RMSE: {avg_rmse}')

# Progressive fine-tuning on the target domain.
#
# Keras fixes each layer's `trainable` state when `compile()` is called, so a
# flag changed afterwards is only honoured once the model is compiled again.
# The model is therefore re-compiled (same optimizer settings and loss as the
# initial compile) after every freeze/unfreeze step. Re-compiling creates a
# fresh Adam optimizer, so optimizer state restarts at each stage.

# mean_absolute_error is used below but is not part of this cell's
# sklearn.metrics import line, so bring it into scope here.
from sklearn.metrics import mean_absolute_error

# Start with every layer of the feature extractor frozen
for layer in feature_extractor.layers:
    layer.trainable = False

# Stage 1: train the CNN part (first six layers: input, three convolutions, two poolings)
for layer in feature_extractor.layers[:6]:
    layer.trainable = True

model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=1e-4), loss='mse')
model.fit(X_data2_reshaped, y_data2, epochs=100, batch_size=32, verbose=1)

# Stage 2: additionally unfreeze the LSTM layer and the Dropout that follows it
for layer in feature_extractor.layers[6:8]:
    layer.trainable = True

model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=1e-4), loss='mse')
model.fit(X_data2_reshaped, y_data2, epochs=100, batch_size=32, verbose=1)

# Stage 3: additionally unfreeze the transformer block and the remaining layers
for layer in feature_extractor.layers[8:]:
    layer.trainable = True

model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=1e-4), loss='mse')
model.fit(X_data2_reshaped, y_data2, epochs=100, batch_size=32, verbose=1)

# Predict on the same target-domain samples used for fine-tuning
y_pred_data2 = model.predict(X_data2_reshaped)
# MAE, RMSE and R² of those predictions
mae_data2 = mean_absolute_error(y_data2, y_pred_data2)
rmse_data2 = np.sqrt(mean_squared_error(y_data2, y_pred_data2))
r2_data2 = r2_score(y_data2, y_pred_data2)

# Report the three metrics
print(f'Mean Absolute Error on data2: {mae_data2}')
print(f'Root Mean Squared Error on data2: {rmse_data2}')
print(f'R² on data2: {r2_data2}')

# Line plot of actual versus predicted values, indexed by sample position
plt.figure(figsize=(10, 6))
plt.plot(range(len(y_data2)), y_data2, label='Actual Max Corrosion Rate', color='b', marker='o')
plt.plot(range(len(y_pred_data2)), y_pred_data2, label='Predicted Max Corrosion Rate', color='r', marker='x')
plt.xlabel('Sample Index')
plt.ylabel('Max Corrosion Rate')
plt.title('Actual vs Predicted Max Corrosion Rate for data2')
plt.legend()
plt.show()

# 2. Comparison with Other TL Models

In [None]:
#(1)CNN-LSTM

In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import KFold
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, r2_score
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Conv1D, MaxPooling1D, GRU, Flatten, Dropout
import matplotlib.pyplot as plt

# Seed NumPy and TensorFlow so repeated runs draw the same random numbers
tf.random.set_seed(42)
np.random.seed(42)

# Read data
# NOTE(review): the statements that create `data1` (source domain) and
# `data2` (target domain) are not in this cell; both must already exist.

# Column 0 is used as the regression target, columns 1.. as the features
y_data1 = data1.iloc[:, 0].values
X_data1 = data1.iloc[:, 1:].values
y_data2 = data2.iloc[:, 0].values
X_data2 = data2.iloc[:, 1:].values

# Fit the scaler on the source domain only and reuse it for the target domain
scaler = StandardScaler().fit(X_data1)
X_data1_scaled = scaler.transform(X_data1)
X_data2_scaled = scaler.transform(X_data2)

# Source-domain CNN-LSTM model
def create_source_model(input_shape):
    """Build and compile the CNN + stacked-LSTM regressor.

    Args:
        input_shape: per-sample input shape given to the first Conv1D layer.

    Returns:
        A compiled `Sequential` model (Adam optimizer, MSE loss) ending in a
        single linear output unit.
    """
    # This cell's Keras import line lists GRU but not LSTM, so import the
    # layer that is actually used here.
    from tensorflow.keras.layers import LSTM

    model = Sequential([
        Conv1D(filters=64, kernel_size=2, activation='relu', padding='same', input_shape=input_shape),
        MaxPooling1D(pool_size=2, padding='same'),
        Conv1D(filters=128, kernel_size=2, activation='relu', padding='same'),
        MaxPooling1D(pool_size=2, padding='same'),
        Conv1D(filters=256, kernel_size=2, activation='relu', padding='same'),
        MaxPooling1D(pool_size=2, padding='same'),
        # First LSTM keeps the full sequence so the second LSTM can consume it
        LSTM(50, activation='relu', return_sequences=True),
        Dropout(0.5),
        LSTM(50, activation='relu'),
        # The second LSTM does not return sequences, so Flatten is presumably
        # a no-op here; kept to leave the layer stack unchanged.
        Flatten(),
        Dense(1)
    ])
    model.compile(optimizer='adam', loss='mse')
    return model

# mean_absolute_error is used further down but is not part of this cell's
# sklearn.metrics import line, so bring it into scope here.
from sklearn.metrics import mean_absolute_error

# 5-fold cross-validation set-up for the source domain
kf = KFold(n_splits=5)
mse_list = []
rmse_list = []
input_shape = (X_data1_scaled.shape[1], 1)
X_data1_reshaped = X_data1_scaled.reshape((-1, input_shape[0], 1))

# Pre-train on the source domain; a fresh model is built for every fold
for train_index, val_index in kf.split(X_data1_scaled):
    X_train, X_val = X_data1_reshaped[train_index], X_data1_reshaped[val_index]
    y_train, y_val = y_data1[train_index], y_data1[val_index]

    source_model = create_source_model(input_shape)
    source_model.fit(X_train, y_train, epochs=200, batch_size=32, verbose=0, validation_data=(X_val, y_val))
    y_pred = source_model.predict(X_val)
    mse = mean_squared_error(y_val, y_pred)
    rmse = np.sqrt(mse)
    mse_list.append(mse)
    rmse_list.append(rmse)

# Mean of the per-fold MSE and RMSE values
avg_mse = np.mean(mse_list)
avg_rmse = np.mean(rmse_list)
print(f'Cross-Validation MSE: {avg_mse}')
print(f'Cross-Validation RMSE: {avg_rmse}')

# Fine-tune on the target domain. `source_model` here is the model left over
# from the last fold, i.e. it was trained on that fold's training split only.
X_data2_reshaped = X_data2_scaled.reshape((-1, input_shape[0], 1))
source_model.fit(X_data2_reshaped, y_data2, epochs=200, batch_size=32, verbose=0)

# Predict on the same target-domain samples used for fine-tuning
y_pred_data2 = source_model.predict(X_data2_reshaped)

# MAE, RMSE and R² of those predictions
mae_data2 = mean_absolute_error(y_data2, y_pred_data2)
rmse_data2 = np.sqrt(mean_squared_error(y_data2, y_pred_data2))
r2_data2 = r2_score(y_data2, y_pred_data2)

# Report the three metrics
print(f'Mean Absolute Error on data2: {mae_data2}')
print(f'Root Mean Squared Error on data2: {rmse_data2}')
print(f'R² on data2: {r2_data2}')

# Line plot of actual versus predicted values, indexed by sample position
plt.figure(figsize=(10, 6))
plt.plot(range(len(y_data2)), y_data2, label='Actual Max Corrosion Rate', color='b', marker='o')
plt.plot(range(len(y_pred_data2)), y_pred_data2, label='Predicted Max Corrosion Rate', color='r', marker='x')
plt.xlabel('Sample Index')
plt.ylabel('Max Corrosion Rate')
plt.title('Actual vs Predicted Max Corrosion Rate for data2')
plt.legend()
plt.show()

In [None]:
#(2)CNN-BiLSTM

In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import KFold
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, r2_score
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Conv1D, MaxPooling1D, GRU, Flatten, Dropout, LSTM, Bidirectional
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
import matplotlib.pyplot as plt
import time
from sklearn.model_selection import train_test_split

# Seed NumPy and TensorFlow so repeated runs draw the same random numbers
tf.random.set_seed(42)
np.random.seed(42)

# Start of the wall-clock measurement reported at the end of the cell
start_time = time.time()

# Data loading
# NOTE(review): the statements that create `data1` (source domain) and
# `data2` (target domain) are not in this cell; both must already exist.

# Column 0 is used as the regression target, columns 1.. as the features
y_data1 = data1.iloc[:, 0].values  # source-domain target
X_data1 = data1.iloc[:, 1:].values  # source-domain features
y_data2 = data2.iloc[:, 0].values  # target-domain target
X_data2 = data2.iloc[:, 1:].values  # target-domain features

# Hold out 30% of the target domain for testing
X_train_data2, X_test_data2, y_train_data2, y_test_data2 = train_test_split(
    X_data2, y_data2, test_size=0.3, random_state=42
)

# Fit the scaler on the source domain only and reuse it for both target splits
scaler = StandardScaler().fit(X_data1)
X_data1_scaled = scaler.transform(X_data1)
X_train_data2_scaled = scaler.transform(X_train_data2)
X_test_data2_scaled = scaler.transform(X_test_data2)

# Append a channel axis of size 1 so the arrays match the Conv1D input shape
input_shape = (X_data1_scaled.shape[1], 1)
X_data1_reshaped = X_data1_scaled.reshape(-1, *input_shape)
X_train_data2_reshaped = X_train_data2_scaled.reshape(-1, *input_shape)
X_test_data2_reshaped = X_test_data2_scaled.reshape(-1, *input_shape)

# Source-domain CNN-BiLSTM model
def create_source_model(input_shape):
    """Build and compile the CNN + bidirectional-LSTM regressor.

    Args:
        input_shape: per-sample input shape given to the first Conv1D layer.

    Returns:
        A compiled `Sequential` model (Adam optimizer, MSE loss) ending in a
        single linear output unit.
    """
    model = Sequential([
        # Only the first layer declares the input shape; the later Conv1D
        # layers take their input from the layer before them.
        Conv1D(filters=256, kernel_size=2, activation='relu', padding='same', input_shape=input_shape),
        MaxPooling1D(pool_size=2, padding='same'),
        Conv1D(filters=128, kernel_size=2, activation='relu', padding='same'),
        MaxPooling1D(pool_size=2, padding='same'),
        Conv1D(filters=64, kernel_size=2, activation='relu', padding='same'),
        MaxPooling1D(pool_size=2, padding='same'),
        # First bidirectional LSTM keeps the full sequence for the second one
        Bidirectional(LSTM(50, activation='relu', return_sequences=True)),
        Dropout(0.5),  # regularisation between the two recurrent layers
        Bidirectional(LSTM(50, activation='relu')),
        Flatten(),
        Dense(1)  # single regression output
    ])
    model.compile(optimizer='adam', loss='mse')
    return model

# 5-fold cross-validation set-up for the source domain
kf = KFold(n_splits=5)
mse_list = []  # per-fold MSE
rmse_list = []  # per-fold RMSE

# Pre-train on the source domain; a fresh model is built for every fold
for train_index, val_index in kf.split(X_data1_scaled):
    X_train, X_val = X_data1_reshaped[train_index], X_data1_reshaped[val_index]
    y_train, y_val = y_data1[train_index], y_data1[val_index]

    source_model = create_source_model(input_shape)
    source_model.fit(X_train, y_train, epochs=50, batch_size=32, verbose=0, validation_data=(X_val, y_val))
    y_pred = source_model.predict(X_val)
    mse = mean_squared_error(y_val, y_pred)
    rmse = np.sqrt(mse)
    mse_list.append(mse)
    rmse_list.append(rmse)

# Mean of the per-fold MSE and RMSE values (source domain)
avg_mse = np.mean(mse_list)
avg_rmse = np.mean(rmse_list)
print(f'Cross-Validation MSE: {avg_mse}')
print(f'Cross-Validation RMSE: {avg_rmse}')

# Fine-tune on the target-domain training split. `source_model` here is the
# model left over from the last fold.
source_model.fit(X_train_data2_reshaped, y_train_data2, epochs=200, batch_size=32, verbose=1)

# Predict on the held-out target-domain split.
# `predict` returns a column of shape (n, 1) while `y_test_data2` is 1-D;
# flatten the prediction so the element-wise arithmetic below compares sample
# i with sample i instead of broadcasting to an (n, n) matrix.
y_pred_data2 = source_model.predict(X_test_data2_reshaped).ravel()

# MAE, RMSE and R²
mae = mean_absolute_error(y_test_data2, y_pred_data2)
rmse = np.sqrt(mean_squared_error(y_test_data2, y_pred_data2))
r2 = r2_score(y_test_data2, y_pred_data2)

# Percentage errors.
# NOTE(review): both divide by the true value, so a target of exactly 0 in the
# test split would produce inf/nan — confirm the data excludes zeros.
mape = np.mean(np.abs((y_test_data2 - y_pred_data2) / y_test_data2)) * 100  # mean absolute percentage error (%)
rmspe = np.sqrt(np.mean(((y_test_data2 - y_pred_data2) / y_test_data2) ** 2)) * 100  # root mean squared percentage error (%)

# Elapsed time since `start_time`, which was taken before the data preparation
end_time = time.time()
training_time = end_time - start_time

# Collect the results in a two-column table
results = {
    '指标': ['MAE', 'RMSE', 'R²', 'MAPE(%)', 'RMSPE(%)', '耗时(s)'],
    '值': [mae, rmse, r2, mape, rmspe, training_time]
}
results_df = pd.DataFrame(results)


# Print the table
print("\n评估结果：")
print(results_df.to_string(index=False))

# Line plot of actual versus predicted values on the target-domain test split
plt.figure(figsize=(10, 6))
plt.plot(range(len(y_test_data2)), y_test_data2, label='Actual', color='b', marker='o', linestyle='-')
plt.plot(range(len(y_pred_data2)), y_pred_data2, label='Predicted', color='r', marker='x', linestyle='--')
plt.xlabel('Sample Index')
plt.ylabel('Max Corrosion Rate')
plt.title('Actual vs Predicted Values')
plt.legend()
plt.grid(True)
plt.show()

In [None]:
#(3)CNN-GRU

In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import KFold
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, r2_score
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Conv1D, MaxPooling1D, GRU, Flatten, Dropout, LSTM, Bidirectional
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
import matplotlib.pyplot as plt
import time
from sklearn.model_selection import train_test_split

# Seed NumPy and TensorFlow so repeated runs draw the same random numbers
tf.random.set_seed(42)
np.random.seed(42)

# Start of the wall-clock measurement reported at the end of the cell
start_time = time.time()

# Data loading
# NOTE(review): the statements that create `data1` (source domain) and
# `data2` (target domain) are not in this cell; both must already exist.

# Column 0 is used as the regression target, columns 1.. as the features
y_data1 = data1.iloc[:, 0].values  # source-domain target
X_data1 = data1.iloc[:, 1:].values  # source-domain features
y_data2 = data2.iloc[:, 0].values  # target-domain target
X_data2 = data2.iloc[:, 1:].values  # target-domain features

# Hold out 30% of the target domain for testing
X_train_data2, X_test_data2, y_train_data2, y_test_data2 = train_test_split(
    X_data2, y_data2, test_size=0.3, random_state=42
)

# Fit the scaler on the source domain only and reuse it for both target splits
scaler = StandardScaler().fit(X_data1)
X_data1_scaled = scaler.transform(X_data1)
X_train_data2_scaled = scaler.transform(X_train_data2)
X_test_data2_scaled = scaler.transform(X_test_data2)

# Append a channel axis of size 1 so the arrays match the Conv1D input shape
input_shape = (X_data1_scaled.shape[1], 1)
X_data1_reshaped = X_data1_scaled.reshape(-1, *input_shape)
X_train_data2_reshaped = X_train_data2_scaled.reshape(-1, *input_shape)
X_test_data2_reshaped = X_test_data2_scaled.reshape(-1, *input_shape)

# Source-domain CNN-GRU model
def create_source_model(input_shape):
    """Build and compile the CNN + stacked-GRU regressor.

    Args:
        input_shape: per-sample input shape given to the first Conv1D layer.

    Returns:
        A compiled `Sequential` model (Adam optimizer, MSE loss) ending in a
        single linear output unit.
    """
    model = Sequential([
        # Only the first layer declares the input shape; the later Conv1D
        # layers take their input from the layer before them.
        Conv1D(filters=256, kernel_size=2, activation='relu', padding='same', input_shape=input_shape),
        MaxPooling1D(pool_size=2, padding='same'),
        Conv1D(filters=128, kernel_size=2, activation='relu', padding='same'),
        MaxPooling1D(pool_size=2, padding='same'),
        Conv1D(filters=64, kernel_size=2, activation='relu', padding='same'),
        MaxPooling1D(pool_size=2, padding='same'),
        # First GRU keeps the full sequence so the second GRU can consume it
        GRU(50, activation='relu', return_sequences=True),
        Dropout(0.5),  # regularisation between the two recurrent layers
        GRU(50, activation='relu'),
        Flatten(),
        Dense(1)  # single regression output
    ])
    model.compile(optimizer='adam', loss='mse')
    return model

# 5-fold cross-validation set-up for the source domain
kf = KFold(n_splits=5)
mse_list = []  # per-fold MSE
rmse_list = []  # per-fold RMSE

# Pre-train on the source domain; a fresh model is built for every fold
for train_index, val_index in kf.split(X_data1_scaled):
    X_train, X_val = X_data1_reshaped[train_index], X_data1_reshaped[val_index]
    y_train, y_val = y_data1[train_index], y_data1[val_index]

    source_model = create_source_model(input_shape)
    source_model.fit(X_train, y_train, epochs=50, batch_size=32, verbose=0, validation_data=(X_val, y_val))
    y_pred = source_model.predict(X_val)
    mse = mean_squared_error(y_val, y_pred)
    rmse = np.sqrt(mse)
    mse_list.append(mse)
    rmse_list.append(rmse)

# Mean of the per-fold MSE and RMSE values (source domain)
avg_mse = np.mean(mse_list)
avg_rmse = np.mean(rmse_list)
print(f'Cross-Validation MSE: {avg_mse}')
print(f'Cross-Validation RMSE: {avg_rmse}')

# Fine-tune on the target-domain training split. `source_model` here is the
# model left over from the last fold.
source_model.fit(X_train_data2_reshaped, y_train_data2, epochs=200, batch_size=32, verbose=1)

# Predict on the held-out target-domain split.
# `predict` returns a column of shape (n, 1) while `y_test_data2` is 1-D;
# flatten the prediction so the element-wise arithmetic below compares sample
# i with sample i instead of broadcasting to an (n, n) matrix.
y_pred_data2 = source_model.predict(X_test_data2_reshaped).ravel()

# MAE, RMSE and R²
mae = mean_absolute_error(y_test_data2, y_pred_data2)
rmse = np.sqrt(mean_squared_error(y_test_data2, y_pred_data2))
r2 = r2_score(y_test_data2, y_pred_data2)

# Percentage errors.
# NOTE(review): both divide by the true value, so a target of exactly 0 in the
# test split would produce inf/nan — confirm the data excludes zeros.
mape = np.mean(np.abs((y_test_data2 - y_pred_data2) / y_test_data2)) * 100  # mean absolute percentage error (%)
rmspe = np.sqrt(np.mean(((y_test_data2 - y_pred_data2) / y_test_data2) ** 2)) * 100  # root mean squared percentage error (%)

# Elapsed time since `start_time`, which was taken before the data preparation
end_time = time.time()
training_time = end_time - start_time

# Collect the results in a two-column table
results = {
    '指标': ['MAE', 'RMSE', 'R²', 'MAPE(%)', 'RMSPE(%)', '耗时(s)'],
    '值': [mae, rmse, r2, mape, rmspe, training_time]
}
results_df = pd.DataFrame(results)


# Print the table
print("\n评估结果：")
print(results_df.to_string(index=False))

# Line plot of actual versus predicted values on the target-domain test split
plt.figure(figsize=(10, 6))
plt.plot(range(len(y_test_data2)), y_test_data2, label='Actual', color='b', marker='o', linestyle='-')
plt.plot(range(len(y_pred_data2)), y_pred_data2, label='Predicted', color='r', marker='x', linestyle='--')
plt.xlabel('Sample Index')
plt.ylabel('Max Corrosion Rate')
plt.title('Actual vs Predicted Values')
plt.legend()
plt.grid(True)
plt.show()

In [None]:
#(4)CNN-BiGRU

In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import KFold
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, r2_score
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Conv1D, MaxPooling1D, GRU, Flatten, Dropout, LSTM, Bidirectional
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
import matplotlib.pyplot as plt
import time
from sklearn.model_selection import train_test_split

# Seed NumPy and TensorFlow so repeated runs draw the same random numbers
tf.random.set_seed(42)
np.random.seed(42)

# Start of the wall-clock measurement reported at the end of the cell
start_time = time.time()

# Data loading
# NOTE(review): the statements that create `data1` (source domain) and
# `data2` (target domain) are not in this cell; both must already exist.

# Column 0 is used as the regression target, columns 1.. as the features
y_data1 = data1.iloc[:, 0].values  # source-domain target
X_data1 = data1.iloc[:, 1:].values  # source-domain features
y_data2 = data2.iloc[:, 0].values  # target-domain target
X_data2 = data2.iloc[:, 1:].values  # target-domain features

# Hold out 30% of the target domain for testing
X_train_data2, X_test_data2, y_train_data2, y_test_data2 = train_test_split(
    X_data2, y_data2, test_size=0.3, random_state=42
)

# Fit the scaler on the source domain only and reuse it for both target splits
scaler = StandardScaler().fit(X_data1)
X_data1_scaled = scaler.transform(X_data1)
X_train_data2_scaled = scaler.transform(X_train_data2)
X_test_data2_scaled = scaler.transform(X_test_data2)

# Append a channel axis of size 1 so the arrays match the Conv1D input shape
input_shape = (X_data1_scaled.shape[1], 1)
X_data1_reshaped = X_data1_scaled.reshape(-1, *input_shape)
X_train_data2_reshaped = X_train_data2_scaled.reshape(-1, *input_shape)
X_test_data2_reshaped = X_test_data2_scaled.reshape(-1, *input_shape)

# Source-domain CNN-BiGRU model
def create_source_model(input_shape):
    """Build and compile the CNN + bidirectional-GRU regressor.

    Args:
        input_shape: per-sample input shape given to the first Conv1D layer.

    Returns:
        A compiled `Sequential` model (Adam optimizer, MSE loss) ending in a
        single linear output unit.
    """
    model = Sequential([
        # Only the first layer declares the input shape; the later Conv1D
        # layers take their input from the layer before them.
        Conv1D(filters=256, kernel_size=2, activation='relu', padding='same', input_shape=input_shape),
        MaxPooling1D(pool_size=2, padding='same'),
        Conv1D(filters=128, kernel_size=2, activation='relu', padding='same'),
        MaxPooling1D(pool_size=2, padding='same'),
        Conv1D(filters=64, kernel_size=2, activation='relu', padding='same'),
        MaxPooling1D(pool_size=2, padding='same'),
        # First bidirectional GRU keeps the full sequence for the second one
        Bidirectional(GRU(50, activation='relu', return_sequences=True)),
        Dropout(0.5),  # regularisation between the two recurrent layers
        Bidirectional(GRU(50, activation='relu')),
        Flatten(),
        Dense(1)  # single regression output
    ])
    model.compile(optimizer='adam', loss='mse')
    return model

# 5-fold cross-validation set-up for the source domain
kf = KFold(n_splits=5)
mse_list = []  # per-fold MSE
rmse_list = []  # per-fold RMSE

# Pre-train on the source domain; a fresh model is built for every fold
for train_index, val_index in kf.split(X_data1_scaled):
    X_train, X_val = X_data1_reshaped[train_index], X_data1_reshaped[val_index]
    y_train, y_val = y_data1[train_index], y_data1[val_index]

    source_model = create_source_model(input_shape)
    source_model.fit(X_train, y_train, epochs=50, batch_size=32, verbose=0, validation_data=(X_val, y_val))
    y_pred = source_model.predict(X_val)
    mse = mean_squared_error(y_val, y_pred)
    rmse = np.sqrt(mse)
    mse_list.append(mse)
    rmse_list.append(rmse)

# Mean of the per-fold MSE and RMSE values (source domain)
avg_mse = np.mean(mse_list)
avg_rmse = np.mean(rmse_list)
print(f'Cross-Validation MSE: {avg_mse}')
print(f'Cross-Validation RMSE: {avg_rmse}')

# Fine-tune on the target-domain training split. `source_model` here is the
# model left over from the last fold.
source_model.fit(X_train_data2_reshaped, y_train_data2, epochs=200, batch_size=32, verbose=1)

# Predict on the held-out target-domain split.
# `predict` returns a column of shape (n, 1) while `y_test_data2` is 1-D;
# flatten the prediction so the element-wise arithmetic below compares sample
# i with sample i instead of broadcasting to an (n, n) matrix.
y_pred_data2 = source_model.predict(X_test_data2_reshaped).ravel()

# MAE, RMSE and R²
mae = mean_absolute_error(y_test_data2, y_pred_data2)
rmse = np.sqrt(mean_squared_error(y_test_data2, y_pred_data2))
r2 = r2_score(y_test_data2, y_pred_data2)

# Percentage errors.
# NOTE(review): both divide by the true value, so a target of exactly 0 in the
# test split would produce inf/nan — confirm the data excludes zeros.
mape = np.mean(np.abs((y_test_data2 - y_pred_data2) / y_test_data2)) * 100  # mean absolute percentage error (%)
rmspe = np.sqrt(np.mean(((y_test_data2 - y_pred_data2) / y_test_data2) ** 2)) * 100  # root mean squared percentage error (%)

# Elapsed time since `start_time`, which was taken before the data preparation
end_time = time.time()
training_time = end_time - start_time

# Collect the results in a two-column table
results = {
    '指标': ['MAE', 'RMSE', 'R²', 'MAPE(%)', 'RMSPE(%)', '耗时(s)'],
    '值': [mae, rmse, r2, mape, rmspe, training_time]
}
results_df = pd.DataFrame(results)


# Print the table
print("\n评估结果：")
print(results_df.to_string(index=False))

# Line plot of actual versus predicted values on the target-domain test split
plt.figure(figsize=(10, 6))
plt.plot(range(len(y_test_data2)), y_test_data2, label='Actual', color='b', marker='o', linestyle='-')
plt.plot(range(len(y_pred_data2)), y_pred_data2, label='Predicted', color='r', marker='x', linestyle='--')
plt.xlabel('Sample Index')
plt.ylabel('Max Corrosion Rate')
plt.title('Actual vs Predicted Values')
plt.legend()
plt.grid(True)
plt.show()