In [3]:
import pandas as pd

# 文件路径
file_path = "/home/develop/VAE/data/csv/final.csv"

# 读取CSV文件并打印列名
data = pd.read_csv(file_path)
print(data.columns)

Index(['sensible_heat', 'latent_heat', 'ground_temperature', 'rain_energy',
       'outgoing_long_wave_radiation', 'incoming_long_wave_radiation',
       'net_long_wave_radiation', 'reflected_short_wave_radiation',
       'incoming_short_wave_radiation', 'net_short_wave_radiation',
       'parametrized_albedo', 'incoming_short_wave_on_horizontal',
       'direct_incoming_short_wave', 'diffuse_incoming_short_wave',
       'air_temperature', 'surface_temperature(mod)', 'bottom_temperature',
       'relative_humidity', 'wind_velocity', 'wind_velocity_drift',
       'wind_direction', 'solid_precipitation_rate', 'snow_height(mod)',
       'snow_height(meas)', 'hoar_size', '24h_height_of_new_snow',
       '3d_sum_of_daily_height_of_new_snow', 'snow_water_equivalent',
       'total_amount_of_water', 'rain_rate', 'virtual_lysimeter',
       'sublimation_mass', 'evaporated_mass', 'stability_class', 'z_Sdef',
       'deformation_rate_stability_index', 'z_Sn38', 'natural_stability_index',
       

In [4]:
import pandas as pd

# 文件路径
input_file = "/home/develop/VAE/data/csv/final.csv"
output_file = "/home/develop/VAE/data/csv/final_temp.csv"

# 温度相关列
temperature_columns = [
    "ground_temperature",
    "surface_temperature(mod)",
    "air_temperature",
    "bottom_temperature"
]

# 读取CSV文件并筛选温度相关列
try:
    data = pd.read_csv(input_file)
    temp_data = data[temperature_columns]
    temp_data.to_csv(output_file, index=False)
    print(f"温度相关列已保存到文件: {output_file}")
except Exception as e:
    print(f"处理文件时出错: {e}")


处理文件时出错: "['surface_temperature(meas)'] not in index"


In [1]:
import pandas as pd
data = pd.read_csv('/home/develop/VAE/data/csv/final_temp.csv')
x,y=data.shape
print(f"文件行数{x},列数{y}")

文件行数1186,列数4


In [6]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras import layers, Model
import matplotlib.pyplot as plt
from datetime import datetime

# 数据读取
data = pd.read_csv('/home/develop/VAE/data/csv/final_temp.csv')
X = data.values

# 数据集划分
train_size = int(len(X) * 0.8)
X_train, X_test = X[:train_size], X[train_size:]

input_dim = X.shape[1] 

# 超参数网格
latent_dims = [1, 3, 5, 7]  # 潜在空间维度
batch_sizes = [8, 16, 32]
epochs_list = [10, 20, 30]
kl_loss_weight = 1.0  

save_dir = "/home/develop/VAE/Result/Temp_pic/"
os.makedirs(save_dir, exist_ok=True)
log_file = "/home/develop/VAE/Result/Temp_train.log" 

def build_vae(input_dim, latent_dim):
    # 编码器
    input_layer = layers.Input(shape=(input_dim,))
    encoder = layers.Dense(16, activation="relu")(input_layer)
    encoder = layers.Dense(8, activation="relu")(encoder)
    z_mean = layers.Dense(latent_dim)(encoder)
    z_log_var = layers.Dense(latent_dim)(encoder)

    # 采样层
    def sampling(args):
        z_mean, z_log_var = args
        batch = tf.shape(z_mean)[0]
        dim = tf.shape(z_mean)[1]
        epsilon = tf.random.normal(shape=(batch, dim))
        return z_mean + tf.exp(0.5 * z_log_var) * epsilon

    z = layers.Lambda(sampling, output_shape=(latent_dim,))([z_mean, z_log_var])

    # 解码器
    decoder = layers.Dense(8, activation="relu")(z)
    decoder = layers.Dense(16, activation="relu")(decoder)
    decoder_output = layers.Dense(input_dim, activation="sigmoid")(decoder)

    vae = Model(input_layer, [decoder_output, z_mean, z_log_var])

    def vae_loss(y_true, y_pred):
        decoder_output = y_pred[0]
        z_mean = y_pred[1]
        z_log_var = y_pred[2]
        reconstruction_loss_fn = tf.keras.losses.MeanSquaredError()
        reconstruction_loss = reconstruction_loss_fn(y_true, decoder_output)
        kl_loss = -0.5 * tf.reduce_sum(1 + z_log_var - tf.square(z_mean) - tf.exp(z_log_var), axis=-1)
        return reconstruction_loss + kl_loss_weight * tf.reduce_mean(kl_loss)

    return vae, vae_loss

# 开始训练和记录日志
with open(log_file, "w") as log:
    log.write("Training Log\n")
    log.write("Parameters: latent_dim, batch_size, epochs\n")
    log.write("Results: reconstruction_error_threshold, anomalies_detected, training_time\n")
    log.write("-" * 80 + "\n")

    for latent_dim in latent_dims:
        for batch_size in batch_sizes:
            for epochs in epochs_list:
                # 构建模型
                vae, vae_loss = build_vae(input_dim, latent_dim)
                vae.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.0005), loss=vae_loss)

                # 训练模型
                start_time = datetime.now()
                history = vae.fit(
                    X_train, X_train,
                    epochs=epochs,
                    batch_size=batch_size,
                    validation_data=(X_test, X_test),
                    verbose=0
                )
                training_time = datetime.now() - start_time

                # 绘制训练损失
                plt.figure()
                plt.plot(history.history["loss"], label="Train Loss")
                plt.plot(history.history["val_loss"], label="Validation Loss")
                plt.xlabel("Epoch")
                plt.ylabel("Loss")
                plt.legend()
                plt.title(f"Loss (latent_dim={latent_dim}, batch_size={batch_size}, epochs={epochs})")
                plt.savefig(f"{save_dir}loss_latent{latent_dim}_batch{batch_size}_epochs{epochs}.png")
                plt.close()

                # 异常检测
                X_pred = vae.predict(X_test, verbose=0)[0]
                reconstruction_error = np.mean(np.square(X_test - X_pred), axis=1)
                threshold = np.percentile(reconstruction_error, 95)
                anomalies = reconstruction_error > threshold

                # 绘制重构误差分布
                plt.figure()
                plt.hist(reconstruction_error, bins=50)
                plt.xlabel("Reconstruction Error")
                plt.ylabel("Number of Samples")
                plt.title(f"Error Dist. (latent_dim={latent_dim}, batch_size={batch_size}, epochs={epochs})")
                plt.savefig(f"{save_dir}error_dist_latent{latent_dim}_batch{batch_size}_epochs{epochs}.png")
                plt.close()
                log.write(f"latent_dim={latent_dim}, batch_size={batch_size}, epochs={epochs}\n")
                log.write(f"reconstruction_error_threshold={threshold:.4f}, anomalies_detected={np.sum(anomalies)}, training_time={training_time}\n")
                log.write("-" * 80 + "\n")
                print(f"Params: latent_dim={latent_dim}, batch_size={batch_size}, epochs={epochs}")
                print(f"Reconstruction Error Threshold: {threshold:.4f}")
                print(f"Anomalies detected: {np.sum(anomalies)}")
                print(f"Training Time: {training_time}\n") 

Filtered data shape: (1186, 4)


OperatorNotAllowedInGraphError: in user code:

    File "/tmp/ipykernel_794056/3041667768.py", line 59, in vae_loss  *
        decoder_output, z_mean, z_log_var = y_pred

    OperatorNotAllowedInGraphError: Iterating over a symbolic `tf.Tensor` is not allowed. You can attempt the following resolutions to the problem: If you are running in Graph mode, use Eager execution mode or decorate this function with @tf.function. If you are using AutoGraph, you can try decorating this function with @tf.function. If that does not work, then you may be using an unsupported feature or your source code may not be visible to AutoGraph. See https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/autograph/g3doc/reference/limitations.md#access-to-source-code for more information.
