In [None]:
import h5py

def print_h5_structure(file_path):
    with h5py.File(file_path, 'r') as f:
        def print_name(name, obj):
            if isinstance(obj, h5py.Group):
                print(f"Group: {name}")
            elif isinstance(obj, h5py.Dataset):
                print(f"Dataset: {name}, Shape: {obj.shape}")
        f.visititems(print_name)

print_h5_structure('/mnt/d/Research/PHD/DLEPS/data/zinc_vae_grammar_L56_E100_val.hdf5')


In [None]:
print_h5_structure('/mnt/d/Research/PHD/DLEPS/data/adjusted_weights.hdf5')

# adjust_weights.py

In [1]:
import h5py
import numpy as np

def convert_conv1d_weights(weights):
    if len(weights.shape) == 4:
        # 旧的 Conv1D 权重形状: (kernel_size, 1, input_dim, filters)
        # 新的 Conv1D 权重形状: (kernel_size, input_dim, filters)
        weights = np.squeeze(weights, axis=1)
    return weights

def adjust_weights(weights_file, adjusted_weights_file):
    with h5py.File(weights_file, 'r') as f_in, h5py.File(adjusted_weights_file, 'w') as f_out:
        # 获取 'model_weights' 组
        model_weights_group = f_in.get('model_weights')
        if model_weights_group is None:
            print("No 'model_weights' group found in the weights file.")
            return
        
        # 遍历 'model_weights' 组中的所有子组（即各层）
        for layer_name in model_weights_group:
            if layer_name.startswith('conv'):
                # 处理 Conv1D 层
                layer_group_in = model_weights_group[layer_name]
                layer_group_out = f_out.create_group(layer_name)
                for dataset_name in layer_group_in:
                    data = layer_group_in[dataset_name][()]
                    if '_W_' in dataset_name:
                        print(f"Adjusting weights for: {layer_name}/{dataset_name}")
                        data = convert_conv1d_weights(data)
                        new_weight_name = 'kernel:0'
                    elif '_b_' in dataset_name:
                        new_weight_name = 'bias:0'
                    else:
                        new_weight_name = dataset_name
                    layer_group_out.create_dataset(new_weight_name, data=data)
            elif layer_name.startswith('dense'):
                # 处理 Dense 层
                layer_group_in = model_weights_group[layer_name]
                layer_group_out = f_out.create_group(layer_name)
                for dataset_name in layer_group_in:
                    data = layer_group_in[dataset_name][()]
                    if '_W_' in dataset_name:
                        new_weight_name = 'kernel:0'
                    elif '_b_' in dataset_name:
                        new_weight_name = 'bias:0'
                    else:
                        new_weight_name = dataset_name
                    layer_group_out.create_dataset(new_weight_name, data=data)
            elif layer_name.startswith('gru'):
                # 处理 GRU 层
                layer_group_in = model_weights_group[layer_name]
                layer_group_out = f_out.create_group(layer_name)
                for dataset_name in layer_group_in:
                    data = layer_group_in[dataset_name][()]
                    if '_W_' in dataset_name:
                        new_weight_name = 'kernel:0'
                        layer_group_out.create_dataset(new_weight_name, data=data)
                    elif '_U_' in dataset_name:
                        new_weight_name = 'recurrent_kernel:0'
                        layer_group_out.create_dataset(new_weight_name, data=data)
                    elif '_b_' in dataset_name:
                        new_weight_name = 'bias:0'
                        layer_group_out.create_dataset(new_weight_name, data=data)
                    else:
                        new_weight_name = dataset_name
                        layer_group_out.create_dataset(new_weight_name, data=data)
            elif layer_name.startswith('latent_input'):
                # 处理 latent_input 层
                layer_group_in = model_weights_group[layer_name]
                layer_group_out = f_out.create_group(layer_name)
                for dataset_name in layer_group_in:
                    data = layer_group_in[dataset_name][()]
                    if '_W_' in dataset_name:
                        new_weight_name = 'kernel:0'
                    elif '_b_' in dataset_name:
                        new_weight_name = 'bias:0'
                    else:
                        new_weight_name = dataset_name
                    layer_group_out.create_dataset(new_weight_name, data=data)
            elif layer_name.startswith('z_mean') or layer_name.startswith('z_log_var'):
                # 处理 z_mean 和 z_log_var 层
                layer_group_in = model_weights_group[layer_name]
                layer_group_out = f_out.create_group(layer_name)
                for dataset_name in layer_group_in:
                    data = layer_group_in[dataset_name][()]
                    if '_W_' in dataset_name:
                        new_weight_name = 'kernel:0'
                    elif '_b_' in dataset_name:
                        new_weight_name = 'bias:0'
                    else:
                        new_weight_name = dataset_name
                    layer_group_out.create_dataset(new_weight_name, data=data)
            elif layer_name.startswith('decoded_mean'):
                # 处理 decoded_mean 层
                layer_group_in = model_weights_group[layer_name]
                layer_group_out = f_out.create_group(layer_name)
                for dataset_name in layer_group_in:
                    data = layer_group_in[dataset_name][()]
                    if '_W_' in dataset_name:
                        new_weight_name = 'kernel:0'
                    elif '_b_' in dataset_name:
                        new_weight_name = 'bias:0'
                    else:
                        new_weight_name = dataset_name
                    layer_group_out.create_dataset(new_weight_name, data=data)
            elif layer_name.startswith('flatten'):
                # 处理 flatten 层（无权重）
                layer_group_in = model_weights_group[layer_name]
                f_out.create_group(layer_name)
            elif layer_name.startswith('repeat_vector'):
                # 处理 repeat_vector 层（无权重）
                layer_group_in = model_weights_group[layer_name]
                f_out.create_group(layer_name)
            elif layer_name.startswith('lambda'):
                # 处理 lambda 层（无权重）
                layer_group_in = model_weights_group[layer_name]
                f_out.create_group(layer_name)
            else:
                # 其他层按原样复制
                layer_group_in = model_weights_group[layer_name]
                layer_group_out = f_out.create_group(layer_name)
                for dataset_name in layer_group_in:
                    data = layer_group_in[dataset_name][()]
                    layer_group_out.create_dataset(dataset_name, data=data)
                
        # 注意：optimizer_weights 组已被忽略
        print("Weights adjustment completed.")

# 使用示例：
adjust_weights(
    '/mnt/d/Research/PHD/DLEPS/data/zinc_vae_grammar_L56_E100_val.hdf5', 
    '/mnt/d/Research/PHD/DLEPS/data/adjusted_weights.hdf5'
)


Adjusting weights for: conv_1/conv_1_W_1:0
Adjusting weights for: conv_2/conv_2_W_1:0
Adjusting weights for: conv_3/conv_3_W_1:0


ValueError: Unable to synchronously create dataset (name already exists)