In [1]:
import os
import pandas as pd
import numpy as np
from sklearn.preprocessing import OneHotEncoder

# Build a min-max-normalized training set from per-class folders of CSV files
# and save it, together with integer class labels, to a single .npz archive.

# Class folders; a folder's position in this list is its integer label.
folders = ['Normal', 'AddWeight', 'PressureGain_constant', 'PropellerDamage_bad', 'PropellerDamage_slight']
labels = np.arange(len(folders))

# Every accepted sample has exactly this many rows and these feature columns.
rows_per_sample = 180
columns_to_read = range(1, 17)  # 0-based columns 1..16; the first column is skipped
n_features = len(columns_to_read)

# Archive written at the end; also used in the confirmation message so the
# two can never disagree.
output_path = 'trainset_normalized_no.npz'

# Accumulators for the accepted samples and their labels.
data_list = []
label_list = []

# Walk every class folder.
for label, folder in zip(labels, folders):
    # os.listdir returns entries in arbitrary order; sort so that the sample
    # order in the saved archive is reproducible.
    for file in sorted(os.listdir(folder)):
        # Read at most rows_per_sample data rows (the first line is a header).
        file_path = os.path.join(folder, file)
        data = pd.read_csv(file_path, header=0, nrows=rows_per_sample, usecols=columns_to_read)
        # Keep only files that provide the full number of rows.
        if data.shape[0] == rows_per_sample:
            data_list.append(data)
            label_list.append(label)
        else:
            print(f'Error: {file} has {data.shape[0]} rows.')

# Fail with a clear message instead of an opaque NumPy reduction error.
if not data_list:
    raise ValueError('No valid samples were found in the class folders.')

# Stack the accepted samples into one array.
data_array = np.array(data_list)

# Flatten to (total_rows, n_features) so statistics are computed per feature
# over the whole data set.
reshaped_data_array = data_array.reshape(-1, n_features)

# Global per-feature minimum and maximum.
global_min = reshaped_data_array.min(axis=0)
global_max = reshaped_data_array.max(axis=0)
print(f'Global min: {global_min}')

# Min-max normalization with the global statistics. A constant feature has a
# zero range, and dividing by it would turn the whole column into NaN (0/0);
# substitute a divisor of 1 so that such a column normalizes to 0 instead.
value_range = global_max - global_min
safe_range = np.where(value_range == 0, 1, value_range)
normalized_data_array = (reshaped_data_array - global_min) / safe_range

# Restore the (samples, rows, features) layout.
normalized_data_array = normalized_data_array.reshape(-1, rows_per_sample, n_features)

# Labels are stored as a column vector of integer class indices; no one-hot
# encoding is applied here.
label_array = np.array(label_list).reshape(-1, 1)

# Save the data and labels.
np.savez(output_path, data=normalized_data_array, labels=label_array)

# Confirmation, naming the file that was actually written.
print(f'Normalized train set and labels saved as {output_path}')


Error: Normal_130.csv has 164 rows.
Error: Normal_73.csv has 136 rows.
Error: Normal_8.csv has 172 rows.
Error: PropellerDamage_bad_166.csv has 146 rows.
Error: PropellerDamage_slight_130.csv has 146 rows.
Global min: [ 1.0000e+03  1.0000e+03  1.0000e+03  1.0000e+03 -1.2000e-01  5.8400e+02
  1.1430e+01 -1.6080e+01 -1.7210e+01 -1.8001e+02 -1.9600e+01 -1.8730e+01
 -1.7330e+01 -9.7500e+01 -7.2400e+01 -1.1039e+03]
Normalized train set and labels saved as trainset_normalized.npz
