In [5]:
import os
import numpy as np
from scipy.io import loadmat

# Root folders used by this notebook.
# NOTE(review): download_save_path is an absolute, machine-specific path;
# adjust it when running on another machine.
download_save_path = 'E:/Dataset/wind_shear/Data_Download'
exception_save_path = '../Dataset/Exception_Data'
plt_save_path = '../result/WSHR/figures'


def _list_subfolders(root_path):
    """Return the names of the directories directly under ``root_path``.

    Regular files are skipped. The names are sorted because ``os.listdir``
    returns entries in arbitrary order, which would otherwise make every
    loop over these folders order-dependent on the file system.
    """
    return sorted(
        item for item in os.listdir(root_path)
        if os.path.isdir(os.path.join(root_path, item))
    )


# Names of the sub-folders (directories only) under each root folder
download_folder_names = _list_subfolders(download_save_path)
exception_folder_names = _list_subfolders(exception_save_path)
# Not referenced by the cells visible here; kept for any later use.
instruction_folder_names = ["@Instructions"]

# Full path of every sub-folder
download_folder_paths = [os.path.join(download_save_path, item) for item in download_folder_names]
exception_folder_paths = [os.path.join(exception_save_path, item) for item in exception_folder_names]

In [2]:
# Preset classification of the selected variables, one list per group.
group_names_list = ["mechanism", "power", "control", "external", "recorder", "unclassified"]

var_group_mechanism = [
    "AIL_1", "AIL_2", "FLAP", "ELEV_1", "ELEV_2", "RUDD",
    "SPL_1", "SPL_2", "SPLG", "ABRK", "NSQT",
    "AOA1", "AOA2", "GLS", "PTCH", "ROLL", "TH",
    "TAS", "CASM", "GS",
    "VRTG", "LATG", "LONG", "FPAC", "CTAC",
]
var_group_power = [
    "FADS",
    "FQTY_1", "FQTY_2", "FQTY_3", "FQTY_4",
    "OIT_1", "OIT_2", "OIT_3", "OIT_4", "OIPL",
    "LGDN",
]
var_group_control = [
    "HDGS", "PTRM",
    "RUDP", "CCPC", "CCPF", "CWPC", "CWPF",
    "SNAP",
    "GPWS", "SHKR", "FADF",
]
var_group_external = [
    "ALT", "WS", "WD", "PT", "TAT",
    "LOC",
]
var_group_recorder = ["PH"]
var_group_unclassified = ["EVNT", "HF1", "HF2", "VHF1", "VHF2", "VHF3", "SMKB"]

# Group name -> variable list; the pairing follows the order of group_names_list.
var_groups_dict = dict(zip(
    group_names_list,
    [
        var_group_mechanism,
        var_group_power,
        var_group_control,
        var_group_external,
        var_group_recorder,
        var_group_unclassified,
    ],
))

# Group name -> number of variables in that group.
group_lens_dict = {name: len(members) for name, members in var_groups_dict.items()}

# Report the size of each group, then the grand total.
for group_name, var_group in var_groups_dict.items():
    print(f"{group_name}: {len(var_group)}")
print(f"\n{sum(group_lens_dict.values())} variables in total")

mechanism: 25
power: 11
control: 11
external: 6
recorder: 1
unclassified: 7

61 variables in total


In [8]:
'''
检查所有预选变量在原始数据中的完整性，筛选出能保证全变量完整的子数据集作为MLP的训练集和测试集；
以训练的MLP对原始数据进行数据补正，以供之后通过随机森林在WSHR的二分类问题上对特征变量进行再次评估，以迭代出新的预选变量集
'''

# Check the integrity of the preselected variables and build the working subset
def check_var_integrity(mat, mat_name, var_groups=None):
    """Tell whether every preselected variable in ``mat`` carries data.

    The data of a variable is read as ``mat[var_name][0][0][0]``. A variable
    counts as missing when its key is absent from ``mat``, when that index
    chain cannot be followed, when the data array is empty, or when every
    element of the data array is zero.

    Parameters
    ----------
    mat : mapping
        Content of one .mat file; the callers in this notebook pass the
        result of ``scipy.io.loadmat``.
    mat_name : str
        File name, used only in the acceptance message.
    var_groups : dict, optional
        Group name -> list of variable names to check. Defaults to the
        notebook-level ``var_groups_dict``.

    Returns
    -------
    bool
        ``True`` (after printing ``Receive mat <mat_name>``) when no checked
        variable is missing, ``False`` as soon as one missing variable is
        found.
    """
    if var_groups is None:
        var_groups = var_groups_dict
    for var_group in var_groups.values():
        for var_name in var_group:
            try:
                data = np.asarray(mat[var_name][0][0][0])
            except (KeyError, IndexError):
                # Variable absent from this file: the file cannot be complete.
                return False
            # An empty array used to slip through: its mean is NaN, and
            # NaN == 0 is False. ``not np.any`` is the all-zero test.
            if data.size == 0 or not np.any(data):
                return False
    print(f"Receive mat {mat_name}")
    return True

# Working subset: file name -> loaded .mat content, restricted to the files
# in which every preselected variable carries data.
work_dict = {}

# The same scan over exception_folder_paths is disabled in this notebook; to
# enable it, run the loop below over exception_folder_paths as well.
# Accepted files recorded in the notebook (the same list was noted for both
# the exception-folder scan and the download-folder scan):
#   ['653200212291431.mat', '660200210081101.mat', '686200105101854.mat']
for download_folder_path in download_folder_paths:
    # sorted(): os.listdir returns entries in arbitrary order.
    for download_mat_name in sorted(os.listdir(download_folder_path)):
        # Skip anything that is not a .mat file instead of handing it to loadmat.
        if not download_mat_name.lower().endswith(".mat"):
            continue
        d_mat = loadmat(os.path.join(download_folder_path, download_mat_name))
        if check_var_integrity(d_mat, download_mat_name):
            work_dict[download_mat_name] = d_mat

work_dict_path = "../result/variable_evaluate/work_dict.npy"
# Create the output folder first so the save cannot fail after the whole scan.
os.makedirs(os.path.dirname(work_dict_path), exist_ok=True)
# NOTE: np.save stores a dict as a pickled object array; read it back with
# np.load(work_dict_path, allow_pickle=True).item().
np.save(work_dict_path, work_dict)

Receive mat 653200212291431.mat
Receive mat 660200210081101.mat
Receive mat 686200105101854.mat


In [14]:
# Collect every preselected variable that lacks data in at least one scanned
# file; these are the variables to be completed later.
def _has_no_data(mat, var_name):
    """Return True when ``var_name`` is absent, empty or all-zero in ``mat``.

    The data array is read as ``mat[var_name][0][0][0]``.
    """
    try:
        data = np.asarray(mat[var_name][0][0][0])
    except (KeyError, IndexError):
        return True
    return data.size == 0 or not np.any(data)


def _iter_mat_paths(folder_paths):
    """Yield the path of every .mat file in ``folder_paths``, sorted per folder."""
    for folder_path in folder_paths:
        for mat_name in sorted(os.listdir(folder_path)):
            if mat_name.lower().endswith(".mat"):
                yield os.path.join(folder_path, mat_name)


# Folders to scan. The scan over download_folder_paths is disabled in this
# notebook; add those paths to this list to include the downloaded data.
scan_folder_paths = list(exception_folder_paths)

# Load each file only once and test all still-undecided variables against it,
# instead of reloading every file from disk for every single variable.
pending_var_names = {var_name for var_group in var_groups_dict.values() for var_name in var_group}
invalid_var_names = set()
for mat_path in _iter_mat_paths(scan_folder_paths):
    if not pending_var_names:
        # Every variable is already known to be invalid; nothing left to learn.
        break
    e_mat = loadmat(mat_path)
    newly_invalid = {var_name for var_name in pending_var_names if _has_no_data(e_mat, var_name)}
    invalid_var_names |= newly_invalid
    pending_var_names -= newly_invalid

# Report in the preset group / variable order and build the per-group result.
invalid_var_dict = {}
for group_name, var_group in var_groups_dict.items():
    invalid_var_list = []
    for var_name in var_group:
        print(f"Checking {var_name} in {group_name} ...")
        if var_name in invalid_var_names:
            invalid_var_list.append(var_name)
            print(f"----- {var_name} in {group_name} is invalid -----")
    invalid_var_dict[group_name] = invalid_var_list

Checking AIL_1 in mechanism ...
Checking AIL_2 in mechanism ...
Checking FLAP in mechanism ...
----- FLAP in mechanism is invalid -----
Checking ELEV_1 in mechanism ...
Checking ELEV_2 in mechanism ...
Checking RUDD in mechanism ...
Checking SPL_1 in mechanism ...
Checking SPL_2 in mechanism ...
Checking SPLG in mechanism ...
----- SPLG in mechanism is invalid -----
Checking ABRK in mechanism ...
Checking NSQT in mechanism ...
----- NSQT in mechanism is invalid -----
Checking AOA1 in mechanism ...
----- AOA1 in mechanism is invalid -----
Checking AOA2 in mechanism ...
----- AOA2 in mechanism is invalid -----
Checking GLS in mechanism ...
----- GLS in mechanism is invalid -----
Checking PTCH in mechanism ...
----- PTCH in mechanism is invalid -----
Checking ROLL in mechanism ...
----- ROLL in mechanism is invalid -----
Checking TH in mechanism ...
----- TH in mechanism is invalid -----
Checking TAS in mechanism ...
----- TAS in mechanism is invalid -----
Checking CASM in mechanism ...
-

In [18]:
# Per-group count of invalid variables.
# NOTE: group_lens_dict is the notebook-level size table; it is overwritten
# here with the invalid counts and, further down, with the valid counts.
for group_name, var_group in invalid_var_dict.items():
    group_lens_dict[group_name] = len(var_group)
    print(f"{group_name}: {len(var_group)}")
print(f"\n{sum(group_lens_dict.values())} invalid variables in total\n")

# Valid variables = preselected variables minus the invalid ones.
# A set difference would return them in an order that changes between kernel
# sessions, so filter the preset list instead and keep its order.
# dict.fromkeys drops repeated names, as the set-based version did.
valid_var_dict = {}
for group_name, var_group in var_groups_dict.items():
    invalid_names = set(invalid_var_dict[group_name])
    valid_var_dict[group_name] = [
        var_name for var_name in dict.fromkeys(var_group) if var_name not in invalid_names
    ]

# Per-group count of valid variables.
for group_name, var_group in valid_var_dict.items():
    group_lens_dict[group_name] = len(var_group)
    print(f"{group_name}: {len(var_group)}")
print(f"\n{sum(group_lens_dict.values())} valid variables in total")

mechanism: 14
power: 7
control: 9
external: 6
recorder: 1
unclassified: 7

44 invalid variables in total

mechanism: 11
power: 4
control: 2
external: 0
recorder: 0
unclassified: 0

17 valid variables in total


{'mechanism': ['FLAP', 'SPLG', 'NSQT', 'AOA1', 'AOA2', 'GLS', 'PTCH', 'ROLL', 'TH', 'TAS', 'CASM', 'GS', 'FPAC', 'CTAC'], 'power': ['FADS', 'FQTY_1', 'FQTY_2', 'FQTY_3', 'FQTY_4', 'OIPL', 'LGDN'], 'control': ['HDGS', 'RUDP', 'CCPC', 'CCPF', 'CWPC', 'CWPF', 'SNAP', 'SHKR', 'FADF'], 'external': ['ALT', 'WS', 'WD', 'PT', 'TAT', 'LOC'], 'recorder': ['PH'], 'unclassified': ['EVNT', 'HF1', 'HF2', 'VHF1', 'VHF2', 'VHF3', 'SMKB']}
