In [19]:
# Split each data sequence into individual timesteps so a simple CNN can load them
import numpy as np
import os
import glob

# Load all data and label files, sort them, then process every file
# Sequence information is lost here, so standardizing within each sequence may be needed (not done yet)
def split_and_save_data(data_dirs, label_dirs, output_dir):
  """
  Split every sequence array into per-timestep arrays and save them by label.

  All ``*.npy`` files found in ``data_dirs`` and ``label_dirs`` are sorted by
  path and paired by position. Each data array must have shape (L, D, F):
    L: sequence length (number of timesteps)
    D: number of directions
    F: number of frequency bins
  Timestep ``j`` of a file is written to
  ``<output_dir>/<labels[j]>/<input file stem>_timestep_<j>.npy``; the label
  directory is created when it does not exist yet.

  Args:
    data_dirs: iterable of directories containing the sequence arrays.
    label_dirs: iterable of directories containing the label arrays, one
      label per timestep of the matching data file.
    output_dir: root directory that receives one sub-directory per label.

  Raises:
    ValueError: if the number of data and label files differs, if a data
      array is not 3-dimensional, or if a label file has fewer entries than
      its data file has timesteps.
  """
  data_paths = sorted(get_all_files_from_dirs(data_dirs))
  label_paths = sorted(get_all_files_from_dirs(label_dirs))

  # Files are paired purely by sorted position, so a differing count means at
  # least one sequence would be saved with another sequence's labels.
  if len(data_paths) != len(label_paths):
    raise ValueError(
      f"found {len(data_paths)} data files but {len(label_paths)} label files"
    )

  created_dirs = set()
  for data_path, label_path in zip(data_paths, label_paths):
    data = np.load(data_path)
    # NOTE: allow_pickle=True can execute arbitrary code while loading; only
    # use this on label files from a trusted source.
    labels = np.load(label_path, allow_pickle=True)
    input_filename = os.path.splitext(os.path.basename(data_path))[0]

    if data.ndim != 3:
      raise ValueError(
        f"{data_path}: expected a 3-D array (L, D, F), got shape {data.shape}"
      )
    num_timesteps = data.shape[0]

    # Validate up front so a short label file cannot leave a half-written
    # sequence behind.
    num_labels = len(labels) if np.ndim(labels) > 0 else 0
    if num_labels < num_timesteps:
      raise ValueError(
        f"{label_path}: {num_labels} labels for {num_timesteps} timesteps "
        f"in {data_path}"
      )

    for j in range(num_timesteps):
      label_dir = os.path.join(output_dir, labels[j])
      # np.save does not create missing parent directories.
      if label_dir not in created_dirs:
        os.makedirs(label_dir, exist_ok=True)
        created_dirs.add(label_dir)

      output_file = os.path.join(label_dir, f"{input_filename}_timestep_{j}.npy")
      np.save(output_file, data[j])

def get_all_files_from_dirs(directories, extension="*.npy"):
  """
  Collect the paths matching a glob pattern directly inside each directory.

  Args:
    directories: iterable of directory paths to search (not recursive).
    extension: glob pattern joined onto each directory; defaults to "*.npy".

  Returns:
    A single list with the matches of every directory, in the order the
    directories were given. Order within a directory is whatever
    ``glob.glob`` returns, so callers that need a stable order should sort.
  """
  return [
    matched_path
    for directory in directories
    for matched_path in glob.glob(os.path.join(directory, extension))
  ]


In [20]:
# Directories holding the input sequence arrays to split.
data_dirs = [
  "/home/nishimura-k/audioprocessing/datas/npy_MUSIC/EF/EF_ltor_150_0to59",
  "/home/nishimura-k/audioprocessing/datas/npy_MUSIC/EF/EF_rtol_150_0to59",
]
# Directories holding the label arrays, listed in the same order as data_dirs.
label_dirs = [
  "/home/nishimura-k/audioprocessing/datas/npy_AREA_label/EF/EF_ltor_150_0to59",
  "/home/nishimura-k/audioprocessing/datas/npy_AREA_label/EF/EF_rtol_150_0to59",
]
# Root directory that receives one sub-directory per label.
output_dir = "/home/nishimura-k/audioprocessing/datas_mono"

split_and_save_data(
  data_dirs=data_dirs,
  label_dirs=label_dirs,
  output_dir=output_dir,
)

In [23]:
# Standardize the data: compute one global mean/std over every per-timestep
# array, then write (data - mean) / std for each file into a mirrored
# directory tree under standarized_data_dir.
parent_data_dir = "/home/nishimura-k/audioprocessing/datas_mono"
labels = ["none", "left", "center", "right"]
data_label_dirs = [os.path.join(parent_data_dir, label) for label in labels]
standarized_data_dir = "/home/nishimura-k/audioprocessing/datas_mono_st"

files = get_all_files_from_dirs(data_label_dirs)
if not files:
  # Without this guard the statistics would silently become NaN.
  raise FileNotFoundError(f"no .npy files found under {parent_data_dir}")

# Concatenate the flattened arrays directly instead of extending a Python
# list element by element, which allocates one Python object per value.
all_data = np.concatenate([np.load(file).ravel() for file in files])
mean = np.mean(all_data)
std = np.std(all_data)

for label in labels:
  data_label_dir = os.path.join(parent_data_dir, label)
  standarized_data_label_dir = os.path.join(standarized_data_dir, label)
  # np.save does not create missing parent directories.
  os.makedirs(standarized_data_label_dir, exist_ok=True)

  files = get_all_files_from_dirs([data_label_dir])
  for file in files:
    basename = os.path.basename(file)
    data = np.load(file)
    # With zero variance every value equals the mean, so only centre the
    # data; previously data_st was left undefined (or stale from earlier
    # kernel state) in that case and was saved anyway.
    if std > 0:
      data_st = (data - mean) / std
    else:
      data_st = data - mean
    np.save(os.path.join(standarized_data_label_dir, basename), data_st)