# This notebook builds the training and validation datasets (x/y WAV pairs) from the Excel sheets that list the source recordings

In [1]:
import pandas as pd
import numpy as np
import uuid
import os
import shutil
from wavinfo import WavInfoReader
import soundfile as sf
from tqdm import tqdm
from multiprocessing import Pool, RLock, freeze_support
from tqdm.contrib.concurrent import process_map, thread_map

In [10]:
# Dataset folders are derived from the working directory: "notebooks" in the
# path is swapped for "data", and the train/validation sub-folders are appended.
CUR_PATH = os.getcwd()
_DATA_ROOT = CUR_PATH.replace("notebooks", "data")
TRAIN_DATA_PATH_PREFIX = f"{_DATA_ROOT}/datasets/train/"
VAL_DATA_PATH_PREFIX = f"{_DATA_ROOT}/datasets/validation/"

# Backslashes are replaced with forward slashes so every path uses one separator.
X_DATA_PATH = f"{TRAIN_DATA_PATH_PREFIX}x_data".replace('\\', '/')
Y_DATA_PATH = f"{TRAIN_DATA_PATH_PREFIX}y_data".replace('\\', '/')
VAL_X_DATA_PATH = f"{VAL_DATA_PATH_PREFIX}x_data".replace('\\', '/')
VAL_Y_DATA_PATH = f"{VAL_DATA_PATH_PREFIX}y_data".replace('\\', '/')

# Echo the resolved folders so a wrong working directory is obvious at a glance.
print(f"X training data path:\t{X_DATA_PATH}")
print(f"Y training data path:\t{Y_DATA_PATH}")
print(f"X validation data path:\t{VAL_X_DATA_PATH}")
print(f"Y validation data path:\t{VAL_Y_DATA_PATH}")

X training data path:	D:/Repos/NAEC/data/datasets/train/x_data
Y training data path:	D:/Repos/NAEC/data/datasets/train/y_data
X validation data path:	D:/Repos/NAEC/data/datasets/validation/x_data
Y validation data path:	D:/Repos/NAEC/data/datasets/validation/y_data


In [3]:
def create_silent_wav(filename, x_file_info):
    """Write an all-zero 16-bit PCM WAV sized after a reference recording.

    Args:
        filename: Destination path of the silent file.
        x_file_info: Object exposing ``data.frame_count`` and
            ``fmt.sample_rate`` (the callers in this notebook pass a
            ``WavInfoReader``); these set the length and sample rate.
    """
    # One-dimensional buffer of zeros, one entry per frame of the reference.
    silence = np.zeros(x_file_info.data.frame_count)
    rate = x_file_info.fmt.sample_rate
    sf.write(filename, silence, rate, 'PCM_16')

In [4]:
def process_x_y_far_end_signal(filename_x, x_data_path, y_data_path):
    """Add one far-end recording to the dataset with a silent target.

    The recording is copied into ``x_data_path`` and a silent WAV with the
    same frame count and sample rate is written into ``y_data_path``. Both
    files share a random UUID prefix so they can be matched up later.

    Args:
        filename_x: Path of the source WAV. Anything that is not a non-empty
            string (empty spreadsheet cells, NaN, None) is skipped.
        x_data_path: Folder that receives ``<uuid>_xdata.wav``.
        y_data_path: Folder that receives ``<uuid>_ydata.wav``.

    Returns:
        None.

    Raises:
        Whatever ``WavInfoReader``, ``shutil.copy`` or ``create_silent_wav``
        raise for a missing or unreadable file.
    """
    # A non-string covers None and NaN; an empty string is falsy.
    if not isinstance(filename_x, str) or not filename_x:
        return

    # Parse the header before touching the dataset folders: if the source is
    # missing or is not a readable WAV, nothing has been copied yet and no
    # x sample is left behind without its y partner.
    x_info = WavInfoReader(filename_x)

    uuid_hex = uuid.uuid4().hex
    x_path = x_data_path + "/" + uuid_hex + "_xdata.wav"
    y_path = y_data_path + "/" + uuid_hex + "_ydata.wav"
    shutil.copy(filename_x, x_path)
    create_silent_wav(y_path, x_info)

In [5]:
def process_x_y_near_end_signal(filename_x, x_data_path, y_data_path):
    """Add one near-end recording to the dataset as both input and target.

    The same source file is copied twice, once into ``x_data_path`` and once
    into ``y_data_path``, under a shared random UUID prefix.

    Args:
        filename_x: Path of the source WAV; empty strings, None and any
            non-string value are skipped.
        x_data_path: Folder that receives ``<uuid>_xdata.wav``.
        y_data_path: Folder that receives ``<uuid>_ydata.wav``.
    """
    # Nothing to do for blank or non-string entries.
    if filename_x == "" or filename_x is None:
        return
    if not isinstance(filename_x, str):
        return

    pair_id = uuid.uuid4().hex
    destinations = (
        "/".join((x_data_path, pair_id + "_xdata.wav")),
        "/".join((y_data_path, pair_id + "_ydata.wav")),
    )
    # x first, then y: the target is a copy of the input.
    for destination in destinations:
        shutil.copy(filename_x, destination)

In [6]:
def create_x_y_dataset(dataframe_tuple):
    """Turn one spreadsheet row into training x/y pairs.

    Args:
        dataframe_tuple: An ``(index, row)`` pair as yielded by
            ``DataFrame.iterrows()``; only the row is used.

    The 'FE Mic' and 'FE MV Mic' entries go through the far-end handler and
    the 'NE Clean' entry goes through the near-end handler, all writing into
    the training folders.
    """
    row = dataframe_tuple[1]
    for far_end_column in ('FE Mic', 'FE MV Mic'):
        process_x_y_far_end_signal(row[far_end_column], X_DATA_PATH, Y_DATA_PATH)
    process_x_y_near_end_signal(row['NE Clean'], X_DATA_PATH, Y_DATA_PATH)

In [14]:
# Load the training sheet (path is relative to the working directory). Its
# 'FE Mic', 'FE MV Mic' and 'NE Clean' columns are read row by row by
# create_x_y_dataset.
train_data = pd.read_excel("train_data.xlsx")

In [None]:
# Process every training row on a thread pool with a progress bar. iterrows()
# is a generator without a length, so the row count is passed as `total`.
freeze_support()
thread_map(create_x_y_dataset, train_data.iterrows(), total=len(train_data.index))

KeyboardInterrupt: 

In [None]:
# pd.read_excel returns an in-memory DataFrame and has already closed the
# workbook; a DataFrame has no close() method, so calling it only raises.
# Drop the reference instead so the table can be garbage-collected.
del train_data

In [11]:
def create_x_y_dataset_val(dataframe_tuple):
    """Turn one spreadsheet row into validation x/y pairs.

    Args:
        dataframe_tuple: An ``(index, row)`` pair as yielded by
            ``DataFrame.iterrows()``; only the row is used.

    Same column handling as the training variant, but the files are written
    into the validation folders.
    """
    row = dataframe_tuple[1]
    for far_end_column in ('FE Mic', 'FE MV Mic'):
        process_x_y_far_end_signal(row[far_end_column], VAL_X_DATA_PATH, VAL_Y_DATA_PATH)
    process_x_y_near_end_signal(row['NE Clean'], VAL_X_DATA_PATH, VAL_Y_DATA_PATH)

In [8]:
# Load the sheet used for the validation set; note that the file is named
# "test_data.xlsx" and the path is relative to the working directory.
validation_data = pd.read_excel("test_data.xlsx")

In [12]:
# Process every validation row on a thread pool with a progress bar. iterrows()
# is a generator without a length, so the row count is passed as `total`.
freeze_support()
thread_map(create_x_y_dataset_val, validation_data.iterrows(), total=len(validation_data.index))

  0%|          | 0/1124 [00:00<?, ?it/s]

[None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,