In [4]:
# ! pip install pandas

In [5]:
import numpy as np
import pandas as pd
import os
from glob import glob
from tqdm import tqdm
import torch

def gather_files_from_folder(PATH, ext='wav'):
    return [y for x in os.walk(PATH) for y in glob(os.path.join(x[0], f'*.{ext}'))]

In [6]:
root_folder = '/media/sdb1/made_emotts_2023/data/feature_output/nat_va_after_duration_tune'

pth_files = gather_files_from_folder(root_folder, ext='pth')
pth_files

['/media/sdb1/made_emotts_2023/data/feature_output/nat_va_after_duration_tune/0011/0011_000635.pth',
 '/media/sdb1/made_emotts_2023/data/feature_output/nat_va_after_duration_tune/0011/0011_001592.pth',
 '/media/sdb1/made_emotts_2023/data/feature_output/nat_va_after_duration_tune/0011/0011_000269.pth',
 '/media/sdb1/made_emotts_2023/data/feature_output/nat_va_after_duration_tune/0011/0011_001533.pth',
 '/media/sdb1/made_emotts_2023/data/feature_output/nat_va_after_duration_tune/0011/0011_000015.pth',
 '/media/sdb1/made_emotts_2023/data/feature_output/nat_va_after_duration_tune/0011/0011_001283.pth',
 '/media/sdb1/made_emotts_2023/data/feature_output/nat_va_after_duration_tune/0011/0011_000154.pth',
 '/media/sdb1/made_emotts_2023/data/feature_output/nat_va_after_duration_tune/0011/0011_000369.pth',
 '/media/sdb1/made_emotts_2023/data/feature_output/nat_va_after_duration_tune/0011/0011_000522.pth',
 '/media/sdb1/made_emotts_2023/data/feature_output/nat_va_after_duration_tune/0011/0011_001

In [7]:
for f in tqdm(pth_files):
    f_out = f.replace('.pth', '.npy')
    f_out = os.path.join(os.path.dirname(os.path.dirname(f_out)), os.path.basename(f_out))
    t = torch.load(f)
    x = t.detach().cpu().numpy()
    np.save(f_out, x)
#     print(f, f_out)

In [8]:
wav_ids = [f.split('/')[-1][:-4] for f in pth_files]
wav_ids[0:5]

['0011_000635', '0011_001592', '0011_000269', '0011_001533', '0011_000015']

In [9]:
new_train = pd.DataFrame(data = {
    '0': wav_ids,
    '1': ['no text' for t in wav_ids],
    '2': ['no text' for t in wav_ids],
})
new_train[:2]

Unnamed: 0,0,1,2
0,0011_000635,no text,no text
1,0011_001592,no text,no text


In [7]:
f_hifi_train = f'{root_folder}/hifi_finetune_train.txt'
f_hifi_valid = f'{root_folder}/hifi_finetune_valid.txt'

f_hifi_train, f_hifi_valid

('/media/sdb1/made_emotts_2023/data/feature_output/nat_va_after_duration_tune/hifi_finetune_train.txt',
 '/media/sdb1/made_emotts_2023/data/feature_output/nat_va_after_duration_tune/hifi_finetune_valid.txt')

In [8]:
## random shuffling
new_train = new_train.sample(frac=1)

In [9]:
new_train[:-6].to_csv(f_hifi_train, sep="|",index=None, header=None, quoting=3)
new_train[-6:].to_csv(f_hifi_valid, sep="|",index=None, header=None, quoting=3)

In [15]:
len(new_train[:-6])

15327

In [10]:
input_wavs_dir = '/media/sdb1/made_emotts_2023/data/preprocessed/resampled'
wav_files = gather_files_from_folder(input_wavs_dir, ext='wav')
wav_files[0:5]

['/media/sdb1/made_emotts_2023/data/preprocessed/resampled/0019/0019_001607.wav',
 '/media/sdb1/made_emotts_2023/data/preprocessed/resampled/0019/0019_001476.wav',
 '/media/sdb1/made_emotts_2023/data/preprocessed/resampled/0019/0019_000949.wav',
 '/media/sdb1/made_emotts_2023/data/preprocessed/resampled/0019/0019_001200.wav',
 '/media/sdb1/made_emotts_2023/data/preprocessed/resampled/0019/0019_000710.wav']

In [11]:
import torchaudio
import soundfile as sf

In [17]:
from IPython.display import display, Audio, Markdown, clear_output, HTML

In [18]:
TARGET_SR = 22050

In [19]:
def get_audio(path, resample=None):
    effects = [
        ["remix", "-"] ## merge all channels
    ]
    if resample:
        effects.extend([
          ["rate", f'{resample}'],
        ])
    return torchaudio.sox_effects.apply_effects_file(path, effects=effects)

In [22]:
import soundfile as sf
def remix_resample_write(f_wav, target_sr, f_out):
    audio, sr = get_audio(f_wav, resample=target_sr)
    audio = audio.squeeze(0).cpu().detach().numpy()
    sf.write(f_out, audio, target_sr, subtype='PCM_16')
    return f_out

In [24]:
for f in tqdm(wav_files):
    f_out = f
    f_out = os.path.join(os.path.dirname(os.path.dirname(f_out)), os.path.basename(f_out))
#     print(f, f_out)
#     ! ln -s $f $f_out
    f_wav = f
    remix_resample_write(f_wav, TARGET_SR, f_out)
#     print(f_out)
#     display(Audio(f_out))
#     break    

100%|██████████████████████████████████████████████████████████████████████| 17498/17498 [18:06<00:00, 16.10it/s]


In [25]:
tacotron_gta_output_path = root_folder
hifi_output_checkpoint_path = '/media/sdb1/made_emotts_2023/checkpoints/hifigan/nat_va_after_duration_tune_210'
# ! mkdir -p $hifi_output_checkpoint_path

In [26]:
!echo cd /data/data/resources/nat-hifigan/nat-hifigan/hifi-gan-master && echo train.py \
    --input_wavs_dir $input_wavs_dir \
    --input_mels_dir $tacotron_gta_output_path \
    --input_training_file $f_hifi_train \
    --input_validation_file $f_hifi_valid \
    --checkpoint_path $hifi_output_checkpoint_path \
    --fine_tuning True --config /data/data/resources/nat-hifigan/nat-hifigan/checkpoints/hifi-gan/config.json

cd /data/data/resources/nat-hifigan/nat-hifigan/hifi-gan-master
train.py --input_wavs_dir --input_mels_dir --input_training_file --input_validation_file --checkpoint_path --fine_tuning True --config /data/data/resources/nat-hifigan/nat-hifigan/checkpoints/hifi-gan/config.json


In [14]:
1

1