In [None]:
import os
import json
import IPython.display as ipd
import librosa



def play_audio(speaker_id, clean_other, split="dev",
               data_dir="/home/pneekhara/Datasets/78419/Hi_Fi_TTS_v_0_backup",
               max_items=7):
    """Print and play the first few utterances listed in a speaker manifest.

    The manifest is a JSON-lines file named
    ``{speaker_id}_manifest_{clean_other}_{split}.json`` inside ``data_dir``.
    Each record must carry an ``audio_filepath`` key that is resolved
    relative to ``data_dir``.

    Args:
        speaker_id: Speaker identifier used in the manifest file name.
        clean_other: Middle component of the manifest file name (e.g. "clean").
        split: Last component of the manifest file name (e.g. "dev").
        data_dir: Directory holding the manifests and the audio files.
        max_items: Maximum number of utterances to print and play.

    Raises:
        FileNotFoundError: If the manifest does not exist.
        json.JSONDecodeError: If a non-blank manifest line is not valid JSON.
    """
    manifest_path = os.path.join(data_dir, "{}_manifest_{}_{}.json".format(speaker_id, clean_other, split))
    shown = 0
    with open(manifest_path, "r") as f:
        for line in f:
            if shown >= max_items:
                break
            # Tolerate blank lines (e.g. a trailing newline at end of file).
            if not line.strip():
                continue
            record = json.loads(line)
            print(record)
            audio_path = os.path.join(data_dir, record['audio_filepath'])
            # sr=None keeps the file's native sampling rate ...
            audio, sr = librosa.load(audio_path, sr=None)
            # ... so playback must use that same rate, not a fixed 44100.
            ipd.display(ipd.Audio(audio, rate=sr))
            shown += 1

In [None]:
# Listen to the first clean dev utterances of speaker 8051.
play_audio(speaker_id=8051, clean_other="clean", split="dev")

In [None]:
# Same speaker and subset, relying on play_audio's default split.
play_audio(speaker_id=8051, clean_other="clean")

In [None]:
# Speaker label -> list of manifest files (one JSON record per line, each
# with an "audio_filepath" key) to include in the pitch statistics.
# Disabled entries, kept for reference:
#     92 : ["/home/pneekhara/Datasets/78419/Hi_Fi_TTS_v_0_backup/92_manifest_clean_train.json"],
#     6097 : ["/home/pneekhara/Datasets/78419/Hi_Fi_TTS_v_0_backup/6097_manifest_clean_train.json"],
manifest_files = {
    "Jon": [
        "/home/pneekhara/JonData/train_list.txt",
    ],
}

In [None]:
from pathlib import Path
import json
import librosa
import numpy as np
from tqdm import tqdm
from multiprocessing import Pool

# Prefix joined onto every manifest "audio_filepath"; the empty string
# leaves the manifest paths unchanged.
data_dir = ""
# Upper bound on how many audio files per speaker are analysed.
num_samples = 1_000

def find_pitch_from_audio(wav_file):
    """Return the non-zero pYIN pitch estimates for one audio file.

    The file is loaded at its native sampling rate. pYIN is asked to fill
    frames without a pitch estimate with 0.0, and those frames are then
    dropped from the returned array.
    """
    # NOTE(review): frame_length=1024 may be too short to resolve fmin=30
    # at high sampling rates — confirm against the librosa.pyin docs.
    samples, native_sr = librosa.load(wav_file, sr=None)
    f0, _voiced_flag, _voiced_prob = librosa.pyin(
        samples,
        fmin=30,
        fmax=512,
        frame_length=1024,
        sr=native_sr,
        fill_na=0.0,
    )
    has_pitch = f0 != 0.0
    return f0[has_pitch]
    
# For every speaker: collect audio paths from its manifests, estimate pitch
# for the first `num_samples` files in parallel, and print mean / std of the
# pooled pitch values. `os` is imported in the notebook's first cell.
for speaker in manifest_files:
    print(speaker)
    wav_files = []
    for json_f in manifest_files[speaker]:
        print(json_f)
        with open(json_f, "r") as f:
            manifest_lines = f.read().split("\n")
        for line in tqdm(manifest_lines):
            # Skip blank / whitespace-only lines (e.g. the trailing newline).
            if not line.strip():
                continue
            record = json.loads(line)
            wav_files.append(os.path.join(data_dir, record["audio_filepath"]))

    # np.concatenate raises on an empty sequence, so bail out early.
    if not wav_files:
        print(f"no audio files listed for {speaker}; skipping")
        continue

    with Pool(10) as p:
        all_pitches = p.map(find_pitch_from_audio, wav_files[:num_samples])
    all_pitch = np.concatenate(all_pitches)

    # Every file may have produced zero pitch frames; mean/std of an empty
    # array would only yield NaN plus a RuntimeWarning.
    if all_pitch.size == 0:
        print(f"no pitch frames found for {speaker}; skipping")
        continue

    print(f"mean: {np.mean(all_pitch)}", speaker)
    print(f"std: {np.std(all_pitch)}", speaker)

In [None]:
# Scratch cell: prints the literal 1.
print(1)

In [None]:
def generate_ngc_training_command():
    """Build and return the (partial) `ngc batch run` command line.

    Only the job name, image, ACE, instance type, result directory, dataset
    mount and workspace mount are included; no `--commandline` is appended.

    Returns:
        str: The assembled command string.
    """
    # NOTE(review): dataset id 78109 differs from the 78419 used elsewhere
    # in this notebook — confirm which one is intended.
    command_str = ' ngc batch run --name "ml-model.fastpitch_align" --image "nvidia/pytorch:21.03-py3" --ace nv-us-west-2 --instance dgx1v.32g.8.norm --result /results/'
    # Leading space keeps "--datasetid" from fusing with "/results/".
    command_str += ' --datasetid 78109:/HiFi8051 --workspace pneekhara-workspace:/mnt/'
    return command_str

In [None]:
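# Pitch statistics pasted from earlier runs (presumably output of the pitch
# cell above; the speaker each pair belongs to was not recorded — TODO label).
# mean: 214.32825973166476
# std: 30.922863159682226
#
# mean: 161.71734193655882
# std: 36.34714766665941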

In [None]:
# b   (stray scratch input; evaluating this undefined name raises NameError on Run All)

In [None]:
# Assemble the `ngc batch run` command that trains FastPitchAlign on one
# Hi-Fi TTS speaker. The command is built from named pieces so each shell
# step inside the container is readable on its own line.
speaker_id = 11697
run_name = "FastPitchAlign{}".format(speaker_id)
clean_other = "clean"

# Locations inside the container after the dataset archive is unpacked.
dataset_root = "/raid/HiFiDataset/Hi_Fi_TTS_v_0"
train_manifest = f"{dataset_root}/{speaker_id}_manifest_{clean_other}_train.json"
dev_manifest = f"{dataset_root}/{speaker_id}_manifest_{clean_other}_dev.json"

# sed expression that rewrites manifest audio paths "wav/..." to absolute ones.
sed_expr = f"'s@wav/@{dataset_root}/wav/@g'"

# Shell variables that must reach the container verbatim. This is a plain raw
# string (not an f-string), so {NGC_JOB_ID} / {work_dir_name} are NOT treated
# as Python replacement fields, and the backslash before each "$" is kept so
# the submitting shell does not expand them inside the double-quoted
# --commandline argument.
job_dir = r"\${NGC_JOB_ID}_\${work_dir_name}"

container_steps = [
    "cd /raid/",
    "cp -r /HiFiDataset/ /raid/",
    "tar -xzf /raid/HiFiDataset/hi_fi_tts_dataset.tar.gz -C /raid/HiFiDataset",
    f"sed -i {sed_expr} {train_manifest}",
    f"sed -i {sed_expr} {dev_manifest}",
    "cd /workspace/",
    "apt-get update",
    "apt-get install -y libsndfile1",
    "git clone https://github.com/paarthneekhara/NeMo.git",
    "cd NeMo",
    "bash reinstall.sh",
    "pip uninstall -y torchtext",
    "work_dir_name=FastPitchAlign",
    (
        "HYDRA_FULL_ERROR=1 python examples/tts/fastpitch.py"
        " --config-name=fastpitch_align"
        " trainer.gpus=-1"
        f" exp_manager.exp_dir=/mnt/inprogress/{job_dir}"
        " trainer.max_epochs=1000"
        f" train_dataset={train_manifest}"
        f" validation_datasets={dev_manifest}"
        " trainer.check_val_every_n_epoch=50"
        f" prior_folder=/raid/HiFiDataset/Priors{speaker_id}"
    ),
    f"mv /mnt/inprogress/{job_dir} /mnt/completed/{job_dir}",
]
container_cmd = " && ".join(container_steps)

command_str = (
    f'ngc batch run --name "{run_name}"'
    ' --image "nvidia/pytorch:21.03-py3"'
    " --ace nv-us-west-2"
    " --instance dgx1v.32g.8.norm"
    " --result /results/"
    " --datasetid 78419:/HiFiDataset"
    " --workspace pneekhara-workspace:/mnt/"
    f' --commandline "{container_cmd}"'
)

In [None]:
import matplotlib.pyplot as plt
from librosa.display import specshow
import json
import librosa
import numpy as np

# Overlay the pYIN f0 track on a log-frequency spectrogram for the first
# seconds of every utterance listed in the validation manifest.
val_manifest_file = "/home/pneekhara/JonData/val_list.json"
sample_rate = 44100   # rate the audio is resampled to on load
max_seconds = 5       # only the first few seconds of each file are analysed

with open(val_manifest_file) as f:
    # Drop blank lines (e.g. from a trailing newline) that json.loads rejects.
    all_lines = [ln for ln in f.read().split("\n") if ln.strip()]

# Single-value lists keep the loop shape ready for an fmin/fmax sweep.
for fmin in [30]:
    for fmax in [512]:
        for line in all_lines:
            record = json.loads(line)
            actual_wav_path = record['audio_filepath']
            y, _ = librosa.load(actual_wav_path, sr=sample_rate)
            y = y[:max_seconds * sample_rate]
            # pyin, times_like and specshow all default to sr=22050; pass the
            # real rate so f0 values and both plot axes are in true units.
            f0, voiced_flag, voiced_probs = librosa.pyin(y, fmin=fmin, fmax=fmax, sr=sample_rate)
            times = librosa.times_like(f0, sr=sample_rate)
            print(fmin, fmax)
            D = librosa.amplitude_to_db(np.abs(librosa.stft(y)), ref=np.max)
            fig, ax = plt.subplots()
            img = specshow(D, sr=sample_rate, x_axis='time', y_axis='log', ax=ax)
            ax.set(title='pYIN fundamental frequency estimation')
            fig.colorbar(img, ax=ax, format="%+2.f dB")
            ax.plot(times, f0, label='f0', color='cyan', linewidth=3)
            ax.legend(loc='upper right')
            plt.show()
            # Release the figure; one is created per utterance.
            plt.close(fig)
