In [None]:
# Enable IPython's autoreload extension in mode 2 so edits to imported
# project modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
import os
import subprocess
import sys
from pathlib import Path

import IPython.display as ipd
import matplotlib.pyplot as plt
import torch
import torchaudio

import ppgs
import promonet
import pysodic

In [None]:
# Name of the model configuration used by default in the cells below
model = 'w2v2fb-ppg'

# Experiment configuration files live under <promonet root>/config/ppgs-experiments,
# split into one sub-directory per package.
config_dir = promonet.ROOT_DIR / 'config' / 'ppgs-experiments'
ppgs_config_dir = config_dir.joinpath('ppgs')
promonet_config_dir = config_dir.joinpath('promonet')

In [None]:
def monophonize(audio_file):
    """Downmix an audio file to a single channel, overwriting it in place.

    Arguments
        audio_file
            Path of the audio file to load and, if needed, overwrite

    The channels are averaged. A file that already holds a single channel is
    left untouched, so repeated calls do not re-encode it again.
    """
    audio, sample_rate = torchaudio.load(audio_file)

    # Dimension 0 is the one averaged below; a size of 1 means there is
    # nothing to downmix and rewriting the file would only re-encode it.
    if audio.shape[0] == 1:
        return

    audio = audio.mean(dim=0, keepdim=True)
    torchaudio.save(audio_file, audio, sample_rate=sample_rate)

In [None]:
# Extract the prosody features of the dog bark recording and plot them.
pitch, periodicity, loudness, voicing = pysodic.from_file('dog_bark.wav')

# Replace the pitch with NaN wherever the `voicing` mask is unset so that
# those positions leave a gap in the plotted curve.
pitch[~voicing] = torch.nan
print(pitch.shape)

x = range(0, pitch.shape[-1])
fig, ax = plt.subplots()
ax.plot(x, pitch.squeeze(dim=0), label='pitch')
ax.plot(x, loudness.squeeze(dim=0), label='loudness')
ax.set_xlabel('frame')
ax.set_title('dog_bark.wav')
ax.legend()
plt.show()

In [None]:
# Rescale the dog bark pitch contour to a target mean and save it so that
# it can be handed to promonet through `--pitch_files`.
audio_file = Path('dog_bark.wav')
pitch, periodicity, loudness, voicing = pysodic.from_file(str(audio_file))

# NOTE(review): the mean is taken over every position of `pitch`; positions
# where `voicing` is unset are not excluded here - confirm this is intended.
mean = pitch.mean()
target_mean = 120
pitch = (pitch/(mean/target_mean))

x = range(0, pitch.shape[-1])
fig, ax = plt.subplots()
ax.plot(x, pitch.squeeze(dim=0), label='pitch (rescaled)')
ax.plot(x, loudness.squeeze(dim=0), label='loudness')
ax.set_xlabel('frame')
ax.set_title(f'{audio_file} (pitch mean rescaled to {target_mean})')
ax.legend()
plt.show()

# Derive the file name from `audio_file` so it cannot drift from the name
# the reconstruction cell builds the same way (`<stem>-pitch.pt`).
torch.save(pitch, f'{audio_file.stem}-pitch.pt')

In [None]:
# Infer a PPG from `audio_file` and render it to an image in `vis_dir`.
vis_dir = Path('visualizations')

# The renderer is handed a path inside `vis_dir`; create the directory up
# front so a fresh checkout does not depend on it already existing.
vis_dir.mkdir(parents=True, exist_ok=True)
image_file = vis_dir / f'fig-{audio_file}.jpg'

with torch.autocast('cuda'):
    ppgs.evaluate.visualize.from_ppg_to_image_file(
        ppgs.from_file(audio_file, gpu=0).cpu(),
        audio_file,
        image_file,
        font_filename='arial.ttf',
    )
    # Disabled variant that overlays a second PPG:
    # ppgs.evaluate.visualize.from_ppg_to_image_file(reconstruction_inferred_ppg.T, audio_file, 'fig00.jpg', second_ppg=ppg[:, :-1].T, font_filename='arial.ttf')

# `filename=` makes IPython read the image from disk instead of guessing
# whether the argument is a path or raw image data.
ipd.display(ipd.Image(filename=str(image_file)))

In [None]:
# Show the mean of whichever `pitch` tensor the most recently run cell left behind
print(torch.mean(pitch))

In [None]:
# Reconstruct the dog bark with promonet, using the saved pitch contour.
model = 'w2v2fb-ppg'
audio_file = Path('dog_bark.wav')
# Alternative inputs:
# audio_file = Path('dog.mp3')
# audio_file = Path('small-dog.mp3')
ipd.display(ipd.Audio(audio_file))
monophonize(audio_file)
output_file = audio_file.stem + '-reconstruction.wav'

# Build the command as an argument list: no shell quoting issues, and
# `sys.executable` runs promonet with the same interpreter as this kernel.
command = [
    sys.executable, '-m', 'promonet',
    '--audio_files', str(audio_file),
    '--output_files', output_file,
    '--pitch_files', audio_file.stem + '-pitch.pt',
    '--speaker_ids', '16',  # alternative: 0
    '--config',
    str(ppgs_config_dir / (model + '.py')),
    str(promonet_config_dir / (model + '.py')),
    '--checkpoint', f'/repos/promonet/runs/{model}/generator-00250000.pt',
    '--gpu', '0',
]
print(' '.join(command))

# check=True raises if promonet exits with an error, instead of silently
# continuing and playing a stale or missing output file.
subprocess.run(command, check=True)
ipd.Audio(output_file)

In [None]:
# Reconstruct the cat meow with promonet.
# NOTE(review): `model` is not assigned in this cell; it keeps whatever value
# the most recently executed cell gave it.
audio_file = Path('cat_meow.wav')
ipd.display(ipd.Audio(audio_file))
monophonize(audio_file)
output_file = audio_file.stem + '-reconstruction.wav'

# Build the command as an argument list: no shell quoting issues, and
# `sys.executable` runs promonet with the same interpreter as this kernel.
# A `--pitch_files` argument is not passed for this recording.
command = [
    sys.executable, '-m', 'promonet',
    '--audio_files', str(audio_file),
    '--output_files', output_file,
    '--speaker_ids', '37',  # alternative: 0
    '--config',
    str(ppgs_config_dir / (model + '.py')),
    str(promonet_config_dir / (model + '.py')),
    '--checkpoint', f'/repos/promonet/runs/{model}/generator-00800000.pt',
    '--gpu', '0',
]
print(' '.join(command))

# check=True raises if promonet exits with an error, instead of silently
# continuing and playing a stale or missing output file.
subprocess.run(command, check=True)
ipd.Audio(output_file)

In [None]:
# Extract the prosody features of the Charlie Brown clip and plot them.
pitch, periodicity, loudness, voicing = pysodic.from_file('charlie_brown_short.mp3')

# Replace the pitch with NaN wherever the `voicing` mask is unset so that
# those positions leave a gap in the plotted curve.
pitch[~voicing] = torch.nan
print(pitch.shape)

x = range(0, pitch.shape[-1])
fig, ax = plt.subplots()
ax.plot(x, pitch.squeeze(dim=0), label='pitch')
ax.plot(x, loudness.squeeze(dim=0), label='loudness')
# Periodicity is multiplied by 100 so it is visible next to the other curves
ax.plot(x, 100*periodicity.squeeze(dim=0), label='100 x periodicity')
ax.set_xlabel('frame')
ax.set_title('charlie_brown_short.mp3')
ax.legend()
plt.show()

In [None]:
# Create original ppg file
# NOTE(review): `audio_file` is not assigned in this cell; it is whatever the
# most recently executed cell left behind - confirm it is the intended input.
model = 'mel-ppg'
ppg_file = audio_file.parent / f'{audio_file.stem}-{model}.pt'
print(ppg_file)

# Build the command as an argument list: no shell quoting issues, and
# `sys.executable` runs ppgs with the same interpreter as this kernel.
command = [
    sys.executable, '-m', 'ppgs',
    '--sources', str(audio_file),
    '--sinks', str(ppg_file),
    '--config', str(ppgs_config_dir / (model + '.py')),
    '--gpu', '0',
]
print(' '.join(command))

# check=True raises if ppgs exits with an error, so a stale `ppg_file` from
# an earlier run is never loaded by mistake.
subprocess.run(command, check=True)

ppg = torch.load(ppg_file)

In [None]:
# Render the previously loaded `ppg` tensor to an image in `vis_dir`.
vis_dir = Path('visualizations')

# The renderer is handed a path inside `vis_dir`; create the directory up
# front so a fresh checkout does not depend on it already existing.
vis_dir.mkdir(parents=True, exist_ok=True)
image_file = vis_dir / f'fig-{audio_file}.jpg'

print(audio_file)
with torch.autocast('cuda'):
    ppgs.evaluate.visualize.from_ppg_to_image_file(
        ppg.T,
        audio_file,
        image_file,
        font_filename='arial.ttf',
    )
    # Disabled variant that overlays a second PPG:
    # ppgs.evaluate.visualize.from_ppg_to_image_file(reconstruction_inferred_ppg.T, audio_file, 'fig00.jpg', second_ppg=ppg[:, :-1].T, font_filename='arial.ttf')

# `filename=` makes IPython read the image from disk instead of guessing
# whether the argument is a path or raw image data.
ipd.display(ipd.Image(filename=str(image_file)))

In [None]:
# Reconstruct the Charlie Brown clip with promonet.
audio_file = Path('charlie_brown_short.mp3')
model = 'mel-ppg'
ipd.display(ipd.Audio(audio_file))
monophonize(audio_file)
output_file = audio_file.stem + '-reconstruction.wav'

# Build the command as an argument list: no shell quoting issues, and
# `sys.executable` runs promonet with the same interpreter as this kernel.
# A `--pitch_files` argument is not passed for this recording.
command = [
    sys.executable, '-m', 'promonet',
    '--audio_files', str(audio_file),
    '--output_files', output_file,
    '--speaker_ids', '13',  # alternatives: 16, 0
    '--config',
    str(ppgs_config_dir / (model + '.py')),
    str(promonet_config_dir / (model + '.py')),
    '--checkpoint', f'/repos/promonet/runs/{model}/generator-00250000.pt',
    '--gpu', '0',
]
print(' '.join(command))

# check=True raises if promonet exits with an error, instead of silently
# continuing and playing a stale or missing output file.
subprocess.run(command, check=True)
ipd.Audio(output_file)

In [None]:
# Reconstruct the Doodlebob clip with promonet.
# NOTE(review): `model` is not assigned in this cell; it keeps whatever value
# the most recently executed cell gave it.
audio_file = Path('doodlebob.mp3')
ipd.display(ipd.Audio(audio_file))
monophonize(audio_file)
output_file = audio_file.stem + '-reconstruction.wav'

# Build the command as an argument list: no shell quoting issues, and
# `sys.executable` runs promonet with the same interpreter as this kernel.
# A `--pitch_files` argument is not passed for this recording.
command = [
    sys.executable, '-m', 'promonet',
    '--audio_files', str(audio_file),
    '--output_files', output_file,
    '--speaker_ids', '13',  # alternative: 0
    '--config',
    str(ppgs_config_dir / (model + '.py')),
    str(promonet_config_dir / (model + '.py')),
    '--checkpoint', f'/repos/promonet/runs/{model}/generator-00800000.pt',
    '--gpu', '0',
]
print(' '.join(command))

# check=True raises if promonet exits with an error, instead of silently
# continuing and playing a stale or missing output file.
subprocess.run(command, check=True)
ipd.Audio(output_file)

In [None]:
# Reconstruct an 8-second excerpt of the recording with promonet.
# audio_file = Path('fresh_prince.wav')
audio_file = Path('moon.mp3')
model = 'w2v2fb-ppg'

# Keep only the first 8 seconds and write them to a separate wav file
audio, sr = torchaudio.load(audio_file)
audio = audio[..., :sr*8]
short_file = audio_file.stem + '-short.wav'
torchaudio.save(short_file, audio, sr)
ipd.display(ipd.Audio(short_file))

# Downmix the excerpt, because that is the file promonet reads below.
# Previously the source recording was downmixed (and overwritten) after the
# excerpt had already been written, leaving the excerpt itself untouched.
monophonize(short_file)
output_file = audio_file.stem + '-reconstruction.wav'

# Build the command as an argument list: no shell quoting issues, and
# `sys.executable` runs promonet with the same interpreter as this kernel.
# A `--pitch_files` argument is not passed for this recording.
command = [
    sys.executable, '-m', 'promonet',
    '--audio_files', str(short_file),
    '--output_files', output_file,
    '--speaker_ids', '83',  # alternatives: 37, 0
    '--config',
    str(ppgs_config_dir / (model + '.py')),
    str(promonet_config_dir / (model + '.py')),
    '--checkpoint', f'/repos/promonet/runs/{model}/generator-00800000.pt',
    '--gpu', '0',
]
print(' '.join(command))

# check=True raises if promonet exits with an error, instead of silently
# continuing and playing a stale or missing output file.
subprocess.run(command, check=True)
ipd.Audio(output_file)

In [None]:
# Extract the prosody features of the 8-second excerpt and plot them.
pitch, periodicity, loudness, voicing = pysodic.from_file('moon-short.wav', voicing_threshold=0.08)

# Replace the pitch with NaN wherever the `voicing` mask is unset so that
# those positions leave a gap in the plotted curve.
pitch[~voicing] = torch.nan
print(pitch.shape)

x = range(0, pitch.shape[-1])
fig, ax = plt.subplots()
ax.plot(x, pitch.squeeze(dim=0), label='pitch')
ax.plot(x, loudness.squeeze(dim=0), label='loudness')
# Periodicity is multiplied by 100 so it is visible next to the other curves
ax.plot(x, 100*periodicity.squeeze(dim=0), label='100 x periodicity')
ax.set_xlabel('frame')
ax.set_title('moon-short.wav (voicing_threshold=0.08)')
ax.legend()
plt.show()