### PyTorch implementation of Random Audio Style Transfer

Based on the official PyTorch neural style transfer tutorial:
https://pytorch.org/tutorials/advanced/neural_style_tutorial.html

In [None]:
from IPython.display import display, Audio

from model import RandomCNN, run_transfer
from utils import plot_spectrum, read_audio_spectrum, spectrum_to_audio

In [None]:
# Available (content, style) input pairs. Previously these were toggled by
# re-assigning CONTENT_PATH/STYLE_PATH several times in a row (only the last
# assignment took effect); keeping them in one table makes the active choice
# explicit and removes the dead assignments.
EXAMPLES = {
    "songs": (
        "wavs/songs/imperial.mp3",
        "wavs/songs/usa.mp3",
    ),
    "voices": (
        "wavs/voices/boy.wav",
        "wavs/voices/girl.wav",
    ),
    "birds_oriole_content": (
        "wavs/birds/MEX_ALTAMIRA_ORIOLE/MEX_Altamira_Oriole-ACelisM_01.mp3",
        "wavs/birds/BR_ALAGOAS_FOLIAGE/BR_AL_XC181063-PHINOV36_0101_LIMPO.mp3",
    ),
    "birds_foliage_content": (
        "wavs/birds/BR_ALAGOAS_FOLIAGE/BR_AL_XC181063-PHINOV36_0101_LIMPO.mp3",
        "wavs/birds/MEX_ALTAMIRA_ORIOLE/MEX_Altamira_Oriole-ACelisM_01.mp3",
    ),
}

# Change this key to run the notebook on a different pair of recordings.
EXAMPLE = "birds_foliage_content"
CONTENT_PATH, STYLE_PATH = EXAMPLES[EXAMPLE]

# read_audio_spectrum returns three values per file; they are used below as
# the spectrum (*_s), the sample rate (*_sr) and a third value (*_p) that is
# later passed to spectrum_to_audio as `p` (presumably the phase - confirm in
# utils).
content_s, content_sr, content_p = read_audio_spectrum(CONTENT_PATH)
style_s, style_sr, style_p = read_audio_spectrum(STYLE_PATH)

In [None]:
def _preview(spectrum, sample_rate):
    """Plot a spectrum, convert it to audio, show a player, and return the audio."""
    plot_spectrum(spectrum)
    waveform = spectrum_to_audio(spectrum)
    display(Audio(waveform, rate=sample_rate))
    return waveform


# Inspect both inputs before running the transfer: content first, then style.
content_wav = _preview(content_s, content_sr)
style_wav = _preview(style_s, style_sr)

In [None]:
%%time
mcnn = RandomCNN(out_channels=16, kernel=(11, 3))
result = run_transfer(mcnn, content_s, style_s, num_steps=2000, content_weight=1e-1, style_weight=1e2)

In [None]:
# Convert the transfer output to a NumPy array on the CPU. `.detach()` is the
# supported way to drop autograd tracking (the legacy `.data` accessor
# bypasses autograd's safety checks); `.squeeze()` removes size-1 dimensions.
result_s = result.detach().cpu().numpy().squeeze()
plot_spectrum(result_s)

# Rebuild a waveform from the result spectrum, passing the content's `p`
# value (presumably its phase - confirm in utils) and 150 rounds.
result_wav = spectrum_to_audio(result_s, p=content_p, rounds=150)

# The waveform is reconstructed with the content's `p`, so play it back at the
# content's sample rate rather than the style's; the two only agree when both
# files were loaded at the same rate.
display(Audio(result_wav, rate=content_sr))