In [None]:
"""Headers"""

from __future__ import print_function
from PIL import Image
import os
import os.path
import numpy as np
import sys
if sys.version_info[0] == 2:
    import cPickle as pickle
else:
    import pickle

import torch.utils.data as data
from torchvision.datasets.utils import download_url, check_integrity

import csv
%matplotlib inline
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import os.path
import sys
import librosa
import torch
import torch.utils.data
import torchvision
import torchvision.transforms as transforms

from torch.autograd import Variable
import torch.nn as nn
import torch.nn.functional as F

import IPython

import soundfile

# Seed every random number generator the notebook relies on (NumPy, PyTorch
# CPU, and all CUDA devices) from one constant so runs are repeatable.
SEED = 111
for _seed_rng in (np.random.seed, torch.cuda.manual_seed_all, torch.manual_seed):
    _seed_rng(SEED)

# Load the `load_melgan` entry point of the descriptinc/melgan-neurips
# repository through torch.hub. The resulting object is called directly on a
# waveform tensor and exposes `.inverse(...)` for the opposite direction (both
# are exercised in the MelGAN test cell below).
# NOTE(review): torch.hub fetches the repository on first use, so this cell
# presumably needs network access unless the hub cache is already populated.
melgan = torch.hub.load('descriptinc/melgan-neurips', 'load_melgan')

from model import MaskCycleGANVC
from dataLoader import DataSet
from util import showAudio

# MelGAN Test

In [None]:
# Sanity check of the vocoder alone: push one training utterance through
# MelGAN (waveform -> features -> waveform) and listen to both versions.
# librosa.load returns the audio together with the sample rate it was loaded
# at; that rate is reused for playback instead of a hard-coded 44100 / 2, so
# the players stay correct if the load rate is ever changed.
sample, fs = librosa.load('vcc2018_database_training/vcc2018_training/VCC2SM2/10001.wav')

# MelGAN is fed a batch, so add a leading axis of size 1 to the 1-D waveform.
mel = melgan(torch.tensor(np.expand_dims(sample, axis=0)))
y = melgan.inverse(mel).cpu()

print('Original')
IPython.display.display(IPython.display.Audio(sample, rate=fs))
print('Reconstructed')
# NOTE(review): assumes MelGAN synthesises at the same rate the file was
# loaded at (librosa's default) — confirm against the melgan-neurips config.
IPython.display.display(IPython.display.Audio(y, rate=fs))

# Load Datasets

In [None]:
# Notebook-wide switches: whether tensors are moved to CUDA, and the batch
# size shared by every DataLoader built below.
IS_GPU = False
TRAIN_BS = 1

# Background loader workers are only requested when running on GPU;
# otherwise batches are loaded in the main process.
train_workers = 2 if IS_GPU else 0

# Speaker A training fold.
aSet = DataSet(fold="A")

print(aSet.train_data[0].shape)

aLoader = torch.utils.data.DataLoader(
    aSet, batch_size=TRAIN_BS, shuffle=True, num_workers=train_workers
)
print("Train set size: "+str(len(aSet)))

# Speaker B training fold.
bSet = DataSet(fold="B")

bLoader = torch.utils.data.DataLoader(
    bSet, batch_size=TRAIN_BS, shuffle=True, num_workers=train_workers
)
print("Train set size: "+str(len(bSet)))

In [None]:
# Held-out folds for both speakers. Shuffling is off so the same utterances
# come out in the same order on every run.
test_workers = 2 if IS_GPU else 0

aTestSet = DataSet(fold="ATest")

aTestLoader = torch.utils.data.DataLoader(
    aTestSet, batch_size=TRAIN_BS, shuffle=False, num_workers=test_workers
)
print("Test set size: "+str(len(aTestSet)))

bTestSet = DataSet(fold="BTest")

bTestLoader = torch.utils.data.DataLoader(
    bTestSet, batch_size=TRAIN_BS, shuffle=False, num_workers=test_workers
)
print("Test set size: "+str(len(bTestSet)))

# Initiate Training

In [None]:
# Build the model wrapper, call its `load` with a saved-parameter reference,
# then train on the two speaker loaders.
# NOTE(review): the two `load` arguments look like a directory and a
# checkpoint name — confirm against MaskCycleGANVC.load in model.py.
checkpoint_args = ('.', '64F1M2logParams_l=10')

network = MaskCycleGANVC()
network.load(*checkpoint_args)
network.train(aLoader, bLoader)

# Show Losses

In [None]:
# One figure per loss history recorded on `network`, in the order:
# generator, discriminator A, discriminator B, then the second
# discriminators A2 and B2. Attributes are looked up lazily inside the loop
# so a missing history only fails when its own plot is reached.
loss_histories = (
    ('genLossOverEpochs', 'Generator Loss'),
    ('aDiscLossOverEpochs', 'Disc A Loss'),
    ('bDiscLossOverEpochs', 'Disc B Loss'),
    ('a2DiscLossOverEpochs', 'Disc A2 Loss'),
    ('b2DiscLossOverEpochs', 'Disc B2 Loss'),
)
for history_attr, y_label in loss_histories:
    plt.plot(getattr(network, history_attr))
    plt.xlabel('Epoch')
    plt.ylabel(y_label)
    plt.show()

# Viewing Results
## B to A

In [None]:
# NOTE(review): the generators are not explicitly switched to eval mode here;
# confirm whether MaskCycleGANVC takes care of that before inference.

# Take the first held-out batch of each speaker. Every batch unpacks into the
# features plus a mean and a std, which are passed on to showAudio later.
a_iterator = iter(aTestLoader)
adata, aMean, aStd = next(a_iterator)

b_iterator = iter(bTestLoader)
data, bMean, bStd = next(b_iterator)

# Generator input: the features with an all-ones tensor of the same shape
# concatenated along dim 1.
# NOTE(review): the ones are presumably the "nothing masked" mask channel of
# MaskCycleGAN-VC — confirm against model.py.
inputs = torch.cat((data, torch.ones_like(data)), dim=1).float()
if IS_GPU:
    inputs = inputs.cuda()

# Inference only, so skip autograd bookkeeping. The B->A conversion is run
# once and reused for the cycle reconstruction; running the generator twice
# wastes compute and can give mismatched outputs if it is stochastic.
# torch.ones_like follows the device and dtype of the generator output, so no
# separate GPU/CPU branch is needed.
with torch.no_grad():
    bToA_dev = network.agen(inputs)
    rec = network.bgen(
        torch.cat((bToA_dev, torch.ones_like(bToA_dev)), dim=1)
    ).cpu()
bToA = bToA_dev.cpu()

In [None]:
print('Original (Reconstructed)')
showAudio(data, bMean, bStd)
print('A Speaker')
showAudio(bToA, aMean, aStd)
print('B -> A -> B (should be same as Original)')
showAudio(rec, bMean, bStd)

## A to B

In [None]:
# Generator input: speaker A features with an all-ones tensor of the same
# shape concatenated along dim 1.
# NOTE(review): the ones are presumably the "nothing masked" mask channel of
# MaskCycleGAN-VC — confirm against model.py.
inputs = torch.cat((adata, torch.ones_like(adata)), dim=1).float()
if IS_GPU:
    inputs = inputs.cuda()

# Inference only, so skip autograd bookkeeping. The A->B conversion is run
# once and reused for the cycle reconstruction; running the generator twice
# wastes compute and can give mismatched outputs if it is stochastic.
# Both results are moved to the CPU on every path (the original CPU branch
# skipped the .cpu() call that its GPU branch had).
with torch.no_grad():
    aToB_dev = network.bgen(inputs)
    rec = network.agen(
        torch.cat((aToB_dev, torch.ones_like(aToB_dev)), dim=1)
    ).cpu()
aToB = aToB_dev.cpu()

In [None]:
print('Original (Reconstructed)')
showAudio(adata, aMean, aStd)
print('B Speaker')
showAudio(aToB, bMean, bStd)
print('A -> B -> A (Should be same as Original)')
showAudio(rec, aMean, aStd)