# AudioData

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from wavenet.wn.audiodata import AudioData
from wavenet.wn.utils import list_files

%matplotlib inline

# Length handed to AudioData as its second argument; later cells also pass
# it to _extract_segment as the x-segment length.
x_len = 100
# Entries of this list are passed to _load_audio_from_wav in later cells.
filelist = list_files('./audio')
# Shared AudioData instance exercised by the AudioData cells below.
ad = AudioData(filelist, x_len)

### _load_audio_from_wav

In [None]:
# Load the first listed file and summarise what came back.
first_file = filelist[0]
audio, dtype, sr = ad._load_audio_from_wav(first_file)

num_frames = len(audio)
lowest, highest = np.min(audio), np.max(audio)
num_unique = len(np.unique(audio))

print('File: {}, dtype: {}, sample rate: {}'.format(first_file, dtype, sr))
print('Span: [{}, {}]'.format(lowest, highest))
# Duration is the number of loaded samples divided by the reported rate.
print('Length: {} ({:.04f} seconds)'.format(num_frames, num_frames / sr))
print('# unique values: {}'.format(num_unique))

### _extract_segment
#### using `start_idx`

In [None]:
# Extract a segment at a fixed offset and check it against plain slicing.
x_len = 100
y_len = 50
start_idx = 500
audio, _, _ = ad._load_audio_from_wav(filelist[2])
x, y = ad._extract_segment(audio, x_len, y_len, start_idx=start_idx)

# Reference: slice the same window by hand (x first, then y right after it).
x_man = audio[start_idx:start_idx + x_len]
y_man = audio[start_idx + x_len:start_idx + x_len + y_len]

print('Lengths from method: {{x: {}, y: {}}}'.format(len(x), len(y)))
print('Lengths from test: {{x: {}, y: {}}}'.format(len(x_man), len(y_man)))
# Compare the actual values as well, so a mismatch is not left to the eye.
print('Matches manual slice: {{x: {}, y: {}}}'.format(
    np.array_equal(x, x_man), np.array_equal(y, y_man)))

x_axis = range(x_len)
y_axis = range(x_len, x_len + y_len)
fig, (ax_method, ax_manual) = plt.subplots(2, 1)
panels = (
    (ax_method, '_extract_segment(start_idx={})'.format(start_idx), x, y),
    (ax_manual, 'manual slice', x_man, y_man),
)
for ax, title, seg_x, seg_y in panels:
    ax.plot(x_axis, seg_x, 'b', label='x')
    ax.plot(y_axis, seg_y, 'r', label='y')
    ax.set_title(title)
    ax.legend()
ax_manual.set_xlabel('sample index within segment')
fig.tight_layout()
# Render explicitly so the cell does not echo a Line2D repr as its output.
plt.show()

### _extract_segment
#### no `start_idx`

In [None]:
# Call _extract_segment twice without start_idx and compare the results.
x_len = 100
y_len = 50
audio, _, _ = ad._load_audio_from_wav('./audio/classical.wav')
x1, y1 = ad._extract_segment(audio, x_len, y_len)
x2, y2 = ad._extract_segment(audio, x_len, y_len)

# The two draws are expected to differ; report it instead of relying only
# on the plots (presumably the start position is chosen at random -- confirm
# against AudioData._extract_segment).
print('Segments identical: {{x: {}, y: {}}}'.format(
    np.array_equal(x1, x2), np.array_equal(y1, y2)))

x_axis = range(x_len)
y_axis = range(x_len, x_len + y_len)
fig, (ax_first, ax_second) = plt.subplots(2, 1)
panels = (
    (ax_first, 'first call', x1, y1),
    (ax_second, 'second call', x2, y2),
)
for ax, title, seg_x, seg_y in panels:
    ax.plot(x_axis, seg_x, 'b', label='x')
    ax.plot(y_axis, seg_y, 'r', label='y')
    ax.set_title(title)
    ax.legend()
ax_second.set_xlabel('sample index within segment')
fig.tight_layout()
# Render explicitly so the cell does not echo a Line2D repr as its output.
plt.show()

### _to_tensor

In [None]:
# Convert an extracted segment with _to_tensor, with and without a target.
x_len = 100
y_len = 50
audio, _, _ = ad._load_audio_from_wav('./audio/classical.wav')
x, y = ad._extract_segment(audio, x_len, y_len)

x_solo = ad._to_tensor(x)
x_wy, y_wy = ad._to_tensor(x, y=y)

# Each entry pairs a report line with the object whose type/shape it shows.
x_report = (
    ('Before (x): {}, Shape: {}', x),
    ('After (x-solo): {}, Shape: {}', x_solo),
    ('After (x): {}, Shape: {}', x_wy),
)
y_report = (
    ('Before (y): {}, Shape: {}', y),
    ('After (y): {}, Shape: {}', y_wy),
)

for template, item in x_report:
    print(template.format(type(item), item.shape))
print('')
for template, item in y_report:
    print(template.format(type(item), item.shape))

### _quantize

In [None]:
# Encode a segment, quantize it, and compare value spans before and after.
x_len = 100
y_len = 50
audio, _, _ = ad._load_audio_from_wav('./audio/classical.wav')
x, y = ad._extract_segment(audio, x_len, y_len)

x = ad.encoder.encode(x)
y = ad.encoder.encode(y)
x_q = ad._quantize(x)
y_q = ad._quantize(y, label=True)

# Each entry pairs a report line with the array it summarises.
report = (
    ('Before (x): span: {}, # unique: {}', x),
    ('After (x): span: {}, # unique: {}', x_q),
    ('Before (y): span: {}, # unique: {}', y),
    ('After (y): span: {}, # unique: {}', y_q),
)
for template, values in report:
    span = (np.min(values), np.max(values))
    print(template.format(span, len(np.unique(values))))

### save_wav

In [None]:
# Round-trip a track through save_wav and compare it with the original.
filename_out = './tmp.wav'
audio, dtype, sr = ad._load_audio_from_wav('./audio/classical.wav')
print('Original: length: {}, dtype: {}, sample rate: {}'.format(
    len(audio), dtype, sr))

ad.save_wav(filename_out, audio, sr)

# Read the written file back with the same loader used for the original.
audio_out, dtype_out, sr_out = ad._load_audio_from_wav(filename_out)
print('Saved: length: {}, dtype: {}, sample rate: {}'.format(
    len(audio_out), dtype_out, sr_out))

fig, (ax_original, ax_saved) = plt.subplots(2, 1)
ax_original.plot(range(len(audio)), audio, 'b')
ax_original.set_title('original')
ax_saved.plot(range(len(audio_out)), audio_out, 'b')
ax_saved.set_title('reloaded from {}'.format(filename_out))
ax_saved.set_xlabel('sample index')
fig.tight_layout()
# Render explicitly so the cell does not echo a Line2D repr as its output.
plt.show()

### label2value

In [None]:
# Encode and quantize a target segment as labels, then map labels back.
x_len = 100
y_len = 50
audio, _, _ = ad._load_audio_from_wav('./audio/classical.wav')
x, y = ad._extract_segment(audio, x_len, y_len)

y_enc = ad.encoder.encode(y)
y_q = ad._quantize(y_enc, label=True)

y_val = ad.label2value(y_q)

# Show the same six randomly chosen positions at every stage.
idxs = np.random.randint(y_len, size=6)
stages = (
    ('Before: {}', y_enc),
    ('Quant: {}', y_q),
    ('After: {}', y_val),
)
for template, series in stages:
    print(template.format(series[idxs]))

### preprocess

In [None]:
# Run preprocess on a segment, with and without a target, and compare spans.
x_len = 100
y_len = 50
audio, _, _ = ad._load_audio_from_wav('./audio/classical.wav')
x, y = ad._extract_segment(audio, x_len, y_len)

x_solo = ad.preprocess(x)
x_wy, y_wy = ad.preprocess(x, y=y)

# Each entry pairs a report line with the array it summarises.
x_report = (
    ('Before (x): span: {}, # unique: {}', x),
    ('After (x-solo): span: {}, # unique: {}', x_solo),
    ('After (x): span: {}, # unique: {}', x_wy),
)
y_report = (
    ('Before (y): span: {}, # unique: {}', y),
    ('After (y): span: {}, # unique: {}', y_wy),
)

for template, values in x_report:
    span = (np.min(values), np.max(values))
    print(template.format(span, len(np.unique(values))))
print('')
for template, values in y_report:
    span = (np.min(values), np.max(values))
    print(template.format(span, len(np.unique(values))))

# MuEncoder

In [None]:
import copy
import numpy as np
import matplotlib.pyplot as plt
from wavenet.wn.audiodata import AudioData

%matplotlib inline

# Length handed to AudioData as its second argument.
x_len = 100
# Single-track list; filelist[0] is what the encoder cells below load.
filelist = ['./audio/classical.wav']
# AudioData instance whose `encoder` attribute the cells below exercise.
ad = AudioData(filelist, x_len)


### normalize

In [None]:
def span_and_unique(values):
    """Return ((min, max), number of distinct values) for an array."""
    return (np.min(values), np.max(values)), len(np.unique(values))


audio, dtype, sr = ad._load_audio_from_wav(filelist[0])
print('x: span: {}, # unique: {}'.format(*span_and_unique(audio)))

# Span selected by name.
x_s_dr = ad.encoder.normalize(audio, span='datarange')
x_s_mm = ad.encoder.normalize(audio, span='minmax')
print('span="datarange": span: {}, # unique: {}'.format(
    *span_and_unique(x_s_dr)))
print('span="minmax": span: {}, # unique: {}'.format(
    *span_and_unique(x_s_mm)))

# Span given explicitly as a (min, max) pair.
x_set = ad.encoder.normalize(audio, span=(-20000, 20000))
print('span=span: span: {}, # unique: {}'.format(*span_and_unique(x_set)))

# No span argument: the encoder's own default is used.
x_dr = ad.encoder.normalize(audio)
print('span=None: span: {}, # unique: {}'.format(*span_and_unique(x_dr)))

# No span argument and no datarange on the encoder; a deep copy keeps the
# shared `ad` instance untouched.
ad_copy = copy.deepcopy(ad)
ad_copy.encoder.datarange = None
x_mm = ad_copy.encoder.normalize(audio)
print('span=None, datarange=None: span: {}, # unique: {}'.format(
    *span_and_unique(x_mm)))

### expand

In [None]:
# Normalize a track, expand it again, and compare it with the original.
x, _, _ = ad._load_audio_from_wav(filelist[0])
print('x: span: {}, # unique: {}'.format(
    (np.min(x), np.max(x)), len(np.unique(x))))

x_norm = ad.encoder.normalize(x)
print('x normed: span: {}, # unique: {}'.format(
    (np.min(x_norm), np.max(x_norm)), len(np.unique(x_norm))))

x_expand = ad.encoder.expand(x_norm)
print('x expanded: span: {}, # unique: {}'.format(
    (np.min(x_expand), np.max(x_expand)), len(np.unique(x_expand))))

fig, (ax_raw, ax_expanded) = plt.subplots(2, 1)
ax_raw.plot(range(len(x)), x, 'b')
ax_raw.set_title('x (as loaded)')
ax_expanded.plot(range(len(x_expand)), x_expand, 'b')
ax_expanded.set_title('encoder.expand(encoder.normalize(x))')
ax_expanded.set_xlabel('sample index')
fig.tight_layout()
# Render explicitly so the cell does not echo a Line2D repr as its output.
plt.show()

### encode & decode

In [None]:
# Encode a track, decode it again, and compare all three signals.
x, _, _ = ad._load_audio_from_wav(filelist[0])
print('x: span: {}, # unique: {}'.format(
    (np.min(x), np.max(x)), len(np.unique(x))))

x_enc = ad.encoder.encode(x)
print('x encoded: span: {}, # unique: {}'.format(
    (np.min(x_enc), np.max(x_enc)), len(np.unique(x_enc))))

x_dec = ad.encoder.decode(x_enc)
print('x decoded: span: {}, # unique: {}'.format(
    (np.min(x_dec), np.max(x_dec)), len(np.unique(x_dec))))

fig, axes = plt.subplots(3, 1)
panels = (
    ('x (as loaded)', x),
    ('encoder.encode(x)', x_enc),
    ('encoder.decode(encoder.encode(x))', x_dec),
)
for ax, (title, signal) in zip(axes, panels):
    ax.plot(range(len(signal)), signal, 'b')
    ax.set_title(title)
axes[-1].set_xlabel('sample index')
fig.tight_layout()
# Render explicitly so the cell does not echo a Line2D repr as its output.
plt.show()

# Model

In [None]:
import os
import torch
from torch import nn, optim
import numpy as np
import matplotlib.pyplot as plt

from wavenet.wn.audiodata import AudioData, AudioLoader
from wavenet.wn.models import Model

%matplotlib inline

# Configuration for the Model cells.
# x_len is passed to both AudioData and Model below.
x_len = 2**10
# The next five values are forwarded to Model as keyword arguments of the
# same name; num_classes is also given to AudioData.
num_classes = 256
num_layers = 9
num_blocks = 2
num_hidden = 128
kernel_size = 2
# Optimizer / scheduler settings consumed by the train cell
# (Adam lr, StepLR step_size and gamma).
learn_rate = 0.001
step_size = 50
gamma = 0.5
# AudioLoader settings.
batch_size = 8
num_workers = 1
# Passed to wave_model.train as num_epochs in the train cell.
num_epochs = 10

filelist = ['./audio/classical.wav']
dataset = AudioData(filelist, x_len, num_classes=num_classes, 
                    store_tracks=True)
dataloader = AudioLoader(dataset, batch_size=batch_size, 
                         num_workers=num_workers)
wave_model = Model(x_len, num_channels=1, num_classes=num_classes, 
                   num_blocks=num_blocks, num_layers=num_layers,
                   num_hidden=num_hidden, kernel_size=kernel_size)

### set_device

In [None]:
# Report the model's device before and after each kind of set_device call.
print('Original device: {}'.format(wave_model.device))

# First an explicit CPU device, then no argument at all.
device_cases = (
    ('Manually set device: {}', (torch.device('cpu'),)),
    ('Auto set device: {}', ()),
)
for template, call_args in device_cases:
    wave_model.set_device(*call_args)
    print(template.format(wave_model.device))

### train

In [None]:
# Attach loss, optimizer and LR schedule to the model, then train it.
wave_model.criterion = nn.CrossEntropyLoss()

adam = optim.Adam(wave_model.parameters(), lr=learn_rate)
wave_model.optimizer = adam

# The schedule is built on the optimizer stored on the model.
step_schedule = optim.lr_scheduler.StepLR(
    wave_model.optimizer, step_size=step_size, gamma=gamma)
wave_model.scheduler = step_schedule

wave_model.train(dataloader, num_epochs=num_epochs, disp_interval=1)

# Generator

In [None]:
import os
import torch
from torch import nn, optim
import numpy as np
import matplotlib.pyplot as plt

from wavenet.wn.audiodata import AudioData, AudioLoader
from wavenet.wn.models import Model, Generator

%matplotlib inline

# Configuration for the Generator cells: a smaller model than the one in
# the Model section, cached on disk so the cells can be re-run cheaply.
x_len = 2**8
num_classes = 256
num_layers = 7
num_blocks = 2
num_hidden = 128
kernel_size = 2
learn_rate = 0.001
step_size = 50
gamma = 0.5
batch_size = 8
num_workers = 1
num_epochs = 10
# Weights are loaded from this file when it exists, otherwise written to it.
model_file = 'model-7.pt'

filelist = ['./audio/classical.wav']
dataset = AudioData(filelist, x_len, num_classes=num_classes,
                    store_tracks=True)
dataloader = AudioLoader(dataset, batch_size=batch_size,
                         num_workers=num_workers)
wave_model = Model(x_len, num_channels=1, num_classes=num_classes,
                   num_blocks=num_blocks, num_layers=num_layers,
                   num_hidden=num_hidden, kernel_size=kernel_size)

if os.path.isfile(model_file):
    # Reuse cached weights. map_location keeps the load working on a
    # CPU-only host even if the checkpoint was written from a CUDA device;
    # load_state_dict then copies the values into the model's own tensors.
    # NOTE(review): torch.load unpickles the file -- only load checkpoints
    # from a trusted source.
    cached_state = torch.load(model_file, map_location=torch.device('cpu'))
    wave_model.load_state_dict(cached_state)
else:
    # No cached weights: train on the CPU and save the result for next time.
    wave_model.criterion = nn.CrossEntropyLoss()
    wave_model.optimizer = optim.Adam(wave_model.parameters(),
                                      lr=learn_rate)
    wave_model.scheduler = optim.lr_scheduler.StepLR(wave_model.optimizer,
                                                     step_size=step_size,
                                                     gamma=gamma)

    wave_model.set_device(torch.device('cpu'))
    wave_model.train(dataloader, num_epochs=num_epochs, disp_interval=1)
    torch.save(wave_model.state_dict(), model_file)

wave_generator = Generator(wave_model, dataset)

### _shift_insert

In [None]:
# Build a tiny six-value input, turn it into a batched tensor, and show
# what _shift_insert does to it. (The unused `n_samples` assignment that
# used to open this cell has been dropped.)
x = np.linspace(-2**15, 2**15, 6)
print('x:')
print('\tspan: {}, # unique: {}'.format(
    (np.min(x), np.max(x)), len(np.unique(x))))

x = dataset._to_tensor(dataset.preprocess(x))
# Add a leading dimension of size 1 in front of the tensor's own dimensions.
x = torch.unsqueeze(x, 0)
print('\ttype: {}, shape: {}'.format(type(x), x.shape))
print('\tdata: {}'.format(x.data))

x_s = wave_generator._shift_insert(x, np.array([0]))
print('x shifted:')
print('\ttype: {}, shape: {}'.format(type(x_s), x_s.shape))
print('\tdata: {}'.format(x_s.data))

### tensor2numpy

In [None]:
# Preprocess the first track into a tensor with a leading dimension of
# size 1, then convert it back with tensor2numpy.
raw_audio, _, _ = dataset._load_audio_from_wav(filelist[0])
x = torch.unsqueeze(dataset._to_tensor(dataset.preprocess(raw_audio)), 0)
print('x: type: {}, shape: {}'.format(type(x), x.shape))

x_np = wave_generator.tensor2numpy(x)
print('x numpy: type: {}, shape: {}'.format(type(x_np), x_np.shape))

### predict

In [None]:
# Preprocess the first track into a tensor with a leading dimension of
# size 1 and run a single predict call on it.
raw_audio, _, _ = dataset._load_audio_from_wav(filelist[0])
x = torch.unsqueeze(dataset._to_tensor(dataset.preprocess(raw_audio)), 0)

y = wave_generator.predict(x)
print('y: type: {}, shape: {}'.format(type(y), y.shape))

### run

In [None]:
# Seed the generator with x_len samples of real audio, generate num_samples
# more, and plot the output over the source audio for the same range.
num_samples = 100
start_idx = 10000

audio, _, _ = dataset._load_audio_from_wav(filelist[0])
# Seed window handed to the generator.
x = audio[start_idx:start_idx + x_len]
# Seed window plus the num_samples source samples that follow it.
x_samp = audio[start_idx:start_idx + x_len + num_samples]

y = wave_generator.run(x, num_samples, disp_interval=10)
print('y:')
# Tab-indented detail lines, matching the report style of the
# _shift_insert cell (the original used '\n' here, which broke the layout).
print('\ttype: {}, shape: {}'.format(type(y), y.shape))
print('\tspan: {}, # unique: {}'.format(
    (np.min(y), np.max(y)), len(np.unique(y))))

fig, ax = plt.subplots()
ax.plot(range(x_len + num_samples), x_samp, 'b', label='source audio')
ax.plot(range(x_len, x_len + num_samples), y, 'r',
        label='wave_generator.run output')
ax.set_title('Generated samples vs. source audio')
ax.set_xlabel('sample index from start_idx')
ax.legend()
# Render explicitly so the cell does not echo a Line2D repr as its output.
plt.show()