# Deep Learning Assignment 1 - Problem 2
## Saber Sheybani
See the report file for documentation and discussion.

In [16]:
import os 
os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID" 
os.environ["CUDA_VISIBLE_DEVICES"]="5"

import torch
from torch.autograd import Variable
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
import numpy as np
import matplotlib.pyplot as plt
import librosa
from librosa.display import specshow as specshow

In [17]:
hop_length=512
n_fft=1024
s, sr=librosa.load('train_clean_male.wav', sr=None)
S=librosa.stft(s, n_fft=n_fft, hop_length=hop_length)
sn, sr=librosa.load('train_dirty_male.wav', sr=None)
X=librosa.stft(sn, n_fft=n_fft, hop_length=hop_length)

## Problem 2 - Question 7

In [18]:
# Take the amplitude of the spectrogram and visualize it
S_mag = np.abs(S)
S_phase = np.angle(S)
X_mag = np.abs(X)
#specshow(librosa.amplitude_to_db(S,
#                                                ref=np.max),
#                        y_axis='log', x_axis='time')
#plt.title('Power spectrogram')
#plt.colorbar(format='%+2.0f dB')
#plt.tight_layout()

## Problem 2 - Question 8,9: Train a network

In [29]:
# We need a new network class that returns the output of all the layers, consuming more memory. 

class DenoiseNet(nn.Module):
    def __init__(self, nin, nout):
        super(DenoiseNet, self).__init__()
        self.fc1 = nn.Linear(nin, 512)
        self.fc2 = nn.Linear(512, 1024)
        self.fc3 = nn.Linear(1024, 1024)
        self.outl = nn.Linear(1024, nout)

    def forward(self, x):
        z = F.relu(self.fc1(x))
        z = F.leaky_relu(self.fc2(z))
        z = F.relu(self.fc3(z))
        y = self.outl(z)
        return F.relu(y)
        #return F.log_softmax(x,dim=0)

In [36]:
learning_rate=0.001
net = DenoiseNet(S_mag.shape[0], X_mag.shape[0])
optimizer = optim.Adam(net.parameters(), lr = learning_rate)
criterion = nn.MSELoss()


In [37]:
epochs=1
log_interval=10
maxIter=800

In [None]:
sp_data, target = Variable(torch.from_numpy(np.transpose(S_mag))), Variable(torch.from_numpy(np.transpose(X_mag)))
loss_orig = criterion(sp_data, target)
# run the main training loop
for epoch in range(epochs):
    for i in range(maxIter):
        optimizer.zero_grad()
        net_out = net(sp_data)
        loss = criterion(net_out, target)        
        loss.backward()
        optimizer.step()
        #pred = net_out.data.max(1)[1]  # get the index of the max log-probability
        #correct += (net_out.data-target.data).sum()
        if i % log_interval == 0:
            print("Iteration:", i, "Loss Change = ", loss.data/loss_orig.data)
            
loss_change = loss.data/loss_orig.data
print("New Error / Original Error = ", loss_change)

In [33]:
# Save the results in time-domain
S_hat = np.transpose(net_out.data.numpy())*np.exp(1j*S_phase)
s_hat = librosa.istft(S_hat, hop_length=hop_length)
librosa.output.write_wav('train_output1.wav', s_hat, sr=sr)

In [31]:
# Mini-batching

train_dataset = torch.utils.data.TensorDataset(torch.from_numpy(S_mag), torch.from_numpy(X_mag))
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=32, shuffle=True, drop_last=True)
batch_size=32

epochs=3
log_interval=5
test_batch_size=1024
maxIter=1

sp_data, target = Variable(torch.from_numpy(S_mag)), Variable(torch.from_numpy(X_mag))
loss_orig = criterion(sp_data, target)

for epoch in range(epochs):
    for batch_idx, (sp_data, target) in enumerate(train_loader):
        sp_data, target = Variable(sp_data), Variable(target)
        optimizer.zero_grad()
        net_out = net(sp_data)
        loss = criterion(net_out, target)        
        loss.backward()
        optimizer.step()
        if batch_idx % log_interval == 0:
            print("Iteration:", i, "Loss Change = ", loss.data/loss_orig.data)
loss_change = loss.data/loss_orig.data
print("New Error / Original Error = ", loss_change)

Iteration: 368 Loss Change =  
 2.6173
[torch.FloatTensor of size 1]

Iteration: 368 Loss Change =  
 9.4759
[torch.FloatTensor of size 1]

Iteration: 368 Loss Change =  
 4.6225
[torch.FloatTensor of size 1]

Iteration: 368 Loss Change =  
 5.0051
[torch.FloatTensor of size 1]

Iteration: 368 Loss Change =  
 4.8757
[torch.FloatTensor of size 1]

Iteration: 368 Loss Change =  
 2.3903
[torch.FloatTensor of size 1]

Iteration: 368 Loss Change =  
 7.3473
[torch.FloatTensor of size 1]

Iteration: 368 Loss Change =  
 1.2159
[torch.FloatTensor of size 1]

Iteration: 368 Loss Change =  
 2.8141
[torch.FloatTensor of size 1]

Iteration: 368 Loss Change =  
 5.0597
[torch.FloatTensor of size 1]

Iteration: 368 Loss Change =  
 1.3054
[torch.FloatTensor of size 1]

Iteration: 368 Loss Change =  
 2.5931
[torch.FloatTensor of size 1]

New Error / Original Error =  
 2.5931
[torch.FloatTensor of size 1]



## Problem 2 - Question 10,11: Test 1

In [39]:
hop_length=512
n_fft=1024
s, sr=librosa.load('test_x_01.wav', sr=None)
S=librosa.stft(s, n_fft=n_fft, hop_length=hop_length)

S_mag = np.abs(S)
S_phase = np.angle(S)

sp_data = Variable(torch.from_numpy(np.transpose(S_mag)))
net_out = net(sp_data)

S_hat = np.transpose(net_out.data.numpy())*np.exp(1j*S_phase)
s_hat = librosa.istft(S_hat, hop_length=hop_length)
librosa.output.write_wav('test_s_01_recons.wav', s_hat, sr=sr)

## Problem 2 - Question 12: Test 2

In [40]:
hop_length=512
n_fft=1024
s, sr=librosa.load('test_x_02.wav', sr=None)
S=librosa.stft(s, n_fft=n_fft, hop_length=hop_length)

S_mag = np.abs(S)
S_phase = np.angle(S)

sp_data = Variable(torch.from_numpy(np.transpose(S_mag)))
net_out = net(sp_data)

S_hat = np.transpose(net_out.data.numpy())*np.exp(1j*S_phase)
s_hat = librosa.istft(S_hat, hop_length=hop_length)
librosa.output.write_wav('test_s_02_recons.wav', s_hat, sr=sr)