# Problem 1

In [1]:

import torch
from torch.utils.data import Dataset, DataLoader
from  torchvision import datasets, transforms
import torch.nn as nn
import torch.nn.functional as F
from torch import optim
from torch.autograd import Variable
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
%matplotlib inline 
import librosa
from sklearn.manifold import TSNE
from sklearn.decomposition import PCA
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)


cuda:0


## Creating a DataSet class for the Data

In [2]:
class AudioData(Dataset):
    def __init__(self, path):
        #download read data  
        sn, sr=librosa.load(path + r'\data\train_dirty_male.wav', sr=None)
        X = librosa.stft(sn, n_fft=1024, hop_length=512)
        absX = np.abs(X).T.reshape(-1,1,513)
        self.x_data =torch.tensor(absX)
        

        s, sr=librosa.load(path + r'\data\train_clean_male.wav', sr=None)
        S = librosa.stft(s, n_fft=1024, hop_length=512)
        absS = np.abs(S).T.reshape(-1,513)
        self.y_data = torch.tensor(absS)

        
        self.len = self.x_data.shape[0]
        
        self.clean = librosa.istft(S,hop_length=512)
        self.dirty = (X/np.abs(X))  
    
    
        self.snr = 10 * np.log10(np.sum(np.square(s)) / np.sum(np.square(s-sn)))
    def __getitem__(self, index):
        #return one item on the index
        return self.x_data[index], self.y_data[index]

    def __len__(self):
        #return data length
        return self.len

## Loading audio train data

In [3]:
path = r'C:\Users\Nick\Documents\GitHub\DeepLearningSystems\Module 2'
data = AudioData(path)
print(f'The Original SNR is {data.snr}')
train_loader = DataLoader(dataset = data, batch_size = 128, shuffle = True)

The Original SNR is 6.509321928024292


## Building a four layer neural network

In [4]:
class CNN(nn.Module):
    def __init__ (self):
        super(CNN, self).__init__()
        #no stride 510, non pool 57
        self.l1 = nn.Conv1d (1,3,3)
        self.l2 = nn.Conv1d (3,9,3)
        self.l3 = nn.Linear (1134,700)
        self.l4 = nn.Linear (700,513)
        
        self.p1 = nn.MaxPool1d(3, 2)
        self.p2 = nn.MaxPool1d(3, 2)
        
        self.b1 = nn.BatchNorm1d(3)
        self.b2 = nn.BatchNorm1d(9)
        self.b3 = nn.BatchNorm1d(700)
        
        
        self.d1 = nn.Dropout(.20)
        self.d2 = nn.Dropout(.10)

        
    def forward(self,x):
        out1 = self.d1(self.b1(self.p1(F.relu(self.l1(x)))))
        out2 = self.b2(self.p2(F.relu(self.l2(out1))))
        out2 = self.d2(out2.view(-1, out2.shape[1]*out2.shape[2]))
        out3 = self.b3(F.relu(self.l3(out2)))
        out3 = F.relu(self.l4(out3))
        return out1, out2, out3
  

In [5]:
if False:
    tst3 = nn.Conv1d (1,3,3)
    tst4 = nn.MaxPool1d(3, 2)
    tst5 = nn.Conv1d (3,9,3)
    tst6 = nn.MaxPool1d(3,2)
    dat = data.x_data
    meme = tst6(tst5(tst4(tst3(dat))))
    print(meme.shape[1] * meme.shape[2])      

## Setting the optimizor to Adam and the Loss to MSE


In [6]:
net2 = CNN()
print(net2.parameters)
optA = optim.Adam(net2.parameters(), lr = 0.001)
critA = nn.MSELoss()
epochs = 501
printA = 50

<bound method Module.parameters of CNN(
  (l1): Conv1d(1, 3, kernel_size=(3,), stride=(1,))
  (l2): Conv1d(3, 9, kernel_size=(3,), stride=(1,))
  (l3): Linear(in_features=1134, out_features=700, bias=True)
  (l4): Linear(in_features=700, out_features=513, bias=True)
  (p1): MaxPool1d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  (p2): MaxPool1d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  (b1): BatchNorm1d(3, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (b2): BatchNorm1d(9, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (b3): BatchNorm1d(700, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (d1): Dropout(p=0.2, inplace=False)
  (d2): Dropout(p=0.1, inplace=False)
)>


## Training over 500 epochs and printing every 50 itterations

In [7]:
for epoch in range(epochs):
    lossL = []
    for i,dat in enumerate(train_loader):
        inputs, labels = dat
        inputs, labels = Variable(inputs), Variable(labels)
        optA.zero_grad()
        out1,out2,net_out = net2(inputs)
        loss = critA(net_out, labels)
        lossL.append(loss.item())
        loss.backward()
        optA.step()
    if (epoch%printA) == 0:
        ntst = data.dirty
        lib = data.clean
        out1,out2,net_out = net2(data.x_data)
        ntst2= np.multiply(ntst, net_out.detach().numpy().T)
        ntest3 = librosa.istft(ntst2,hop_length=512)
        SNR = 10 * np.log10(np.sum(np.square(lib)) / np.sum(np.square(lib-ntest3)))
        print(f'The Loss = {np.mean(lossL):0.5f} on Epoch {epoch} and the SNR is {SNR:0.2f}')
        

The Loss = 0.09612 on Epoch 0 and the SNR is 3.63
The Loss = 0.01224 on Epoch 50 and the SNR is 10.09
The Loss = 0.01016 on Epoch 100 and the SNR is 10.93
The Loss = 0.00838 on Epoch 150 and the SNR is 11.48
The Loss = 0.00710 on Epoch 200 and the SNR is 11.86
The Loss = 0.00669 on Epoch 250 and the SNR is 11.91
The Loss = 0.00643 on Epoch 300 and the SNR is 11.81
The Loss = 0.00613 on Epoch 350 and the SNR is 11.25
The Loss = 0.00529 on Epoch 400 and the SNR is 10.92
The Loss = 0.00550 on Epoch 450 and the SNR is 11.23
The Loss = 0.00473 on Epoch 500 and the SNR is 11.07


## Testintg Model On Test Data

In [8]:
st, sr=librosa.load(path + r'\data\test_x_01.wav', sr=None)
Xt=librosa.stft(st, n_fft=1024, hop_length=512)
absXt =np.abs(Xt).T.reshape(-1,1,513)
absXt.shape
tdata = Variable(torch.tensor(absXt))
out1, out2, net_out = net2(tdata)
tst = (Xt/np.abs(Xt))
tst2= np.multiply(tst, net_out.detach().numpy().reshape(142,513).T)
sh_test = librosa.istft(tst2,hop_length=512)
librosa.output.write_wav(path + r'\data\test_s_01_recons.wav', sh_test, sr)

In [9]:
st, sr=librosa.load(path + r'\data\test_x_02.wav', sr=None)
Xt=librosa.stft(st, n_fft=1024, hop_length=512)
absXt =np.abs(Xt).T.reshape(-1,1,513)
absXt.shape
tdata = Variable(torch.tensor(absXt))
out1, out2, net_out = net2(tdata)
tst = (Xt/np.abs(Xt))
tst2= np.multiply(tst, net_out.detach().numpy().reshape(380,513).T)
sh_test = librosa.istft(tst2,hop_length=512)
librosa.output.write_wav(path + r'\data\test_s_02_recons.wav', sh_test, sr)

# Problem 2

In [10]:

import torch
from torch.utils.data import Dataset, DataLoader
from  torchvision import datasets, transforms
import torch.nn as nn
import torch.nn.functional as F
from torch import optim
from torch.autograd import Variable
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
%matplotlib inline 
import librosa
from sklearn.manifold import TSNE
from sklearn.decomposition import PCA
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)
    

cuda:0


## Loading Data & Converting to 2440,1,20,513

In [11]:
path = r'C:\Users\Nick\Documents\GitHub\DeepLearningSystems\Module 2'
sn, sr=librosa.load(path + r'\data\train_dirty_male.wav', sr=None)
X = librosa.stft(sn, n_fft=1024, hop_length=512)
dirty = (X/np.abs(X))
absX = np.abs(X).T
x_data =torch.tensor(absX)


newx = np.empty((2440,20,513))  
for i in range(2440):
    newx[i] = x_data[i:20+i]
newx = torch.tensor(newx.reshape((-1,1,20,513))).to(torch.device("cuda:0"))
print(newx.shape)

torch.Size([2440, 1, 20, 513])


In [12]:
s, sr=librosa.load(path + r'\data\train_clean_male.wav', sr=None)
S = librosa.stft(s, n_fft=1024, hop_length=512)
clean = librosa.istft(S,hop_length=512)
absS = np.abs(S).T
y_data = torch.tensor(absS)
newy = np.empty((2440,513))  
for i in range(19,2459):
    newy[i-19] = y_data[i]
newy = torch.tensor(newy).to(torch.device("cuda:0"))
print(newy.shape)    

torch.Size([2440, 513])


In [13]:
print(f' The new x shape is {newx.shape} and the new y shape is {newy.shape}')

 The new x shape is torch.Size([2440, 1, 20, 513]) and the new y shape is torch.Size([2440, 513])


In [14]:
#BatchNormAfterActivation
class CNN2(nn.Module):
    def __init__ (self):
        super(CNN2, self).__init__()
        self.l1 = nn.Conv2d (1,3,1)
        self.l2 = nn.Conv2d (3,6,3)
        
        self.l3 = nn.Linear (1008,1000)
        self.l4 = nn.Linear (1000,513)
    
        self.p1 = nn.MaxPool2d(3, 2)
        self.p2 = nn.MaxPool2d(3, 3)
        
        self.b1 = nn.BatchNorm2d(3)
        self.b2 = nn.BatchNorm2d(6)
        self.b3 = nn.BatchNorm1d(1000)
        
        
        self.d1 = nn.Dropout2d(.20)
        self.d2 = nn.Dropout2d(.10)

    
    def forward(self,x):
        out1 = self.d1(self.b1(self.p1(F.relu(self.l1(x)))))
        
        out2 = self.d2(self.b2(self.p2(F.relu(self.l2(out1)))))
        out2 = out2.view(-1, out2.shape[1]*out2.shape[2] *  out2.shape[3])

        out3 = self.b3(F.relu(self.l3(out2)))
        out4 = F.relu(self.l4(out3))
        return out1, out2, out4
  

In [15]:
if False:
    tst3 = nn.Conv2d (1,3,1).double().cuda()
    tst4 = nn.MaxPool2d(3, 2)
    tst5 = nn.Conv2d (3,6,3).double().cuda()
    tst6 = nn.MaxPool2d(3,3)
    meme = tst6(tst5(tst4(tst3(newx))))
    print(meme.shape[1] * meme.shape[2] * meme.shape[3] ) 
    del meme, tst3, tst4, tst5, tst6
    torch.cuda.empty_cache()

## Setting Model Optimizor, Loss , & Epochs

In [16]:
net3 = CNN2().double()
net3.to(torch.device("cuda:0"))
print(net3.parameters)
optA3 = optim.Adam(net3.parameters(), lr = 0.001)
critA3 = nn.MSELoss()

<bound method Module.parameters of CNN2(
  (l1): Conv2d(1, 3, kernel_size=(1, 1), stride=(1, 1))
  (l2): Conv2d(3, 6, kernel_size=(3, 3), stride=(1, 1))
  (l3): Linear(in_features=1008, out_features=1000, bias=True)
  (l4): Linear(in_features=1000, out_features=513, bias=True)
  (p1): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  (p2): MaxPool2d(kernel_size=3, stride=3, padding=0, dilation=1, ceil_mode=False)
  (b1): BatchNorm2d(3, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (b2): BatchNorm2d(6, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (b3): BatchNorm1d(1000, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (d1): Dropout2d(p=0.2, inplace=False)
  (d2): Dropout2d(p=0.1, inplace=False)
)>


## Training Conv2D Model Printing Loss & SNR

In [17]:
%%time
srt = sr
totEpoch = 801
tt = (np.random.rand(19*513)/100).reshape((19,513))
for epoch in range(totEpoch):
    #inputs, labels = torch.tensor(newx.reshape((-1,1,20,513))), torch.tensor(newy)
    inputs, labels = Variable(newx), Variable(newy)
    optA3.zero_grad()
    out1,out2,net_out = net3(inputs)
    loss = critA3(net_out, labels)
    loss.backward()
    optA3.step()
    if epoch%100 == 0:
        huh = np.append(tt,net_out.detach().cpu().numpy()).reshape(2459,513)
        ntst2= np.multiply(dirty, huh.T)
        ntest3 = librosa.istft(ntst2,hop_length=512)
        SNR = 10 * np.log10(np.sum(np.square(clean)) / np.sum(np.square(clean-ntest3)))
        print(f'The Loss = {loss:0.5f} at Epoch {epoch} and the SNR is {SNR:0.2f}')
        if epoch==totEpoch -1:
            librosa.output.write_wav(path + r'\data\male_clean(2d)).wav', ntest3 , srt)
        del huh, ntst2, ntest3, SNR
        torch.cuda.empty_cache()

del inputs, labels, net_out
torch.cuda.empty_cache()

The Loss = 0.23861 at Epoch 0 and the SNR is -2.64
The Loss = 0.04644 at Epoch 100 and the SNR is 3.84
The Loss = 0.02051 at Epoch 200 and the SNR is 7.49
The Loss = 0.01294 at Epoch 300 and the SNR is 9.35
The Loss = 0.01005 at Epoch 400 and the SNR is 10.33
The Loss = 0.00933 at Epoch 500 and the SNR is 10.64
The Loss = 0.00789 at Epoch 600 and the SNR is 11.24
The Loss = 0.00743 at Epoch 700 and the SNR is 11.52
The Loss = 0.00647 at Epoch 800 and the SNR is 12.02
Wall time: 10min 52s


## Testing Model on Test Data

In [18]:
fname = r'\test_x_01'
sn, sr=librosa.load(path + r'\data' + fname + '.wav', sr=None)
Xt = librosa.stft(sn, n_fft=1024, hop_length=512)


absXt = np.abs(Xt).T
xt_data =torch.tensor(absXt)
newxt = np.empty((123,20,513))  
for i in range(123):
    newxt[i] = xt_data[i:20+i]
newxt = torch.tensor(newxt.reshape((-1,1,20,513))).to(torch.device("cuda:0"))
out1, out2, net_out2 = net3(newxt)

dirtyt = (Xt/np.abs(Xt))
tt = (np.random.rand(19*513)/100).reshape((19,513))

huh2 = np.append(tt,net_out2.detach().cpu().numpy()).reshape(142,513)
tst2t = np.multiply(dirtyt,huh2.T)
sh_testt = librosa.istft(tst2t,hop_length=512)
librosa.output.write_wav(path + r'\data' + fname + '_recons(2d).wav', sh_testt, sr)
del sn, sr, Xt, absXt, xt_data, newxt, out1, out2, net_out2, dirtyt, tt, huh2, tst2t, sh_testt
torch.cuda.empty_cache()

In [19]:
fname = r'\test_x_02'
sn, sr=librosa.load(path + r'\data' + fname + '.wav', sr=None)
Xt = librosa.stft(sn, n_fft=1024, hop_length=512)
absXt = np.abs(Xt).T
xt_data =torch.tensor(absXt)
newxt = np.empty((361,20,513))  
for i in range(361):
    newxt[i] = xt_data[i:20+i]
newxt = torch.tensor(newxt.reshape((-1,1,20,513))).to(torch.device("cuda:0"))
out1, out2, net_out2 = net3(newxt)

dirtyt = (Xt/np.abs(Xt))
tt = (np.random.rand(19*513)/100).reshape((19,513))

huh2 = np.append(tt,net_out2.detach().cpu().numpy()).reshape(380,513)
tst2t = np.multiply(dirtyt,huh2.T)
sh_testt = librosa.istft(tst2t,hop_length=512)
librosa.output.write_wav(path + r'\data' + fname + '_recons(2d).wav', sh_testt, sr)

del sn, sr, Xt, absXt, xt_data, newxt, out1, out2, net_out2, dirtyt, tt, huh2, tst2t, sh_testt
torch.cuda.empty_cache()