In [82]:
import torch
import torchaudio
import torchaudio.functional as F
import torchaudio.transforms as T
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, utils
import os
import numpy as np
import torch.nn as nn
import pandas as pd
import librosa
import librosa.display
from torch.utils.data import sampler
import torch.optim as optim
import json
import cv2

print(torch.__version__)
print(torchaudio.__version__)

1.11.0
0.11.0


In [83]:
class DAPSDatasetHelper():
    #initialization 
    def __init__(self):
        self.sample_rate=8000
        self.dir= "./daps_dict_unet2.json"
        openfile=open(self.dir, "r")
        self.dataset_dict= json.load(openfile)
        indx=2
        self.keys={}
        for key  in self.dataset_dict.keys():
            if(key=="produced"):
                self.keys[1]=key
            else:
                self.keys[indx]=key
                indx+=1

        self.num_files_per_category=len(self.dataset_dict["produced"].keys())

    #get the indexed file and sample rate
    def get_indxd_file(self,indx,isLabel=False):
        if(isLabel):
            category=self.keys[1]
        else:
            #category=self.keys[np.random.randint(2,len(self.keys))]
            #consider one category for now
            category=self.keys[8]
        data=cv2.imread(self.dataset_dict[category][str(indx)],0)
        data= cv2.normalize(data, None, alpha=0, beta=1, norm_type=cv2.NORM_MINMAX, dtype=cv2.CV_32F)
        Id= self.dataset_dict[category][str(indx)].split("/")[-1].split('.')[0]
        return (data,Id)

In [84]:
class DAPS(Dataset):
    def __init__(self):
        #super().__init__(self)
        self.daps= DAPSDatasetHelper()

    def __getitem__(self, index):
        data,id=self.daps.get_indxd_file(index)
        label,id=self.daps.get_indxd_file(index,True)
        return (data,label)

    def __len__(self):
        return (len(self.daps.keys)-1)*self.daps.num_files_per_category

In [85]:
dtype = torch.float32 # we will be using float throughout this tutorial
USE_GPU = True
if USE_GPU and torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

print(device)

cuda


In [86]:
def flatten(x):
    N = x.shape[0] # read in N, C, H, W
    return x.view(N, -1)  # "flatten" the C * H * W values into a single vector per image

In [87]:
NUM_TRAIN=8000
NUM_VAL=1000
NUM_TEST=1000
print_every = 100
dataset_train = DAPS()
loader_train = DataLoader(dataset_train, batch_size=16, num_workers=0,
                          sampler=sampler.SequentialSampler(range(NUM_TRAIN)))
loader_val= DataLoader(dataset_train, batch_size=16, num_workers=0,
                          sampler=sampler.SequentialSampler(range(NUM_TRAIN,NUM_VAL+NUM_TRAIN)))
loader_test = DataLoader(dataset_train, batch_size=16, num_workers=0,
                          sampler=sampler.SequentialSampler(range(NUM_VAL+NUM_TRAIN, NUM_VAL+NUM_TRAIN+NUM_TEST)))

helper= DAPSDatasetHelper()


In [88]:
class block(nn.Module):
    def __init__(self, in_channels, out_channels):
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels=in_channels, out_channels=out_channels,
                                kernel_size=(3,3),stride=1,padding=1)
        #self.bnorm1=nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU()
        self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=(3,3),stride=1,padding=1)
        #self.bnorm2=nn.BatchNorm2d(out_channels)

    def forward(self, x):
        x = self.relu(self.conv1(x))
        x = self.relu(self.conv2(x))
        return x


class encoder(nn.Module):
    def __init__(self, c_list):
        super().__init__()
        self.pool = nn.MaxPool2d(kernel_size=(2,1),stride=(2,1))
        self.blocks = nn.ModuleList([block(c_list[i], c_list[i+1]) for i in range(len(c_list)-1)])

    def forward(self, x):
        filters = []
        for block in self.blocks:
            x = block(x)
            #print("conv output",x.shape)
            filters.append(x)
            x = self.pool(x)
            #print("pool output",x.shape)
        return filters


class decoder(nn.Module):
    def __init__(self, c_list):
        super().__init__()
        self.up_cov = nn.ModuleList([nn.ConvTranspose2d(c_list[i], c_list[i+1], kernel_size=(2,1), stride=(2,1))
                                                                    for i in range(len(c_list)-1)])
        self.conv_blocks = nn.ModuleList([block(c_list[i], c_list[i+1]) for i in range(len(c_list)-1)])
        

    def forward(self, x, features):    
        for i in range(len(self.conv_blocks)):
            x = self.up_cov[i](x)
           # print("up conv output",features[i].shape, x.shape)
            x = torch.cat([x, features[i]], dim=1)
            x = self.conv_blocks[i](x)
            #print("concat output",x.shape)
        return x

class UNet(nn.Module):
    def __init__(self):
        super().__init__()
        c_list=[1, 16, 32, 64, 128]
        self.enc_blocks = encoder(c_list)
        self.dec_blocks = decoder(c_list[::-1][:-1])
        self.last_layer = nn.Conv2d(c_list[1], 1, kernel_size=1,stride=1,padding=0)

    def forward(self, x):
        filters = self.enc_blocks(x)
        x = self.dec_blocks(filters[-1], filters[::-1][1:])
        return self.last_layer(x)

In [89]:
#SNR loss where yhat is average power spectrum of de-nonlinearised/denoised signal/ clean signal power 
def Loss_SNR(yhat,y):
    yhat=yhat.cpu().detach().numpy()
    y=y.cpu().detach().numpy()
    loss= np.mean(10*np.log(np.abs(yhat-y)/y))
    loss=torch.from_numpy(loss)
    loss=loss.to(device=device,dtype=dtype)
    return loss

#SNR loss where yhat is average power spectrum of de-nonlinearised/denoised signal/ clean signal power 
def Loss_MSE(yhat,y,lossfn):
    loss=lossfn(yhat,y)
    return loss


In [94]:
def check_accuracy(loader, model):
    print('Checking accuracy on validation set')
    model.eval()  # set model to evaluation mode
    lossfn=nn.MSELoss()
    mse=0
    with torch.no_grad():
        for x, y in loader:
            x = x.to(device=device, dtype=dtype)  # move to device, e.g. GPU
            y = y.to(device=device, dtype=dtype)
            x=x.unsqueeze(1)
            y=y.unsqueeze(1) 
            yhat = model(x)
            mse+= lossfn(y,yhat)
        mse = mse / len(loader)
        print("Validation loss is", mse.cpu().numpy())

In [95]:
print_every=1
def train_model(model, optimizer ,epochs=1):
    """
    Train a model on DAPS.
    
    Inputs:
    - model_fn: A Python function that performs the forward pass of the model.
      It should have the signature scores = model_fn(x, params) where x is a
      PyTorch Tensor of image data, params is a list of PyTorch Tensors giving
      model weights, and scores is a PyTorch Tensor of shape (N, C) giving
      scores for the elements in x.
    - params: List of PyTorch Tensors giving weights for the model
    - learning_rate: Python scalar giving the learning rate to use for SGD
    
    Returns: Nothing
    """
    lossfn=nn.MSELoss()
    model = model.to(device=device)  # move the model parameters to CPU/GPU
    for e in range(epochs):
        for t, (x, y) in enumerate(loader_train):
            
            #print(y.shape)          
            #iterate over each stft frame 
            x=x.to(device=device, dtype=dtype)
            y=y.to(device=device, dtype=dtype)

            x=x.unsqueeze(1)
            y=y.unsqueeze(1) 

            # Forward pass: compute scores and loss
            yhat = model(x)
            
            loss = Loss_MSE(yhat, y, lossfn)

            # Zero out all of the gradients for the variables which the optimizer
            # will update.
            optimizer.zero_grad()

            # This is the backwards pass: compute the gradient of the loss with
            # respect to each  parameter of the model.
            loss.backward()

            # Actually update the parameters of the model using the gradients
            # computed by the backwards pass.
            optimizer.step()
            # Update parameters. We don't want to backpropagate through the

            

        if t % print_every == 0:
            print('Epoch %d, Iteration %d, loss = %.4f' % (e, t, loss.item()))
            #check_accuracy_part2(loader_val, model_fn, params)
            
        check_accuracy(loader_val,model)


In [96]:
learning_rate = 1e-4

model = UNet()
# you can use Nesterov momentum in optim.SGD
#optimizer = optim.SGD(model.parameters(), lr=learning_rate,
#                     momentum=0.9, nesterov=True)
# you can use Nesterov momentum in optim.SGD
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

In [97]:
train_model(model, optimizer,30)

Epoch 0, Iteration 499, loss = 0.0310
Checking accuracy on validation set


TypeError: can't convert cuda:0 device type tensor to numpy. Use Tensor.cpu() to copy the tensor to host memory first.