In [1]:
import glob
# !pip install librosa # in colab, you’ll need to install this
import librosa
import numpy as np
import torchvision
import torch
from torch import nn
from torch.utils.data import Dataset, DataLoader
from torchvision import datasets
from torchvision.transforms import ToTensor
import torch.nn.functional as F
import pandas as pd
from random import sample
import matplotlib.pyplot as plt
import soundfile
import os

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Use the first CUDA GPU when torch reports one is available; otherwise fall back to the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [3]:
# Show which device was selected.
print(device)

cuda:0


In [None]:
# Hard-coded, machine-specific directories.
# `path` is the default value of load_files()'s `path` argument and
# `save_path` is the directory load_files() writes its .pt files to.
path = '/nobackup/potem/timit-homework/tr'
save_path = '/nobackup/potem/dl_data'
# In the code shown here these two are only referenced by the commented-out
# load_files() calls at the end of this cell.
val_path = '/nobackup/potem/timit-homework/v'
test_path = '/nobackup/potem/timit-homework/te'

def read_file(file_path):
    """Load one audio file and return its STFT as torch tensors.

    The file is loaded with ``sr=None`` and transformed with a 1024-point
    STFT using a hop of 512 samples.

    Returns:
        A 3-tuple ``(spectrum, magnitude, sample_rate)`` where ``spectrum``
        is the transposed complex STFT, ``magnitude`` is the transposed
        absolute value of that STFT, and ``sample_rate`` is the rate
        reported by ``librosa.load``.
    """
    waveform, sample_rate = librosa.load(file_path, sr=None)
    spectrum = librosa.stft(waveform, n_fft=1024, hop_length=512)
    magnitude = np.abs(spectrum)

    # Transpose before wrapping so the STFT's second axis becomes the first.
    spectrum_tensor = torch.from_numpy(spectrum.T)
    magnitude_tensor = torch.from_numpy(magnitude.T)
    return spectrum_tensor, magnitude_tensor, sample_rate

def load_files(path=path, types = ['trn','trx', 'trs']):
    """Convert every ``{path}/{type}*.wav`` file to padded STFT tensors and save them.

    For each prefix in ``types`` (the defaults are noise, mixed, source) three
    tensors are written to the module-level ``save_path`` directory:

    * ``{type}_tensor.pt``      - stacked complex STFTs from ``read_file``
    * ``{type}_abs_tensor.pt``  - stacked STFT magnitudes
    * ``{type}_snr_tensor.pt``  - the per-file sample rate returned by
      ``read_file`` (despite the ``snr`` in the file name)

    Args:
        path: Directory that is globbed for ``.wav`` files.
        types: File-name prefixes to process; one set of tensors per prefix.

    Returns:
        None. The results are only written to disk.
    """
    # Every spectrogram is padded along its first axis to 150 rows.
    S_shape = (150, 513)

    for t in types:
        temp_data = []
        temp_data_abs = []
        sample_rates = []

        # glob.glob() returns matches in arbitrary order. Sorting makes the
        # row order deterministic, so that row i of each prefix's tensor comes
        # from the file with the same rank among that prefix's names.
        # NOTE(review): this pairs noise/mixed/source rows correctly only if
        # the prefixes are followed by matching identifiers - confirm.
        for filename in sorted(glob.glob(f'{path}/{t}*.wav')):
            S, S_abs, sr = read_file(filename)

            # For a 2-D tensor, pad = (last-dim left, last-dim right,
            # first-dim left, first-dim right): only the end of the first
            # axis is extended, up to S_shape[0] rows.
            # NOTE(review): an input with more than S_shape[0] rows yields a
            # negative pad amount - confirm trimming is the intended outcome.
            S = F.pad(S,
                      pad=(0, 0, 0, S_shape[0] - S.shape[0]))
            S_abs = F.pad(S_abs,
                          pad=(0, 0, 0, S_shape[0] - S_abs.shape[0]))

            temp_data.append(S)
            temp_data_abs.append(S_abs)
            sample_rates.append(torch.tensor(sr))

        temp_data = torch.stack(temp_data)
        temp_data_abs = torch.stack(temp_data_abs)
        sample_rates = torch.stack(sample_rates)

        torch.save(temp_data, f'{save_path}/{t}_tensor.pt')
        torch.save(temp_data_abs, f'{save_path}/{t}_abs_tensor.pt')
        torch.save(sample_rates, f'{save_path}/{t}_snr_tensor.pt')
        
        
# data = load_files(path)
# data = load_files(test_path, ['te'])
# data = load_files(val_path, ['vs', 'vx','vn'])

In [6]:
# Rebinds the module-level `path`; rnn_process() passes this value to
# get_data(), which loads the saved *_tensor.pt files from it.
path = '/N/slate/potem/dl_data/dl_data'

In [33]:
def IBM(source, noise):
    """Return a float32 mask: 1.0 where ``source`` is strictly greater than ``noise``, else 0.0."""
    source_dominates = source > noise
    return source_dominates.to(torch.float32)

def get_data(path, types = ['vs', 'vx','vn']):
    """Load the saved tensors for each prefix in ``types`` from ``path``.

    For every prefix ``p`` the returned dict holds ``p`` (loaded from
    ``{p}_tensor.pt``) and ``p + '_abs'`` (from ``{p}_abs_tensor.pt``).
    ``p + '_snr'`` is added only when ``{p}_snr_tensor.pt`` exists.

    Returns:
        dict mapping those keys to whatever ``torch.load`` returns.
    """
    # Reserve the spectrum/magnitude keys for all prefixes first, so they
    # precede any '_snr' key in the dict's insertion order.
    data = dict.fromkeys(
        key for prefix in types for key in (prefix, prefix + '_abs')
    )

    for prefix in types:
        data[prefix] = torch.load(f'{path}/{prefix}_tensor.pt')
        data[prefix + '_abs'] = torch.load(f'{path}/{prefix}_abs_tensor.pt')

        rate_file = f'{path}/{prefix}_snr_tensor.pt'
        if not os.path.isfile(rate_file):
            continue
        data[prefix + '_snr'] = torch.load(rate_file)

    return data



class PrepareData(Dataset):
    """Dataset that pairs ``X[idx]`` with ``y[idx]``."""

    def __init__(self, X, y):
        super().__init__()
        self.X, self.y = X, y

    def __len__(self):
        # The sample count is the size of the inputs' first dimension.
        n_samples = self.X.shape[0]
        return n_samples

    def __getitem__(self, idx):
        features = self.X[idx]
        target = self.y[idx]
        return features, target
    
    
def create_dataloader(x, y, batchsize):
    """Wrap inputs ``x`` and targets ``y`` in a ``DataLoader`` that does not shuffle.

    Returns:
        A ``DataLoader`` over ``PrepareData(x, y)`` with batches of
        ``batchsize`` items.
    """
    return DataLoader(PrepareData(x, y), batch_size=batchsize, shuffle=False)
    

    
class RNN(nn.Module):
    """LSTM model configured from a parameter dictionary.

    Keys read from ``parameters``:
        in_dim, hidden_dim, num_layers: passed to ``nn.LSTM`` as
            ``input_size``, ``hidden_size`` and ``num_layers``.
        type_init: when equal to ``'xavier'``, LSTM biases are set to zero
            and LSTM weights are drawn with Xavier-normal initialisation.
        activation, dropout: stored on the instance; ``forward`` does not
            apply either of them.

    ``forward`` returns the raw ``nn.LSTM`` result, i.e. a tuple whose first
    element is the per-step output; callers index ``[0]`` to get it.
    """

    def __init__(self, parameters):
        super().__init__()

        self.type_init = parameters['type_init']
        self.activation = parameters['activation']
        self.dropout = parameters['dropout']

        self.lstm = nn.LSTM(input_size=parameters['in_dim'],
                            hidden_size=parameters['hidden_dim'],
                            num_layers=parameters['num_layers'],
                            batch_first=True)

        if self.type_init == 'xavier':
            # Use the in-place initialisers (trailing underscore); the
            # un-suffixed nn.init.constant / nn.init.xavier_normal are
            # deprecated aliases.
            for name, param in self.lstm.named_parameters():
                if 'bias' in name:
                    nn.init.constant_(param, 0.0)
                elif 'weight' in name:
                    nn.init.xavier_normal_(param)

    def forward(self, x):
        """Run ``x`` through the LSTM and return its ``(output, state)`` tuple."""
        return self.lstm(x)

    
    

def loss_snr(dataloader, loss_fn, model):
    """Evaluate ``model`` on ``dataloader`` without tracking gradients.

    The model is switched to eval mode and moved to the module-level
    ``device``. Element 0 of the model's result is treated as the prediction.

    Returns:
        ``(mean_loss, snr)`` where ``mean_loss`` is the sum of the per-batch
        ``loss_fn`` values divided by the number of batches, and ``snr`` is
        ``10 * log10(sum(y**2) / sum((y - pred)**2))`` accumulated over all
        batches.
    """
    model.eval()
    model.to(device)

    signal_energy = torch.tensor(0.)
    error_energy = torch.tensor(0.)
    running_loss = 0

    with torch.no_grad():
        for inputs, targets in dataloader:
            inputs = inputs.to(device)
            targets = targets.to(device)

            signal_energy = signal_energy + (targets * targets).sum()

            estimate = model(inputs)[0]
            running_loss = running_loss + loss_fn(estimate, targets).item()
            error_energy = error_energy + torch.square(targets - estimate).sum()

    mean_loss = running_loss / len(dataloader)

    # No epsilon is added to the ratio: adding 10**20 gave overflow when unpacking.
    snr_db = 10 * torch.log10(signal_energy / error_energy)

    return mean_loss, snr_db.item()
    
    
    
def train_model(model, dataloader, val_dataloader, loss_fn, optimizer):
    """Run one optimisation pass over ``dataloader`` and then evaluate.

    The model is moved to the module-level ``device`` and put in training
    mode. After the pass, ``loss_snr`` is called on both loaders.

    Returns:
        ``[loss_train, snr_train, loss_val, snr_val]``.
    """
    model.to(device)
    model.train()

    for inputs, targets in dataloader:
        inputs = inputs.to(device)
        targets = targets.to(device)

        # The model returns a tuple; element 0 is compared with the targets.
        estimate = model(inputs)[0]
        batch_loss = loss_fn(estimate, targets)

        # Backpropagation
        optimizer.zero_grad()
        batch_loss.backward()
        optimizer.step()

    train_loss, train_snr = loss_snr(dataloader, loss_fn, model)
    val_loss, val_snr = loss_snr(val_dataloader, loss_fn, model)

    return [train_loss, train_snr, val_loss, val_snr]

def reconstruct(X, X_abs, S_abs, sr, filename):
    """Combine a predicted magnitude with the mixture's phase and write a wav file.

    Args:
        X: Complex mixture STFT; must provide ``.numpy()``.
        X_abs: Magnitude of ``X``; must provide ``.numpy()``.
        S_abs: Predicted magnitude array, multiplied element-wise with the
            mixture phase.
            # assumes all three are laid out as librosa.istft expects
            # (the caller passes transposes) - TODO confirm
        sr: Sample rate for the output file; anything ``int()`` accepts
            (the caller passes a 0-dim tensor).
        filename: Destination path handed to ``soundfile.write``.
    """
    mixture = X.numpy()
    mixture_mag = X_abs.numpy()

    # Phase = X / |X|. Wherever the mixture magnitude is zero (e.g. frames
    # that were zero-padded) a plain division is 0/0 = NaN, which would end
    # up in the inverse STFT output. Leave those bins at zero instead.
    phase = np.divide(mixture, mixture_mag,
                      out=np.zeros_like(mixture),
                      where=mixture_mag > 0)

    S_recons = phase * S_abs
    S_recons_sound = librosa.istft(S_recons,
                                   hop_length=512, )
    soundfile.write(filename, S_recons_sound, int(sr))

def rnn_process(parameters):
    """Train the RNN on magnitude spectrograms, log metrics, and write test predictions.

    Steps:
        1. Load the train ('trn', 'trx', 'trs') and validation
           ('vs', 'vx', 'vn') tensors from the module-level ``path``.
        2. Train with MSE loss and Adam, using ``*x_abs`` as input and
           ``*s_abs`` as target, for ``parameters['epoch']`` epochs.
        3. Pickle the per-epoch metrics to ``hw3_qns3_loss.pkl.gz`` and save
           the model's state dict to ``./hw_3_RNN.model``.
        4. Predict a magnitude for every 'te' item and pass it, together with
           the original complex/magnitude spectrogram, to ``reconstruct``,
           which writes ``{pred_path}/{index}_pred.wav``.

    Args:
        parameters: dict read here for ``batchsize``, ``lr_rate``, ``epoch``
            and ``pred_path``; it is also handed to ``RNN``.
    """
    train = get_data(path, ['trn','trx', 'trs'])
    val = get_data(path, ['vs', 'vx','vn'])

    train_dataloader = create_dataloader(train['trx_abs'],
                                         train['trs_abs'],
                                         parameters['batchsize'])
    val_dataloader = create_dataloader(val['vx_abs'],
                                       val['vs_abs'],
                                       parameters['batchsize'])

    # len() of a DataLoader is its number of batches.
    print('Train size :', len(train_dataloader))
    print('Val size :', len(val_dataloader))

    model = RNN(parameters)
    loss_filename = 'hw3_qns3_loss.pkl.gz'

    print(model)

    loss_fn = nn.MSELoss()
    optimizer_adam = torch.optim.Adam(model.parameters(),
                                      lr=parameters['lr_rate'])

    # Each entry: [train_loss, train_snr, val_loss, val_snr]
    all_loss = []
    for t in range(parameters['epoch']):
        loss = train_model(model, train_dataloader,
                           val_dataloader, loss_fn,
                           optimizer_adam)

        all_loss.append(loss)

        if t % 10 == 0:
            print('Epoch :', t)
            print('Train loss : ', loss[0])
            print('Train snr : ', loss[1])
            print('Val loss : ', loss[2])
            print('Val snr : ', loss[3])

    (pd.DataFrame(data=all_loss,
                  columns=['train_loss', 'train_snr', 'val_loss', 'val_snr'])
     .to_pickle(loss_filename)
    )

    torch.save(model.state_dict(), './hw_3_RNN.model')

    test = get_data(path, ['te'])
    x_abs = test['te_abs']
    x_original = test['te']
    # Stored under the '_snr' key, but reconstruct() receives these values
    # as its sample-rate argument.
    sample_rates = test['te_snr']

    pred_dir = parameters['pred_path']
    # Create the output directory up front so a missing folder does not
    # abort the run after training has finished.
    os.makedirs(pred_dir, exist_ok=True)

    # Do not rely on train_model()/loss_snr() having placed the model on the
    # device in eval mode (they are never called when epoch == 0).
    model.to(device)
    model.eval()

    # Inference only: no autograd graph is needed.
    with torch.no_grad():
        for index in range(len(x_abs)):
            T = x_original[index]
            T_abs = x_abs[index]

            # Add a leading batch dimension for the model, then drop it again.
            batch = torch.unsqueeze(T_abs, dim=0).to(device)
            pred_t = torch.squeeze(model(batch)[0], dim=0)
            pred_t_abs = pred_t.cpu().numpy()

            wav_filename = f'{pred_dir}/{index}_pred.wav'

            reconstruct(T.T, T_abs.T, pred_t_abs.T,
                        sample_rates[index], wav_filename)

    
# Hyper-parameters and paths handed to rnn_process().
# Keys that influence the code shown in this notebook:
#   in_dim, hidden_dim, num_layers  -> nn.LSTM sizes inside RNN
#   type_init                       -> 'xavier' enables custom LSTM initialisation
#   batchsize, lr_rate, epoch       -> DataLoader batch size, Adam learning rate, epoch count
#   pred_path                       -> directory the predicted .wav files are written to
# activation and dropout are stored on the RNN instance but not applied in
# its forward pass; out_dim does not size any layer.
# The remaining keys (conv1d_*, conv2d, in_channel, out_channel, kernel_size,
# stride, 2d_stride, pool_*) are not read by any code visible here -
# presumably left over from other model variants; TODO confirm before removing.
rnn_parameters = {
    "in_dim": 513,
    "out_dim": 513,
    "hidden_dim": 513,
    "conv1d_in_dim": 118,
    "conv1d_out_dim": 513,
    "batchsize": 10,
    "dropout": 0.01,
    'num_layers': 1,
    "activation": "ReLU",
    "type_init": "xavier",
    "lr_rate": 0.01,
    "epoch": 100,
    "in_channel": 1,
    "out_channel": 1,
    "kernel_size":10,
    "conv2d":{
        "in_channel": 1,
        "out_channel": 3,
        "kernel_size":(2, 2),
        "stride": 3,
        "activation": "ReLU",
        "type_init": "xavier",
        "pool_kernel_size": (2,2),
        "pool_stride": 3,
        "in_dim": 95,
        "out_dim": 513,
        "dropout": None,
        "2nd_kernel_size": (2,2),
        "2nd_in_channel": 3,
        "2nd_out_channel":5
    },
    "pred_path": '/N/slate/potem/dl_data/dl_data/pred',
    "stride": 2,
    "2d_stride": 2,
    "pool_kernel_size": 5,
    "pool_stride": 1,
}


# Runs the whole pipeline: training, metric logging, saving the model state,
# and writing the reconstructed test audio.
rnn_process(rnn_parameters)

### SNR values

In [35]:
# Rank the logged epochs by validation SNR, highest first, and show the top rows.
print('\n\n---- Hw3 : Qns 3 -------------------- \n')

df = (
    pd.read_pickle('hw3_qns3_loss.pkl.gz')
    .sort_values(by=['val_snr'], ascending=False)
)
print(df.head())




---- Hw3 : Qns 3 -------------------- 

    train_loss  train_snr  val_loss   val_snr
15    0.016419   0.710551  0.016751  0.623522
10    0.016528   0.681877  0.016766  0.619776
6     0.016580   0.668182  0.016769  0.618804
19    0.016357   0.726820  0.016771  0.618320
12    0.016506   0.687663  0.016774  0.617706
