In [1]:
import os
import time
import numpy as np
import importlib
from tqdm.notebook import tqdm

import torch
import torch.nn as nn
from torch.utils.data import DataLoader

import dataset

In [2]:
importlib.reload(dataset)

<module 'dataset' from '/home/pakumar/teams/ece251c-team-11/dataset.py'>

In [4]:
# define device type - cuda:0 or cpu
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
kwargs = {'num_workers': 4, 'pin_memory': False} if device.type == "cuda" else {}

# Additional Info when using cuda
if device.type == 'cuda':
    print("Number of GPU devices:", torch.cuda.device_count())
    print("GPU device name:", torch.cuda.get_device_name(0))
    print('Memory Usage:')
    print('Allocated:', round(torch.cuda.memory_allocated(0)/1024**3, 3), 'GB')
    print('Cached:   ', round(torch.cuda.memory_reserved(0)/1024**3, 3), 'GB')

Number of GPU devices: 1
GPU device name: GeForce RTX 2080 Ti
Memory Usage:
Allocated: 0.0 GB
Cached:    0.0 GB


In [4]:
NUM_EPOCHS = 1
TRAIN_BATCH_SIZE = 4
TEST_BATCH_SIZE = 1

In [6]:
class CausalConvBlock(nn.Module):
    def __init__(self, in_channels, out_channels):
        super().__init__()
        self.conv_layer = nn.Conv2d(
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=(3, 2),
            stride=(2, 1),
            padding=(0, 1)
        )
        
        self.norm = nn.BatchNorm2d(num_features=out_channels)
        self.activation = nn.ReLU()

    def forward(self, x):
        """
        2D Causal convolution.
        Args:
            x: [batch_size, num_channels, F, T]
        Returns:
            [B, C, F, T]
        """
        x = self.conv_layer(x)
        x = x[:, :, :, :-1]  # chomp size
        x = self.norm(x)
        x = self.activation(x)
        
        return x

In [7]:
class CausalTransConvBlock(nn.Module):
    def __init__(self, in_channels, out_channels, is_last=False, output_padding=(0, 0)):
        super().__init__()
        
        self.conv = nn.ConvTranspose2d(
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=(3, 2),
            stride=(2, 1),
            output_padding=output_padding
        
        )
        self.norm = nn.BatchNorm2d(num_features=out_channels)
        if is_last:
            self.activation = nn.ReLU()
        else:
            self.activation = nn.ELU()

    def forward(self, x):
        """
        2D Causal convolution.
        Args:
            x: [B, C, F, T]
        Returns:
            [B, C, F, T]
        """
        x = self.conv(x)
        x = x[:, :, :, :-1]  # chomp size
        x = self.norm(x)
        x = self.activation(x)
        
        return x

In [8]:
class ConvRecNet(nn.Module):
    """
    Input: [batch size, channels=1, T, n_fft]
    Output: [batch size, T, n_fft]
    """
    
    def __init__(self):
        super(ConvRecNet, self).__init__()
        
        # Encoder
        self.conv_block_1 = CausalConvBlock(1, 16)
        self.conv_block_2 = CausalConvBlock(16, 32)
        self.conv_block_3 = CausalConvBlock(32, 64)
        self.conv_block_4 = CausalConvBlock(64, 128)
        self.conv_block_5 = CausalConvBlock(128, 256)
        
        # LSTM
        self.lstm_layer = nn.LSTM(input_size=1024, hidden_size=1024, num_layers=2, batch_first=True)
        
        self.tran_conv_block_1 = CausalTransConvBlock(256 + 256, 128)
        self.tran_conv_block_2 = CausalTransConvBlock(128 + 128, 64)
        self.tran_conv_block_3 = CausalTransConvBlock(64 + 64, 32)
        self.tran_conv_block_4 = CausalTransConvBlock(32 + 32, 16, output_padding=(1, 0))
        self.tran_conv_block_5 = CausalTransConvBlock(16 + 16, 1, is_last=True)

        
    def forward(self, x):
        self.lstm_layer.flatten_parameters()

        e_1 = self.conv_block_1(x)
        e_2 = self.conv_block_2(e_1)
        e_3 = self.conv_block_3(e_2)
        e_4 = self.conv_block_4(e_3)
        e_5 = self.conv_block_5(e_4)  # [2, 256, 4, 200]

        batch_size, n_channels, n_f_bins, n_frame_size = e_5.shape

        # [2, 256, 4, 200] = [2, 1024, 200] => [2, 200, 1024]
        lstm_in = e_5.reshape(batch_size, n_channels * n_f_bins, n_frame_size).permute(0, 2, 1)
        lstm_out, _ = self.lstm_layer(lstm_in)  # [2, 200, 1024]
        lstm_out = lstm_out.permute(0, 2, 1).reshape(batch_size, n_channels, n_f_bins, n_frame_size)  # [2, 256, 4, 200]

        d_1 = self.tran_conv_block_1(torch.cat((lstm_out, e_5), 1))
        d_2 = self.tran_conv_block_2(torch.cat((d_1, e_4), 1))
        d_3 = self.tran_conv_block_3(torch.cat((d_2, e_3), 1))
        d_4 = self.tran_conv_block_4(torch.cat((d_3, e_2), 1))
        d_5 = self.tran_conv_block_5(torch.cat((d_4, e_1), 1))

        return d_5

In [11]:
model = ConvRecNet()
a = torch.rand(2, 1, 161, 200)
print(model(a).shape)

torch.Size([2, 1, 161, 200])


In [12]:
# train_dataset = dataset.SpeechDataset('train')
# train_dataloader = DataLoader(train_dataset, batch_size=TRAIN_BATCH_SIZE, collate_fn=dataset.custom_collate_fn)
# print("Train dataset size:", len(train_dataloader))

# itr = next(iter(train_dataloader))

# # Print shape of spectrogram across a batch
# for k in range(len(itr[0])):
#     print(itr[0][k].shape, itr[1][k].shape)

In [6]:
test_dataset = dataset.SpeechDataset('test')
test_dataloader = DataLoader(test_dataset, batch_size=1, collate_fn=dataset.custom_collate_fn)
print("Test dataset size:", len(test_dataloader))

Test dataset size: 4259


In [None]:
MAX_LEN_ = []
MIN_LEN_ = []
for split in ['train', 'test', 'val']:
    dataset_ = dataset.SpeechDataset(split)
    dataloader_ = DataLoader(dataset_, batch_size=16, collate_fn=dataset.custom_collate_fn)
    print("%s dataset size: %d" %(split, len(dataloader_)))

    max_ = 0
    min_ = float("inf")
    for itr, (x_batch, y_batch) in tqdm(enumerate(dataloader_)):
        for k in range(len(x_batch)):
            max_ = max(max_, x_batch[k].size()[1])
            min_ = min(min_, x_batch[k].size()[1])
    MAX_LEN_.append(max_)
    MIN_LEN_.append(min_)

train dataset size: 3105


0it [00:00, ?it/s]

In [34]:
# understand dataloader
for i in range(3):
    clean_spec, noisy_spec = next(iter(test_dataloader))
    print(len(clean_spec), len(noisy_spec))  # (4, 4)

    
    print(clean_spec[0].shape, clean_spec[1].shape, clean_spec[2].shape, clean_spec[3].shape)
    print(noisy_spec[0].shape, noisy_spec[1].shape, noisy_spec[2].shape, noisy_spec[3].shape)
    print("\n")

2 2


IndexError: list index out of range

In [22]:
start_time = time.time()

for epoch in tqdm(range(NUM_EPOCHS)):
    for itr, (x_batch, y_batch) in enumerate(test_dataloader):
        print(itr)
        
#         x_batch = torch.tensor(x_batch)

#         print(x_batch.shape, y_batch.shape)
        print(x_batch[0].shape, x_batch[1].shape, x_batch[2].shape, x_batch[3].shape)

        if itr > 1:
            break

  0%|          | 0/1 [00:00<?, ?it/s]

0
torch.Size([1025, 127]) torch.Size([1025, 122]) torch.Size([1025, 20]) torch.Size([1025, 408])
1
torch.Size([1025, 149]) torch.Size([1025, 292]) torch.Size([1025, 186]) torch.Size([1025, 419])
2
torch.Size([1025, 289]) torch.Size([1025, 175]) torch.Size([1025, 111]) torch.Size([1025, 22])


In [16]:
print(len(itr[0]), len(itr[1]))
print(itr[0][0].shape)

4 4
torch.Size([1025, 111])
