## Import Datasets

In [17]:
import os
import numpy as np
import matplotlib.pyplot as plt
import pickle

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchaudio
import torchvision
import torchsummary


In [2]:
PATH = "/data/datasets/hyundai"
ACC_PICKLE = os.path.join(PATH, "stationary_accel_data.pickle")
SND_PICKLE = os.path.join(PATH, "stationary_sound_data.pickle")

with open(ACC_PICKLE, "rb") as f:
    acc_list = pickle.load(f)

with open(SND_PICKLE, "rb") as f:
    snd_list = pickle.load(f)

## PreProcess Data

In [3]:
def slice_window(data, window_size, hop_len):
    #Slice data and concatenate them
    #return them as numpy
    windows = []
    data_len = data.shape[0]
    n_windows = int((data_len - window_size - hop_len) / hop_len)

    for i in range(n_windows):
        window = data[i * hop_len : (i * hop_len) + window_size]
        windows.append(window)
    
    return np.array(windows)

In [4]:
WINDOW_SIZE = 1500
HOP_LEN = 256

acc_data = np.concatenate(list(map(lambda x : slice_window(x, window_size=WINDOW_SIZE, hop_len=HOP_LEN), acc_list)), axis=0)
snd_data = np.concatenate(list(map(lambda x : slice_window(x, window_size=WINDOW_SIZE, hop_len=HOP_LEN), snd_list)), axis=0)

print("DATA LOADED ACC : {} SND : {}".format(acc_data.shape, snd_data.shape))

DATA LOADED ACC : (82023, 1500, 12) SND : (82023, 1500, 8)


In [11]:
np.save(os.path.join(PATH, "stationary_acc_win_1500_hop_256.npy"), acc_data)
np.save(os.path.join(PATH, "stationary_snd_win_1500_hop_256.npy"), snd_data)

In [5]:
class mu_law_encoder(nn.Module):
    def __init__(self, quantization_channels=256, rescale_factor=100):
        super().__init__()
        self.encoder = torchaudio.transforms.MuLawEncoding(quantization_channels=quantization_channels)
        self.rescale_factor = rescale_factor
    
    def forward(self, x):
        x = x / self.rescale_factor
        x = self.encoder(x)
        return x

class mu_law_decoder(nn.Module):
    def init(self, quantization_channels=256, rescale_factor=100):
        super().__init__()
        self.quantization_channels = quantization_channels
        self.rescale_factor = rescale_factor
        self.decoder = torchaudio.transforms.MuLawDecoding(quantization_channels=quantization_channels)
    
    def forward(self, x):
        x = self.decoder(x)
        x = x * self.rescale_factor
        return x

In [58]:
class Wavenet_Dataset(torch.utils.data.Dataset): 
  def __init__(self, x, y, receptive_field, transform=None):
    self.x_data = x
    self.y_data = y
    
    print("x shape : {}  y shape : {}".format(self.x_data.shape, self.y_data.shape))
    
    self.transform = transform
    self.receptive_field = receptive_field
    
    self.normalizer = torchvision.transforms.Normalize((0.5,), (0.5,))

  def __len__(self): 
    return len(self.x_data)

  def __getitem__(self, idx): 
    x = self.x_data[idx, :, :]
    y = self.y_data[idx, self.receptive_field:, :]

    if self.transform is not None:
        x = self.transform(x).float()
        y = self.transform(y)
    
    #x = self.normalizer(x) #normalize
    x /= 255.
    
    return x, y

In [62]:
BATCH = 16
EPOCH = 30

transform = torchvision.transforms.Compose([
                                    torchvision.transforms.ToTensor(),
                                    mu_law_encoder()
                                    ])
receptive_field = 1500 - 1474
dataset = Wavenet_Dataset(x=acc_data, y=snd_data, receptive_field=receptive_field, transform=transform)
dataloader = torch.utils.data.DataLoader(dataset, shuffle=True, batch_size=BATCH)

#Define Loss and optimizer
optimizer = optim.Adam(model.parameters(), lr=1e-3, betas=(0.5, 0.999))
loss_fn = nn.CrossEntropyLoss()

x shape : (82023, 1500, 12)  y shape : (82023, 1500, 8)


In [63]:
x, y = next(iter(dataloader))
x.shape, y.shape

(torch.Size([16, 1, 1500, 12]), torch.Size([16, 1, 1474, 8]))

## Define Model

In [7]:
os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"   
os.environ["CUDA_VISIBLE_DEVICES"]="2"

In [8]:
class ANC_base_model(nn.Module):
    def __init__(self, input_size=(1500, 12)):
        super().__init__()
        self.input_size = input_size
        self.time = self.input_size[0]
        self.input_chans = self.input_size[1]
        
        self.feature = nn.Sequential(
            nn.Conv1d(in_channels=self.input_chans, out_channels=256, kernel_size=7),
            nn.BatchNorm1d(256),
            nn.LeakyReLU(inplace=True),
            nn.Conv1d(in_channels=256, out_channels=512, kernel_size=7),
            nn.BatchNorm1d(512),
            nn.LeakyReLU(inplace=True),
            nn.Conv1d(in_channels=512, out_channels=512, kernel_size=3, dilation=2),
            nn.BatchNorm1d(512),
            nn.LeakyReLU(inplace=True),
            nn.Conv1d(in_channels=512, out_channels=512, kernel_size=3, dilation=2),
            nn.BatchNorm1d(512),
            nn.LeakyReLU(inplace=True),
            nn.Conv1d(in_channels=512, out_channels=512, kernel_size=3, dilation=2),
            nn.BatchNorm1d(512),
            nn.LeakyReLU(inplace=True),
            nn.Conv1d(in_channels=512, out_channels=8, kernel_size=3)
        )
        
        self.decision = nn.Sequential(
            nn.Conv2d(in_channels=1, out_channels = 256, kernel_size=3, padding=1),
            nn.BatchNorm2d(256),
            nn.LeakyReLU(inplace=True),
            nn.Conv2d(in_channels=256, out_channels = 512, kernel_size=3, padding=1),
            nn.BatchNorm2d(512),
            nn.LeakyReLU(inplace=True),
            nn.Conv2d(in_channels=512, out_channels = 512, kernel_size=3, padding=1),
            nn.BatchNorm2d(512),
            nn.LeakyReLU(inplace=True),
            nn.Conv2d(in_channels=512, out_channels = 512, kernel_size=3, padding=1),
            nn.BatchNorm2d(512),
            nn.LeakyReLU(inplace=True),
            nn.Conv2d(in_channels=512, out_channels = 256, kernel_size=3, padding=1)
        )
        
    
    def forward(self, x):
        x = self.feature(x)
        x = x.unsqueeze(1)
        x = self.decision(x)
        print(x.shape)
        return x

In [9]:
import models

os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"   
os.environ["CUDA_VISIBLE_DEVICES"]="2"

device = "cuda" if torch.cuda.is_available() else "cpu"

layers = 10
stacks = 1
model = ANC_base_model()

if torch.cuda.device_count() > 1:
    model = nn.DataParallel(model)

model = model.to(device)
torchsummary.summary(model, (12, 1500))

torch.Size([2, 256, 8, 1474])
----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv1d-1            [-1, 256, 1494]          21,760
       BatchNorm1d-2            [-1, 256, 1494]             512
         LeakyReLU-3            [-1, 256, 1494]               0
            Conv1d-4            [-1, 512, 1488]         918,016
       BatchNorm1d-5            [-1, 512, 1488]           1,024
         LeakyReLU-6            [-1, 512, 1488]               0
            Conv1d-7            [-1, 512, 1484]         786,944
       BatchNorm1d-8            [-1, 512, 1484]           1,024
         LeakyReLU-9            [-1, 512, 1484]               0
           Conv1d-10            [-1, 512, 1480]         786,944
      BatchNorm1d-11            [-1, 512, 1480]           1,024
        LeakyReLU-12            [-1, 512, 1480]               0
           Conv1d-13            [-1, 512, 1476]         786,944
      Bat