In [1]:
import os
import sys
import glob

import numpy as np
import mxnet as mx
from mxnet import gluon, autograd ,nd
from mxnet.gluon import nn, rnn,utils
import mxnet.gluon.data.dataset as dataset
from mxnet.gluon.data.vision import datasets
from mxnet.gluon.data import DataLoader
import mxnet.ndarray as F
from mxnet.gluon.data.vision import transforms

from tqdm import tqdm, trange

from mxnet.gluon.data.vision.datasets import image

In [2]:
from utils.common import *

In [3]:
from utils.align import Align

In [4]:
class LipsDataset(dataset.Dataset):
    """Dataset of frame sequences paired with their alignment labels.

    Every entry matching ``<root>/*/*`` that contains at least one image file
    becomes one sample. The image files inside it, sorted by path, are the
    frames; the entry's own name selects ``<align_root>/<name>.align``.

    Parameters
    ----------
    root : str
        Directory scanned two levels deep for frame folders (``~`` is expanded).
    align_root : str
        Directory holding the ``.align`` files.
    flag : int, default 1
        Forwarded unchanged as the second argument of ``image.imread``.
    transform : callable or None, default None
        Applied to every frame individually right after it is read.
    """

    def __init__(self, root, align_root, flag=1, transform=None):
        self._root = os.path.expanduser(root)
        self._align_root = align_root
        self._flag = flag
        self._transform = transform
        # File extensions (lower-case) that count as frames.
        self._exts = ['.jpg', '.jpeg', '.png']
        self._list_images(self._root)

    def _list_images(self, root):
        """Populate ``self.items`` with ``(sorted_frame_paths, folder_name)`` pairs."""
        self.labels = []
        self.items = []

        # Sorted so that sample indices are stable across runs and platforms
        # (glob makes no ordering promise).
        for folder in sorted(glob.glob(os.path.join(root, "*", "*"))):
            frames = sorted(
                path for path in glob.glob(os.path.join(folder, "*"))
                if os.path.splitext(path)[1].lower() in self._exts
            )
            # Plain files and folders without any image would otherwise yield
            # an empty frame list that cannot be stacked in __getitem__.
            if not frames:
                continue
            self.items.append((frames, os.path.basename(folder)))

    def align_generation(self, file_nm, padding=75):
        """Load ``<align_root>/<file_nm>.align`` and return its label as an NDArray.

        ``padding`` is passed to ``Align.sentence``; presumably it is the
        length the label is padded to — confirm against ``utils.align``.
        """
        align = Align(os.path.join(self._align_root, file_nm + '.align'))
        return nd.array(align.sentence(padding))

    def __getitem__(self, idx):
        """Return ``(frames, label)`` for sample ``idx``.

        ``frames`` is the stack (new leading axis) of all frames of the
        sample after the optional transform; ``label`` comes from
        ``align_generation``.
        """
        frame_paths, label_name = self.items[idx]
        frames = []
        for frame_path in frame_paths:
            frame = image.imread(frame_path, self._flag)
            if self._transform is not None:
                frame = self._transform(frame)
            frames.append(frame)
        return nd.stack(*frames), self.align_generation(label_name)

    def __len__(self):
        """Number of samples found under ``root``."""
        return len(self.items)
    
# Device context used below for parameter initialisation and for the batches.
ctx = mx.cpu()

In [5]:
class LipNet3D(nn.Block):
    """Three Conv3D stages followed by two bidirectional GRUs and a dense head.

    Each convolutional stage is Conv3D -> InstanceNorm -> ReLU -> Dropout ->
    MaxPool3D. The result is flattened per time step, run through the GRUs
    and projected to 28 (``27 + 1``) log-probabilities per time step.

    Parameters
    ----------
    dr_rate : float
        Dropout rate used after every convolutional stage.
    **kwargs
        Forwarded to ``nn.Block``.
    """

    def __init__(self, dr_rate, **kwargs):
        super(LipNet3D, self).__init__(**kwargs)

        with self.name_scope():
            self.conv1 = nn.Conv3D(32, kernel_size=(3, 5, 5), strides=(1, 2, 2), padding=(1, 2, 2))
            self.bn1 = nn.InstanceNorm(in_channels=32)
            self.dr1 = nn.Dropout(dr_rate)
            self.pool1 = nn.MaxPool3D((1, 2, 2), (1, 2, 2))

            self.conv2 = nn.Conv3D(64, kernel_size=(3, 5, 5), strides=(1, 1, 1), padding=(1, 2, 2))
            self.bn2 = nn.InstanceNorm(in_channels=64)
            self.dr2 = nn.Dropout(dr_rate)
            self.pool2 = nn.MaxPool3D((1, 2, 2), (1, 2, 2))

            self.conv3 = nn.Conv3D(96, kernel_size=(3, 3, 3), strides=(1, 1, 1), padding=(1, 2, 2))
            self.bn3 = nn.InstanceNorm(in_channels=96)
            self.dr3 = nn.Dropout(dr_rate)
            self.pool3 = nn.MaxPool3D((1, 2, 2), (1, 2, 2))

            self.gru1 = rnn.GRU(256, bidirectional=True)
            self.gru2 = rnn.GRU(256, bidirectional=True)

            # 27 + 1 output classes; presumably the character set plus one
            # extra (blank) symbol — confirm against utils.common.
            self.dense = nn.Dense(27 + 1, flatten=False)

    def summary(self, desc, out):
        """Print a separator line followed by ``desc`` and the shape of ``out``."""
        print("=======================================")
        print("{d} shape : {o}".format(d=desc, o=out.shape))

    def forward(self, x):
        """Run the network on ``x`` and return per-time-step log-probabilities.

        ``x`` is assumed to be laid out as (batch, channel, frame, height,
        width), i.e. Conv3D's default layout — TODO confirm against the
        caller. Under that assumption the result is (batch, frame, 28).
        """
        out = self.conv1(x)
        out = self.bn1(out)
        out = F.relu(out)
        out = self.dr1(out)
        out = self.pool1(out)

        out = self.conv2(out)
        out = self.bn2(out)
        out = F.relu(out)
        out = self.dr2(out)
        out = self.pool2(out)

        out = self.conv3(out)
        out = self.bn3(out)
        out = F.relu(out)
        out = self.dr3(out)
        out = self.pool3(out)

        # Bring axis 2 to the front, then flatten everything after the first
        # two axes so that the GRUs receive a 3-D tensor.
        out = nd.transpose(out, (2, 0, 1, 3, 4))
        out = out.reshape((out.shape[0], out.shape[1], -1))

        out = self.gru1(out)
        out = self.gru2(out)
        out = self.dense(out)
        out = F.log_softmax(out, axis=2)
        # Swap the first two axes back for the caller.
        out = nd.transpose(out, (1, 0, 2))

        return out

In [6]:
class LipNet2D(nn.HybridBlock):
    """Bank of independent Conv2D branches feeding two bidirectional GRUs.

    Branch ``i`` takes slice ``i`` along the last axis of the input, and runs
    Conv2D -> InstanceNorm -> Dropout -> MaxPool2D on it. The branch outputs
    are concatenated along their last axis, flattened per time step, passed
    through the GRUs and projected to 28 (``27 + 1``) log-probabilities.

    Parameters
    ----------
    dr_rate : float
        Dropout rate applied inside every branch.
    num_filter : sequence of int, default ``(48,) * 10``
        Output channels of each branch; its length is the number of branches
        and therefore the number of input slices that are consumed.
    **kwargs
        Forwarded to ``nn.HybridBlock``.
    """

    def __init__(self, dr_rate, num_filter=(48,) * 10, **kwargs):
        super(LipNet2D, self).__init__(**kwargs)

        with self.name_scope():
            self.convs = gluon.nn.HybridSequential()
            self.bns = gluon.nn.HybridSequential()
            self.maxpools = gluon.nn.HybridSequential()
            self.dropouts = gluon.nn.HybridSequential()
            for f in num_filter:
                self.convs.add(nn.Conv2D(channels=f, kernel_size=(3, 5), strides=(1, 2), padding=(1, 2)))
                self.bns.add(nn.InstanceNorm(in_channels=f))
                # Each container now holds the layer type its name promises,
                # and the dropout honours the dr_rate argument.
                self.dropouts.add(nn.Dropout(dr_rate))
                self.maxpools.add(nn.MaxPool2D((1, 2), (1, 2)))

            self.gru1 = rnn.GRU(256, bidirectional=True)
            self.gru2 = rnn.GRU(256, bidirectional=True)

            # 27 + 1 output classes; presumably the character set plus one
            # extra (blank) symbol — confirm against utils.common.
            self.dense = nn.Dense(27 + 1, flatten=False)

    def summary(self, desc, out):
        """Print a separator line followed by ``desc`` and the shape of ``out``."""
        print("=======================================")
        print("{d} shape : {o}".format(d=desc, o=out.shape))

    def hybrid_forward(self, F, x):
        """Run the network on ``x`` and return per-time-step log-probabilities.

        ``x`` is assumed to be 5-D, (batch, channel, frame, height, width) —
        TODO confirm against the caller. Its last axis must have at least as
        many entries as there are branches.
        """
        branch_outputs = []
        stages = zip(self.convs, self.bns, self.dropouts, self.maxpools)
        for idx, (conv, norm, dropout, pool) in enumerate(stages):
            # Slice ``idx`` of the last axis, with that axis dropped.
            plane = F.slice_axis(x, axis=-1, begin=idx, end=idx + 1).squeeze(axis=-1)
            branch_outputs.append(pool(dropout(norm(conv(plane)))))
        out = F.concat(*branch_outputs, dim=-1)

        out = F.transpose(out, axes=(2, 0, 1, 3))
        # 0 keeps the corresponding input dimension and -1 infers the rest, so
        # no batch size or sequence length is baked into the graph.
        out = F.reshape(out, shape=(0, 0, -1))
        out = self.gru1(out)
        out = self.gru2(out)
        out = self.dense(out)
        out = F.log_softmax(out, axis=2)
        # Swap the first two axes back for the caller.
        out = F.transpose(out, axes=(1, 0, 2))

        return out

In [8]:
def char_conv(out):
    """Decode a 2-D array of integer codes into one string per row.

    Negative codes are skipped. Every other code is translated with
    ``int2char``, except that code 27 always contributes an empty string
    (presumably the blank symbol — confirm against utils.common).

    Returns a list with ``out.shape[0]`` strings.
    """
    decoded_rows = []
    for row in range(out.shape[0]):
        pieces = []
        for col in range(out.shape[1]):
            code = int(out[row][col])
            if code < 0:
                continue
            symbol = int2char(code)
            pieces.append('' if code == 27 else symbol)
        decoded_rows.append(''.join(pieces))
    return decoded_rows

In [9]:
# Per-frame preprocessing: convert to a tensor, then normalise each of the
# three channels with the fixed mean / standard deviation below.
input_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(
        mean=(0.7136, 0.4906, 0.3283),
        std=(0.1138, 0.1078, 0.0917),
    ),
])

In [10]:
# Frame folders live under ./datasets/TARGET/, transcripts under ./datasets/align/.
training_dataset = LipsDataset(
    root='./datasets/TARGET/',
    align_root='./datasets/align/',
    transform=input_transform,
)
# Shuffled mini-batches of 16 samples, loaded by 4 worker processes.
train_dataloader = mx.gluon.data.DataLoader(
    training_dataset,
    batch_size=16,
    shuffle=True,
    num_workers=4,
)

In [11]:
# Build the 2-D variant; use LipNet3D(0.5) here instead to try the 3-D one.
net = LipNet2D(dr_rate=0.5)
net.initialize(ctx=ctx)

In [12]:
# Activate hybridization on the network before its first forward pass.
net.hybridize()

In [13]:
# Push exactly one batch through the network and keep the label and the
# arg-max prediction as numpy arrays for the decoding cells that follow.
for input_data, label in tqdm(train_dataloader):
    # Swap axes 1 and 2 of the batch, then place it on the target device.
    input_data = nd.transpose(input_data, axes=(0, 2, 1, 3, 4)).copyto(ctx)
    label = label.copyto(ctx)
    with autograd.train_mode():
        pred = net(input_data)
    label = label.asnumpy()
    # Index of the highest-scoring class along axis 2.
    pred = pred.argmax(axis=2).asnumpy()
    break

  0%|          | 0/2005 [00:00<?, ?it/s]Process Process-2:
Traceback (most recent call last):
  File "/usr/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
    self.run()
  File "/usr/lib/python3.6/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)
  File "/home/hyungjunkim/python/venv/dl3.6/lib/python3.6/site-packages/mxnet/gluon/data/dataloader.py", line 170, in worker_loop
    data_queue.put((idx, batch))
  File "/usr/lib/python3.6/multiprocessing/queues.py", line 341, in put
    obj = _ForkingPickler.dumps(obj)
  File "/usr/lib/python3.6/multiprocessing/reduction.py", line 51, in dumps
    cls(buf, protocol).dump(obj)
  File "/home/hyungjunkim/python/venv/dl3.6/lib/python3.6/site-packages/mxnet/gluon/data/dataloader.py", line 63, in reduce_ndarray
    pid, fd, shape, dtype = data._to_shared_mem()
  File "/home/hyungjunkim/python/venv/dl3.6/lib/python3.6/site-packages/mxnet/ndarray/ndarray.py", line 200, in _to_shared_mem
   

infer_shape error. Arguments:
  data: (16, 3, 75, 50, 100)





ValueError: Deferred initialization failed because shape cannot be inferred. Error in operator lipnet2d_test0_concat0: [17:15:37] src/operator/nn/concat.cc:66: Check failed: shape_assign(&(*in_shape)[i], dshape) Incompatible input shape: expected [16,48,75,0], got [16,3,75,50]

Stack trace returned 10 entries:
[bt] (0) /home/hyungjunkim/python/venv/dl3.6/lib/python3.6/site-packages/mxnet/libmxnet.so(+0x36bac2) [0x7fcd868ebac2]
[bt] (1) /home/hyungjunkim/python/venv/dl3.6/lib/python3.6/site-packages/mxnet/libmxnet.so(+0x36c0a8) [0x7fcd868ec0a8]
[bt] (2) /home/hyungjunkim/python/venv/dl3.6/lib/python3.6/site-packages/mxnet/libmxnet.so(+0x5fabf8) [0x7fcd86b7abf8]
[bt] (3) /home/hyungjunkim/python/venv/dl3.6/lib/python3.6/site-packages/mxnet/libmxnet.so(+0x2f6e64f) [0x7fcd894ee64f]
[bt] (4) /home/hyungjunkim/python/venv/dl3.6/lib/python3.6/site-packages/mxnet/libmxnet.so(+0x2f711a4) [0x7fcd894f11a4]
[bt] (5) /home/hyungjunkim/python/venv/dl3.6/lib/python3.6/site-packages/mxnet/libmxnet.so(MXSymbolInferShape+0x1539) [0x7fcd894686a9]
[bt] (6) /usr/lib/x86_64-linux-gnu/libffi.so.6(ffi_call_unix64+0x4c) [0x7fcdd8687e40]
[bt] (7) /usr/lib/x86_64-linux-gnu/libffi.so.6(ffi_call+0x2eb) [0x7fcdd86878ab]
[bt] (8) /usr/lib/python3.6/lib-dynload/_ctypes.cpython-36m-x86_64-linux-gnu.so(_ctypes_callproc+0x2cf) [0x7fcdd889b95f]
[bt] (9) /usr/lib/python3.6/lib-dynload/_ctypes.cpython-36m-x86_64-linux-gnu.so(+0x8949) [0x7fcdd8892949]



Process Process-4:
Traceback (most recent call last):
  File "/usr/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
    self.run()
  File "/usr/lib/python3.6/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)
  File "/home/hyungjunkim/python/venv/dl3.6/lib/python3.6/site-packages/mxnet/gluon/data/dataloader.py", line 170, in worker_loop
    data_queue.put((idx, batch))
  File "/usr/lib/python3.6/multiprocessing/queues.py", line 341, in put
    obj = _ForkingPickler.dumps(obj)
  File "/usr/lib/python3.6/multiprocessing/reduction.py", line 51, in dumps
    cls(buf, protocol).dump(obj)
  File "/home/hyungjunkim/python/venv/dl3.6/lib/python3.6/site-packages/mxnet/gluon/data/dataloader.py", line 63, in reduce_ndarray
    pid, fd, shape, dtype = data._to_shared_mem()
  File "/home/hyungjunkim/python/venv/dl3.6/lib/python3.6/site-packages/mxnet/ndarray/ndarray.py", line 200, in _to_shared_mem
    self.handle, ctypes.byref(shared_pid), 

In [None]:
# Decode the label codes and the predicted codes into strings, one per row.
label_conv, pred_conv = (char_conv(codes) for codes in (label, pred))

In [None]:
# Show the strings decoded from the batch labels.
label_conv

In [None]:
# Show the strings decoded from the network's arg-max predictions.
pred_conv