In [50]:
import os
import random
import pickle
import numpy as np
import cv2
import torch
import torch.utils.data as data
import torchvision.transforms as transforms
from PIL import Image
from torch.utils.data import DataLoader
import librosa
import time
import copy
import python_speech_features




class Voxceleb_head_movements_derivative(data.Dataset):
    def __init__(self,
                 dataset_dir,
                 train='train'):
        self.train = train
        self.num_frames = 64  
        self.root  = '/data2/lchen63/voxceleb/'
                 
        if self.train=='train':
            _file = open(os.path.join(dataset_dir, "train.pkl"), "rb")
            self.data = pickle.load(_file)
            _file.close()
        elif self.train =='test':
            _file = open(os.path.join(dataset_dir, "test.pkl"), "rb")
            self.data = pickle.load(_file)
            _file.close()


    def __getitem__(self, index):
            tmp = self.data[index][0].split('/')
            mean = np.load( os.path.join(self.root, 'unzip', self.data[index][0],'mean.npy' ))
            if len(self.data[index][2]) ==1:
                landmark = np.load( os.path.join(self.root, 'unzip', self.data[index][0], self.data[index][2][0] + '.npy' ))
                middle = int(self.data[index][1] / 2)
                in_max = middle - 64
                if (in_max <= 0):
                    in_max = 10
                out_max = self.data[index][1] - 64
                if out_max < middle:
                    tmp = out_max
                    out_max = middle
                    middle = tmp
                elif out_max == middle:
                    middle -= 5
                in_start  = random.choice([x for x in range(0,in_max)])
                out_start = random.choice([x for x in range(middle,out_max)])
                in_lmark = landmark[in_start:in_start+ 64] - mean
                out_lmark = landmark[out_start:out_start+ 64] - mean
            else:
                in_landmark = np.load( os.path.join(self.root, 'unzip', self.data[index][0], self.data[index][2][0] + '.npy' ))
                r  = random.choice([x for x in range(1,len(self.data[index][2]))])
                out_landmark = np.load( os.path.join(self.root, 'unzip', self.data[index][0], self.data[index][2][r] + '.npy' ))
                in_max = in_landmark.shape[0] - 64
                out_max = out_landmark.shape[0] - 64
                in_start  = random.choice([x for x in range(0,in_max)])
                out_start = random.choice([x for x in range(0,out_max)])                  
                in_lmark = in_landmark[in_start:in_start + 64] - mean
                out_lmark = out_landmark[out_start:out_start + 64] - mean
                  
            in_lmark = torch.FloatTensor(in_lmark)
            out_lmark = torch.FloatTensor(out_lmark)
            mean = torch.FloatTensor(mean)        
            return in_lmark, out_lmark, mean
               
    def __len__(self):
        
            return len(self.data)

class LRWdataset1D_single_gt(data.Dataset):
    def __init__(self,
                 dataset_dir,
                 output_shape=[128, 128],
                 train='train'):
        self.train = train
        self.dataset_dir = dataset_dir
        self.output_shape = tuple(output_shape)

        if not len(output_shape) in [2, 3]:
            raise ValueError("[*] output_shape must be [H,W] or [C,H,W]")

        if self.train=='train':
            _file = open(os.path.join(dataset_dir, "new_img_full_gt_train.pkl"), "rb")
            self.train_data = pickle.load(_file)
            _file.close()
        elif self.train =='test':
            _file = open(os.path.join(dataset_dir, "new_img_full_gt_test.pkl"), "rb")
            self.test_data = pickle.load(_file)
            _file.close()
        elif self.train =='demo' :
            _file = open(os.path.join(dataset_dir, "new_img_full_gt_demo.pkl"), "rb")
            self.demo_data = pickle.load(_file)
            _file.close()

        self.transform = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5))
        ])

    def __getitem__(self, index):
        # In training phase, it return real_image, wrong_image, text
        if self.train=='train':

                #load righ img
                image_path = '../dataset/regions/' +  self.train_data[index][0]
                landmark_path = '../dataset/landmark1d/' + self.train_data[index][0][:-8] + '.npy'

                landmark = np.load(landmark_path) * 5.0

                right_landmark = landmark[self.train_data[index][1] - 1]
                right_landmark = torch.FloatTensor(right_landmark.reshape(-1))
                
                im = cv2.imread(image_path)
                im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
                im = cv2.resize(im, self.output_shape)
                im = self.transform(im)
                right_img = torch.FloatTensor(im)

                r = random.choice(
                    [x for x in range(1,30)])
                example_path =   image_path[:-8] + '_%03d.jpg'%r
                example_landmark = landmark[r - 1]
                example_landmark = torch.FloatTensor(example_landmark.reshape(-1))

                example_img = cv2.imread(example_path)
                example_img = cv2.cvtColor(example_img, cv2.COLOR_BGR2RGB)
                example_img = cv2.resize(example_img, self.output_shape)
                example_img = self.transform(example_img)

                return example_img, example_landmark, right_img,right_landmark

        elif self.train =='test':
            # try:
                #load righ img
            image_path = '../dataset/regions/' +  self.test_data[index][0]
            landmark_path = '../dataset/landmark1d/' + self.test_data[index][0][:-8] + '.npy'
            landmark = np.load(landmark_path) * 5.0
            right_landmark = landmark[self.test_data[index][1] - 1]
            
            right_landmark = torch.FloatTensor(right_landmark.reshape(-1))
            
            im = cv2.imread(image_path)
            
            im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
            im = cv2.resize(im, self.output_shape)
            im = self.transform(im)
            right_img = torch.FloatTensor(im)
            
            example_path =   '../image/musk1_region.jpg'
            example_landmark = np.load('../image/musk1.npy')
            
            example_landmark = torch.FloatTensor(example_landmark.reshape(-1)) * 5.0

            example_img = cv2.imread(example_path)
            example_img = cv2.cvtColor(example_img, cv2.COLOR_BGR2RGB)
            example_img = cv2.resize(example_img, self.output_shape)
            example_img = self.transform(example_img)

            return example_img, example_landmark, right_img,right_landmark
                
        

In [58]:
from torch.utils.data import DataLoader

dataset = Voxceleb_head_movements_derivative( '/data2/lchen63/voxceleb/txt', 'train')
data_loader = DataLoader(dataset,
                         batch_size=2,
                         num_workers=1,                          
                         shuffle=True, drop_last=True)
for step, (in_lmark, out_lmark, mean) in enumerate(data_loader):
    if step == 1000:
        break
#     print (in_lmark.shape)
#     print ( out_lmark.shape)
    print (in_lmark)
    if in_lmark.shape != [2,64,68,3] or out_lmark.shape[0:] != [2,64,68,3]:
        print ('=============')
        print (in_lmark.shape)
        print (out_lmark.shape)
#     print(mean.shape)
    

tensor([[[[ -2.3184,  -3.1692,  -2.6494],
          [ -0.2537,  -2.6517,  -3.8363],
          [  0.8955,  -1.6766,  -4.6313],
          ...,
          [  0.5473,  -3.1542,   0.2111],
          [  0.0597,  -1.6269,  -0.1522],
          [ -1.8706,  -2.1045,  -0.5340]],

         [[ -2.3184,  -1.1692,  -2.7177],
          [ -0.2537,  -2.6517,  -3.9134],
          [  0.8955,  -1.6766,  -4.8237],
          ...,
          [ -2.4527,  -1.1542,  -0.7165],
          [ -2.9403,  -1.6269,  -1.0698],
          [ -1.8706,  -0.1045,  -1.3979]],

         [[  0.6816,   0.8308,  -1.2617],
          [  0.7463,  -0.6517,  -2.3899],
          [  1.8955,   0.3234,  -3.1877],
          ...,
          [ -1.4527,  -1.1542,  -0.3343],
          [ -1.9403,  -1.6269,  -0.4964],
          [ -3.8706,  -0.1045,  -0.9219]],

         ...,

         [[ -1.3184, -21.1692,  -2.8887],
          [ -5.2537, -19.6517,  -2.2289],
          [-10.1045, -16.6766,  -1.6845],
          ...,
          [-12.4527,  -0.1542,   3.33

RuntimeError: Traceback (most recent call last):
  File "/data/lchen63/anaconda2/lib/python2.7/site-packages/torch/utils/data/_utils/worker.py", line 99, in _worker_loop
    samples = collate_fn([dataset[i] for i in batch_indices])
  File "/data/lchen63/anaconda2/lib/python2.7/site-packages/torch/utils/data/_utils/collate.py", line 68, in default_collate
    return [default_collate(samples) for samples in transposed]
  File "/data/lchen63/anaconda2/lib/python2.7/site-packages/torch/utils/data/_utils/collate.py", line 43, in default_collate
    return torch.stack(batch, 0, out=out)
RuntimeError: invalid argument 0: Sizes of tensors must match except in dimension 0. Got 64 and 57 in dimension 1 at /opt/conda/conda-bld/pytorch_1556653194318/work/aten/src/TH/generic/THTensor.cpp:711


In [1]:
tmp = []
for i in range(128):
    tmp.append(i)
    
    
b = tmp - tmp[3:3+64]
print (b)

TypeError: unsupported operand type(s) for -: 'list' and 'list'