## Aim: build a PyTorch `Dataset` class for the lip-reading data

In [1]:
import os

import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F

In [9]:
# Machine-specific absolute location of the data directory; change this when
# running elsewhere. It must end with '/' because the CSV path below is built
# by plain string concatenation.
root_dir = '/home/sri/lip_reading/data/'
# Name of the index CSV, relative to root_dir.
clean_files_path = 'clean_files.csv'

# Full index table (all rows, regardless of the is_train flag).
df = pd.read_csv(root_dir + clean_files_path)

In [10]:
def get_files(root_dir, file_path, is_train = True):
    '''
    Load the index CSV and return only the rows of one split.

    Args:
        root_dir: Directory that contains the CSV. A trailing path separator
            is optional.
        file_path: Path of the CSV relative to ``root_dir``. The file must
            have an ``is_train`` column holding 1 or 0.
        is_train: Truthy selects rows where ``is_train == 1``; falsy selects
            rows where ``is_train == 0``.

    Returns:
        pandas.DataFrame with the matching rows. The row index labels of the
        CSV are kept, so use ``.iloc`` for positional access.
    '''
    # os.path.join adds the separator only when it is missing, so root_dir
    # works with or without a trailing '/' (plain `+` silently built a wrong
    # path in the latter case).
    records = pd.read_csv(os.path.join(root_dir, file_path))
    split_flag = 1 if is_train else 0
    return records[records['is_train'] == split_flag]

In [12]:
# Training-split rows only (is_train == 1).
t_df = get_files(root_dir, clean_files_path, is_train=True)

In [32]:
# Take one training record by position (not by index label) to inspect its fields.
a = t_df.iloc[9]

In [35]:
# Show every column of the selected record.
a

id                                                   14
mp4                train/trainval/ywgdCrcx6oM/50001.mp4
mp3                train/trainval/ywgdCrcx6oM/50001.mp3
txt                train/trainval/ywgdCrcx6oM/50001.txt
mp4_new       xtrainval/xtrainval/ywgdCrcx6oM/50001.mp4
frames                                               78
resolution                                           90
errors                                                0
no_face                                               0
no_pts                                                0
less_pts                                              0
verify_pts                                           51
bad_bounds                                            0
is_train                                              1
Name: 9, dtype: object

In [36]:
# Video path of the record.
a['mp4']

'train/trainval/ywgdCrcx6oM/50001.mp4'

In [37]:
# Audio path of the record.
a['mp3']

'train/trainval/ywgdCrcx6oM/50001.mp3'

In [38]:
# Text-file path of the record.
a['txt']

'train/trainval/ywgdCrcx6oM/50001.txt'

### A record describes a single datapoint. Each record consists of an mp4 file, an mp3 file, and a txt file holding the full text representation.

In [25]:
from torch.utils.data import Dataset

In [151]:
import imageio
from PIL import Image

class LRWDataset(Dataset):
    '''
    Dataset over the rows of an index CSV, one row per datapoint.

    Each row names an mp4, an mp3 and a txt file (paths relative to
    ``root_dir``). Indexing returns ``(frames, mp3, txt)`` where ``frames`` is
    the video decoded into a tensor with the frame order reversed.

    NOTE: the mp3 and txt loaders are still placeholders that return the file
    path unchanged; only the video is actually decoded.
    '''
    # Column names of the index CSV.
    COL_MP4 = 'mp4'
    COL_MP3 = 'mp3'
    COL_TXT = 'txt'

    def __init__(self, root_dir, clean_files_path, is_train=True):
        '''
        Args:
            root_dir: Directory holding the index CSV and the media files.
            clean_files_path: Path of the index CSV relative to ``root_dir``.
            is_train: Truthy keeps rows with ``is_train == 1``, falsy keeps
                rows with ``is_train == 0``.
        '''
        self.root_dir = root_dir
        self.df = self._get_files(root_dir, clean_files_path, is_train)

    def __len__(self):
        '''Number of records in the selected split.'''
        return len(self.df)

    def __getitem__(self, idx):
        '''
        Return the datapoint at position ``idx`` of the selected split.

        Returns:
            Tuple ``(frames, mp3, txt)``: the reversed video frames as a
            tensor, followed by whatever the mp3 and txt loaders return
            (currently the full file paths).
        '''
        mp4, mp3, txt = self._get_records(idx)

        reversed_mp3 = self._get_reversed_mp3_as_tensor(os.path.join(self.root_dir, mp3))
        reversed_txt = self._get_reversed_txt_as_tensor(os.path.join(self.root_dir, txt))
        reversed_mp4 = self._get_reversed_frames_as_tensors(os.path.join(self.root_dir, mp4))

        return reversed_mp4, reversed_mp3, reversed_txt

    def _get_files(self, root_dir, file_path, is_train = True):
        '''
        Load the index CSV and keep only the rows of the requested split.

        Returns:
            pandas.DataFrame filtered on the ``is_train`` column (1 when
            ``is_train`` is truthy, 0 otherwise). Index labels of the CSV are
            kept, so rows must be accessed positionally.
        '''
        # os.path.join tolerates root_dir with or without a trailing separator.
        records = pd.read_csv(os.path.join(root_dir, file_path))
        split_flag = 1 if is_train else 0
        return records[records['is_train'] == split_flag]

    def _get_records(self, idx):
        '''
        Return the ``(mp4, mp3, txt)`` relative paths of the record at
        position ``idx`` in this dataset's split.
        '''
        # Must read self.df (the filtered split), not a notebook-global `df`:
        # the global is the unfiltered table and does not exist on a fresh kernel.
        record = self.df.iloc[idx]
        mp4 = record[LRWDataset.COL_MP4]
        mp3 = record[LRWDataset.COL_MP3]
        txt = record[LRWDataset.COL_TXT]

        return mp4, mp3, txt

    def _get_reversed_mp3_as_tensor(self, mp3_path):
        '''Placeholder: audio is not decoded yet, the path is returned as-is.'''
        return mp3_path

    def _get_reversed_txt_as_tensor(self, txt_path):
        '''Placeholder: text is not encoded yet, the path is returned as-is.'''
        return txt_path

    def _get_reversed_frames_as_tensors(self, mp4_file):
        '''
        Decode every frame of a video and return them in reverse order.

        Args:
            mp4_file: Full path of the video file.

        Returns:
            torch.Tensor whose first axis indexes frames, last frame first.
            The remaining axes are whatever the reader yields per frame.

        Raises:
            ValueError: if the reader reports zero frames.
        '''
        # Open the reader once and close it deterministically; it wraps an
        # external decoder that would otherwise be leaked.
        with imageio.get_reader(mp4_file) as reader:
            frame_count = reader.count_frames()
            # Collect frames in a list and stack once at the end; growing an
            # array with vstack inside the loop re-copies all previous frames
            # on every iteration.
            frames = [np.array(reader.get_data(i)) for i in range(frame_count)]

        if not frames:
            raise ValueError('no frames could be read from ' + str(mp4_file))

        stacked = torch.from_numpy(np.stack(frames))
        # Flip along the frame axis only.
        return torch.flip(stacked, [0])

In [152]:
# Dataset over the training split of the index CSV.
dataset = LRWDataset(root_dir, clean_files_path, is_train=True)

In [153]:
# Indexing returns the whole (frames, mp3, txt) tuple, so despite its name
# t_mp4 holds all three items; the video tensor is element 0.
t_mp4 = dataset[18]

In [154]:
# Shape of the video tensor; the first axis is the frame axis.
t_mp4[0].shape

torch.Size([38, 224, 224, 3])