In [1]:
import numpy as np

In [2]:
# Dataset paths: the raw input array (loaded below) and the two padded
# arrays that are written out by the final np.save calls.
# NOTE(review): these are absolute, machine-specific Windows paths - adjust
# them before running the notebook on another machine.
Dataset_dir = r'D:\Animation\HWs\02\Dataset.npy'
Dataset_max_len_Padded_dir = r'D:\Animation\HWs\02\Dataset_max_len_Padded.npy'
Dataset_max_occ_len_Padded_dir = r'D:\Animation\HWs\02\Dataset_max_occ_len_Padded.npy'

In [3]:
# Load the raw dataset array from disk and report its dimensions.
# The array is indexed as 2-D (rows x columns) throughout the notebook.
Dataset = np.load(Dataset_dir)
print('Number of rows in the dataset: %i' % (Dataset.shape[0]))
print('Number of columns in the dataset: %i' % (Dataset.shape[1]))

Number of rows in the dataset: 4016
Number of columns in the dataset: 48


In [4]:
# Normalization
from sklearn.preprocessing import MinMaxScaler

# Fit a min-max scaler on every column from index 3 onward, then overwrite
# those columns in place with the scaled values. Columns 0-2 are not touched.
scaler = MinMaxScaler().fit(Dataset[:, 3:])
Dataset[:, 3:] = scaler.transform(Dataset[:, 3:])

In [5]:
# # Alternative normalization (disabled): StandardScaler instead of MinMaxScaler
# from sklearn.preprocessing import StandardScaler

# sc = StandardScaler()
# Dataset[:, 3:] = sc.fit_transform(Dataset[:, 3:])

In [6]:
# Shift the video, action and actor labels (columns 0, 1 and 2) down by one
# so that they start from zero. This edits Dataset in place, so running the
# cell a second time without reloading would shift the labels again.
Dataset[:, 0:3] -= 1

In [7]:
# Global variables
# Number of videos = highest zero-based video id in column 0, plus one.
# The maximum is used rather than the id stored in the last row, so the value
# does not depend on the rows being ordered by video id and always equals
# the length of np.bincount over the same column.
videos_len = Dataset[:, 0].max().astype(int) + 1

In [8]:
# Determine the maximum length and the most frequently occurring length.
# num_frames[v] is the number of rows whose column-0 value (cast to int) is v.
num_frames = np.bincount(Dataset[:, 0].astype(int))
# Largest per-video row count.
max_len = num_frames.max()
# Most common per-video row count; on a tie argmax picks the smallest one.
max_occur_len = np.bincount(num_frames).argmax()

In [9]:
# Sequence-Padding to reach all videos to length of max length
def max_length_pad_sequence(dataset):
    """Pad every video with extra rows up to the length of the longest video.

    Column 0 of ``dataset`` is read as a zero-based integer video id and each
    row is treated as one frame. The per-video frame counts and the target
    length are derived from ``dataset`` itself, so the result depends only on
    the argument and not on notebook-level variables.

    Padding rows are zero everywhere except columns 0-2, which repeat the
    values found in the first frame of the same video. Rows are grouped by
    video id (stable, so the frame order inside a video is kept); for input
    that is already ordered by video id this leaves the row order unchanged.
    If an id below the maximum has no rows at all, its block consists only of
    padding and its columns 0-2 are taken from the next video that has rows.

    Parameters
    ----------
    dataset : array-like, shape (n_frames, n_columns)
        Frame table; it is not modified.

    Returns
    -------
    numpy.ndarray, shape (n_videos * max_len, n_columns)
        ``n_videos`` is the highest video id plus one and ``max_len`` the
        largest number of frames of any video. Videos are stacked back to
        back, ``max_len`` rows each. An input with zero rows is returned as
        an (empty) copy.
    """
    data = np.asarray(dataset)
    n_rows, n_cols = data.shape
    if n_rows == 0:
        return data.copy()

    # Group rows by video id without disturbing the frame order inside a video.
    video_ids = data[:, 0].astype(int)
    order = np.argsort(video_ids, kind='stable')
    rows = data[order]
    video_ids = video_ids[order]

    counts = np.bincount(video_ids)
    target_len = int(counts.max())
    # Index (in `rows`) of each video's first frame, and each frame's
    # position inside its own video.
    first_row = np.cumsum(counts) - counts
    frame_pos = np.arange(n_rows) - first_row[video_ids]

    padded = np.zeros((counts.size, target_len, n_cols), dtype=data.dtype)
    # Write each video's labels on all of its timesteps first; the real
    # frames then overwrite the leading timesteps, leaving labelled zero rows
    # as padding at the end.
    padded[:, :, 0:3] = rows[first_row, 0:3][:, None, :]
    padded[video_ids, frame_pos] = rows

    return padded.reshape(counts.size * target_len, n_cols)

In [10]:
# Sequence-Padding to reach all videos to max occured length
def max_occur_pad_sequence(dataset):
    """Pad or truncate every video to the most frequently occurring length.

    Column 0 of ``dataset`` is read as a zero-based integer video id and each
    row is treated as one frame. The target length is the most common
    per-video frame count (the smallest one on a tie), computed from
    ``dataset`` itself, so the result depends only on the argument and not
    on notebook-level variables.

    Videos longer than the target keep only their first ``target`` frames.
    Shorter videos are extended with rows that are zero everywhere except
    columns 0-2, which repeat the values found in the first frame of the same
    video. Rows are grouped by video id (stable, so the frame order inside a
    video is kept); for input already ordered by video id the row order is
    unchanged. If an id below the maximum has no rows at all, its block
    consists only of padding and its columns 0-2 are taken from the next
    video that has rows.

    Parameters
    ----------
    dataset : array-like, shape (n_frames, n_columns)
        Frame table; it is not modified.

    Returns
    -------
    numpy.ndarray, shape (n_videos * target, n_columns)
        ``n_videos`` is the highest video id plus one. Videos are stacked
        back to back, ``target`` rows each. An input with zero rows is
        returned as an (empty) copy.
    """
    data = np.asarray(dataset)
    n_rows, n_cols = data.shape
    if n_rows == 0:
        return data.copy()

    # Group rows by video id without disturbing the frame order inside a video.
    video_ids = data[:, 0].astype(int)
    order = np.argsort(video_ids, kind='stable')
    rows = data[order]
    video_ids = video_ids[order]

    counts = np.bincount(video_ids)
    # Mode of the per-video frame counts.
    target_len = int(np.argmax(np.bincount(counts)))
    # Index (in `rows`) of each video's first frame, and each frame's
    # position inside its own video.
    first_row = np.cumsum(counts) - counts
    frame_pos = np.arange(n_rows) - first_row[video_ids]
    # Frames at or beyond the target length are dropped (truncation).
    kept = frame_pos < target_len

    result = np.zeros((counts.size, target_len, n_cols), dtype=data.dtype)
    # Write each video's labels on all of its timesteps first; the retained
    # frames then overwrite the leading timesteps, leaving labelled zero rows
    # as padding at the end of short videos.
    result[:, :, 0:3] = rows[first_row, 0:3][:, None, :]
    result[video_ids[kept], frame_pos[kept]] = rows[kept]

    return result.reshape(counts.size * target_len, n_cols)

In [11]:
# Generate the padded datasets; in each of them all sample videos have the
# same number of rows. Both results are still 2-D at this point (one row per
# frame, videos stacked back to back).
Dataset_max_len_pad_seq = max_length_pad_sequence(Dataset)
Dataset_max_occ_pad_seq = max_occur_pad_sequence(Dataset)

In [12]:
# Dimensions of the dataset padded to the maximum video length
print('Dataset from max length sequence padding:')
print('Number of rows in the dataset: %i' % (Dataset_max_len_pad_seq.shape[0]))
print('Number of columns in the dataset: %i' % (Dataset_max_len_pad_seq.shape[1]))

Dataset from max length sequence padding:
Number of rows in the dataset: 7525
Number of columns in the dataset: 48


In [13]:
# Dimensions of the dataset padded/truncated to the most common video length
print('Dataset from max occurance length sequence padding:')
print('Number of rows in the dataset: %i' % (Dataset_max_occ_pad_seq.shape[0]))
print('Number of columns in the dataset: %i' % (Dataset_max_occ_pad_seq.shape[1]))

Dataset from max occurance length sequence padding:
Number of rows in the dataset: 3440
Number of columns in the dataset: 48


In [14]:
# Reshape datasets to form (sample, timestep, features)
# The feature axis is given as -1 so NumPy infers it from the array size.
# Reading it from shape[1] would only be right on the first run: once the
# array is 3-D, shape[1] is the timestep count and re-running the cell
# would raise a ValueError.
Dataset_max_len_pad_seq = Dataset_max_len_pad_seq.reshape((videos_len, max_len, -1))
print('New shape of dataset: ' + str(Dataset_max_len_pad_seq.shape))
Dataset_max_occ_pad_seq = Dataset_max_occ_pad_seq.reshape((videos_len, max_occur_len, -1))
print('New shape of dataset: ' + str(Dataset_max_occ_pad_seq.shape))

New shape of dataset: (215, 35, 48)
New shape of dataset: (215, 16, 48)


In [15]:
# Save the two padded datasets to their .npy files
# (max-length version first, then the most-common-length version).
np.save(Dataset_max_len_Padded_dir, Dataset_max_len_pad_seq)
np.save(Dataset_max_occ_len_Padded_dir, Dataset_max_occ_pad_seq)