In [5]:
"""
FEATURE REDUCTION TO 5 FEATURES

This code extracts all the OpenPose keypoints from individual frame JSON files from all the input ASL video data. 
From that we extract all the required Face, Body and Hand keypoints. 

Principal component analysis is run on all the data to extract top 5 principal components. The input data is projected onto 
these 5 principal components to reduce the dimentionality from 148 to 5. 

The result of this dimentionality reduction is stored in the following files:
    key_points_5.npy
    key_points_5.pkl
    

NOTE: This code doesn't map the OpenPose data to corresponding ASL English sentences. 
That is done in different ipynb file:
    For Frame to True Translation mapping, Use - gloss_frame_connection.ipynb
    For Frame to Gloss mapping, Use - gloss_target_conversion.ipynb
"""
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score
import pickle
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
import seaborn as sns
import collections

In [6]:
def get_face_points(x,i):
    f_data = x[i]
    # get just the points around the mouth/lips
    start_pt_no = 48
    end_pt_no = 67
    f_data = f_data[start_pt_no*3:(end_pt_no+1)*3]
#     print(len(f_data)/3)
    return f_data

def get_pose_points(x,i):
    p_data = x[i]
    # get neck and hands
    start_pt_no = 0
    end_pt_no = 7
    p_data_final = p_data[start_pt_no*3:(end_pt_no+1)*3]
    
#     # get neck and cheek points
    start_pt_no = 15
    end_pt_no = 18
    p_data_final.append(p_data[start_pt_no*3:(end_pt_no+1)*3])
    p_data_final = np.hstack(p_data_final).copy()
#     print(len(p_data_final)/3)
    return p_data_final

In [7]:
def get_data(data):
    x = data['people'][0]
    key_points = []
    for i in x:
#         print(i)
        if i == 'pose_keypoints_2d':
#             print(np.array(get_pose_points(x,i)).shape)
            key_points.append(get_pose_points(x,i))
        if i == 'face_keypoints_2d':
#             print(np.array(get_face_points(x,i)).shape)
            key_points.append(get_face_points(x,i))
        if i == 'hand_left_keypoints_2d':
#             print(len(x[i])/3)
#             print(np.array(x[i]).shape)
            key_points.append(x[i])
        if i =='hand_right_keypoints_2d':
#             print(len(x[i])/3)
#             print(np.array(x[i]).shape)
            key_points.append(x[i])
    key_points = np.hstack(key_points).copy()
#     normed_keypoints = (key_points - key_points.mean(axis=0)) / key_points.std(axis=0)
#     return normed_keypoints
    key_points = key_points[cleaned_indexes]
    return key_points

In [4]:
signer_names=['SIA02','SIB01','SIC02']
story = ['story01-front_x264']
frame = '00000000000'
data = pd.read_json(signer_names[0]+'-'+story[0]+'/SIA02-story01-front_x264_000000000000_keypoints.json')

In [4]:
remove_indexes = [i for i in range(2,222,3)]
total_indexes = [i for i in range(0,222)]
cleaned_indexes = np.setdiff1d(total_indexes,remove_indexes)

In [8]:
signer_names=['SIA02','SIB01','SIC02']
story_name = 'story'
frame = '00000000000'
train_key_points = []
for story in range(1,80):
    if story<10:
        s_no = '0'+str(story)
    else:
        s_no = str(story)
    s_no = story_name+s_no
    try:
        for signer in signer_names:
            dict_entry = {}
            sub_name = signer+'-'+s_no+'-front_x264'
            trans_dir = sub_name+'/'
            print()
            print(trans_dir)
            i = 0
            stop = False
            while not stop:
                x = int(i/10)
                if x<1:
                    space_counter=1
                elif x<10:
                    space_counter=2
                elif x<100:
                    space_counter=3
                elif x<1000:
                    space_counter=4

                fname=frame[-space_counter::-1]+str(i)
                file_name = trans_dir+sub_name+'_'+fname+'_keypoints.json'
#                 print(file_name)
                i+=1
                try:
                    data = pd.read_json(file_name)
                except:
                    stop = True
                    print("Inner file - ",file_name)
                    break
                train_key_points_np = np.array(get_data(data))
#                 print(train_key_points_np.shape)
#                 train_key_points_np = train_key_points_np[:][cleaned_indexes]
#                 print(train_key_points_np.shape)
                train_key_points_np_5 = train_key_points_np
                if sub_name not in dict_entry:
                    dict_entry[sub_name] = train_key_points_np_5
                else:
                    dict_entry[sub_name] = np.vstack((dict_entry[sub_name],train_key_points_np_5))
#                 train_key_points.append(get_data(data))
            train_key_points.append(dict_entry)
            print(train_key_points[np.array(train_key_points).shape[0]-1][sub_name].shape)
    except Exception as e:
        print(e)
        pass


SIA02-story01-front_x264/
Inner file -  SIA02-story01-front_x264/SIA02-story01-front_x264_000000001145_keypoints.json
(1145, 148)

SIB01-story01-front_x264/
Inner file -  SIB01-story01-front_x264/SIB01-story01-front_x264_000000000947_keypoints.json
(947, 148)

SIC02-story01-front_x264/
Inner file -  SIC02-story01-front_x264/SIC02-story01-front_x264_000000007502_keypoints.json
(7502, 148)

SIA02-story02-front_x264/
Inner file -  SIA02-story02-front_x264/SIA02-story02-front_x264_000000000704_keypoints.json
(704, 148)

SIB01-story02-front_x264/
Inner file -  SIB01-story02-front_x264/SIB01-story02-front_x264_000000000664_keypoints.json
(664, 148)

SIC02-story02-front_x264/
Inner file -  SIC02-story02-front_x264/SIC02-story02-front_x264_000000001692_keypoints.json
(1692, 148)

SIA02-story03-front_x264/
Inner file -  SIA02-story03-front_x264/SIA02-story03-front_x264_000000000000_keypoints.json
'SIA02-story03-front_x264'

SIA02-story04-front_x264/
Inner file -  SIA02-story04-front_x264/SIA02

In [9]:
print(train_key_points)

[{'SIA02-story01-front_x264': array([[307.166, 127.119, 305.919, ..., 457.3  , 182.72 , 454.907],
       [307.17 , 128.355, 305.92 , ..., 456.396, 183.223, 454.961],
       [307.172, 127.12 , 305.911, ..., 461.22 , 179.836, 454.883],
       ...,
       [315.059, 120.604, 313.725, ..., 433.73 , 148.794, 436.103],
       [315.057, 120.604, 313.752, ..., 467.244, 157.31 , 462.692],
       [315.045, 120.603, 313.748, ..., 436.708, 151.836, 438.998]])}, {'SIB01-story01-front_x264': array([[295.458, 120.535, 304.59 , ..., 464.929, 218.591, 465.712],
       [294.161, 120.626, 304.606, ..., 405.032, 184.627, 405.032],
       [294.152, 120.587, 304.604, ..., 464.873, 221.3  , 469.178],
       ...,
       [294.134, 127.114, 303.285, ..., 460.952, 212.616, 471.039],
       [294.135, 128.368, 303.305, ..., 460.809, 212.951, 470.508],
       [292.875, 128.373, 303.303, ..., 462.108, 212.977, 471.42 ]])}, {'SIC02-story01-front_x264': array([[308.507, 119.314, 312.432, ..., 408.831, 211.825, 411.632]

In [10]:
count = 0
t_key_points = []
for i in train_key_points:
    for key in i:
        if count == 0:
            t_key_points = i[key]
            count += 1
        else:
            t_key_points = np.vstack((t_key_points, i[key]))

t_key_points = np.array(t_key_points)
print(t_key_points.shape)

(23574, 148)


In [11]:
pca = PCA(n_components=5)
pca.fit(t_key_points)
print(pca.explained_variance_ratio_)  

[0.48664617 0.21471096 0.15142894 0.08328793 0.01246498]


In [12]:
print(pca.components_.T.shape)

(148, 5)


In [30]:
key_point_data = np.array(all_key_points)

In [18]:
signer_names=['SIA02','SIB01','SIC02']
story_name = 'story'
frame = '00000000000'
train_key_points = []
for story in range(1,80):
    if story<10:
        s_no = '0'+str(story)
    else:
        s_no = str(story)
    s_no = story_name+s_no
    try:
        for signer in signer_names:
            dict_entry = {}
            sub_name = signer+'-'+s_no+'-front_x264'
            trans_dir = sub_name+'/'
            print()
            print(trans_dir)
            i = 0
            stop = False
            while not stop:
                x = int(i/10)
                if x<1:
                    space_counter=1
                elif x<10:
                    space_counter=2
                elif x<100:
                    space_counter=3
                elif x<1000:
                    space_counter=4

                fname=frame[-space_counter::-1]+str(i)
                file_name = trans_dir+sub_name+'_'+fname+'_keypoints.json'
#                 print(file_name)
                i+=1
                try:
                    data = pd.read_json(file_name)
                except:
                    stop = True
                    print("Inner file - ",file_name)
                    break
                train_key_points_np = np.array(get_data(data))
#                 print(train_key_points_np.shape)
#                 train_key_points_np = train_key_points_np[:][cleaned_indexes]
#                 print(train_key_points_np.shape)
                train_key_points_np_5 = np.matmul(train_key_points_np, pca.components_.T)
#                 print(train_key_points_np_5.shape)
                if sub_name not in dict_entry:
                    dict_entry[sub_name] = train_key_points_np_5
                else:
                    dict_entry[sub_name] = np.vstack((dict_entry[sub_name],train_key_points_np_5))
#                 train_key_points.append(get_data(data))
            train_key_points.append(dict_entry)
            print(train_key_points[np.array(train_key_points).shape[0]-1][sub_name].shape)
    except Exception as e:
        print(e)
        pass
    


SIA02-story01-front_x264/
Inner file -  SIA02-story01-front_x264/SIA02-story01-front_x264_000000001145_keypoints.json
(1145, 5)

SIB01-story01-front_x264/
Inner file -  SIB01-story01-front_x264/SIB01-story01-front_x264_000000000947_keypoints.json
(947, 5)

SIC02-story01-front_x264/
Inner file -  SIC02-story01-front_x264/SIC02-story01-front_x264_000000007502_keypoints.json
(7502, 5)

SIA02-story02-front_x264/
Inner file -  SIA02-story02-front_x264/SIA02-story02-front_x264_000000000704_keypoints.json
(704, 5)

SIB01-story02-front_x264/
Inner file -  SIB01-story02-front_x264/SIB01-story02-front_x264_000000000664_keypoints.json
(664, 5)

SIC02-story02-front_x264/
Inner file -  SIC02-story02-front_x264/SIC02-story02-front_x264_000000001692_keypoints.json
(1692, 5)

SIA02-story03-front_x264/
Inner file -  SIA02-story03-front_x264/SIA02-story03-front_x264_000000000000_keypoints.json
'SIA02-story03-front_x264'

SIA02-story04-front_x264/
Inner file -  SIA02-story04-front_x264/SIA02-story04-fro

In [19]:
print(len(train_key_points))

8


In [29]:
with open('key_points_5.pkl','wb') as f:
    pickle.dump(train_key_points, f)

In [30]:
np.save('key_points_5.npy', train_key_points)

In [48]:
loaded_data = np.load('key_points_5.npy')
for key in loaded_data:
    for k in key:
        print(key[k][1:20].shape)
        break
    break

(19, 5)


In [12]:
fl_name = 'SIA02-story01-front_x264'+'\SIA02-story01-front_x264_000000000000_keypoints.json'
fl_data = pd.read_json(fl_name)
print(get_data(fl_data).shape)

pose_keypoints_2d
(36,)
face_keypoints_2d
(60,)
hand_left_keypoints_2d
(63,)
hand_right_keypoints_2d
(63,)
pose_keypoints_3d
face_keypoints_3d
hand_left_keypoints_3d
hand_right_keypoints_3d
(148,)
