In [4]:
"""
FEATURE REDUCTION TO 42 FEATURES (PRETRAINING)

This code extracts all the OpenPose keypoints from individual frame JSON files from all the input ASL video data. 
From that we extract all the required Face, Body and Hand keypoints. 

Principal component analysis was run on all the data and the top 5 principal components were computed. The features 
contributing to each of these components were analyzed, and the 42 features that appear most often across these 
components were selected. Only these 42 features are kept; all other features are discarded. 

The result of this dimensionality reduction is stored in the following files:
    pretrain_key_points_42.npy
    pretrain_key_points_42.pkl
    

NOTE: After running this code, you have to run preprocess_pretraining_data.ipynb before pretraining the network.
"""
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score
import pickle
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
import seaborn as sns
import collections
import os

In [1]:
def get_face_points(x,i):
    """Return the slice of ``x[i]`` that holds face points 48 through 67.

    Each point occupies 3 consecutive values, so the result covers flat
    positions 144..203 (60 values when ``x[i]`` is long enough). According to
    the original author's note these are the points around the mouth/lips.

    Parameters
    ----------
    x : mapping
        Container holding the flat keypoint sequence under key ``i``.
    i : hashable
        Key of the face keypoint sequence inside ``x``.

    Returns
    -------
    Same sequence type as ``x[i]``, restricted to the selected points.
    """
    values_per_point = 3
    first_point, last_point = 48, 67  # inclusive range of point numbers
    lo = first_point * values_per_point
    hi = (last_point + 1) * values_per_point
    return x[i][lo:hi]

def get_pose_points(x,i):
    """Return the selected body keypoints from ``x[i]`` as one flat NumPy array.

    Two inclusive runs of points are taken, in this order: points 0-7 and
    points 15-18. Each point occupies 3 consecutive values, so a full-length
    input yields 24 + 12 = 36 values.
    NOTE(review): the original comments describe the first run as "neck and
    hands" and the second as "neck and cheek points"; confirm against the
    OpenPose body model that produced the data.

    The previous implementation called ``list.append`` on the first slice and
    relied on ``np.hstack`` to flatten the nested list, so it only worked when
    ``x[i]`` was a plain Python list. This version accepts any sliceable
    sequence (list, tuple, ndarray) and returns the same values for lists.

    Parameters
    ----------
    x : mapping
        Container holding the flat keypoint sequence under key ``i``.
    i : hashable
        Key of the pose keypoint sequence inside ``x``.

    Returns
    -------
    numpy.ndarray
        1-D array with the two selected runs concatenated.
    """
    p_data = x[i]
    values_per_point = 3
    # (first point, last point), both inclusive
    point_ranges = ((0, 7), (15, 18))
    segments = [
        np.asarray(p_data[first * values_per_point:(last + 1) * values_per_point])
        for first, last in point_ranges
    ]
    # np.concatenate always allocates a fresh array, so the caller never
    # receives a view into the input data.
    return np.concatenate(segments)

In [2]:
def get_data(data):
    """Build the flat keypoint vector for the first person in one parsed frame.

    The entries of ``data['people'][0]`` are visited in their own iteration
    order. Pose and face entries are reduced with ``get_pose_points`` and
    ``get_face_points``; both hand entries are taken whole; every other key is
    ignored. The pieces are stacked into one 1-D array, which is then indexed
    with the notebook-level ``cleaned_indexes``.

    Note that ``cleaned_indexes`` is defined in a later cell of this notebook,
    so that cell has to be executed before this function is called. The layout
    of the returned vector follows the key order of the input, so the indices
    applied afterwards are only meaningful for a consistent key order.

    Parameters
    ----------
    data : mapping-like
        Parsed keypoint file; ``data['people'][0]`` must be a dict of flat
        keypoint lists.

    Returns
    -------
    numpy.ndarray
        The stacked keypoints restricted to ``cleaned_indexes``.
    """
    person = data['people'][0]
    hand_keys = ('hand_left_keypoints_2d', 'hand_right_keypoints_2d')
    pieces = []
    for key in person:
        if key == 'pose_keypoints_2d':
            pieces.append(get_pose_points(person, key))
        elif key == 'face_keypoints_2d':
            pieces.append(get_face_points(person, key))
        elif key in hand_keys:
            # hands are kept in full
            pieces.append(person[key])
    stacked = np.hstack(pieces).copy()
    return stacked[cleaned_indexes]

In [4]:
# Exploratory check: parse one keypoint JSON file, show the resulting frame and its size on disk.
# NOTE(review): signer_names, story and frame are not used in this cell, and all three names are
# rebound by the extraction cell further down (signer_names/frame by assignment, story as loop variable).
signer_names=['SIA02','SIB01','SIC02']
story = ['story01-front_x264']
frame = '00000000000'
data = pd.read_json('1/1_000000000001_keypoints.json')
print(data)
# File size in bytes; the extraction loop below skips files smaller than 3000 bytes.
print(os.path.getsize('1/1_000000000001_keypoints.json'))

                                              people  version
0  {'pose_keypoints_2d': [213.175, 111.481, 0.769...      1.2
3709


In [5]:
# Positions 2, 5, 8, ... (every third value) of the 222-long stacked keypoint vector are dropped;
# cleaned_indexes holds the 148 positions that remain and is used by get_data().
remove_indexes = list(range(2,222,3))
total_indexes = list(range(222))
cleaned_indexes = np.setdiff1d(total_indexes,remove_indexes)

# This is the feat_li_42 list we obtained by running the to_JSON_PCA_m.ipynb notebook on the actual
# training data. Just copy and paste that list here. Its entries index into the cleaned vector.
feat_li_42 = [
    115, 119, 137, 139, 129, 131, 121, 123, 12, 111, 113, 117, 125, 135,
    147, 127, 0, 8, 108, 110, 106, 112, 114, 116, 124, 22, 100, 102,
    104, 94, 96, 9, 109, 145, 30, 48, 28, 24, 26, 46, 16, 18,
]
print(feat_li_42)

[115, 119, 137, 139, 129, 131, 121, 123, 12, 111, 113, 117, 125, 135, 147, 127, 0, 8, 108, 110, 106, 112, 114, 116, 124, 22, 100, 102, 104, 94, 96, 9, 109, 145, 30, 48, 28, 24, 26, 46, 16, 18]


In [7]:
signer_names=[i for i in range(1,45)]
story_name = ''
frame = '00000000000'
train_key_points = []

# Frame files are named "<signer>_<12-digit zero-padded frame number>_keypoints.json".
FRAME_ID_WIDTH = 12
# Files smaller than this many bytes are skipped without being parsed (same threshold as before).
MIN_KEYPOINT_FILE_BYTES = 3000


def _collect_signer_frames(signer):
    """Read every consecutive frame file of one signer and keep the 42 selected features.

    Frames are read from ``<signer>/<signer>_<frame>_keypoints.json`` starting
    at frame 0 and stopping at the first frame number whose file does not
    exist. Files below ``MIN_KEYPOINT_FILE_BYTES`` are skipped. A file that
    exists but cannot be parsed or converted is reported and skipped instead
    of ending the sequence, so one corrupt frame no longer truncates the
    signer's data (and no longer aborts the remaining signers).

    Parameters
    ----------
    signer : int or str
        Signer identifier; used both as directory name and file-name prefix.

    Returns
    -------
    (str, numpy.ndarray)
        The signer id as a string and an array of shape
        ``(n_frames, len(feat_li_42))``. The array has zero rows when no
        usable frame was found.
    """
    sub_name = str(signer)
    trans_dir = sub_name + '/'
    print()
    print(trans_dir)

    frames = []
    i = 0
    while True:
        fname = str(i).zfill(FRAME_ID_WIDTH)
        file_name = trans_dir + sub_name + '_' + fname + '_keypoints.json'
        i += 1
        if not os.path.isfile(file_name):
            # First missing frame number marks the end of this signer's sequence.
            print("Inner file - ",file_name)
            break
        if os.path.getsize(file_name) < MIN_KEYPOINT_FILE_BYTES:
            continue
        try:
            data = pd.read_json(file_name)
            frames.append(np.asarray(get_data(data))[feat_li_42])
        except (ValueError, KeyError, IndexError) as e:
            print("Skipping unreadable frame - ", file_name, e)

    if not frames:
        return sub_name, np.empty((0, len(feat_li_42)))
    # Stack once at the end; stacking inside the loop copies the whole array for every frame.
    return sub_name, np.vstack(frames)


for story in range(1,2):
    # NOTE(review): s_no is computed as before but is not used when building file names.
    s_no = story_name + str(story).zfill(2)
    for signer in signer_names:
        sub_name, signer_frames = _collect_signer_frames(signer)
        if signer_frames.shape[0] == 0:
            # Previously this case raised KeyError, which ended the whole run.
            print("No usable frames for signer", sub_name)
            continue
        train_key_points.append({sub_name: signer_frames})
        print(signer_frames.shape)
    


1/
Inner file -  1/1_000000012764_keypoints.json
(12485, 42)

2/
Inner file -  2/2_000000012178_keypoints.json
(11927, 42)

3/
Inner file -  3/3_000000007813_keypoints.json
(7787, 42)

4/
Inner file -  4/4_000000006335_keypoints.json
(6197, 42)

5/
Inner file -  5/5_000000006344_keypoints.json
(6255, 42)

6/
Inner file -  6/6_000000005474_keypoints.json
(5376, 42)

7/
Inner file -  7/7_000000005684_keypoints.json
(5572, 42)

8/
Inner file -  8/8_000000005640_keypoints.json
(5589, 42)

9/
Inner file -  9/9_000000004824_keypoints.json
(4785, 42)

10/
Inner file -  10/10_000000013378_keypoints.json
(13125, 42)

11/
Inner file -  11/11_000000008461_keypoints.json
(8194, 42)

12/
Inner file -  12/12_000000007286_keypoints.json
(7007, 42)

13/
Inner file -  13/13_000000004245_keypoints.json
(4105, 42)

14/
Inner file -  14/14_000000003958_keypoints.json
(3923, 42)

15/
Inner file -  15/15_000000008405_keypoints.json
(7840, 42)

16/
Inner file -  16/16_000000012428_keypoints.json
(12357, 42)

In [9]:
# Inspect the collected result: a list holding one {signer id: feature array} dict per signer.
print(train_key_points)

[{'1': array([[469.825, 470.293, 494.63 , ..., 170.164, 193.71 , 225.025],
       [481.866, 476.093, 502.337, ..., 201.544, 194.958, 225.043],
       [464.393, 465.462, 498.591, ..., 201.667, 193.734, 225.039],
       ...,
       [264.751, 246.217, 232.656, ..., 198.989, 197.589, 228.887],
       [263.963, 245.883, 228.706, ..., 198.44 , 197.595, 227.632],
       [260.278, 259.802, 226.489, ..., 197.984, 197.569, 228.893]])}, {'2': array([[467.072, 469.509, 471.457, ..., 174.819, 176.7  , 205.457],
       [484.502, 469.038, 487.366, ..., 163.719, 176.693, 208.049],
       [489.082, 468.813, 500.409, ..., 183.783, 175.423, 208.029],
       ...,
       [232.646, 212.269, 197.578, ..., 220.57 , 204.166, 235.501],
       [232.533, 211.402, 190.272, ..., 220.562, 204.164, 235.481],
       [230.217, 210.758, 192.226, ..., 220.17 , 204.167, 235.489]])}, {'3': array([[477.818, 475.77 , 501.377, ..., 190.168, 222.37 , 243.239],
       [478.403, 472.133, 495.646, ..., 202.002, 222.383, 239.374],

In [10]:
# Persist the list of per-signer dicts with pickle; the file is opened in binary write mode.
with open('pretrain_key_points_42.pkl','wb') as f:
    pickle.dump(train_key_points, f)

In [11]:
# Also write the same object in .npy form. train_key_points is a Python list of dicts, not a
# numeric array. NOTE(review): NumPy presumably stores this as a pickled object array, in which
# case reading it back needs np.load(..., allow_pickle=True) — confirm before relying on this file.
np.save('pretrain_key_points_42.npy', train_key_points)