In [1]:
"""
FEATURE REDUCTION TO 5 FEATURES (PRETRAINING)

This code extracts all the OpenPose keypoints from individual frame JSON files from all the input ASL video data. 
From that we extract all the required Face, Body and Hand keypoints. 

Principal component analysis is run on all the data to extract the top 5 principal components. The input data is projected onto 
these 5 principal components to reduce the dimensionality from 148 to 5. 

The result of this dimensionality reduction is stored in the following files:
    pretrain_key_points_5.npy
    pretrain_key_points_5.pkl
    

NOTE: After running this code, you have to run preprocess_pretraining_data.ipynb before pretraining the network.
"""
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score
import pickle
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
import seaborn as sns
import collections
import os

In [2]:
def get_face_points(x,i):
    """Return the slice of ``x[i]`` that covers face points 48 through 67.

    ``x[i]`` is read as a flat sequence holding three consecutive values per
    point, so the result contains the 60 values at positions 144..203. The
    notebook's own comment describes these points as the ones around the
    mouth/lips.

    Parameters
    ----------
    x : mapping
        Container indexed by ``i`` (in this notebook, one OpenPose person dict).
    i : hashable
        Key of the flat face key-point sequence inside ``x``.

    Returns
    -------
    Same sequence type as ``x[i]``, sliced; shorter input yields a shorter slice.
    """
    values_per_point = 3
    first_point, last_point = 48, 67
    lo = first_point * values_per_point
    hi = (last_point + 1) * values_per_point
    return x[i][lo:hi]

def get_pose_points(x,i):
    """Return the selected body key points of ``x[i]`` as one flat NumPy array.

    ``x[i]`` is read as a flat sequence with three consecutive values per point.
    Two point ranges are kept, in this order:

    * points 0-7   (described in the notebook as "neck and hands"),
    * points 15-18 (described in the notebook as "neck and cheek points").

    That is 12 points, i.e. 36 values for a full-length input.

    Parameters
    ----------
    x : mapping
        Container indexed by ``i`` (in this notebook, one OpenPose person dict).
    i : hashable
        Key of the flat pose key-point sequence inside ``x``.

    Returns
    -------
    numpy.ndarray
        1-D array with the two slices concatenated. Any sliceable sequence
        (list, tuple, ndarray) is accepted; the input is never modified.
    """
    p_data = x[i]
    values_per_point = 3
    # Inclusive (first, last) point numbers of the ranges that are kept.
    point_ranges = ((0, 7), (15, 18))
    # Slice each range explicitly instead of appending a nested list and
    # relying on hstack to flatten it afterwards.
    pieces = [
        np.asarray(p_data[first * values_per_point:(last + 1) * values_per_point])
        for first, last in point_ranges
    ]
    return np.concatenate(pieces)

In [3]:
def get_data(data):
    """Build the flat feature vector of one frame from its parsed OpenPose JSON.

    Only the first entry of ``data['people']`` is used. Its key-point groups are
    concatenated in a fixed order (pose, face, left hand, right hand) so that
    every frame gets the same column layout regardless of the key order in the
    JSON file. Pose and face are reduced by ``get_pose_points`` /
    ``get_face_points``; both hands are taken whole. Groups that are absent are
    skipped, as before.

    The concatenated vector is then indexed with the notebook-level
    ``cleaned_indexes`` array, which must already be defined when this function
    is called (it is created in a separate cell).

    Parameters
    ----------
    data : mapping or pandas.DataFrame
        Parsed frame file; ``data['people'][0]`` must be a dict of key-point lists.

    Returns
    -------
    numpy.ndarray
        1-D array holding the entries selected by ``cleaned_indexes``.

    Raises
    ------
    ValueError
        If none of the four key-point groups is present (nothing to concatenate).
    IndexError
        If the concatenated vector is shorter than ``cleaned_indexes`` expects.
    """
    person = data['people'][0]
    # Fixed output order; do not depend on the dict's own iteration order.
    group_order = (
        'pose_keypoints_2d',
        'face_keypoints_2d',
        'hand_left_keypoints_2d',
        'hand_right_keypoints_2d',
    )
    key_points = []
    for group in group_order:
        if group not in person:
            continue
        if group == 'pose_keypoints_2d':
            key_points.append(get_pose_points(person, group))
        elif group == 'face_keypoints_2d':
            key_points.append(get_face_points(person, group))
        else:
            # Both hands are used in full.
            key_points.append(person[group])
    key_points = np.hstack(key_points)
    # Fancy indexing already returns a fresh array, so no extra copy is needed.
    return key_points[cleaned_indexes]

In [4]:
# Settings for the story-based directory layout, followed by a quick look at one
# frame file: its parsed content and its size on disk in bytes.
signer_names = ['SIA02', 'SIB01', 'SIC02']
story = ['story01-front_x264']
frame = '00000000000'

sample_json = '1/1_000000000001_keypoints.json'
data = pd.read_json(sample_json)
print(data)
print(os.path.getsize(sample_json))

                                              people  version
0  {'pose_keypoints_2d': [213.175, 111.481, 0.769...      1.2
3709


In [8]:
# Index mask applied by get_data to the 222-value frame vector: every third
# position (2, 5, 8, ...) is dropped, leaving 148 positions.
# NOTE(review): the dropped positions are presumably the per-point confidence
# values of the OpenPose triples - confirm.
remove_indexes = list(range(2, 222, 3))
total_indexes = list(range(222))
cleaned_indexes = np.setdiff1d(total_indexes, remove_indexes)

# Hand-picked list of 42 feature indexes; in the visible notebook it is only
# referenced from commented-out code.
feat_li_42 = [
    115, 119, 137, 139, 129, 131, 121, 123, 12, 111, 113, 117, 125, 135,
    147, 127, 0, 8, 108, 110, 106, 112, 114, 116, 124, 22, 100, 102,
    104, 94, 96, 9, 109, 145, 30, 48, 28, 24, 26, 46, 16, 18,
]
print(cleaned_indexes)

[  0   1   3   4   6   7   9  10  12  13  15  16  18  19  21  22  24  25
  27  28  30  31  33  34  36  37  39  40  42  43  45  46  48  49  51  52
  54  55  57  58  60  61  63  64  66  67  69  70  72  73  75  76  78  79
  81  82  84  85  87  88  90  91  93  94  96  97  99 100 102 103 105 106
 108 109 111 112 114 115 117 118 120 121 123 124 126 127 129 130 132 133
 135 136 138 139 141 142 144 145 147 148 150 151 153 154 156 157 159 160
 162 163 165 166 168 169 171 172 174 175 177 178 180 181 183 184 186 187
 189 190 192 193 195 196 198 199 201 202 204 205 207 208 210 211 213 214
 216 217 219 220]


In [4]:
# Load frames 0..899 of every signer for the story-based directory layout
# ("<signer>-<story>/<signer>-<story>_<frame>_keypoints.json").
# Requires `signer_names` (signer id strings) and `story` from the settings cell;
# other cells rebind `signer_names` to integers, so run this one right after
# the settings cell.
all_key_points = []
for signer in signer_names:
    clip_name = signer + '-' + story[0]
    for i in range(0, 900):
        # Frame numbers are zero-padded to 12 digits in the file names.
        fname = str(i).zfill(12)
        file_name = clip_name + '/' + clip_name + '_' + fname + '_keypoints.json'
        data = pd.read_json(file_name)
        all_key_points.append(get_data(data))

print(np.array(all_key_points).shape)

NameError: name 'signer_names' is not defined

In [16]:
# Collect the cleaned key points of every usable frame, signer by signer.
# Result: `to_PCA_data`, a list with one dict per signer directory that maps the
# signer id (as a string) to an array with one row per usable frame. A signer
# without any usable frame contributes an empty dict.
# Frame files are expected at "<signer>/<signer>_<12-digit frame>_keypoints.json".
signer_names = list(range(1, 45))
to_PCA_data = []
for signer in signer_names:
    sub_name = str(signer)
    trans_dir = sub_name + '/'
    print()
    print(trans_dir)

    frame_rows = []
    i = 0
    while True:
        # Frame numbers are zero-padded to 12 digits in the file names.
        file_name = trans_dir + sub_name + '_' + str(i).zfill(12) + '_keypoints.json'
        i += 1
        # The first missing frame number marks the end of this signer's frames.
        # Testing for the file explicitly keeps parse errors and Ctrl-C from
        # being mistaken for "no more frames".
        if not os.path.exists(file_name):
            print("Inner file - ", file_name)
            break
        # Files smaller than 3000 bytes are skipped.
        # NOTE(review): presumably frames without a detected person - confirm.
        if os.path.getsize(file_name) < 3000:
            continue
        try:
            data = pd.read_json(file_name)
        except ValueError as err:
            # A corrupt frame is reported and skipped instead of silently
            # ending the whole sequence.
            print("Skipping unreadable file - ", file_name, err)
            continue
        frame_rows.append(np.array(get_data(data)))

    dict_entry = {}
    if frame_rows:
        # Stack once per signer; stacking inside the loop copies the growing
        # matrix on every frame.
        dict_entry[sub_name] = np.vstack(frame_rows)
    to_PCA_data.append(dict_entry)
    if frame_rows:
        print(dict_entry[sub_name].shape)
    else:
        print("No usable frames for signer", sub_name)
    


1/
Inner file -  1/1_000000012764_keypoints.json
(12485, 148)

2/
Inner file -  2/2_000000012178_keypoints.json
(11927, 148)

3/
Inner file -  3/3_000000007813_keypoints.json
(7787, 148)

4/
Inner file -  4/4_000000006335_keypoints.json
(6197, 148)

5/
Inner file -  5/5_000000006344_keypoints.json
(6255, 148)

6/
Inner file -  6/6_000000005474_keypoints.json
(5376, 148)

7/
Inner file -  7/7_000000005684_keypoints.json
(5572, 148)

8/
Inner file -  8/8_000000005640_keypoints.json
(5589, 148)

9/
Inner file -  9/9_000000004824_keypoints.json
(4785, 148)

10/
Inner file -  10/10_000000013378_keypoints.json
(13125, 148)

11/
Inner file -  11/11_000000008461_keypoints.json
(8194, 148)

12/
Inner file -  12/12_000000007286_keypoints.json
(7007, 148)

13/
Inner file -  13/13_000000004245_keypoints.json
(4105, 148)

14/
Inner file -  14/14_000000003958_keypoints.json
(3923, 148)

15/
Inner file -  15/15_000000008405_keypoints.json
(7840, 148)

16/
Inner file -  16/16_000000012428_keypoints.j

KeyboardInterrupt: 

In [24]:
# Stack the per-signer frame matrices of `to_PCA_data` (a list of
# {signer id: array} dicts) into one 2-D array, keeping list and dict order.
signer_arrays = [frames for entry in to_PCA_data for frames in entry.values()]
if not signer_arrays:
    # Fail with a clear message instead of a later AttributeError on a list.
    raise ValueError("to_PCA_data holds no frames; run the collection cell first")
# One vstack over all signers avoids re-copying the growing matrix per signer.
PCA_data = np.vstack(signer_arrays)
print(PCA_data.shape)

(260337, 148)


In [26]:
# Fit a 5-component PCA on the stacked frame matrix and show how much of the
# variance each component explains.
# random_state is fixed so that the fit is reproducible when scikit-learn picks
# a randomized SVD solver; it has no effect with the deterministic solvers.
pca = PCA(n_components=5, random_state=0)
pca.fit(PCA_data)
print(pca.explained_variance_ratio_)

[0.52869415 0.20674262 0.11957657 0.06587834 0.02296087]


In [29]:
# Shape of the projection matrix (features x components), i.e. the transposed
# `components_` array, obtained here by reversing the shape tuple.
print(pca.components_.shape[::-1])

(148, 5)


In [32]:
# Project every usable frame of every signer onto the fitted principal
# components. Requires the fitted `pca` object.
# Result: `train_key_points`, a list with one dict per signer directory that
# maps the signer id (as a string) to an array with one projected row per usable
# frame. A signer without any usable frame contributes an empty dict.
# Frame files are expected at "<signer>/<signer>_<12-digit frame>_keypoints.json".
signer_names = list(range(1, 45))
train_key_points = []
for signer in signer_names:
    sub_name = str(signer)
    trans_dir = sub_name + '/'
    print()
    print(trans_dir)

    projected_rows = []
    i = 0
    while True:
        # Frame numbers are zero-padded to 12 digits in the file names.
        file_name = trans_dir + sub_name + '_' + str(i).zfill(12) + '_keypoints.json'
        i += 1
        # The first missing frame number marks the end of this signer's frames.
        # Testing for the file explicitly keeps parse errors and Ctrl-C from
        # being mistaken for "no more frames".
        if not os.path.exists(file_name):
            print("Inner file - ", file_name)
            break
        # Files smaller than 3000 bytes are skipped.
        # NOTE(review): presumably frames without a detected person - confirm.
        if os.path.getsize(file_name) < 3000:
            continue
        try:
            data = pd.read_json(file_name)
        except ValueError as err:
            # A corrupt frame is reported and skipped instead of silently
            # ending the whole sequence.
            print("Skipping unreadable file - ", file_name, err)
            continue
        train_key_points_np = np.array(get_data(data))
        # NOTE(review): this multiplies by components_.T without subtracting
        # pca.mean_, so the values differ from pca.transform(...) by a constant
        # offset per component. Kept unchanged so the saved features stay the
        # same - confirm this is what the downstream notebooks expect.
        projected_rows.append(np.matmul(train_key_points_np, pca.components_.T))

    dict_entry = {}
    if projected_rows:
        # Stack once per signer; stacking inside the loop copies the growing
        # matrix on every frame.
        dict_entry[sub_name] = np.vstack(projected_rows)
    train_key_points.append(dict_entry)
    if projected_rows:
        print(dict_entry[sub_name].shape)
    else:
        print("No usable frames for signer", sub_name)
    


1/
Inner file -  1/1_000000012764_keypoints.json
(12485, 5)

2/
Inner file -  2/2_000000012178_keypoints.json
(11927, 5)

3/
Inner file -  3/3_000000007813_keypoints.json
(7787, 5)

4/
Inner file -  4/4_000000006335_keypoints.json
(6197, 5)

5/
Inner file -  5/5_000000006344_keypoints.json
(6255, 5)

6/
Inner file -  6/6_000000005474_keypoints.json
(5376, 5)

7/
Inner file -  7/7_000000005684_keypoints.json
(5572, 5)

8/
Inner file -  8/8_000000005640_keypoints.json
(5589, 5)

9/
Inner file -  9/9_000000004824_keypoints.json
(4785, 5)

10/
Inner file -  10/10_000000013378_keypoints.json
(13125, 5)

11/
Inner file -  11/11_000000008461_keypoints.json
(8194, 5)

12/
Inner file -  12/12_000000007286_keypoints.json
(7007, 5)

13/
Inner file -  13/13_000000004245_keypoints.json
(4105, 5)

14/
Inner file -  14/14_000000003958_keypoints.json
(3923, 5)

15/
Inner file -  15/15_000000008405_keypoints.json
(7840, 5)

16/
Inner file -  16/16_000000012428_keypoints.json
(12357, 5)

17/
Inner file

In [33]:
# Number of per-signer entries collected in `train_key_points`.
signer_entry_count = len(train_key_points)
print(signer_entry_count)

44


In [34]:
# Show the collected structure (a list of {signer id: projected frames} dicts);
# NumPy abbreviates the large arrays with "..." in the printout.
print(repr(train_key_points))

[{'1': array([[-2949.92304661,  1769.25171967,   845.59396398,   541.55254938,
        -1068.13247124],
       [-2974.61584368,  1831.16806754,   860.33254266,   540.36910146,
        -1185.57884609],
       [ -288.47848394,  2119.15329914,   869.50997017,   527.20847587,
        -1208.36909069],
       ...,
       [-2691.86224579,   807.01264199,   557.83583801,  1014.63070889,
        -1119.90381992],
       [-2697.98213807,   800.14957182,   527.00340152,   994.01825185,
        -1124.6914811 ],
       [-2708.04958718,   802.36470057,   543.3654821 ,  1005.00360051,
        -1124.75893955]])}, {'2': array([[ -281.38287432,  2071.58739164,   854.04556731,   479.1870902 ,
        -1087.59797634],
       [-2896.80330369,  1806.46071132,   816.83481948,   482.32509834,
        -1016.64276588],
       [-2858.07930907,  1796.42592766,   876.21518556,   476.85202023,
        -1112.89862305],
       ...,
       [-2757.49679441,   637.06097562,   571.97663786,   937.92611938,
        -1271.9

In [35]:
# Persist the per-signer projected key points (list of dicts) as a pickle file.
with open('pretrain_key_points_5.pkl', mode='wb') as pkl_file:
    pickle.dump(train_key_points, pkl_file)

In [36]:
# Persist the same list of dicts as a .npy file. np.save converts it to an
# object array (spelled out here), which is stored via pickle, so reading it
# back needs np.load(..., allow_pickle=True).
np.save('pretrain_key_points_5.npy', np.asanyarray(train_key_points))