In [31]:
"""
FEATURE REDUCTION TO 42 FEATURES

This code extracts all the OpenPose keypoints from individual frame JSON files from all the input ASL video data. 
From that we extract all the required Face, Body and Hand keypoints. 

Principal component analysis is run on all the data. Top 5 principal components were calculated. The features contributing to 
each of these components were analyzed and top 42 most common features among all these components were selected. 
Out of all the features just these 42 features were selected. 

The result of this dimentionality reduction is stored in the following files:
    key_points_42.npy
    key_points_42.pkl
    

NOTE: This code doesn't map the OpenPose data to corresponding ASL English sentences. 
That is done in different ipynb file:
    For Frame to True Translation mapping, Use - gloss_frame_connection.ipynb
    For Frame to Gloss mapping, Use - gloss_target_conversion.ipynb
"""
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score
import pickle
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
import seaborn as sns
import collections

In [12]:
def get_face_points(x,i):
    f_data = x[i]
    # get just the points around the mouth/lips
    start_pt_no = 48
    end_pt_no = 67
    f_data = f_data[start_pt_no*3:(end_pt_no+1)*3]
#     print(len(f_data)/3)
    return f_data

def get_pose_points(x,i):
    p_data = x[i]
    # get neck and hands
    start_pt_no = 0
    end_pt_no = 7
    p_data_final = p_data[start_pt_no*3:(end_pt_no+1)*3]
    
#     # get neck and cheek points
    start_pt_no = 15
    end_pt_no = 18
    p_data_final.append(p_data[start_pt_no*3:(end_pt_no+1)*3])
    p_data_final = np.hstack(p_data_final).copy()
#     print(len(p_data_final)/3)
    return p_data_final

In [13]:
def get_data(data):
    x = data['people'][0]
    key_points = []
    for i in x:
#         print(i)
        if i == 'pose_keypoints_2d':
#             print(np.array(get_pose_points(x,i)).shape)
            key_points.append(get_pose_points(x,i))
        if i == 'face_keypoints_2d':
#             print(np.array(get_face_points(x,i)).shape)
            key_points.append(get_face_points(x,i))
        if i == 'hand_left_keypoints_2d':
#             print(len(x[i])/3)
#             print(np.array(x[i]).shape)
            key_points.append(x[i])
        if i =='hand_right_keypoints_2d':
#             print(len(x[i])/3)
#             print(np.array(x[i]).shape)
            key_points.append(x[i])
    key_points = np.hstack(key_points).copy()
#     normed_keypoints = (key_points - key_points.mean(axis=0)) / key_points.std(axis=0)
#     return normed_keypoints
    key_points = key_points[cleaned_indexes]
    return key_points

In [14]:
signer_names=['SIA02','SIB01','SIC02']
story = ['story01-front_x264']
frame = '00000000000'
data = pd.read_json(signer_names[0]+'-'+story[0]+'/SIA02-story01-front_x264_000000000000_keypoints.json')

In [15]:
remove_indexes = [i for i in range(2,222,3)]
total_indexes = [i for i in range(0,222)]
cleaned_indexes = np.setdiff1d(total_indexes,remove_indexes)

In [18]:
signer_names=['SIA02','SIB01','SIC02']
story_name = 'story'
frame = '00000000000'
train_key_points = []
for story in range(1,80):
    if story<10:
        s_no = '0'+str(story)
    else:
        s_no = str(story)
    s_no = story_name+s_no
    try:
        for signer in signer_names:
            dict_entry = {}
            sub_name = signer+'-'+s_no+'-front_x264'
            trans_dir = sub_name+'/'
            print()
            print(trans_dir)
            i = 0
            stop = False
            while not stop:
                x = int(i/10)
                if x<1:
                    space_counter=1
                elif x<10:
                    space_counter=2
                elif x<100:
                    space_counter=3
                elif x<1000:
                    space_counter=4

                fname=frame[-space_counter::-1]+str(i)
                file_name = trans_dir+sub_name+'_'+fname+'_keypoints.json'
#                 print(file_name)
                i+=1
                try:
                    data = pd.read_json(file_name)
                except:
                    stop = True
                    print("Inner file - ",file_name)
                    break
                train_key_points_np = np.array(get_data(data))
#                 print(train_key_points_np.shape)
#                 train_key_points_np = train_key_points_np[:][cleaned_indexes]
#                 print(train_key_points_np.shape)
                train_key_points_np_5 = train_key_points_np
                if sub_name not in dict_entry:
                    dict_entry[sub_name] = train_key_points_np_5
                else:
                    dict_entry[sub_name] = np.vstack((dict_entry[sub_name],train_key_points_np_5))
#                 train_key_points.append(get_data(data))
            train_key_points.append(dict_entry)
            print(train_key_points[np.array(train_key_points).shape[0]-1][sub_name].shape)
    except Exception as e:
        print(e)
        pass


SIA02-story01-front_x264/
Inner file -  SIA02-story01-front_x264/SIA02-story01-front_x264_000000001145_keypoints.json
(1145, 148)

SIB01-story01-front_x264/
Inner file -  SIB01-story01-front_x264/SIB01-story01-front_x264_000000000947_keypoints.json
(947, 148)

SIC02-story01-front_x264/
Inner file -  SIC02-story01-front_x264/SIC02-story01-front_x264_000000007502_keypoints.json
(7502, 148)

SIA02-story02-front_x264/
Inner file -  SIA02-story02-front_x264/SIA02-story02-front_x264_000000000704_keypoints.json
(704, 148)

SIB01-story02-front_x264/
Inner file -  SIB01-story02-front_x264/SIB01-story02-front_x264_000000000664_keypoints.json
(664, 148)

SIC02-story02-front_x264/
Inner file -  SIC02-story02-front_x264/SIC02-story02-front_x264_000000001692_keypoints.json
(1692, 148)

SIA02-story03-front_x264/
Inner file -  SIA02-story03-front_x264/SIA02-story03-front_x264_000000000000_keypoints.json
'SIA02-story03-front_x264'

SIA02-story04-front_x264/
Inner file -  SIA02-story04-front_x264/SIA02

In [23]:
count = 0
t_key_points = []
for i in train_key_points:
    for key in i:
        if count == 0:
            t_key_points = i[key]
            count += 1
        else:
            t_key_points = np.vstack((t_key_points, i[key]))

all_key_points = np.array(t_key_points)
print(t_key_points.shape)

(12654, 148)


In [24]:
print(cleaned_indexes)

[  0   1   3   4   6   7   9  10  12  13  15  16  18  19  21  22  24  25
  27  28  30  31  33  34  36  37  39  40  42  43  45  46  48  49  51  52
  54  55  57  58  60  61  63  64  66  67  69  70  72  73  75  76  78  79
  81  82  84  85  87  88  90  91  93  94  96  97  99 100 102 103 105 106
 108 109 111 112 114 115 117 118 120 121 123 124 126 127 129 130 132 133
 135 136 138 139 141 142 144 145 147 148 150 151 153 154 156 157 159 160
 162 163 165 166 168 169 171 172 174 175 177 178 180 181 183 184 186 187
 189 190 192 193 195 196 198 199 201 202 204 205 207 208 210 211 213 214
 216 217 219 220]


In [25]:
pca_trafo = PCA()
pca_data = pca_trafo.fit_transform(np.array(all_key_points))
# print(pca_trafo.explained_variance_ratio_)
temp = []
top_feat_PC = []
for i in pca_trafo.components_[0:5]:
#     print(np.sort(i))
    print(np.argsort(i))
    top_feat_PC.append(np.argsort(i))
#     print(np.sort(i))
# print(list(np.round(temp,4)))
# sns.heatmap(np.log(pca_trafo.inverse_transform(np.eye(np.array(all_key_points).shape[0]))), cmap="hot", cbar=False)

[ 81  89  79  87  97  95  77  85 105  93 103  73 101  71  83  75  91  99
  69  67  65  15 123 121 129 131 127 137 119 139 135 145 147 143 125 115
 133 117 141 113 111 109 107   9 104 102 100  96  94  92  98  88  86  84
  90  80  78  82  76  64  74  72  70  14  66  68   7  13   5   3  11  43
  45  41  47  61  53   1  63  51  31  59  29  55  39  49  33  27  25  17
  57   6  35  37  19  21  23  10   2  36  56  38  22  40  34  58  54  20
  32  42  60  52   4  30  28  62  50  44  26   0  46  16  48  24  18  12
   8 108 110 114 112 106 116 118 124 120 122 132 130 126 140 128 134 138
 136 142 144 146]
[ 97 105  89  95 103  87  81 101  93  85  79  99  91  77  83  75  73  71
  69  65  67  15  64  98  14 100  90  66 102  82 104  92  74  94  84  68
  96  76  86  78  88  70  80  72 144 146 142 140 136 134 138 132   4  22
  13 128  12 126 130 124   2  10 106 122  36  34  54  18  32  56  38  58
  16  28  30 120  20  50  60  48  52  40  62  46  26  24  44  42   0 116
 118  23   8 114 112 108  11  19 

In [26]:
print(top_feat_PC)

[array([ 81,  89,  79,  87,  97,  95,  77,  85, 105,  93, 103,  73, 101,
        71,  83,  75,  91,  99,  69,  67,  65,  15, 123, 121, 129, 131,
       127, 137, 119, 139, 135, 145, 147, 143, 125, 115, 133, 117, 141,
       113, 111, 109, 107,   9, 104, 102, 100,  96,  94,  92,  98,  88,
        86,  84,  90,  80,  78,  82,  76,  64,  74,  72,  70,  14,  66,
        68,   7,  13,   5,   3,  11,  43,  45,  41,  47,  61,  53,   1,
        63,  51,  31,  59,  29,  55,  39,  49,  33,  27,  25,  17,  57,
         6,  35,  37,  19,  21,  23,  10,   2,  36,  56,  38,  22,  40,
        34,  58,  54,  20,  32,  42,  60,  52,   4,  30,  28,  62,  50,
        44,  26,   0,  46,  16,  48,  24,  18,  12,   8, 108, 110, 114,
       112, 106, 116, 118, 124, 120, 122, 132, 130, 126, 140, 128, 134,
       138, 136, 142, 144, 146], dtype=int64), array([ 97, 105,  89,  95, 103,  87,  81, 101,  93,  85,  79,  99,  91,
        77,  83,  75,  73,  71,  69,  65,  67,  15,  64,  98,  14, 100,
        90,  66,

In [28]:
top_features = {}
for li in top_feat_PC:
    li = li[-1:-41:-1]
    for feat in li:
        if feat not in top_features:
            top_features[feat] = 1
        else:
            top_features[feat] +=1

sorted_top_feat = sorted(top_features.items(), key=lambda x: x[1])[:-43:-1]
# print(sorted_top_feat)

feat_li_42 = []
for i in sorted_top_feat:
    feat_li_42.append(i[0])
    
print(feat_li_42)

[21, 25, 47, 3, 5, 111, 113, 141, 115, 125, 133, 143, 119, 12, 17, 64, 14, 66, 98, 68, 90, 100, 82, 74, 70, 92, 33, 59, 53, 51, 31, 27, 63, 61, 49, 43, 45, 6, 7, 9, 107, 109]


In [29]:
signer_names=['SIA02','SIB01','SIC02']
story_name = 'story'
frame = '00000000000'
train_key_points = []
for story in range(1,80):
    if story<10:
        s_no = '0'+str(story)
    else:
        s_no = str(story)
    s_no = story_name+s_no
    try:
        for signer in signer_names:
            dict_entry = {}
            sub_name = signer+'-'+s_no+'-front_x264'
            trans_dir = sub_name+'/'
            print()
            print(trans_dir)
            i = 0
            stop = False
            while not stop:
                x = int(i/10)
                if x<1:
                    space_counter=1
                elif x<10:
                    space_counter=2
                elif x<100:
                    space_counter=3
                elif x<1000:
                    space_counter=4

                fname=frame[-space_counter::-1]+str(i)
                file_name = trans_dir+sub_name+'_'+fname+'_keypoints.json'
#                 print(file_name)
                i+=1
                try:
                    data = pd.read_json(file_name)
                except:
                    stop = True
                    print("Inner file - ",file_name)
                    break
                train_key_points_np = np.array(get_data(data))
#                 print(train_key_points_np.shape)
#                 train_key_points_np = train_key_points_np[:][cleaned_indexes]
#                 print(train_key_points_np.shape)
                train_key_points_np_5 = train_key_points_np[:][feat_li_42]
#                 print(train_key_points_np_5.shape)
                if sub_name not in dict_entry:
                    dict_entry[sub_name] = train_key_points_np_5
                else:
                    dict_entry[sub_name] = np.vstack((dict_entry[sub_name],train_key_points_np_5))
#                 train_key_points.append(get_data(data))
            train_key_points.append(dict_entry)
            print(train_key_points[np.array(train_key_points).shape[0]-1][sub_name].shape)
    except Exception as e:
        print(e)
        pass
    


SIA02-story01-front_x264/
Inner file -  SIA02-story01-front_x264/SIA02-story01-front_x264_000000001145_keypoints.json
(1145, 42)

SIB01-story01-front_x264/
Inner file -  SIB01-story01-front_x264/SIB01-story01-front_x264_000000000947_keypoints.json
(947, 42)

SIC02-story01-front_x264/
Inner file -  SIC02-story01-front_x264/SIC02-story01-front_x264_000000007502_keypoints.json
(7502, 42)

SIA02-story02-front_x264/
Inner file -  SIA02-story02-front_x264/SIA02-story02-front_x264_000000000704_keypoints.json
(704, 42)

SIB01-story02-front_x264/
Inner file -  SIB01-story02-front_x264/SIB01-story02-front_x264_000000000664_keypoints.json
(664, 42)

SIC02-story02-front_x264/
Inner file -  SIC02-story02-front_x264/SIC02-story02-front_x264_000000001692_keypoints.json
(1692, 42)

SIA02-story03-front_x264/
Inner file -  SIA02-story03-front_x264/SIA02-story03-front_x264_000000000000_keypoints.json
'SIA02-story03-front_x264'

SIA02-story04-front_x264/
Inner file -  SIA02-story04-front_x264/SIA02-story

In [30]:
print(train_key_points)

[{'SIA02-story01-front_x264': array([[130.96 , 147.318, 149.652, ..., 412.903, 440.072, 425.715],
       [130.962, 147.426, 149.729, ..., 412.944, 437.256, 425.772],
       [130.975, 147.299, 149.629, ..., 412.93 , 373.465, 426.118],
       ...,
       [128.383, 143.416, 146.937, ..., 407.709, 407.149, 442.274],
       [128.381, 143.422, 146.943, ..., 407.717, 406.704, 442.664],
       [128.389, 143.758, 147.322, ..., 409.029, 407.407, 441.287]])}, {'SIB01-story01-front_x264': array([[119.21 , 145.705, 149.527, ..., 402.519, 439.085, 409.717],
       [117.937, 145.96 , 149.829, ..., 402.517, 414.961, 410.99 ],
       [117.958, 145.835, 149.685, ..., 402.485, 414.381, 409.684],
       ...,
       [119.273, 146.693, 150.427, ..., 393.376, 417.887, 422.542],
       [119.265, 146.819, 150.159, ..., 392.101, 418.911, 420.463],
       [119.261, 146.906, 150.596, ..., 393.363, 419.038, 421.366]])}, {'SIC02-story01-front_x264': array([[127.094, 138.244, 141.413, ..., 375.076, 376.821, 369.219]

In [16]:
with open('key_points_42.pkl','wb') as f:
    pickle.dump(train_key_points, f)

In [17]:
np.save('key_points_42.npy', train_key_points)

In [48]:
loaded_data = np.load('key_points_5.npy')
for key in loaded_data:
    for k in key:
        print(key[k][1:20].shape)
        break
    break

(19, 5)


In [12]:
fl_name = 'SIA02-story01-front_x264'+'\SIA02-story01-front_x264_000000000000_keypoints.json'
fl_data = pd.read_json(fl_name)
print(get_data(fl_data).shape)

pose_keypoints_2d
(36,)
face_keypoints_2d
(60,)
hand_left_keypoints_2d
(63,)
hand_right_keypoints_2d
(63,)
pose_keypoints_3d
face_keypoints_3d
hand_left_keypoints_3d
hand_right_keypoints_3d
(148,)
