## Gait Video Study
#### Postprocessing the created 3D keypoints (via intrinsic and extrinsic matrix) for lower body and feet and combining together both front and side view 3D coordinates to get the final combined coordinates. Further, scaling all average hip heights to a constant to normalize for subject heights in our dataset.
References: 

http://personal.cityu.edu.hk/meachan/Online%20Anthropometry/Chapter2/Ch2-5.htm
https://msis.jsc.nasa.gov/sections/section03.htm

In [1]:
import numpy as np
import cv2
import os
import glob
import matplotlib.pyplot as plt
import pandas as pd
import shutil
import time
import warnings
warnings.filterwarnings("ignore")
from IPython.display import display, HTML

In [2]:
#Path for 3d data 
path_2d = 'C:\\Users\\purpl\\Box\\Gait Video Project\\GaitVideoData\\video\\openpose_data\\' 
path = 'C:\\Users\\purpl\\Box\\Gait Video Project\\GaitVideoData\\video\\3D_data\\' 
#Path to save the merged coordinates from both cameras 
frame_path_merged = 'C:\\Users\\purpl\\Box\\Gait Video Project\\GaitVideoData\\video\\multi_view_merged_data\\'

#Configuration for which to run the code for 
cohorts = ['\\HOA', '\\MS', '\\PD', '\\ExtraHOA']
trials = ['\\beam_walking', '\\walking']
cameras = ['\\feet\\', '\\lower_body\\']

#Dimensions of the image 
w = 800
h = 448

#Bounds for 3D coordinates 
x_min, y_min, z_min = 0.0001, 0.0001, 0.0001
x_max, y_max, z_max = 87, 310, 120

#Hard limits for coordinates, beyond which we delete the respective coordinate before adjusting for 
#z-coordinate or upper bounding for all coordinates 
#We upper bound the z- coordinate between 120 cm(~20 cm higher than the average American hip height) and 180 cm
#(which we assume as our hard upper limit for z-coordinate, beyond which, we assume that coordinate it 
#is recording is not correct, hence we delete that markers' recordings i.e. set x, y, z, confidence = 
#0, 0, 0, 0) to 120 cm. Further later on, to eliminate the size demographics from our datasets, we will 
#scale all coordinates to make sure that the hip height for each subject is same as the 
#average American hip height http://personal.cityu.edu.hk/meachan/Online%20Anthropometry/Chapter2/Ch2-5.htm
limit_x_min, limit_y_min, limit_z_min = -15, -50, -120
limit_x_max, limit_y_max, limit_z_max = 120, 400, 180

#Counts for sanity check after post processing 
x_coord_negative, y_coord_negative, z_coord_negative = 0, 0, 0
global sanity_x, sanity_y, sanity_z

#Order for the merged coordinates 
order = ['right hip', 'right knee', 'right ankle', 'left hip', 'left knee', 'left ankle', 'left toe 1', 'left toe 2', \
         'left heel', 'right toe 1', 'right toe 2', 'right heel']

#Constant hip height for hip height normalization across all subjects 
const_hip_height = 100 #in cm

### Utility functions 

In [3]:
def hard_limit(frame_csv):
    #Zero out all x, y, z, and confidence for coordinates that violate the 
    #hard constraints 
    hard_condition = (frame_csv.x>limit_x_max)|(frame_csv.y>limit_y_max)\
    |(frame_csv.z>limit_z_max)|(frame_csv.x<limit_x_min)|(frame_csv.y<limit_y_min)|\
    (frame_csv.z<limit_z_min)
    frame_csv[hard_condition] = 0
    return frame_csv    

In [4]:
def shift_nonnegative(frame_csv):
    #If the minimum z-value is negative, hence a shift is needed 
    #Make sure to not shift the 0 z-coordinate for the markers that are missing 
    #(i.e. have x, y, z, confidence = 0, 0, 0, 0)
    if (frame_csv.x.min()<0): 
        frame_csv.x[frame_csv.confidence!=0]+=(-1*frame_csv.x.min() + x_min)
    if (frame_csv.y.min()<0): 
        frame_csv.y[frame_csv.confidence!=0]+=(-1*frame_csv.y.min() + y_min)
    if (frame_csv.z.min()<0): 
        frame_csv.z[frame_csv.confidence!=0]+=(-1*frame_csv.z.min() + z_min)
    return frame_csv

In [5]:
def upper_bound(frame_csv):
    #Upper bound the x, y, z coordinates if they exceed the max bounds 
    frame_csv.x[frame_csv.x>x_max] = x_max
    frame_csv.y[frame_csv.y>y_max] = y_max
    frame_csv.z[frame_csv.z>z_max] = z_max  
    return frame_csv

In [6]:
def sanity_check(frame_csv):
    #Sanity check to check if there are any x<0, or y<0 or z<0 that 
    #we may need to treat 
    global sanity_x, sanity_y, sanity_z
    if ((frame_csv.x<0).sum()>0 | (frame_csv.x>x_max+x_min).sum()>0):
        print (frame_csv)
        sanity_x+=1
        print ('x-coordinate sanity not satisfied for ', frame)
    if ((frame_csv.y<0).sum()>0 | (frame_csv.y>y_max+y_min).sum()>0):
        print (frame_csv)
        sanity_y+=1
        print ('y-coordinate sanity not satisfied for ', frame)
    if ((frame_csv.z<0).sum()>0 | (frame_csv.z>z_max+z_min).sum()>0):
        print (frame_csv)
        sanity_z+=1
        print ('z-coordinate sanity not satisfied for ', frame)

In [7]:
#To check if the particular marker is not missing/is available in the frame 
def marker_available(coordinate_name, frame_path):
    frame_csv = pd.read_csv(frame_path, index_col = 0)
#     print (frame_csv)
    missing = (frame_csv.loc[coordinate_name].x==0) & (frame_csv.loc[coordinate_name].y==0) & (frame_csv.loc[coordinate_name].z==0) #Marker is missing in the frame
#     print ('Bool missing', not(missing))
    fill_list = list(frame_csv.loc[coordinate_name].values)
#     print ('Fill up values', fill_list)
    return not(missing), fill_list  

In [8]:
#Create the temporary interpolation dataframe for interpolation for a particular marker in current frame
def create_missing_value_fillup_df(sorted_frames, idx, marker_name):
    num_coords_each_side = 3 
    forward_idx, backward_idx, forward_count, backward_count = 1, 1, 0, 0
    #3 forward and 3 backward coordinates and 1 current coordinate as rows 
    #4 time, x, y, z and confidence score as columns 
    interpolation_df = pd.DataFrame(np.zeros([2*num_coords_each_side+1, 5])*np.nan)
#     print ('Initial Interpolate DF:', interpolation_df)
    while (forward_count<3):
        forward_frame = sorted_frames[idx+forward_idx]
#         print ('forward frame', forward_frame)
        available, fill_list = marker_available(marker_name, forward_frame)
#         print ('Availability', available)
        if available:
#             print ('forward frame', forward_frame)
            forward_count+=1
            interpolation_df.iloc[num_coords_each_side+forward_count] = [forward_idx]+fill_list 
            #Fill in indices 4, 5, 6
        forward_idx+=1
        
    while (backward_count<3):
        backward_frame = sorted_frames[idx-backward_idx]
#         print ('backward frame', backward_frame)
        available, fill_list = marker_available(marker_name, backward_frame)
        if available:
#             print ('backward frame', backward_frame)
            interpolation_df.iloc[2-backward_count] = [-1*backward_idx]+fill_list #Fill in indices 0, 1, 2
            backward_count+=1
        backward_idx+=1
    interpolation_df.columns = ['time', 'x', 'y', 'z', 'conf']
    #Setting time 0 for current frame w.r.t. forward having positive time and backward frames having negative time
    interpolation_df.iloc[3].time = 0 #Row 3 is for current frame and column 0 is for time index 
#     print ('Interpolate DF: ', interpolation_df)
    return interpolation_df

In [9]:
#For missing value treatment:
#1. Keep a threshold i.e. if more than 8 out of 12 coordinates are missing 
#(i.e. more than or equal to 75% of coordinates are missing), then ignore that frame 
#2. For a marker missing from the current frame, we are using interpolation with 3 nearest frames with the 
#marker non-missing in the forward and backward direction from the frames and keep time for these frames as part of 
#the interpolation process and use quadratic interpolation 
def missing_value_treatment(sorted_frames, idx, marker_name):
    #Consecutive no. of frames travelled in the sequence to fill up the current frame
    interpolate_df = create_missing_value_fillup_df(sorted_frames, idx, marker_name)
    #Fill the particular marker in current frame using interpolation wrt time 
    #Set the time column as index of this dataframe 
    interpolate_df['time']-=interpolate_df['time'][0]
    interpolate_df.set_index('time')
    #Quadratic will automatically use the index values as the corresponding time 
    #Hence this a index aware interpolation 
    interpolate_df.interpolate(method = 'quadratic', inplace = True)
    #Extract the row at the 3rd index filled in using interpolation 
    #and set up the missing value and save as new csv when all missing markers 
    #are done filling for the current frame 
    interpolate_df.reset_index()
    #                             print ('Filled interpolate DF:', interpolate_df)
    #Not retaining the time we had used in interpolation step
    return interpolate_df.iloc[3].values[1:]

### Postprocessing for both front (lower body) and side camera (feet)

In [8]:
for cohort in cohorts:
    for trial in trials:
        for camera in cameras:
            frame_path = path+cohort+trial+camera #Path to save the frames to 
            if (os.path.exists(frame_path)):
                videos = os.listdir(frame_path)
#             print (len(videos))
                for video in videos:
                    if not os.path.exists(frame_path+video+'\\processed3d'):
                        os.makedirs(frame_path+video+'\\processed3d')
                    frames = glob.glob(frame_path+video+'\\*.csv')
                    for frame in frames:
    #                     print (frame)
                        csv_path = frame_path+video+'\\processed3d\\'+frame.split('\\')[-1]
                        if not os.path.exists(csv_path):
                            try:
                                frame_csv = pd.read_csv(frame, index_col = 0)
                                #To delete (convert x, y, z, confidence to 0, 0, 0, 0) the coordinates 
                                #which do not satisfy the hard limits and hence 
                                #are most probably not the coordinates for the subject in the frame
                                frame_csv = hard_limit(frame_csv)
                                #To shift up the z-coordinates (and sometime x, y-also)of the frame 
                                #if any z (and sometimes x, y also) is <0 
                                frame_csv = shift_nonnegative(frame_csv)
                                #To upper bound the x>width of the treadmill to the width 
                                # and y>length of the treadmill to the length 
                                frame_csv = upper_bound(frame_csv)
                                #Sanity check to check if there are any x<0, or y<0 or z<0 that 
                                #we may need to treat 
                                if ((frame_csv.x<0).sum()>0):
                                    x_coord_negative+=1
                                    print (frame)
                                if ((frame_csv.y<0).sum()>0):
                                    y_coord_negative+=1
                                    print (frame)
                                if ((frame_csv.z<0).sum()>0):
                                    z_coord_negative+=1
                                    print (frame)
#                                 print (frame_csv)
                                frame_csv.to_csv(csv_path)
                            except Exception as e:
                                print (e)
                    print (video, 'Done!')
print ('Count of negative x, y and z coordinates are:', x_coord_negative, y_coord_negative, z_coord_negative)

InkedGVS_212_T_T1_0_Trim Done!
InkedGVS_212_T_T2_0_Trim Done!
InkedGVS_213_T_T1_0_Trim Done!
InkedGVS_213_T_T2_0_Trim Done!
InkedGVS_214_T_T2_0_Trim Done!
InkedGVS_215_T_T1_0_Trim Done!
InkedGVS_215_T_T2_0_Trim Done!
InkedGVS_216_T_T1_0_Trim Done!
InkedGVS_216_T_T2_0_Trim Done!
InkedGVS_217_T_T1_0_Trim Done!
InkedGVS_217_T_T2_0_Trim Done!
InkedGVS_218_T_T1_0_Trim Done!
InkedGVS_218_T_T2_0_Trim Done!
InkedGVS_219_T_T1_0_Trim Done!
InkedGVS_219_T_T2_0_Trim Done!
InkedGVS_212_T_T1_1_Trim Done!
InkedGVS_212_T_T2_1_Trim Done!
InkedGVS_213_T_T1_1_Trim Done!
InkedGVS_213_T_T2_1_Trim Done!
InkedGVS_214_T_T1_1_Trim Done!
InkedGVS_214_T_T2_1_Trim Done!
InkedGVS_215_T_T1_1_Trim Done!
InkedGVS_215_T_T2_1_Trim Done!
InkedGVS_216_T_T1_1_Trim Done!
InkedGVS_216_T_T2_1_Trim Done!
InkedGVS_217_T_T1_1_Trim Done!
InkedGVS_217_T_T2_1_Trim Done!
InkedGVS_218_T_T1_1_Trim Done!
InkedGVS_218_T_T2_1_Trim Done!
InkedGVS_219_T_T1_1_Trim Done!
InkedGVS_219_T_T2_1_Trim Done!
InkedGVS_212_W_T1_0_Trim Done!
InkedGVS

In [29]:
#Sanity Checks!
#Check that x-coordinate is between 0 and 87 cm (width of the treadmill), y coordinate is between 0 and 310 cm 
#(length of the treadmill) and z-coordinate is between 0 and ~120 (maximum length of subject's lower body)
sanity_x, sanity_y, sanity_z = 0, 0, 0
for cohort in cohorts:
    for trial in trials:
        for camera in cameras:
            frame_path = path+cohort+trial+camera #Path to save the frames to 
            if (os.path.exists(frame_path)):
                videos = os.listdir(frame_path)
#             print (len(videos))
            for video in videos:
                frames = glob.glob(frame_path+video+'\\processed3d\\*.csv')
                for frame in frames:
#                     print (frame)
                    frame_csv = pd.read_csv(frame, index_col = 0)
                    sanity_check(frame_csv)
                print (video, 'Done!')
                print ('Sanities (x, y, z in that order) now:', sanity_x, sanity_y, sanity_z)
print ('Count of violating sanity x, y and z coordinates are: ', sanity_x, sanity_y, sanity_z)

InkedGVS_212_T_T1_0_Trim Done!
Sanities (x, y, z in that order) now: 0 0 0
InkedGVS_212_T_T2_0_Trim Done!
Sanities (x, y, z in that order) now: 0 0 0
InkedGVS_213_T_T1_0_Trim Done!
Sanities (x, y, z in that order) now: 0 0 0
InkedGVS_213_T_T2_0_Trim Done!
Sanities (x, y, z in that order) now: 0 0 0
InkedGVS_214_T_T2_0_Trim Done!
Sanities (x, y, z in that order) now: 0 0 0
InkedGVS_215_T_T1_0_Trim Done!
Sanities (x, y, z in that order) now: 0 0 0
InkedGVS_215_T_T2_0_Trim Done!
Sanities (x, y, z in that order) now: 0 0 0
InkedGVS_216_T_T1_0_Trim Done!
Sanities (x, y, z in that order) now: 0 0 0
InkedGVS_216_T_T2_0_Trim Done!
Sanities (x, y, z in that order) now: 0 0 0
InkedGVS_217_T_T1_0_Trim Done!
Sanities (x, y, z in that order) now: 0 0 0
InkedGVS_217_T_T2_0_Trim Done!
Sanities (x, y, z in that order) now: 0 0 0
InkedGVS_218_T_T1_0_Trim Done!
Sanities (x, y, z in that order) now: 0 0 0
InkedGVS_218_T_T2_0_Trim Done!
Sanities (x, y, z in that order) now: 0 0 0
InkedGVS_219_T_T1_0_Trim 

### Combine together both front and side view to get final 3D coordinates 

In [23]:
# ## Run only once to create directories 
# #To create all directories for saving the 3D
# for cohort in cohorts:
#     for trial in trials:
#         for camera in cameras:    
#             frame_path3d = path+cohort+trial+camera #Path to save the frames to 
#             if (os.path.exists(frame_path3d)):
#                 videos = os.listdir(frame_path3d)
#                 for video in videos:
#                     if not os.path.exists(frame_path_merged+cohort+trial+'\\'+video[5:-7]):
#                         os.makedirs(frame_path_merged+cohort+trial+'\\'+video[5:-7])

In [59]:
def weighted_average_merge_frame(front_frame, side_frame):
    #Computing the weights based on confidence for a coordinate in the frame 
    total_confidence = front_frame.confidence+side_frame.confidence
    front_weight = front_frame.confidence/total_confidence
    side_weight = side_frame.confidence/total_confidence
    #Merged frame's ankle, toe and heels are a weighted average of the front and side 
    #view's values 
    merged_frame = front_frame.multiply(front_weight, axis=0) + side_frame.multiply(side_weight, axis=0)
    #Simply copy hip and knee coordinates from the front view frame 
    merged_frame[merged_frame.isna()] = front_frame 
    merged_frame= merged_frame.reindex(order)
    return merged_frame

In [84]:
# order_feet = ['right ankle', 'left ankle', 'left toe 1', 'left toe 2', 'left heel', 'right toe 1', 'right toe 2', \
#          'right heel']

# template = pd.DataFrame(data = 0, index = order_feet, columns = ['x', 'y', 'z', 'confidence'])
# print (template)
# for cohort in cohorts[:3]:
#     for trial in trials:   
#         feet2d_path = path_2d+cohort+trial+'\\feet\\'
#         videos = os.listdir(feet2d_path)
#         for video in videos:
#             print (video)
#             jpg = glob.glob(feet2d_path+video+'\\*.jpg')
#             csv = glob.glob(feet2d_path+video+'\\*.csv')
#             jpg_frames = [x.split('\\')[-1][:-4] for x in jpg]
#             csv_frames = [x.split('\\')[-1][:-4] for x in csv]
#             missing_feet_csv = [x for x in jpg_frames if x not in csv_frames]
#             print (len(missing_feet_csv))
#             for miss in missing_feet_csv:
#                 template.to_csv(path+cohort+trial+'\\feet\\'+video+'\\processed3d\\'+miss+'.csv')

             x  y  z  confidence
right ankle  0  0  0           0
left ankle   0  0  0           0
left toe 1   0  0  0           0
left toe 2   0  0  0           0
left heel    0  0  0           0
right toe 1  0  0  0           0
right toe 2  0  0  0           0
right heel   0  0  0           0
InkedGVS_212_T_T1_0_Trim
0
InkedGVS_212_T_T2_0_Trim
0
InkedGVS_213_T_T1_0_Trim
12
InkedGVS_213_T_T2_0_Trim
3
InkedGVS_214_T_T2_0_Trim
809
InkedGVS_215_T_T1_0_Trim
13
InkedGVS_215_T_T2_0_Trim
0
InkedGVS_216_T_T1_0_Trim
1
InkedGVS_216_T_T2_0_Trim
2
InkedGVS_217_T_T1_0_Trim
0
InkedGVS_217_T_T2_0_Trim
0
InkedGVS_218_T_T1_0_Trim
8
InkedGVS_218_T_T2_0_Trim
10
InkedGVS_219_T_T1_0_Trim
0
InkedGVS_219_T_T2_0_Trim
0
InkedGVS_212_W_T1_0_Trim
0
InkedGVS_212_W_T2_0_Trim
0
InkedGVS_213_W_T1_0_Trim
0
InkedGVS_213_W_T2_0_Trim
0
InkedGVS_214_W_T1_0_Trim
0
InkedGVS_214_W_T2_0_Trim
5
InkedGVS_215_W_T1_0_Trim
0
InkedGVS_215_W_T2_0_Trim
3
InkedGVS_216_W_T1_0_Trim
0
InkedGVS_216_W_T2_0_Trim
1
InkedGVS_217_W_T1_0_Tri

In [86]:
#Example, combine 3D_data\HOA\beam_walking\lower_body\InkedGVS_212_T_T1_1_Trim\processed3d\*.csv and 
#3D_data\HOA\beam_walking\feet\InkedGVS_212_T_T1_0_Trim\processed3d\*.csv to get 
#multi_view_merged_data\HOA\beam_walking\GVS_212_T_T1\*.csv

'''
Combining rules:
1. Combine only the frames that exist in both views (lower body and feet). This eliminates the deleted lower body 
frames that had >8 coordinates missing as being used. Further, this eliminates the possibility of using the 
frames that were trimmed in at least one of the views.
2. If the video doesn't exist in feet, for a video that exists in lower body, simply copy all the lower body frames 
as merged frames.
3. If the video exists in both lower body and feet, then we look for frames that exist in both views and merge them.
4. For merging, simply copy the hip and knee coordinates and weighted average (if value and confidence of left 
ankle's x-coordinate in lower body frame is v_l, c_l and in feet frame is v_f, c_f then merged left ankle's 
x-coordinate is [c_l/(c_l+c_f)]*v_l + [c_f/(c_l+c_f)]*v_f)) for ankle, toes and heel. 
''' 

for cohort in cohorts:
    for trial in trials:   
        merged_path = frame_path_merged+cohort+trial 
        if (os.path.exists(merged_path)):
            videos = os.listdir(merged_path)
            for video in videos:
                print (video)
                front_video = path+cohort+trial+'\\lower_body\\'+'Inked'+video+'_1_Trim'
                side_video = path+cohort+trial+'\\feet\\'+'Inked'+video+'_0_Trim'
                if (os.path.exists(front_video)):
                    print (front_video, 'exists')
                    if (os.path.exists(side_video)): #This is the case when both lower body and feet views exists
                        print (side_video, 'exists')
                        front_frames = os.listdir(front_video+'\\processed3d\\')
                        side_frames = os.listdir(side_video+'\\processed3d\\')
                        #Computing the merged frames only for frames that are common in both views 
                        common_frames = [x for x in front_frames if x in side_frames] 
#                         print (len(front_frames), len(side_frames), len(common_frames))
                        for frame in common_frames:
                            if (not os.path.exists ((merged_path+'\\'+video+'\\'+frame))):
    #                             print (frame)
                                front_frame = pd.read_csv(front_video+'\\processed3d\\'+frame, index_col = 0)
                                side_frame = pd.read_csv(side_video+'\\processed3d\\'+frame, index_col = 0)
    #                             print (front_frame)
    #                             print (side_frame)
                                merged_frame = weighted_average_merge_frame(front_frame, side_frame)
    #                             print (merged_frame)
                                merged_frame.to_csv(merged_path+'\\'+video+'\\'+frame)
                    else: #This is the case when lower body video exists but feet doesn't
                        print (side_video, 'does not exist')
                        print ('Copy')
#                         for file in glob.glob(os.path.join(front_video+'\\processed3d\\', '*.csv*')):
#                             #If only lower body exists, copy all processed 3d files directly to merged files 
#                             shutil.copy(file, merged_path+'\\'+video) 

GVS_212_T_T1
C:\Users\purpl\Box\Gait Video Project\GaitVideoData\video\3D_data\\HOA\beam_walking\lower_body\InkedGVS_212_T_T1_1_Trim exists
C:\Users\purpl\Box\Gait Video Project\GaitVideoData\video\3D_data\\HOA\beam_walking\feet\InkedGVS_212_T_T1_0_Trim exists
GVS_212_T_T2
C:\Users\purpl\Box\Gait Video Project\GaitVideoData\video\3D_data\\HOA\beam_walking\lower_body\InkedGVS_212_T_T2_1_Trim exists
C:\Users\purpl\Box\Gait Video Project\GaitVideoData\video\3D_data\\HOA\beam_walking\feet\InkedGVS_212_T_T2_0_Trim exists
GVS_213_T_T1
C:\Users\purpl\Box\Gait Video Project\GaitVideoData\video\3D_data\\HOA\beam_walking\lower_body\InkedGVS_213_T_T1_1_Trim exists
C:\Users\purpl\Box\Gait Video Project\GaitVideoData\video\3D_data\\HOA\beam_walking\feet\InkedGVS_213_T_T1_0_Trim exists
GVS_213_T_T2
C:\Users\purpl\Box\Gait Video Project\GaitVideoData\video\3D_data\\HOA\beam_walking\lower_body\InkedGVS_213_T_T2_1_Trim exists
C:\Users\purpl\Box\Gait Video Project\GaitVideoData\video\3D_data\\HOA\beam_w

### Check for missing values (if they exist) after merging and hip height normalization 

In [129]:
#Check if there are any missing values (x, y, z, confidence = 0, 0, 0, 0) in this combined data 
#and how can we treat them before feeding in the models 
#Check any remaining missing values (x, y, z, confidence = 0, 0, 0, 0) after the treatment
for cohort in cohorts:
    for trial in trials:
        merged_path = frame_path_merged+cohort+trial 
        if (os.path.exists(merged_path)):
            videos = os.listdir(merged_path)
            for video in videos:
                print (video)
                counts_missing_frames = 0 #Missing frames in a particular video
                frames = glob.glob(merged_path+'\\'+video+'\\*.csv')
                for frame in frames:
#                     print (frame)
                    frame_csv = pd.read_csv(frame, index_col = 0)
    #                     print (frame_csv)
                    missing = frame_csv[(frame_csv.x==0) & (frame_csv.y==0) & (frame_csv.z==0)] #Missing rows/keypoints in the frame
#                     print (missing) 
                    len_missing = len(missing)
                    if (len_missing!=0): #No missing values, just update the stats 
                        counts_missing_frames+=1
                        print (frame)
                print ('Missing values in ', video, 'are:', counts_missing_frames)

In [126]:
for cohort in cohorts:
    for trial in trials:
        merged_path = frame_path_merged+cohort+trial 
        if (os.path.exists(merged_path)):
            videos = os.listdir(merged_path)
            for video in videos:
                print (video)
                frames = glob.glob(merged_path+'\\'+video+'\\*.csv')
                sorted_frames = sorted(frames,  key=lambda name: int(name.split('\\')[-1][:-4]))
                for idx, frame in enumerate(sorted_frames):
#                     print ('Current frame: ', frame)
                    try:
                        frame_csv = pd.read_csv(frame, index_col = 0)
    #                     print (frame_csv)
                        missing = frame_csv[(frame_csv.x==0) & (frame_csv.y==0) & (frame_csv.z==0)] #Missing rows/keypoints in the frame
    #                     print (missing) 
                        len_missing = len(missing)
                        if (len_missing ==0): #No missing values, just update the stats 
                            pass
                        #If more than or equal to 75% of coordinates are missing, remove the frame
                        elif (len_missing>8): 
                            print(frame, " removed (>8 missing coordinates)!")
                            os.remove(frame)
                        else:
                            for i in range(len_missing):
                                marker = missing.iloc[i]
    #                             print ('Missing marker name', marker.name)
                                frame_csv.loc[marker.name] = missing_value_treatment(sorted_frames, idx, marker.name)
                            frame_csv.to_csv(frame)
                    except Exception as e:
                        print ('Exception!', e)
                print (video, 'DONE!')

GVS_212_T_T1
GVS_212_T_T1 DONE!
GVS_212_T_T2
GVS_212_T_T2 DONE!
GVS_213_T_T1
GVS_213_T_T1 DONE!
GVS_213_T_T2
GVS_213_T_T2 DONE!
GVS_214_T_T1
Exception! list index out of range
GVS_214_T_T1 DONE!
GVS_214_T_T2
GVS_214_T_T2 DONE!
GVS_215_T_T1
GVS_215_T_T1 DONE!
GVS_215_T_T2
GVS_215_T_T2 DONE!
GVS_216_T_T1
GVS_216_T_T1 DONE!
GVS_216_T_T2
GVS_216_T_T2 DONE!
GVS_217_T_T1
GVS_217_T_T1 DONE!
GVS_217_T_T2
GVS_217_T_T2 DONE!
GVS_218_T_T1
Exception! list index out of range
GVS_218_T_T1 DONE!
GVS_218_T_T2
GVS_218_T_T2 DONE!
GVS_219_T_T1
GVS_219_T_T1 DONE!
GVS_219_T_T2
GVS_219_T_T2 DONE!
GVS_212_W_T1
GVS_212_W_T1 DONE!
GVS_212_W_T2
GVS_212_W_T2 DONE!
GVS_213_W_T1
GVS_213_W_T1 DONE!
GVS_213_W_T2
GVS_213_W_T2 DONE!
GVS_214_W_T1
GVS_214_W_T1 DONE!
GVS_214_W_T2
GVS_214_W_T2 DONE!
GVS_215_W_T1
GVS_215_W_T1 DONE!
GVS_215_W_T2
GVS_215_W_T2 DONE!
GVS_216_W_T1
GVS_216_W_T1 DONE!
GVS_216_W_T2
GVS_216_W_T2 DONE!
GVS_217_W_T1
GVS_217_W_T1 DONE!
GVS_217_W_T2
GVS_217_W_T2 DONE!
GVS_218_W_T1
GVS_218_W_T1 DONE!
GV

In [None]:
for cohort in cohorts:
    for trial in trials:
        merged_path = frame_path_merged+cohort+trial 
        if (os.path.exists(merged_path)):
            videos = os.listdir(merged_path)
            for video in videos:
                print (video)
                counts_missing_frames = 0 #Missing frames in a particular video
                frames = glob.glob(merged_path+'\\'+video+'\\*.csv')
                for frame in frames:
#                     print (frame)
                    frame_csv = pd.read_csv(frame, index_col = 0)
#                         print (frame_csv)
                    missing = frame_csv[(frame_csv.x==0) & (frame_csv.y==0) & (frame_csv.z==0)] #Missing rows/keypoints in the frame
#                     print (missing) 
                    len_missing = len(missing)
                    if (len_missing!=0): #No missing values, just update the stats 
                        counts_missing_frames+=1
                        print (frame)
                print ('Missing values in ', video, 'are:', counts_missing_frames)

### Hip Height normalization on the merged data 

In [133]:
#After combining, scale all average hip heights to a constant (100 cm) to normalize for subject heights in our dataset
#This is to make sure that we eliminate the effects of size demographics from our dataset 
#Refer http://personal.cityu.edu.hk/meachan/Online%20Anthropometry/Chapter2/Ch2-5.htm
#Refer https://msis.jsc.nasa.gov/sections/section03.htm for estimates of hip heights 
#We use 100 cm as constant hip height for all subjects in our analysis 

#For every .csv in multi_view_merged_data\HOA\beam_walking\GVS_212_T_T1\, we create a new .csv in 
#multi_view_merged_data\HOA\beam_walking\GVS_212_T_T1\hip_height_normalized\, where each file is scaled by 100/avg,
#where avg = [left-hip z-coordinate + right-hip z-coordinate]/2 = average of the left and right hip's height

for cohort in cohorts:
    for trial in trials:
        merged_path = frame_path_merged+cohort+trial 
        if (os.path.exists(merged_path)):
            videos = os.listdir(merged_path)
            for video in videos:
                print (video)
                if not os.path.exists(merged_path+'\\'+video+'\\hip_height_normalized'):
                    os.makedirs(merged_path+'\\'+video+'\\hip_height_normalized')
                frames = glob.glob(merged_path+'\\'+video+'\\*.csv')
                for frame in frames:
#                     print (frame)
                    new_frame_save_path = merged_path+'\\'+video+'\\hip_height_normalized\\'+frame.split('\\')[-1]
                    frame_csv = pd.read_csv(frame, index_col = 0)
                    avg_hip_height = 0.5*(frame_csv.loc['right hip', 'z'] + frame_csv.loc['left hip', 'z'])
                    scaling_factor = const_hip_height/avg_hip_height 
                    #Scaling factor to make sure that when multiplied with average hip height, it scales that to 
                    #100 cm 
                    frame_csv[['x', 'y', 'z']] = scaling_factor*frame_csv[['x', 'y', 'z']] 
                    frame_csv.to_csv(new_frame_save_path)
                print (video, 'Done!')
                    

GVS_212_T_T1
GVS_212_T_T1 Done!
GVS_212_T_T2
GVS_212_T_T2 Done!
GVS_213_T_T1
GVS_213_T_T1 Done!
GVS_213_T_T2
GVS_213_T_T2 Done!
GVS_214_T_T1
GVS_214_T_T1 Done!
GVS_214_T_T2
GVS_214_T_T2 Done!
GVS_215_T_T1
GVS_215_T_T1 Done!
GVS_215_T_T2
GVS_215_T_T2 Done!
GVS_216_T_T1
GVS_216_T_T1 Done!
GVS_216_T_T2
GVS_216_T_T2 Done!
GVS_217_T_T1
GVS_217_T_T1 Done!
GVS_217_T_T2
GVS_217_T_T2 Done!
GVS_218_T_T1
GVS_218_T_T1 Done!
GVS_218_T_T2
GVS_218_T_T2 Done!
GVS_219_T_T1
GVS_219_T_T1 Done!
GVS_219_T_T2
GVS_219_T_T2 Done!
GVS_212_W_T1
GVS_212_W_T1 Done!
GVS_212_W_T2
GVS_212_W_T2 Done!
GVS_213_W_T1
GVS_213_W_T1 Done!
GVS_213_W_T2
GVS_213_W_T2 Done!
GVS_214_W_T1
GVS_214_W_T1 Done!
GVS_214_W_T2
GVS_214_W_T2 Done!
GVS_215_W_T1
GVS_215_W_T1 Done!
GVS_215_W_T2
GVS_215_W_T2 Done!
GVS_216_W_T1
GVS_216_W_T1 Done!
GVS_216_W_T2
GVS_216_W_T2 Done!
GVS_217_W_T1
GVS_217_W_T1 Done!
GVS_217_W_T2
GVS_217_W_T2 Done!
GVS_218_W_T1
GVS_218_W_T1 Done!
GVS_218_W_T2
GVS_218_W_T2 Done!
GVS_219_W_T1
GVS_219_W_T1 Done!
GVS_219_

In [135]:
#Check for any missing values after the hip height normalization 
for cohort in cohorts:
    for trial in trials:
        merged_path = frame_path_merged+cohort+trial 
        if (os.path.exists(merged_path)):
            videos = os.listdir(merged_path)
            for video in videos:
                print (video)
                counts_missing_frames = 0 #Missing frames in a particular video
                frames = glob.glob(merged_path+'\\'+video+'\\hip_height_normalized\\*.csv')
                for frame in frames:
#                     print (frame)
                    frame_csv = pd.read_csv(frame, index_col = 0)
    #                     print (frame_csv)
                    missing = frame_csv[(frame_csv.x==0) & (frame_csv.y==0) & (frame_csv.z==0)] #Missing rows/keypoints in the frame
#                     print (missing) 
                    len_missing = len(missing)
                    if (len_missing!=0): #No missing values, just update the stats 
                        counts_missing_frames+=1
                        print (frame)
                print ('Missing values in ', video, 'are:', counts_missing_frames)

#There are no missing values now!!!


GVS_212_T_T1
Missing values in  GVS_212_T_T1 are: 0
GVS_212_T_T2
Missing values in  GVS_212_T_T2 are: 0
GVS_213_T_T1
Missing values in  GVS_213_T_T1 are: 0
GVS_213_T_T2
Missing values in  GVS_213_T_T2 are: 0
GVS_214_T_T1
Missing values in  GVS_214_T_T1 are: 0
GVS_214_T_T2
Missing values in  GVS_214_T_T2 are: 0
GVS_215_T_T1
Missing values in  GVS_215_T_T1 are: 0
GVS_215_T_T2
Missing values in  GVS_215_T_T2 are: 0
GVS_216_T_T1
Missing values in  GVS_216_T_T1 are: 0
GVS_216_T_T2
Missing values in  GVS_216_T_T2 are: 0
GVS_217_T_T1
Missing values in  GVS_217_T_T1 are: 0
GVS_217_T_T2
Missing values in  GVS_217_T_T2 are: 0
GVS_218_T_T1
Missing values in  GVS_218_T_T1 are: 0
GVS_218_T_T2
Missing values in  GVS_218_T_T2 are: 0
GVS_219_T_T1
Missing values in  GVS_219_T_T1 are: 0
GVS_219_T_T2
Missing values in  GVS_219_T_T2 are: 0
GVS_212_W_T1
Missing values in  GVS_212_W_T1 are: 0
GVS_212_W_T2
Missing values in  GVS_212_W_T2 are: 0
GVS_213_W_T1
Missing values in  GVS_213_W_T1 are: 0
GVS_213_W_T2

#### Patterns and Sanity checks!
Note the path for final files is GaitVideoData\video\multi_view_merged_data\HOA\beam_walking\GVS_212_T_T1\hip_height_normalized\*.csv 

In combined data after hip height normalization, take a look at trend of z-coordinate especially to 
check the hip, knee and ankle height make sense w.r.t norms and similarly some other sanity checks 
like distance between toe and heel's y-coordinate etc.



In [185]:
def sanity_check_merged(frame_csv):
    #Sanity check to check if there are any x<0, or y<0 or z<0 that 
    #we may need to treat 
    global sanity_x, sanity_y, sanity_z
    if ((frame_csv.x<0).sum()>0 or (frame_csv.x>x_max+x_min).sum()>0):
        print (frame)
        sanity_x+=1
        print ('x-coordinate sanity not satisfied for ', frame)
        os.remove(frame)
    elif ((frame_csv.y<0).sum()>0 or (frame_csv.y>y_max+y_min).sum()>0):
        print (frame)
        sanity_y+=1
        print ('y-coordinate sanity not satisfied for ', frame)
        os.remove(frame)
    elif ((frame_csv.z<0).sum()>0 or (frame_csv.z>z_max+z_min).sum()>0):
        print (frame)
        sanity_z+=1
        print ('z-coordinate sanity not satisfied for ', frame)
        os.remove(frame)
    else:
        pass

In [186]:
#Sanity checks (0<x<=87, 0<y<=310, 0<z<=120)
sanity_x, sanity_y, sanity_z = 0, 0, 0
for cohort in cohorts:
    for trial in trials:
        merged_path = frame_path_merged+cohort+trial 
        if (os.path.exists(merged_path)):
            videos = os.listdir(merged_path)
#             print (len(videos))
        for video in videos:
            frames = glob.glob(merged_path+'\\'+video+'\\hip_height_normalized\\*.csv')
            sorted_frames = sorted(frames,  key=lambda name: int(name.split('\\')[-1][:-4]))
            for frame in sorted_frames:
#                 print (frame)
                frame_csv = pd.read_csv(frame, index_col = 0)
                sanity_check_merged(frame_csv)
        
            print (video, 'Done!')
            print ('Sanities (x, y, z in that order) now:', sanity_x, sanity_y, sanity_z)
print ('Count of violating sanity x, y and z coordinates are: ', sanity_x, sanity_y, sanity_z)

GVS_212_T_T1 Done!
Sanities (x, y, z in that order) now: 0 0 0
GVS_212_T_T2 Done!
Sanities (x, y, z in that order) now: 0 0 0
GVS_213_T_T1 Done!
Sanities (x, y, z in that order) now: 0 0 0
GVS_213_T_T2 Done!
Sanities (x, y, z in that order) now: 0 0 0
GVS_214_T_T1 Done!
Sanities (x, y, z in that order) now: 0 0 0
GVS_214_T_T2 Done!
Sanities (x, y, z in that order) now: 0 0 0
GVS_215_T_T1 Done!
Sanities (x, y, z in that order) now: 0 0 0
GVS_215_T_T2 Done!
Sanities (x, y, z in that order) now: 0 0 0
GVS_216_T_T1 Done!
Sanities (x, y, z in that order) now: 0 0 0
GVS_216_T_T2 Done!
Sanities (x, y, z in that order) now: 0 0 0
GVS_217_T_T1 Done!
Sanities (x, y, z in that order) now: 0 0 0
GVS_217_T_T2 Done!
Sanities (x, y, z in that order) now: 0 0 0
GVS_218_T_T1 Done!
Sanities (x, y, z in that order) now: 0 0 0
GVS_218_T_T2 Done!
Sanities (x, y, z in that order) now: 0 0 0
GVS_219_T_T1 Done!
Sanities (x, y, z in that order) now: 0 0 0
GVS_219_T_T2 Done!
Sanities (x, y, z in that order) now

In [10]:
#Patterns 
#We will make a csv with mean, std, min, max of the 12*3+4 features (12 coordinates across 3 dimensions and 4 extra 
#features namely (y-coordinate of toe1 - y-coordinate of heel, both left and right) and similarly, (y-coordinate of toe2 - y-coordinate of 
#heel)). So columns are mean, std, min and max and rows are 40 rows for each video and so approximately 107*40
#rows for 107 videos and 40 features in each. 
labels = [o + '-'+ y for o in order for y in ['x', 'y', 'z']] 
#right hip-x, right hip-y, right hip-z, right knee-x, right knee-y, right knee-z, right ankle-x, right ankle-y, ...
patterns = pd.DataFrame (columns = ['mean', 'std', 'min', 'max'])

for cohort in cohorts:
    for trial in trials:
        merged_path = frame_path_merged+cohort+trial 
        if (os.path.exists(merged_path)):
            videos = os.listdir(merged_path)
#             print (len(videos))
        for video in videos:
            start_time = time.time()
            frames = glob.glob(merged_path+'\\'+video+'\\hip_height_normalized\\*.csv')
            temp = pd.DataFrame(columns = labels) #Dataframe to collect 40 features from all ~1500 frames of each video
            for frame in frames:
#                     print (frame)
                frame_csv = pd.read_csv(frame, index_col = 0)
                temp.loc[len(temp)] = frame_csv[['x', 'y', 'z']].values.flatten()
            #Adding the 4 features for y-distance between the right/left toes (toe1/toe2) and heel
            temp['diff right toe1-heel-y'] = temp['right toe 1-y'] - temp['right heel-y']
            temp['diff right toe2-heel-y'] = temp['right toe 2-y'] - temp['right heel-y']
            temp['diff left toe1-heel-y'] = temp['left toe 1-y'] - temp['left heel-y']
            temp['diff left toe2-heel-y'] = temp['left toe 2-y'] - temp['left heel-y']
#             display(temp)
            video_wise_df = pd.DataFrame(data = [temp.mean(), temp.std(), temp.min(), temp.max()], \
                                         index = ['mean', 'std', 'min', 'max']).T
            
            video_wise_df['video'] = video
            patterns = patterns.append(video_wise_df)
            print (video, 'done in', time.time()-start_time)
patterns.to_csv(frame_path_merged+'final_merged_coordinate_patterns.csv')

GVS_212_T_T1 done in 52.12000393867493
GVS_212_T_T2 done in 33.31150698661804
GVS_213_T_T1 done in 35.68746042251587
GVS_213_T_T2 done in 32.27617573738098
GVS_214_T_T1 done in 35.742998123168945
GVS_214_T_T2 done in 34.9292151927948
GVS_215_T_T1 done in 35.737688064575195
GVS_215_T_T2 done in 40.49642038345337
GVS_216_T_T1 done in 38.3670608997345
GVS_216_T_T2 done in 48.25978970527649
GVS_217_T_T1 done in 41.49847221374512
GVS_217_T_T2 done in 39.99477410316467
GVS_218_T_T1 done in 40.83507943153381
GVS_218_T_T2 done in 40.645798206329346
GVS_219_T_T1 done in 37.664215326309204
GVS_219_T_T2 done in 35.987000703811646
GVS_212_W_T1 done in 37.295764446258545
GVS_212_W_T2 done in 49.678908824920654
GVS_213_W_T1 done in 42.217994689941406
GVS_213_W_T2 done in 41.55653667449951
GVS_214_W_T1 done in 39.7021541595459
GVS_214_W_T2 done in 40.135558128356934
GVS_215_W_T1 done in 41.36580038070679
GVS_215_W_T2 done in 40.70695924758911
GVS_216_W_T1 done in 42.52824282646179
GVS_216_W_T2 done i

In [15]:
patterns.groupby(patterns.index).mean().to_csv(frame_path_merged+'grouped_final_merged_coordinate_patterns.csv')
display(patterns.groupby(patterns.index).mean())

#Looking at the left/right hip-z (~100 cm), left/right knee z (~80cm), left/right ankle z (~20cm), 
#left/right toes z (~10cm), left/right heel z (~20cm), diff toe-heel y (~10cm), the 
#distributions/patterns do make sense relative to the actual human body coordinates. 

Unnamed: 0,mean,std,min,max
diff left toe1-heel-y,13.655794,6.109036,-24.68737,52.355033
diff left toe2-heel-y,11.861106,6.336333,-27.911008,52.697156
diff right toe1-heel-y,13.594777,6.018656,-22.776333,47.316134
diff right toe2-heel-y,11.248976,6.050511,-30.620003,45.388725
left ankle-x,22.623641,4.621671,9.37666,46.820653
left ankle-y,146.062024,16.26278,98.875735,200.72301
left ankle-z,19.472761,7.316207,5.725986,49.646566
left heel-x,24.130703,5.173933,8.959063,50.363302
left heel-y,148.922832,16.666629,100.440642,205.057533
left heel-z,16.737418,7.702586,2.13452,48.46987
