# Gait Video Study 
### Postprocessing the created 2D keypoints (via OpenPose) for lower body and feet 

In [2]:
import numpy as np
import cv2
import os
import glob
import matplotlib.pyplot as plt
import pandas as pd
import warnings
warnings.filterwarnings("ignore")
from IPython.display import display, HTML

In [106]:
path = 'C:\\Users\\purpl\\Box\\Gait Video Project\\GaitVideoData\\video\\openpose_data\\'

#Configuration for which to run the code for 
cohorts = ['\\HOA', '\\MS', '\\PD', '\\ExtraHOA']
trials = ['\\beam_walking', '\\walking']
cameras = ['\\feet\\', '\\lower_body\\']

#Dimensions of the image 
w = 800
h = 448

### Utility functions

In [107]:
#The left hip marker must have x-coordinate values greater than right hip marker
def check_left_right_swap_needed(coords):
    global count #Telling the function that we are going to use global variable count and not the default local variable
    mid_hip_x = coords.loc[8, 'x']
    left_hip_x = coords.loc[12, 'x']
    right_hip_x = coords.loc[9, 'x']
    #If the left hip marker do not have x-coordinate values greater than right hip marker, a swap is needed
    if ((left_hip_x<mid_hip_x) & (right_hip_x>mid_hip_x)): 
        swap_needed = True
        count+=1
    else:
        swap_needed = False
    return swap_needed

In [115]:
def left_right_swap(coords):
    data_frame = pd.DataFrame()
#     print ('coords', coords[1:])
    if (check_left_right_swap_needed(coords)):
#         print (coords[1:])
#         print ('Left-right swap required!')
        data_frame=pd.DataFrame(coords[1:].values, index = order_swapped, columns = ['x', 'y', 'confidence'])
    else:
        data_frame=pd.DataFrame(coords[1:].values, index = order, columns = ['x', 'y', 'confidence'])
    data_frame = data_frame.reindex(order)
#     print (data_frame)
# print ('swapped df', data_frame, frame_path+video+'\\processed2d\\'+frame.split('\\')[-1][:-3]+'csv')
    return data_frame

In [116]:
def rename(dataframe):
    dataframe['cohort'][dataframe.cohort=='\\ExtraHOA'] = 'HOA'
    dataframe['cohort'][dataframe.cohort=='\\HOA'] = 'HOA'
    dataframe['cohort'][dataframe.cohort=='\\MS'] = 'MS'
    dataframe['cohort'][dataframe.cohort=='\\PD'] = 'PD'
    dataframe['trial'][dataframe.trial=='\\beam_walking'] = 'BW'
    dataframe['trial'][dataframe.trial=='\\walking'] = 'W'

In [117]:
#To check if the particular marker is not missing/is available in the frame 
def marker_available(coordinate_name, frame_path):
    frame_csv = pd.read_csv(frame_path, index_col = 0)
#     print (frame_csv)
    missing = (frame_csv.loc[coordinate_name].x==0) & (frame_csv.loc[coordinate_name].y==0) #Marker is missing in the frame
#     print ('Bool missing', not(missing))
    fill_list = list(frame_csv.loc[coordinate_name].values)
#     print ('Fill up values', fill_list)
    return not(missing), fill_list  

In [118]:
#Create the temporary interpolation dataframe for interpolation for a particular marker in current frame
def create_missing_value_fillup_df(sorted_frames, idx, marker_name):
    seq_travelled = []  
    forward_idx, backward_idx, forward_count, backward_count = 1, 1, 0, 0
    #3 forward and 3 backward coordinates and 1 current coordinate as rows 
    #4 time, x, y, and confidence score as columns 
    interpolation_df = pd.DataFrame(np.zeros([2*num_coords_each_side+1, 4])*np.nan)
#     print ('Initial Interpolate DF:', interpolation_df)
    while (forward_count<3):
        forward_frame = sorted_frames[idx+forward_idx]
#         print ('forward frame', forward_frame)
        available, fill_list = marker_available(marker_name, forward_frame)
#         print ('Availability', available)
        if available:
#             print ('forward frame', forward_frame)
            forward_count+=1
            seq_travelled.append(forward_idx)
            interpolation_df.iloc[num_coords_each_side+forward_count] = [forward_idx]+fill_list 
            #Fill in indices 4, 5, 6
        forward_idx+=1
        
    while (backward_count<3):
        backward_frame = sorted_frames[idx-backward_idx]
#         print ('backward frame', backward_frame)
        available, fill_list = marker_available(marker_name, backward_frame)
        if available:
#             print ('backward frame', backward_frame)
            seq_travelled.append(backward_idx)
            interpolation_df.iloc[2-backward_count] = [-1*backward_idx]+fill_list #Fill in indices 0, 1, 2
            backward_count+=1
        backward_idx+=1
    interpolation_df.columns = ['time', 'x', 'y', 'conf']
    #Setting time 0 for current frame w.r.t. forward having positive time and backward frames having negative time
    interpolation_df.iloc[3].time = 0 #Row 3 is for current frame and column 0 is for time index 
#     print ('Interpolate DF: ', interpolation_df)
    return interpolation_df, seq_travelled

In [119]:
#For missing value treatment:
#1. Keep a threshold i.e. if more than 8 out of 12 coordinates are missing 
#(i.e. more than or equal to 75% of coordinates are missing), then ignore that frame 
#2. For a marker missing from the current frame, we are using interpolation with 5 nearest frames with the 
#marker non-missing in the forward and backward direction from the frames and keep time for these frames as part of 
#the interpolation process
def missing_value_treatment(sorted_frames, idx, marker_name):
    #Consecutive no. of frames travelled in the sequence to fill up the current frame
    interpolate_df, seq_travelled = create_missing_value_fillup_df(sorted_frames, idx, marker_name)
    #Fill the particular marker in current frame using interpolation wrt time 
    #Set the time column as index of this dataframe 
    interpolate_df['time']-=interpolate_df['time'][0]
    interpolate_df.set_index('time')
    #Quadratic will automatically use the index values as the corresponding time 
    #Hence this a index aware interpolation 
    interpolate_df.interpolate(method = 'quadratic', inplace = True)
    #Extract the row at the 3rd index filled in using interpolation 
    #and set up the missing value and save as new csv when all missing markers 
    #are done filling for the current frame 
    interpolate_df.reset_index()
    #                             print ('Filled interpolate DF:', interpolate_df)
    #Not retaining the time we had used in interpolation step
    return interpolate_df.iloc[3].values[1:], seq_travelled

## Front camera (lower body view): Left-right swap and missing value treatment
### Left-right swap if needed

In [4]:
#12 required coordinates for lower body indices + middle hip for swapping left/right if needed 
lower_body_indices = list(range(8, 15)) + list(range(19, 25))
print (len(lower_body_indices))
order = ['right hip', 'right knee', 'right ankle', 'left hip', 'left knee', 'left ankle', 'left toe 1', 'left toe 2', \
         'left heel', 'right toe 1', 'right toe 2', 'right heel']
order_swapped = ['left hip', 'left knee', 'left ankle', 'right hip', 'right knee', 'right ankle', 'right toe 1', 'right toe 2', \
         'right heel', 'left toe 1', 'left toe 2', 'left heel']

13


In [None]:
#Left-right swap over all subjects and trials for lower-body extracted coordinates 
left_right_count_df = pd.DataFrame(columns = ['cohort', 'trial', 'video', 'count_left_right_swaps', 'count_total_frames'])
global count 
for cohort in cohorts:
    for trial in trials:
        for camera in cameras[1:]: #Front look camera only
            frame_path = path+cohort+trial+camera #Path to save the frames to 
            if (os.path.exists(frame_path)):
                videos = os.listdir(frame_path)
#             print (len(videos))
                for video in videos:
                    if not os.path.exists(frame_path+video+'\\processed2d'):
                        os.makedirs(frame_path+video+'\\processed2d')
                    frames = glob.glob(frame_path+video+'\\*.jpg')
                    count = 0 #Count of left-right swapped frames 
                    total_frames = 0 #Count of total frames 
                    for frame in frames:
    #                     print (frame)
                        csv_path = frame_path+video+'\\processed2d\\'+frame.split('\\')[-1][:-3]+'csv'
                        if not os.path.exists(csv_path):
                            try:
                                total_frames+=1
                                frame_csv = pd.read_csv(frame[:-3]+'csv', index_col = None)
                                coords = frame_csv.iloc[lower_body_indices][['x', 'y', 'confidence']]
                                data_frame = left_right_swap(coords)
                                data_frame.to_csv(csv_path)
                            except Exception as e:
                                print (e)
                    left_right_count_df.loc[len(left_right_count_df)] = [cohort, trial, video, count, total_frames]
left_right_count_df.to_csv(path+'left_right_count_lower_body_df.csv')

### Missing value imputation 

In [120]:
#Missing value treatment over all subjects and trials for lower-body extracted coordinates 
stats_cols = ['cohort', 'trial', 'video', 'count_missing0', 'count_missing1', \
                         'count_missing2', 'count_missing3', 'count_missing4', 'count_missing5', \
                         'count_missing6', 'count_missing7', 'count_missing8', \
                         'count_missing_greater8', 'count_total_frames']

cols_seq_travelled_df = ['cohort', 'trial', 'video', 'frame', 'missed marker', 'forward1', 'forward2', 'forward3', \
                        'backward1', 'backward2', 'backward3']
missing_value_lower_body_stats = pd.DataFrame(columns = stats_cols)
#Use 3 forward and backward coordinates to fill up the current coordinate 
num_coords_each_side = 3 
#Dataframe for store stats of consecutive frames explored to get non-missing values for the marker of frame of video
seq_travelled_dataframe = pd.DataFrame(columns = cols_seq_travelled_df) 

In [None]:
for cohort in cohorts:
    for trial in trials:
        frame_path = path+cohort+trial+'\\lower_body\\'
        videos = os.listdir(frame_path)
        for video in videos:
            csv_path = frame_path+video+'\\processed2d'
            frames = glob.glob(csv_path+'\\*.csv')
            sorted_frames = sorted(frames,  key=lambda name: int(name.split('\\')[-1][:-4]))
            counts_missing_frames = [0]*10 #Count of missing 0 markers to missing > 8 markers 
            for idx, frame in enumerate(sorted_frames):
#                 print ('Current frame: ', frame)
                try:
                    frame_csv = pd.read_csv(frame, index_col = 0)
#                     print (frame_csv)
                    missing = frame_csv[(frame_csv.x==0) & (frame_csv.y==0)] #Missing rows/keypoints in the frame
#                     print (missing) 
                    len_missing = len(missing)
                    if (len_missing ==0): #No missing values, just update the stats 
                        counts_missing_frames[len_missing]+=1
                    #If more than or equal to 75% of coordinates are missing, remove the frame
                    elif (len_missing>8): 
                        print(frame, " removed (>8 missing coordinates)!")
                        os.remove(frame)
                        counts_missing_frames[9]+=1
                    else:
                        counts_missing_frames[len_missing]+=1
                        for i in range(len_missing):
                            marker = missing.iloc[i]
#                             print ('Missing marker name', marker.name)
                            frame_csv.loc[marker.name], seq_travelled = missing_value_treatment(sorted_frames, idx, marker.name)
                            #Fill in the seq_travelled_dataframe with video, frame, missing marker, travelled 
                            seq_travelled_dataframe.loc[len(seq_travelled_dataframe)]= [cohort, trial, video, frame.split('\\')[-1], \
                                                                                        marker.name] + seq_travelled   
                        frame_csv.to_csv(frame)
                except Exception as e:
                    print ('Exception!', e)
            #In each video for each cohort, trial, how many out of total frames with missing 
            #no marker, missing 1 marker, missing 2 markers, ..., missing more than 8 markers hence deleted
            missing_value_lower_body_stats.loc[len(missing_value_lower_body_stats)] = [cohort, trial, video] + \
            counts_missing_frames + [len(frames)]
        print (video, 'DONE!')
missing_value_lower_body_stats.to_csv(path+'missing_value_lower_body_stats.csv')  
seq_travelled_dataframe.to_csv(path+'missing_values_seq_travelled_dataframe_lower_body_stats.csv')  

C:\Users\purpl\Box\Gait Video Project\GaitVideoData\video\openpose_data\\HOA\beam_walking\lower_body\InkedGVS_212_T_T1_1_Trim\processed2d\1726.csv  removed (>8 missing coordinates)!
Exception! [Errno 2] File C:\Users\purpl\Box\Gait Video Project\GaitVideoData\video\openpose_data\\HOA\beam_walking\lower_body\InkedGVS_212_T_T1_1_Trim\processed2d\1726.csv does not exist: 'C:\\Users\\purpl\\Box\\Gait Video Project\\GaitVideoData\\video\\openpose_data\\\\HOA\\beam_walking\\lower_body\\InkedGVS_212_T_T1_1_Trim\\processed2d\\1726.csv'
C:\Users\purpl\Box\Gait Video Project\GaitVideoData\video\openpose_data\\HOA\beam_walking\lower_body\InkedGVS_212_T_T2_1_Trim\processed2d\79.csv  removed (>8 missing coordinates)!
Exception! [Errno 2] File C:\Users\purpl\Box\Gait Video Project\GaitVideoData\video\openpose_data\\HOA\beam_walking\lower_body\InkedGVS_212_T_T2_1_Trim\processed2d\79.csv does not exist: 'C:\\Users\\purpl\\Box\\Gait Video Project\\GaitVideoData\\video\\openpose_data\\\\HOA\\beam_walkin

In [None]:
#Check that there are no left-right swaps or missing values left now for lower body frames 
count_missing, count_swap = 0, 0
for cohort in cohorts:
    for trial in trials:
        frame_path = path+cohort+trial+'\\lower_body\\'
        videos = os.listdir(frame_path)
        for video in videos:
            csv_path = frame_path+video+'\\processed2d'
            frames = glob.glob(csv_path+'\\*.csv')
            for frame in enumerate(sorted_frames):
                try:
                    frame_csv = pd.read_csv(frame, index_col = 0)
                    missing = frame_csv[(frame_csv.x==0) & (frame_csv.y==0)] #Missing rows/keypoints in the frame
                    len_missing = len(missing)
                    if (len_missing !=0): #No missing values, just update the stats 
                        count_missing+=1
                    #If the left hip marker do not have x-coordinate values greater than right hip marker, a swap is needed
                    if (frame_csv.loc['left_hip'].x<frame_csv.loc['right_hip'].x)
                        count_swap+=1
print ('Missing values left in lower body are:', count_missing)
print ('Swaps needed left in lower body are:', count_swap)

### Statistics of left-right swaps 

In [None]:
left_right_swaps_stats_df = pd.read_csv(path+'left_right_count_lower_body_df.csv', index_col = 0)
left_right_swaps_stats_df['proportion_swaps (in %)'] = 100*left_right_swaps_stats_df['count_left_right_swaps']/left_right_swaps_stats_df['count_total_frames']
rename(left_right_swaps_stats_df) #Renaming the cohort and trial 
left_right_swaps_stats_df

In [None]:
#Boxplot
plt.figure(figsize = (25, 6))
ax = sns.boxplot(y = 'value', x = 'feature_name' , hue = 'Label', data=new_regressN_df[new_regressN_df['TrialID']==2], orient = 'v', 
            linewidth=2.5, palette="Set3", showfliers=False, width=0.8)

# ax = sns.swarmplot(y = 'value', x = 'feature_name' , hue = 'Label', data=new_regressN_df[new_regressN_df['TrialID']==2],
#                    dodge = True, palette="Set2")
handles, _ = ax.get_legend_handles_labels()
sns.despine(offset=0)
handles, _ = ax.get_legend_handles_labels()
ax.legend(handles, ['HOA', 'PwMS'], loc = 'best')
plt.ylabel(r"Normalized values")
plt.xlabel('')
plt.xticks(list(range(0, 18)), feature_display_names)
plt.title('Multiple regression-based normalized gait data')
plt.ylim([0,1])
# plt.savefig(path + '..//viz//viz_regressN_swarm_trialWT.png', dpi = 250) #Use Box plot here 
plt.savefig(path + '..//viz//viz_regressN_trialWT.png', dpi = 250) #Use Swarmplot here 
plt.show()

In [None]:
#Statistics of left right swaps for the paper
print ('Mean swaps')
display(left_right_swaps_stats_df.groupby(['cohort', 'trial']).mean())
print ('Standard deviation of swaps')
display(left_right_swaps_stats_df.groupby(['cohort', 'trial']).std())

### Statistics for missing value imputation for lower body OpenPose markers 

## Side camera (feet view)
### Retaining only coordinates with confidence score above a threshold (max. 8: 2 ankles, 4 toes and 2 heels)

In [None]:
#Extract only ankles, toes and heels 
#Retain only the markers with confidence score higher than a set threshold 
#Do not fill any missing values 
for cohort in cohorts:
    for trial in trials:
        for camera in cameras[1:]: #Front look camera only
            frame_path = path+cohort+trial+camera #Path to save the frames to 
            if (os.path.exists(frame_path)):
                videos = os.listdir(frame_path)
#             print (len(videos))
                for video in videos:
                    if not os.path.exists(frame_path+video+'\\processed2d'):
                        os.makedirs(frame_path+video+'\\processed2d')