Run the following code block only if running on Google Colab

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Import Libraries
import os
import math
import pickle

import pandas as pd

from pathlib import Path
from itertools import combinations

# Sklearn
from sklearn.svm import LinearSVC
from sklearn.preprocessing import StandardScaler

from sklearn.pipeline import make_pipeline
from sklearn.datasets import make_classification

from sklearn.metrics import accuracy_score
from sklearn.model_selection import cross_validate

### Herlper Variables

In [None]:
keypoints_of_interest = [
     "left_shoulder", "right_shoulder", "left_elbow", "right_elbow", "left_wrist", "right_wrist"
]

connections_of_interest = {
    "left_upper_arm" : ["left_shoulder", "left_elbow"],
    "left_lower_arm" : ["left_elbow", "left_wrist"],
    "right_upper_arm" : ["right_shoulder", "right_elbow"],
    "right_lower_arm" : ["right_elbow", "right_wrist"]
}

data_attr_of_interest = [
    kp + "_" + coord
    for kp in keypoints_of_interest
    for coord in ["x","y"]
]

connection_combinations = list(combinations(connections_of_interest.keys(), 2))

### Helper Functions

In [None]:
def dot(vA, vB):
    return vA[0]*vB[0]+vA[1]*vB[1]

def make_short_form(name):
  return "".join([word[0].upper() for word in name.split("_")])

def to_ij_vector(coordinates):
  return [(round(coordinates[1][0]-coordinates[0][0],4)), (round(coordinates[1][1]-coordinates[0][1],2))]

def ang(vA, vB):
    try:
      # Get dot prod
      dot_prod = dot(vA, vB)
      # Get magnitudes
      magA = dot(vA, vA)**0.5
      magB = dot(vB, vB)**0.5
      # Get cosine value
      cos_ = dot_prod/magA/magB
      # Get angle in radians and then convert to degrees
      angle = math.acos(dot_prod/magB/magA)
      # Basically doing angle <- angle mod 360
      ang_deg = math.degrees(angle)%360

      if ang_deg-180>=0:
          # As in if statement
          return 360 - ang_deg
      else:

          return ang_deg
    except Exception as e:
      print(e, ":", vA, vB)
      return 0


## Train the model

### Key variables for training the model

**** Specify root directory *****

In [None]:
root_dir = Path("/content/drive/MyDrive/gestures_dataset_new/keypoints")

In [None]:
# Key Variables

# Paths to the folder containing keypoints csvs

training_data_path = root_dir / "train"
testing_data_path = root_dir / "test"

# Where and what to save the model files as
save_model_as = root_dir / "feature_engineering_model.sklearn-1-2-2.pickle"


### Training Methods - data reading and feature geenrations

In [None]:
# Helper function for applying get_features method for a group of frames belonging to a single video file.
def remove_non_csv_files(directory_contents):
    for file in directory_contents:
        if file[-3:] != 'csv':
            print("Excluded file: ", file)
            directory_contents.remove(file)
    return directory_contents

def get_video_features(group):
  '''
  Arguments:
    groups  pandas.groups object

  Returns:
    pandas.series object
  '''
  group = group.sort_values(by="frame_sequence").drop(columns=["frame_sequence"])
  return get_features(group)

# Compile test/train data df
def compile_data_df(data_path):
  '''
  path  location to the folder that contains subfolders - hand_waving, pointing, other

  '''
  data_path = Path(data_path)
  data_df = pd.DataFrame()
  subfolders = [item for item in os.listdir(data_path) if os.path.isdir(data_path / item)]
  for subfolder in subfolders:
    files = os.listdir(data_path / subfolder)
    files = remove_non_csv_files(files)
    for file in files:
      df = pd.read_csv(data_path / subfolder / file)
      df["filename"] = file
      df["frame_sequence"] = range(1, len(df)+1)
      df["label"]=subfolder
      data_df = pd.concat([df, data_df])
  return data_df

# Function to spit out features given frames and keypoints
def get_features(df):
  data = df[data_attr_of_interest].dropna()

  for val in keypoints_of_interest:
    data[val] = list(zip(data[f"{val}_x"], data[f"{val}_y"]))

  for connection, (keypoint1, keypoint2) in connections_of_interest.items():
    data[connection] = list(zip(data[keypoint1], data[keypoint2]))

  # Converting to i j vectors
  for connection in connections_of_interest:
    data[connection] = data[connection].apply(to_ij_vector)

  # Connection vector angles
  rest_pos_vector = [0,-1] # downward, -j unit vector

  for connection in connections_of_interest:
    data[connection + "_angle"] = data[connection].apply(ang, vB=rest_pos_vector)

  # Feature - Ratio of angles lower angle:higher angle of anlge between left upper and lower arm and for right
  ratios = []

  for idx, row in data.iterrows():
    left_arm_angle = ang(row["left_upper_arm"], row["left_lower_arm"])
    right_arm_angle = ang(row["right_upper_arm"], row["right_lower_arm"])
    ratios.append(round(min([left_arm_angle, right_arm_angle])*100/max([left_arm_angle, right_arm_angle]),4))

  data["arm_angle_ratio"] = ratios

  # Ratio of y of wrists positions
  ratios = []

  for idx, row in data.iterrows():
    min_y = min([row["left_wrist_y"], row["right_wrist_y"]])
    max_y = max([row["left_wrist_y"], row["right_wrist_y"]])
    ratios.append(round(min_y*100/max_y,4))

  data["wrist_y_ratio"] = ratios

  # Difference of angle between each connection combination
  for connection1, connection2 in connection_combinations:
    data[f"{make_short_form(connection1)}_{make_short_form(connection2)}"] = (data[connection1 + "_angle"] - data[connection2 + "_angle"]).abs()

  # Ratio of y of wrist vs shoulder positions
  ratios = []

  for idx, row in data.iterrows():
    left = abs(row["left_wrist_y"] - row["left_shoulder_y"])
    right = abs(row["right_wrist_y"] - row["right_shoulder_y"])
    ratios.append(round(min([left,right])*100/max([left,right]),4))

  data["wrist_shoulder_y_ratio"] = ratios

  # Start summarizing
  starting_col_idx = data.columns.tolist().index("left_upper_arm_angle")

  features_video = {}

  for col in data.columns[starting_col_idx:]:
    features_video[f"{col}_min"] = data[col].min().tolist()
    features_video[f"{col}_max"] = data[col].max().tolist()
    features_video[f"{col}_mean"] = data[col].mean().tolist()
    features_video[f"{col}_median"] = data[col].median().tolist()
    features_video[f"{col}_var"] = data[col].var().tolist()

  return pd.Series(features_video).sort_index()

### Prepare training data

In [None]:
# Convert path strings to pathlib's Path object
training_data_path = Path(training_data_path)

# Read all the files and compiled them into single dataframe
training_data = compile_data_df(training_data_path)

# (Optional) Save the data as CSV
training_data.to_csv(root_dir / "training_data.csv")

# Extract labels from the training data
Y_train = training_data.groupby(["filename"])["label"].first().tolist()

# Get features and prepare training data
X_train = training_data.drop(columns=["label"])\
                        .groupby(["filename"])\
                        .apply(get_video_features)\
                        .to_numpy()

float division by zero : [0.0, 0.0] [0, -1]
float division by zero : [0.0, 0.0] [0, -1]
float division by zero : [0.0, 0.0] [0, -1]
float division by zero : [0.0, 0.0] [0, -1]
float division by zero : [0.0, 0.0] [0, -1]
float division by zero : [0.0, 373.21] [0.0, 0.0]
float division by zero : [0.0, 382.18] [0.0, 0.0]
math domain error : [0.0, 239.67] [0.0, 139.22]
float division by zero : [0.0, 382.02] [0.0, 0.0]
float division by zero : [0.0, 376.38] [0.0, 0.0]
float division by zero : [0.0, 377.59] [0.0, 0.0]


### Train the model pipeline

In [19]:
# Make Pipeline
model_pipeline = make_pipeline(StandardScaler(),
                    LinearSVC(random_state=0, tol=1e-5))

# Fit
model_pipeline = model_pipeline.fit(X_train, Y_train)

In [20]:
# Save model pipeline
with open(save_model_as, 'wb') as pickle_file:
  pickle.dump(model_pipeline, pickle_file)

## Test the model

### Key variables for testing the model

In [21]:
# Key Variables

# Paths to the folder containing keypoints csvs for testing
testing_data_path = root_dir / "test"

# Location of model
model_file_path = root_dir / "feature_engineering_model.sklearn-1-2-2.pickle"

### Prepare testing data

In [26]:
# Convert path strings to pathlib's Path object
testing_data_path = Path(testing_data_path)

# Read all the files and compiled them into single dataframe
testing_data = compile_data_df(testing_data_path)

# (Optional) Save the data as CSV
testing_data.to_csv(root_dir / "testing_data.csv")

# Extract true labels from the testing data
Y_test = testing_data.groupby(["filename"])["label"].first().tolist()

# Get features and prepare testing data
X_test = testing_data.drop(columns=["label"])\
                        .groupby(["filename"])\
                        .apply(get_video_features)\
                        .to_numpy()

In [28]:
# Load the saved model pipeline
with open(model_file_path, 'rb') as pickle_file:
  loaded_pipeline = pickle.load(pickle_file)

# Predict for the test set
Y_pred = loaded_pipeline.predict(X_test)

# Print accuracy score for the test set
print(accuracy_score(Y_test, Y_pred))

0.95
