# 3.0 Feature Engineering

## 3.1 Feature Creation

The `PoseDimensionCalculator` class operates through a series of algorithmic steps to calculate dimensions and angles based on pose data. Here's a technical breakdown of its key functions and the formulas used:

1. `enhance_pose_landmarks()`:  This method refines the pose data by creating additional landmark points to represent key body parts more accurately. It approximates the spine's location using the average coordinates of the torso's four landmarks (head, chest, stomach, and hips) and simplifies hand landmarks by averaging the coordinates of the pinky and index fingers to estimate the central position of the knuckles.
2. `calculate_pose_distance()` & `calculate_distances()`: These methods calculate the Euclidean distance between two joints. A smaller distance means closer contact between the two body parts (e.g., elbow to knuckles).
3. `calculate_pose_angle()` & `calculate_connected_joint_range()`: These methods use `arctan2` to compute the angle between two vectors (connected joints based on human anatomy).

Concepts:
* Euclidean Distance: https://science.howstuffworks.com/math-concepts/distance-formula.htm
* Atan2: https://en.wikipedia.org/wiki/Atan2
* Velocity: https://en.wikipedia.org/wiki/Velocity

In [3]:
import numpy as np
import pandas as pd
from scipy.stats import mode

class PoseDimensionCalculator:
    """Derive torso midpoints, joint distances and joint angles from pose data.

    The input frame must contain ``landmark_NN_x`` / ``landmark_NN_y`` /
    ``landmark_NN_z`` columns (two-digit, zero-padded ``NN``) for every
    landmark referenced in the feature tables below. After construction,
    ``self.data`` holds the input columns followed by, in this order:

    * ``chest_*``, ``stomach_*``, ``hip_*`` midpoint coordinates,
    * the ``d_*`` distance features,
    * the ``a_*`` angle features (degrees).

    Parameters
    ----------
    data : pandas.DataFrame
        Pose landmark coordinates, one pose per row.
    include_z : bool, default False
        When true, distances and angles are computed in 3D (x, y, z);
        otherwise only x and y are used.
    is_video : bool, default False
        Accepted so that callers passing ``is_video=...`` keep working. It is
        stored on the instance but does not influence any computed feature.
    """

    # Feature name -> (start point, end point). Points are column prefixes;
    # the axis suffix (``_x``, ``_y``, ``_z``) is appended when needed.
    # NOTE(review): the two ``*_shoulder_to_ankle`` features end on landmarks
    # 25/26, the same endpoints as the ``*_knee`` features, whereas
    # ``d_ankles`` uses 27/28 -- confirm whether 27/28 was intended there.
    _DISTANCE_FEATURES = {
        'd_nose_to_rgt_knee': ('landmark_00', 'landmark_25'),
        'd_nose_to_lft_knee': ('landmark_00', 'landmark_26'),
        'd_rgt_shoulder_to_ankle': ('landmark_11', 'landmark_25'),
        'd_lft_shoulder_to_ankle': ('landmark_12', 'landmark_26'),
        'd_wrists': ('landmark_15', 'landmark_16'),
        'd_elbows': ('landmark_13', 'landmark_14'),
        'd_knees': ('landmark_25', 'landmark_26'),
        'd_ankles': ('landmark_27', 'landmark_28'),
    }

    # Feature name -> (A, B, C): the angle is measured at vertex B between
    # the vectors B->A and B->C.
    _ANGLE_FEATURES = {
        'a_nose_to_rgt_shoulder': ('landmark_00', 'chest', 'landmark_11'),
        'a_nose_to_lft_shoulder': ('landmark_00', 'chest', 'landmark_12'),
        'a_mid_hip_to_knees': ('landmark_26', 'hip', 'landmark_25'),
        'a_lft_shoulder_to_wrist': ('landmark_12', 'landmark_14', 'landmark_16'),
        'a_lft_hip_to_ankle': ('landmark_24', 'landmark_26', 'landmark_28'),
        'a_rgt_shoulder_to_wrist': ('landmark_11', 'landmark_13', 'landmark_15'),
        'a_rgt_hip_to_ankle': ('landmark_23', 'landmark_25', 'landmark_27'),
    }

    def __init__(self, data, include_z=False, is_video=False):
        self.data = data
        self.is_video = is_video

        # Keep the method intact: the result goes to ``self.data`` only, it is
        # not assigned over ``self.enhance_pose_landmarks``.
        self.data = self.enhance_pose_landmarks()

        if include_z:
            distances = self.calculate_distances_z()
            angles = self.calculate_connected_joint_range_z()
        else:
            distances = self.calculate_distances()
            angles = self.calculate_connected_joint_range()

        # Both feature sets read only landmark/midpoint columns, so they can
        # be computed first and appended in a single concat.
        self.data = pd.concat([self.data, distances, angles], axis=1)

    @staticmethod
    def _axis_columns(point, axes):
        """Return the column names of ``point`` for each axis in ``axes``."""
        return [f'{point}_{axis}' for axis in axes]

    @staticmethod
    def _torso_midpoints(source):
        """Compute chest, stomach and hip coordinates from ``source``.

        ``source`` may be a single row (Series) or a whole frame (DataFrame);
        label-based indexing and the arithmetic below work for both.

        Landmarks are addressed by column name rather than by position in a
        sorted column list, so unrelated columns ending in ``_x`` / ``_y`` /
        ``_z`` cannot shift which landmark is read.
        """
        axes = ('x', 'y', 'z')
        per_axis = {}
        for axis in axes:
            chest = (source[f'landmark_11_{axis}'] + source[f'landmark_12_{axis}']) / 2
            hip = (source[f'landmark_23_{axis}'] + source[f'landmark_24_{axis}']) / 2
            per_axis[axis] = {
                'chest': chest,
                'stomach': (chest + hip) / 2,
                'hip': hip,
            }
        # Emit chest_x, chest_y, chest_z, stomach_x, ... to keep the
        # established column order.
        return {
            f'{part}_{axis}': per_axis[axis][part]
            for part in ('chest', 'stomach', 'hip')
            for axis in axes
        }

    def enhance_pose_landmarks(self):
        """Return ``self.data`` with chest/stomach/hip columns appended.

        The midpoints are computed column-wise for all rows at once.
        """
        enhanced_df = pd.DataFrame(self._torso_midpoints(self.data))
        return pd.concat([self.data, enhanced_df], axis=1)

    def process_row(self, row):
        """Return a dict of chest/stomach/hip coordinates for one row.

        * chest: midpoint of landmarks 11 and 12
        * hip: midpoint of landmarks 23 and 24
        * stomach: midpoint of chest and hip
        """
        return self._torso_midpoints(row)

    def calculate_pose_distance(self, Ax, Ay, Bx, By):
        """Return the 2D Euclidean distance between points A and B.

        Arguments are column names in ``self.data``; the result is a Series
        rounded to 4 decimals.
        """
        dx = self.data[Ax] - self.data[Bx]
        dy = self.data[Ay] - self.data[By]
        return np.round(np.sqrt(dx ** 2 + dy ** 2), 4)

    def calculate_pose_distance_z(self, x1, y1, z1, x2, y2, z2):
        """Return the 3D Euclidean distance between points 1 and 2.

        Arguments are column names in ``self.data``; the result is a Series
        rounded to 4 decimals.
        """
        dx = self.data[x2] - self.data[x1]
        dy = self.data[y2] - self.data[y1]
        dz = self.data[z2] - self.data[z1]
        return np.round(np.sqrt(dx ** 2 + dy ** 2 + dz ** 2), 4)

    def calculate_distances(self):
        """Return a frame with every ``d_*`` feature computed in 2D."""
        dist = pd.DataFrame(index=self.data.index)
        for feature, (start, end) in self._DISTANCE_FEATURES.items():
            dist[feature] = self.calculate_pose_distance(
                *self._axis_columns(start, 'xy'),
                *self._axis_columns(end, 'xy'),
            )
        return dist

    def calculate_distances_z(self):
        """Return a frame with every ``d_*`` feature computed in 3D."""
        dist = pd.DataFrame(index=self.data.index)
        for feature, (start, end) in self._DISTANCE_FEATURES.items():
            dist[feature] = self.calculate_pose_distance_z(
                *self._axis_columns(start, 'xyz'),
                *self._axis_columns(end, 'xyz'),
            )
        return dist

    def calculate_pose_angle(self, Ax, Ay, Bx, By, Cx, Cy):
        """Return the signed 2D angle at B from vector BA to vector BC.

        Each direction is measured with ``arctan2`` and the two are
        subtracted, then converted to degrees and rounded to 4 decimals. The
        difference is not wrapped, so values fall within [-360, 360].
        """
        A = self.data[[Ax, Ay]].values
        B = self.data[[Bx, By]].values
        C = self.data[[Cx, Cy]].values
        BA = A - B
        BC = C - B
        angle_BA = np.arctan2(BA[:, 1], BA[:, 0])
        angle_BC = np.arctan2(BC[:, 1], BC[:, 0])
        return np.round(np.degrees(angle_BC - angle_BA), 4)

    def calculate_pose_angle_z(self, Ax, Ay, Az, Bx, By, Bz, Cx, Cy, Cz):
        """Return the unsigned 3D angle at B between vectors BA and BC.

        Computed from the normalised dot product, in degrees within
        [0, 180], rounded to 4 decimals. If A or C coincides with B the
        vector norm is zero and the result for that row is NaN.
        """
        A = self.data[[Ax, Ay, Az]].values
        B = self.data[[Bx, By, Bz]].values
        C = self.data[[Cx, Cy, Cz]].values
        BA = A - B
        BC = C - B
        norms = np.linalg.norm(BA, axis=1) * np.linalg.norm(BC, axis=1)
        cosine_angle = (BA * BC).sum(axis=1) / norms
        # Clip so floating point error cannot push the cosine outside the
        # domain of arccos.
        angle = np.arccos(np.clip(cosine_angle, -1.0, 1.0))
        return np.round(np.degrees(angle), 4)

    def calculate_connected_joint_range(self):
        """Return a frame with every ``a_*`` feature computed in 2D."""
        angles = pd.DataFrame(index=self.data.index)
        for feature, (a, b, c) in self._ANGLE_FEATURES.items():
            angles[feature] = self.calculate_pose_angle(
                *self._axis_columns(a, 'xy'),
                *self._axis_columns(b, 'xy'),
                *self._axis_columns(c, 'xy'),
            )
        return angles

    def calculate_connected_joint_range_z(self):
        """Return a frame with every ``a_*`` feature computed in 3D."""
        angles = pd.DataFrame(index=self.data.index)
        for feature, (a, b, c) in self._ANGLE_FEATURES.items():
            angles[feature] = self.calculate_pose_angle_z(
                *self._axis_columns(a, 'xyz'),
                *self._axis_columns(b, 'xyz'),
                *self._axis_columns(c, 'xyz'),
            )
        return angles

In [3]:
# Load the augmented pose landmarks, derive the x/y-only (2D) distance and
# angle features, and save the combined table without the DataFrame index.
data = pd.read_csv("data/training-selected/all/pose_data_augmented.csv")
results = PoseDimensionCalculator(data, include_z=False)
results.data.to_csv("data/training-selected/all/pose_data_augmented_res_0301.csv", index=False)

In [4]:
# Derive the 3D (x/y/z) distance and angle features from the same input
# frame, save them, and display the resulting table.
z_results = PoseDimensionCalculator(data, include_z=True)
z_results.data.to_csv("data/training-selected/all/pose_data_augmented_z_res_0301.csv", index=False)
z_results.data

Unnamed: 0,image_filename,pose_name,difficulty,label,label_encoded,theta,landmark_00_x,landmark_00_y,landmark_00_z,landmark_01_x,...,d_elbows,d_knees,d_ankles,a_nose_to_rgt_shoulder,a_nose_to_lft_shoulder,a_mid_hip_to_knees,a_lft_shoulder_to_wrist,a_lft_hip_to_ankle,a_rgt_shoulder_to_wrist,a_rgt_hip_to_ankle
0,armpit-hold-seated-lft.png,armpit-hold-seated-lft,1,armpit-hold-seated,0,0,0.444102,0.627645,-0.457313,0.445873,...,0.4071,0.4453,0.2270,149.7455,30.2545,96.2236,137.7378,64.5253,125.8498,73.6329
1,armpit-hold-seated-lft.png,armpit-hold-seated-lft,1,armpit-hold-seated,0,10,0.516767,0.627645,-0.373248,0.510789,...,0.4071,0.4453,0.2270,149.7455,30.2545,96.2236,137.7378,64.5253,125.8498,73.6329
2,armpit-hold-seated-lft.png,armpit-hold-seated-lft,1,armpit-hold-seated,0,20,0.573730,0.627645,-0.277842,0.560185,...,0.4071,0.4453,0.2270,149.7455,30.2545,96.2236,137.7378,64.5253,125.8498,73.6329
3,armpit-hold-seated-lft.png,armpit-hold-seated-lft,1,armpit-hold-seated,0,30,0.613260,0.627645,-0.173994,0.592560,...,0.4071,0.4453,0.2270,149.7455,30.2545,96.2236,137.7378,64.5253,125.8498,73.6329
4,armpit-hold-seated-lft.png,armpit-hold-seated-lft,1,armpit-hold-seated,0,40,0.634157,0.627645,-0.064859,0.606931,...,0.4071,0.4453,0.2270,149.7455,30.2545,96.2236,137.7378,64.5253,125.8498,73.6329
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10219,strong-hold-side-split-rgt.png,strong-hold-side-split-rgt,3,strong-hold-side-split,140,310,0.113107,0.444561,-0.761474,0.140032,...,0.0949,0.4344,0.7478,47.5918,132.4082,139.5484,141.6383,156.0463,152.2347,155.7035
10220,strong-hold-side-split-rgt.png,strong-hold-side-split-rgt,3,strong-hold-side-split,140,320,0.243617,0.444561,-0.730265,0.269601,...,0.0949,0.4344,0.7478,47.5918,132.4082,139.5484,141.6383,156.0463,152.2347,155.7035
10221,strong-hold-side-split-rgt.png,strong-hold-side-split-rgt,3,strong-hold-side-split,140,330,0.366725,0.444561,-0.676867,0.390978,...,0.0949,0.4344,0.7478,47.5918,132.4082,139.5484,141.6383,156.0463,152.2347,155.7035
10222,strong-hold-side-split-rgt.png,strong-hold-side-split-rgt,3,strong-hold-side-split,140,340,0.478690,0.444561,-0.602902,0.500475,...,0.0949,0.4344,0.7478,47.5918,132.4082,139.5484,141.6383,156.0463,152.2347,155.7035


### Adding Grip Hold Feature

In [4]:
# Load the second training set and compute its 3D (x/y/z) features.
data2 = pd.read_csv("data/training-set-2/pose_data_aug2.csv")
data2_res = PoseDimensionCalculator(data2,include_z=True)

# Echoes the calculator object itself; the feature table is `data2_res.data`.
data2_res

<__main__.PoseDimensionCalculator at 0x1073d2050>

In [5]:
# Display the input columns together with the derived features.
data2_res.data

Unnamed: 0,image_filename,label,label_encoded,theta,landmark_00_x,landmark_00_y,landmark_00_z,landmark_01_x,landmark_01_y,landmark_01_z,...,d_elbows,d_knees,d_ankles,a_nose_to_rgt_shoulder,a_nose_to_lft_shoulder,a_mid_hip_to_knees,a_lft_shoulder_to_wrist,a_lft_hip_to_ankle,a_rgt_shoulder_to_wrist,a_rgt_hip_to_ankle
0,ampit-thigh-hold-flying-ballerina.png,ampit-thigh-hold-flying-ballerina,0,0,0.281222,0.728519,-0.565210,0.284313,0.742852,-0.549514,...,0.4473,0.5227,0.9409,93.5802,86.4198,123.2982,130.7334,135.6154,138.7016,166.9441
1,ampit-thigh-hold-flying-ballerina.png,ampit-thigh-hold-flying-ballerina,0,10,0.375097,0.728519,-0.507790,0.375416,0.742852,-0.491795,...,0.4473,0.5227,0.9409,93.5802,86.4198,123.2982,130.7334,135.6154,138.7016,166.9441
2,ampit-thigh-hold-flying-ballerina.png,ampit-thigh-hold-flying-ballerina,0,20,0.457575,0.728519,-0.434940,0.455112,0.742852,-0.419133,...,0.4473,0.5227,0.9409,93.5802,86.4198,123.2982,130.7334,135.6154,138.7016,166.9441
3,ampit-thigh-hold-flying-ballerina.png,ampit-thigh-hold-flying-ballerina,0,30,0.526150,0.728519,-0.348876,0.520979,0.742852,-0.333736,...,0.4473,0.5227,0.9409,93.5802,86.4198,123.2982,130.7334,135.6154,138.7016,166.9441
4,ampit-thigh-hold-flying-ballerina.png,ampit-thigh-hold-flying-ballerina,0,40,0.578738,0.728519,-0.252210,0.571017,0.742852,-0.238199,...,0.4473,0.5227,0.9409,93.5802,86.4198,123.2982,130.7334,135.6154,138.7016,166.9441
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
20803,wrist-sit-straddle.png,wrist-sit-straddle,288,310,0.500793,0.574214,-0.000420,0.539367,0.582020,0.034969,...,0.3353,0.4141,0.7348,98.2733,81.7267,88.7285,157.1561,164.9391,175.6282,172.5503
20804,wrist-sit-straddle.png,wrist-sit-straddle,288,320,0.493258,0.574214,0.086549,0.525100,0.582020,0.128098,...,0.3353,0.4141,0.7348,98.2733,81.7267,88.7285,157.1561,164.9391,175.6282,172.5503
20805,wrist-sit-straddle.png,wrist-sit-straddle,288,330,0.470735,0.574214,0.170887,0.494879,0.582020,0.217334,...,0.3353,0.4141,0.7348,98.2733,81.7267,88.7285,157.1561,164.9391,175.6282,172.5503
20806,wrist-sit-straddle.png,wrist-sit-straddle,288,340,0.433909,0.574214,0.250033,0.449621,0.582020,0.299967,...,0.3353,0.4141,0.7348,98.2733,81.7267,88.7285,157.1561,164.9391,175.6282,172.5503


In [6]:
# Save the feature table for the second training set, omitting the index.
data2_res.data.to_csv("data/training-set-2/pose_data_aug_res.csv", index=False)