# 3.0 Feature Engineering

In [7]:
import numpy as np
import pandas as pd

## 3.1 Feature Creation

In [11]:
class PoseDimensionCalculator:
    """Append derived pose features to a table of per-frame landmarks.

    The input frame must contain columns named ``landmark_NN_x``,
    ``landmark_NN_y`` and ``landmark_NN_z`` with a zero-padded index ``NN``
    (the padding matters: columns are ordered by a plain string sort and
    then sliced by position).  On construction three groups of columns are
    appended, in this order, to a new frame stored in ``self.data``:

    1. body-centre points ``head_*``, ``chest_*``, ``stomach_*``, ``hip_*``;
    2. pairwise landmark distances (``d_*``);
    3. joint angles in degrees (``a_*``).

    The frame passed in is not modified; ``self.data`` is a new DataFrame.

    NOTE(review): the feature names look like they follow the MediaPipe Pose
    landmark numbering.  If so, landmarks 25/26 are knees (27/28 are the
    ankles), so the ``d_*_shoulder_to_*ankle`` features actually measure
    shoulder-to-knee, and the ``rgt``/``lft`` labels may be swapped relative
    to MediaPipe's 11=left / 12=right convention.  Names and landmark
    choices are kept as they were because downstream data depends on them —
    confirm before renaming.
    """

    # Output name, first landmark, second landmark.  Shared by the 2-D and
    # 3-D variants so the two feature sets cannot drift apart.
    _DISTANCE_FEATURES = (
        ('d_nose_to_rgt_knee', 'landmark_00', 'landmark_25'),
        ('d_nose_to_lft_knee', 'landmark_00', 'landmark_26'),
        ('d_rgt_shoulder_to_ankle', 'landmark_11', 'landmark_25'),
        ('d_lft_shoulder_to_ankle', 'landmark_12', 'landmark_26'),
        ('d_rgt_shoulder_to_lft_ankle', 'landmark_11', 'landmark_26'),
        ('d_lft_shoulder_to_rgt_ankle', 'landmark_12', 'landmark_25'),
        ('d_wrists', 'landmark_15', 'landmark_16'),
        ('d_elbows', 'landmark_13', 'landmark_14'),
        ('d_knees', 'landmark_25', 'landmark_26'),
        ('d_ankles', 'landmark_27', 'landmark_28'),
    )

    # Output name, point A, vertex B, point C: the angle is measured at B.
    _ANGLE_FEATURES = (
        ('a_nose_to_rgt_shoulder', 'landmark_00', 'chest', 'landmark_11'),
        ('a_nose_to_lft_shoulder', 'landmark_00', 'chest', 'landmark_12'),
        ('a_mid_hip_to_knees', 'landmark_26', 'hip', 'landmark_25'),
        ('a_lft_shoulder_to_wrist', 'landmark_12', 'landmark_14', 'landmark_16'),
        ('a_lft_hip_to_ankle', 'landmark_24', 'landmark_26', 'landmark_28'),
        ('a_rgt_shoulder_to_wrist', 'landmark_11', 'landmark_13', 'landmark_15'),
        ('a_rgt_hip_to_ankle', 'landmark_23', 'landmark_25', 'landmark_27'),
    )

    def __init__(self, data, include_z=False):
        """Build the enriched feature table.

        Args:
            data: DataFrame with the ``landmark_NN_{x,y,z}`` columns.
            include_z: when true, distances and angles are computed in 3-D
                from x, y and z; otherwise in 2-D from x and y only.
        """
        self.data = data

        # The result is stored only in ``self.data``.  Assigning it to
        # ``self.enhance_pose_landmarks`` (as was done previously) replaced
        # the bound method with a DataFrame on the instance.
        self.data = self.enhance_pose_landmarks()

        if include_z:
            distances = self.calculate_distances_z()
            angles = self.calculate_connected_joint_range_z()
        else:
            distances = self.calculate_distances()
            angles = self.calculate_connected_joint_range()

        self.data = pd.concat([self.data, distances, angles], axis=1)

    @staticmethod
    def _xy(point):
        """Return the x/y column names for a point prefix."""
        return [f'{point}_x', f'{point}_y']

    @staticmethod
    def _xyz(point):
        """Return the x/y/z column names for a point prefix."""
        return [f'{point}_x', f'{point}_y', f'{point}_z']

    def _axis_columns(self, suffix):
        """Return the sorted ``landmark_*`` columns ending in ``suffix``.

        Only ``landmark_`` columns are considered so that the derived
        ``head_x`` / ``chest_x`` / ... columns, which also end in ``_x``,
        cannot shift the positional slices once they have been appended.
        """
        return sorted(
            col for col in self.data.columns
            if isinstance(col, str)
            and col.startswith('landmark_')
            and col.endswith(suffix)
        )

    @staticmethod
    def _body_points(coords):
        """Compute the derived body-centre points.

        Args:
            coords: mapping ``'x' | 'y' | 'z'`` -> float array whose last
                axis runs over the sorted landmarks of that axis.

        Returns:
            dict keyed ``head_x, head_y, head_z, chest_x, ...`` (same key
            order as before); values are scalars for 1-D input and 1-D
            arrays for 2-D input.
        """
        per_axis = {}
        for axis, values in coords.items():
            chest = values[..., 11:13].mean(axis=-1)
            hip = values[..., 23:25].mean(axis=-1)
            per_axis[axis] = {
                'head': values[..., 0:11].mean(axis=-1),
                'chest': chest,
                # Midpoint between the chest and hip centres.
                'stomach': (chest + hip) / 2,
                'hip': hip,
            }
        return {
            f'{part}_{axis}': per_axis[axis][part]
            for part in ('head', 'chest', 'stomach', 'hip')
            for axis in ('x', 'y', 'z')
        }

    def enhance_pose_landmarks(self):
        """Return ``self.data`` with the body-centre columns appended.

        Computed for all rows at once; an empty frame yields an empty
        result instead of raising.
        """
        coords = {
            axis: self.data[self._axis_columns(f'_{axis}')].to_numpy(dtype=float)
            for axis in ('x', 'y', 'z')
        }
        enhanced_df = pd.DataFrame(self._body_points(coords), index=self.data.index)
        return pd.concat([self.data, enhanced_df], axis=1)

    def process_row(self, row):
        """Return the body-centre points for a single row as a dict.

        Args:
            row: a row of ``self.data`` (e.g. from ``iterrows``) holding
                the landmark columns.
        """
        coords = {
            axis: row[self._axis_columns(f'_{axis}')].to_numpy(dtype=float)
            for axis in ('x', 'y', 'z')
        }
        return self._body_points(coords)

    def calculate_pose_distance(self, Ax, Ay, Bx, By):
        """Return the 2-D Euclidean distance between two points per row.

        Arguments are column names in ``self.data``; the result is a Series
        rounded to 4 decimals.
        """
        distance = np.sqrt((self.data[Ax] - self.data[Bx])**2 + (self.data[Ay] - self.data[By])**2)
        return np.round(distance, 4)

    def calculate_pose_distance_z(self, x1, y1, z1, x2, y2, z2):
        """Return the 3-D Euclidean distance between two points per row.

        Arguments are column names in ``self.data``; the result is a Series
        rounded to 4 decimals.
        """
        distance = np.sqrt((self.data[x2] - self.data[x1]) ** 2 +
                       (self.data[y2] - self.data[y1]) ** 2 +
                       (self.data[z2] - self.data[z1]) ** 2)
        return np.round(distance, 4)

    def calculate_distances(self):
        """Return a DataFrame of the 2-D ``d_*`` distance features.

        Every feature uses the 2-D helper.  The two cross-body features
        previously called the 3-D helper with only four arguments, which
        raised ``TypeError`` whenever ``include_z`` was false.
        """
        dist = pd.DataFrame(index=self.data.index)
        for name, first, second in self._DISTANCE_FEATURES:
            dist[name] = self.calculate_pose_distance(*self._xy(first), *self._xy(second))
        return dist

    def calculate_distances_z(self):
        """Return a DataFrame of the 3-D ``d_*`` distance features."""
        dist = pd.DataFrame(index=self.data.index)
        for name, first, second in self._DISTANCE_FEATURES:
            dist[name] = self.calculate_pose_distance_z(*self._xyz(first), *self._xyz(second))
        return dist

    def calculate_pose_angle(self, Ax, Ay, Bx, By, Cx, Cy):
        """Return the signed 2-D angle at B from ray BA to ray BC, in degrees.

        The value is the raw difference of the two ``arctan2`` directions
        and is not wrapped, so it can fall anywhere in [-360, 360].
        Rounded to 4 decimals; returned as a NumPy array.
        """
        A = self.data[[Ax, Ay]].values
        B = self.data[[Bx, By]].values
        C = self.data[[Cx, Cy]].values
        BA = A - B
        BC = C - B
        angle_BA = np.arctan2(BA[:, 1], BA[:, 0])
        angle_BC = np.arctan2(BC[:, 1], BC[:, 0])
        angle_difference = np.degrees(angle_BC - angle_BA)
        return np.round(angle_difference, 4)

    def calculate_pose_angle_z(self, Ax, Ay, Az, Bx, By, Bz, Cx, Cy, Cz):
        """Return the unsigned 3-D angle at B between BA and BC, in degrees.

        The result lies in [0, 180], is rounded to 4 decimals and is
        returned as a NumPy array.  Rows where A or C coincides with B have
        no defined angle and come back as NaN.
        """
        A = self.data[[Ax, Ay, Az]].values
        B = self.data[[Bx, By, Bz]].values
        C = self.data[[Cx, Cy, Cz]].values
        BA = A - B
        BC = C - B
        # A zero-length vector gives 0/0; keep the NaN result but do not
        # emit a RuntimeWarning for it.
        with np.errstate(divide='ignore', invalid='ignore'):
            cosine_angle = (BA * BC).sum(axis=1) / (np.linalg.norm(BA, axis=1) * np.linalg.norm(BC, axis=1))
        angle = np.arccos(np.clip(cosine_angle, -1.0, 1.0))  # Clip to handle floating point errors
        angle_difference = np.degrees(angle)  # Convert to degrees
        return np.round(angle_difference, 4)

    def calculate_connected_joint_range(self):
        """Return a DataFrame of the 2-D ``a_*`` joint-angle features."""
        angles = pd.DataFrame(index=self.data.index)
        for name, first, vertex, last in self._ANGLE_FEATURES:
            angles[name] = self.calculate_pose_angle(
                *self._xy(first), *self._xy(vertex), *self._xy(last))
        return angles

    def calculate_connected_joint_range_z(self):
        """Return a DataFrame of the 3-D ``a_*`` joint-angle features."""
        angles = pd.DataFrame(index=self.data.index)
        for name, first, vertex, last in self._ANGLE_FEATURES:
            angles[name] = self.calculate_pose_angle_z(
                *self._xyz(first), *self._xyz(vertex), *self._xyz(last))
        return angles

### Feature Creation for Training Data

In [12]:
# Load the augmented training landmarks and derive the 3-D feature set.
training_csv = "data/training-set-2/pose_data_aug.csv"
data = pd.read_csv(training_csv)
results = PoseDimensionCalculator(data, include_z=True)
# Last expression of the cell: renders the enriched table in the notebook.
results.data

Unnamed: 0,image_filename,label,label_encoded,theta,landmark_00_x,landmark_00_y,landmark_00_z,landmark_01_x,landmark_01_y,landmark_01_z,...,d_elbows,d_knees,d_ankles,a_nose_to_rgt_shoulder,a_nose_to_lft_shoulder,a_mid_hip_to_knees,a_lft_shoulder_to_wrist,a_lft_hip_to_ankle,a_rgt_shoulder_to_wrist,a_rgt_hip_to_ankle
0,advanced-figurehead-lft.png,advanced-figurehead,0,0,0.545408,0.740922,-0.407526,0.529543,0.728610,-0.377108,...,0.7512,0.7316,0.7640,121.4334,58.5666,115.3067,70.4993,92.0094,86.8382,110.0583
1,advanced-figurehead-lft.png,advanced-figurehead,0,10,0.607888,0.740922,-0.306625,0.586982,0.728610,-0.279425,...,0.7512,0.7316,0.7640,121.4334,58.5666,115.3067,70.4993,92.0094,86.8382,110.0583
2,advanced-figurehead-lft.png,advanced-figurehead,0,20,0.651898,0.740922,-0.196408,0.626586,0.728610,-0.173252,...,0.7512,0.7316,0.7640,121.4334,58.5666,115.3067,70.4993,92.0094,86.8382,110.0583
3,advanced-figurehead-lft.png,advanced-figurehead,0,30,0.676100,0.740922,-0.080224,0.647152,0.728610,-0.061814,...,0.7512,0.7316,0.7640,121.4334,58.5666,115.3067,70.4993,92.0094,86.8382,110.0583
4,advanced-figurehead-lft.png,advanced-figurehead,0,40,0.679759,0.740922,0.038399,0.648054,0.728610,0.051502,...,0.7512,0.7316,0.7640,121.4334,58.5666,115.3067,70.4993,92.0094,86.8382,110.0583
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
19435,yogini-twisted-rgt.png,yogini-twisted,269,310,0.221597,0.731075,-0.655868,0.241815,0.751349,-0.633527,...,0.6759,0.5520,0.6271,116.5380,63.4620,154.1874,159.0989,59.3940,98.2172,120.2231
19436,yogini-twisted-rgt.png,yogini-twisted,269,320,0.332121,0.731075,-0.607424,0.348152,0.751349,-0.581912,...,0.6759,0.5520,0.6271,116.5380,63.4620,154.1874,159.0989,59.3940,98.2172,120.2231
19437,yogini-twisted-rgt.png,yogini-twisted,269,330,0.432553,0.731075,-0.540524,0.443911,0.751349,-0.512615,...,0.6759,0.5520,0.6271,116.5380,63.4620,154.1874,159.0989,59.3940,98.2172,120.2231
19438,yogini-twisted-rgt.png,yogini-twisted,269,340,0.519843,0.731075,-0.457200,0.526182,0.751349,-0.427743,...,0.6759,0.5520,0.6271,116.5380,63.4620,154.1874,159.0989,59.3940,98.2172,120.2231


In [13]:
# Persist the engineered training features; the DataFrame index is not written.
training_features_csv = "data/training-set-2/pose_data_res.csv"
results.data.to_csv(training_features_csv, index=False)

### Feature Creation for the Test Dataset

In [17]:
# Raw landmark export for test clip superman/01.
processed_dir = "data/testing-set-1/processed/superman/01"
raw_csv = f"{processed_dir}/pose_data_raw.csv"
processed_data = pd.read_csv(raw_csv)

# Mirror every *_y column (y -> 1 - y) before deriving features.
# NOTE(review): presumably converts image coordinates (origin at the top) to
# the orientation used by the training set — confirm.
y_cols = [name for name in processed_data.columns if name.endswith('_y')]
flipped_y = 1 - processed_data[y_cols]
processed_data[y_cols] = flipped_y

# Derive the 3-D features and write them next to the raw file (no index column).
new_data = PoseDimensionCalculator(processed_data, include_z=True)
result_csv = f"{processed_dir}/pose_data_res.csv"
new_data.data.to_csv(result_csv, index=False)

new_data.data

Unnamed: 0,image_filename,secs,frame_no,landmark_00_x,landmark_00_y,landmark_00_z,landmark_00_v,landmark_01_x,landmark_01_y,landmark_01_z,...,d_elbows,d_knees,d_ankles,a_nose_to_rgt_shoulder,a_nose_to_lft_shoulder,a_mid_hip_to_knees,a_lft_shoulder_to_wrist,a_lft_hip_to_ankle,a_rgt_shoulder_to_wrist,a_rgt_hip_to_ankle
0,0000_00000000.png,0,0,0.638420,0.734680,-0.424831,0.999928,0.621984,0.738979,-0.424845,...,0.6659,0.4564,0.4924,104.4263,75.5737,97.6937,172.8833,84.2704,176.4427,117.8962
1,0000_00000001.png,0,1,0.644855,0.728846,-0.461262,0.999973,0.627963,0.734147,-0.463602,...,0.6319,0.3289,0.3685,107.4298,72.5702,79.2258,169.2717,79.9242,177.7642,94.0627
2,0000_00000002.png,0,2,0.665368,0.725991,-0.380435,0.999849,0.648089,0.729822,-0.386160,...,0.5996,0.3641,0.4090,87.1111,92.8889,86.2722,164.4517,82.9472,177.0112,103.7441
3,0000_00000003.png,0,3,0.687776,0.720446,-0.375738,0.999761,0.671197,0.724123,-0.383116,...,0.5704,0.4095,0.4581,85.2699,94.7301,90.8037,153.6533,77.4091,174.0972,104.4340
4,0000_00000004.png,0,4,0.694269,0.713737,-0.352446,0.999782,0.677614,0.720128,-0.361756,...,0.4835,0.3554,0.4159,80.4585,99.5415,82.1124,145.7452,81.8003,176.6910,103.5850
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
239,0008_00000242.png,8,242,0.410895,0.615877,0.528229,0.999717,0.416067,0.626220,0.525009,...,0.4294,0.1970,0.3045,133.4372,46.5628,58.6142,165.8553,97.9145,121.0093,162.9633
240,0008_00000243.png,8,243,0.411530,0.613102,0.547794,0.999738,0.416648,0.623092,0.545720,...,0.4089,0.1944,0.3089,132.9080,47.0920,55.6752,165.3011,95.9861,119.7830,147.8539
241,0008_00000244.png,8,244,0.411585,0.613037,0.545562,0.999721,0.416594,0.622974,0.542923,...,0.4200,0.2140,0.3196,133.8837,46.1163,60.3653,165.8483,94.1177,119.4091,148.8346
242,0008_00000245.png,8,245,0.411701,0.614247,0.535866,0.999740,0.416712,0.624212,0.532747,...,0.4341,0.2108,0.3113,134.5585,45.4415,58.6527,165.6747,96.4831,120.2931,148.6020


In [18]:
# Raw landmark export for test clip superman/02.
processed_dir = "data/testing-set-1/processed/superman/02"
raw_csv = f"{processed_dir}/pose_data_raw.csv"
processed_data = pd.read_csv(raw_csv)

# Mirror every *_y column (y -> 1 - y) before deriving features.
# NOTE(review): presumably converts image coordinates (origin at the top) to
# the orientation used by the training set — confirm.
y_cols = [name for name in processed_data.columns if name.endswith('_y')]
flipped_y = 1 - processed_data[y_cols]
processed_data[y_cols] = flipped_y

# Derive the 3-D features and write them next to the raw file (no index column).
new_data = PoseDimensionCalculator(processed_data, include_z=True)
result_csv = f"{processed_dir}/pose_data_res.csv"
new_data.data.to_csv(result_csv, index=False)

new_data.data

Unnamed: 0,image_filename,secs,frame_no,landmark_00_x,landmark_00_y,landmark_00_z,landmark_00_v,landmark_01_x,landmark_01_y,landmark_01_z,...,d_elbows,d_knees,d_ankles,a_nose_to_rgt_shoulder,a_nose_to_lft_shoulder,a_mid_hip_to_knees,a_lft_shoulder_to_wrist,a_lft_hip_to_ankle,a_rgt_shoulder_to_wrist,a_rgt_hip_to_ankle
0,0000_00000000.png,0,0,0.475145,0.764218,-0.079987,0.998759,0.480707,0.774117,-0.078263,...,0.3374,0.2171,0.0286,26.7294,153.2706,72.0340,119.6812,111.8551,86.8953,32.2881
1,0000_00000001.png,0,1,0.508250,0.765454,-0.116973,0.995476,0.502904,0.776606,-0.098355,...,0.4298,0.5564,0.4977,126.4108,53.5892,102.6451,68.6437,15.5216,94.0190,168.3062
2,0000_00000002.png,0,2,0.496257,0.762546,-0.053800,0.999991,0.487797,0.776374,-0.061634,...,0.4013,0.1842,0.1476,46.7051,133.2949,20.1867,80.8088,152.3527,121.5457,168.8597
3,0000_00000003.png,0,3,0.484905,0.769863,-0.068224,0.999977,0.480994,0.782861,-0.069158,...,0.3703,0.1712,0.1662,72.3298,107.6702,153.2216,77.0458,113.4226,115.6599,162.1418
4,0000_00000004.png,0,4,0.470059,0.768726,-0.029582,0.999996,0.465691,0.780312,-0.036883,...,0.2747,0.1030,0.1276,61.2371,118.7629,51.3501,110.3500,129.7610,118.5712,145.5930
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
127,0004_00000128.png,4,128,0.478122,0.708115,-0.013726,0.994500,0.477595,0.720649,-0.014682,...,0.4492,0.2501,0.2856,47.5254,132.4746,76.6283,155.6854,157.3913,118.7962,163.5467
128,0004_00000129.png,4,129,0.431541,0.696888,0.350146,0.999484,0.436503,0.706023,0.340600,...,0.2633,0.3444,0.4712,146.3132,33.6868,89.7442,160.7453,115.6012,152.4926,164.9369
129,0004_00000130.png,4,130,0.434451,0.695300,0.448258,0.999890,0.441470,0.704763,0.438071,...,0.3353,0.2069,0.2747,150.2329,29.7671,36.1835,160.4160,165.9696,144.9393,162.6421
130,0004_00000131.png,4,131,0.423974,0.701657,0.523716,0.999709,0.432328,0.708904,0.516283,...,0.3634,0.2739,0.3586,151.0330,28.9670,45.5874,168.5390,173.1769,147.9039,172.5813


In [19]:
# Raw landmark export for test clip superman/03.
processed_dir = "data/testing-set-1/processed/superman/03"
raw_csv = f"{processed_dir}/pose_data_raw.csv"
processed_data = pd.read_csv(raw_csv)

# Mirror every *_y column (y -> 1 - y) before deriving features.
# NOTE(review): presumably converts image coordinates (origin at the top) to
# the orientation used by the training set — confirm.
y_cols = [name for name in processed_data.columns if name.endswith('_y')]
flipped_y = 1 - processed_data[y_cols]
processed_data[y_cols] = flipped_y

# Derive the 3-D features and write them next to the raw file (no index column).
new_data = PoseDimensionCalculator(processed_data, include_z=True)
result_csv = f"{processed_dir}/pose_data_res.csv"
new_data.data.to_csv(result_csv, index=False)

new_data.data

Unnamed: 0,image_filename,secs,frame_no,landmark_00_x,landmark_00_y,landmark_00_z,landmark_00_v,landmark_01_x,landmark_01_y,landmark_01_z,...,d_elbows,d_knees,d_ankles,a_nose_to_rgt_shoulder,a_nose_to_lft_shoulder,a_mid_hip_to_knees,a_lft_shoulder_to_wrist,a_lft_hip_to_ankle,a_rgt_shoulder_to_wrist,a_rgt_hip_to_ankle
0,0000_00000000.png,0,0,0.247717,0.637562,0.249196,0.999862,0.240640,0.639388,0.217578,...,0.3216,0.1582,0.1796,133.6437,46.3563,59.9477,166.8286,168.5079,159.2376,174.8704
1,0000_00000001.png,0,1,0.249903,0.636986,0.245263,0.999823,0.242908,0.639332,0.213786,...,0.3140,0.1236,0.1258,131.1890,48.8110,48.7445,167.0453,171.7515,162.1030,179.2551
2,0000_00000002.png,0,2,0.257620,0.636147,0.288349,0.999755,0.250132,0.638366,0.260201,...,0.2759,0.1117,0.1087,124.6960,55.3040,42.8087,172.5040,168.3794,152.5596,175.1746
3,0000_00000003.png,0,3,0.266167,0.635083,0.218747,0.999899,0.258386,0.637035,0.190017,...,0.2537,0.0174,0.0866,97.4152,82.5848,1.3484,172.6551,171.4304,159.4143,178.5165
4,0000_00000004.png,0,4,0.270522,0.631543,0.291127,0.999596,0.262854,0.633832,0.264116,...,0.2455,0.0509,0.0636,110.3440,69.6560,21.0910,172.9931,171.9542,145.4566,177.4777
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
89,0003_00000091.png,3,91,0.312931,0.670317,-0.514466,0.999999,0.321442,0.675156,-0.534721,...,0.6855,0.3584,0.4197,36.4244,143.5756,87.0698,128.2350,176.1878,171.4285,161.2880
90,0003_00000092.png,3,92,0.298422,0.665564,-0.443733,0.999998,0.306669,0.670630,-0.466838,...,0.6837,0.3895,0.4629,46.1862,133.8138,94.9398,119.6995,175.3259,170.1615,159.5762
91,0003_00000093.png,3,93,0.287410,0.662181,-0.366333,0.999956,0.295265,0.668362,-0.391926,...,0.6296,0.3644,0.4344,62.5864,117.4136,93.5851,111.8605,172.0761,168.8213,156.3796
92,0003_00000094.png,3,94,0.276481,0.661364,-0.241962,0.999810,0.283784,0.667786,-0.272138,...,0.5772,0.3638,0.4164,105.7088,74.2912,95.6611,106.0024,177.0435,166.0090,157.2787
