## Imports


In [1]:
import pandas as pd
import re
import numpy as np
from itertools import combinations
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler

## Notes
Copy the extracted pose file (`pose_train.csv`) into the same directory as this notebook; the next cell loads it from `./pose_train.csv`.

In [2]:
# Load the extracted pose table. header=None tells pandas the file has no header
# row, so the columns are labelled with integers (0, 1, 2, ...) until renamed below.
df = pd.read_csv("./pose_train.csv", header=None)

# Body-part names in keypoint order: a name's position in this tuple is its
# keypoint index.
_body_part_names = (
    "head_top",
    "upper_jaw",
    "upper_left_jaw",
    "upper_right_jaw",
    "lower_jaw",
    "lower_left_jaw",
    "lower_right_jaw",
    "mouth_end_left",
    "mouth_end_right",
    "throat_base",
    "throat_end",
    "left_nostril",
    "right_nostril",
    "snout_top",
    "nose_bridge",
    "tongue",
    "left_eye",
    "right_eye",
    "left_ear_base",
    "left_ear_tip",
    "right_ear_base",
    "right_ear_tip",
    "left_horn_base",
    "right_horn_base",
    "neck_base",
    "neck_end",
    "withers",
    "tail_base",
    "tail_end",
    "top_left_knee",
    "top_left_hoof",
    "top_right_knee",
    "top_right_hoof",
    "bottom_left_knee",
    "bottom_left_hoof",
    "bottom_right_knee",
    "bottom_right_hoof",
)

# Keypoint index (0..36) -> body-part name.
body_type_dict = dict(enumerate(_body_part_names))

# `df` was already loaded from ./pose_train.csv at the top of this cell and has
# not been modified since, so the file is not read a second time here.

# Build the full positional-label -> name mapping first, then rename once.
# Column 0 is the file path and column 1 the class label; after that each body
# part occupies two consecutive columns (x at 2*i+2, y at 2*i+3), where i is the
# body part's position in body_type_dict.
column_names = {0: "file_path", 1: "class_type"}
for i, value in enumerate(body_type_dict.values()):
    column_names[2*i + 2] = f"{value}_x"
    column_names[2*i + 3] = f"{value}_y"

# A single rename avoids copying the DataFrame once per body part.
df = df.rename(columns=column_names)

# Landmarks excluded from the feature table: horn bases, tail points and all
# knee/hoof points.
removed_landmarks = [
    "left_horn_base",
    "right_horn_base",
    "tail_base",
    "tail_end",
    "top_left_knee",
    "top_left_hoof",
    "top_right_knee",
    "top_right_hoof",
    "bottom_left_knee",
    "bottom_left_hoof",
    "bottom_right_knee",
    "bottom_right_hoof",
]

# Each landmark owns an `_x` and a `_y` column; collect both for every
# excluded landmark and drop them in one call.
columns_to_drop = []
for landmark in removed_landmarks:
    columns_to_drop.extend((f"{landmark}_x", f"{landmark}_y"))

df = df.drop(columns=columns_to_drop)

# Everything after the two leading columns (file path and class label) comes in
# (x, y) pairs, one pair per remaining landmark.
num_landmarks = (df.shape[1] - 2) // 2

# Replace the body-part names with positional names landmark_<i>_x / landmark_<i>_y,
# and name the label column "class". Only x and y columns are generated; no
# per-landmark confidence column is included.
new_columns = ["file_path", "class"] + [
    f"landmark_{i}_{axis}"
    for i in range(num_landmarks)
    for axis in ("x", "y")
]

# Overwrite the column labels positionally with the names built above.
df.columns = new_columns

df.head()

Unnamed: 0,file_path,class,landmark_0_x,landmark_0_y,landmark_1_x,landmark_1_y,landmark_2_x,landmark_2_y,landmark_3_x,landmark_3_y,...,landmark_20_x,landmark_20_y,landmark_21_x,landmark_21_y,landmark_22_x,landmark_22_y,landmark_23_x,landmark_23_y,landmark_24_x,landmark_24_y
0,train/cow_bite/salient1/4ff8a74c52c529eceb817a...,cow_bite,296.3614,293.094178,397.665245,206.60334,377.376373,432.355077,305.081375,468.45143,...,199.306151,294.315023,292.633286,286.924213,115.693075,113.392481,53.178429,60.45972,279.581957,107.219965
1,train/cow_bite/salient1/4ff8a74c52c529eceb817a...,cow_bite,459.499241,286.94516,388.054887,193.238409,315.705841,419.37539,245.853248,464.407538,...,190.420638,257.693499,264.025473,246.944134,109.557899,102.51406,43.726589,43.450309,226.496684,103.957907
2,train/cow_bite/salient1/4ff8a74c52c529eceb817a...,cow_bite,289.48741,285.519009,396.84667,186.351404,313.766217,371.262126,244.913861,470.215439,...,226.628647,297.5705,302.248813,286.882875,144.955528,135.887252,69.888384,3.902289,284.363713,136.13728
3,train/cow_bite/salient1/4ff8a74c52c529eceb817a...,cow_bite,288.618273,268.979498,382.164063,173.585077,302.824455,414.413653,229.864511,459.539536,...,192.475673,266.841775,268.900272,256.714435,107.872097,100.208299,38.425511,44.313932,227.684738,102.217632
4,train/cow_bite/salient1/4ff8a74c52c529eceb817a...,cow_bite,313.312155,300.632514,407.919343,201.10665,327.683822,401.319738,254.663514,480.387762,...,200.583538,271.029247,278.318715,260.384214,117.659545,111.065335,50.618847,59.112247,261.044961,115.299045


In [3]:
# Load the ratio features from ./ratios.csv and preview the first rows.
ratio_df = pd.read_csv("./ratios.csv")
ratio_df.head()

Unnamed: 0,file_path,salient,distance_0_1_to_distance_0_2,distance_0_1_to_distance_0_3,distance_0_1_to_distance_0_4,distance_0_1_to_distance_0_5,distance_0_1_to_distance_0_6,distance_0_1_to_distance_0_7,distance_0_1_to_distance_0_8,distance_0_1_to_distance_0_9,...,distance_21_23_to_distance_21_24,distance_21_23_to_distance_22_23,distance_21_23_to_distance_22_24,distance_21_23_to_distance_23_24,distance_21_24_to_distance_22_23,distance_21_24_to_distance_22_24,distance_21_24_to_distance_23_24,distance_22_23_to_distance_22_24,distance_22_23_to_distance_23_24,distance_22_24_to_distance_23_24
0,train/cow_bite/salient1/4ff8a74c52c529eceb817a...,0,0.826776,0.758674,0.682074,0.768227,2.050519,1.535174,1.283229,0.745853,...,1.829211,4.023505,2.009589,1.425642,2.199585,1.09861,0.779376,0.499462,0.354328,0.70942
1,train/cow_bite/salient1/4ff8a74c52c529eceb817a...,0,0.602787,0.424272,0.397356,1.910233,0.519221,0.519906,1.011723,0.529132,...,2.028708,3.390883,2.564413,1.557727,1.67145,1.264062,0.767842,0.756267,0.459387,0.60744
2,train/cow_bite/salient1/4ff8a74c52c529eceb817a...,0,1.640046,0.769222,0.884679,0.837385,2.09633,3.58433,1.321587,0.70035,...,2.412042,2.411467,2.62649,1.453204,0.999762,1.088908,0.602479,1.089167,0.602623,0.553287
3,train/cow_bite/salient1/4ff8a74c52c529eceb817a...,0,0.914327,0.670006,0.638653,0.76279,1.75433,0.737882,1.44802,0.698797,...,1.960106,3.515815,2.615556,1.583582,1.793686,1.334395,0.807906,0.74394,0.450417,0.605447
4,train/cow_bite/salient1/4ff8a74c52c529eceb817a...,0,1.350112,0.726233,0.886986,0.860619,1.907557,5.912182,1.402001,0.734557,...,2.079969,3.583141,2.118567,1.395346,1.722689,1.018557,0.670849,0.59126,0.38942,0.658627


In [4]:
# Join the landmark table with the ratio features on 'file_path'.
# how='inner' keeps only the rows whose file_path appears in both tables.
merged_df = df.merge(ratio_df, on='file_path', how='inner')
merged_df.head()

Unnamed: 0,file_path,class,landmark_0_x,landmark_0_y,landmark_1_x,landmark_1_y,landmark_2_x,landmark_2_y,landmark_3_x,landmark_3_y,...,distance_21_23_to_distance_21_24,distance_21_23_to_distance_22_23,distance_21_23_to_distance_22_24,distance_21_23_to_distance_23_24,distance_21_24_to_distance_22_23,distance_21_24_to_distance_22_24,distance_21_24_to_distance_23_24,distance_22_23_to_distance_22_24,distance_22_23_to_distance_23_24,distance_22_24_to_distance_23_24
0,train/cow_bite/salient1/4ff8a74c52c529eceb817a...,cow_bite,296.3614,293.094178,397.665245,206.60334,377.376373,432.355077,305.081375,468.45143,...,1.829211,4.023505,2.009589,1.425642,2.199585,1.09861,0.779376,0.499462,0.354328,0.70942
1,train/cow_bite/salient1/4ff8a74c52c529eceb817a...,cow_bite,459.499241,286.94516,388.054887,193.238409,315.705841,419.37539,245.853248,464.407538,...,2.028708,3.390883,2.564413,1.557727,1.67145,1.264062,0.767842,0.756267,0.459387,0.60744
2,train/cow_bite/salient1/4ff8a74c52c529eceb817a...,cow_bite,289.48741,285.519009,396.84667,186.351404,313.766217,371.262126,244.913861,470.215439,...,2.412042,2.411467,2.62649,1.453204,0.999762,1.088908,0.602479,1.089167,0.602623,0.553287
3,train/cow_bite/salient1/4ff8a74c52c529eceb817a...,cow_bite,288.618273,268.979498,382.164063,173.585077,302.824455,414.413653,229.864511,459.539536,...,1.960106,3.515815,2.615556,1.583582,1.793686,1.334395,0.807906,0.74394,0.450417,0.605447
4,train/cow_bite/salient1/4ff8a74c52c529eceb817a...,cow_bite,313.312155,300.632514,407.919343,201.10665,327.683822,401.319738,254.663514,480.387762,...,2.079969,3.583141,2.118567,1.395346,1.722689,1.018557,0.670849,0.59126,0.38942,0.658627


## Merge the angle data

In [5]:
def custom_angle_scaling(data, full_turn=360):
    """Express angles as a fraction of a full turn.

    Divides ``data`` by ``full_turn``. With the default of 360, an input of 0
    maps to 0.0 and an input of 360 maps to 1.0. No min/max normalisation is
    performed: the result depends only on ``data`` and ``full_turn``.

    Parameters
    ----------
    data : scalar, numpy array, pandas Series or DataFrame
        Angle value(s) to scale; anything that supports division by a number.
    full_turn : number, default 360
        Value that should map to 1.0.

    Returns
    -------
    Same kind of object as ``data``, holding ``data / full_turn``.

    Raises
    ------
    ValueError
        If ``full_turn`` is zero.
    """
    # Fail loudly: array-like inputs would otherwise silently become inf/NaN.
    if full_turn == 0:
        raise ValueError("full_turn must be non-zero")
    return data / full_turn

In [6]:
# Read the angle features.
angles_df = pd.read_csv("./angles.csv")

# 'file_path' and 'salient' are not angles, so they are set aside before
# scaling; everything else is passed through custom_angle_scaling.
X_angles = angles_df.drop(columns=['file_path', 'salient'])

# Scale column by column; the result keeps X_angles' index and column labels.
scaled_values = X_angles.apply(custom_angle_scaling)

# Re-attach the unscaled columns after the angle columns. `.values` copies
# them positionally, without index alignment.
scaled_angles_df = scaled_values.assign(
    file_path=angles_df['file_path'].values,
    salient=angles_df['salient'].values,
)

# Preview the scaled table.
scaled_angles_df.head()


Unnamed: 0,angle_0_1_2,angle_0_1_3,angle_0_1_4,angle_0_1_5,angle_0_1_6,angle_0_1_7,angle_0_1_8,angle_0_1_9,angle_0_1_10,angle_0_1_11,...,angle_20_21_24,angle_20_22_23,angle_20_22_24,angle_20_23_24,angle_21_22_23,angle_21_22_24,angle_21_23_24,angle_22_23_24,file_path,salient
0,0.123263,0.083438,0.125147,0.203703,0.035166,0.0505,0.050439,0.085216,0.208719,0.083393,...,0.251039,0.430721,0.187092,0.128696,0.488368,0.129444,0.088148,0.079405,train/cow_bite/salient1/4ff8a74c52c529eceb817a...,0
1,0.152956,0.180543,0.226937,0.036238,0.255126,0.155853,0.22008,0.142044,0.037728,0.093243,...,0.232229,0.442839,0.17158,0.103563,0.496727,0.117692,0.06781,0.065502,train/cow_bite/salient1/4ff8a74c52c529eceb817a...,0
2,0.064103,0.053095,0.00788,0.181443,0.034247,0.044484,0.022303,0.128555,0.054623,0.010207,...,0.253551,0.492142,0.175269,0.084038,0.454053,0.121464,0.05265,0.079763,train/cow_bite/salient1/4ff8a74c52c529eceb817a...,0
3,0.072792,0.045555,0.178432,0.182603,0.029313,0.070093,0.008011,0.084257,0.389438,0.165819,...,0.229476,0.43263,0.172559,0.106374,0.485125,0.120065,0.071254,0.060604,train/cow_bite/salient1/4ff8a74c52c529eceb817a...,0
4,0.060306,0.041091,0.009453,0.167545,0.034506,0.014861,0.002763,0.044587,0.068245,0.024062,...,0.2528,0.431044,0.169186,0.110457,0.485747,0.114482,0.073679,0.0634,train/cow_bite/salient1/4ff8a74c52c529eceb817a...,0


In [7]:
# Join the landmark+ratio table with the scaled angles on 'file_path'.
# how='inner' keeps only the rows whose file_path appears in both tables.
# NOTE(review): both inputs carry a 'salient' column, so pandas' default
# suffixes turn them into 'salient_x' and 'salient_y' in the result. Confirm
# downstream code expects both before deduplicating.
final_merged_df = merged_df.merge(scaled_angles_df, on='file_path', how='inner')
final_merged_df.head()

Unnamed: 0,file_path,class,landmark_0_x,landmark_0_y,landmark_1_x,landmark_1_y,landmark_2_x,landmark_2_y,landmark_3_x,landmark_3_y,...,angle_20_21_23,angle_20_21_24,angle_20_22_23,angle_20_22_24,angle_20_23_24,angle_21_22_23,angle_21_22_24,angle_21_23_24,angle_22_23_24,salient_y
0,train/cow_bite/salient1/4ff8a74c52c529eceb817a...,cow_bite,296.3614,293.094178,397.665245,206.60334,377.376373,432.355077,305.081375,468.45143,...,0.133141,0.251039,0.430721,0.187092,0.128696,0.488368,0.129444,0.088148,0.079405,0
1,train/cow_bite/salient1/4ff8a74c52c529eceb817a...,cow_bite,459.499241,286.94516,388.054887,193.238409,315.705841,419.37539,245.853248,464.407538,...,0.141772,0.232229,0.442839,0.17158,0.103563,0.496727,0.117692,0.06781,0.065502,0
2,train/cow_bite/salient1/4ff8a74c52c529eceb817a...,cow_bite,289.48741,285.519009,396.84667,186.351404,313.766217,371.262126,244.913861,470.215439,...,0.162929,0.253551,0.492142,0.175269,0.084038,0.454053,0.121464,0.05265,0.079763,0
3,train/cow_bite/salient1/4ff8a74c52c529eceb817a...,cow_bite,288.618273,268.979498,382.164063,173.585077,302.824455,414.413653,229.864511,459.539536,...,0.139476,0.229476,0.43263,0.172559,0.106374,0.485125,0.120065,0.071254,0.060604,0
4,train/cow_bite/salient1/4ff8a74c52c529eceb817a...,cow_bite,313.312155,300.632514,407.919343,201.10665,327.683822,401.319738,254.663514,480.387762,...,0.136867,0.2528,0.431044,0.169186,0.110457,0.485747,0.114482,0.073679,0.0634,0


In [8]:
# Write the merged feature table to CSV; index=False leaves the DataFrame index out of the file.
final_merged_df.to_csv('ratio_angle_extracted.csv', index=False)

In [9]:
# Number of rows remaining after both inner merges.
len(final_merged_df)

5525