## Imports


In [1]:
import pandas as pd
import re
import numpy as np
from itertools import combinations

## Notes
Copy the extracted pose file (e.g. `pose_train.csv`) into this notebook's working directory, then set its path in the `pd.read_csv` call below.

In [2]:
# Landmark index -> body-part name. The position of each name in the list
# below is its landmark index (0-based).
body_type_dict = dict(enumerate([
    "head_top",
    "upper_jaw",
    "upper_left_jaw",
    "upper_right_jaw",
    "lower_jaw",
    "lower_left_jaw",
    "lower_right_jaw",
    "mouth_end_left",
    "mouth_end_right",
    "throat_base",
    "throat_end",
    "left_nostril",
    "right_nostril",
    "snout_top",
    "nose_bridge",
    "tongue",
    "left_eye",
    "right_eye",
    "left_ear_base",
    "left_ear_tip",
    "right_ear_base",
    "right_ear_tip",
    "left_horn_base",
    "right_horn_base",
    "neck_base",
    "neck_end",
    "withers",
    "tail_base",
    "tail_end",
    "top_left_knee",
    "top_left_hoof",
    "top_right_knee",
    "top_right_hoof",
    "bottom_left_knee",
    "bottom_left_hoof",
    "bottom_right_knee",
    "bottom_right_hoof",
]))

In [3]:
# The pose file has no header row, so pandas labels the columns 0, 1, 2, ...
df = pd.read_csv("./pose_train.csv", header=None)

# Build the complete positional-label -> name mapping once and rename in a
# single call (renaming inside the loop would copy the frame per landmark).
# Columns 0 and 1 are metadata; after that each landmark occupies two
# consecutive columns (x then y), in the order given by body_type_dict.
column_names = {0: "file_path", 1: "class_type"}
for position, landmark_name in enumerate(body_type_dict.values()):
    column_names[2 * position + 2] = f"{landmark_name}_x"
    column_names[2 * position + 3] = f"{landmark_name}_y"

df = df.rename(columns=column_names)
df.head()

Unnamed: 0,file_path,class_type,head_top_x,head_top_y,upper_jaw_x,upper_jaw_y,upper_left_jaw_x,upper_left_jaw_y,upper_right_jaw_x,upper_right_jaw_y,...,top_right_hoof_x,top_right_hoof_y,bottom_left_knee_x,bottom_left_knee_y,bottom_left_hoof_x,bottom_left_hoof_y,bottom_right_knee_x,bottom_right_knee_y,bottom_right_hoof_x,bottom_right_hoof_y
0,train/cow_bite/salient1/4ff8a74c52c529eceb817a...,cow_bite,296.3614,293.094178,397.665245,206.60334,377.376373,432.355077,305.081375,468.45143,...,205.570562,342.267391,356.86294,358.999825,209.73066,333.227604,212.780845,341.559351,343.793752,122.97855
1,train/cow_bite/salient1/4ff8a74c52c529eceb817a...,cow_bite,459.499241,286.94516,388.054887,193.238409,315.705841,419.37539,245.853248,464.407538,...,267.986913,348.514492,273.177822,221.686398,276.333295,348.945423,219.593627,216.220845,214.391406,266.704047
2,train/cow_bite/salient1/4ff8a74c52c529eceb817a...,cow_bite,289.48741,285.519009,396.84667,186.351404,313.766217,371.262126,244.913861,470.215439,...,4.633939,352.081746,232.706826,234.065257,303.631607,348.217363,254.426393,347.173315,349.183821,343.030674
3,train/cow_bite/salient1/4ff8a74c52c529eceb817a...,cow_bite,288.618273,268.979498,382.164063,173.585077,302.824455,414.413653,229.864511,459.539536,...,227.107254,352.654799,234.858961,231.754416,234.017605,347.96636,231.258714,346.965449,348.791818,231.301306
4,train/cow_bite/salient1/4ff8a74c52c529eceb817a...,cow_bite,313.312155,300.632514,407.919343,201.10665,327.683822,401.319738,254.663514,480.387762,...,234.847301,354.14606,362.431497,362.52812,238.848604,350.278534,346.834737,348.730438,347.520903,344.869274


In [4]:
# Landmarks excluded from the feature set.
removed_landmarks = [
    "left_horn_base",
    "right_horn_base",
    "tail_base",
    "tail_end",
    "top_left_knee",
    "top_left_hoof",
    "top_right_knee",
    "top_right_hoof",
    "bottom_left_knee",
    "bottom_left_hoof",
    "bottom_right_knee",
    "bottom_right_hoof",
]

# Each landmark contributes an "_x" and a "_y" column; drop both.
columns_to_drop = [
    f"{landmark}_{axis}"
    for landmark in removed_landmarks
    for axis in ("x", "y")
]
df = df.drop(columns=columns_to_drop)

df.head()

Unnamed: 0,file_path,class_type,head_top_x,head_top_y,upper_jaw_x,upper_jaw_y,upper_left_jaw_x,upper_left_jaw_y,upper_right_jaw_x,upper_right_jaw_y,...,right_ear_base_x,right_ear_base_y,right_ear_tip_x,right_ear_tip_y,neck_base_x,neck_base_y,neck_end_x,neck_end_y,withers_x,withers_y
0,train/cow_bite/salient1/4ff8a74c52c529eceb817a...,cow_bite,296.3614,293.094178,397.665245,206.60334,377.376373,432.355077,305.081375,468.45143,...,199.306151,294.315023,292.633286,286.924213,115.693075,113.392481,53.178429,60.45972,279.581957,107.219965
1,train/cow_bite/salient1/4ff8a74c52c529eceb817a...,cow_bite,459.499241,286.94516,388.054887,193.238409,315.705841,419.37539,245.853248,464.407538,...,190.420638,257.693499,264.025473,246.944134,109.557899,102.51406,43.726589,43.450309,226.496684,103.957907
2,train/cow_bite/salient1/4ff8a74c52c529eceb817a...,cow_bite,289.48741,285.519009,396.84667,186.351404,313.766217,371.262126,244.913861,470.215439,...,226.628647,297.5705,302.248813,286.882875,144.955528,135.887252,69.888384,3.902289,284.363713,136.13728
3,train/cow_bite/salient1/4ff8a74c52c529eceb817a...,cow_bite,288.618273,268.979498,382.164063,173.585077,302.824455,414.413653,229.864511,459.539536,...,192.475673,266.841775,268.900272,256.714435,107.872097,100.208299,38.425511,44.313932,227.684738,102.217632
4,train/cow_bite/salient1/4ff8a74c52c529eceb817a...,cow_bite,313.312155,300.632514,407.919343,201.10665,327.683822,401.319738,254.663514,480.387762,...,200.583538,271.029247,278.318715,260.384214,117.659545,111.065335,50.618847,59.112247,261.044961,115.299045


## Drop Padding


In [5]:
# df.drop([100], axis=0, inplace=True)
# df = df.loc[:, (df != 0).any(axis=0)]
# df.head()
# len(df)

In [6]:
# Replace the body-part column names with generic, index-based ones.
# The first two columns are metadata; every remaining pair of columns is one
# landmark's (x, y) coordinates.
num_landmarks = (df.shape[1] - 2) // 2

# Only x/y columns are generated; no per-landmark confidence column.
new_columns = ["file_path", "class"] + [
    f"landmark_{i}_{axis}"
    for i in range(num_landmarks)
    for axis in ("x", "y")
]

# Assigning to .columns relabels the frame in place, position by position.
df.columns = new_columns

## Extract ground truth


In [7]:
def extract_salient(row):
    """Derive the salient label from the file path stored in a row.

    Parameters
    ----------
    row : pandas.Series or sequence
        A row whose first element is the file path string.

    Returns
    -------
    int
        0 if the path contains "salient1", 1 if it contains "salient2",
        and -1 (after printing a notice) if neither is present.
    """
    # The path is read by position. Integer keys on a label-indexed Series
    # are deprecated in pandas, so use .iloc when the row provides it and
    # fall back to plain indexing for lists/tuples.
    file_path = row.iloc[0] if hasattr(row, "iloc") else row[0]

    match = re.search(r"salient[12]", file_path)
    if match is None:
        # Return the sentinel right away; there is no matched text to map.
        print("Salient type not found")
        return -1

    return {"salient1": 0, "salient2": 1}[match.group()]


# Label every row from its file path (axis=1 passes one row at a time),
# store the labels as integers, then preview the frame.
salient_labels = df.apply(extract_salient, axis=1)
df["salient"] = salient_labels.astype(int)

df.head()

Unnamed: 0,file_path,class,landmark_0_x,landmark_0_y,landmark_1_x,landmark_1_y,landmark_2_x,landmark_2_y,landmark_3_x,landmark_3_y,...,landmark_20_y,landmark_21_x,landmark_21_y,landmark_22_x,landmark_22_y,landmark_23_x,landmark_23_y,landmark_24_x,landmark_24_y,salient
0,train/cow_bite/salient1/4ff8a74c52c529eceb817a...,cow_bite,296.3614,293.094178,397.665245,206.60334,377.376373,432.355077,305.081375,468.45143,...,294.315023,292.633286,286.924213,115.693075,113.392481,53.178429,60.45972,279.581957,107.219965,0
1,train/cow_bite/salient1/4ff8a74c52c529eceb817a...,cow_bite,459.499241,286.94516,388.054887,193.238409,315.705841,419.37539,245.853248,464.407538,...,257.693499,264.025473,246.944134,109.557899,102.51406,43.726589,43.450309,226.496684,103.957907,0
2,train/cow_bite/salient1/4ff8a74c52c529eceb817a...,cow_bite,289.48741,285.519009,396.84667,186.351404,313.766217,371.262126,244.913861,470.215439,...,297.5705,302.248813,286.882875,144.955528,135.887252,69.888384,3.902289,284.363713,136.13728,0
3,train/cow_bite/salient1/4ff8a74c52c529eceb817a...,cow_bite,288.618273,268.979498,382.164063,173.585077,302.824455,414.413653,229.864511,459.539536,...,266.841775,268.900272,256.714435,107.872097,100.208299,38.425511,44.313932,227.684738,102.217632,0
4,train/cow_bite/salient1/4ff8a74c52c529eceb817a...,cow_bite,313.312155,300.632514,407.919343,201.10665,327.683822,401.319738,254.663514,480.387762,...,271.029247,278.318715,260.384214,117.659545,111.065335,50.618847,59.112247,261.044961,115.299045,0


## Extract distances between points


In [8]:
def euclidean_distance(x1, y1, x2, y2):
    """Return the straight-line distance between (x1, y1) and (x2, y2).

    The arithmetic is delegated to numpy, so scalars and numpy arrays are
    both accepted; arrays are combined element-wise.
    """
    delta_x = x2 - x1
    delta_y = y2 - y1
    return np.sqrt(delta_x ** 2 + delta_y ** 2)

In [9]:
# Generate all unique pairs of landmarks
landmark_pairs = list(combinations(range(num_landmarks), 2))

# Prepare the column names for the distances
distance_columns = [f"distance_{i}_{j}" for i, j in landmark_pairs]

# Pull the coordinates out as (n_rows, num_landmarks) float arrays so every
# pair distance is computed for all rows at once, instead of building and
# concatenating a one-row DataFrame per record.
landmark_x = df[[f"landmark_{i}_x" for i in range(num_landmarks)]].to_numpy(dtype=float)
landmark_y = df[[f"landmark_{i}_y" for i in range(num_landmarks)]].to_numpy(dtype=float)

# Column 0 holds the first landmark of each pair, column 1 the second.
# reshape(-1, 2) keeps the array two-dimensional even when there are no pairs.
pair_index = np.array(landmark_pairs, dtype=int).reshape(-1, 2)
first, second = pair_index[:, 0], pair_index[:, 1]

# Result has shape (n_rows, n_pairs), ordered like distance_columns.
pair_distances = euclidean_distance(
    landmark_x[:, first],
    landmark_y[:, first],
    landmark_x[:, second],
    landmark_y[:, second],
)

# Metadata columns first, then one column per landmark pair. The index is
# reset so both parts align on a fresh 0..n-1 index.
distances_df = pd.concat(
    [
        df[["file_path", "class", "salient"]].reset_index(drop=True),
        pd.DataFrame(pair_distances, columns=distance_columns),
    ],
    axis=1,
)

In [10]:
# Preview the first rows of the pairwise-distance table.
distances_df.head()

Unnamed: 0,file_path,class,salient,distance_0_1,distance_0_2,distance_0_3,distance_0_4,distance_0_5,distance_0_6,distance_0_7,...,distance_20_21,distance_20_22,distance_20_23,distance_20_24,distance_21_22,distance_21_23,distance_21_24,distance_22_23,distance_22_24,distance_23_24
0,train/cow_bite/salient1/4ff8a74c52c529eceb817a...,cow_bite,0,133.203356,161.111835,175.573926,195.291631,173.390706,64.960814,86.767606,...,93.619326,199.309089,275.75644,203.5897,247.832808,329.582759,180.177563,81.914334,164.005078,231.181915
1,train/cow_bite/salient1/4ff8a74c52c529eceb817a...,cow_bite,0,117.835695,195.484802,277.736397,296.549747,61.686543,226.947001,226.647918,...,74.38562,174.984116,259.652246,157.911727,211.471694,299.902209,147.829196,88.4437,116.947697,192.525522
2,train/cow_bite/salient1/4ff8a74c52c529eceb817a...,cow_bite,0,146.151376,89.11421,189.998875,165.202788,174.533132,69.717729,40.775088,...,76.371688,181.14075,332.87915,171.44685,218.038657,366.15486,151.80287,151.839081,139.40841,251.963806
3,train/cow_bite/salient1/4ff8a74c52c529eceb817a...,cow_bite,0,133.607299,146.126347,199.411967,209.201845,175.156016,76.158603,181.068641,...,77.092687,186.880926,270.647544,168.347221,224.553432,313.420786,159.899914,89.145995,119.829488,197.918905
4,train/cow_bite/salient1/4ff8a74c52c529eceb817a...,cow_bite,0,137.316851,101.707728,189.080967,154.812841,159.555847,71.98572,23.226087,...,78.460656,180.180026,259.611679,167.055319,219.334212,303.903989,146.109853,84.814967,143.447906,217.798315


## Extract ratios between distances


In [11]:
# Distance columns, in frame order. Every ratio is col_i / col_j with i < j.
num_columns = [
    col for col in distances_df.columns if col.startswith("distance")]

# Ratio column names, in the same (i, j) order the values are filled in below.
ratio_columns = [
    f"{col1}_to_{col2}" for col1, col2 in combinations(num_columns, 2)
]

# All distances as one (n_rows, n_distances) float array.
distance_values = distances_df[num_columns].to_numpy(dtype=float)
n_rows, n_distances = distance_values.shape

# Pre-filled with zeros: any ratio whose denominator is 0 keeps the value 0.
ratio_values = np.zeros((n_rows, len(ratio_columns)), dtype=float)

# Fill one numerator column at a time. Each pass divides column i by every
# later column for all rows at once and writes straight into the output,
# so no per-row Python loop or list of per-row dicts is needed.
offset = 0
for i in range(n_distances - 1):
    denominators = distance_values[:, i + 1:]
    width = denominators.shape[1]
    np.divide(
        distance_values[:, [i]],
        denominators,
        out=ratio_values[:, offset:offset + width],
        where=denominators != 0,  # skip zero denominators (output stays 0)
    )
    offset += width

# Assemble the result with file_path and salient ahead of the ratio columns.
ratios_df = pd.DataFrame(ratio_values, columns=ratio_columns)
ratios_df.insert(0, "file_path", distances_df["file_path"].to_numpy())
ratios_df.insert(1, "salient", distances_df["salient"].to_numpy())

In [12]:
# Write the ratio features to ./ratios.csv; index=False leaves out the row index.
ratios_df.to_csv("./ratios.csv", index=False)

In [15]:
# Preview the first rows of the ratio-feature table.
ratios_df.head()

Unnamed: 0,file_path,salient,distance_0_1_to_distance_0_2,distance_0_1_to_distance_0_3,distance_0_1_to_distance_0_4,distance_0_1_to_distance_0_5,distance_0_1_to_distance_0_6,distance_0_1_to_distance_0_7,distance_0_1_to_distance_0_8,distance_0_1_to_distance_0_9,...,distance_21_23_to_distance_21_24,distance_21_23_to_distance_22_23,distance_21_23_to_distance_22_24,distance_21_23_to_distance_23_24,distance_21_24_to_distance_22_23,distance_21_24_to_distance_22_24,distance_21_24_to_distance_23_24,distance_22_23_to_distance_22_24,distance_22_23_to_distance_23_24,distance_22_24_to_distance_23_24
0,train/cow_bite/salient1/4ff8a74c52c529eceb817a...,0,0.826776,0.758674,0.682074,0.768227,2.050519,1.535174,1.283229,0.745853,...,1.829211,4.023505,2.009589,1.425642,2.199585,1.09861,0.779376,0.499462,0.354328,0.70942
1,train/cow_bite/salient1/4ff8a74c52c529eceb817a...,0,0.602787,0.424272,0.397356,1.910233,0.519221,0.519906,1.011723,0.529132,...,2.028708,3.390883,2.564413,1.557727,1.67145,1.264062,0.767842,0.756267,0.459387,0.60744
2,train/cow_bite/salient1/4ff8a74c52c529eceb817a...,0,1.640046,0.769222,0.884679,0.837385,2.09633,3.58433,1.321587,0.70035,...,2.412042,2.411467,2.62649,1.453204,0.999762,1.088908,0.602479,1.089167,0.602623,0.553287
3,train/cow_bite/salient1/4ff8a74c52c529eceb817a...,0,0.914327,0.670006,0.638653,0.76279,1.75433,0.737882,1.44802,0.698797,...,1.960106,3.515815,2.615556,1.583582,1.793686,1.334395,0.807906,0.74394,0.450417,0.605447
4,train/cow_bite/salient1/4ff8a74c52c529eceb817a...,0,1.350112,0.726233,0.886986,0.860619,1.907557,5.912182,1.402001,0.734557,...,2.079969,3.583141,2.118567,1.395346,1.722689,1.018557,0.670849,0.59126,0.38942,0.658627


In [16]:
# Number of rows in the ratio-feature table.
len(ratios_df)

5525