## Imports


In [70]:
import pandas as pd
import re
import numpy as np
from itertools import combinations

In [71]:
# Load the pose CSV. header=None means the first line is read as data, so
# the columns receive integer labels (0, 1, 2, ...).
df = pd.read_csv("./data/poseRAC_bite.csv", header=None)
# Preview the first rows of the raw table.
df.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,91,92,93,94,95,96,97,98,99,100
0,train/cow_bite/salient1/9c08225dd12cdfdfbdc000...,cow_bite,779.677412,42.7196,0.954902,650.562967,733.638968,0.919949,785.448387,736.713292,...,0,0,0,0,0,0,0,0,0,0
1,train/cow_bite/salient1/9c08225dd12cdfdfbdc000...,cow_bite,754.513492,38.838625,0.952326,609.207305,727.149117,0.907592,745.753844,731.021858,...,0,0,0,0,0,0,0,0,0,0
2,train/cow_bite/salient1/9c08225dd12cdfdfbdc000...,cow_bite,766.38283,37.61867,0.948925,629.896682,716.013459,0.938015,773.370746,718.807315,...,0,0,0,0,0,0,0,0,0,0
3,train/cow_bite/salient1/056837a2b71e7d93ad65e3...,cow_bite,145.902219,459.679419,0.005438,140.804692,463.26641,0.983381,201.574115,457.743308,...,0,0,0,0,0,0,0,0,0,0
4,train/cow_bite/salient1/056837a2b71e7d93ad65e3...,cow_bite,144.096931,453.290973,0.013318,145.528933,461.034141,0.987257,214.992833,456.064653,...,0,0,0,0,0,0,0,0,0,0


## Drop Padding


In [72]:
# Remove the row labelled 100, then keep only the columns that hold at least
# one non-zero entry (all-zero columns are treated as padding).
# NOTE(review): axis=0 / index= targets the ROW labelled 100, not column 100 —
# confirm that dropping this row is intended.
df = df.drop(index=[100]).loc[:, lambda frame: frame.ne(0).any(axis=0)]

In [73]:
# Build descriptive column labels: the first two columns are the file path
# and the class, followed by an (x, y, confidence) triple per landmark.
num_landmarks = (df.shape[1] - 2) // 3
new_columns = ["file_path", "class"] + [
    f"landmark_{landmark}_{field}"
    for landmark in range(num_landmarks)
    for field in ("x", "y", "confidence")
]

# Replace the integer column labels with the descriptive ones
df.columns = new_columns

## Extract ground truth


In [78]:
def extract_salient(row):
    """Derive the salient label of a row from its file path.

    Parameters
    ----------
    row : pandas.Series or sequence
        A row whose first element is the file path.

    Returns
    -------
    int
        0 when the path contains "salient1", 1 when it contains "salient2",
        and -1 (after printing a notice) when neither marker is present.
    """
    # Read the first element by position. A Series goes through .iloc because
    # integer keys on a string-labelled Series are a deprecated positional
    # fallback; plain sequences keep using ordinary indexing.
    file_path = row.iloc[0] if hasattr(row, "iloc") else row[0]

    # str() keeps non-string entries (e.g. NaN) from raising inside re.search;
    # they simply fail to match and are reported as -1.
    match = re.search(r"salient[12]", str(file_path))
    if match is None:
        print("Salient type not found")
        return -1

    # The pattern can only match "salient1" or "salient2".
    return 0 if match.group() == "salient1" else 1


# Label every row from its file path and store the result as an integer column
df["salient"] = df.apply(extract_salient, axis=1).astype(int)

df.head()

Unnamed: 0,file_path,class,landmark_0_x,landmark_0_y,landmark_0_confidence,landmark_1_x,landmark_1_y,landmark_1_confidence,landmark_2_x,landmark_2_y,...,landmark_24_x,landmark_24_y,landmark_24_confidence,landmark_25_x,landmark_25_y,landmark_25_confidence,landmark_26_x,landmark_26_y,landmark_26_confidence,salient
0,train/cow_bite/salient1/9c08225dd12cdfdfbdc000...,cow_bite,779.677412,42.7196,0.954902,650.562967,733.638968,0.919949,785.448387,736.713292,...,549.438713,611.779741,0.00037,954.417266,250.100252,0.000883,739.499299,598.034264,0.000778,0
1,train/cow_bite/salient1/9c08225dd12cdfdfbdc000...,cow_bite,754.513492,38.838625,0.952326,609.207305,727.149117,0.907592,745.753844,731.021858,...,1457.042363,610.26156,0.001303,939.091961,255.03548,0.001179,1465.45753,610.086516,0.009204,0
2,train/cow_bite/salient1/9c08225dd12cdfdfbdc000...,cow_bite,766.38283,37.61867,0.948925,629.896682,716.013459,0.938015,773.370746,718.807315,...,1460.022642,609.337063,0.000991,938.569259,247.348415,0.001267,1468.243225,610.377826,0.003773,0
3,train/cow_bite/salient1/056837a2b71e7d93ad65e3...,cow_bite,145.902219,459.679419,0.005438,140.804692,463.26641,0.983381,201.574115,457.743308,...,262.694797,491.348537,0.000455,207.612561,90.797421,0.001956,212.359303,90.248343,0.000509,0
4,train/cow_bite/salient1/056837a2b71e7d93ad65e3...,cow_bite,144.096931,453.290973,0.013318,145.528933,461.034141,0.987257,214.992833,456.064653,...,405.189564,131.034728,0.000275,207.184497,81.546941,0.00059,405.075175,129.550741,0.000282,0


## Extract distances between points


In [79]:
# Number of landmarks
# NOTE(review): hard-coded. An earlier cell derives num_landmarks from the
# column count, and the renamed columns displayed above run from landmark_0
# to landmark_26 — confirm this value stays in sync if the input changes.
num_landmarks = 27


# Straight-line distance between two 2-D points
def euclidean_distance(x1, y1, x2, y2):
    """Return the Euclidean distance between (x1, y1) and (x2, y2).

    The arithmetic is element-wise, so numpy arrays work as well as scalars.
    """
    delta_x = x2 - x1
    delta_y = y2 - y1
    return np.sqrt(delta_x ** 2 + delta_y ** 2)

In [91]:
# Generate all unique pairs of landmarks
landmark_pairs = list(combinations(range(num_landmarks), 2))

# Prepare the column names for the distances
distance_columns = [f"distance_{i}_{j}" for i, j in landmark_pairs]

# Positions of the first and second landmark of every pair; an explicit
# integer dtype keeps the indexing valid even when there are no pairs.
first_landmark = np.array([i for i, _ in landmark_pairs], dtype=np.intp)
second_landmark = np.array([j for _, j in landmark_pairs], dtype=np.intp)

# Coordinates as (n_rows, num_landmarks) arrays, columns ordered by landmark
x_coords = df[[f"landmark_{k}_x" for k in range(num_landmarks)]].to_numpy(dtype=float)
y_coords = df[[f"landmark_{k}_y" for k in range(num_landmarks)]].to_numpy(dtype=float)

# Compute every pairwise distance for every row in one vectorised call
# rather than building and concatenating one DataFrame per row.
pair_distances = euclidean_distance(
    x_coords[:, first_landmark],
    y_coords[:, first_landmark],
    x_coords[:, second_landmark],
    y_coords[:, second_landmark],
)

# Assemble the result: identifying columns first, then the distances.
# reset_index(drop=True) gives the 0..n-1 row index that the distance frame
# also has, so the two parts line up row by row.
distances_df = pd.concat(
    [
        df[["file_path", "class", "salient"]].reset_index(drop=True),
        pd.DataFrame(pair_distances, columns=distance_columns),
    ],
    axis=1,
)

In [92]:
# Preview the first rows of the pairwise-distance table.
distances_df.head()

Unnamed: 0,file_path,class,salient,distance_0_1,distance_0_2,distance_0_3,distance_0_4,distance_0_5,distance_0_6,distance_0_7,...,distance_22_23,distance_22_24,distance_22_25,distance_22_26,distance_23_24,distance_23_25,distance_23_26,distance_24_25,distance_24_26,distance_25_26
0,train/cow_bite/salient1/9c08225dd12cdfdfbdc000...,cow_bite,0,702.879871,694.017687,739.260678,767.667587,758.316579,784.117093,556.262828,...,186.360298,191.396473,410.443325,2.555814,208.557694,348.877829,183.809198,542.973002,190.556984,408.959423
1,train/cow_bite/salient1/9c08225dd12cdfdfbdc000...,cow_bite,0,703.480789,692.238658,744.031563,769.038322,758.966183,785.447668,557.63173,...,742.77504,1212.829339,741.740264,1221.210226,627.484796,1.034843,634.337944,628.059064,8.416987,634.918853
2,train/cow_bite/salient1/9c08225dd12cdfdfbdc000...,cow_bite,0,691.988409,681.224486,736.063344,763.435068,751.385075,778.490787,547.178535,...,1166.816501,1208.655712,736.92012,1216.930259,211.853585,716.932338,213.913616,634.782964,8.286204,642.140844
3,train/cow_bite/salient1/056837a2b71e7d93ad65e3...,cow_bite,0,6.23308,55.705552,16.508006,61.269006,121.797399,43.007229,54.330617,...,11.958054,377.517249,196.980536,192.521119,386.832605,205.789996,201.264271,404.320726,404.246246,4.778393
4,train/cow_bite/salient1/056837a2b71e7d93ad65e3...,cow_bite,0,7.87447,70.950139,13.930017,47.488788,110.525708,32.020989,51.903215,...,222.579077,217.956981,14.010883,217.477084,5.203131,208.69558,6.36785,204.095683,1.488389,203.629775


## Extract ratios between distances


In [94]:
# Distance columns whose pairwise ratios are computed. Built once, since the
# list does not depend on the row being processed.
num_columns = [col for col in distances_df.columns if col.startswith("distance")]

# Column index pairs (i, j) with i < j, in the same row-major order that a
# nested "for i / for j in range(i + 1, ...)" loop would visit them.
numerator_idx, denominator_idx = np.triu_indices(len(num_columns), k=1)

# Names of the ratio columns, one per (numerator, denominator) pair
ratio_columns = [
    f"{num_columns[i]}_to_{num_columns[j]}"
    for i, j in zip(numerator_idx, denominator_idx)
]

# All distances as an (n_rows, n_distances) float array
distance_values = distances_df[num_columns].to_numpy(dtype=float)

# Fancy indexing copies, so ratio_values can be overwritten in place.
ratio_values = distance_values[:, numerator_idx]
denominators = distance_values[:, denominator_idx]

# Divide only where the denominator is non-zero; a zero denominator yields a
# ratio of 0 instead of inf/nan.
nonzero_denominator = denominators != 0
np.divide(ratio_values, denominators, out=ratio_values, where=nonzero_denominator)
ratio_values[~nonzero_denominator] = 0.0

# Assemble the result: identifying columns first, then every ratio.
# reset_index(drop=True) gives the 0..n-1 row index that the ratio frame
# also has, so the two parts line up row by row.
ratios_df = pd.concat(
    [
        distances_df[["file_path", "salient"]].reset_index(drop=True),
        pd.DataFrame(ratio_values, columns=ratio_columns),
    ],
    axis=1,
)

In [95]:
# Persist the ratio table as CSV; index=False leaves the row index out of the file.
ratios_df.to_csv("./data/ratios.csv", index=False)

In [96]:
# Preview the first rows of the ratio table.
ratios_df.head()

Unnamed: 0,file_path,salient,distance_0_1_to_distance_0_2,distance_0_1_to_distance_0_3,distance_0_1_to_distance_0_4,distance_0_1_to_distance_0_5,distance_0_1_to_distance_0_6,distance_0_1_to_distance_0_7,distance_0_1_to_distance_0_8,distance_0_1_to_distance_0_9,...,distance_23_25_to_distance_23_26,distance_23_25_to_distance_24_25,distance_23_25_to_distance_24_26,distance_23_25_to_distance_25_26,distance_23_26_to_distance_24_25,distance_23_26_to_distance_24_26,distance_23_26_to_distance_25_26,distance_24_25_to_distance_24_26,distance_24_25_to_distance_25_26,distance_24_26_to_distance_25_26
0,train/cow_bite/salient1/9c08225dd12cdfdfbdc000...,0,1.012769,0.950788,0.915604,0.926895,0.896397,1.263575,1.149097,2.566077,...,1.898043,0.642533,1.830832,0.853087,0.338524,0.964589,0.449456,2.8494,1.327694,0.465956
1,train/cow_bite/salient1/9c08225dd12cdfdfbdc000...,0,1.01624,0.945499,0.914754,0.926893,0.895643,1.261551,1.148008,2.485025,...,0.001631,0.001648,0.122947,0.00163,1.009997,75.364016,0.999085,74.618039,0.989196,0.013257
2,train/cow_bite/salient1/9c08225dd12cdfdfbdc000...,0,1.015801,0.940121,0.906414,0.92095,0.888885,1.264648,1.148001,2.460122,...,3.351504,1.129413,86.521207,1.116472,0.336987,25.815636,0.333126,76.607213,0.988542,0.012904
3,train/cow_bite/salient1/056837a2b71e7d93ad65e3...,0,0.111893,0.377579,0.101733,0.051176,0.144931,0.114725,0.105052,0.014934,...,1.022486,0.508977,0.509071,43.06678,0.497784,0.497875,42.119657,1.000184,84.614374,84.598787
4,train/cow_bite/salient1/056837a2b71e7d93ad65e3...,0,0.110986,0.565288,0.165817,0.071246,0.245916,0.151715,0.150922,0.018821,...,32.773319,1.022538,140.215747,1.024878,0.0312,4.278351,0.031272,137.125226,1.002288,0.007309
