# Imports

In [2]:
import glob
import os
import numpy as np
import pandas as pd
import cv2
from get_landmarks import get_landmarks
import matplotlib.pyplot as plt
from tqdm import tqdm

from sklearn.preprocessing import StandardScaler
from joblib import dump, load

from sklearn.pipeline import make_pipeline
from sklearn.compose import ColumnTransformer

2024-05-19 20:50:19.193878: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


# Data Extraction

In [3]:
# Root folder of the dataset: one sub-directory per pose, image files inside.
# Kept as a "/"-terminated string so the paths built below stay "/"-joined.
path = "clean_data/TEST_TRAIN/"

# Pose names are the sub-directory names under the root. glob makes no
# ordering guarantee, so sort for a reproducible listing, and ignore any
# stray non-directory entries sitting next to the pose folders.
poses = sorted(
    os.path.basename(entry)
    for entry in glob.glob(f"{path}*")
    if os.path.isdir(entry)
)

# Every entry inside every pose directory, grouped by pose and sorted within
# each pose so repeated runs produce the same list.
all_imgs_path = []
for pose in poses:
    curr_path = f"{path}{pose}/"
    all_imgs_path.extend(sorted(glob.glob(f"{curr_path}*")))

In [4]:
# # Do Not Run unless necessary!
# One-off extraction step, kept commented out. It calls get_landmarks on every
# image listed in all_imgs_path, appends the image path to each result, and
# writes the resulting table to raw_kp_data.csv (the file the next cell reads).
# NOTE(review): assumes get_landmarks returns a list (it is .append-ed to) — confirm.
# NOTE(review): the loop variable is named `path`, so running this cell rebinds
# the dataset-root `path` defined in the data-extraction cell above.
# data = []
# for path in tqdm(all_imgs_path, desc="Processing Images..."):
#     landmarks = get_landmarks(cv2.imread(path))
#     landmarks.append(path)
#     data.append(landmarks)

# df = pd.DataFrame(data)

# df.to_csv("raw_kp_data.csv")

In [5]:
# Load the cached landmark table. The first CSV column is discarded
# (presumably the row index written by to_csv — confirm against the writer).
df = pd.read_csv("raw_kp_data.csv")
df = df.drop(columns=df.columns[0])

# Number of landmarks per image; each contributes an x and a y column.
# NOTE(review): presumably fixed by what get_landmarks returns — confirm.
N_LANDMARKS = 33

# Column layout: lmk0_x, lmk0_y, ..., lmk32_x, lmk32_y, then the image path.
columns = [f"lmk{idx}_{axis}" for idx in range(N_LANDMARKS) for axis in ("x", "y")]
columns.append("file_name")

# Fail loudly on a layout mismatch: renaming through zip() would silently
# truncate and leave some columns unnamed or mislabelled.
if len(df.columns) != len(columns):
    raise ValueError(
        f"raw_kp_data.csv has {len(df.columns)} data columns, expected {len(columns)}"
    )

df.columns = columns
df.head(3)

Unnamed: 0,lmk0_x,lmk0_y,lmk1_x,lmk1_y,lmk2_x,lmk2_y,lmk3_x,lmk3_y,lmk4_x,lmk4_y,...,lmk28_y,lmk29_x,lmk29_y,lmk30_x,lmk30_y,lmk31_x,lmk31_y,lmk32_x,lmk32_y,file_name
0,0.385088,0.702528,0.364045,0.705285,0.361666,0.700772,0.359247,0.696249,0.364545,0.705934,...,0.768468,0.898328,0.851075,0.870051,0.81165,0.781881,0.930616,0.763475,0.904605,clean_data/TEST_TRAIN/downdog/00000372.jpg
1,0.715758,0.547609,0.729912,0.527488,0.729571,0.523913,0.728997,0.520017,0.728891,0.527579,...,0.664694,0.307465,0.655862,0.261937,0.680302,0.380548,0.670513,0.336641,0.713165,clean_data/TEST_TRAIN/downdog/00000414.jpg
2,0.530292,0.608646,0.514618,0.623104,0.510664,0.621608,0.506621,0.619859,0.517501,0.626247,...,0.729783,0.889003,0.7819,0.853871,0.763474,0.806643,0.812826,0.792238,0.797374,clean_data/TEST_TRAIN/downdog/00000158.jpg


In [6]:
# Build the feature matrix (df_X) and the label vector (df_y) from df.

# The label is the name of the directory that directly contains the image.
# Taking the second-to-last path component (rather than a fixed index from
# the left) stays correct if the dataset root changes depth, and normalising
# "\" to "/" first handles paths whose last separator was produced on Windows.
df["pose"] = (
    df["file_name"]
    .str.replace("\\", "/", regex=False)
    .str.split("/")
    .str[-2]
    .astype("category")
)

# Features are every column except the label and the source path.
df_X = df.drop(columns=["pose", "file_name"])
df_y = df["pose"]

# Preprocessing Pipeline

1. Drop the x and y columns of landmarks 1, 3, 4 and 6.
2. Standardise all remaining landmark columns with `StandardScaler`.

In [7]:
# Landmarks removed from the feature set; each one owns an x and a y column.
lmk_to_drop = [1, 3, 4, 6]
cols_to_drop = [f"lmk{lmk}_{axis}" for lmk in lmk_to_drop for axis in ("x", "y")]

# Every other column of df_X is standardised, keeping df_X's column order.
dropped = set(cols_to_drop)
cols_to_scale = [name for name in df_X.columns if name not in dropped]

# Two steps: discard the unwanted landmark columns, scale the rest.
# Together they cover every column of df_X, so nothing is left for the
# "passthrough" remainder.
transformers = [
    ("drop", "drop", cols_to_drop),
    ("scale", StandardScaler(), cols_to_scale),
]
column_transformer = ColumnTransformer(transformers, remainder="passthrough")

# Wrapped in a Pipeline so the whole preprocessing stage is a single object.
pipeline = make_pipeline(column_transformer)

# Fit on the full feature frame and transform it in one call.
df_X_transformed = pipeline.fit_transform(df_X)