# Imports

In [11]:
import glob
import os
import numpy as np
import pandas as pd
import cv2
from get_landmarks import get_landmarks
import matplotlib.pyplot as plt
from tqdm import tqdm

from sklearn.preprocessing import StandardScaler
from joblib import dump, load

from sklearn.pipeline import make_pipeline
from sklearn.cluster import KMeans
from sklearn.model_selection import cross_val_score

# Data Extraction

In [7]:
path = "clean_data/TEST_TRAIN/"
poses = [os.path.basename(d) for d in glob.glob("clean_data/TEST_TRAIN/*")]

all_imgs_path = []
for pose in poses:
    curr_path = path + pose + "/"
    all_imgs_path += (glob.glob(f"{curr_path}*"))

In [3]:
# Do Not Run unless necessary!
data = []
for path in tqdm(all_imgs_path, desc="Processing Images..."):
    landmarks = get_landmarks(cv2.imread(path))
    landmarks.append(path)
    data.append(landmarks)

df = pd.DataFrame(data)

df.to_csv("raw_kp_data.csv")

I0000 00:00:1715852214.299209       1 gl_context.cc:344] GL version: 2.1 (2.1 INTEL-22.1.29), renderer: Intel(R) Iris(TM) Plus Graphics 655
INFO: Created TensorFlow Lite XNNPACK delegate for CPU.
Processing Images...:   0%|          | 1/546 [00:00<04:09,  2.19it/s]I0000 00:00:1715852214.637428       1 gl_context.cc:344] GL version: 2.1 (2.1 INTEL-22.1.29), renderer: Intel(R) Iris(TM) Plus Graphics 655
Processing Images...:   0%|          | 2/546 [00:00<03:26,  2.64it/s]I0000 00:00:1715852214.986221       1 gl_context.cc:344] GL version: 2.1 (2.1 INTEL-22.1.29), renderer: Intel(R) Iris(TM) Plus Graphics 655
Processing Images...:   1%|          | 3/546 [00:01<03:20,  2.71it/s]I0000 00:00:1715852215.320015       1 gl_context.cc:344] GL version: 2.1 (2.1 INTEL-22.1.29), renderer: Intel(R) Iris(TM) Plus Graphics 655
Processing Images...:   1%|          | 4/546 [00:01<03:57,  2.28it/s]I0000 00:00:1715852215.861392       1 gl_context.cc:344] GL version: 2.1 (2.1 INTEL-22.1.29), renderer: Inte

In [2]:
df = pd.read_csv("raw_kp_data.csv")
df = df.drop(df.columns[0], axis=1)
columns = []
for lmk in [f"lmk{x}" for x in range(33)]:
    for each in ["x", "y", "z", "vis"]:
        columns.append(f"{lmk}_{each}")
columns.append("file_name")

df = df.rename(columns=dict(zip(df.columns, columns)))
df.head(3)

Unnamed: 0,lmk0_x,lmk0_y,lmk0_z,lmk0_vis,lmk1_x,lmk1_y,lmk1_z,lmk1_vis,lmk2_x,lmk2_y,...,lmk30_vis,lmk31_x,lmk31_y,lmk31_z,lmk31_vis,lmk32_x,lmk32_y,lmk32_z,lmk32_vis,file_name
0,0.385088,0.702528,-0.004816,0.999651,0.364045,0.705285,-0.031445,0.999706,0.361666,0.700772,...,0.52577,0.781881,0.930616,-0.215838,0.980343,0.763475,0.904605,0.165073,0.610637,clean_data/TEST_TRAIN/downdog/00000372.jpg
1,0.715758,0.547609,0.009604,0.999703,0.729912,0.527488,0.021406,0.999708,0.729571,0.523913,...,0.998306,0.380548,0.670513,0.219,0.923418,0.336641,0.713165,-0.141662,0.998337,clean_data/TEST_TRAIN/downdog/00000414.jpg
2,0.530292,0.608646,-0.049306,0.999273,0.514618,0.623104,-0.070189,0.999622,0.510664,0.621608,...,0.586286,0.806643,0.812826,-0.256742,0.982185,0.792238,0.797374,0.089617,0.671426,clean_data/TEST_TRAIN/downdog/00000158.jpg


# Data Prep

### 1. Eliminate unnecessary landmarks
Unnecessary landmarks are:
- Outer and Inner eye (lmk 1, 3, 4, 6)

In [3]:
lmk_to_drop = [1, 3, 4, 6]
cols_to_drop = []
for lmk in lmk_to_drop:
    for each in ["x", "y", "z", "vis"]:
        cols_to_drop.append(f"lmk{str(lmk)}_{each}")

df = df.drop(columns=cols_to_drop)

df.head(3)

Unnamed: 0,lmk0_x,lmk0_y,lmk0_z,lmk0_vis,lmk2_x,lmk2_y,lmk2_z,lmk2_vis,lmk5_x,lmk5_y,...,lmk30_vis,lmk31_x,lmk31_y,lmk31_z,lmk31_vis,lmk32_x,lmk32_y,lmk32_z,lmk32_vis,file_name
0,0.385088,0.702528,-0.004816,0.999651,0.361666,0.700772,-0.031418,0.999722,0.362612,0.702185,...,0.52577,0.781881,0.930616,-0.215838,0.980343,0.763475,0.904605,0.165073,0.610637,clean_data/TEST_TRAIN/downdog/00000372.jpg
1,0.715758,0.547609,0.009604,0.999703,0.729571,0.523913,0.021355,0.999792,0.728052,0.524347,...,0.998306,0.380548,0.670513,0.219,0.923418,0.336641,0.713165,-0.141662,0.998337,clean_data/TEST_TRAIN/downdog/00000414.jpg
2,0.530292,0.608646,-0.049306,0.999273,0.510664,0.621608,-0.070218,0.999613,0.515844,0.627295,...,0.586286,0.806643,0.812826,-0.256742,0.982185,0.792238,0.797374,0.089617,0.671426,clean_data/TEST_TRAIN/downdog/00000158.jpg


### 2. Add pose names and drop file_name

In [4]:
df["pose"] = df["file_name"].apply(lambda x: x.split("/")[2])
df = df.drop(columns="file_name")

# Model Creation (KMeans)

In [5]:
df_X = df.drop(columns="pose")
df_y = df["pose"]

scaler = StandardScaler()
df_scaled = scaler.fit_transform(df_X)
dump(scaler, "scaler.joblib")

['scaler.joblib']

In [21]:
pose_estimator = make_pipeline(StandardScaler(), KMeans(n_clusters=len(poses)))


In [25]:
pose_estimator.fit(df_X)

  super()._check_params_vs_input(X, default_n_init=10)


In [27]:
dump(pose_estimator, "pose_estimator_kmeans.pkl")

['pose_estimator_kmeans.pkl']