# Imports

In [49]:
import glob
import os
import numpy as np
import pandas as pd
import cv2
from get_landmarks import get_landmarks
import matplotlib.pyplot as plt
from tqdm import tqdm

from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import LabelEncoder
from joblib import dump, load

from sklearn.pipeline import make_pipeline
from sklearn.compose import ColumnTransformer

import tensorflow as tf
from sklearn.model_selection import train_test_split

# Data Extraction

In [35]:
path = "clean_data/TEST_TRAIN/"
poses = [os.path.basename(d) for d in glob.glob("clean_data/TEST_TRAIN/*")]

all_imgs_path = []
for pose in poses:
    curr_path = path + pose + "/"
    all_imgs_path += (glob.glob(f"{curr_path}*"))

In [36]:
# # Do Not Run unless necessary!
# data = []
# for path in tqdm(all_imgs_path, desc="Processing Images..."):
#     landmarks = get_landmarks(cv2.imread(path))
#     landmarks.append(path)
#     data.append(landmarks)

# df = pd.DataFrame(data)

# df.to_csv("raw_kp_data.csv")

In [37]:
df = pd.read_csv("raw_kp_data.csv")
df = df.drop(df.columns[0], axis=1)
columns = []
for lmk in [f"lmk{x}" for x in range(33)]:
    for each in ["x", "y"]:
        columns.append(f"{lmk}_{each}")
columns.append("file_name")

df = df.rename(columns=dict(zip(df.columns, columns)))
df.head(3)

Unnamed: 0,lmk0_x,lmk0_y,lmk1_x,lmk1_y,lmk2_x,lmk2_y,lmk3_x,lmk3_y,lmk4_x,lmk4_y,...,lmk28_y,lmk29_x,lmk29_y,lmk30_x,lmk30_y,lmk31_x,lmk31_y,lmk32_x,lmk32_y,file_name
0,0.385088,0.702528,0.364045,0.705285,0.361666,0.700772,0.359247,0.696249,0.364545,0.705934,...,0.768468,0.898328,0.851075,0.870051,0.81165,0.781881,0.930616,0.763475,0.904605,clean_data/TEST_TRAIN/downdog/00000372.jpg
1,0.715758,0.547609,0.729912,0.527488,0.729571,0.523913,0.728997,0.520017,0.728891,0.527579,...,0.664694,0.307465,0.655862,0.261937,0.680302,0.380548,0.670513,0.336641,0.713165,clean_data/TEST_TRAIN/downdog/00000414.jpg
2,0.530292,0.608646,0.514618,0.623104,0.510664,0.621608,0.506621,0.619859,0.517501,0.626247,...,0.729783,0.889003,0.7819,0.853871,0.763474,0.806643,0.812826,0.792238,0.797374,clean_data/TEST_TRAIN/downdog/00000158.jpg


In [38]:
# Create df_X and df_y from df

df["pose"] = df["file_name"].apply(lambda x: x.split("/")[2])
df["pose"] = df["pose"].astype("category")
df_X = df.drop(columns=["pose", "file_name"])
df_y = df["pose"]

# Preprocessing Pipeline

1. Drop landmarks 1, 3, 4, 6
2. Scale

In [53]:
# lmk_to_drop = [1, 3, 4, 6]
# cols_to_drop = []
# for lmk in lmk_to_drop:
#     for each in ["x", "y"]:
#         cols_to_drop.append(f"lmk{str(lmk)}_{each}")

# x
cols_to_drop = ['lmk1_x','lmk1_y','lmk3_x','lmk3_y','lmk4_x','lmk4_y','lmk6_x','lmk6_y']
cols_to_scale = [col for col in df_X.columns if col not in cols_to_drop]

transformers = [
    ("drop", "drop", cols_to_drop),
    ("scale", StandardScaler(), cols_to_scale)
]

column_transformer = ColumnTransformer(transformers=transformers, remainder="passthrough")

pipeline = make_pipeline(
    column_transformer
)

X = pipeline.fit_transform(df_X)

# y
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(df_y)
y_ohe = tf.keras.utils.to_categorical(y_encoded)


### Simple NN

In [40]:
import tensorflow as tf
from sklearn.model_selection import train_test_split

In [46]:
X = pipeline.fit_transform(df_X)
y_cat = pd.array(df_y)
y_num = tf.keras.utils.to_categorical(y_cat)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2)

ValueError: invalid literal for int() with base 10: 'downdog'

In [45]:
y_train

['warrior2_left', 'downdog', 'downdog', 'downdog', 'downdog', ..., 'downdog', 'plank', 'warrior2_right', 'plank', 'goddess']
Length: 436
Categories (7, object): ['downdog', 'goddess', 'plank', 'tree_chest', 'tree_up', 'warrior2_left', 'warrior2_right']

In [42]:
model = tf.keras.Sequential()

model.add(tf.keras.layers.Input(shape=[X_train.shape[1]]))
model.add(tf.keras.layers.Dense(128, activation="relu"))
model.add(tf.keras.layers.Dense(64, activation="relu"))
model.add(tf.keras.layers.Dense(32, activation="relu"))
model.add(tf.keras.layers.Dense(len(poses), activation="relu"))

model.compile(loss='categorical_crossentropy',
                optimizer='adam',
                metrics=['accuracy'])

model.summary()

Model: "sequential_5"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_17 (Dense)            (None, 128)               7552      
                                                                 
 dense_18 (Dense)            (None, 64)                8256      
                                                                 
 dense_19 (Dense)            (None, 32)                2080      
                                                                 
 dense_20 (Dense)            (None, 7)                 231       
                                                                 
Total params: 18119 (70.78 KB)
Trainable params: 18119 (70.78 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [43]:
y_train

['warrior2_left', 'downdog', 'downdog', 'downdog', 'downdog', ..., 'downdog', 'plank', 'warrior2_right', 'plank', 'goddess']
Length: 436
Categories (7, object): ['downdog', 'goddess', 'plank', 'tree_chest', 'tree_up', 'warrior2_left', 'warrior2_right']

In [44]:
history = model.fit(X_train, y_train, epochs=10, validation_data=(X_val, y_val))



TypeError: Cannot convert the argument `type_value`: CategoricalDtype(categories=['downdog', 'goddess', 'plank', 'tree_chest', 'tree_up',
                  'warrior2_left', 'warrior2_right'],
, ordered=False, categories_dtype=object) to a TensorFlow DType.