In [43]:
import numpy as np
import pandas as pd

from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import RobustScaler, MinMaxScaler

In [41]:
# Load the pose-landmark table and discard exact duplicate rows.
df = pd.read_csv('pose_landmark_data_10.csv').drop_duplicates()

# Draw a class-balanced sample: the same number of rows from every pose.
# Per the original note the rarest pose has 29 rows, so taking 25 per pose
# leaves a few rows of each class over for validation. That hold-out set is
# NOT built in this cell. Modelling (including CV) is meant to use sample_df.
N_PER_POSE = 25
SAMPLE_SEED = 42  # fixed so Restart & Run All reproduces the same sample

# groupby(...).sample() keeps the "pose" column in the result.  The previous
# groupby(...).apply(lambda x: x.sample(...)) form emits a deprecation warning
# on pandas >= 2.2 because the grouping column is slated to be excluded from
# the applied frame, which would leave sample_df without "pose" after
# reset_index(drop=True).
sample_df = (
    df.groupby("pose")
    .sample(n=N_PER_POSE, random_state=SAMPLE_SEED)
    .reset_index(drop=True)
)

# Unique pose labels, as listed by the original author.
poses = ['downdog', 'tree', 'boat', 'akarna', 'warrior', 'heron', 'goddess',
         'plank', 'revolved_triangle', 'cobra']
# Poses to keep for modelling. Trim this list to quickly check pose
# combinations that may cause problems; by default it is every pose.
pose_to_chk = list(poses)

# Split the sample into X and target Y, restricted to the poses under test.
pose_mask = sample_df["pose"].isin(pose_to_chk)
# NOTE(review): X still contains the "pose" label column, exactly as before,
# so later cells that expect it keep working. It must be dropped before
# fitting or X will leak the target -- confirm where that happens.
X = sample_df[pose_mask]
Y = sample_df.loc[pose_mask, "pose"]


In [65]:
# Names of the per-keypoint visibility columns (suffix match on "visibility").
vis = list(filter(lambda name: name.endswith("visibility"), df.columns))

# One row per visibility column, ordered from lowest to highest mean.
# NOTE(review): this summarises whatever `df` currently holds, so the numbers
# depend on which cells ran before this one -- re-run from a fresh kernel
# before trusting the saved output.
df[vis].describe().transpose().sort_values(by="mean")

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
kp16_visibility,2063.0,-0.248834,0.321366,-2.717658,-0.45261,-0.282819,0.021369,0.951673
kp17_visibility,2063.0,-0.20296,0.30025,-2.583905,-0.388143,-0.220009,0.041204,0.943207
kp13_visibility,2063.0,-0.177842,0.225876,-2.150213,-0.285123,-0.134646,-0.046371,1.084365
kp14_visibility,2063.0,-0.130392,0.202331,-1.781607,-0.225742,-0.123368,0.012158,0.988916
kp15_visibility,2063.0,-0.085777,0.249185,-1.534231,-0.262,-0.087455,0.123765,0.627423
kp19_visibility,2063.0,0.06476,0.333151,-1.720022,-0.140385,0.055958,0.253459,3.437553
kp18_visibility,2063.0,0.112669,0.290528,-1.517557,-0.062378,0.069952,0.274504,3.274215
kp7_visibility,2063.0,0.368746,0.162905,-0.078752,0.242501,0.358659,0.467716,0.874286
kp6_visibility,2063.0,0.376445,0.157318,-0.097741,0.257482,0.371905,0.467962,0.90403
kp8_visibility,2063.0,0.391015,0.146271,-0.028537,0.281765,0.389167,0.480555,0.915823


In [59]:
# Summary statistics (count, mean, std, min, quartiles, max) for every numeric
# column of df.
# NOTE(review): in the saved output below, the visibility columns shown all lie
# within [0, 1], while the saved output of the visibility-summary cell shows
# values from about -2.7 to 3.4. The execution counts are also non-sequential,
# so df was presumably modified (e.g. rescaled) between the two runs --
# Restart & Run All to regenerate consistent outputs.
df.describe()

Unnamed: 0,kp0_x,kp0_y,kp0_z,kp0_visibility,kp0_presence,kp1_x,kp1_y,kp1_z,kp1_visibility,kp1_presence,...,kp31_x,kp31_y,kp31_z,kp31_visibility,kp31_presence,kp32_x,kp32_y,kp32_z,kp32_visibility,kp32_presence
count,2063.0,2063.0,2063.0,2063.0,2063.0,2063.0,2063.0,2063.0,2063.0,2063.0,...,2063.0,2063.0,2063.0,2063.0,2063.0,2063.0,2063.0,2063.0,2063.0,2063.0
mean,0.487628,0.488844,0.490086,0.491302,0.485313,0.484061,0.482818,0.493379,0.482448,0.490563,...,0.995547,0.994242,0.990691,0.988933,0.980581,0.980311,0.97715,0.976829,0.968449,0.969831
std,0.209563,0.213157,0.213223,0.213312,0.212803,0.212604,0.212426,0.210816,0.209634,0.204521,...,0.026546,0.037759,0.045455,0.0514,0.06886,0.077701,0.073351,0.083337,0.086742,0.091633
min,0.039818,0.05239,0.055694,0.059428,0.050929,0.052958,0.055691,0.083351,0.065093,0.050368,...,0.03247,0.033262,0.094135,0.06013,0.061404,0.034928,0.065289,0.023044,0.039771,0.029261
25%,0.322212,0.319562,0.319442,0.319359,0.317856,0.316977,0.316761,0.317925,0.312078,0.326659,...,0.997386,0.997077,0.994745,0.993432,0.988184,0.989935,0.984517,0.986667,0.975483,0.980684
50%,0.484349,0.490236,0.492034,0.494339,0.479747,0.475498,0.471877,0.50163,0.471776,0.491316,...,0.999204,0.999077,0.998269,0.998039,0.996153,0.996658,0.994817,0.995489,0.992642,0.993099
75%,0.657038,0.667129,0.669501,0.670293,0.663399,0.661501,0.661539,0.673612,0.66524,0.654865,...,0.999786,0.999746,0.999549,0.999511,0.998888,0.999014,0.998476,0.998588,0.997715,0.99754
max,0.979807,0.959236,0.956746,0.953781,0.956474,0.951914,0.946747,0.932183,0.916986,0.979454,...,1.0,1.0,0.999997,0.999999,0.999992,0.999986,0.999986,0.999971,0.999936,0.999946
