In [2]:
!pip install pandas

Collecting pandas
  Obtaining dependency information for pandas from https://files.pythonhosted.org/packages/f8/c7/cfef920b7b457dff6928e824896cb82367650ea127d048ee0b820026db4f/pandas-2.0.3-cp39-cp39-macosx_10_9_x86_64.whl.metadata
  Downloading pandas-2.0.3-cp39-cp39-macosx_10_9_x86_64.whl.metadata (18 kB)
Downloading pandas-2.0.3-cp39-cp39-macosx_10_9_x86_64.whl (11.8 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m11.8/11.8 MB[0m [31m1.9 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hInstalling collected packages: pandas
Successfully installed pandas-2.0.3


In [2]:
import pandas as pd
import numpy as np
import xgboost as xgb
import pickle
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.utils import class_weight
from sklearn.metrics import f1_score

In [3]:
# Seed handed to every model below
SEED = 42

# Creators that are held out for testing in every pose cell
test_creators = [
    'Anna-Veronika',
    'Blush-with-me-Parmita',
    'Doctora-Claudia-Garcia',
    'Gesund-durchs-Leben',
    'Goldbeauty',
    'Muscle-Watching-L',
    'Natty',
    'Pilates-by-Lisa',
    'Shanthi-Kasiraj',
    'Siddhi-yoga-hindi',
    'templo-del-masaje',
]

# Read the blendshape / landmark summary table
df = pd.read_csv('/Users/kelianneheinz/MIDS/courses/W210_Capstone/model_data/m1_landmark_summary_blendshape_results_face_angled_augmented.csv')

# Rows labelled pose 9 are relabelled as pose 6
pose_is_9 = df['Pose'] == 9
df.loc[pose_is_9, 'Pose'] = 6

# Keep every row that is not flagged face_angled, plus all Pose 3 rows
not_face_angled = df['face_angled'] != 1
is_pose_3 = df['Pose'] == 3
df = df[not_face_angled | is_pose_3]
df.head()

Unnamed: 0,Filename,blendshapes,landmarks,Pose,Face Obstructed,Hands on Face,Notes,poor_quality,face_angled,Creator,Gender,Apparent_Race,head_angle,brow_arch,brow_raise_ratio,lid_brow_ratio
0,aug-Pose_2_Face-Yoga-Method_416.jpg,"[7.1687679792376e-07, 0.0015940895536914468, 0...","[(0.5354241728782654, 0.3942164480686188, -0.0...",2,0,0,,0,0,Face-Yoga-Method,F,Asian,0.005279,0.016992,0.624641,0.600919
1,zoom-Pose_2_Face-Yoga-Method_416.jpg,"[9.191711001221847e-07, 0.0002983348094858229,...","[(0.46912822127342224, 0.2676655948162079, -0....",2,0,0,,0,0,Face-Yoga-Method,F,Asian,0.009748,0.016792,0.58073,0.72197
2,shear-Pose_2_Face-Yoga-Method_416.jpg,"[4.570634928313666e-07, 0.0008711821283213794,...","[(0.4749401807785034, 0.40972456336021423, -0....",2,0,0,,0,0,Face-Yoga-Method,F,Asian,0.003687,0.010946,0.595802,0.67841
3,zoom-Pose_2_Face-Yoga-Method_417.jpg,"[1.2624046803466626e-06, 0.0002172658132622018...","[(0.4661442041397095, 0.2637966573238373, -0.0...",2,0,0,,0,0,Face-Yoga-Method,F,Asian,0.009163,0.016876,0.583923,0.712553
4,flip-Pose_2_Face-Yoga-Method_415.jpg,"[5.646483600685315e-07, 0.0006009486387483776,...","[(0.5249334573745728, 0.3435852825641632, -0.0...",2,0,0,,0,0,Face-Yoga-Method,F,Asian,0.011169,0.010762,0.598729,0.670204


In [4]:
def get_features(df):
    '''Returns the blendshape scores as a float dataframe.

    Each entry of ``df.blendshapes`` is a string holding a bracketed,
    ", "-separated list of numbers (optionally quoted). The brackets and
    quotes are stripped, the string is split into one column per value and
    every column is cast to float. Columns 0-51 are renamed to the
    blendshape names listed below; the index of ``df`` is preserved.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a string column named ``blendshapes``.

    Returns
    -------
    pandas.DataFrame
        One row per row of ``df``, one float column per blendshape value.
    '''
    feature_names = {
    0:'browDownLeft', 1:'browDownRight', 2:'browInnerUp', 3:'browOuterUpLeft',
    4:'browOuterUpRight', 5:'cheekPuff', 6:'cheekSquintLeft', 7:'cheekSquintRight',
    8:'eyeBlinkLeft', 9:'eyeBlinkRight', 10:'eyeLookDownLeft', 11:'eyeLookDownRight',
    12:'eyeLookInLeft', 13:'eyeLookInRight', 14:'eyeLookOutLeft', 15:'eyeLookOutRight',
    16:'eyeLookUpLeft', 17:'eyeLookUpRight', 18:'eyeSquintLeft', 19:'eyeSquintRight',
    20:'eyeWideLeft', 21:'eyeWideRight', 22:'jawForward', 23:'jawLeft',
    24:'jawOpen', 25:'jawRight', 26:'mouthClose', 27:'mouthDimpleLeft',
    28:'mouthDimpleRight', 29:'mouthFrownLeft', 30:'mouthFrownRight',
    31:'mouthFunnel', 32:'mouthLeft', 33:'mouthLowerDownLeft',
    34:'mouthLowerDownRight', 35:'mouthPressLeft', 36:'mouthPressRight',
    37:'mouthPucker', 38:'mouthRight', 39:'mouthRollLower', 40:'mouthRollUpper',
    41:'mouthShrugLower', 42:'mouthShrugUpper', 43:'mouthSmileLeft',
    44:'mouthSmileRight', 45:'mouthStretchLeft', 46:'mouthStretchRight',
    47:'mouthUpperUpLeft', 48:'mouthUpperUpRight', 49:'noseSneerLeft',
    50:'noseSneerRight', 51:'tongueOut'
    }
    # A raw-string character class removes "[", "]" and "'" in a single pass
    # and avoids the invalid "\[" / "\]" escapes of a non-raw string literal.
    features = (
        df.blendshapes
        .str.replace(r"[\[\]']", '', regex=True)
        .str.split(', ', expand=True)
        .astype(float)
    )
    return features.rename(columns=feature_names)

In [11]:
# Directory the pickled models and scalers are written to. The trailing slash
# matters: file paths are built below by plain string concatenation
# (save_dir + '<name>.pkl').
save_dir = '/Users/kelianneheinz/MIDS/courses/W210_Capstone/github/website/server/models/'

# Pose 1

In [15]:
# Set up Data
# Drop the Pose 1 rows of these creators, then take an explicit copy so the
# relabelling below writes to an independent frame (no SettingWithCopyWarning,
# and consistent with the df.copy() used by the other pose cells).
pose_1_press = ['HisDream', 'Face-Yoga-Expert']
df_pose = df[~((df.Creator.isin(pose_1_press)) & (df.Pose == 1))].copy()
key_pose = 1
# One-vs-rest target: 1 for the key pose, 0 for every other pose
df_pose.loc[df_pose.Pose != key_pose, 'Pose'] = 0
df_pose.loc[df_pose.Pose == key_pose, 'Pose'] = 1

features = get_features(df_pose)
# Extra columns for this pose, read from the same filtered frame the
# blendshape features were built from
features['lid_brow_ratio'] = df_pose['lid_brow_ratio']
features['brow_arch'] = df_pose['brow_arch']
features['brow_raise_ratio'] = df_pose['brow_raise_ratio']

test_creators_p1 = test_creators + ['LibertadDigital']

# Prepare inputs: split by creator, so a creator is either all-train or all-test
is_test = df_pose.Creator.isin(test_creators_p1)
X_train = features[~is_test]
y_train = df_pose.Pose[~is_test]
X_test = features[is_test]
y_test = df_pose.Pose[is_test]

# The scaler is fit on the training rows only and then applied to both splits
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Per-sample weights that balance the two classes
weights_array = class_weight.compute_sample_weight('balanced', y=y_train)

# Train Model
params = {'max_depth': 16, 'max_features': 'sqrt',
          'min_samples_split': 8, 'n_estimators': 50}
model1 = ExtraTreesClassifier(random_state=SEED, **params)
model1.fit(X_train, y_train, sample_weight=weights_array)
y_pred = model1.predict(X_test)
print(f1_score(y_test, y_pred))

0.8951747088186356


In [30]:
# Save model as pickle file
# (the with-block closes the file handle even if pickling fails)
with open(save_dir+'pose_1_model.pkl', 'wb') as model_file:
    pickle.dump(model1, model_file)

In [None]:
# Save the scaler currently bound to `scaler`; the with-block closes the file
# handle even if pickling fails.
with open(save_dir+'p1_scaler.pkl', 'wb') as scaler_file:
    pickle.dump(scaler, scaler_file)

# Pose 2

In [5]:
# Build a one-vs-rest target for pose 2 on a private copy of the data
df_pose = df.copy()
key_pose = 2
is_key_pose = df_pose['Pose'] == key_pose
df_pose.loc[~is_key_pose, 'Pose'] = 0
df_pose.loc[is_key_pose, 'Pose'] = 1

features = get_features(df_pose)

# Hold out the shared test creators plus one pose-specific creator
test_creators_p2 = test_creators + ['Accents-Way-English']
in_test_split = df_pose['Creator'].isin(test_creators_p2)

# Split features and labels by creator
X_train, X_test = features[~in_test_split], features[in_test_split]
y_train, y_test = df_pose['Pose'][~in_test_split], df_pose['Pose'][in_test_split]

# Standardise with statistics taken from the training rows only
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Balance the two classes through per-sample weights
weights_array = class_weight.compute_sample_weight('balanced', y=y_train)

# Fit the classifier and report F1 on the held-out creators
params = dict(max_depth=35, min_samples_leaf=5,
              min_samples_split=2, n_estimators=500)
model2 = ExtraTreesClassifier(random_state=SEED, **params)
model2.fit(X_train, y_train, sample_weight=weights_array)
y_pred = model2.predict(X_test)
test_f1 = f1_score(y_test, y_pred)
print(test_f1)

0.7767441860465117


In [33]:
# Save model as pickle file
# (the with-block closes the file handle even if pickling fails)
with open(save_dir+'pose_2_model.pkl', 'wb') as model_file:
    pickle.dump(model2, model_file)

# Pose 3

In [13]:
# Set up Data
df_pose = df.copy()
key_pose = 3
# One-vs-rest target: 1 for the key pose, 0 for every other pose
df_pose.loc[df_pose.Pose != key_pose, 'Pose'] = 0
df_pose.loc[df_pose.Pose == key_pose, 'Pose'] = 1

features = get_features(df_pose)
# Extra column for this pose, read from the same frame the blendshape
# features were built from
features['head_angle'] = df_pose['head_angle']

test_creators_p3 = test_creators + ['Sientete-Bien']

# Prepare inputs: split by creator, so a creator is either all-train or all-test
is_test = df_pose.Creator.isin(test_creators_p3)
X_train = features[~is_test]
y_train = df_pose.Pose[~is_test]
X_test = features[is_test]
y_test = df_pose.Pose[is_test]

# The scaler is fit on the training rows only and then applied to both splits
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Per-sample weights that balance the two classes
weights_array = class_weight.compute_sample_weight('balanced', y=y_train)

# Train Model
params = {'colsample_bytree': 0.6, 'gamma': 2.5,
          'max_depth': 6, 'min_child_weight': 5,
          'subsample': 1.0}
# random_state is the scikit-learn style seed argument of XGBClassifier and
# matches how the Pose 5 XGBoost model is seeded.
model3 = xgb.XGBClassifier(random_state=SEED, **params)
model3.fit(X_train, y_train, sample_weight=weights_array)
y_pred = model3.predict(X_test)
print(f1_score(y_test, y_pred))

0.9562764456981664


In [47]:
# Save as pickle file
# (the with-block closes the file handle even if pickling fails)
with open(save_dir+'pose_3_model.pkl', 'wb') as model_file:
    pickle.dump(model3, model_file)

In [14]:
# Save the scaler currently bound to `scaler`; the with-block closes the file
# handle even if pickling fails.
with open(save_dir + 'p3_scaler.pkl', 'wb') as scaler_file:
    pickle.dump(scaler, scaler_file)

# Pose 4

In [7]:
# Set up Data
df_pose = df.copy()
key_pose = 4
# One-vs-rest target: 1 for the key pose, 0 for every other pose
df_pose.loc[df_pose.Pose != key_pose, 'Pose'] = 0
df_pose.loc[df_pose.Pose == key_pose, 'Pose'] = 1

features = get_features(df_pose)

test_creators_p4 = test_creators + ['Masumi-Channel']

# Prepare inputs: split by creator, so a creator is either all-train or all-test
is_test = df_pose.Creator.isin(test_creators_p4)
X_train = features[~is_test]
y_train = df_pose.Pose[~is_test]
X_test = features[is_test]
y_test = df_pose.Pose[is_test]

# Fit the scaler inside this cell rather than relying on whichever `scaler`
# an earlier cell left in the kernel: run top-to-bottom that would be the
# Pose 3 scaler, which is fit with an extra head_angle column and does not
# match these features.
# NOTE(review): the execution counts suggest the scaler in use when this cell
# last ran was the one fit in the Pose 2 cell, so that training split
# (everything except test_creators + 'Accents-Way-English') is reproduced
# here - confirm this is the intended shared scaler.
scaler_fit_rows = ~df_pose.Creator.isin(test_creators + ['Accents-Way-English'])
scaler = StandardScaler().fit(features[scaler_fit_rows])
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# Per-sample weights that balance the two classes
weights_array = class_weight.compute_sample_weight('balanced', y=y_train)

# Train Model
params = {'max_depth': 5, 'min_samples_leaf': 3,
          'min_samples_split': 2, 'n_estimators': 400}
model4 = ExtraTreesClassifier(random_state=SEED, **params)
model4.fit(X_train, y_train, sample_weight=weights_array)
y_pred = model4.predict(X_test)
print(f1_score(y_test, y_pred))

0.9735182849936949


In [35]:
# Save model as pickle file
# (the with-block closes the file handle even if pickling fails)
with open(save_dir+'pose_4_model.pkl', 'wb') as model_file:
    pickle.dump(model4, model_file)

# Pose 5

In [8]:
# Set up Data
df_pose = df.copy()
key_pose = 5
# One-vs-rest target: 1 for the key pose, 0 for every other pose
df_pose.loc[df_pose.Pose != key_pose, 'Pose'] = 0
df_pose.loc[df_pose.Pose == key_pose, 'Pose'] = 1

features = get_features(df_pose)

test_creators_p5 = test_creators + ['HisDream']

# Prepare inputs: split by creator, so a creator is either all-train or all-test
is_test = df_pose.Creator.isin(test_creators_p5)
X_train = features[~is_test]
y_train = df_pose.Pose[~is_test]
X_test = features[is_test]
y_test = df_pose.Pose[is_test]

# Fit the scaler inside this cell rather than relying on whichever `scaler`
# an earlier cell left in the kernel: run top-to-bottom that would trace back
# to the Pose 3 scaler, which is fit with an extra head_angle column and does
# not match these features.
# NOTE(review): the execution counts suggest the scaler in use when this cell
# last ran was the one fit in the Pose 2 cell, so that training split
# (everything except test_creators + 'Accents-Way-English') is reproduced
# here - confirm this is the intended shared scaler.
scaler_fit_rows = ~df_pose.Creator.isin(test_creators + ['Accents-Way-English'])
scaler = StandardScaler().fit(features[scaler_fit_rows])
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# Per-sample weights that balance the two classes
weights_array = class_weight.compute_sample_weight('balanced', y=y_train)

# Train Model
params = {'colsample_bytree': 1.0, 'gamma': 2.5,
          'max_depth': 10, 'min_child_weight': 5,
          'subsample': 1.0}
model5 = xgb.XGBClassifier(random_state=SEED, **params)
model5.fit(X_train, y_train, sample_weight=weights_array)
y_pred = model5.predict(X_test)
print(f1_score(y_test, y_pred))

0.986093552465234


In [43]:
# Save to Pickle File
# (the with-block closes the file handle even if pickling fails)
with open(save_dir+'pose_5_model.pkl', 'wb') as model_file:
    pickle.dump(model5, model_file)

# Pose 6

In [9]:
# Set up Data
df_pose = df.copy()
key_pose = 6
# One-vs-rest target: 1 for the key pose, 0 for every other pose
df_pose.loc[df_pose.Pose != key_pose, 'Pose'] = 0
df_pose.loc[df_pose.Pose == key_pose, 'Pose'] = 1

features = get_features(df_pose)

test_creators_p6 = test_creators + ['Funfitt-with-Susana-Yabar', 'Daniela-Suarez']

# Prepare inputs: split by creator, so a creator is either all-train or all-test
is_test = df_pose.Creator.isin(test_creators_p6)
X_train = features[~is_test]
y_train = df_pose.Pose[~is_test]
X_test = features[is_test]
y_test = df_pose.Pose[is_test]

# Fit the scaler inside this cell rather than relying on whichever `scaler`
# an earlier cell left in the kernel: run top-to-bottom that would trace back
# to the Pose 3 scaler, which is fit with an extra head_angle column and does
# not match these features.
# NOTE(review): the execution counts suggest the scaler in use when this cell
# last ran was the one fit in the Pose 2 cell, so that training split
# (everything except test_creators + 'Accents-Way-English') is reproduced
# here - confirm this is the intended shared scaler.
scaler_fit_rows = ~df_pose.Creator.isin(test_creators + ['Accents-Way-English'])
scaler = StandardScaler().fit(features[scaler_fit_rows])
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# Per-sample weights that balance the two classes
weights_array = class_weight.compute_sample_weight('balanced', y=y_train)

# Train Model
params = {'max_depth': 35, 'min_samples_leaf': 5,
          'min_samples_split': 2, 'n_estimators': 300}
model6 = ExtraTreesClassifier(random_state=SEED, **params)
model6.fit(X_train, y_train, sample_weight=weights_array)
y_pred = model6.predict(X_test)
print(f1_score(y_test, y_pred))

0.8929824561403509


In [12]:
# Save standard scaler
# (whatever is currently bound to `scaler`; the with-block closes the file
# handle even if pickling fails)
with open(save_dir+'scaler.pkl', 'wb') as scaler_file:
    pickle.dump(scaler, scaler_file)

In [37]:
# Save model as pickle file
# (the with-block closes the file handle even if pickling fails)
with open(save_dir+'pose_6_model.pkl', 'wb') as model_file:
    pickle.dump(model6, model_file)

# Pose 7

In [38]:
# Build a one-vs-rest target for pose 7 on a private copy of the data
df_pose = df.copy()
key_pose = 7
is_key_pose = df_pose['Pose'] == key_pose
df_pose.loc[~is_key_pose, 'Pose'] = 0
df_pose.loc[is_key_pose, 'Pose'] = 1

features = get_features(df_pose)

# Hold out the shared test creators plus two pose-specific creators
test_creators_p7 = test_creators + ['Masumi-Channel', 'Verena-Boix']
in_test_split = df_pose['Creator'].isin(test_creators_p7)

# Split features and labels by creator
X_train, X_test = features[~in_test_split], features[in_test_split]
y_train, y_test = df_pose['Pose'][~in_test_split], df_pose['Pose'][in_test_split]

# Standardise with statistics taken from the training rows only
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Balance the two classes through per-sample weights
weights_array = class_weight.compute_sample_weight('balanced', y=y_train)

# Fit the classifier and report F1 on the held-out creators
params = dict(max_depth=20, min_samples_leaf=1,
              min_samples_split=4, n_estimators=400)
model7 = ExtraTreesClassifier(random_state=SEED, **params)
model7.fit(X_train, y_train, sample_weight=weights_array)
y_pred = model7.predict(X_test)
test_f1 = f1_score(y_test, y_pred)
print(test_f1)

0.7246537396121885


In [39]:
# Save model as pickle file
# (the with-block closes the file handle even if pickling fails)
with open(save_dir+'pose_7_model.pkl', 'wb') as model_file:
    pickle.dump(model7, model_file)

# Pose 8

In [40]:
# Build a one-vs-rest target for pose 8 on a private copy of the data
df_pose = df.copy()
key_pose = 8
is_key_pose = df_pose['Pose'] == key_pose
df_pose.loc[~is_key_pose, 'Pose'] = 0
df_pose.loc[is_key_pose, 'Pose'] = 1

features = get_features(df_pose)

# Hold out the shared test creators plus two pose-specific creators
test_creators_p8 = test_creators + ['Valentina-Sadak', 'Yoga-with-Souvik']
in_test_split = df_pose['Creator'].isin(test_creators_p8)

# Split features and labels by creator
X_train, X_test = features[~in_test_split], features[in_test_split]
y_train, y_test = df_pose['Pose'][~in_test_split], df_pose['Pose'][in_test_split]

# Standardise with statistics taken from the training rows only
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Balance the two classes through per-sample weights
weights_array = class_weight.compute_sample_weight('balanced', y=y_train)

# Fit the classifier and report F1 on the held-out creators
params = dict(max_depth=20, min_samples_leaf=1,
              min_samples_split=6, n_estimators=500)
model8 = ExtraTreesClassifier(random_state=SEED, **params)
model8.fit(X_train, y_train, sample_weight=weights_array)
y_pred = model8.predict(X_test)
test_f1 = f1_score(y_test, y_pred)
print(test_f1)

0.9636650868878356


In [41]:
# Save to pickle file
# (the with-block closes the file handle even if pickling fails)
with open(save_dir+'pose_8_model.pkl', 'wb') as model_file:
    pickle.dump(model8, model_file)