In [2]:
import pandas as pd
import face_recognition
from sklearn.linear_model import HuberRegressor
import numpy as np

In [3]:
def get_face_encoding(image_path):
    picture_of_me = face_recognition.load_image_file(image_path)
    my_face_encoding = face_recognition.face_encodings(picture_of_me)
    if not my_face_encoding:
        print(f"No face found in {image_path}")
        return 0
    return my_face_encoding[0].tolist()

In [4]:
df = pd.read_csv("talents_dataset/combine.csv")
print(df.shape)

all_faces = []
remove_idx = []
for i, id in enumerate(df.id):
    face_enc = get_face_encoding(f"talents_dataset/{id:04}.jpg")
    if face_enc == 0:
        remove_idx.append(i)
        continue
    all_faces.append(face_enc)
X = np.array(all_faces)
df_face = pd.DataFrame(X)
print(df_face.shape)

df = df.drop(remove_idx)
print(df.shape)

(1956, 7)
No face found in talents_dataset/0019.jpg
No face found in talents_dataset/0073.jpg
No face found in talents_dataset/0116.jpg
No face found in talents_dataset/0138.jpg
No face found in talents_dataset/0221.jpg
No face found in talents_dataset/0235.jpg
No face found in talents_dataset/0252.jpg
No face found in talents_dataset/0258.jpg
No face found in talents_dataset/0306.jpg
No face found in talents_dataset/0385.jpg
No face found in talents_dataset/0435.jpg
No face found in talents_dataset/0485.jpg
No face found in talents_dataset/0525.jpg
No face found in talents_dataset/0543.jpg
No face found in talents_dataset/0575.jpg
No face found in talents_dataset/0584.jpg
No face found in talents_dataset/0610.jpg
No face found in talents_dataset/0849.jpg
No face found in talents_dataset/0911.jpg
No face found in talents_dataset/0957.jpg
No face found in talents_dataset/0975.jpg
No face found in talents_dataset/1016.jpg
No face found in talents_dataset/1018.jpg
No face found in talents

In [11]:
X = pd.concat([df, df_face], axis="columns", join="inner")
y_weight = X.weight.values
y_BMI = X.bmi.values
print(y_weight.shape, y_BMI.shape)
X = X.drop(["id", "bmi", "height", "weight"], axis=1)
X = np.array(X.values)
print(X.shape)
X

(1865,) (1865,)
(1865, 131)


array([[ 3.80000000e+01,  0.00000000e+00,  1.00000000e+00, ...,
        -2.79931799e-02,  3.13673541e-02,  6.15612743e-03],
       [ 2.50000000e+01,  1.00000000e+00,  0.00000000e+00, ...,
        -9.46738049e-02,  5.95200285e-02, -8.04585218e-03],
       [ 2.40000000e+01,  0.00000000e+00,  1.00000000e+00, ...,
        -3.51711512e-02,  1.16121978e-01, -3.25355772e-03],
       ...,
       [ 4.00000000e+01,  0.00000000e+00,  1.00000000e+00, ...,
        -7.10002407e-02,  7.07572699e-02, -6.90515153e-03],
       [ 4.00000000e+01,  1.00000000e+00,  0.00000000e+00, ...,
         8.23957846e-03,  8.70382711e-02,  1.66507512e-02],
       [ 3.00000000e+01,  0.00000000e+00,  1.00000000e+00, ...,
        -1.15561731e-01,  1.18354969e-01,  5.02426289e-02]])

In [12]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_weight_train, y_weight_test ,y_BMI_train, y_BMI_test = train_test_split(X, y_weight, y_BMI, random_state=1, test_size=0.15)

In [13]:
from sklearn.metrics import mean_squared_error, r2_score

def report_goodness(model, X_test, y_test, predictor_log=True):
    # Make predictions using the testing set
    y_pred = model.predict(X_test)
    y_true = y_test
    if predictor_log:
        y_true = np.log(y_test)
    # The coefficients
    # The mean squared error
    print(f"Mean squared error: {mean_squared_error(y_true, y_pred):.2f}")
    # Explained variance score: 1 is perfect prediction
    print(f"Variance score: {r2_score(y_true, y_pred):.2f}")

    errors = abs(y_pred - y_true)
    mape = 100 * np.mean(errors / y_true)
    accuracy = 100 - mape
    print("Model Performance")
    print(f"Average Error: {np.mean(errors):0.4f} degrees.")
    print(f"Accuracy = {accuracy:0.2f}%.")

In [14]:
model_BMI = HuberRegressor(alpha=0.05, epsilon=1.8, fit_intercept=True, max_iter=100, tol=1e-05, warm_start=False)
model_BMI = model_BMI.fit(X_train,np.log(y_BMI_train))
report_goodness(model_BMI,X_test,y_BMI_test)

Mean squared error: 0.52
Variance score: -0.02
Model Performance
Average Error: 0.2377 degrees.
Accuracy = 98.74%.


In [15]:
model_weight = HuberRegressor(alpha=0.1, epsilon=1.7, fit_intercept=True, max_iter=100, tol=1e-05, warm_start=False)
model_weight = model_weight.fit(X_train,np.log(y_weight_train))
report_goodness(model_weight,X_test,y_weight_test)

Mean squared error: 0.15
Variance score: -0.03
Model Performance
Average Error: 0.1733 degrees.
Accuracy = 96.18%.


In [None]:
import joblib

joblib.dump(model_weight, "models/weight_predictor_light.model")
joblib.dump(model_BMI, "models/bmi_predictor_light.model")