In [1]:
from pickle import dump, load
from tqdm.notebook import tqdm
from skimage import feature
import cv2, os, dlib, imutils
from imutils import face_utils
import numpy as np
from sklearn.model_selection import train_test_split

In [4]:
if not os.path.isdir('SMILEsmileD'):
    !git clone https://github.com/hromi/SMILEsmileD.git
else:
    print('Dataset already downloaded')

Dataset already downloaded


In [14]:
if not os.path.isfile('shape_predictor_68_face_landmarks.dat'):
    !wget https://github.com/davisking/dlib-models/raw/master/shape_predictor_68_face_landmarks.dat.bz2
    !bzip2 -d shape_predictor_68_face_landmarks.dat.bz2
else:
    print('Dlib\'s shape predictor already downloaded')

Dlib's shape predictor already downloaded


In [12]:
predictor = dlib.shape_predictor('shape_predictor_68_face_landmarks.dat')

In [3]:
smileDataset = 'SMILEsmileD/SMILEs/positives/positives7'
nonSmileDataset = 'SMILEsmileD/SMILEs/negatives/negatives7'

X, Y = [], []

for name in tqdm(os.listdir(smileDataset)):
    classe = 'smile'
    imgpath = os.path.join(smileDataset, name)
    img = cv2.imread(imgpath)
    h, w = img.shape[:2]
    # frame = imutils.resize(img, width=128)
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    fd , vis = feature.hog(img, orientations=8, pixels_per_cell=(8, 8),
                cells_per_block=(1, 1), visualize=True, feature_vector=True)
    rect = dlib.rectangle(left=0, top=0, right=w, bottom=h)
    shape = predictor(gray, rect)
    shape = face_utils.shape_to_np(shape)
    # print(shape.shape, fd.shape)
    # break
    X.append(np.concatenate([fd, shape.flatten()]))
    Y.append(classe)
for name in tqdm(os.listdir(nonSmileDataset)):
    if 'jpg' != name[-3:]: continue
    classe = 'notsmile'
    imgpath = os.path.join(nonSmileDataset, name)
    img = cv2.imread(imgpath)
    h, w = img.shape[:2]
    # frame = imutils.resize(img, width=128)
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    fd , vis = feature.hog(img, orientations=8, pixels_per_cell=(8, 8),
                cells_per_block=(1, 1), visualize=True, feature_vector=True)
    rect = dlib.rectangle(left=0, top=0, right=w, bottom=h)
    shape = predictor(gray, rect)
    shape = face_utils.shape_to_np(shape)
    # print(shape.shape, fd.shape)
    # break
    X.append(np.concatenate([fd, shape.flatten()]))
    Y.append(classe)
X = np.array(X)
Y = np.array(Y)
print(X.shape, Y.shape)

  0%|          | 0/3690 [00:00<?, ?it/s]

  0%|          | 0/9476 [00:00<?, ?it/s]

(13165, 648) (13165,)


In [15]:
for i in tqdm(range(1,101)):
    X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.3, random_state=i)
    feat_path_train = f'SMILEsmilesD_features/train_features_HOGShape_{i}.pkl'
    labels_path_train = f'SMILEsmilesD_features/train_labels_HOGShape_{i}.pkl'
    feat_path_test = f'SMILEsmilesD_features/test_features_HOGShape_{i}.pkl'
    labels_path_test = f'SMILEsmilesD_features/test_labels_HOGShape_{i}.pkl'
    dump(X_train, open(feat_path_train, 'wb'))
    dump(y_train, open(labels_path_train, 'wb'))
    dump(X_test, open(feat_path_test, 'wb'))
    dump(y_test, open(labels_path_test, 'wb'))

  0%|          | 0/100 [00:00<?, ?it/s]

In [4]:
np.unique(Y, return_counts=True)

(array(['notsmile', 'smile'], dtype='<U8'), array([9475, 3690]))

### Checking that seed works

In [16]:
X_train2, X_test2, y_train2, y_test2 = train_test_split(X, Y, test_size=0.3, random_state=100)

In [17]:
(X_train == X_train2).all()

True

In [18]:
X_train.shape

(9215, 648)

In [6]:
X.shape

(13165, 648)

### Saving all the features to calculate sigest for the whole dataset

In [7]:
dump(X, open('total_feats_imbalancedbalanced.pkl', 'wb'))

In [15]:
import pandas as pd
feats = pd.DataFrame(X)
labels = pd.DataFrame(Y)

In [16]:
featslabels = pd.concat([feats, labels], axis=1, ignore_index=True)
featslabels

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,639,640,641,642,643,644,645,646,647,648
0,0.452467,0.452467,0.097848,0.028372,0.341674,0.299845,0.410845,0.452467,0.400064,0.400064,...,42.0,44.0,45.0,36.0,48.0,31.0,48.0,27.0,48.0,smile
1,0.491241,0.491241,0.239019,0.491241,0.356827,0.000000,0.131813,0.272428,0.206922,0.054842,...,50.0,48.0,48.0,37.0,53.0,32.0,53.0,28.0,53.0,smile
2,0.541585,0.541585,0.284915,0.047125,0.541585,0.174478,0.063225,0.047125,0.435262,0.435262,...,45.0,44.0,45.0,36.0,46.0,32.0,47.0,29.0,46.0,smile
3,0.470415,0.470415,0.470415,0.470415,0.262108,0.000000,0.009733,0.214576,0.084730,0.020300,...,48.0,46.0,51.0,36.0,49.0,31.0,49.0,26.0,48.0,smile
4,0.443658,0.022131,0.041990,0.232020,0.443658,0.443658,0.443658,0.395706,0.177368,0.000000,...,51.0,41.0,49.0,33.0,50.0,30.0,51.0,26.0,51.0,smile
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13160,0.520837,0.384667,0.049799,0.043468,0.400666,0.080444,0.371638,0.520837,0.403646,0.403646,...,48.0,44.0,47.0,38.0,47.0,34.0,48.0,30.0,48.0,notsmile
13161,0.572276,0.572276,0.000000,0.048269,0.090448,0.037249,0.074838,0.572276,0.484186,0.456039,...,47.0,39.0,49.0,30.0,51.0,26.0,50.0,22.0,50.0,notsmile
13162,0.451399,0.451399,0.451399,0.000000,0.399231,0.062284,0.147279,0.451399,0.108145,0.489184,...,50.0,41.0,52.0,35.0,53.0,32.0,53.0,30.0,53.0,notsmile
13163,0.547543,0.547543,0.542156,0.189768,0.242523,0.069525,0.057079,0.059495,0.473528,0.210503,...,52.0,46.0,53.0,36.0,53.0,32.0,54.0,28.0,54.0,notsmile


### Saving in a csv file

In [17]:
featslabels.to_csv('features_labels_imbalanced.csv', index=False)

### Training models with each method with all the images from the dataset

In [8]:
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline
from sklearn.svm import SVC
from joblib import dump

In [10]:
clf = make_pipeline(StandardScaler(), SVC(gamma='auto', kernel='rbf', C=1))
clf.fit(X, Y)

Pipeline(steps=[('standardscaler', StandardScaler()),
                ('svc', SVC(C=1, gamma='auto'))])

In [11]:
dump(clf, 'smileModel_auto_imbalanced.joblib')

['smileModel_auto_imbalanced.joblib']

In [12]:
clf = make_pipeline(StandardScaler(), SVC(gamma=0.0007965187, kernel='rbf', C=1))
clf.fit(X, Y)
dump(clf, 'smileModel_sigestmedian_imbalanced.joblib')

['smileModel_sigestmedian_imbalanced.joblib']

In [13]:
clf = make_pipeline(StandardScaler(), SVC(gamma=(0.0005884004), kernel='rbf', C=1))
clf.fit(X, Y)
dump(clf, 'smileModel_siges90quantile_imbalanced.joblib')

['smileModel_siges90quantile_imbalanced.joblib']

In [14]:
clf = make_pipeline(StandardScaler(), SVC(gamma=((0.0010461882+0.0005884004)/2), kernel='rbf', C=1))
clf.fit(X, Y)
dump(clf, 'smileModel_sigesmean_imbalanced.joblib')

['smileModel_sigesmean_imbalanced.joblib']