In [1]:
#Importing libraries
import dlib
import cv2
import numpy as np
import glob
import random
import math
import itertools
from sklearn.svm import SVC
import pickle
import os
from sklearn.metrics import classification_report, confusion_matrix

In [2]:
import time
import numpy as np
from sklearn.ensemble import BaggingClassifier, RandomForestClassifier
from sklearn import datasets
from sklearn.multiclass import OneVsRestClassifier
from sklearn.svm import SVC
#classes of FER data set
emotions = ["happy", "neutral", "sad","surprise","fear","disgust","angry"]
detector = dlib.get_frontal_face_detector()
#Fetching the dat file which contains the pre trained model of landmark detection algorithm
model = dlib.shape_predictor("C:\\Users\\Neha\\Desktop\\AIProject\\shape_predictor_68_face_landmarks.dat")
clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))

In [3]:
def get_landmarks(image):
    detections = detector(image, 1)
    # For all detected face instances individually
    for k, d in enumerate(detections):
        # Get facial landmarks with prediction model
        shape = model(image, d)
        xpoint = []
        ypoint = []
        for i in range(0, 68):
            xpoint.append(float(shape.part(i).x))
            ypoint.append(float(shape.part(i).y))

        # Center points of both axis
        xcenter = np.mean(xpoint)
        ycenter = np.mean(ypoint)
        # Calculate distance between particular points and center point
        xdistcent = [(x-xcenter) for x in xpoint]
        ydistcent = [(y-ycenter) for y in ypoint]

        # Prevent divided by 0 value
        if xpoint[11] == xpoint[14]:
            angle_nose = 0
        else:
            # Point 14 is the tip of the nose, point 11 is the top of the nose brigde
            angle_nose = int(math.atan((ypoint[11]-ypoint[14])/(xpoint[11]-xpoint[14]))*180/math.pi)

        # Get offset by finding how the nose brigde should be rotated to become perpendicular to the horizontal plane
        if angle_nose < 0:
            angle_nose += 90
        else:
            angle_nose -= 90

        landmarks = []
        for cx, cy, x, y in zip(xdistcent, ydistcent, xpoint, ypoint):
            # Add the coordinates relative to the centre of gravity
            landmarks.append(cx)
            landmarks.append(cy)

            # Get the euclidean distance between each point and the centre point (the vector length)
            meanar = np.asarray((ycenter,xcenter))
            centpar = np.asarray((y,x))
            dist = np.linalg.norm(centpar-meanar)

            # Get the angle the vector describes relative to the image, corrected for the offset that the nosebrigde
            # has when the face is not perfectly horizontal
            if x == xcenter:
                angle_relative = 0
            else:
                angle_relative = (math.atan(float(y-ycenter)/(x-xcenter))*180/math.pi) - angle_nose
            landmarks.append(dist)
            landmarks.append(angle_relative)

    if len(detections) < 1:
        # In case no case selected, print "error" values
        landmarks = "error"
    return landmarks


In [4]:
#Fetching the train and validation data set 
def make_sets():
    training_data = []
    training_label = []
    testing_data = []
    testing_label = []
    for emotion in emotions:
        training_set, testing_set = get_files(emotion)
        print("length:", len(training_set))
        #add data to training and testing dataset, and generate labels 0-4
        for item in training_set:
            #read image
            img = cv2.imread(item)
            #convert to grayscale
            gray_img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
            clahe_img = clahe.apply(gray_img)
            landmarks_vec = get_landmarks(clahe_img)

            if landmarks_vec == "error":
                pass
            else:
                training_data.append(landmarks_vec)
                training_label.append(emotions.index(emotion))

        for item in testing_set:
            img = cv2.imread(item)
            gray_img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
            clahe_img = clahe.apply(gray_img)
            landmarks_vec = get_landmarks(clahe_img)
            if landmarks_vec == "error":
                pass
            else:
                testing_data.append(landmarks_vec)
                testing_label.append(emotions.index(emotion))

    return training_data, training_label, testing_data, testing_label

In [5]:
import matplotlib.pyplot as plt
def get_files(emotion):
    images = glob.glob("C:\\Users\\Neha\\Desktop\\AIProject\\FER\\%s\\*" %emotion)
    random.shuffle(images)
    training_set = images[:int(len(images)*0.8)]   #get 80% of image files to be trained
    testing_set = images[-int(len(images)*0.2):]   #get 20% of image files to be tested
    return training_set, testing_set

# KNN

In [91]:
#Creating train and validation data set
from sklearn.model_selection import GridSearchCV
from sklearn.neighbors import KNeighborsClassifier
X_train_knn, y_train_knn, X_test_knn, y_test_knn = make_sets()
print(len(X_train_knn))
print(len(y_train_knn))
X_train_knn_arr = np.array(X_train_knn)
Y_train_knn_arr = np.array(y_train_knn)


length: 5772
length: 3972
length: 3864
length: 2536
length: 3277
length: 348
length: 3196
14896
14896


In [92]:
#Applying standardisation
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
X_test_knn_arr = np.array(X_test_knn)
Y_test_knn_arr = np.array(y_test_knn)
X_train_knn_arr = scaler.fit_transform(X_train_knn_arr)
X_test_knn_arr = scaler.transform(X_test_knn_arr)

In [93]:
print(X_train_knn_arr.shape)
print(Y_train_knn_arr.shape)

(14896, 272)
(14896,)


In [94]:
#Finding best parameter using GridSearchCV
clf_knn = KNeighborsClassifier()
param_grid = {'n_neighbors': np.arange(1, 30) }
knn_gridcv = GridSearchCV(clf_knn, param_grid ,cv=10, refit = True,n_jobs= -1)
knn_gridcv.fit(X_train_knn_arr,Y_train_knn_arr)
knn_gridcv.best_params_


{'n_neighbors': 23}

In [95]:
scores = knn_gridcv.cv_results_['mean_test_score']
scores

array([0.44226845, 0.43824029, 0.44179743, 0.4445483 , 0.4474347 ,
       0.44864397, 0.44884608, 0.45052429, 0.44931601, 0.45092716,
       0.45347799, 0.45354443, 0.45441691, 0.45676626, 0.45582644,
       0.45522277, 0.45670014, 0.45683482, 0.45629728, 0.45696842,
       0.45797504, 0.45824349, 0.45965325, 0.45945227, 0.45770636,
       0.45871325, 0.45737065, 0.45763906, 0.45757199])

In [96]:
#Classification Report
grid_predictin_random= knn_gridcv.predict(X_test_knn_arr)
print(classification_report(Y_test_knn_arr,grid_predictin_random))

              precision    recall  f1-score   support

           0       0.57      0.86      0.69      1073
           1       0.35      0.63      0.45       709
           2       0.29      0.09      0.14       455
           3       0.59      0.42      0.49       424
           4       0.32      0.13      0.18       462
           5       0.00      0.00      0.00        57
           6       0.37      0.10      0.16       480

    accuracy                           0.46      3660
   macro avg       0.36      0.32      0.30      3660
weighted avg       0.43      0.46      0.41      3660



# SVM

In [7]:
#Creating train and test data set for SVM 
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
X_train, y_train, X_test, y_test = make_sets()
#Applying standardization
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

length: 5772
length: 3972
length: 3864
length: 2536
length: 3277
length: 348
length: 3196


In [10]:
#Finding the best parameter for tuning SVM
from sklearn.model_selection import RandomizedSearchCV
from scipy.stats import reciprocal, uniform
param_distributions = {"gamma": reciprocal(0.001, 0.1), "C": uniform(1, 10)}
rnd_search_cv = RandomizedSearchCV(svm_clf, param_distributions, n_iter=10, verbose=2, cv=3)
rnd_search_cv.fit(X_train_scaled, y_train)

Fitting 3 folds for each of 10 candidates, totalling 30 fits
[CV] C=1.4245532033758337, gamma=0.0014920502377871508 ...............


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


[CV]  C=1.4245532033758337, gamma=0.0014920502377871508, total=  39.5s
[CV] C=1.4245532033758337, gamma=0.0014920502377871508 ...............


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:   39.4s remaining:    0.0s


[CV]  C=1.4245532033758337, gamma=0.0014920502377871508, total=  34.8s
[CV] C=1.4245532033758337, gamma=0.0014920502377871508 ...............
[CV]  C=1.4245532033758337, gamma=0.0014920502377871508, total=  36.6s
[CV] C=5.388970121123746, gamma=0.010485877666893474 .................
[CV] .. C=5.388970121123746, gamma=0.010485877666893474, total=  48.8s
[CV] C=5.388970121123746, gamma=0.010485877666893474 .................
[CV] .. C=5.388970121123746, gamma=0.010485877666893474, total=  48.5s
[CV] C=5.388970121123746, gamma=0.010485877666893474 .................
[CV] .. C=5.388970121123746, gamma=0.010485877666893474, total=  48.6s
[CV] C=9.001198957369, gamma=0.010711823154681908 ....................
[CV] ..... C=9.001198957369, gamma=0.010711823154681908, total=  51.6s
[CV] C=9.001198957369, gamma=0.010711823154681908 ....................
[CV] ..... C=9.001198957369, gamma=0.010711823154681908, total=  52.7s
[CV] C=9.001198957369, gamma=0.010711823154681908 ....................
[CV] .

[Parallel(n_jobs=1)]: Done  30 out of  30 | elapsed: 23.9min finished


RandomizedSearchCV(cv=3, estimator=SVC(),
                   param_distributions={'C': <scipy.stats._distn_infrastructure.rv_frozen object at 0x000001B52F9B5670>,
                                        'gamma': <scipy.stats._distn_infrastructure.rv_frozen object at 0x000001B52E867340>},
                   verbose=2)

In [11]:
#Best hyperparameter
rnd_search_cv.best_estimator_

SVC(C=7.786440215591567, gamma=0.002944085280686374)

In [12]:
#Best score in Training Data set
rnd_search_cv.best_score_

0.5225450435992848

In [16]:
#Accuracy of training data set
from sklearn.metrics import accuracy_score
y_pred = rnd_search_cv.best_estimator_.predict(X_test_scaled)
accuracy_score(y_test, y_pred)

0.543571812802582

In [17]:
#Fetching the test data set
def prep_test_set():

    testing_data = []
    testing_label = []
    for emotion in emotions:
        images = glob.glob("C:\\Users\\Neha\\Desktop\\AIProject\\archive\\test\\%s\\*" %emotion)
        
        #random.shuffle(images)
        train_set = images[:]
        print(len(train_set))
        #add data to training and testing dataset, and generate labels 0-4
        for item in train_set:
            #read image
            img = cv2.imread(item)
            #convert to grayscale
            gray_img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
            clahe_img = clahe.apply(gray_img)
            landmarks_vec = get_landmarks(clahe_img)

            if landmarks_vec == "error":
                pass
            else:
                testing_data.append(landmarks_vec)
                testing_label.append(emotions.index(emotion))

        
    return  testing_data, testing_label

In [18]:
#Checking for accuracy for test data set
X_test_org, y_test_org = prep_test_set()
np_X_test_org = np.array(X_test_org)
np_y_test_org = np.array(y_test_org)
X_test_org_scaled = scaler.transform(np_X_test_org)

y_test_pred = rnd_search_cv.best_estimator_.predict(X_test_org_scaled)
accuracy_score(np_y_test_org, y_test_pred)


1774
1233
1247
831
1024
111
958


0.5357987529563535