# SVM CLASSIFICATION

In [1]:
original_labels = [0, 1, 2, 3, 4, 5, 6,7]

### Converting FER2013 TO IMAGES

In [10]:
import numpy as np
import pandas as pd
import os
import argparse
import errno
import scipy.misc
import dlib
import cv2
import imageio
from skimage.feature import hog
import sys
sys.argv=['']
del sys
# initialization
image_height = 48
image_width = 48
window_size = 24
window_step = 6
ONE_HOT_ENCODING = False
SAVE_IMAGES = False
GET_LANDMARKS = False
GET_HOG_FEATURES = False
GET_HOG_WINDOWS_FEATURES = False
SELECTED_LABELS = []
IMAGES_PER_LABEL = 500
OUTPUT_FOLDER_NAME = "fer2013_features"

# parse arguments and initialize variables:
parser = argparse.ArgumentParser()
parser.add_argument("-j", "--jpg", default="no", help="save images as .jpg files")
parser.add_argument("-l", "--landmarks", default="yes", help="extract Dlib Face landmarks")
parser.add_argument("-ho", "--hog", default="yes", help="extract HOG features")
parser.add_argument("-hw", "--hog_windows", default="yes", help="extract HOG features from a sliding window")
parser.add_argument("-o", "--onehot", default="no", help="one hot encoding")
parser.add_argument("-e", "--expressions", default="0,1,2,3,4,5,6,7", help="choose the faciale expression you want to use: 0=Angry, 1=Disgust, 2=Fear, 3=Happy, 4=Sad, 5=Surprise, 6=Neutral")
args = parser.parse_args()
if args.jpg == "yes":
    SAVE_IMAGES = True
if args.landmarks == "yes":
    GET_LANDMARKS = True
if args.hog == "yes":
    GET_HOG_FEATURES = True
if args.hog_windows == "yes":
    GET_HOG_WINDOWS_FEATURES = True
if args.onehot == "yes":
    ONE_HOT_ENCODING = True
if args.expressions != "":
    expressions  = args.expressions.split(",")
    for i in range(0,len(expressions)):
        label = int(expressions[i])
        if (label >=0 and label<=7 ):
            SELECTED_LABELS.append(label)
if SELECTED_LABELS == []:
    SELECTED_LABELS = [0,1,2,3,4,5,6,7]
print(len(SELECTED_LABELS))
print( str(len(SELECTED_LABELS)) + " expressions")

# loading Dlib predictor and preparing arrays:
print( "preparing")
predictor = dlib.shape_predictor('shape_predictor_68_face_landmarks.dat')
original_labels = [0, 1, 2, 3, 4, 5, 6,7]
new_labels = list(set(original_labels) & set(SELECTED_LABELS))
nb_images_per_label = list(np.zeros(len(new_labels), 'uint8'))
try:
    os.makedirs(OUTPUT_FOLDER_NAME)
except OSError as e:
    if e.errno == errno.EEXIST and os.path.isdir(OUTPUT_FOLDER_NAME):
        pass
    else:
        raise

def get_landmarks(image, rects):
    # this function have been copied from http://bit.ly/2cj7Fpq
    if len(rects) > 1:
        raise BaseException("TooManyFaces")
    if len(rects) == 0:
        raise BaseException("NoFaces")
    return np.matrix([[p.x, p.y] for p in predictor(image, rects[0]).parts()])

def get_new_label(label, one_hot_encoding=False):
    if one_hot_encoding:
        new_label = new_labels.index(label)
        label = list(np.zeros(len(new_labels), 'uint8'))
        label[new_label] = 1
        return label
    else:
        return new_labels.index(label)

def sliding_hog_windows(image):
    hog_windows = []
    for y in range(0, image_height, window_step):
        for x in range(0, image_width, window_step):
            window = image[y:y+window_size, x:x+window_size]
            hog_windows.extend(hog(window, orientations=8, pixels_per_cell=(8, 8),
                                            cells_per_block=(1, 1), visualize=False))
    return hog_windows

print( "importing csv file")
data = pd.read_csv('fer2013.csv')

for category in data['Usage'].unique():
    print( "converting set: " + category + "...")
    # create folder
    if not os.path.exists(category):
        try:
            os.makedirs(OUTPUT_FOLDER_NAME + '/' + category)
        except OSError as e:
            if e.errno == errno.EEXIST and os.path.isdir(OUTPUT_FOLDER_NAME):
               pass
            else:
                raise
    
    # get samples and labels of the actual category
    category_data = data[data['Usage'] == category]
    samples = category_data['pixels'].values
    labels = category_data['emotion'].values
    
    # get images and extract features
    images = []
    labels_list = []
    landmarks = []
    hog_features = []
    hog_images = []
    for i in range(len(samples)):
        try:
            if labels[i] in SELECTED_LABELS and nb_images_per_label[get_new_label(labels[i])] < IMAGES_PER_LABEL:
                image = np.fromstring(samples[i], dtype=int, sep=" ").reshape((image_height, image_width))
                images.append(image)
                if SAVE_IMAGES:
                    imageio.imwrite(category + '/' + str(i) + '.jpg', image)
                if GET_HOG_WINDOWS_FEATURES:
                    features = sliding_hog_windows(image)
                    f, hog_image = hog(image, orientations=8, pixels_per_cell=(16, 16),
                                            cells_per_block=(1, 1), visualize=True)
                    hog_features.append(features)
                    hog_images.append(hog_image)
                elif GET_HOG_FEATURES:
                    features, hog_image = hog(image, orientations=8, pixels_per_cell=(16, 16),
                                            cells_per_block=(1, 1), visualize=True)
                    hog_features.append(features)
                    hog_images.append(hog_image)
                if GET_LANDMARKS:
                    imageio.imwrite('temp.jpg', image)
                    image2 = cv2.imread('temp.jpg')
                    face_rects = [dlib.rectangle(left=1, top=1, right=47, bottom=47)]
                    face_landmarks = get_landmarks(image2, face_rects)
                    landmarks.append(face_landmarks)            
                labels_list.append(get_new_label(labels[i], one_hot_encoding=ONE_HOT_ENCODING))
                nb_images_per_label[get_new_label(labels[i])] += 1
        except Exception as e:
            print( "error in image: " + str(i) + " - " + str(e))

    np.save(OUTPUT_FOLDER_NAME + '/' + category + '/images.npy', images)
    if ONE_HOT_ENCODING:
        np.save(OUTPUT_FOLDER_NAME + '/' + category + '/labels.npy', labels_list)
    else:
        np.save(OUTPUT_FOLDER_NAME + '/' + category + '/labels.npy', labels_list)
    if GET_LANDMARKS:
        np.save(OUTPUT_FOLDER_NAME + '/' + category + '/landmarks.npy', landmarks)
    if GET_HOG_FEATURES or GET_HOG_WINDOWS_FEATURES:
        np.save(OUTPUT_FOLDER_NAME + '/' + category + '/hog_features.npy', hog_features)
        np.save(OUTPUT_FOLDER_NAME + '/' + category + '/hog_images.npy', hog_images)

ModuleNotFoundError: No module named 'dlib'

# Loading Data

In [1]:

from parameters import DATASET, HYPERPARAMS
import numpy as np

def load_data(validation=False, test=False):
    
    data_dict = dict()
    validation_dict = dict()
    test_dict = dict()

    if DATASET.name == "Fer2013":
        # load train set
        if HYPERPARAMS.features == "landmarks_and_hog":
            data_dict['X'] = np.load(DATASET.train_folder + '/landmarks.npy')
            data_dict['X'] = np.array([x.flatten() for x in data_dict['X']])
            data_dict['X'] = np.concatenate((data_dict['X'], np.load(DATASET.train_folder + '/hog_features.npy')),axis=1)
        elif HYPERPARAMS.features == "landmarks":
            data_dict['X'] = np.load(DATASET.train_folder + '/landmarks.npy')
            data_dict['X'] = np.array([x.flatten() for x in data_dict['X']])
        elif HYPERPARAMS.features == "hog":
            data_dict['X'] = np.load(DATASET.train_folder + '/hog_features.npy')
        else:
            print( "Error '{}' features not recognized".format(HYPERPARAMS.features))
        data_dict['Y'] = np.load(DATASET.train_folder + '/labels.npy')
        if DATASET.trunc_trainset_to > 0:
            data_dict['X'] = data_dict['X'][0:DATASET.trunc_trainset_to, :]
            data_dict['Y'] = data_dict['Y'][0:DATASET.trunc_trainset_to]
        if validation:
            # load validation set 
            if HYPERPARAMS.features == "landmarks_and_hog":
                validation_dict['X'] = np.load(DATASET.validation_folder + '/landmarks.npy')
                validation_dict['X'] = np.array([x.flatten() for x in validation_dict['X']])
                validation_dict['X'] = np.concatenate((validation_dict['X'], np.load(DATASET.validation_folder + '/hog_features.npy')),axis=1)
            elif HYPERPARAMS.features == "landmarks":
                validation_dict['X'] = np.load(DATASET.validation_folder + '/landmarks.npy')
                validation_dict['X'] = np.array([x.flatten() for x in validation_dict['X']])
            elif HYPERPARAMS.features == "hog":
                validation_dict['X'] = np.load(DATASET.validation_folder + '/hog_features.npy')
            else:
                print( "Error '{}' features not recognized".format(HYPERPARAMS.features))
            validation_dict['Y'] = np.load(DATASET.validation_folder + '/labels.npy')
            if DATASET.trunc_validationset_to > 0:
                validation_dict['X'] = validation_dict['X'][0:DATASET.trunc_validationset_to, :]
                validation_dict['Y'] = validation_dict['Y'][0:DATASET.trunc_validationset_to]
        if test:
            # load train set
            if HYPERPARAMS.features == "landmarks_and_hog":
                test_dict['X'] = np.load(DATASET.test_folder + '/landmarks.npy')
                test_dict['X'] = np.array([x.flatten() for x in test_dict['X']])
                test_dict['X'] = np.concatenate((test_dict['X'], np.load(DATASET.test_folder + '/hog_features.npy')), axis=1)
            elif HYPERPARAMS.features == "landmarks":
                test_dict['X'] = np.load(DATASET.test_folder + '/landmarks.npy')
                test_dict['X'] = np.array([x.flatten() for x in test_dict['X']])
            elif HYPERPARAMS.features == "hog":
                test_dict['X'] = np.load(DATASET.test_folder + '/hog_features.npy')
            else:
                print( "Error '{}' features not recognized".format(HYPERPARAMS.features))
            test_dict['Y'] = np.load(DATASET.test_folder + '/labels.npy')
            np.save(DATASET.test_folder + "/lab.npy", test_dict['Y'])
            if DATASET.trunc_testset_to > 0:
                test_dict['X'] = test_dict['X'][0:DATASET.trunc_testset_to, :]
                test_dict['Y'] = test_dict['Y'][0:DATASET.trunc_testset_to]

        if not validation and not test:
            return data_dict
        elif not test:
            return data_dict, validation_dict
        else: 
            return data_dict, validation_dict, test_dict
    else:
        print( "Unknown dataset")
        exit()


### TRAINING THE MODEL

In [2]:
import time
import argparse
import os
import sys
if sys.version_info >= (3, 0):
        import _pickle as cPickle
else:
        import cPickle
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

from data_loader import load_data 
from parameters import DATASET, TRAINING, HYPERPARAMS

def train(epochs=HYPERPARAMS.epochs, random_state=HYPERPARAMS.random_state, 
          kernel="linear", decision_function=HYPERPARAMS.decision_function, gamma=HYPERPARAMS.gamma, train_model=True):

        print( "loading dataset " + DATASET.name + "...")
        if train_model:
                data, validation = load_data(validation=True)
        else:
                data, validation, test = load_data(validation=True, test=True)
        
        if train_model:
            # Training phase
            print( "building model...")
            model = SVC(random_state=random_state, max_iter=1000, kernel="linear", decision_function_shape=decision_function, gamma=gamma)

            print( "start training...")
            print( "--")
            print( "kernel: {}".format(kernel))
            print( "decision function: {} ".format(decision_function))
            print( "max epochs: {} ".format(epochs))
            print( "gamma: {} ".format(gamma))
            print( "--")
            print( "Training samples: {}".format(len(data['Y'])))
            print( "Validation samples: {}".format(len(validation['Y'])))
            print( "--")
            start_time = time.time()
            model.fit(data['X'], data['Y'])
            training_time = time.time() - start_time
            print( "training time = {0:.1f} sec".format(training_time))

            if TRAINING.save_model:
                print( "saving model...")
                with open(TRAINING.save_model_path, 'wb') as f:
                        cPickle.dump(model, f)

            print( "evaluating...")
            validation_accuracy = evaluate(model, validation['X'], validation['Y'])
            print( "  - validation accuracy = {0:.1f}".format(validation_accuracy*100))
            return validation_accuracy
        else:
            # Testing phase : load saved model and evaluate on test dataset
            print( "start evaluation...")
            print( "loading pretrained model...")
            if os.path.isfile(TRAINING.save_model_path):
                with open(TRAINING.save_model_path, 'rb') as f:
                        model = cPickle.load(f)
            else:
                print( "Error: file '{}' not found".format(TRAINING.save_model_path))
                exit()

            print( "--")
            print( "Validation samples: {}".format(len(validation['Y'])))
            print( "Test samples: {}".format(len(test['Y'])))
            print( "--")
            print( "evaluating...")
            start_time = time.time()
            validation_accuracy = evaluate(model, validation['X'],  validation['Y'])
            print( "  - validation accuracy = {0:.1f}".format(validation_accuracy*100))
            test_accuracy = evaluate(model, test['X'], test['Y'])
            print( "  - test accuracy = {0:.1f}".format(test_accuracy*100))
            print( "  - evalution time = {0:.1f} sec".format(time.time() - start_time))
           
            return test_accuracy

def evaluate(model, X, Y):
        predicted_Y = model.predict(X)
        accuracy = accuracy_score(Y, predicted_Y)
        return accuracy

train()


loading dataset Fer2013...
building model...
start training...
--
kernel: linear
decision function: ovr 
max epochs: 10000 
gamma: auto 
--
Training samples: 3436
Validation samples: 56
--




training time = 27.3 sec
saving model...
evaluating...
  - validation accuracy = 48.2


0.48214285714285715

### Evaluating

In [3]:
train(train_model=False)

loading dataset Fer2013...
start evaluation...
loading pretrained model...
--
Validation samples: 56
Test samples: 8
--
evaluating...
  - validation accuracy = 48.2
  - test accuracy = 62.5
  - evalution time = 1.1 sec


0.625

### Optimizing parameters

In [4]:
import time
import argparse
import pprint
import numpy as np 
from hyperopt import fmin, tpe, hp, STATUS_OK, Trials
import sys
sys.argv=['']
del sys
from train import train
from parameters import HYPERPARAMS

# define the search space
fspace = {
    'decision_function': hp.choice('decision_function', ['ovr', 'ovo']),
    'gamma':  hp.uniform('gamma', 0.001, 0.01)
}

# parse arguments

max_evals = 15
current_eval = 1
train_history = []

def function_to_minimize(hyperparams, gamma='auto', decision_function='ovr'):
    decision_function = hyperparams['decision_function']
    gamma = hyperparams['gamma']
    global current_eval 
    global max_evals
    print( "#################################")
    print( "       Evaluation {} of {}".format(current_eval, max_evals))
    print( "#################################")
    start_time = time.time()
    try:
        accuracy = train(epochs=HYPERPARAMS.epochs_during_hyperopt, decision_function=decision_function, gamma=gamma)
        training_time = int(round(time.time() - start_time))
        current_eval += 1
        train_history.append({'accuracy':accuracy, 'decision_function':decision_function, 'gamma':gamma, 'time':training_time})
    except Exception as e:
        print( "#################################")
        print( "Exception during training: {}".format(str(e)))
        print( "Saving train history in train_history.npy")
        np.save("train_history.npy", train_history)
        exit()
    return {'loss': -accuracy, 'time': training_time, 'status': STATUS_OK}

# lunch the hyperparameters search
trials = Trials()
best_trial = fmin(fn=function_to_minimize, space=fspace, algo=tpe.suggest, max_evals=max_evals, trials=trials)

# get some additional information and print( the best parameters
for trial in trials.trials:
    if trial['misc']['vals']['decision_function'][0] == best_trial['decision_function'] and \
            trial['misc']['vals']['gamma'][0] == best_trial['gamma']:
        best_trial['accuracy'] = -trial['result']['loss'] * 100
        best_trial['time'] = trial['result']['time']
print( "#################################")
print( "      Best parameters found")
print( "#################################")         
pprint.pprint(best_trial)
print( "decision_function { 0: ovr, 1: ovo }")
print( "#################################")


#################################                     
       Evaluation 1 of 15                             
#################################                     
loading dataset Fer2013...                            
building model...                                     
start training...                                     
--                                                    
kernel: rbf                                           
decision function: ovr                                
max epochs: 500                                       
gamma: 0.009508275768257215                           
--                                                    
Training samples: 3436                                
Validation samples: 56                                
--                                                    
training time = 47.4 sec                              
saving model...                                       
  0%|          | 0/15 [00:47<?, ?trial/s, best loss=?]




evaluating...                                         
  - validation accuracy = 39.3                        
#################################                                                 
       Evaluation 2 of 15                                                         
#################################                                                 
loading dataset Fer2013...                                                        
building model...                                                                 
start training...                                                                 
--                                                                                
kernel: rbf                                                                       
decision function: ovo                                                            
max epochs: 500                                                                   
gamma: 0.0049653069759365166                                




evaluating...                                                                     
  - validation accuracy = 46.4                                                    
#################################                                                 
       Evaluation 3 of 15                                                        
#################################                                                
loading dataset Fer2013...                                                       
building model...                                                                
start training...                                                                
--                                                                               
kernel: rbf                                                                      
decision function: ovo                                                           
max epochs: 500                                                                  
gamma: 0.0045




evaluating...                                                                    
  - validation accuracy = 46.4                                                   
#################################                                                
       Evaluation 4 of 15                                                        
#################################                                                
loading dataset Fer2013...                                                       
building model...                                                                
start training...                                                                
--                                                                               
kernel: rbf                                                                      
decision function: ovr                                                           
max epochs: 500                                                                  
gamma: 0.0040950




evaluating...                                                                    
  - validation accuracy = 46.4                                                   
#################################                                                
       Evaluation 5 of 15                                                        
#################################                                                
loading dataset Fer2013...                                                       
building model...                                                                
start training...                                                                
--                                                                               
kernel: rbf                                                                      
decision function: ovo                                                           
max epochs: 500                                                                  
gamma: 0.0033551




evaluating...                                                                    
  - validation accuracy = 50.0                                                   
#################################                                                
       Evaluation 6 of 15                                         
#################################                                 
loading dataset Fer2013...                                        
building model...                                                 
start training...                                                 
--                                                                
kernel: rbf                                                       
decision function: ovr                                            
max epochs: 500                                                   
gamma: 0.005007077882982711                                       
--                                                                
Training samples:




evaluating...                                                     
  - validation accuracy = 46.4                                    
#################################                                 
       Evaluation 7 of 15                                         
#################################                                 
loading dataset Fer2013...                                        
building model...                                                 
start training...                                                 
--                                                                
kernel: rbf                                                       
decision function: ovo                                            
max epochs: 500                                                   
gamma: 0.006919568848586376                                       
--                                                                
Training samples: 3436                                        




evaluating...                                                     
  - validation accuracy = 42.9                                    
#################################                                 
       Evaluation 8 of 15                                         
#################################                                 
loading dataset Fer2013...                                        
building model...                                                 
start training...                                                 
--                                                                
kernel: rbf                                                       
decision function: ovr                                            
max epochs: 500                                                   
gamma: 0.007204466450877652                                       
--                                                                
Training samples: 3436                                        




evaluating...                                                     
  - validation accuracy = 42.9                                    
#################################                                 
       Evaluation 9 of 15                                         
#################################                                 
loading dataset Fer2013...                                        
building model...                                                 
start training...                                                 
--                                                                
kernel: rbf                                                       
decision function: ovo                                            
max epochs: 500                                                   
gamma: 0.005134535164510164                                       
--                                                                
Training samples: 3436                                        




evaluating...                                                     
  - validation accuracy = 44.6                                    
#################################                                 
       Evaluation 10 of 15                                        
#################################                                 
loading dataset Fer2013...                                        
building model...                                                 
start training...                                                 
--                                                                
kernel: rbf                                                       
decision function: ovr                                            
max epochs: 500                                                   
gamma: 0.008191798146045853                                       
--                                                                
Training samples: 3436                                        




evaluating...                                                     
  - validation accuracy = 41.1                                    
#################################                                  
       Evaluation 11 of 15                                         
#################################                                  
loading dataset Fer2013...                                         
building model...                                                  
start training...                                                  
--                                                                 
kernel: rbf                                                        
decision function: ovo                                             
max epochs: 500                                                    
gamma: 0.00980673410426254                                         
--                                                                 
Training samples: 3436                            




evaluating...                                                      
  - validation accuracy = 39.3                                     
#################################                                  
       Evaluation 12 of 15                                         
#################################                                  
loading dataset Fer2013...                                         
building model...                                                  
start training...                                                  
--                                                                 
kernel: rbf                                                        
decision function: ovr                                             
max epochs: 500                                                    
gamma: 0.009569601367579901                                        
--                                                                 
Training samples: 3436                          




evaluating...                                                      
  - validation accuracy = 39.3                                     
#################################                                  
       Evaluation 13 of 15                                         
#################################                                  
loading dataset Fer2013...                                         
building model...                                                  
start training...                                                  
--                                                                 
kernel: rbf                                                        
decision function: ovr                                             
max epochs: 500                                                    
gamma: 0.0049211784428082475                                       
--                                                                 
Training samples: 3436                          




evaluating...                                                      
  - validation accuracy = 46.4                                     
#################################                                  
       Evaluation 14 of 15                                         
#################################                                  
loading dataset Fer2013...                                         
building model...                                                  
start training...                                                  
--                                                                 
kernel: rbf                                                        
decision function: ovo                                             
max epochs: 500                                                    
gamma: 0.008623658327332721                                        
--                                                                 
Training samples: 3436                          




evaluating...                                                      
  - validation accuracy = 41.1                                     
#################################                                  
       Evaluation 15 of 15                                         
#################################                                  
loading dataset Fer2013...                                         
building model...                                                  
start training...                                                  
--                                                                 
kernel: rbf                                                        
decision function: ovr                                             
max epochs: 500                                                    
gamma: 0.0011559970127537034                                       
--                                                                 
Training samples: 3436                          




evaluating...                                                      
  - validation accuracy = 51.8                                     
100%|██████████| 15/15 [11:17<00:00, 45.19s/trial, best loss: -0.5178571428571429]
#################################
      Best parameters found
#################################
{'accuracy': 51.78571428571429,
 'decision_function': 0,
 'gamma': 0.0011559970127537034,
 'time': 38}
decision_function { 0: ovr, 1: ovo }
#################################


In [15]:
import numpy as np
data1 = np.load('C:/Vscode/VIT_Code/4th_sem/ML-J Comp/fer2013_features/PublicTest/hog_features.npy')
print("hog features",data1.shape)
data2 = np.load('C:/Vscode/VIT_Code/4th_sem/ML-J Comp/fer2013_features/PublicTest/hog_images.npy')
print("hog images",data2.shape)
data3 = np.load('C:/Vscode/VIT_Code/4th_sem/ML-J Comp/fer2013_features/PublicTest/images.npy')
print("images",data3.shape)
data4 = np.load('C:/Vscode/VIT_Code/4th_sem/ML-J Comp/fer2013_features/PublicTest/labels.npy')
print("labels",data4.shape)
data5 = np.load('C:/Vscode/VIT_Code/4th_sem/ML-J Comp/fer2013_features/PublicTest/landmarks.npy')
print("landmarks",data5.shape)

hog features (56, 2592)
hog images (56, 48, 48)
images (56, 48, 48)
labels (56,)
landmarks (56, 68, 2)
