In [1]:
from keras.utils.np_utils import to_categorical
import pandas as pd
import numpy as np
import random
import sys

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [2]:
# fer2013 dataset:
# Training       28709
# PrivateTest     3589
# PublicTest      3589

In [3]:
# emotion labels from FER2013:
emotion = {'Angry': 0, 'Disgust': 1, 'Fear': 2, 'Happy': 3,
           'Sad': 4, 'Surprise': 5, 'Neutral': 6}
emo     = ['Angry', 'Fear', 'Happy',
           'Sad', 'Surprise', 'Neutral']

In [4]:
df= pd.read_csv('dataset/fer2013.csv')
df.head()

Unnamed: 0,emotion,pixels,Usage
0,0,70 80 82 72 58 58 60 63 54 58 60 48 89 115 121...,Training
1,0,151 150 147 155 148 133 111 140 170 174 182 15...,Training
2,2,231 212 156 164 174 138 161 173 182 200 106 38...,Training
3,4,24 32 36 30 32 23 19 20 30 41 21 22 32 34 21 1...,Training
4,6,4 0 0 0 0 0 0 0 0 0 0 0 3 15 23 28 48 50 58 84...,Training


In [5]:
df.shape

(35887, 3)

In [6]:
set(df.Usage)

{'PrivateTest', 'PublicTest', 'Training'}

In [7]:
def reconstruct(pix_str, size=(48,48)):
    pix_arr = np.array(list(map(int, pix_str.split())))
    return np.reshape(pix_arr, size)

In [8]:
def load_data(sample_split=0.3, usage='Training', to_cat=True, verbose=True,
              classes=['Angry','Happy'], filepath='dataset/fer2013.csv'):
    df = pd.read_csv(filepath)
    # print df.tail()
    # print df.Usage.value_counts()
    df = df[df.Usage == usage]
    frames = []
    classes.append('Disgust')
    for _class in classes:
        class_df = df[df['emotion'] == emotion[_class]]
        frames.append(class_df)
    data = pd.concat(frames, axis=0)
    rows = random.sample(set(data.index), int(len(data)*sample_split))
    data = data.ix[rows]
    print ('{} set for {}: {}'.format(usage, classes, data.shape))
    data['pixels'] = data.pixels.apply(lambda x: reconstruct(x))
    x = np.array([mat for mat in data.pixels]) # (n_samples, img_width, img_height)
    X_train = x.reshape(-1, x.shape[1], x.shape[2],1)
    y_train, new_dict = emotion_count(data.emotion, classes, verbose)
    print (new_dict)
    if to_cat:
        y_train = to_categorical(y_train)
    return X_train, y_train, new_dict

In [9]:
def emotion_count(y_train, classes, verbose=True):
    emo_classcount = {}
    print ('Disgust classified as Angry')
    y_train.loc[y_train == 1] = 0
    classes.remove('Disgust')
    for new_num, _class in enumerate(classes):
        y_train.loc[(y_train == emotion[_class])] = new_num
        class_count = sum(y_train == (new_num))
        if verbose:
            print ('{}: {} with {} samples'.format(new_num, _class, class_count))
        emo_classcount[_class] = (new_num, class_count)
    return y_train.values, emo_classcount

In [10]:
def save_data(X_train, y_train, fname='', folder='dataset/'):
    np.save(folder + 'X_train' + fname, X_train)
    np.save(folder + 'y_train' + fname, y_train)

In [11]:
# makes the numpy arrays ready to use:
print ('Making moves...')
emo = ['Angry', 'Fear', 'Happy',
       'Sad', 'Surprise', 'Neutral']
X_train, y_train, emo_dict = load_data(sample_split=1.0,
                                       classes=emo,
                                       usage='PublicTest',
                                       verbose=True)

Making moves...


.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/indexing.html#ix-indexer-is-deprecated


PublicTest set for ['Angry', 'Fear', 'Happy', 'Sad', 'Surprise', 'Neutral', 'Disgust']: (3589, 3)
Disgust classified as Angry
0: Angry with 523 samples
1: Fear with 496 samples
2: Happy with 895 samples


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self._setitem_with_indexer(indexer, value)


3: Sad with 653 samples
4: Surprise with 415 samples
5: Neutral with 607 samples
{'Neutral': (5, 607), 'Happy': (2, 895), 'Sad': (3, 653), 'Angry': (0, 523), 'Surprise': (4, 415), 'Fear': (1, 496)}


In [12]:
print( 'Saving...')
save_data(X_train, y_train, fname='_data_PublicTest')
print (X_train.shape)
print (y_train.shape)
print ('Done!')

Saving...
(3589, 48, 48, 1)
(3589, 6)
Done!
