In [1]:
import sys, os
import pandas as pd
import numpy as np
import tensorflow as tf

In [2]:
def createDataset(filename, num_classes=7, image_size=48):
    """Build training and validation ``tf.data`` datasets from a pixel CSV.

    The CSV is read with pandas and must contain the columns ``emotion``
    (class id), ``pixels`` (space-separated pixel values) and ``Usage``.
    Rows whose ``Usage`` contains ``'Training'`` go to the training split,
    rows whose ``Usage`` contains ``'PublicTest'`` go to the validation
    split, and every other row is ignored.

    Rows that cannot be parsed (non-numeric or missing pixels, wrong number
    of pixel values, missing or out-of-range class id) are reported on
    stdout and skipped, so one bad row does not abort the whole load.

    Args:
        filename: Path of the CSV file to load.
        num_classes: Depth of the one-hot label vectors. Defaults to 7.
        image_size: Side length of the square, single-channel images; each
            row must hold ``image_size * image_size`` pixel values.
            Defaults to 48.

    Returns:
        A ``(train_ds, validation_ds)`` tuple of ``tf.data.Dataset`` objects
        whose elements are ``(image, one_hot_label)`` pairs; each image is
        float32 with shape ``(image_size, image_size, 1)``.
    """
    df = pd.read_csv(filename)
    df.info()
    print("-------------------------")

    pixels_per_image = image_size * image_size
    X_train, train_y, X_test, test_y = [], [], [], []

    # Iterating the three columns directly avoids building a Series per row
    # (as iterrows() does) and avoids splitting pixels of rows we ignore.
    columns = zip(df['Usage'], df['pixels'], df['emotion'])
    for pos, (usage, pixels, emotion) in enumerate(columns):
        try:
            if 'Training' in usage:
                images, labels = X_train, train_y
            elif 'PublicTest' in usage:
                images, labels = X_test, test_y
            else:
                continue  # any other Usage value belongs to neither split

            image = np.array(pixels.split(" "), 'float32')
            if image.size != pixels_per_image:
                raise ValueError(
                    f"expected {pixels_per_image} pixel values, got {image.size}"
                )

            label = int(emotion)
            if not 0 <= label < num_classes:
                raise ValueError(
                    f"emotion {label} is outside [0, {num_classes})"
                )
        except (ValueError, TypeError, AttributeError) as err:
            # Only data-parsing problems are tolerated; anything else
            # (e.g. KeyboardInterrupt, MemoryError) propagates.
            print(f"error occured at index :{df.index[pos]} and row:{df.iloc[pos]}")
            print(f"reason: {err!r}")
            continue

        # Append image and label together so the two lists never drift apart.
        images.append(image)
        labels.append(label)

    # Use the explicit sample count (not -1) so an empty split still reshapes.
    train_images = np.array(X_train, 'float32').reshape(
        len(X_train), image_size, image_size, 1
    )
    test_images = np.array(X_test, 'float32').reshape(
        len(X_test), image_size, image_size, 1
    )

    # tf.one_hot is documented for integer indices, so the class ids are kept
    # as int32 instead of being cast to float32.
    train_labels = np.array(train_y, 'int32')
    test_labels = np.array(test_y, 'int32')

    train_ds = tf.data.Dataset.from_tensor_slices(
        (train_images, tf.one_hot(train_labels, num_classes))
    )
    validation_ds = tf.data.Dataset.from_tensor_slices(
        (test_images, tf.one_hot(test_labels, num_classes))
    )

    return train_ds, validation_ds

In [3]:
# Load the FERGIT CSV and split it into training / validation datasets.
fergit_train, fergit_test = createDataset('../datasets/FERGIT.csv')
# len() reads the dataset's known cardinality, so the sample count is reported
# without materialising every image into a Python list first.
print(len(fergit_train))

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 49300 entries, 0 to 49299
Data columns (total 3 columns):
 #   Column   Non-Null Count  Dtype 
---  ------   --------------  ----- 
 0   emotion  49300 non-null  int64 
 1   pixels   49300 non-null  object
 2   Usage    49300 non-null  object
dtypes: int64(1), object(2)
memory usage: 1.1+ MB
-------------------------
42122


In [4]:
# Load the extended CK+ CSV and split it into training / validation datasets.
# The path is relative, like the FERGIT load, instead of a machine-specific
# absolute 'D:\...' literal whose backslashes are unrecognised escapes.
# NOTE(review): assumes '../datasets' is the same folder the absolute path
# pointed to - confirm.
ckext_train, ckext_test = createDataset('../datasets/ckextended.csv')
# len() reads the dataset's known cardinality instead of iterating every sample.
print(len(ckext_train))

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 902 entries, 0 to 901
Data columns (total 3 columns):
 #   Column   Non-Null Count  Dtype 
---  ------   --------------  ----- 
 0   emotion  902 non-null    int64 
 1   pixels   902 non-null    object
 2   Usage    902 non-null    object
dtypes: int64(1), object(2)
memory usage: 21.3+ KB
-------------------------
720


In [5]:
# Load the FER2013 CSV and split it into training / validation datasets.
# The path is relative, like the FERGIT load, instead of a machine-specific
# absolute 'D:\...' literal that needed an ad-hoc '\\f' to dodge the
# form-feed escape.
# NOTE(review): assumes '../datasets' is the same folder the absolute path
# pointed to - confirm.
fer13_train, fer13_test = createDataset('../datasets/fer2013.csv')
# len() reads the dataset's known cardinality instead of iterating every sample.
print(len(fer13_train))

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 35887 entries, 0 to 35886
Data columns (total 3 columns):
 #   Column   Non-Null Count  Dtype 
---  ------   --------------  ----- 
 0   emotion  35887 non-null  int64 
 1   pixels   35887 non-null  object
 2   Usage    35887 non-null  object
dtypes: int64(1), object(2)
memory usage: 841.2+ KB
-------------------------
28709


In [6]:
# Merge the three sources, in the order FERGIT -> CK+ extended -> FER2013,
# into one training dataset and one test dataset.
train_ds = fergit_train.concatenate(ckext_train).concatenate(fer13_train)
test_ds = fergit_test.concatenate(ckext_test).concatenate(fer13_test)

# Concatenating datasets of known size keeps the cardinality known, so len()
# reports the totals without loading every image into memory just to count.
print(len(train_ds))
print(len(test_ds))

71551
7267


In [7]:
# Sanity check: show the first image of the merged training set, then the
# first (image, label) element of the FERGIT training set for comparison.
merged_samples = train_ds.as_numpy_iterator()
img, label = next(merged_samples)
print(img)

fergit_samples = fergit_train.as_numpy_iterator()
print(next(fergit_samples))

[[[ 70.]
  [ 80.]
  [ 82.]
  ...
  [ 52.]
  [ 43.]
  [ 41.]]

 [[ 65.]
  [ 61.]
  [ 58.]
  ...
  [ 56.]
  [ 52.]
  [ 44.]]

 [[ 50.]
  [ 43.]
  [ 54.]
  ...
  [ 49.]
  [ 56.]
  [ 47.]]

 ...

 [[ 91.]
  [ 65.]
  [ 42.]
  ...
  [ 72.]
  [ 56.]
  [ 43.]]

 [[ 77.]
  [ 82.]
  [ 79.]
  ...
  [105.]
  [ 70.]
  [ 46.]]

 [[ 77.]
  [ 72.]
  [ 84.]
  ...
  [106.]
  [109.]
  [ 82.]]]
(array([[[ 70.],
        [ 80.],
        [ 82.],
        ...,
        [ 52.],
        [ 43.],
        [ 41.]],

       [[ 65.],
        [ 61.],
        [ 58.],
        ...,
        [ 56.],
        [ 52.],
        [ 44.]],

       [[ 50.],
        [ 43.],
        [ 54.],
        ...,
        [ 49.],
        [ 56.],
        [ 47.]],

       ...,

       [[ 91.],
        [ 65.],
        [ 42.],
        ...,
        [ 72.],
        [ 56.],
        [ 43.]],

       [[ 77.],
        [ 82.],
        [ 79.],
        ...,
        [105.],
        [ 70.],
        [ 46.]],

       [[ 77.],
        [ 72.],
        [ 84.],
     

In [8]:
# Persist both merged splits to disk, each in its own directory.
for split_ds, out_dir in (
    (train_ds, '../datasets/train_ds'),
    (test_ds, '../datasets/test_ds'),
):
    split_ds.save(out_dir)