In [1]:
import numpy as np
import pandas as pd

In [2]:
# load all columns but the first

pixels_Usage = pd.read_csv('fer2013.csv').iloc[:,1:]

In [3]:
pixels_Usage.head()

Unnamed: 0,pixels,Usage
0,70 80 82 72 58 58 60 63 54 58 60 48 89 115 121...,Training
1,151 150 147 155 148 133 111 140 170 174 182 15...,Training
2,231 212 156 164 174 138 161 173 182 200 106 38...,Training
3,24 32 36 30 32 23 19 20 30 41 21 22 32 34 21 1...,Training
4,4 0 0 0 0 0 0 0 0 0 0 0 3 15 23 28 48 50 58 84...,Training


In [4]:
labels = pd.read_csv('fer2013new.csv').iloc[:,2:]

In [5]:
labels.head()

Unnamed: 0,neutral,happiness,surprise,sadness,anger,disgust,fear,contempt,unknown,NF
0,4,0,0,1,3,2,0,0,0,0
1,6,0,1,1,0,0,0,0,2,0
2,5,0,0,3,1,0,0,0,1,0
3,4,0,0,4,1,0,0,0,1,0
4,9,0,0,1,0,0,0,0,0,0


In [6]:
df = pd.concat([pixels_Usage,labels],axis=1)

In [7]:
df.head()

Unnamed: 0,pixels,Usage,neutral,happiness,surprise,sadness,anger,disgust,fear,contempt,unknown,NF
0,70 80 82 72 58 58 60 63 54 58 60 48 89 115 121...,Training,4,0,0,1,3,2,0,0,0,0
1,151 150 147 155 148 133 111 140 170 174 182 15...,Training,6,0,1,1,0,0,0,0,2,0
2,231 212 156 164 174 138 161 173 182 200 106 38...,Training,5,0,0,3,1,0,0,0,1,0
3,24 32 36 30 32 23 19 20 30 41 21 22 32 34 21 1...,Training,4,0,0,4,1,0,0,0,1,0
4,4 0 0 0 0 0 0 0 0 0 0 0 3 15 23 28 48 50 58 84...,Training,9,0,0,1,0,0,0,0,0,0


In [8]:
df[df['NF']>0].groupby('Usage').size()  # report count of images with NF votes >0 based on the dataset they belong to (private=test,public=validation)

Usage
PrivateTest     72
PublicTest      20
Training       257
dtype: int64

In [9]:
df.groupby('Usage').size()  # as we can see, they're a very low percentage with respect to the total amount, so we'll just delete them later

Usage
PrivateTest     3589
PublicTest      3589
Training       28709
dtype: int64

In [10]:
# check for rows with NaNs or null values; in this case, there are none
print(df[df.isna().any(axis=1)])
print(df[df.isnull().any(axis=1)])  

Empty DataFrame
Columns: [pixels, Usage, neutral, happiness, surprise, sadness, anger, disgust, fear, contempt, unknown, NF]
Index: []
Empty DataFrame
Columns: [pixels, Usage, neutral, happiness, surprise, sadness, anger, disgust, fear, contempt, unknown, NF]
Index: []


In [11]:
df_filtered = df[df['NF']==0]
print(df_filtered[df_filtered['NF']>0])
print(df.shape,df_filtered.shape) # the filter was successful

Empty DataFrame
Columns: [pixels, Usage, neutral, happiness, surprise, sadness, anger, disgust, fear, contempt, unknown, NF]
Index: []
(35887, 12) (35538, 12)


In [12]:
df_filtered = df_filtered.drop(columns=['NF'])

In [13]:
print(df_filtered.shape)
df_filtered.head()

(35538, 11)


Unnamed: 0,pixels,Usage,neutral,happiness,surprise,sadness,anger,disgust,fear,contempt,unknown
0,70 80 82 72 58 58 60 63 54 58 60 48 89 115 121...,Training,4,0,0,1,3,2,0,0,0
1,151 150 147 155 148 133 111 140 170 174 182 15...,Training,6,0,1,1,0,0,0,0,2
2,231 212 156 164 174 138 161 173 182 200 106 38...,Training,5,0,0,3,1,0,0,0,1
3,24 32 36 30 32 23 19 20 30 41 21 22 32 34 21 1...,Training,4,0,0,4,1,0,0,0,1
4,4 0 0 0 0 0 0 0 0 0 0 0 3 15 23 28 48 50 58 84...,Training,9,0,0,1,0,0,0,0,0


In [14]:
dataset = df_filtered['pixels'].apply(lambda x: np.fromstring(x, sep=' ', dtype=int)).values  # create full dataset of pixels

In [15]:
# create image dataset, properly shaped

dataset = np.stack(dataset).reshape(-1, 48, 48, 1)

In [16]:
dataset.shape

(35538, 48, 48, 1)

In [17]:
df_filtered.head()

Unnamed: 0,pixels,Usage,neutral,happiness,surprise,sadness,anger,disgust,fear,contempt,unknown
0,70 80 82 72 58 58 60 63 54 58 60 48 89 115 121...,Training,4,0,0,1,3,2,0,0,0
1,151 150 147 155 148 133 111 140 170 174 182 15...,Training,6,0,1,1,0,0,0,0,2
2,231 212 156 164 174 138 161 173 182 200 106 38...,Training,5,0,0,3,1,0,0,0,1
3,24 32 36 30 32 23 19 20 30 41 21 22 32 34 21 1...,Training,4,0,0,4,1,0,0,0,1
4,4 0 0 0 0 0 0 0 0 0 0 0 3 15 23 28 48 50 58 84...,Training,9,0,0,1,0,0,0,0,0


In [18]:
dataset_labels = df_filtered.drop(['pixels', 'Usage'], axis=1)

In [19]:
dataset_labels.shape

(35538, 9)

In [20]:
dataset_labels = dataset_labels.to_numpy()

In [21]:
dataset_labels.shape

(35538, 9)

In [22]:
df_filtered['new_id'] = np.arange(df_filtered.shape[0])

In [23]:
df_filtered.head(20)

Unnamed: 0,pixels,Usage,neutral,happiness,surprise,sadness,anger,disgust,fear,contempt,unknown,new_id
0,70 80 82 72 58 58 60 63 54 58 60 48 89 115 121...,Training,4,0,0,1,3,2,0,0,0,0
1,151 150 147 155 148 133 111 140 170 174 182 15...,Training,6,0,1,1,0,0,0,0,2,1
2,231 212 156 164 174 138 161 173 182 200 106 38...,Training,5,0,0,3,1,0,0,0,1,2
3,24 32 36 30 32 23 19 20 30 41 21 22 32 34 21 1...,Training,4,0,0,4,1,0,0,0,1,3
4,4 0 0 0 0 0 0 0 0 0 0 0 3 15 23 28 48 50 58 84...,Training,9,0,0,1,0,0,0,0,0,4
5,55 55 55 55 55 54 60 68 54 85 151 163 170 179 ...,Training,6,0,0,1,0,0,1,1,1,5
6,20 17 19 21 25 38 42 42 46 54 56 62 63 66 82 1...,Training,2,0,0,8,0,0,0,0,0,6
7,77 78 79 79 78 75 60 55 47 48 58 73 77 79 57 5...,Training,0,10,0,0,0,0,0,0,0,7
8,85 84 90 121 101 102 133 153 153 169 177 189 1...,Training,0,10,0,0,0,0,0,0,0,8
9,255 254 255 254 254 179 122 107 95 124 149 150...,Training,0,0,6,0,0,0,4,0,0,9


In [24]:
train_indices = df_filtered['new_id'][df_filtered['Usage'] == 'Training']
val_indices = df_filtered['new_id'][df_filtered['Usage'] == 'PublicTest']
test_indices = df_filtered['new_id'][df_filtered['Usage'] == 'PrivateTest']

In [25]:
x_train = dataset[train_indices]
y_train = dataset_labels[train_indices]
x_val = dataset[val_indices]
y_val = dataset_labels[val_indices]
x_test = dataset[test_indices]
y_test = dataset_labels[test_indices]

In [26]:
final = df_filtered.iloc[:,:11]

In [27]:
final.shape

(35538, 11)

In [28]:
final = final.rename(columns={'Usage':'usage'})

In [29]:
final['usage'] = final['usage'].replace({'Training': 'train', 'PublicTest': 'val', 'PrivateTest': 'test'})

In [30]:
final.head()

Unnamed: 0,pixels,usage,neutral,happiness,surprise,sadness,anger,disgust,fear,contempt,unknown
0,70 80 82 72 58 58 60 63 54 58 60 48 89 115 121...,train,4,0,0,1,3,2,0,0,0
1,151 150 147 155 148 133 111 140 170 174 182 15...,train,6,0,1,1,0,0,0,0,2
2,231 212 156 164 174 138 161 173 182 200 106 38...,train,5,0,0,3,1,0,0,0,1
3,24 32 36 30 32 23 19 20 30 41 21 22 32 34 21 1...,train,4,0,0,4,1,0,0,0,1
4,4 0 0 0 0 0 0 0 0 0 0 0 3 15 23 28 48 50 58 84...,train,9,0,0,1,0,0,0,0,0


In [31]:
final.shape

(35538, 11)

In [32]:
final.iloc[:,2:] = final.iloc[:,2:]/10

1        0.6
2        0.5
3        0.4
4        0.9
        ... 
35881    0.5
35882    0.8
35884    0.0
35885    0.0
35886    0.2
Name: neutral, Length: 35538, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  final.iloc[:,2:] = final.iloc[:,2:]/10
1        0.0
2        0.0
3        0.0
4        0.0
        ... 
35881    0.0
35882    0.0
35884    0.0
35885    1.0
35886    0.0
Name: happiness, Length: 35538, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  final.iloc[:,2:] = final.iloc[:,2:]/10
1        0.1
2        0.0
3        0.0
4        0.0
        ... 
35881    0.0
35882    0.0
35884    0.0
35885    0.0
35886    0.0
Name: surprise, Length: 35538, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  final.iloc[:,2:] = final.iloc[:,2:]/10
1        0.1
2        0.3
3        0.4
4        0.1
        ... 
35881    0.3
35882  

In [33]:
final.head()

Unnamed: 0,pixels,usage,neutral,happiness,surprise,sadness,anger,disgust,fear,contempt,unknown
0,70 80 82 72 58 58 60 63 54 58 60 48 89 115 121...,train,0.4,0.0,0.0,0.1,0.3,0.2,0.0,0.0,0.0
1,151 150 147 155 148 133 111 140 170 174 182 15...,train,0.6,0.0,0.1,0.1,0.0,0.0,0.0,0.0,0.2
2,231 212 156 164 174 138 161 173 182 200 106 38...,train,0.5,0.0,0.0,0.3,0.1,0.0,0.0,0.0,0.1
3,24 32 36 30 32 23 19 20 30 41 21 22 32 34 21 1...,train,0.4,0.0,0.0,0.4,0.1,0.0,0.0,0.0,0.1
4,4 0 0 0 0 0 0 0 0 0 0 0 3 15 23 28 48 50 58 84...,train,0.9,0.0,0.0,0.1,0.0,0.0,0.0,0.0,0.0


In [34]:
final.to_csv('fer_no_nf.csv',index=False)