# Prepare FER2013+ dataset

## Initializing and importing libraries

In [None]:
# Mount Google Drive at /content/drive so the files stored there can be
# read and written through the regular file system.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Import packages.
import zipfile
import pandas as pd
import shutil

## Process and save dataset in CSV file

In [None]:
# Import, unzip and arrange folder with dataset.
# The context manager closes the archive even if the extraction fails.
with zipfile.ZipFile('/content/drive/MyDrive/TFG_FER/Datasets_and_packages/FERPlus-master.zip', 'r') as zip_ref:
    zip_ref.extractall()  # Extracts the files into the working directory (/content).

In [None]:
# Read the new FER2013+ labels file: one row per image holding its usage
# split, its image name and one integer count column per category.
df = pd.read_csv('/content/FERPlus-master/fer2013new.csv')
df

Unnamed: 0,Usage,Image name,neutral,happiness,surprise,sadness,anger,disgust,fear,contempt,unknown,NF
0,Training,fer0000000.png,4,0,0,1,3,2,0,0,0,0
1,Training,fer0000001.png,6,0,1,1,0,0,0,0,2,0
2,Training,fer0000002.png,5,0,0,3,1,0,0,0,1,0
3,Training,fer0000003.png,4,0,0,4,1,0,0,0,1,0
4,Training,fer0000004.png,9,0,0,1,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...
35882,PrivateTest,fer0035797.png,8,0,0,2,0,0,0,0,0,0
35883,PrivateTest,,0,0,0,0,0,0,0,0,0,10
35884,PrivateTest,fer0035799.png,0,0,0,0,7,1,0,2,0,0
35885,PrivateTest,fer0035800.png,0,10,0,0,0,0,0,0,0,0


In [None]:
# Inspect the column dtypes and non-null counts of the labels dataframe.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 35887 entries, 0 to 35886
Data columns (total 12 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   Usage       35887 non-null  object
 1   Image name  35714 non-null  object
 2   neutral     35887 non-null  int64 
 3   happiness   35887 non-null  int64 
 4   surprise    35887 non-null  int64 
 5   sadness     35887 non-null  int64 
 6   anger       35887 non-null  int64 
 7   disgust     35887 non-null  int64 
 8   fear        35887 non-null  int64 
 9   contempt    35887 non-null  int64 
 10  unknown     35887 non-null  int64 
 11  NF          35887 non-null  int64 
dtypes: int64(10), object(2)
memory usage: 3.3+ MB


In [None]:
# Define a function to determine the label based on the highest value in the last 10 columns.
def get_label(row):
    """Return the integer label code of the highest-scoring category in a row.

    The row must expose the ten category columns ('neutral', 'happiness',
    'surprise', 'sadness', 'anger', 'disgust', 'fear', 'contempt', 'unknown'
    and 'NF').

    Codes: anger=0, disgust=1, fear=2, happiness=3, sadness=4, surprise=5,
    neutral=6, contempt=7. Any other outcome ('unknown' or 'NF' being the
    only maximum) yields 8.

    Ties are resolved by the order of the table below: the first category
    whose value equals the maximum wins.
    """
    # Order matters: it is the tie-breaking precedence.
    ordered_codes = (
        ('neutral', 6),
        ('happiness', 3),
        ('surprise', 5),
        ('sadness', 4),
        ('anger', 0),
        ('disgust', 1),
        ('fear', 2),
        ('contempt', 7),
    )
    score_columns = [column for column, _ in ordered_codes] + ['unknown', 'NF']
    top_score = row[score_columns].max()

    for column, code in ordered_codes:
        if top_score == row[column]:
            return code
    return 8

In [None]:
# Apply the get_label function to each row in the dataframe (axis=1 passes
# rows, not columns) to create a new 'label' column.
df['label'] = df.apply(get_label, axis=1)

In [None]:
# Preview the computed label column.
df['label']

0        6
1        6
2        6
3        6
4        6
        ..
35882    6
35883    8
35884    0
35885    3
35886    4
Name: label, Length: 35887, dtype: int64

In [None]:
# Keep only the usage split, the image name and the derived label.
kept_columns = ['Usage', 'Image name', 'label']
new_df = df[kept_columns]
new_df

Unnamed: 0,Usage,Image name,label
0,Training,fer0000000.png,6
1,Training,fer0000001.png,6
2,Training,fer0000002.png,6
3,Training,fer0000003.png,6
4,Training,fer0000004.png,6
...,...,...,...
35882,PrivateTest,fer0035797.png,6
35883,PrivateTest,,8
35884,PrivateTest,fer0035799.png,0
35885,PrivateTest,fer0035800.png,3


## Check that the new dataset is correct

In [None]:
# Print the number of rows in the labels table. This also counts the rows
# whose image name is missing.
print("Number of images:", len(new_df))

Number of images: 35887


In [None]:
# Tally how many rows belong to each usage split.
counts = new_df["Usage"].value_counts()

# Report the splits: PublicTest is shown as validation, PrivateTest as test.
for caption, usage in (
    ("Number of training images:", "Training"),
    ("Number of validation images:", "PublicTest"),
    ("Number of test images:", "PrivateTest"),
):
    print(caption, counts[usage])

Number of training images: 28709
Number of validation images: 3589
Number of test images: 3589


In [None]:
# Count the images for every (label, usage) combination.
counts = new_df.groupby(["label", "Usage"]).size()

# Print the per-split totals for each of the 9 label codes.
split_captions = (
    ("Training:", "Training"),
    ("Validation:", "PublicTest"),
    ("Test:", "PrivateTest"),
)
for emotion in range(9):
    print("Emotion:", emotion)
    for caption, usage in split_captions:
        print(caption, counts[emotion, usage])
    print("-------------")

Emotion: 0
Training: 2467
Validation: 319
Emotion: 1
Training: 191
Validation: 34
Emotion: 2
Training: 652
Validation: 74
Emotion: 3
Training: 7528
Validation: 899
Emotion: 4
Training: 3515
Validation: 412
Emotion: 5
Training: 3562
Validation: 456
Emotion: 6
Training: 10309
Validation: 1335
Emotion: 7
Training: 165
Validation: 24
Emotion: 8
Training: 320
Validation: 36


## Update labels of FER2013 dataset

In [None]:
# Read the old FER2013 dataset: one row per image with its emotion label,
# its pixel values as a space-separated string and its usage split.
df_old = pd.read_csv('/content/drive/MyDrive/TFG_FER/Datasets_and_packages/fer2013.csv')
df_old

Unnamed: 0,emotion,pixels,Usage
0,0,70 80 82 72 58 58 60 63 54 58 60 48 89 115 121...,Training
1,0,151 150 147 155 148 133 111 140 170 174 182 15...,Training
2,2,231 212 156 164 174 138 161 173 182 200 106 38...,Training
3,4,24 32 36 30 32 23 19 20 30 41 21 22 32 34 21 1...,Training
4,6,4 0 0 0 0 0 0 0 0 0 0 0 3 15 23 28 48 50 58 84...,Training
...,...,...,...
35882,6,50 36 17 22 23 29 33 39 34 37 37 37 39 43 48 5...,PrivateTest
35883,3,178 174 172 173 181 188 191 194 196 199 200 20...,PrivateTest
35884,0,17 17 16 23 28 22 19 17 25 26 20 24 31 19 27 9...,PrivateTest
35885,3,30 28 28 29 31 30 42 68 79 81 77 67 67 71 63 6...,PrivateTest


In [None]:
# Change old emotional labels for new ones.
# The two tables are matched purely by row position, so stop if their row
# counts differ: the assignment below would otherwise silently drop labels or
# leave missing ones.
if len(df_old) != len(new_df):
    raise ValueError(
        f"Row count mismatch: old dataset has {len(df_old)} rows, "
        f"new labels have {len(new_df)} rows"
    )
df_old['emotion'] = new_df["label"]

In [None]:
# NOTE: this only binds a second name to the same dataframe; no copy is made.
df_updated = df_old
df_updated

Unnamed: 0,emotion,pixels,Usage
0,6,70 80 82 72 58 58 60 63 54 58 60 48 89 115 121...,Training
1,6,151 150 147 155 148 133 111 140 170 174 182 15...,Training
2,6,231 212 156 164 174 138 161 173 182 200 106 38...,Training
3,6,24 32 36 30 32 23 19 20 30 41 21 22 32 34 21 1...,Training
4,6,4 0 0 0 0 0 0 0 0 0 0 0 3 15 23 28 48 50 58 84...,Training
...,...,...,...
35882,6,50 36 17 22 23 29 33 39 34 37 37 37 39 43 48 5...,PrivateTest
35883,8,178 174 172 173 181 188 191 194 196 199 200 20...,PrivateTest
35884,0,17 17 16 23 28 22 19 17 25 26 20 24 31 19 27 9...,PrivateTest
35885,3,30 28 28 29 31 30 42 68 79 81 77 67 67 71 63 6...,PrivateTest


In [None]:
# Drop the rows labelled 7 or 8 by keeping only emotion codes below 7.
keep_mask = df_updated['emotion'] < 7
df_filtered = df_updated[keep_mask]
df_filtered

Unnamed: 0,emotion,pixels,Usage
0,6,70 80 82 72 58 58 60 63 54 58 60 48 89 115 121...,Training
1,6,151 150 147 155 148 133 111 140 170 174 182 15...,Training
2,6,231 212 156 164 174 138 161 173 182 200 106 38...,Training
3,6,24 32 36 30 32 23 19 20 30 41 21 22 32 34 21 1...,Training
4,6,4 0 0 0 0 0 0 0 0 0 0 0 3 15 23 28 48 50 58 84...,Training
...,...,...,...
35881,6,181 177 176 156 178 144 136 132 122 107 131 16...,PrivateTest
35882,6,50 36 17 22 23 29 33 39 34 37 37 37 39 43 48 5...,PrivateTest
35884,0,17 17 16 23 28 22 19 17 25 26 20 24 31 19 27 9...,PrivateTest
35885,3,30 28 28 29 31 30 42 68 79 81 77 67 67 71 63 6...,PrivateTest


In [None]:
# Print the number of images left in the dataset after filtering.
print("Number of images:", len(df_filtered))

Number of images: 35272


In [None]:
# Tally how many filtered rows belong to each usage split.
counts = df_filtered["Usage"].value_counts()

# Report the splits: PublicTest is shown as validation, PrivateTest as test.
for caption, usage in (
    ("Number of training images:", "Training"),
    ("Number of validation images:", "PublicTest"),
    ("Number of test images:", "PrivateTest"),
):
    print(caption, counts[usage])

Number of training images: 28224
Number of validation images: 3529
Number of test images: 3519


In [None]:
# Count the images for every (emotion, usage) combination.
counts = df_filtered.groupby(["emotion", "Usage"]).size()

# Print the per-split totals for each of the 7 remaining emotion codes.
split_captions = (
    ("Training:", "Training"),
    ("Validation:", "PublicTest"),
    ("Test:", "PrivateTest"),
)
for emotion in range(7):
    print("Emotion:", emotion)
    for caption, usage in split_captions:
        print(caption, counts[emotion, usage])
    print("-------------")

Emotion: 0
Training: 2467
Validation: 319
Emotion: 1
Training: 191
Validation: 34
Emotion: 2
Training: 652
Validation: 74
Emotion: 3
Training: 7528
Validation: 899
Emotion: 4
Training: 3515
Validation: 412
Emotion: 5
Training: 3562
Validation: 456
Emotion: 6
Training: 10309
Validation: 1335


## Save dataset file to Drive

In [None]:
# Write the dataframe containing fer2013 with all the labels.
# Absolute paths are used so the files land in /content, where the move step
# expects them, regardless of the current working directory.
df_updated.to_csv('/content/fer2013plus_all.csv', index=False)

# Write the dataframe containing fer2013 without "contempt", "unknown" and "NF".
df_filtered.to_csv('/content/fer2013plus.csv', index=False)

In [None]:
file_name = "fer2013plus.csv"
source_file_path = "/content/" + file_name
destination_folder_path = "/content/drive/MyDrive/TFG_FER/Datasets_and_packages/"

# Move the file from the source folder to the destination folder.
# The full destination file path is passed instead of just the folder: when
# the destination is a directory, shutil.move raises an error if a file with
# the same name is already there, which made re-running this cell fail.
shutil.move(source_file_path, destination_folder_path + file_name)

'/content/drive/MyDrive/TFG_FER/Datasets_and_packages/fer2013plus.csv'

In [None]:
file_name = "fer2013plus_all.csv"
source_file_path = "/content/" + file_name
destination_folder_path = "/content/drive/MyDrive/TFG_FER/Datasets_and_packages/"

# Move the file from the source folder to the destination folder.
# The full destination file path is passed instead of just the folder: when
# the destination is a directory, shutil.move raises an error if a file with
# the same name is already there, which made re-running this cell fail.
shutil.move(source_file_path, destination_folder_path + file_name)

'/content/drive/MyDrive/TFG_FER/Datasets_and_packages/fer2013plus_all.csv'