In [None]:

# IMPORTANT: RUN THIS CELL IN ORDER TO IMPORT YOUR KAGGLE DATA SOURCES
# TO THE CORRECT LOCATION (/kaggle/input) IN YOUR NOTEBOOK,
# THEN FEEL FREE TO DELETE THIS CELL.
# NOTE: THIS NOTEBOOK ENVIRONMENT DIFFERS FROM KAGGLE'S PYTHON
# ENVIRONMENT SO THERE MAY BE MISSING LIBRARIES USED BY YOUR
# NOTEBOOK.

import os
import sys
from tempfile import NamedTemporaryFile
from urllib.request import urlopen
from urllib.parse import unquote, urlparse
from urllib.error import HTTPError
from zipfile import ZipFile
import tarfile
import shutil

CHUNK_SIZE = 40960
DATA_SOURCE_MAPPING =  'emotion-recognition-dataset:https%3A%2F%2Fstorage.googleapis.com%2Fkaggle-data-sets%2F4123094%2F7143293%2Fbundle%2Farchive.zip%3FX-Goog-Algorithm%3DGOOG4-RSA-SHA256%26X-Goog-Credential%3Dgcp-kaggle-com%2540kaggle-161607.iam.gserviceaccount.com%252F20240417%252Fauto%252Fstorage%252Fgoog4_request%26X-Goog-Date%3D20240417T114636Z%26X-Goog-Expires%3D259200%26X-Goog-SignedHeaders%3Dhost%26X-Goog-Signature%3Dd0e7c94170770f883e14da51699acc43c5601844a3349157d6ba40d6976d2377df269833d351d7a5dc80e329c8e7e5b3cf5cae53d93a501a95758a4d6758aba66847fd41d3dcd932427cb20d6d6ca054c92154802b69572f0d69adb4f4d0444f97cd3c8fe28619c851ae6ce69516a25e639cc226f86d8fa300533e5e957d1b9a76753cda8479a93171caa492a2e6dfd72741618fa3a77dce471949d781250f3db640fbb77e39c8aee30c50e97376f6682be22b3e6b46300a0cac48090577273f5fa44e8e85c9ad86356d84fb6a5a2519abc1f7a9a38a72efdbd0b62aa784d1d399f92a1d9586765bf0b85f74ee80657595cf975d908ec3a31eacdca0f9efadcb'
KAGGLE_INPUT_PATH='/kaggle/input'
KAGGLE_WORKING_PATH='/kaggle/working'
KAGGLE_SYMLINK='kaggle'

!umount /kaggle/input/ 2> /dev/null
shutil.rmtree('/kaggle/input', ignore_errors=True)
os.makedirs(KAGGLE_INPUT_PATH, 0o777, exist_ok=True)
os.makedirs(KAGGLE_WORKING_PATH, 0o777, exist_ok=True)

try:
  os.symlink(KAGGLE_INPUT_PATH, os.path.join("..", 'input'), target_is_directory=True)
except FileExistsError:
  pass
try:
  os.symlink(KAGGLE_WORKING_PATH, os.path.join("..", 'working'), target_is_directory=True)
except FileExistsError:
  pass

for data_source_mapping in DATA_SOURCE_MAPPING.split(','):
    directory, download_url_encoded = data_source_mapping.split(':')
    download_url = unquote(download_url_encoded)
    filename = urlparse(download_url).path
    destination_path = os.path.join(KAGGLE_INPUT_PATH, directory)
    try:
        with urlopen(download_url) as fileres, NamedTemporaryFile() as tfile:
            total_length = fileres.headers['content-length']
            print(f'Downloading {directory}, {total_length} bytes compressed')
            dl = 0
            data = fileres.read(CHUNK_SIZE)
            while len(data) > 0:
                dl += len(data)
                tfile.write(data)
                done = int(50 * dl / int(total_length))
                sys.stdout.write(f"\r[{'=' * done}{' ' * (50-done)}] {dl} bytes downloaded")
                sys.stdout.flush()
                data = fileres.read(CHUNK_SIZE)
            if filename.endswith('.zip'):
              with ZipFile(tfile) as zfile:
                zfile.extractall(destination_path)
            else:
              with tarfile.open(tfile.name) as tarfile:
                tarfile.extractall(destination_path)
            print(f'\nDownloaded and uncompressed: {directory}')
    except HTTPError as e:
        print(f'Failed to load (likely expired) {download_url} to path {destination_path}')
        continue
    except OSError as e:
        print(f'Failed to load {download_url} to path {destination_path}')
        continue

print('Data source import complete.')


Downloading emotion-recognition-dataset, 2126237709 bytes compressed
Downloaded and uncompressed: emotion-recognition-dataset
Data source import complete.


In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# This part of the code has been adapted from https://www.kaggle.com/code/krishnasrivaibhav/emotion-recognition-resnet152-architecture

# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All"
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
/kaggle/input/emotion-recognition-dataset/dataset/Surprise/cropped_emotions.264843~12fffff.png
/kaggle/input/emotion-recognition-dataset/dataset/Surprise/cropped_emotions.260698~12fffff.png
/kaggle/input/emotion-recognition-dataset/dataset/Surprise/1d2d1d77af5a1db07d9ecc3073b29788423a035c4a21498b9a23b781~12fffff.jpg
/kaggle/input/emotion-recognition-dataset/dataset/Surprise/cropped_emotions.414866~12fffff.png
/kaggle/input/emotion-recognition-dataset/dataset/Surprise/cropped_emotions.265474~12fffff.png
/kaggle/input/emotion-recognition-dataset/dataset/Surprise/cropped_emotions.414906~12fffff.png
/kaggle/input/emotion-recognition-dataset/dataset/Surprise/cropped_emotions.100210~12fffff.png
/kaggle/input/emotion-recognition-dataset/dataset/Surprise/cropped_emotions.264945~12fffff.png
/kaggle/input/emotion-recognition-dataset/dataset/Surprise/cropped_emotions.414911~12fffff.png
/kaggle/input/emotion-recognition-dataset/datas

In [None]:
import seaborn as sns
sns.set_style('darkgrid')
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow import keras
from keras.models import Sequential
from keras.layers import Dense,BatchNormalization,Dropout,Flatten,MaxPooling2D,Conv2D,Activation
from sklearn.preprocessing import LabelEncoder,OneHotEncoder, MinMaxScaler
from sklearn.model_selection import train_test_split
from PIL import Image
from sklearn.metrics import confusion_matrix,classification_report
import os
import glob

In [None]:
encoder=OneHotEncoder() # Adapted from https://www.kaggle.com/code/krishnasrivaibhav/emotion-recognition-resnet152-architecture
encoder.fit([[0],[1],[2],[3],[4],[5]])

# 0 = Ahegao
# 1 = Angry
# 2 = Happy
# 3 = Neutral
# 4 = Sad
# 5 = Surprise

In [None]:
data=[]
paths_ahegao=[]
result=[]

for r,d,f in os.walk('/kaggle/input/emotion-recognition-dataset/dataset/Ahegao'):
    for file in f:
        if '.jpg' or '.png' in file:
            paths_ahegao.append(os.path.join(r,file))
for path in paths_ahegao:
    img=Image.open(path)
    img=img.resize((128,128))
    img=np.array(img)
    if (img.shape==(128,128,3)):
        data.append(np.array(img))
        result.append(encoder.transform([[0]]).toarray())


In [None]:
paths_angry=[]
for r,d,f in os.walk('/kaggle/input/emotion-recognition-dataset/dataset/Angry'):
    for file in f:
        if '.jpg' or '.png' in file:
            paths_angry.append(os.path.join(r,file))
for path in paths_angry:
    img=Image.open(path)
    img=img.resize((128,128))
    img=np.array(img)
    if (img.shape==(128,128,3)):
        data.append(np.array(img))
        result.append(encoder.transform([[1]]).toarray())

In [None]:
paths_happy=[]
for r,d,f in os.walk('/kaggle/input/emotion-recognition-dataset/dataset/Happy'):
    for file in f:
        if '.jpg' or '.png' in file:
            paths_happy.append(os.path.join(r,file))
for path in paths_happy:
    img=Image.open(path)
    img=img.resize((128,128))
    img=np.array(img)
    if (img.shape==(128,128,3)):
        data.append(np.array(img))
        result.append(encoder.transform([[2]]).toarray())

In [None]:
paths_neutral=[]
for r,d,f in os.walk('/kaggle/input/emotion-recognition-dataset/dataset/Neutral'):
    for file in f:
        if '.jpg' or '.png' in file:
            paths_neutral.append(os.path.join(r,file))
for path in paths_neutral:
    img=Image.open(path)
    img=img.resize((128,128))
    img=np.array(img)
    if (img.shape==(128,128,3)):
        data.append(np.array(img))
        result.append(encoder.transform([[3]]).toarray())

In [None]:
paths_sad=[]
for r,d,f in os.walk('/kaggle/input/emotion-recognition-dataset/dataset/Sad'):
    for file in f:
        if '.jpg' or '.png' in file:
            paths_sad.append(os.path.join(r,file))
for path in paths_sad:
    img=Image.open(path)
    img=img.resize((128,128))
    img=np.array(img)
    if (img.shape==(128,128,3)):
        data.append(np.array(img))
        result.append(encoder.transform([[4]]).toarray())

In [None]:
paths_surprise=[]
for r,d,f in os.walk('/kaggle/input/emotion-recognition-dataset/dataset/Surprise'):
    for file in f:
        if '.jpg' or '.png' in file:
            paths_surprise.append(os.path.join(r,file))
for path in paths_surprise:
    img=Image.open(path)
    img=img.resize((128,128))
    img=np.array(img)
    if (img.shape==(128,128,3)):
        data.append(np.array(img))
        result.append(encoder.transform([[5]]).toarray())

In [None]:
data=np.array(data)
data.shape

(15453, 128, 128, 3)

In [None]:
result=np.array(result)
result=result.reshape(15453,6)

In [None]:
# Splitting data between the training, validation and test sets
x_train,x_test,y_train,y_test=train_test_split(data,result,test_size=0.2,shuffle=True,random_state=0)
x_train, x_val, y_train, y_val = train_test_split(x_train, y_train, test_size = 0.2, shuffle = True, random_state=0)

# Size of each x and y of the training, validation and test sets
print("x_train size:", x_train.shape)
print("y_train size:", y_train.shape)
print("x_test size:", x_test.shape)
print("y_test size:", x_test.shape)
print("x_val size: ", x_val.shape)
print("y_val size: ", y_val.shape, "\n")

# TRAINING SET

# 0 = Ahegao
ahegao_count = 0
# 1 = Angry
angry_count = 0
# 2 = Happy
happy_count = 0
# 3 = Neutral
neutral_count = 0
# 4 = Sad
sad_count = 0
# 5 = Surprise
surprise_count = 0

for array in y_train:
  #print(array, array[0])
  if array[0] == 1:
    ahegao_count += 1
  elif array[1] == 1:
    angry_count += 1
  elif array[2] == 1:
    happy_count += 1
  elif array[3] == 1:
    neutral_count += 1
  elif array[4] == 1:
    sad_count += 1
  elif array[5] == 1:
    surprise_count += 1

print("Training set")
print("Ahegao: ", ahegao_count, "images")
print("Angry: ", angry_count, "images")
print("Happy: ", happy_count, "images")
print("Neutral: ", neutral_count, "images")
print("Sad: ", sad_count, "images")
print("Surprise: ", surprise_count, "images\n")


# VALIDATION SET

# 0 = Ahegao
ahegao_count = 0
# 1 = Angry
angry_count = 0
# 2 = Happy
happy_count = 0
# 3 = Neutral
neutral_count = 0
# 4 = Sad
sad_count = 0
# 5 = Surprise
surprise_count = 0

for array in y_val:
  #print(array, array[0])
  if array[0] == 1:
    ahegao_count += 1
  elif array[1] == 1:
    angry_count += 1
  elif array[2] == 1:
    happy_count += 1
  elif array[3] == 1:
    neutral_count += 1
  elif array[4] == 1:
    sad_count += 1
  elif array[5] == 1:
    surprise_count += 1

print("Validation set")
print("Ahegao: ", ahegao_count, "images")
print("Angry: ", angry_count, "images")
print("Happy: ", happy_count, "images")
print("Neutral: ", neutral_count, "images")
print("Sad: ", sad_count, "images")
print("Surprise: ", surprise_count, "images\n")


# TEST SET

# 0 = Ahegao
ahegao_count = 0
# 1 = Angry
angry_count = 0
# 2 = Happy
happy_count = 0
# 3 = Neutral
neutral_count = 0
# 4 = Sad
sad_count = 0
# 5 = Surprise
surprise_count = 0

for array in y_test:
  #print(array, array[0])
  if array[0] == 1:
    ahegao_count += 1
  elif array[1] == 1:
    angry_count += 1
  elif array[2] == 1:
    happy_count += 1
  elif array[3] == 1:
    neutral_count += 1
  elif array[4] == 1:
    sad_count += 1
  elif array[5] == 1:
    surprise_count += 1

print("Test set")
print("Ahegao: ", ahegao_count, "images")
print("Angry: ", angry_count, "images")
print("Happy: ", happy_count, "images")
print("Neutral: ", neutral_count, "images")
print("Sad: ", sad_count, "images")
print("Surprise: ", surprise_count, "images\n")




x_train size: (9889, 128, 128, 3)
y_train size: (9889, 6)
x_test size: (3091, 128, 128, 3)
y_test size: (3091, 128, 128, 3)
x_val size:  (2473, 128, 128, 3)
y_val size:  (2473, 6) 

Training set
Ahegao:  760 images
Angry:  850 images
Happy:  2401 images
Neutral:  2553 images
Sad:  2529 images
Surprise:  796 images

Validation set
Ahegao:  205 images
Angry:  211 images
Happy:  598 images
Neutral:  659 images
Sad:  606 images
Surprise:  194 images

Test set
Ahegao:  240 images
Angry:  252 images
Happy:  741 images
Neutral:  815 images
Sad:  799 images
Surprise:  244 images



In [None]:
import pickle as pkl

# Saving unnormalized training, validation and test sets to pickle files

# Saving training set
with open("train.pkl", "wb") as f:
    pkl.dump([x_train, y_train], f)
    print("training set exported")

# Saving validation set
with open("validation.pkl", "wb") as f:
    pkl.dump([x_val, y_val], f)
    print("validation set exported")

# Saving training set
with open("test.pkl", "wb") as f:
    pkl.dump([x_test, y_test], f)
    print("test set exported")

#to load it
"""
with open("train.pkl", "r") as f:
    train_x, train_y = pkl.load(f)
"""


training set exported
validation set exported
test set exported


'\nwith open("train.pkl", "r") as f:\n    train_x, train_y = pkl.load(f)\n'

In [None]:

# Normalized sets
x_train_reshaped = x_train.reshape(x_train.shape[0], -1)
x_val_reshaped = x_val.reshape(x_val.shape[0], -1)
x_test_reshaped = x_test.reshape(x_test.shape[0], -1)

print(x_train_reshaped.shape)
print(x_val_reshaped.shape)
print(x_test_reshaped.shape)

scaler = MinMaxScaler()
x_train_normalized = scaler.fit_transform(x_train_reshaped)
x_val_normalized = scaler.transform(x_val_reshaped)
x_test_normalized = scaler.transform(x_test_reshaped)


(9889, 49152)
(2473, 49152)
(3091, 49152)


'\n# Saving normalized training set\nwith open("train_normalized.pkl", "wb") as f:\n    pkl.dump([x_train_normalized, y_train], f)\n    print("training set exported")\n\n# Saving normalized validation set\nwith open("validation_normalized.pkl", "wb") as f:\n    pkl.dump([x_val_normalized, y_val], f)\n    print("validation set exported")\n\n# Saving normalized test set\nwith open("test_normalized.pkl", "wb") as f:\n    pkl.dump([x_test_normalized, y_test], f)\n    print("test set exported")\n'

In [1]:
# Reshape the normalized data back to its original shape
x_train_normalized = x_train_normalized.reshape(x_train.shape)
x_val_normalized = x_val_normalized.reshape(x_val.shape)
x_test_normalized = x_test_normalized.reshape(x_test.shape)

NameError: name 'x_train_normalized' is not defined

In [None]:
# Saving normalized training set
with open("train_normalized.pkl", "wb") as f:
    pkl.dump([x_train_normalized, y_train], f)
    print("training set exported")

# Saving normalized validation set
with open("validation_normalized.pkl", "wb") as f:
    pkl.dump([x_val_normalized, y_val], f)
    print("validation set exported")

# Saving normalized test set
with open("test_normalized.pkl", "wb") as f:
    pkl.dump([x_test_normalized, y_test], f)
    print("test set exported")

In [None]:
# Loading pickle sets

# training set
with open("train.pkl", "rb") as f:
    x_train, y_train = pkl.load(f)
    print("training set loaded")

# Validation set
with open("validation.pkl", "rb") as f:
    x_val, y_val = pkl.load(f)
    print("validation set loaded")

# testing set
with open("test.pkl", "rb") as f:
    x_test, y_test = pkl.load(f)
    print("testing set loaded")

