In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.metrics import categorical_crossentropy
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import sys
import os
import shutil
import random
# import glob

In [2]:
# Load the label table (one row per labelled image); low_memory=False reads the file in a single pass
df = pd.read_csv(filepath_or_buffer='data/legend.csv', low_memory=False)
df

Unnamed: 0,user.id,image,emotion
0,628,facial-expressions_2868588k.jpg,anger
1,628,facial-expressions_2868585k.jpg,surprise
2,628,facial-expressions_2868584k.jpg,disgust
3,628,facial-expressions_2868582k.jpg,fear
4,dwdii,Aaron_Eckhart_0001.jpg,neutral
...,...,...,...
13685,jhamski,SharmilaTagore_80.jpg,HAPPINESS
13686,jhamski,SharmilaTagore_81.jpg,HAPPINESS
13687,jhamski,SharmilaTagore_82.jpg,HAPPINESS
13688,jhamski,SharmilaTagore_83.jpg,HAPPINESS


In [3]:
# Show every row whose image name occurs more than once
# (keep=False flags all copies, not only the later ones)
df[df.duplicated(subset=['image'], keep=False)]

Unnamed: 0,user.id,image,emotion
7270,dwdii,Kimberly_Bruckner_0001.jpg,happiness
7271,dwdii,Kimberly_Stewart_0001.jpg,happiness
7272,dwdii,Kimi_Raikkonen_0001.jpg,neutral
7273,dwdii,Kimi_Raikkonen_0002.jpg,neutral
7274,dwdii,Kimi_Raikkonen_0003.jpg,neutral
7275,dwdii,Kimora_Lee_0001.jpg,happiness
7328,JN721,Kimberly_Bruckner_0001.jpg,happiness
7329,JN721,Kimberly_Stewart_0001.jpg,happiness
7330,JN721,Kimi_Raikkonen_0001.jpg,neutral
7331,JN721,Kimi_Raikkonen_0002.jpg,neutral


In [4]:
# Keep only the first row for each image name and renumber the index from 0
df.drop_duplicates(subset=['image'], inplace=True, ignore_index=True)

# Verify: this should now display an empty frame
df[df.duplicated(subset=['image'], keep=False)]

Unnamed: 0,user.id,image,emotion


In [5]:
# Drop the labeller id column.
# errors='ignore' keeps this cell re-runnable: without it a second run raises
# KeyError because 'user.id' is already gone.
df.drop(columns='user.id', inplace=True, errors='ignore')

# Normalise label case: the raw file mixes e.g. 'happiness' and 'HAPPINESS'.
# Bracket access is used for the assignment so it is unambiguous that a column
# (not an attribute) is being written.
df['emotion'] = df['emotion'].str.lower()
df['emotion'].unique()

array(['anger', 'surprise', 'disgust', 'fear', 'neutral', 'happiness',
       'sadness', 'contempt'], dtype=object)

In [6]:
# Distinct emotion labels as a plain Python list
emotions = df['emotion'].unique().tolist()
emotions

['anger',
 'surprise',
 'disgust',
 'fear',
 'neutral',
 'happiness',
 'sadness',
 'contempt']

In [8]:
# Work from inside the images folder: the cell that moves the files uses paths
# relative to it. The guard makes this cell re-runnable -- an unconditional
# chdir would try to enter images/images on the second run.
if os.path.basename(os.getcwd()) != 'images':
  os.chdir('images')

# Create train/validate/test sub-folders for every emotion label.
# Each folder is checked on its own, so a partially created tree (e.g. train/
# exists but validate/ does not) is completed instead of being skipped.
for emotion in emotions:
  split_dirs = [f'{split}/{emotion}' for split in ('train', 'validate', 'test')]
  missing_dirs = [path for path in split_dirs if not os.path.isdir(path)]

  for path in missing_dirs:
    os.makedirs(path)

  if missing_dirs:
    print('Success')
  else:
    print(f'Directories already created for {emotion}')

Success
Success
Success
Success
Success
Success
Success
Success


In [17]:
# One empty bucket per emotion label; the buckets are filled with image file
# names in a later cell. Each label gets its own list object.
emotions_dict = {
    label: []
    for label in (
        'anger', 'surprise', 'disgust', 'fear',
        'neutral', 'happiness', 'sadness', 'contempt',
    )
}

In [18]:
# Add image names from legend.csv to the appropriate emotion list.
# These lists are used later to pick the train, validation and test samples.

# Start from empty lists so that re-running this cell does not append every
# image a second time.
for image_list in emotions_dict.values():
  image_list.clear()

# Walk the two columns directly; iterrows() builds a Series for every row and
# is much slower for the same result.
for emotion, image in zip(df['emotion'], df['image']):
  if emotion in emotions_dict:
    emotions_dict[emotion].append(image)

In [19]:
# Sanity check - every record in legend.csv should have landed in one of the lists.
# The running total is NOT called `sum`: that would shadow the builtin sum()
# for every cell executed afterwards in this kernel.
total_records = 0

for emotion, image_list in emotions_dict.items():
  print(f'{emotion}: {len(image_list)}')
  total_records += len(image_list)

# The two numbers printed below should match
print(f'\nTotal records: {total_records}')
print(len(df))

anger: 252
surprise: 368
disgust: 208
fear: 21
neutral: 6864
happiness: 5693
sadness: 267
contempt: 9

Total records: 13682
13682


In [20]:
# Move every image into the train / validate / test folder of its emotion:
#   88% -> train/<emotion>
#    7% -> validate/<emotion>
#   remainder (about 5%) -> test/<emotion>
# All paths are relative to the current working directory (the images folder).
#
# Note: because the counts are rounded, a very small class can end up with no
# test images at all (e.g. 9 images -> 8 train / 1 validate / 0 test).

SPLIT_SEED = 42  # fixed seed: a re-run reproduces exactly the same split


def _move_images(image_names, dest_dir):
  """Move each named file from the working directory into dest_dir.

  Files that are already present in dest_dir are skipped, so repeating the
  same split is a no-op instead of an error.
  """
  for image_name in image_names:
    if not os.path.exists(f'{dest_dir}/{image_name}'):
      shutil.move(image_name, dest_dir)


# Private generator so the global `random` state is left untouched.
split_rng = random.Random(SPLIT_SEED)

for emotion, image_list in emotions_dict.items():
  count = len(image_list)
  train_cnt = round(count * .88)
  validate_cnt = round(count * .07)
  validate_end = train_cnt + validate_cnt

  # Shuffle a copy once and slice it into three disjoint parts. This replaces
  # sample() + list.remove() per image (quadratic on the large classes) and
  # leaves the lists in emotions_dict intact, so this cell can be re-run on
  # its own.
  shuffled = list(image_list)
  split_rng.shuffle(shuffled)

  _move_images(shuffled[:train_cnt], f'train/{emotion}')
  _move_images(shuffled[train_cnt:validate_end], f'validate/{emotion}')
  _move_images(shuffled[validate_end:], f'test/{emotion}')