This notebook builds our own emotion-labelled image dataset (happy / neutral / sad) from the OASIS dataset available at http://benedekkurdi.com/oasis.php.

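To run the notebook, download the original dataset and place this notebook in the dataset's root folder (the folder that contains the ratings CSV and the "Images" folder). Also create a folder called "img" in that same root folder; the selected images are copied into it.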

In [3]:
import os
from shutil import copy

import cv2
import pandas as pd

In [4]:
# Load the per-gender OASIS ratings and add, for every image, the unweighted
# mean of the men's and women's mean ratings (valence and arousal).
legend = pd.read_csv("OASIS_bygender_CORRECTED_092617.csv").assign(
    avg_valence=lambda df: (df["Valence_mean_men"] + df["Valence_mean_women"]) / 2,
    avg_arousal=lambda df: (df["Arousal_mean_men"] + df["Arousal_mean_women"]) / 2,
)

# Happy

In [5]:
# "Happy" images: average valence of at least 5.7 ...
high_valence = legend["avg_valence"] >= 5.7
# ... except Category-3 images whose average arousal exceeds 4.
# NOTE(review): the meaning of Category 3 is not visible here — confirm against
# the OASIS codebook.
aroused_category_3 = (legend["Category"] == 3) & (legend["avg_arousal"] > 4)

happy = legend[high_valence & ~aroused_category_3].copy()
happy["emotion"] = "happy"

# Neutral

In [6]:
# "Neutral" images: average valence within the closed range [3.8, 4.3].
neutral = legend[legend["avg_valence"].between(3.8, 4.3)].copy()
neutral["emotion"] = "neutral"

# Sad

In [7]:
# "Sad" images: average valence within the closed range [2, 3.2].
sad = legend[legend["avg_valence"].between(2, 3.2)].copy()
sad["emotion"] = "sad"

In [8]:
# Stack the three labelled subsets row-wise into a single frame; each row keeps
# the row label it had in `legend`.
data = pd.concat([happy, neutral, sad])

In [9]:
# Trim surrounding whitespace from the theme names; later cells build image
# file paths ("Images/<Theme>.jpg") from this column.
data = data.assign(Theme=data["Theme"].str.strip())

In [10]:
# Preview the first five rows of the combined, labelled frame.
data.head(5)

Unnamed: 0.1,Unnamed: 0,Theme,Category,Source,Valence_mean_men,Valence_SD_men,Valence_N_men,Valence_mean_women,Valence_SD_women,Valence_N_women,Arousal_mean_men,Arousal_SD_men,Arousal_N_men,Arousal_mean_women,Arousal_SD_women,Arousal_N_women,avg_valence,avg_arousal,emotion
32,I33,Baby 2,3,Pixabay,5.789474,1.081283,57,6.294118,0.96528,51,3.367347,1.764156,49,3.927273,1.874335,55,6.041796,3.64731,happy
35,I36,Baby 5,3,Pixabay,5.859649,1.14078,57,6.45098,0.878948,51,3.510204,1.827139,49,3.909091,1.828506,55,6.155315,3.709647,happy
36,I37,Baby 6,3,Pixabay,5.807018,1.059626,57,6.313725,0.836426,51,3.428571,1.707825,49,3.854545,1.809626,55,6.060372,3.641558,happy
38,I39,Baby 8,3,Pixabay,5.859649,1.201764,57,6.372549,0.958348,51,3.632653,1.867417,49,4.181818,1.816498,55,6.116099,3.907236,happy
39,I40,Baby 9,3,Pixabay,5.622222,1.248433,45,6.017857,1.183079,56,3.367347,1.728365,49,3.903846,1.659903,52,5.82004,3.635597,happy


# Manual processing to remove unwanted images

In [11]:
# Interactive review of every candidate image, one emotion group at a time.
# Each image is shown in an OpenCV window and the loop blocks until a key is
# pressed:
#   "a"            -> reject the image (its theme is appended to `to_remove`)
#   "d" / any key  -> keep the image and move on
to_remove = []

try:
    for emotion in data.emotion.unique():
        for path in data[data["emotion"] == emotion]["Theme"]:
            imgpath = "Images/" + path + ".jpg"
            frame = cv2.imread(imgpath, cv2.IMREAD_COLOR)
            # cv2.imread does not raise on a missing/unreadable file, it returns
            # None; fail with the offending path instead of an opaque imshow error.
            if frame is None:
                raise FileNotFoundError("Could not read image: " + imgpath)
            cv2.imshow('Window', frame)

            # Mask to the low byte: some platforms set extra high bits in the
            # value returned by waitKey, which would break the comparison.
            key = cv2.waitKey(0) & 0xFF
            if key == ord("a"):  # press "a" to remove images
                to_remove.append(path)
finally:
    # Always close the preview window, even if the review is interrupted.
    cv2.destroyAllWindows()

In [17]:
# Remove every manually rejected image in a single vectorised pass instead of
# rebuilding the frame once per rejected theme.
data = data[~data["Theme"].isin(to_remove)]

In [26]:
# Reduce the frame to the theme name, its emotion label and the two averaged
# ratings.
data = data.loc[
    :, ["Theme", "emotion", "avg_valence", "avg_arousal"]
]

In [27]:
# Display the final frame (pandas truncates the middle rows of a long frame).
data

Unnamed: 0,Theme,emotion,avg_valence,avg_arousal
32,Baby 2,happy,6.041796,3.647310
35,Baby 5,happy,6.155315,3.709647
36,Baby 6,happy,6.060372,3.641558
38,Baby 8,happy,6.116099,3.907236
39,Baby 9,happy,5.820040,3.635597
...,...,...,...,...
864,War 2,sad,2.323777,4.588179
866,War 4,sad,3.004167,4.774922
869,War 7,sad,3.137129,4.248941
872,Weapon 1,sad,2.820949,3.799072


In [19]:
# Persist the curated legend next to the notebook. The row labels inherited
# from the original ratings file are written as the first (unnamed) column.
# NOTE(review): the execution counts show this cell was run (In [19]) before the
# column selection (In [26]); confirm which column set legend.csv should hold.
data.to_csv("legend.csv", index=True)

In [21]:
# Copy every retained image into the flat "img" output folder.
# Make sure the folder exists first: if "img" were missing, shutil.copy would
# treat it as a destination *file name* and each image would silently overwrite
# the previous one.
os.makedirs("img", exist_ok=True)

for img in data["Theme"]:
    imgpath = "Images/" + img + ".jpg"
    copy(imgpath, "img")

In [22]:
# (rows, columns) of the images labelled "happy".
# NOTE(review): the recorded output has 19 columns, so it appears to predate the
# 4-column selection cell; re-run the notebook top to bottom to refresh it.
data[data["emotion"].eq("happy")].shape

(99, 19)

In [23]:
# (rows, columns) of the images labelled "neutral".
# NOTE(review): the recorded output has 19 columns, so it appears to predate the
# 4-column selection cell; re-run the notebook top to bottom to refresh it.
data[data["emotion"].eq("neutral")].shape

(104, 19)

In [24]:
# (rows, columns) of the images labelled "sad".
# NOTE(review): the recorded output has 19 columns, so it appears to predate the
# 4-column selection cell; re-run the notebook top to bottom to refresh it.
data[data["emotion"].eq("sad")].shape

(117, 19)