## Given a collection of artworks (e.g., images of ArtEmis) for which humans have indicated an emotion, extract for each artwork a _histogram_ that indicates the humans' emotional preferences.
- you will use this to train an image2emotion classifier

In [1]:
import pandas as pd
import numpy as np
from artemis.emotions import emotion_to_int

In [3]:
#
# SET YOUR PATHS.
#


#
# I use the ArtEmis dataset with _minimal_ preprocessing,
# as prepared by the script preprocess_artemis_data.py --preprocess-for-deep-nets **False** (see STEP.1 at top-README).
# Note that you can also use the directly downloaded "artemis_dataset_release_v0.csv" here, since
# preprocess_artemis_data.py does not change the emotions of the "raw" data.
#

# NOTE: machine-specific absolute path -- point it to your own copy of the annotations CSV.
artemis_csv = '/home/optas/DATA/OUT/artemis/preprocessed_data/for_analysis/artemis_preprocessed.csv'
# or
# artemis_csv = '/home/optas/DATA/OUT/artemis/official_data/artemis_dataset_release_v0.csv'


save_file = '../../data/image-emotion-histogram.csv'   # where to save the result.

In [5]:
# Load the annotations and report their row count and column names.
df = pd.read_csv(artemis_csv)
print(df.shape[0])
print(df.columns)

# Distinct emotion names found in the data; their count is the histogram length used below.
u_emo = df['emotion'].unique()
n_emotions = len(u_emo)
print('\nUnique Emotions:', u_emo)

454684
Index(['art_style', 'painting', 'emotion', 'utterance', 'repetition', 'split',
       'tokens', 'tokens_len', 'utterance_spelled', 'tokens_encoded',
       'emotion_label'],
      dtype='object')

Unique Emotions: ['something else' 'sadness' 'contentment' 'awe' 'amusement' 'excitement'
 'fear' 'disgust' 'anger']


In [6]:
# Map every emotion name to its integer id (this overwrites any `emotion_label` column read from the CSV).
df['emotion_label'] = df['emotion'].apply(emotion_to_int)

In [7]:
def collect_image_distribution(g, n_bins=None):
    """ Apply to each pandas group:g (artwork) to extract an *unnormalized* distribution of the emotions indicated.

    :param g: object exposing an ``emotion_label`` attribute (e.g. the group's DataFrame)
        that holds integer emotion labels in ``[0, n_bins)``.
    :param n_bins: length of the returned histogram. When ``None`` (the default) the
        notebook-level ``n_emotions`` is used, which keeps existing one-argument calls working.
    :return: float32 numpy array of length ``n_bins``; entry ``i`` is the number of times
        label ``i`` occurs in ``g.emotion_label`` (raw counts, not normalized).
    :raises IndexError: if any label lies outside ``[0, n_bins)``.
    """
    if n_bins is None:
        n_bins = n_emotions  # fall back to the global set by the data-loading cell

    labels = np.asarray(g.emotion_label)
    if labels.size == 0:
        # No votes at all: an all-zero histogram.
        return np.zeros(n_bins, dtype=np.float32)

    # Validate explicitly so the result always has exactly n_bins entries:
    # np.bincount would silently grow the output for labels >= n_bins, and a
    # negative label must never be counted in some other emotion's bin.
    if labels.min() < 0 or labels.max() >= n_bins:
        raise IndexError('emotion_label values must lie in [0, %d)' % n_bins)

    # Vectorized count of each label; cast to float32 to keep the original output dtype.
    return np.bincount(labels, minlength=n_bins).astype(np.float32)

In [8]:
# An artwork is identified by its (art_style, painting) pair, so each group is one unique artwork.
image_groups = df.groupby(by=['art_style', 'painting'])
# One unnormalized emotion histogram per artwork.
image_distibutions = image_groups.apply(collect_image_distribution)

In [9]:
# Every artwork must have received at least 5 (human) votes.
x = image_distibutions.apply(sum)  # total number of votes per artwork
assert (x.values >= 5).all()

In [10]:
# Flatten the (art_style, painting) -> histogram mapping into one record per artwork,
# storing each histogram as a plain Python list.
records = [
    [style, name, dist.tolist()]
    for (style, name), dist in image_distibutions.items()
]
data = pd.DataFrame(records, columns=['art_style', 'painting',  'emotion_histogram'])

In [9]:
# Preview the first few per-artwork histograms.
data.head()

Unnamed: 0,art_style,painting,emotion_histogram
0,Abstract_Expressionism,aaron-siskind_acolman-1-1955,"[1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0]"
1,Abstract_Expressionism,aaron-siskind_chicago-1951,"[0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 2.0, 1.0, 1.0]"
2,Abstract_Expressionism,aaron-siskind_chicago-6-1961,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 0.0, 2.0]"
3,Abstract_Expressionism,aaron-siskind_feet-102-1957,"[0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 2.0]"
4,Abstract_Expressionism,aaron-siskind_gloucester-16a-1944,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 4.0]"


In [13]:
# Quick check of the row with index 3 above (aaron-siskind_feet-102-1957):
# its raw labels should reproduce the histogram displayed for it.
mask = (df['art_style'] == 'Abstract_Expressionism') & (df['painting'] == 'aaron-siskind_feet-102-1957')
df.loc[mask, 'emotion_label']

108425    8
108426    5
108427    2
108428    5
227077    8
Name: emotion_label, dtype: int64

In [11]:
# Write the per-artwork table to `save_file`, without the row index.
# NOTE(review): `emotion_histogram` holds Python lists, so they presumably land in the CSV
# as their string form and must be parsed when read back -- confirm in the consuming notebook.
data.to_csv(save_file, index=False)

In [12]:
## OK, now go and run the next notebook to use these histograms to train an Image-2-Emotion classifier!