In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [3]:
data_df = pd.read_csv('./data/NAPS_valence_arousal_2014.csv', sep=';')
data_df.head()

Unnamed: 0,ID,Category,Nr,V_H,Description,Valence,Arousal
0,Animals_001_h,Animals,1,h,Dead Stork,257,644
1,Animals_002_v,Animals,2,v,Lion,624,668
2,Animals_003_h,Animals,3,h,Snake,524,552
3,Animals_004_v,Animals,4,v,Wolf,450,702
4,Animals_005_h,Animals,5,h,Bat,531,582


In [4]:
data_df['Valence_float'] = data_df['Valence'].map(lambda x: float(x.replace(',','.')))
data_df['Arousal_float'] = data_df['Arousal'].map(lambda x: float(x.replace(',','.')))

data_df.describe()

Unnamed: 0,Nr,Valence_float,Arousal_float
count,1356.0,1356.0,1356.0
mean,144.870944,5.391947,5.101881
std,92.42842,1.628732,1.058333
min,1.0,1.33,2.04
25%,68.0,4.06,4.43
50%,136.0,5.66,4.98
75%,208.25,6.7725,5.82
max,372.0,8.54,8.05


In [5]:
def mapping(x, labels, series, percentile_width):
        for i in range(len(labels)):
            if x < series.quantile((i + 1) * percentile_width):
                return labels[i]

def divide_into(labels, series):
    range = series.max() - series.min()
    percentile_width = 1 / len(labels)
    
    return series.map(lambda x: mapping(x, labels, series, percentile_width))

data_df['Valence_class'] = divide_into(['low', 'medium', 'high'], data_df['Valence_float'])
data_df['Arousal_class'] = divide_into(['low', 'medium', 'high'], data_df['Arousal_float'])

data_df.head(10)

Unnamed: 0,ID,Category,Nr,V_H,Description,Valence,Arousal,Valence_float,Arousal_float,Valence_class,Arousal_class
0,Animals_001_h,Animals,1,h,Dead Stork,257,644,2.57,6.44,low,high
1,Animals_002_v,Animals,2,v,Lion,624,668,6.24,6.68,medium,high
2,Animals_003_h,Animals,3,h,Snake,524,552,5.24,5.52,medium,medium
3,Animals_004_v,Animals,4,v,Wolf,450,702,4.5,7.02,low,high
4,Animals_005_h,Animals,5,h,Bat,531,582,5.31,5.82,medium,high
5,Animals_006_v,Animals,6,v,Snake,513,623,5.13,6.23,medium,high
6,Animals_007_h,Animals,7,h,Wolf,476,706,4.76,7.06,medium,high
7,Animals_008_v,Animals,8,v,Fighting Chickens,263,680,2.63,6.8,low,high
8,Animals_009_v,Animals,9,v,Cat,579,561,5.79,5.61,medium,high
9,Animals_010_h,Animals,10,h,Sick Kitten,459,590,4.59,5.9,low,high


In [15]:
classification_data = data_df.filter(items=['ID', 'Valence_class', 'Arousal_class'])
classification_data.head()

Unnamed: 0,ID,Valence_class,Arousal_class
0,Animals_001_h,low,high
1,Animals_002_v,medium,high
2,Animals_003_h,medium,medium
3,Animals_004_v,low,high
4,Animals_005_h,medium,high


In [113]:
from os import listdir
import time
import datetime
import re
data_base_dir = './data/2018-afcai-spring/'

def read_filenames_regex(regex, dir):
    return list(filter(lambda filename: re.match(regex, filename), listdir(dir)))
    

    
def read_showed_images_file(dir_name):
    dir = data_base_dir + dir_name + '/'
    ts_filename = read_filenames_regex('.*timestamp.csv', dir)
    all_ts_file = pd.read_csv(dir + ts_filename[0], names=['filename', 'datetime'])
    without_trail = all_ts_file.loc[all_ts_file.iloc[:,0].str.contains(r'^((?!trail).)*$')]
    without_trail['id'] = without_trail['filename'].map(lambda x: re.search('[a-zA-Z]*_[0-9]*_[a-z]', x).group(0))
    without_trail['timestamp'] = without_trail['datetime'].map(lambda dt: int(datetime.datetime.strptime(dt, "%Y-%m-%d %H:%M:%S.%f").timestamp()*1000))
    return without_trail.filter(items=['id', 'timestamp'])
    
    
read_showed_images_file('B303').head()

  app.launch_new_instance()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


Unnamed: 0,id,timestamp
6,Landscapes_097_v,1520846027691
7,Landscapes_049_h,1520846038694
8,Objects_160_h,1520846049703
9,Animals_056_h,1520846060662
10,People_038_h,1520846071673


In [139]:
def read_bitalino_bpm(dir_name):
    dir = data_base_dir + dir_name + '/BITalino/'
    bpm_file = dir + read_filenames_regex('.*BPM.*', dir)[0]
    return pd.read_csv(bpm_file)

def read_bitalino_gsr(dir_name):
    dir = data_base_dir + dir_name + '/BITalino/'
    bpm_file = dir + read_filenames_regex('.*GSR.*', dir)[0]
    return pd.read_csv(bpm_file)

def read_person_data_from(dir_name, process_function):
    if dir_name[0] == 'B':
        gsr = process_function(read_bitalino_gsr(dir_name))
        bpm = process_function(read_bitalino_bpm(dir_name))
    showed_images = read_showed_images_file(dir_name)
    return {'images': showed_images, 'bpm': bpm, 'gsr': gsr}
    
def mean_for(millis, ts_val_df):
    ts_val_df['timestamp'] = ts_val_df['timestamp'].map(lambda x: int(x/1000))
    return ts_val_df
    
def one_sec_mean(ts_val_df):
    return mean_for(1000, ts_val_df)

identity = lambda x: x
    
    
person_data = read_person_data_from('B303', identity)
person_data['bpm'].head()

  app.launch_new_instance()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


Unnamed: 0,timestamp,value
0,1520845822529,528.0
1,1520845822530,525.0
2,1520845822531,520.0
3,1520845822532,514.0
4,1520845822533,508.0


In [134]:
bitalino_dirs = read_filenames_regex('B.*', data_base_dir)
bitalino_data = list(map(lambda x: read_person_data_from(x) ,bitalino_dirs))

  app.launch_new_instance()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


In [135]:
bitalino_data[0]

{'bpm':             timestamp   value
 0       1520845822529   528.0
 1       1520845822530   525.0
 2       1520845822531   520.0
 3       1520845822532   514.0
 4       1520845822533   508.0
 5       1520845822534   502.0
 6       1520845822535   496.0
 7       1520845822536   492.0
 8       1520845822537   488.0
 9       1520845822538   486.0
 10      1520845822539   483.0
 11      1520845822540   482.0
 12      1520845822541   481.0
 13      1520845822542   479.0
 14      1520845822543   476.0
 15      1520845822544   473.0
 16      1520845822545   471.0
 17      1520845822546   469.0
 18      1520845822547   467.0
 19      1520845822548   465.0
 20      1520845822549   463.0
 21      1520845822550   462.0
 22      1520845822551   461.0
 23      1520845822552   460.0
 24      1520845822553   460.0
 25      1520845822554   461.0
 26      1520845822555   461.0
 27      1520845822556   461.0
 28      1520845822557   462.0
 29      1520845822558   462.0
 ...               ...     ...
 