In [1]:
import glob
import numpy as np
import pandas as pd
import imageio

In [2]:
def search_sessions_by_media(mediafile):
    files = map(lambda x: np.load(x, encoding="latin1"), glob.glob("../export/data/feature_data_*_with_pupil.npz"))
    return [ f for f in files if f['session_info'].tolist()["mediaFile"] == '{}.avi'.format(mediafile) ]

def get_max_len_from_media(mediafile):
    reader = imageio.get_reader('../Dataset/Mahnob/data/media_24/{}.avi'.format(mediafile))
    return reader.get_meta_data()['nframes']

def filter_valence_columns(dataframe):
    valence_coef = {
        'ECG_max': 0.001888109648987091,
        'ECG_mean': 0.00662804554999091,
        'ECG_mean_abs_diff': 0.005231125053370179,
        'ECG_mean_diff': 0.002781320605918507,
        'ECG_min': 0.0018591598781892674,
        'ECG_std': 0.0081674871185979,
        'EDA_max': 0.016387547992571822,
        'EDA_mean': 0.027063790450369923,
        'EDA_mean_abs_diff': 0.09415495854705104,
        'EDA_mean_diff': 0.011888787093495388,
        'EDA_min': 0.024962515338228963,
        'EDA_std': 0.09222202087231755,
        'Pupil': 0.005767991044728216,
        'Resp_mean': 0.025309698048227646,
        'Resp_mean_abs_diff': 0.006296669544639612,
        'Resp_min': 0.038120661037035986,
        'Resp_std': 0.01175162024068132,
        'SKT_max': 0.11762136629734976,
        'SKT_mean': 0.22798868669787506,
        'SKT_mean_abs_diff': 0.08268450584592513,
        'SKT_mean_diff': 0.009693591729136704,
        'SKT_min': 0.11280498263646793,
        'SKT_std': 0.06872535872884428
    }
    valid_column= list(valence_coef.keys())
    new_df = dataframe[valid_column]
    
    for feature, coef in valence_coef.items():
        new_df[feature] = coef * new_df[feature]
    
    new_df = new_df.sum(axis=1)

    return new_df

def filter_arousal_columns(dataframe):
    arousal_coef = {
        'ECG_max': 0.0023171822420858454,
        'ECG_mean': 0.003435947784077022,
        'ECG_mean_abs_diff': 0.17187376101040663,
        'ECG_mean_diff': 0.022275230532693268,
        'ECG_min': 0.0019855308564706124,
        'ECG_std': 0.019509019119034472,
        'EDA_max': 0.07833002942405486,
        'EDA_mean': 0.08276724553427814,
        'EDA_mean_abs_diff': 0.00964545796852378,
        'EDA_mean_diff': 0.005272582078906494,
        'EDA_min': 0.05966502825551385,
        'EDA_std': 0.11743339935251362,
        'Resp_max': 0.15683957483750396,
        'Resp_mean': 0.0009181313883094536,
        'Resp_mean_abs_diff': 0.02187283189050024,
        'Resp_min': 0.004742530853476951,
        'Resp_std': 0.002688807600921281,
        'SKT_max': 0.00420333255286016,
        'SKT_mean': 0.03667924596802952,
        'SKT_min': 0.07822802257838406,
        'SKT_std': 0.11931710817145574
    }
    
    valid_column= list(arousal_coef.keys())
    new_df = dataframe[valid_column]
    
    for feature, coef in arousal_coef.items():
        new_df[feature] = coef * new_df[feature]
    
    new_df = new_df.sum(axis=1)
    
    return new_df


def create_dataframes_for_media(mediafile):
    
    sessions = search_sessions_by_media(mediafile)
    max_len = get_max_len_from_media(mediafile)
    
    all_valence = None 
    all_arousal = None
    
    N = len(sessions)
    
    for session in sessions:
                
        sid = session['session_info'].tolist()['sessionId']
                
        valence = pd.DataFrame(session['valence'].tolist())[:max_len]
        arousal = pd.DataFrame(session['arousal'].tolist())[:max_len]
                
        # ADD pupil feature
        PL = session['pupil'].tolist()[0][:max_len]
        PR = session['pupil'].tolist()[1][:max_len]
        mean_pupil = np.array([PL, PR]).mean(axis=0)
        data_pupil = pd.DataFrame(mean_pupil, columns=['Pupil'])
        
        # concatenate pupil data
        valence = pd.concat([valence, data_pupil], axis=1, sort=False)
        arousal = pd.concat([arousal, data_pupil], axis=1, sort=False)
        
        # remove nan values with the median
        valence = valence.fillna(valence.median())
        arousal = arousal.fillna(arousal.median())
        
        # filter selected columns and sum feature values from this session    
        valence = filter_valence_columns(valence)
        arousal = filter_arousal_columns(arousal)
    
        valence = pd.DataFrame(valence, columns = [sid])
        arousal = pd.DataFrame(arousal, columns = [sid])   
        
        if all_valence is None:
            all_valence = valence
            all_arousal = arousal
        else:
            all_valence = pd.concat([all_valence, valence], axis=1)
            all_arousal = pd.concat([all_arousal, arousal], axis=1)
    

    return all_valence, all_arousal

In [3]:
for mediafile in [30, 53, 69, 90, 111]:
    print('process media {}'.format(mediafile))
    valence, arousal = create_dataframes_for_media(mediafile)
    valence.to_csv("valence_combined_feature_{}.csv".format(mediafile))
    arousal.to_csv("arousal_combined_feature_{}.csv".format(mediafile))

process media 30


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


process media 53


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


process media 69


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


process media 90


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


process media 111


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
