In [1]:
import pandas as pd
import numpy as np
import shutil
import os
import re

In [2]:
#GENERATION OF FILE NUMBERS - write from which file to which file you need to analyze
FIRST = 1
LAST = 46

#Exclude participants
excluded = []

#FILE PATH - specify the path to the folders in which the corresponding files are located
VMRK = './Data/' #.vmrk files
TXT = './Data/' #.txt files
TASKS = './Data/Tasks/' #task result files
DIR = './FR_pictures_Data_2s/' #folder where the generated files will be saved - complete data
DIR_OUT = './FR_pictures_substracted_2s/' #folder where generated files will be saved - subtracted values

#The generated files will be located in the './HR_Data/' folder

In [3]:
#ASSIGNING VIDEO LENGTH (start baseline + manually calculated video length)

#length of manually processed video
display_length = 2000
baseline_length = 1000


In [4]:
#This function generates 3-digital numbers. Input: first number and last number. Output: an array of strings from the first number to the last in 3-degit format.
#Example: input: 19, 22. Output: ['019', '020', '021', '022']

def generate_numbers(start=1, end=999):
    numbers = []

    i = start

    while i <= end:
        if i < 10:
            number = '00' + str(i)
        elif i < 100:
            number = '0' + str(i)
        else:
            number = str(i)
        numbers.append(number)
        i +=1
    
    return numbers

In [5]:
# function that converts timestamp to time format as in FR file,
# and also finds the closest time stamp
# assumes FR recording rate = 10 FPS
def transform(trial_t):
    trial_min = trial_t//60000
    trial_s = (trial_t % 60000) //1000
    trial_ms = (trial_t % 1000) //100*100
    return f'00:{trial_min if trial_min > 9 else "0" + str(trial_min)}:{trial_s if trial_s > 9 else "0" + str(trial_s)}.{trial_ms if trial_ms > 0 else "000"}'

In [6]:
#input: data of the .vmrk file: list of file lines. Output: 2 arrays - video_begin: stamps of the video beginning; baselines: beginning of the video baselines

def get_timestamps(filedata, baseline=1000):
    #baseline = baseline time taking into account the sampling rate
    baselines = [] #there will be timestamps containing the beginning of baseline
    image_begin = [] #here will be the timestamps for when the image starts to be shown
    doubled = "init_null"
    for i in filedata:
        if i[0] != 'M':
            continue
        temp = i.split(',')
        if doubled == temp[1]:
            continue
        doubled = temp[1]
        
        if (doubled == 'S  6'):
            image_begin.append(int(temp[2]))
            baselines.append(int(temp[2])*2 - baseline)
    return image_begin, baselines

In [7]:
def timestamps_round(video_begin):
    baselines = []
    baseline = 1000
    for i in range(len(video_begin)):
        baselines.append(video_begin[i] - baseline)
    
    return baselines

In [8]:
#This function returns a file name. Input: array of strings - names of files; a string 'file' to find; 'ext' - extension of the file. If 'file' is a substring of a string from the array, the string will be returned.

def get_filename(file_names, file, ext):
    for i in file_names:
        if file in i and ext in i:
            return i
    return False

In [9]:
#this function returns data from requested file or return False. Input: directory name, substring that a needed file name should contain.
#Output: list of strings of a file, where file name contains substring "file_substring".If file was not found, it returns False

def get_filedata(directory_name, file_substr, ext):
    files = os.listdir(directory_name)
    fname = get_filename(files, file_substr, ext)
    if fname:
        data = open(directory_name + fname, mode='r')
        data_lines = data.readlines()
        return data_lines
    else:
        return False
        #print('Your file was not found in the given directory.')

In [10]:
def get_txt_name(directory_name, file_substr, ext):
    file_substr_new = file_substr
    if directory_name == TXT:
        file_substr_new = str(int(file_substr))
        
    files = os.listdir(directory_name)
    fname = directory_name + get_filename(files, file_substr_new, ext)
   
    return fname
        #print('Your file was not found in the given directory.')

In [11]:
#Input: .log file data; output: array of target videos in order they were shown to a subject

def getTargetImages(directory_name, file_substr, ext):
    #targ_videos = []
    df = pd.DataFrame()
    files = os.listdir(directory_name)
    fname = get_filename(files, file_substr, ext)
    if fname:
        df = pd.read_csv(directory_name+fname, delimiter=',')
    
    return df['stim'][8:40].tolist()
    #return df['stim'][8:15].tolist()

In [12]:
#GENERATE FILE NUMBERS - write from which file to which file you need to analyze
file_numbers = generate_numbers(FIRST, LAST)

if os.path.exists(DIR):
    shutil.rmtree(DIR)
os.makedirs(DIR)

for p_id in file_numbers:
    if p_id in excluded:
        continue
    print("processing file", p_id, '...')

#START OF VIDEO TIME FRAME (VMRK)
#Display the beginning of the video time frame in milliseconds. Calculation is based on marks from the .vmrk file
#path to the file with FaceReader. It is in the vera folder and is called {p_id}.vmrk
    stamps = get_filedata(VMRK, p_id, '.vmrk')
    image_begin_0, baselines = get_timestamps(stamps)
    #baselines = timestamps_round(image_begin_0)
    
#EXTRACT VIDEO ORDER ({p_id}_tasks_A)
#open file {p_id}_tasks_A
    targ_images = getTargetImages(TASKS, p_id, '.csv') #Целевые видео в порядке предъявления
    
#OPEN TEXT LOG VIDEO
# path to the file with FaceReader. It is in the vera folder and is called {p_id}.txt
    fr_file = get_txt_name(TXT, p_id, '.txt') 

    df = pd.DataFrame(columns = ['Video Time', 'Neutral', 'Happy', 'Sad', 'Angry', 'Surprised', 'Scared', 'Disgusted', 'Valence', 'Arousal', 'Heart Rate', 'Image'])

# loading a file in a table format    
    fr_data = pd.read_csv(fr_file, sep='\t', skiprows=8)
    
# selecting relevant columns
    fr_data = fr_data[['Video Time', 'Neutral', 'Happy', 'Sad', 'Angry', 'Surprised', 'Scared', 'Disgusted', 'Valence', 'Arousal', 'Heart Rate']]
    col_names = ['Neutral',	'Happy',	'Sad',	'Angry',	'Surprised',	'Scared',	'Disgusted',	'Valence',	'Arousal', 'Heart Rate']

#We look for time frames in a text file and write them to a new file called {p_id}_FR.csv
    i = 0
    for k in targ_images:   
#for each image:
        print('image = ', k)
        time_begin = transform(baselines[i])
        temp = display_length + baselines[i] + baseline_length - 100
        time_end = transform(temp)
        ind_start = fr_data.loc[fr_data['Video Time'] == time_begin].index[0]
        ind_end = fr_data.loc[fr_data['Video Time'] == time_end].index[0]
        frame = fr_data.loc[ind_start:ind_end]
        frame['Image'] = pd.Series(k, index=frame.index) #Add a label with a picture to the data
        df = pd.concat([df, frame])
        i += 1

    pd.DataFrame.from_dict(df).to_csv(f'{DIR}{p_id}_FR_selected.csv', sep=';', mode='w', index=False)

processing file 001 ...
image =  73.jpg
image =  88.jpg
image =  31.jpg
image =  35.jpg
image =  2.jpg
image =  45.jpg
image =  78.jpg
image =  89.jpg
image =  91.jpg
image =  7.jpg
image =  12.jpg
image =  71.jpg
image =  76.jpg
image =  4.jpg
image =  22.jpg


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


image =  75.jpg
image =  79.jpg
image =  24.jpg
image =  66.jpg
image =  29.jpg
image =  28.jpg
image =  63.jpg
image =  23.jpg
image =  44.jpg
image =  53.jpg
image =  32.jpg
image =  21.jpg
image =  55.jpg
image =  43.jpg
image =  90.jpg
image =  17.jpg
image =  52.jpg
processing file 002 ...
image =  73.jpg
image =  88.jpg
image =  31.jpg
image =  35.jpg
image =  2.jpg
image =  45.jpg
image =  78.jpg
image =  89.jpg
image =  91.jpg
image =  7.jpg
image =  12.jpg
image =  71.jpg
image =  76.jpg
image =  4.jpg
image =  22.jpg
image =  75.jpg
image =  79.jpg
image =  24.jpg
image =  66.jpg
image =  29.jpg
image =  28.jpg
image =  63.jpg
image =  23.jpg
image =  44.jpg
image =  53.jpg
image =  32.jpg
image =  21.jpg
image =  55.jpg
image =  43.jpg
image =  90.jpg
image =  17.jpg
image =  52.jpg
processing file 003 ...
image =  73.jpg
image =  88.jpg
image =  31.jpg
image =  35.jpg
image =  2.jpg
image =  45.jpg
image =  78.jpg
image =  89.jpg
image =  91.jpg
image =  7.jpg
image =  12.j

image =  45.jpg
image =  2.jpg
image =  78.jpg
image =  89.jpg
image =  22.jpg
image =  71.jpg
image =  88.jpg
image =  53.jpg
image =  28.jpg
image =  76.jpg
image =  31.jpg
image =  52.jpg
image =  43.jpg
image =  17.jpg
image =  29.jpg
image =  79.jpg
image =  32.jpg
image =  91.jpg
image =  63.jpg
image =  35.jpg
image =  55.jpg
image =  44.jpg
image =  75.jpg
image =  12.jpg
image =  23.jpg
image =  4.jpg
image =  24.jpg
image =  90.jpg
image =  66.jpg
processing file 018 ...
image =  73.jpg
image =  21.jpg
image =  7.jpg
image =  45.jpg
image =  2.jpg
image =  78.jpg
image =  89.jpg
image =  22.jpg
image =  71.jpg
image =  88.jpg
image =  53.jpg
image =  28.jpg
image =  76.jpg
image =  31.jpg
image =  52.jpg
image =  43.jpg
image =  17.jpg
image =  29.jpg
image =  79.jpg
image =  32.jpg
image =  91.jpg
image =  63.jpg
image =  35.jpg
image =  55.jpg
image =  44.jpg
image =  75.jpg
image =  12.jpg
image =  23.jpg
image =  4.jpg
image =  24.jpg
image =  90.jpg
image =  66.jpg
proce

image =  28.jpg
image =  76.jpg
image =  31.jpg
image =  52.jpg
image =  43.jpg
image =  44.jpg
image =  75.jpg
image =  12.jpg
image =  23.jpg
image =  4.jpg
image =  24.jpg
image =  90.jpg
image =  66.jpg
image =  73.jpg
image =  21.jpg
image =  7.jpg
image =  45.jpg
image =  2.jpg
image =  78.jpg
image =  89.jpg
image =  22.jpg
image =  17.jpg
image =  29.jpg
image =  79.jpg
image =  32.jpg
image =  91.jpg
image =  63.jpg
image =  35.jpg
image =  55.jpg
processing file 034 ...
image =  71.jpg
image =  88.jpg
image =  53.jpg
image =  28.jpg
image =  76.jpg
image =  31.jpg
image =  52.jpg
image =  43.jpg
image =  44.jpg
image =  75.jpg
image =  12.jpg
image =  23.jpg
image =  4.jpg
image =  24.jpg
image =  90.jpg
image =  66.jpg
image =  73.jpg
image =  21.jpg
image =  7.jpg
image =  45.jpg
image =  2.jpg
image =  78.jpg
image =  89.jpg
image =  22.jpg
image =  17.jpg
image =  29.jpg
image =  79.jpg
image =  32.jpg
image =  91.jpg
image =  63.jpg
image =  35.jpg
image =  55.jpg
proces

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


 55.jpg
processing file 038 ...
image =  71.jpg
image =  88.jpg
image =  53.jpg
image =  28.jpg
image =  76.jpg
image =  31.jpg
image =  52.jpg
image =  43.jpg
image =  44.jpg
image =  75.jpg
image =  12.jpg
image =  23.jpg
image =  4.jpg
image =  24.jpg
image =  90.jpg
image =  66.jpg
image =  73.jpg
image =  21.jpg
image =  7.jpg
image =  45.jpg
image =  2.jpg
image =  78.jpg
image =  89.jpg
image =  22.jpg
image =  17.jpg
image =  29.jpg
image =  79.jpg
image =  32.jpg
image =  91.jpg
image =  63.jpg
image =  35.jpg
image =  55.jpg
processing file 039 ...
image =  71.jpg
image =  88.jpg
image =  53.jpg
image =  28.jpg
image =  76.jpg
image =  31.jpg
image =  52.jpg
image =  43.jpg
image =  44.jpg
image =  75.jpg
image =  12.jpg
image =  23.jpg
image =  4.jpg
image =  24.jpg
image =  90.jpg
image =  66.jpg
image =  73.jpg
image =  21.jpg
image =  7.jpg
image =  45.jpg
image =  2.jpg
image =  78.jpg
image =  89.jpg
image =  22.jpg
image =  17.jpg
image =  29.jpg
image =  79.jpg
image 

Дальше открываем тот файл, который получился - с выбранными временными метками. 
Считаем среднее по бейзлайн и вычитаем это значение из строк датасета.
Записываем результат в новый файл - {p_id}_FR_substracted_video_only.csv

In [13]:
#Calculating mean values

targ_images = getTargetImages(TASKS, '001', '.csv')

file_numbers = generate_numbers(FIRST, LAST)

if os.path.exists(DIR_OUT):
    shutil.rmtree(DIR_OUT)
os.makedirs(DIR_OUT)

meanDifferenceResult = pd.DataFrame(columns = ['Participant', 'stim', 'Neutral',	'Happy',	'Sad',	'Angry',	'Surprised',	'Scared',	'Disgusted',	'Valence',	'Arousal', 'Heart Rate'])


for p_id in file_numbers:
    if p_id in excluded:
        continue
    print("processing file", p_id, '...')

    substracted = pd.DataFrame(columns = ['Video Time', 'Neutral',	'Happy',	'Sad',	'Angry',	'Surprised',	'Scared',	'Disgusted',	'Valence',	'Arousal', 'Heart Rate', 'Image'])
    full_data = pd.DataFrame(columns = ['Video Time', 'Neutral',	'Happy',	'Sad',	'Angry',	'Surprised',	'Scared',	'Disgusted',	'Valence',	'Arousal', 'Heart Rate', 'Image'])
        
    
    file = get_txt_name(DIR, p_id, '.csv') 
    data = pd.read_csv(file, sep=';')
    
    for i in targ_images:
        full_set = data.loc[data['Image'] == i]
        baseline = full_set[0:10]
    

        imageData = full_set[10:]

        drop_failed = baseline #copying to another dataset to drop all failed values and calculate the average
        drop_failed = drop_failed.replace('FIT_FAILED',np.nan)
        drop_failed = drop_failed.replace('FIND_FAILED',np.nan)
        drop_failed = drop_failed.replace('Unknown',np.nan)
        drop_failed =drop_failed.astype({'Neutral':'float',	'Happy':'float',	'Sad':'float',	'Angry':'float',	'Surprised':'float',	'Scared':'float',	'Disgusted':'float',	'Valence':'float',	'Arousal':'float', 'Heart Rate' : 'float'})
    #an average for a baseline
        dropFailedBaseline = drop_failed[col_names]
        mean = [tuple(dropFailedBaseline.mean(axis=0))]
        mean_data = pd.DataFrame(mean, columns=col_names)
        drop_failed = imageData #copying to another dataset to drop all broken values and calculate the average
        drop_failed = drop_failed.replace('FIT_FAILED',np.nan)
        drop_failed = drop_failed.replace('FIND_FAILED',np.nan)
        drop_failed = drop_failed.replace('Unknown',np.nan)
        drop_failed =drop_failed.astype({'Neutral':'float',	'Happy':'float',	'Sad':'float',	'Angry':'float',	'Surprised':'float',	'Scared':'float',	'Disgusted':'float',	'Valence':'float',	'Arousal':'float', 'Heart Rate' : 'float'})        
    #Subtract the average value for the baseline from all rows of the dataset
        drop_failed = drop_failed.sub(mean_data.squeeze(), axis=1)
        
    #Recording the average deviation    
        imageDifferenceData = drop_failed[col_names]
        meanDifference = [tuple(imageDifferenceData.mean(axis=0))]
        meanDifferenceData = pd.DataFrame(meanDifference, columns = col_names)
        meanDifferenceData['stim'] = i
        meanDifferenceData['Participant'] = p_id
        meanDifferenceResult = pd.concat([meanDifferenceResult, meanDifferenceData])
        
    #Attribute a row of average values before the video
        mean_data['Video Time'] = pd.Series('Average', index=mean_data.index)
        mean_data['Image'] = pd.Series('baseline', index=mean_data.index) #Adding the baseline label to the data
        drop_failed['Video Time'] = imageData['Video Time']
        drop_failed['Image'] = pd.Series(i, index=drop_failed.index) #Adding a tag from the video to the data
        temp = pd.concat([mean_data, drop_failed])
        substracted = pd.concat([substracted, temp])
        
    
    
    pd.DataFrame.from_dict(substracted).to_csv(f'{DIR_OUT}{p_id}_FR_substracted_image_only.csv', sep=';', mode='w', index=False)

pd.DataFrame.from_dict(meanDifferenceResult).to_csv(f'{DIR_OUT}FRmeanDifferenceTotal.csv', sep=';', mode='w', index=False)



processing file 001 ...
processing file 002 ...
processing file 003 ...
processing file 004 ...
processing file 005 ...
processing file 006 ...
processing file 007 ...
processing file 008 ...
processing file 009 ...
processing file 010 ...
processing file 011 ...
processing file 012 ...
processing file 013 ...
processing file 014 ...
processing file 015 ...
processing file 016 ...
processing file 017 ...
processing file 018 ...
processing file 019 ...
processing file 020 ...
processing file 021 ...
processing file 022 ...
processing file 023 ...
processing file 024 ...
processing file 025 ...
processing file 026 ...
processing file 027 ...
processing file 028 ...
processing file 029 ...
processing file 030 ...
processing file 031 ...
processing file 032 ...
processing file 033 ...
processing file 034 ...
processing file 035 ...
processing file 036 ...
processing file 037 ...
processing file 038 ...
processing file 039 ...
processing file 040 ...
processing file 041 ...
processing file 