In [17]:
import pandas as pd
from os import listdir, path
from os.path import isfile, join
import numpy as np


main data path

In [18]:
dataPath ="data_to_process/"
onlyfiles = [f for f in listdir(dataPath) if isfile(join(dataPath, f))]
video_lengths = pd.read_json("video_lengths.json")
video_lengths

Unnamed: 0,session,slideshow,duration
0,4,A,47250
1,4,B,26600
2,3,A,35070
3,3,B,46900


## getters
gets conditions, pids, and trial #s

In [19]:

def getCondition(cond):
    # example condition strings
    # c1 = "https: // unicorn.cim.mcgill.ca/teleaffect_experiment/stimuli/S3/S3-SlideshowA-VizB.mp4"
    # c2 = "https: // unicorn.cim.mcgill.ca/teleaffect_experiment/stimuli/S4/S4-SlideshowA-VizA.mp4"

    condition = cond[-22:-4].split("-")
    # S4-SlideshowA-VizA
    condition = [x[-1] for x in condition]
    # 4, A, A
    return {"session":condition[0],
             "slideshow":condition[1],
             "viz":condition[2]}

def getPid(fileName):
    # example fileName: PID-13_trial-4_datetime-2021-10-9_11h48-25.json
    pid = fileName.split("-")[1].split("_")[0]
    return pid

def getTrial(fileName):
    trialNo = fileName.split("_")[1].split("-")[1]
    return int(trialNo)

In [20]:

def getTrialDataframe(filePath, fileName):
    df = pd.read_json(path.join(filePath,fileName))
    cond = getCondition(df.condition[0])
    ts = list(df.time_series)

    dt = np.dtype([('pid', np.int8),
                    ('arousal', np.float64),
                    ('valence', np.float64),
                    ('timeMs', np.int64),
                    ('slideshow', 'U1'),
                    ('viz', 'U1'),
                    ('session', 'U1'),
                    ('trial', np.int8), ])

    # print("number of samples: ", len(ts))
    arr = np.empty([len(ts)], dtype=dt)
    for i in range(0,len(ts)):
        arr[i]['pid'] = getPid(fileName)
        arr[i]['arousal'] = ts[i]["arousal"]
        arr[i]['valence'] = ts[i]["valence"] 
        arr[i]['timeMs'] = ts[i]["timeMs"]
        arr[i]['slideshow'] = cond["slideshow"]
        arr[i]['viz'] = cond["viz"]
        arr[i]['session'] = cond["session"]
        arr[i]['trial'] = getTrial(fileName)

    return pd.DataFrame(arr)



In [21]:

def trimTail(df):
    '''
        1. crops 1 second from the end of the video
        2. makes all videos the same length by duplicating last affect rating 
           if needed 
    '''
    maxLength = video_lengths[(video_lengths.session == int(df.session[0])) &
                             (video_lengths.slideshow == df.slideshow[0])]
    maxLength = maxLength.duration.iloc[0] - 1000

    croppedDF = df[df.timeMs < maxLength]

    lastRow = croppedDF.iloc[-1:].copy()
    lastRow.index = lastRow.index+1
    lastRow.timeMs = maxLength
    return(pd.concat([croppedDF, lastRow]))

def trimStart(df):
    '''
        1. makes the starting value the same as the first logged value
        2. trims so that it starts in 1s
    '''
    df = df[df.timeMs>1000]
    df.loc[-1] = df.iloc[0]
    df.loc[-1,"timeMs"] = 1000
    df.index = df.index + 1
    df.sort_index(inplace=True)
    return(df)

def cropData(trial):
    print(
        f'cropping df > slideshow: {trial.slideshow[0]} | session: {trial.session[0]}')
    trial = trimTail(trial)
    trial = trimStart(trial)
    return(trial.reset_index())


concatenate all dfs

In [22]:
dfs = []

for f in onlyfiles:
    df = getTrialDataframe(dataPath, f)
    df = cropData(df)
    dfs.append(df)

lomgData = pd.concat(dfs)

lomgData.pid


cropping df > slideshow: A | session: 3
cropping df > slideshow: A | session: 4
cropping df > slideshow: B | session: 3
cropping df > slideshow: B | session: 4
cropping df > slideshow: B | session: 4
cropping df > slideshow: B | session: 4
cropping df > slideshow: A | session: 4
cropping df > slideshow: B | session: 3
cropping df > slideshow: B | session: 4


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(loc, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.sort_index(inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_i

cropping df > slideshow: B | session: 3
cropping df > slideshow: B | session: 3
cropping df > slideshow: A | session: 4
cropping df > slideshow: B | session: 3
cropping df > slideshow: B | session: 4
cropping df > slideshow: B | session: 3
cropping df > slideshow: B | session: 4
cropping df > slideshow: B | session: 3
cropping df > slideshow: B | session: 4
cropping df > slideshow: A | session: 4
cropping df > slideshow: B | session: 4
cropping df > slideshow: B | session: 4
cropping df > slideshow: A | session: 4
cropping df > slideshow: A | session: 3


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(loc, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.sort_index(inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_i

cropping df > slideshow: A | session: 4
cropping df > slideshow: A | session: 3
cropping df > slideshow: A | session: 3
cropping df > slideshow: B | session: 3
cropping df > slideshow: B | session: 4
cropping df > slideshow: B | session: 3
cropping df > slideshow: B | session: 3


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(loc, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.sort_index(inplace=True)


cropping df > slideshow: B | session: 3
cropping df > slideshow: A | session: 3
cropping df > slideshow: A | session: 3
cropping df > slideshow: A | session: 3
cropping df > slideshow: A | session: 3
cropping df > slideshow: A | session: 3
cropping df > slideshow: A | session: 3
cropping df > slideshow: B | session: 4
cropping df > slideshow: A | session: 4
cropping df > slideshow: B | session: 3
cropping df > slideshow: B | session: 4
cropping df > slideshow: B | session: 4
cropping df > slideshow: A | session: 4
cropping df > slideshow: A | session: 4
cropping df > slideshow: A | session: 4
cropping df > slideshow: A | session: 3


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(loc, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.sort_index(inplace=True)


cropping df > slideshow: B | session: 4
cropping df > slideshow: B | session: 3
cropping df > slideshow: A | session: 3
cropping df > slideshow: B | session: 3
cropping df > slideshow: B | session: 4
cropping df > slideshow: A | session: 4
cropping df > slideshow: B | session: 4
cropping df > slideshow: A | session: 3
cropping df > slideshow: A | session: 4
cropping df > slideshow: B | session: 3
cropping df > slideshow: A | session: 4
cropping df > slideshow: B | session: 3
cropping df > slideshow: B | session: 3
cropping df > slideshow: A | session: 4
cropping df > slideshow: B | session: 4
cropping df > slideshow: B | session: 3
cropping df > slideshow: A | session: 3
cropping df > slideshow: B | session: 3


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(loc, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.sort_index(inplace=True)


cropping df > slideshow: B | session: 4
cropping df > slideshow: A | session: 4
cropping df > slideshow: A | session: 4
cropping df > slideshow: B | session: 3
cropping df > slideshow: B | session: 4
cropping df > slideshow: B | session: 4
cropping df > slideshow: B | session: 4


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(loc, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.sort_index(inplace=True)


cropping df > slideshow: A | session: 3
cropping df > slideshow: A | session: 4
cropping df > slideshow: B | session: 4
cropping df > slideshow: B | session: 3
cropping df > slideshow: B | session: 3
cropping df > slideshow: A | session: 3
cropping df > slideshow: A | session: 3
cropping df > slideshow: B | session: 3
cropping df > slideshow: A | session: 3
cropping df > slideshow: B | session: 3
cropping df > slideshow: B | session: 4
cropping df > slideshow: A | session: 4
cropping df > slideshow: B | session: 3
cropping df > slideshow: A | session: 3
cropping df > slideshow: A | session: 4
cropping df > slideshow: B | session: 4


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(loc, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.sort_index(inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_i

cropping df > slideshow: A | session: 4
cropping df > slideshow: A | session: 3
cropping df > slideshow: A | session: 3
cropping df > slideshow: A | session: 3
cropping df > slideshow: B | session: 4
cropping df > slideshow: B | session: 3
cropping df > slideshow: A | session: 4
cropping df > slideshow: B | session: 3


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(loc, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.sort_index(inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_i

cropping df > slideshow: B | session: 3
cropping df > slideshow: A | session: 4
cropping df > slideshow: A | session: 3
cropping df > slideshow: A | session: 4
cropping df > slideshow: B | session: 4
cropping df > slideshow: A | session: 3
cropping df > slideshow: A | session: 3
cropping df > slideshow: B | session: 4


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(loc, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.sort_index(inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_i

cropping df > slideshow: A | session: 4
cropping df > slideshow: A | session: 3
cropping df > slideshow: B | session: 3
cropping df > slideshow: B | session: 4
cropping df > slideshow: A | session: 3
cropping df > slideshow: A | session: 4
cropping df > slideshow: B | session: 3
cropping df > slideshow: A | session: 3
cropping df > slideshow: A | session: 4
cropping df > slideshow: B | session: 3
cropping df > slideshow: A | session: 4
cropping df > slideshow: B | session: 4
cropping df > slideshow: A | session: 3
cropping df > slideshow: B | session: 4
cropping df > slideshow: A | session: 4
cropping df > slideshow: A | session: 4


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(loc, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.sort_index(inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_i

cropping df > slideshow: B | session: 3
cropping df > slideshow: B | session: 4
cropping df > slideshow: B | session: 3
cropping df > slideshow: A | session: 4
cropping df > slideshow: A | session: 3
cropping df > slideshow: B | session: 4
cropping df > slideshow: B | session: 4
cropping df > slideshow: A | session: 3
cropping df > slideshow: A | session: 4


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(loc, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.sort_index(inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_i

cropping df > slideshow: A | session: 3
cropping df > slideshow: A | session: 3
cropping df > slideshow: A | session: 4
cropping df > slideshow: B | session: 3
cropping df > slideshow: B | session: 3
cropping df > slideshow: A | session: 3
cropping df > slideshow: A | session: 4
cropping df > slideshow: A | session: 4
cropping df > slideshow: B | session: 3
cropping df > slideshow: A | session: 3
cropping df > slideshow: B | session: 4
cropping df > slideshow: A | session: 4
cropping df > slideshow: B | session: 4
cropping df > slideshow: A | session: 4
cropping df > slideshow: A | session: 3
cropping df > slideshow: B | session: 4


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(loc, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.sort_index(inplace=True)


0       9
1       9
2       9
3       9
4       9
       ..
649    12
650    12
651    12
652    12
653    12
Name: pid, Length: 86377, dtype: int64

In [8]:
lomgData.to_csv('long_data_raw.csv',index=False)