## The code below,

1. Matches the Prolific ID to the Participant ID of the survey users & extracts the duration of time taken by the users to complete the survey
2. Calculates the Attention Score of the survey users with respect to the stimulus video
3. Appends the newUsers to the participant_log of the existing users
4. Counts the number of response videos recorded by each survey participant

All of this information is derived from the Survey User Response form exported from Qualtrics.

In [1]:
import os
import re
import pandas as pd
import datetime

In [2]:
# Base folder of the pilot study data; every output file of this notebook is written here.
prolificId_match_directory = '/Users/sukruthgl/Desktop/Spring/CS 5999 - MEng Project/StudyData/pilots/'

# Despite the "Directory" suffix, the two names below hold *file* paths inside the base folder:
# the Qualtrics response export (CSV) and the participant log (Excel workbook).
studyDataResponseLogDirectory = f'{prolificId_match_directory}Bad Robots Empathy_May 20, 2023_14.26.csv'
participantLogDirectory = f'{prolificId_match_directory}participant_log.xlsx'

In [3]:
# Load the two inputs of the notebook: the existing participant log (Excel)
# and the survey response export (CSV).
participant_log = pd.read_excel(participantLogDirectory)
studyDataResponseLog_df = pd.read_csv(studyDataResponseLogDirectory)

#### 1. Match Prolific ID to the Participant & extract survey time

In [4]:
# Here, we create a dictionary of - {randomID (Participant ID): (Prolific ID, Duration (in seconds))}
# Only rows whose randomID starts with four digits are kept.
# The dictionary is then converted into a list of (Participant, Prolific ID, Survey Duration)
# tuples, which is used to create the prolificID_matches_df dataframe.

participantIDPattern = r'\d{4}'
prolificID_dict = {}

for _, row in studyDataResponseLog_df.iterrows():
    participantID = row['randomID']
    # A missing randomID is NaN (a float) and re.match raises TypeError on non-string
    # input, so skip missing values and match against the text form of the ID.
    if pd.isna(participantID) or not re.match(participantIDPattern, str(participantID)):
        continue
    # Plain dictionary assignment: a repeated randomID keeps its first position
    # but takes the values of the latest row.
    prolificID_dict[participantID] = row['Q78'], row['Duration (in seconds)']

# The duration is rendered through datetime.timedelta, e.g. 1997 seconds -> '0:33:17'
prolificID_matches_lst = [
    (participantID, prolificID, str(datetime.timedelta(seconds=int(durationInSeconds))))
    for participantID, (prolificID, durationInSeconds) in prolificID_dict.items()
]


# Creating a df of the Participant ID and their corresponding Prolific User ID
prolificID_matches_df = pd.DataFrame(prolificID_matches_lst, columns = ['Participant', 'Prolific ID', 'Survey Duration'])

prolificID_matches_df

Unnamed: 0,Participant,Prolific ID,Survey Duration
0,8006,602e9c46d37c26a6f0ec65ad,0:33:17
1,3260,5faa741b7e564e3971432a66,0:40:56
2,7444,5f2ef7740f87702b99055397,0:33:07
3,3518,6162226f379aa5c552808895,1:06:47
4,9214,601903a3842bcc20df15b296,0:40:17
5,8417,5b8e8feeee5b0500010f16d0,0:27:14
6,6110,6163fe9cd62993d57a578f0c,0:37:33
7,1889,5f4894201c384e3607e91d57,0:45:49
8,1357,6168174598124da7e933f76d,1:02:11
9,6953,5d40a12f4994c40001e4b80c,0:37:54


#### 2. Calculate Attention Score of the Survey Users

In [5]:
# Identify the stimulusVideo columns needed for the Attention Score: a column qualifies
# when the text in its first row starts with the attention-check prompt.
# The participant ID column ('randomID') is added at the end so scores can be matched to users.
attentionCheckerPattern = r'Please indicate your agreement with the following statement.*'

attentionCheckerResponseColumns = [
    column
    for column in studyDataResponseLog_df.columns
    if re.match(attentionCheckerPattern, studyDataResponseLog_df[column].iloc[0])
]
attentionCheckerResponseColumns.append('randomID')
print(attentionCheckerResponseColumns)

['Q163#1_1', 'Q754#1_1', 'Q761#1_1', 'Q768#1_1', 'Q775#1_1', 'Q782#1_1', 'Q789#1_1', 'Q796#1_1', 'Q803#1_1', 'Q810#1_1', 'Q817#1_1', 'Q824#1_1', 'Q831#1_1', 'Q838#1_1', 'Q845#1_1', 'Q852#1_1', 'Q859#1_1', 'Q866#1_1', 'Q873#1_1', 'Q880#1_1', 'Q887#1_1', 'Q894#1_1', 'Q901#1_1', 'Q908#1_1', 'Q915#1_1', 'Q922#1_1', 'Q929#1_1', 'Q936#1_1', 'Q943#1_1', 'Q950#1_1', 'randomID']


In [6]:
def contextResponseColumns(columns):
    """Return the context response column names found in `columns`.

    A context response column is one whose name starts with the pattern
    Q<digits>#<digit>_<digit> (for example 'Q163#1_1'). These columns hold the
    responses to the stimulusVideos; they are used for labelling the class of each
    stimulusVideo and for calculating the attention scores.

    The matching names are returned as a list, in their original order.
    """
    isContextResponseColumn = re.compile(r"Q\d+#\d_\d").match

    return [name for name in columns if isContextResponseColumn(name)]

In [7]:
# Here we label each context response column with the class of its stimulusVideo.
# The resulting dictionary {column name: 'FAIL' | 'CONTROL'} is used in calculating the attention score.
#
# The class is typed in by hand for every column: 1 -> FAIL, 2 -> CONTROL.
# An invalid entry (not a number, or a number that is not a known class) is asked again
# instead of raising and throwing away the labels that were already entered.

context_dict = {}

context = {1 : 'FAIL', 
           2: 'CONTROL'}

for contextQuestionID in contextResponseColumns(attentionCheckerResponseColumns):
    while True:
        answer = input(f'Enter the context for the video: {contextQuestionID} : ')
        try:
            # int() raises ValueError on non-numeric text; the lookup raises KeyError on unknown classes
            context_dict[contextQuestionID] = context[int(answer)]
            break
        except (ValueError, KeyError):
            print(f'Invalid context {answer!r}; please enter one of {sorted(context)}.')

Enter the context for the video: Q163#1_1 : 2
Enter the context for the video: Q754#1_1 : 2
Enter the context for the video: Q761#1_1 : 2
Enter the context for the video: Q768#1_1 : 2
Enter the context for the video: Q775#1_1 : 2
Enter the context for the video: Q782#1_1 : 2
Enter the context for the video: Q789#1_1 : 2
Enter the context for the video: Q796#1_1 : 2
Enter the context for the video: Q803#1_1 : 2
Enter the context for the video: Q810#1_1 : 2
Enter the context for the video: Q817#1_1 : 1
Enter the context for the video: Q824#1_1 : 1
Enter the context for the video: Q831#1_1 : 1
Enter the context for the video: Q838#1_1 : 1
Enter the context for the video: Q845#1_1 : 1
Enter the context for the video: Q852#1_1 : 1
Enter the context for the video: Q859#1_1 : 1
Enter the context for the video: Q866#1_1 : 1
Enter the context for the video: Q873#1_1 : 1
Enter the context for the video: Q880#1_1 : 1
Enter the context for the video: Q887#1_1 : 1
Enter the context for the video: Q

In [8]:
# context_dict

In [9]:
# Collect the Prolific IDs that were matched to a Participant ID.
valid_prolific_ids = prolificID_matches_df["Prolific ID"].tolist()

# Remove repeated Prolific IDs (column 'Q78'), keeping the first occurrence:
# if a participant attempted the survey twice, only their first attempt is retained.
studyDataResponseLog_df = studyDataResponseLog_df.drop_duplicates(subset=['Q78'], keep='first')
# studyDataResponseLog_df[['Q78', 'randomID']]
# len(studyDataResponseLog_df)

In [10]:
# Build attention_df: the context response columns of the stimulusVideos plus 'randomID'.
# Only rows whose Prolific ID (column 'Q78') is exactly 24 characters long are kept;
# every other row (e.g. pilot_test entries, missing IDs) is left out.

hasProlificId = studyDataResponseLog_df['Q78'].map(lambda prolificId: len(str(prolificId)) == 24)
attention_df = studyDataResponseLog_df.loc[hasProlificId, attentionCheckerResponseColumns].copy()
attention_df
# len(attention_df)

Unnamed: 0,Q163#1_1,Q754#1_1,Q761#1_1,Q768#1_1,Q775#1_1,Q782#1_1,Q789#1_1,Q796#1_1,Q803#1_1,Q810#1_1,...,Q894#1_1,Q901#1_1,Q908#1_1,Q915#1_1,Q922#1_1,Q929#1_1,Q936#1_1,Q943#1_1,Q950#1_1,randomID
10,Strongly agree,Somewhat agree,Somewhat agree,Disagree,Strongly agree,Disagree,Somewhat agree,Agree,Neither agree or disagree,Somewhat disagree,...,Somewhat agree,Agree,Neither agree or disagree,Somewhat disagree,Somewhat disagree,Disagree,Somewhat agree,Somewhat disagree,Somewhat disagree,8006
11,Strongly agree,Agree,Agree,Agree,Strongly agree,Agree,Strongly agree,Strongly agree,Strongly agree,Strongly agree,...,Strongly disagree,Strongly disagree,Somewhat agree,Strongly disagree,Disagree,Strongly disagree,Strongly disagree,Disagree,Disagree,3260
12,Strongly agree,Strongly agree,Strongly agree,Strongly agree,Strongly agree,Strongly agree,Strongly agree,Strongly agree,Strongly agree,Strongly agree,...,Strongly disagree,Disagree,Strongly disagree,Strongly disagree,Strongly disagree,Strongly disagree,Strongly disagree,Strongly disagree,Strongly disagree,7444
13,Strongly agree,Strongly agree,Agree,Neither agree or disagree,Disagree,Strongly disagree,Agree,Strongly disagree,Strongly agree,Strongly agree,...,Agree,Agree,Somewhat disagree,Somewhat agree,Agree,Somewhat disagree,Somewhat agree,Agree,Agree,3518
21,Somewhat agree,Somewhat agree,Somewhat agree,Neither agree or disagree,Agree,Somewhat disagree,Strongly agree,Strongly agree,Strongly agree,Strongly agree,...,Disagree,Strongly disagree,Somewhat agree,Somewhat agree,Agree,Strongly disagree,Disagree,Somewhat agree,Somewhat agree,9214
22,Strongly agree,Strongly agree,Strongly agree,Strongly agree,Strongly agree,Strongly agree,Strongly agree,Strongly agree,Strongly agree,Strongly agree,...,Disagree,Disagree,Agree,Somewhat disagree,Somewhat disagree,Somewhat disagree,Disagree,Somewhat disagree,Somewhat agree,8417
23,Agree,Agree,Agree,Strongly agree,Somewhat agree,Somewhat agree,Agree,Strongly disagree,Agree,Somewhat agree,...,Disagree,Disagree,Disagree,Disagree,Strongly disagree,Strongly disagree,Disagree,Disagree,Disagree,6110
24,Agree,Somewhat agree,Neither agree or disagree,Neither agree or disagree,Somewhat disagree,Strongly agree,Somewhat disagree,Disagree,Neither agree or disagree,Strongly agree,...,Disagree,Disagree,Somewhat disagree,Somewhat agree,Disagree,Somewhat disagree,Somewhat agree,Somewhat agree,Somewhat agree,1889
25,Somewhat agree,Neither agree or disagree,Disagree,Disagree,Somewhat agree,Agree,Agree,Somewhat disagree,Agree,Agree,...,Agree,Agree,Agree,Agree,Somewhat agree,Agree,Neither agree or disagree,Somewhat agree,Agree,1357
26,Agree,Agree,Agree,Somewhat agree,Agree,Agree,Agree,Agree,Agree,Agree,...,Somewhat agree,Agree,Agree,Agree,Agree,Agree,Agree,Agree,Agree,6953


In [11]:
# attention_dict maps each agreement response to its position on the 7-point scale
# (0 = 'Strongly disagree' ... 3 = neutral ... 6 = 'Strongly agree').
# It is used in the attention_score calculation.

attention_dict = {
    response: value
    for value, response in enumerate((
        'Strongly disagree',
        'Disagree',
        'Somewhat disagree',
        'Neither agree or disagree',
        'Somewhat agree',
        'Agree',
        'Strongly agree',
    ))
}

In [12]:
# Here we calculate the attention score for each survey participant.
#
# For every context response column:
#   * CONTROL video: +1 if the participant agreed, -1 if they disagreed
#   * FAIL video:    +1 if the participant disagreed, -1 if they agreed
#   * a neutral response scores 0
#   * a column without a context label or a response that is not on the scale
#     (e.g. a missing answer) is penalised with -10, to indicate that the entry
#     should not be considered for the study
# The total is normalised by the number of scored columns (previously a hard-coded 30,
# which is the number of context response columns in this export).

scoredColumns = contextResponseColumns(attention_df.columns)
neutralValue = attention_dict['Neither agree or disagree']

attention_scores = []

for index, row in attention_df.filter(items = scoredColumns, axis = 1).iterrows():
    attentionScore = 0
    for columnName, columnValue in row.items():
        try:
            videoContext = context_dict[columnName]
            responseValue = attention_dict[columnValue]
        except KeyError:
            attentionScore -= 10
            continue

        if responseValue == neutralValue:
            continue

        agrees = responseValue > neutralValue
        if videoContext == 'CONTROL':
            attentionScore += 1 if agrees else -1
        elif videoContext == 'FAIL':
            attentionScore += -1 if agrees else 1

    # Guard against an export without any context response column
    attention_scores.append(attentionScore / len(scoredColumns) if scoredColumns else 0.0)


attention_df['Attention Score'] = attention_scores

# len(attention_df[['randomID', 'Attention Score']])
# attention_df[['randomID', 'Attention Score']]
attention_df.to_excel(f'{prolificId_match_directory}attentionResponses.xlsx', index = False)
attention_df

Unnamed: 0,Q163#1_1,Q754#1_1,Q761#1_1,Q768#1_1,Q775#1_1,Q782#1_1,Q789#1_1,Q796#1_1,Q803#1_1,Q810#1_1,...,Q901#1_1,Q908#1_1,Q915#1_1,Q922#1_1,Q929#1_1,Q936#1_1,Q943#1_1,Q950#1_1,randomID,Attention Score
10,Strongly agree,Somewhat agree,Somewhat agree,Disagree,Strongly agree,Disagree,Somewhat agree,Agree,Neither agree or disagree,Somewhat disagree,...,Agree,Neither agree or disagree,Somewhat disagree,Somewhat disagree,Disagree,Somewhat agree,Somewhat disagree,Somewhat disagree,8006,-0.1
11,Strongly agree,Agree,Agree,Agree,Strongly agree,Agree,Strongly agree,Strongly agree,Strongly agree,Strongly agree,...,Strongly disagree,Somewhat agree,Strongly disagree,Disagree,Strongly disagree,Strongly disagree,Disagree,Disagree,3260,0.933333
12,Strongly agree,Strongly agree,Strongly agree,Strongly agree,Strongly agree,Strongly agree,Strongly agree,Strongly agree,Strongly agree,Strongly agree,...,Disagree,Strongly disagree,Strongly disagree,Strongly disagree,Strongly disagree,Strongly disagree,Strongly disagree,Strongly disagree,7444,0.933333
13,Strongly agree,Strongly agree,Agree,Neither agree or disagree,Disagree,Strongly disagree,Agree,Strongly disagree,Strongly agree,Strongly agree,...,Agree,Somewhat disagree,Somewhat agree,Agree,Somewhat disagree,Somewhat agree,Agree,Agree,3518,-0.166667
21,Somewhat agree,Somewhat agree,Somewhat agree,Neither agree or disagree,Agree,Somewhat disagree,Strongly agree,Strongly agree,Strongly agree,Strongly agree,...,Strongly disagree,Somewhat agree,Somewhat agree,Agree,Strongly disagree,Disagree,Somewhat agree,Somewhat agree,9214,0.2
22,Strongly agree,Strongly agree,Strongly agree,Strongly agree,Strongly agree,Strongly agree,Strongly agree,Strongly agree,Strongly agree,Strongly agree,...,Disagree,Agree,Somewhat disagree,Somewhat disagree,Somewhat disagree,Disagree,Somewhat disagree,Somewhat agree,8417,0.733333
23,Agree,Agree,Agree,Strongly agree,Somewhat agree,Somewhat agree,Agree,Strongly disagree,Agree,Somewhat agree,...,Disagree,Disagree,Disagree,Strongly disagree,Strongly disagree,Disagree,Disagree,Disagree,6110,0.933333
24,Agree,Somewhat agree,Neither agree or disagree,Neither agree or disagree,Somewhat disagree,Strongly agree,Somewhat disagree,Disagree,Neither agree or disagree,Strongly agree,...,Disagree,Somewhat disagree,Somewhat agree,Disagree,Somewhat disagree,Somewhat agree,Somewhat agree,Somewhat agree,1889,0.066667
25,Somewhat agree,Neither agree or disagree,Disagree,Disagree,Somewhat agree,Agree,Agree,Somewhat disagree,Agree,Agree,...,Agree,Agree,Agree,Somewhat agree,Agree,Neither agree or disagree,Somewhat agree,Agree,1357,-0.4
26,Agree,Agree,Agree,Somewhat agree,Agree,Agree,Agree,Agree,Agree,Agree,...,Agree,Agree,Agree,Agree,Agree,Agree,Agree,Agree,6953,-0.333333


In [13]:
# Attach each participant's Attention Score to the Prolific ID matches.
# The inner join keeps only participants present on both sides; the join key
# 'randomID' duplicates 'Participant' and is removed afterwards.
participantAttentionScores = attention_df[['randomID', 'Attention Score']]

prolificID_matches_df = prolificID_matches_df.merge(
    participantAttentionScores,
    how = 'inner',
    left_on = 'Participant',
    right_on = 'randomID',
)
prolificID_matches_df = prolificID_matches_df.drop(columns = 'randomID')
prolificID_matches_df

Unnamed: 0,Participant,Prolific ID,Survey Duration,Attention Score
0,8006,602e9c46d37c26a6f0ec65ad,0:33:17,-0.1
1,3260,5faa741b7e564e3971432a66,0:40:56,0.933333
2,7444,5f2ef7740f87702b99055397,0:33:07,0.933333
3,3518,6162226f379aa5c552808895,1:06:47,-0.166667
4,9214,601903a3842bcc20df15b296,0:40:17,0.2
5,8417,5b8e8feeee5b0500010f16d0,0:27:14,0.733333
6,6110,6163fe9cd62993d57a578f0c,0:37:33,0.933333
7,1889,5f4894201c384e3607e91d57,0:45:49,0.066667
8,1357,6168174598124da7e933f76d,1:02:11,-0.4
9,6953,5d40a12f4994c40001e4b80c,0:37:54,-0.333333


#### Attention score per video for each participant

In [14]:
# Work on an independent copy: a plain assignment would only create a second name for
# attention_df, so writing the per-video scores into score_video_participant would also
# overwrite the participants' original responses stored in attention_df.
score_video_participant = attention_df.copy()
# score_video_participant

In [15]:
# Replace every response in score_video_participant by its per-video score:
#   * CONTROL video: 1 if the participant agreed, -1 if they disagreed
#   * FAIL video:    1 if the participant disagreed, -1 if they agreed
#   * a neutral response scores 0
# Cells that cannot be scored (column without a context label, or a response that is
# not on the scale, e.g. a missing answer) are left unchanged.
neutralValue = attention_dict['Neither agree or disagree']

for index, row in attention_df.filter(items = contextResponseColumns(attention_df.columns), axis = 1).iterrows():
    for columnName, columnValue in row.items():
        try:
            videoContext = context_dict[columnName]
            responseValue = attention_dict[columnValue]
        except KeyError:
            continue

        if videoContext not in ('CONTROL', 'FAIL'):
            continue

        if responseValue == neutralValue:
            videoScore = 0
        else:
            # Agreement is the expected response for CONTROL videos, disagreement for FAIL videos
            agrees = responseValue > neutralValue
            agreementExpected = videoContext == 'CONTROL'
            videoScore = 1 if agrees == agreementExpected else -1

        score_video_participant.at[index, columnName] = videoScore
score_video_participant

Unnamed: 0,Q163#1_1,Q754#1_1,Q761#1_1,Q768#1_1,Q775#1_1,Q782#1_1,Q789#1_1,Q796#1_1,Q803#1_1,Q810#1_1,...,Q901#1_1,Q908#1_1,Q915#1_1,Q922#1_1,Q929#1_1,Q936#1_1,Q943#1_1,Q950#1_1,randomID,Attention Score
10,1.0,1.0,1.0,-1.0,1.0,-1.0,1.0,1.0,0.0,-1.0,...,-1.0,0.0,1.0,1.0,1.0,-1.0,1.0,1.0,8006,-0.1
11,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,1.0,-1.0,1.0,1.0,1.0,1.0,1.0,1.0,3260,0.933333
12,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,7444,0.933333
13,1.0,1.0,1.0,0.0,-1.0,-1.0,1.0,-1.0,1.0,1.0,...,-1.0,1.0,-1.0,-1.0,1.0,-1.0,-1.0,-1.0,3518,-0.166667
21,1.0,1.0,1.0,0.0,1.0,-1.0,1.0,1.0,1.0,1.0,...,1.0,-1.0,-1.0,-1.0,1.0,1.0,-1.0,-1.0,9214,0.2
22,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,1.0,-1.0,1.0,1.0,1.0,1.0,1.0,-1.0,8417,0.733333
23,1.0,1.0,1.0,1.0,1.0,1.0,1.0,-1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,6110,0.933333
24,1.0,1.0,0.0,0.0,-1.0,1.0,-1.0,-1.0,0.0,1.0,...,1.0,1.0,-1.0,1.0,1.0,-1.0,-1.0,-1.0,1889,0.066667
25,1.0,0.0,-1.0,-1.0,1.0,1.0,1.0,-1.0,1.0,1.0,...,-1.0,-1.0,-1.0,-1.0,-1.0,0.0,-1.0,-1.0,1357,-0.4
26,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,6953,-0.333333


In [16]:
# Save the per-video score table next to the other study outputs (row index not written).
scoreVideoParticipantPath = f'{prolificId_match_directory}score_video_participant.xlsx'
score_video_participant.to_excel(scoreVideoParticipantPath, index = False)

#### 3. Update participant_log to include new survey user's information

In [17]:
# Display the participant_log as it was loaded, i.e. before any new survey user is added to it.
print("Existing Survey Users:")
participant_log

Existing Survey Users:


Unnamed: 0,Participant,Prolific ID,Survey Duration,Type,Attention Score,Number of videos,Video Good?,Survey good?,Comments,Repeated Video #s,Pay?,Include as Study Data?
0,8006,602e9c46d37c26a6f0ec65ad,00:33:17,Pilot,-0.1,31,No,Yes,The response recordings have not taken place a...,QID947,Y,
1,3260,5faa741b7e564e3971432a66,00:40:56,Pilot,0.933333,34,Yes,Yes,Participant has fallen asleep during the survey,"QID842, QID940, QID898, QID891",Y,
2,7444,5f2ef7740f87702b99055397,00:33:07,Pilot,0.933333,32,Yes,Yes,Participant has rewatched few videos out of th...,"QID947, QID940",Y,
3,3518,6162226f379aa5c552808895,01:06:47,Pilot,-0.166667,42,Neutral,Yes,The video quality is pixelated & not that great,"QID947, QID772, QID905, QID884, QID856, QID793...",N,
4,9214,601903a3842bcc20df15b296,00:40:17,MainStudy,0.2,30,Yes,Yes,Very definitive responses,,Y,
5,8417,5b8e8feeee5b0500010f16d0,00:27:14,MainStudy,0.733333,30,Yes,Yes,Very definitive responses,,Y,
6,6110,6163fe9cd62993d57a578f0c,00:37:33,MainStudy,0.933333,33,No,Yes,Participant seems to be talking during the sur...,"QID884, QID919, QID751",Y,
7,1889,5f4894201c384e3607e91d57,00:45:49,MainStudy,0.066667,33,Yes,Yes,Participant has rewatched few videos out of th...,"QID842, QID891, QID786",Y,
8,1357,6168174598124da7e933f76d,01:02:11,MainStudy,-0.4,44,Yes,Yes,Participant has rewatched few videos out of th...,"QID786, QID835, QID814, QID821, QID891, QID842...",N,
9,6953,5d40a12f4994c40001e4b80c,00:37:54,MainStudy,-0.333333,30,Yes,Yes,No definitive reactions by the participant,,Y,


In [18]:
# Create an empty dataFrame that has the same columns as the participant_log document.
# All the new users who have taken the survey are collected in this dataFrame
# before they are added to the participant_log.
newUsers = pd.DataFrame(columns=participant_log.columns)
newUsers

Unnamed: 0,Participant,Prolific ID,Survey Duration,Type,Attention Score,Number of videos,Video Good?,Survey good?,Comments,Repeated Video #s,Pay?,Include as Study Data?


In [19]:
# Here, we look at all the Prolific ID & Participant ID matches and keep the ones whose
# Prolific ID does not exist in the participant_log yet - these are the new users.
# They are stored in the newUsers dataframe, laid out with the participant_log columns
# (columns that only exist in the matches are placed after them).
# The newUsers dataFrame is then appended to the existing participant_log.
#
# DataFrame.append was removed in pandas 2.0, so the new users are selected in one step
# rather than appended row by row. newUsers is rebuilt from scratch, which makes it safe
# to run this cell more than once.

isNewUser = ~prolificID_matches_df['Prolific ID'].isin(participant_log['Prolific ID'])
newUserRows = prolificID_matches_df[isNewUser]

extraColumns = [column for column in newUserRows.columns if column not in newUsers.columns]
newUsers = newUserRows.reindex(columns = list(newUsers.columns) + extraColumns).reset_index(drop = True)

print("The new survey users are as follows: ")
newUsers

The new survey users are as follows: 


Unnamed: 0,Participant,Prolific ID,Survey Duration,Type,Attention Score,Number of videos,Video Good?,Survey good?,Comments,Repeated Video #s,Pay?,Include as Study Data?


In [20]:
# Add the new users below the existing users of the participant_log;
# the row index is renumbered from 0 for the combined table.
existingAndNewUsers = (participant_log, newUsers)
participant_log = pd.concat(existingAndNewUsers, ignore_index = True)
participant_log

Unnamed: 0,Participant,Prolific ID,Survey Duration,Type,Attention Score,Number of videos,Video Good?,Survey good?,Comments,Repeated Video #s,Pay?,Include as Study Data?
0,8006,602e9c46d37c26a6f0ec65ad,00:33:17,Pilot,-0.1,31,No,Yes,The response recordings have not taken place a...,QID947,Y,
1,3260,5faa741b7e564e3971432a66,00:40:56,Pilot,0.933333,34,Yes,Yes,Participant has fallen asleep during the survey,"QID842, QID940, QID898, QID891",Y,
2,7444,5f2ef7740f87702b99055397,00:33:07,Pilot,0.933333,32,Yes,Yes,Participant has rewatched few videos out of th...,"QID947, QID940",Y,
3,3518,6162226f379aa5c552808895,01:06:47,Pilot,-0.166667,42,Neutral,Yes,The video quality is pixelated & not that great,"QID947, QID772, QID905, QID884, QID856, QID793...",N,
4,9214,601903a3842bcc20df15b296,00:40:17,MainStudy,0.2,30,Yes,Yes,Very definitive responses,,Y,
5,8417,5b8e8feeee5b0500010f16d0,00:27:14,MainStudy,0.733333,30,Yes,Yes,Very definitive responses,,Y,
6,6110,6163fe9cd62993d57a578f0c,00:37:33,MainStudy,0.933333,33,No,Yes,Participant seems to be talking during the sur...,"QID884, QID919, QID751",Y,
7,1889,5f4894201c384e3607e91d57,00:45:49,MainStudy,0.066667,33,Yes,Yes,Participant has rewatched few videos out of th...,"QID842, QID891, QID786",Y,
8,1357,6168174598124da7e933f76d,01:02:11,MainStudy,-0.4,44,Yes,Yes,Participant has rewatched few videos out of th...,"QID786, QID835, QID814, QID821, QID891, QID842...",N,
9,6953,5d40a12f4994c40001e4b80c,00:37:54,MainStudy,-0.333333,30,Yes,Yes,No definitive reactions by the participant,,Y,


#### 4. Calculate the number of responses provided in each survey

In [21]:
# Count the response videos recorded by each survey participant.
# Each participant has a folder named after their Participant ID inside the study data folder;
# every entry in it whose name starts with 'QID<digits>' counts as one response video.
# A participant without such a folder gets a count of 0.

participantResponsePattern = r'QID\d+'

for participant in participant_log['Participant']:
    try:
        participantResponses = os.listdir(f'{prolificId_match_directory}/{participant}')
    except FileNotFoundError:
        numberOfParticipantResponses = 0
    else:
        numberOfParticipantResponses = sum(
            1 for response in participantResponses if re.match(participantResponsePattern, response)
        )
    participant_log.loc[participant_log['Participant'] == participant, 'Number of videos'] = numberOfParticipantResponses

participant_log

Unnamed: 0,Participant,Prolific ID,Survey Duration,Type,Attention Score,Number of videos,Video Good?,Survey good?,Comments,Repeated Video #s,Pay?,Include as Study Data?
0,8006,602e9c46d37c26a6f0ec65ad,00:33:17,Pilot,-0.1,31,No,Yes,The response recordings have not taken place a...,QID947,Y,
1,3260,5faa741b7e564e3971432a66,00:40:56,Pilot,0.933333,34,Yes,Yes,Participant has fallen asleep during the survey,"QID842, QID940, QID898, QID891",Y,
2,7444,5f2ef7740f87702b99055397,00:33:07,Pilot,0.933333,32,Yes,Yes,Participant has rewatched few videos out of th...,"QID947, QID940",Y,
3,3518,6162226f379aa5c552808895,01:06:47,Pilot,-0.166667,42,Neutral,Yes,The video quality is pixelated & not that great,"QID947, QID772, QID905, QID884, QID856, QID793...",N,
4,9214,601903a3842bcc20df15b296,00:40:17,MainStudy,0.2,30,Yes,Yes,Very definitive responses,,Y,
5,8417,5b8e8feeee5b0500010f16d0,00:27:14,MainStudy,0.733333,30,Yes,Yes,Very definitive responses,,Y,
6,6110,6163fe9cd62993d57a578f0c,00:37:33,MainStudy,0.933333,33,No,Yes,Participant seems to be talking during the sur...,"QID884, QID919, QID751",Y,
7,1889,5f4894201c384e3607e91d57,00:45:49,MainStudy,0.066667,33,Yes,Yes,Participant has rewatched few videos out of th...,"QID842, QID891, QID786",Y,
8,1357,6168174598124da7e933f76d,01:02:11,MainStudy,-0.4,44,Yes,Yes,Participant has rewatched few videos out of th...,"QID786, QID835, QID814, QID821, QID891, QID842...",N,
9,6953,5d40a12f4994c40001e4b80c,00:37:54,MainStudy,-0.333333,30,Yes,Yes,No definitive reactions by the participant,,Y,


In [22]:
# Save the updated participant_log as a CSV file in the study data folder (row index not written).
# NOTE(review): the log is read from participant_log.xlsx but written to participant_log.csv,
# so the Excel file read at the start is not updated by this notebook - confirm this is intended.
participantLogCsvPath = f'{prolificId_match_directory}participant_log.csv'
participant_log.to_csv(participantLogCsvPath, index = False)