In [176]:
import os

import numpy as np
import pandas as pd
from QualtricsAPI.Setup import Credentials
from QualtricsAPI.Survey import Responses

In [178]:
# Qualtrics survey IDs and API settings

id_s1 = 'SV_0O0EKLOMn3AEoLQ'
id_s2 = 'SV_8pGlqv9GqN2OrVI'
id_s3 = 'SV_bQ75Bb7jwCEnHBI'

# The API token is a secret: read it from the environment instead of keeping it
# in the notebook, where it would be saved and shared together with the file.
qtoken = os.environ.get('QUALTRICS_API_TOKEN')
if not qtoken:
    raise RuntimeError(
        "Set the QUALTRICS_API_TOKEN environment variable before running this cell."
    )

qdc = 'fra1'

# register the credentials, then download the responses of the three surveys

Credentials().qualtrics_api_credentials(token=qtoken, data_center=qdc)

df = Responses().get_survey_responses(survey=id_s1)
df2 = Responses().get_survey_responses(survey=id_s2)
df3 = Responses().get_survey_responses(survey=id_s3)

In [179]:
# tidy the responses of the first survey

# survey metadata columns that are not used further down
dropcols = [
    'RecordedDate', 'ResponseId', 'RecipientLastName', 'RecipientFirstName',
    'RecipientEmail', 'ExternalReference', 'LocationLatitude', 'LocationLongitude',
    'DistributionChannel', 'UserLanguage', 'Status', 'IPAddress', 'Finished',
]

df = df.drop(columns=dropcols)

# restricting the frame to finished surveys is currently switched off:
# df = df.loc[df.Progress == '100']

# set the total duration of completed responses (Progress == '100') aside,
# together with the Q1 column, then drop it from the working frame

duration = df.loc[df.Progress == '100', ['Q1', 'Duration (in seconds)']]
df = df.drop(columns='Duration (in seconds)')

# same for the start and end dates

dates = df.loc[df.Progress == '100', ['Q1', 'StartDate', 'EndDate']]
df = df.drop(columns=['StartDate', 'EndDate'])

# drop every column whose name contains 'First', 'Last' or 'Count'
# (the timer columns other than the total block time)

timer_cols = [
    name for name in df.columns
    if any(tag in name for tag in ('First', 'Last', 'Count'))
]

df = df.drop(columns=timer_cols)


In [180]:
# functions to check time and consistency

def get_scores(rater_code, df):
    """Collect one rater's answers and split them into rank and binary scores.

    The first row of ``df`` whose ``Q1`` value equals ``rater_code`` is used.
    Starting at column position 2, the row is read in strides of 11 columns and
    the first 10 values of every stride form one block. Inside a block the
    values at even offsets are the rank scores and the values at odd offsets
    are the binary scores.

    Parameters
    ----------
    rater_code
        Value to look up in the ``Q1`` column.
    df : pandas.DataFrame
        Response table that has a ``Q1`` column.

    Returns
    -------
    dict
        ``{"rank_scores": array, "binary_scores": array}`` where both arrays
        have one row per block.

    Raises
    ------
    IndexError
        If no row of ``df`` has ``Q1 == rater_code``.
    """
    matches = df.loc[df.Q1 == rater_code]
    if matches.empty:
        # same exception type as the positional lookup would raise, but with a
        # message that tells the user what went wrong
        raise IndexError(f"no responses found for rater code {rater_code!r}")

    # look the rater's row up once instead of once per block
    answers = matches.iloc[0, :].values

    # get all the scores for the rater, split up by block
    rater_results = [list(answers[i:i + 10]) for i in range(2, len(df.columns) - 2, 11)]
    blocks = np.array(rater_results)

    # separate into rank and binary scores
    return {"rank_scores": blocks[:, ::2], "binary_scores": blocks[:, 1::2]}

def check_consistency(scores):
    """Flag, per block, whether the binary scores agree with the rank order.

    Each row of ``scores["binary_scores"]`` is reordered by the ascending
    argsort of the matching row of ``scores["rank_scores"]``. A block counts as
    consistent when the reordered binary scores are already in descending
    order, i.e. no 0 comes before a 1.

    Returns a 1-D array holding 1 for every consistent block and 0 otherwise.
    """
    # column order that sorts each block's rank scores
    order = np.argsort(scores["rank_scores"], axis=1)

    # binary scores of each block, rearranged into that order
    binary_by_rank = np.take_along_axis(scores["binary_scores"], order, 1)

    # a block passes when sorting its binary scores descending changes nothing
    flags = [
        1 if list(block) == sorted(list(block), reverse=True) else 0
        for block in binary_by_rank
    ]

    return np.array(flags)

def check_times(rater_code, df):
    """Return the total block time of every block for one rater.

    The first row of ``df`` whose ``Q1`` value equals ``rater_code`` is used.
    The values at column positions 12, 23, 34, ... (every 11th column from
    position 12 on) are converted to ``float``.

    Parameters
    ----------
    rater_code
        Value to look up in the ``Q1`` column.
    df : pandas.DataFrame
        Response table that has a ``Q1`` column.

    Returns
    -------
    numpy.ndarray
        1-D float array with one entry per block.

    Raises
    ------
    IndexError
        If no row of ``df`` has ``Q1 == rater_code``.
    ValueError
        If one of the selected values cannot be converted to ``float``.
    """
    matches = df.loc[df.Q1 == rater_code]
    if matches.empty:
        # same exception type as the positional lookup would raise, but with a
        # message that tells the user what went wrong
        raise IndexError(f"no responses found for rater code {rater_code!r}")

    # look the rater's row up once instead of once per block
    answers = matches.iloc[0, :].values

    # get the total block time for all blocks
    time = [float(answers[i]) for i in range(12, len(df.columns), 11)]

    return np.array(time)

def print_results(consistency_lst, time, min_time=100):
    """Print which blocks were inconsistent and which were completed too fast.

    Parameters
    ----------
    consistency_lst : numpy.ndarray
        One entry per block; blocks whose entry equals 0 are reported as
        inconsistent.
    time : numpy.ndarray
        One entry per block; blocks whose entry is strictly below ``min_time``
        are reported as too fast.
    min_time : float, optional
        Threshold for the time check. Defaults to 100, the value that used to
        be hard-coded.

    The reported block numbers are zero-based positions in the input arrays.
    """
    # print the blocks that were inconsistent
    print(f"The rater was inconsistent in blocks {np.where(consistency_lst == 0)[0]}")

    # print the blocks that were done too rapidly
    print(f"The rater took too little time in blocks {np.where(time < min_time)[0]}")
    

def check_results(rater_code, df):
    """Run the consistency and timing checks for one rater and print the outcome.

    The rater's scores are fetched with ``get_scores`` and turned into per-block
    consistency flags by ``check_consistency``; the block times come from
    ``check_times``. Both results are handed to ``print_results``. Nothing is
    returned.
    """
    # per-block consistency flags, derived from the rater's scores
    block_consistency = check_consistency(get_scores(rater_code, df))

    # per-block total times
    block_times = check_times(rater_code, df)

    print_results(block_consistency, block_times)
    

            
            
    

In [181]:
# smoke test: run both checks for a single rater code

check_results(rater_code='STIJN2', df=df)

The rater was inconsistent in blocks [1 2 3]
The rater took too little time in blocks [ 1  2  3  4  5  6  7  8  9 10 11 12]
