In [1]:
## Script to analyze Turbo Typing data

Note on where I'm leaving off: I just wrote code that removes the extra space that precedes each character in the imported data, without deleting actual typed spaces. Next, I want to run the diff function on the collapsed key sequences (no extra spaces) to compare them with the expected keys.  

In [2]:
import os
import numpy as np
import pandas as pd
import math
import glob
import editdistance
import diff_match_patch as dmp_module
dmp = dmp_module.diff_match_patch()

In [3]:
# The imported data from Turbo Typing stores listed data as a long string. 
# This splits it up into actual lists.
def str_to_list(dataframe, columns):
    """Convert comma-separated string columns of ``dataframe`` to lists, in place.

    The first value of each column decides how the whole column is handled:

    * already a list -> the column is left untouched, so the call is safe
      to repeat;
    * a string starting with a digit -> every value is split on ',' and each
      piece is converted with ``int``;
    * any other string (including an empty one) -> every value is split on
      ',' and the pieces are kept as strings, untouched;
    * anything else -> a message is printed and the column is skipped.

    Columns with no rows are skipped instead of raising IndexError.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Frame whose columns are overwritten.
    columns : iterable of str
        Names of the columns to convert.

    Returns
    -------
    None
    """
    for col in columns:
        data_str = dataframe[col]
        if data_str.empty:
            # No first value to inspect and nothing to convert.
            continue
        first = data_str.iloc[0]
        if isinstance(first, list):
            continue
        if not isinstance(first, str):
            print('Column must have data in string form.')
            continue
        # Slicing (rather than indexing) keeps an empty first string from
        # raising IndexError; ''.isdigit() is False, so it is treated as text.
        if first[:1].isdigit():
            dataframe[col] = data_str.apply(
                lambda trial: [int(char) for char in trial.split(',')])
        else:
            dataframe[col] = [trial.split(',') for trial in data_str]

# Calculates time to keypress (TTK) for all presses in one trial.
# TTK is the reaction time for the first press and the interkey 
# interval for any subsequent presses.
def ttk(row):
    """Return the time-to-keypress (TTK) values for one trial as a list.

    ``row`` is one trial; its 'timeData' entry holds the keypress times.
    The first value returned is the first press time itself (difference
    from 0); each later value is the gap to the previous press.
    """
    press_times = row.loc['timeData']
    # Prepending 0 makes the first difference equal to the first timestamp.
    return np.diff(press_times, prepend=0).tolist()

# Creates a list of keys that would have been pressed if the sentence was typed
# with no errors (ie. no backspacing). 
def sentence_to_keydata(string):
    """Return the list of keys an error-free typing of ``string`` would produce.

    Each uppercase character is preceded by '#' (stand-in for
    'Right/LeftShift'), and the sequence is terminated by '>' (stand-in for
    'Return'). The terminator is appended after the last character whatever
    its case; previously it was dropped when the sentence ended in an
    uppercase character. An empty string yields an empty list.

    NOTE(review): only ``str.isupper()`` characters get a Shift marker;
    shifted punctuation such as '?' is emitted without one -- confirm this
    matches how the task logs those keys.

    Parameters
    ----------
    string : str
        The target sentence.

    Returns
    -------
    list of str
        One entry per expected keypress.
    """
    curr_keyData = []
    for data in string:
        if data.isupper():
            curr_keyData.append('#')  # Replaces 'Right/LeftShift'
        curr_keyData.append(data)
    if curr_keyData:
        curr_keyData.append('>')  # Replaces 'Return'
    return curr_keyData

# # Splits up keyDataConverted data from a string to a list of pressed keys.
# def split_str(row):
#     keys_str = row.loc['keyDataConverted']
#     keys_list = keys_str.split(',')
#     return keys_list

In [4]:
# sentence_to_keydata(all_turbo.iloc[0,2])
# all_turbo.iloc[0,2]
# all_turbo.iloc[0,10]

In [5]:
# Setting save directory
# Stores the working directory at the time this cell runs (os.getcwd()) in
# save_dir; the bare name on the last line displays the path as cell output.
save_dir = os.getcwd()
save_dir

'/Users/rubi/Desktop/Github/typingexp/typing_task_analysis'

In [6]:
# Getting subject data folders from server.
server = r'/Volumes/greenhouse/typingtask_data/subject_data'
folders = os.listdir(server)

# Looping through subject folders, getting appropriate paths to data,
# and making sID list. Paths are built explicitly instead of calling
# os.chdir(), so the kernel's working directory is left where it was.
sub_folders = [name for name in folders if name.startswith('s')]
turbo_frames = []
ID_list = []
for sub in sub_folders:
    sub_folder = os.path.join(server, sub, 'turbotyping_data')
    # glob.escape() keeps any glob metacharacters in the folder name literal.
    datafiles = glob.glob(os.path.join(glob.escape(sub_folder),
                                       '*_datafile.tsv'))
    if not datafiles:
        # Name the offending folder instead of failing with a bare IndexError.
        raise FileNotFoundError('No *_datafile.tsv found in %s' % sub_folder)
    sID = sub.split('_', 1)[0]
    turbo = pd.read_csv(datafiles[0], sep='\t')
    turbo_frames.append(turbo)
    ID_list.append(sID)

# Concatenate once at the end: growing a DataFrame inside the loop re-copies
# every previously loaded trial on each iteration. Each subject keeps its own
# row index, exactly as with the incremental concat.
all_turbo = pd.concat(turbo_frames) if turbo_frames else pd.DataFrame()

In [7]:
# Transforms listed data stored in long strings to actual lists.
# (str_to_list skips columns that already hold lists, so re-running is safe.)
str_to_list(all_turbo, ['timeData', 
                        'keyData', 
                        'keyDataConverted', 
                        'timeDataUp',
                        'keyDataUp'])

# Creates column that represents the correct key press sequence if there were
# no errors.
current_keyData = [sentence_to_keydata(x) for x in all_turbo['currentSentence']]
if 'keyDataCurrent' in all_turbo.columns:
    # The cell has been run before: DataFrame.insert() raises ValueError on a
    # duplicate column name, so refresh the existing column in place instead.
    all_turbo['keyDataCurrent'] = current_keyData
else:
    all_turbo.insert(7, 'keyDataCurrent', current_keyData)

In [8]:
# Edit distance between the target sentence and the typed sentence, one value
# per trial, stored in its own column.
sentence_pairs = zip(all_turbo['currentSentence'], all_turbo['typedSentence'])
edit_dist = pd.Series([editdistance.eval(target, typed)
                       for target, typed in sentence_pairs],
                      index=all_turbo.index)
all_turbo['editDistance'] = edit_dist

# Total number of correct keypresses per trial (target sentence length minus
# edit distance), stored in its own column.
### EDIT THIS THE NUMBER OF CORRECT KEYPRESSES IS DIFFERENT THAN THE SENTENCE ###
length_and_distance = zip(all_turbo['currentSentence'],
                          all_turbo['editDistance'])
num_corr = pd.Series([len(target) - distance
                      for target, distance in length_and_distance],
                     index=all_turbo.index)
all_turbo['numCorr'] = num_corr

# TTK for every keypress of each trial, stored as a list per trial.
ttks = all_turbo.apply(ttk, axis=1)
all_turbo['ttk'] = ttks

# Per-trial mean and sum of the TTK values, each in its own column.
per_trial_ttk = all_turbo['ttk']
mean_ttk = [np.mean(trial) for trial in per_trial_ttk]
all_turbo['ttkMean'] = mean_ttk

sum_ttk = [np.sum(trial) for trial in per_trial_ttk]
all_turbo['ttkSum'] = sum_ttk
all_turbo

Unnamed: 0,participantID,trialNumber,currentSentence,typedSentence,trialStart,wpm,accuracy,keyDataCurrent,timeData,keyData,keyDataConverted,timeDataUp,keyDataUp,editDistance,numCorr,ttk,ttkMean,ttkSum
0,262,-2,Looking for some gift hints from you.,Looking for some gift hints from you.,1/23/2024 9:17:58 AM,86,100,"[#, L, o, o, k, i, n, g, , f, o, r, , s, o, ...","[21454, 21906, 22156, 22284, 22371, 22611, 226...","[LeftShift, L, O, O, K, I, N, G, Space...","[LeftShift, L, o, o, k, i, n, g, , f...","[22043, 22051, 22220, 22411, 22500, 22691, 227...","[L, LeftShift, O, O, K, I, N, G, Space...",0,37,"[21454, 452, 250, 128, 87, 240, 17, 120, 79, 1...",651.195122,26699
1,262,-1,I can't tell you when they'll be back.,I can't tell you when they'll be back.,1/23/2024 9:18:28 AM,63,100,"[#, I, , c, a, n, ', t, , t, e, l, l, , y, ...","[22442, 22633, 22968, 23041, 23498, 23809, 239...","[LeftShift, I, C, A, Backspace, Backspace...","[LeftShift, I, c, a, Backspace, Backspace...","[22769, 22825, 23081, 23131, 23562, 23890, 240...","[I, LeftShift, C, A, Backspace, Backspace...",0,38,"[22442, 191, 335, 73, 457, 311, 136, 104, 57, ...",665.681818,29290
2,262,0,Our forces are ready to come immediately.,Our forces are ready to come immedaitely.,1/23/2024 9:19:04 AM,80,95,"[#, O, u, r, , f, o, r, c, e, s, , a, r, e, ...","[927, 1118, 1311, 1494, 1631, 1718, 1854, 1903...","[LeftShift, LeftShift, O, U, R, Space, F...","[LeftShift, LeftShift, O, u, r, , f, o...","[1071, 1470, 1478, 1655, 1758, 1846, 1926, 199...","[LeftShift, LeftShift, O, U, R, Space, F...",2,39,"[927, 191, 193, 183, 137, 87, 136, 49, 103, 19...",147.954545,6510
3,262,1,A bad thing has been turned into a good thing.,A bad thing has been turned into a good thing.,1/23/2024 9:19:12 AM,108,100,"[#, A, , b, a, d, , t, h, i, n, g, , h, a, ...","[586, 857, 970, 1146, 1281, 1346, 1458, 1554, ...","[LeftShift, A, Space, B, A, D, Space, T...","[LeftShift, A, , b, a, d, , t, h, i...","[962, 994, 1066, 1211, 1402, 1449, 1601, 1681,...","[LeftShift, A, Space, B, A, D, Space, T...",0,46,"[586, 271, 113, 176, 135, 65, 112, 96, 80, 72,...",110.541667,5306
4,262,2,Do you want to eat lunch somewhere before?,Do you want to eat lunch somewhere before?,1/23/2024 9:19:19 AM,98,100,"[#, D, o, , y, o, u, , w, a, n, t, , t, o, ...","[530, 730, 858, 962, 1065, 1130, 1194, 1266, 1...","[LeftShift, D, O, Space, Y, O, U, Space...","[LeftShift, D, o, , y, o, u, , w, a...","[802, 841, 946, 1114, 1186, 1234, 1313, 1394, ...","[LeftShift, D, O, Space, Y, O, U, Space...",0,42,"[530, 200, 128, 104, 103, 65, 64, 72, 95, 65, ...",114.533333,5154
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12,s305,10,A bad thing has been turned into a good thing.,A bad thing has been turned into a good thing.,7/11/2024 7:09:58 PM,90,100,"[#, A, , b, a, d, , t, h, i, n, g, , h, a, ...","[617, 840, 945, 1113, 1233, 1321, 1409, 1505, ...","[LeftShift, A, Space, B, A, D, Space, T...","[LeftShift, A, , b, a, d, , t, h, i...","[897, 953, 1040, 1201, 1336, 1409, 1513, 1601,...","[LeftShift, A, Space, B, A, D, Space, T...",0,46,"[617, 223, 105, 168, 120, 88, 88, 96, 72, 104,...",125.460000,6273
13,s305,11,Our forces are ready to come immediately.,Our forces are ready to come immediately,7/11/2024 7:10:06 PM,67,98,"[#, O, u, r, , f, o, r, c, e, s, , a, r, e, ...","[570, 722, 834, 986, 1034, 1154, 1235, 1355, 1...","[LeftShift, O, U, R, Space, F, O, R, C...","[LeftShift, O, u, r, , f, o, r, c, e...","[762, 882, 929, 1058, 1145, 1250, 1362, 1426, ...","[LeftShift, O, U, R, Space, F, O, R, C...",1,40,"[570, 152, 112, 152, 48, 120, 81, 120, 216, 55...",156.700000,7835
14,s305,12,Sunday tickets have been on sale for weeks.,Sunday tickets have been on sale for weeks.,7/11/2024 7:10:16 PM,101,100,"[#, S, u, n, d, a, y, , t, i, c, k, e, t, s, ...","[490, 802, 898, 1067, 1107, 1203, 1322, 1386, ...","[LeftShift, S, U, N, D, A, Y, Space, T...","[LeftShift, S, u, n, d, a, y, , t, i...","[859, 906, 970, 1147, 1211, 1306, 1443, 1491, ...","[LeftShift, S, U, N, D, A, Y, Space, T...",0,43,"[490, 312, 96, 169, 40, 96, 119, 64, 96, 105, ...",109.933333,4947
15,s305,13,I will need to have my memory refreshed.,I will need to have my memory refreshed.,7/11/2024 7:10:22 PM,94,100,"[#, I, , w, i, l, l, , n, e, e, d, , t, o, ...","[371, 523, 642, 754, 803, 883, 1019, 1091, 119...","[LeftShift, I, Space, W, I, L, L, Space...","[LeftShift, I, , w, i, l, l, , n, e...","[571, 610, 746, 851, 939, 955, 1107, 1187, 128...","[LeftShift, I, Space, W, I, L, L, Space...",0,40,"[371, 152, 119, 112, 49, 80, 136, 72, 104, 64,...",119.119048,5003


In [32]:
def strip_key_padding(trial):
    """Return one trial's keys with space padding removed.

    A key made up only of whitespace becomes a single ' ' (an actual space
    press); every other key has its space characters removed.
    """
    return [' ' if key.isspace() else key.replace(" ", "") for key in trial]


no_space = all_turbo['keyDataConverted'].apply(strip_key_padding)

no_space.iloc[0]
# test.iloc[0]
# all_turbo.iloc[0, 9]

# [char.replace(" ","") for char in trial if len(char) > 2]

['LeftShift',
 'L',
 'o',
 'o',
 'k',
 'i',
 'n',
 'g',
 ' ',
 'f',
 'o',
 'r',
 ' ',
 's',
 'o',
 'm',
 'e',
 ' ',
 'g',
 'i',
 'f',
 't',
 ' ',
 'i',
 'Backspace',
 'h',
 'i',
 'n',
 't',
 's',
 ' ',
 'f',
 'r',
 'o',
 'm',
 ' ',
 'y',
 'o',
 'u',
 '.',
 'Return']

In [None]:
# Isolates characters that were typed correctly during the trial.
# dmp.diff_main() diffs text, not lists, so each key sequence is encoded as a
# string with one character per key, diffed, and then decoded back to keys.

# '#' and '>' are the stand-ins sentence_to_keydata() uses for Shift / Return.
# NOTE(review): 'RightShift' is assumed to be the logged name of the right
# shift key -- confirm against the raw data.
KEY_ALIASES = {'LeftShift': '#', 'RightShift': '#', 'Return': '>'}


def normalize_key(key):
    """Remove import padding from a logged key and map it to its stand-in."""
    # A whitespace-only key is a real space press; anything else only carries
    # padding spaces left over from splitting the imported string on ','.
    key = ' ' if key.isspace() else key.replace(' ', '')
    return KEY_ALIASES.get(key, key)


def diff_key_sequences(typed_keys, target_keys):
    """Diff the typed keys against the target keys, key by key.

    Returns a list of ``(op, keys)`` tuples in diff_match_patch order, where
    ``op`` is the library's code (-1: only in ``typed_keys``, 1: only in
    ``target_keys``, 0: in both) and ``keys`` is the list of keys in that run.
    """
    codes = {}

    def encode(keys):
        # One private-use code point per distinct key, so multi-character
        # key names such as 'Backspace' count as a single symbol.
        return ''.join(chr(0xE000 + codes.setdefault(key, len(codes)))
                       for key in keys)

    typed_text = encode([normalize_key(key) for key in typed_keys])
    target_text = encode(target_keys)
    lookup = {chr(0xE000 + code): key for key, code in codes.items()}
    return [(op, [lookup[char] for char in chunk])
            for op, chunk in dmp.diff_main(typed_text, target_text)]


corr_keys = all_turbo.apply(lambda row:
                            diff_key_sequences(row.loc['keyDataConverted'],
                                               row.loc['keyDataCurrent']),
                            axis=1)
corr_keys.iloc[0]

In [None]:
# Rate correct score (RCS) per trial: number of correct keypresses divided by
# the summed TTK of that trial, stored in its own column.
rcs = all_turbo['numCorr'].div(all_turbo['ttkSum'])
all_turbo['rcs'] = rcs

# Calculates linear integrated speed-accuracy score (LISAS) and stores in column.
# lisas = 

In [None]:



# for trial in data_str:
#     data_list = [int(x) for x in trial.split(',')]
# data_list
# for trial in data_list:
#     data_int = 
# data_list[0]
# data_int = [print(x) for x in data_list]
# data_int

# edit_dist = all_turbo.apply(lambda row: 
#                             editdistance.eval(row.loc['currentSentence'], 
#                             row.loc['typedSentence']), 
#                             axis=1)