In [1]:
import pandas as pd
import numpy as np
import csv
from scipy.spatial.distance import cityblock
from sklearn.metrics import roc_curve

In [2]:
# Load the recorded key events and keep every row whose key is not 'R'.
data = pd.read_csv("myInputCsv.csv").loc[lambda events: events["key"] != 'R']

# Empty result table with the per-keystroke output columns.
df = pd.DataFrame(
    columns=['subject', 'idOfWord', 'key', 'H', 'UD', 'DD'],
)

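# This code calculates the H, UD and DD timings for each key of a typed word:
#   H  = release time of the key - press time of the key
#   UD = press time of the next key - release time of the key
#   DD = press time of the next key - press time of the key
# A dictionary keyed by the key name keeps track of each key's press and release times.
# Limitation: this does not work if the entered word has repeating letters,
# because a repeated letter overwrites its own dictionary entry.
# To solve this, give each keystroke its own stroke id and use that id as the dictionary key.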

class TypedKeyObject:
    """Mutable record holding the identifier and timing of one typed key.

    Every attribute starts at 0 and is filled in by the code that processes
    the key events:
      id          -- identifier of the key
      pressTime   -- time stored for the key's press event
      releaseTime -- time stored for the key's release event
    """

    def __init__(self):
        # All three fields share the same initial value.
        self.id = self.pressTime = self.releaseTime = 0
        
keyDictionary = {}
# Finished rows are collected in a plain list and turned into the DataFrame
# once, after the loop; calling pd.concat inside the loop re-copies the whole
# table for every added row.
keystrokeRows = []

for _rowLabel, event in data.iterrows():
    pressedKey = event["key"]

    # Add the key to the dictionary if it is not already in it, so that the
    # Down and Up events of one key update the same TypedKeyObject.
    if pressedKey not in keyDictionary:
        keyDictionary[pressedKey] = TypedKeyObject()
    typedKey = keyDictionary[pressedKey]

    typedKey.id = event["ascii"]
    if event["keyEvent"] == "Down":
        typedKey.pressTime = event["Time"]
    elif event["keyEvent"] == "Up":
        typedKey.releaseTime = event["Time"]

    # A word is complete once the Return key has been pressed and released
    # (releaseTime keeps its initial value 0 until the Up event is seen).
    returnKey = keyDictionary.get("Return")
    if returnKey is None or returnKey.releaseTime == 0:
        continue

    # Dictionaries preserve insertion order, so this is the order in which
    # the keys of the word were first seen.
    typedKeys = list(keyDictionary.items())

    # Every key except the last one is paired with the key that follows it.
    for (currentKey, currentValue), (_nextKey, nextValue) in zip(typedKeys, typedKeys[1:]):
        currentPress = int(currentValue.pressTime)
        currentRelease = int(currentValue.releaseTime)
        nextPress = int(nextValue.pressTime)
        keystrokeRows.append({
            'subject': event["user"],
            'idOfWord': event["idOfWord"],
            'key': currentKey,
            'H': (currentRelease - currentPress) / 1000,
            'UD': (nextPress - currentRelease) / 1000,
            'DD': (nextPress - currentPress) / 1000,
        })

    # The last key (normally Return) only gets an H value because no key
    # follows it; UD and DD are left out and become NaN in the table.
    # Taking the last dictionary entry also covers a word that consists of
    # the Return key alone, which previously failed because there was no
    # "next" key to read the timings from.
    lastKey, lastValue = typedKeys[-1]
    keystrokeRows.append({
        'subject': event["user"],
        'idOfWord': event["idOfWord"],
        'key': lastKey,
        'H': (int(lastValue.releaseTime) - int(lastValue.pressTime)) / 1000,
    })

    # Reset the dictionary for the next word.
    keyDictionary = {}

# Build the result table in one go, with the same column order as before.
df = pd.DataFrame(keystrokeRows, columns=['subject', 'idOfWord', 'key', 'H', 'UD', 'DD'])
        

# Write the per-keystroke table to disk: the header first, then one line per row.
# The with-block closes the file even if writing a row raises. newline='' is
# the value the csv module documents for its file objects; like '\n' it leaves
# the writer's own line endings untranslated, so the bytes written are the same.
with open("KeyStrokeDistance.csv", 'w', newline='') as f:
    writer = csv.writer(f)
    writer.writerow(['subject','idOfWord', 'key','H','UD','DD'])
    for _rowLabel, row in df.iterrows():
        # Iterating the row Series yields its values in column order.
        writer.writerow(row)
      




In [3]:
# The existing data set: the new rows continue after the sessionIndex of its last row.
data = pd.read_csv("keystroke.csv")

# Empty table with one column per timing feature, in the order in which the
# values are later appended to keystroke.csv.
df = pd.DataFrame(columns=[
    'subject', 'sessionIndex', 'rep',
    'H.period', 'DD.period.t', 'UD.period.t',
    'H.t', 'DD.t.i', 'UD.t.i',
    'H.i', 'DD.i.e', 'UD.i.e',
    'H.e', 'DD.e.five', 'UD.e.five',
    'H.five', 'DD.five.Shift.r', 'UD.five.Shift.r',
    'H.Shift.r', 'DD.Shift.r.o', 'UD.Shift.r.o',
    'H.o', 'DD.o.a', 'UD.o.a',
    'H.a', 'DD.a.n', 'UD.a.n',
    'H.n', 'DD.n.l', 'UD.n.l',
    'H.l', 'DD.l.Return', 'UD.l.Return',
    'H.Return',
])
# get the last row
lastRow = data.iloc[-1]
nextSessionIndex = lastRow['sessionIndex'] + 1

# Read the per-keystroke table back in. The file name has to match the one
# the table was written under ("KeyStrokeDistance.csv") character for
# character, otherwise the read fails on case-sensitive file systems.
data = pd.read_csv("KeyStrokeDistance.csv")
lastRow = data.iloc[-1]
# The idOfWord of the last row is used as the number of words; int() makes
# sure the value can be passed to range() whatever numeric type was parsed.
numberOfWords = int(lastRow['idOfWord'])

# Labels of the keys of one typed word, in typing order. They are the building
# blocks of the feature column names:
#   H.<key>, DD.<key>.<next key>, UD.<key>.<next key>
keyLabels = ['period', 't', 'i', 'e', 'five', 'Shift.r', 'o', 'a', 'n', 'l', 'Return']
# Each word occupies this many consecutive rows of the per-keystroke table.
rowsPerWord = len(keyLabels)

# One record per word; the frame is built once after the loop instead of
# being re-copied by pd.concat for every word.
wordRecords = []
for i in range(0, numberOfWords):
    firstRow = rowsPerWord * i
    finalData = {}
    finalData['subject'] = data.iloc[firstRow].subject
    finalData['sessionIndex'] = nextSessionIndex
    finalData['rep'] = data.iloc[firstRow].idOfWord

    for position, label in enumerate(keyLabels):
        keyRow = data.iloc[firstRow + position]
        finalData['H.' + label] = keyRow.H
        # The last key has no following key and therefore only an H column.
        if position + 1 < rowsPerWord:
            transition = label + '.' + keyLabels[position + 1]
            finalData['DD.' + transition] = keyRow.DD
            finalData['UD.' + transition] = keyRow.UD

    wordRecords.append(finalData)

# Keep the column order of the (so far empty) df, because the rows are
# written to the CSV file positionally.
df = pd.DataFrame(wordRecords, columns=df.columns)


# Append the new rows to the existing data set (no header is written).
# Note: the file is opened in append mode, so every run of this cell adds rows.
# The with-block closes the file even if writing a row raises. newline='' is
# the value the csv module documents for its file objects; like '\n' it leaves
# the writer's own line endings untranslated.
with open("keystroke.csv", 'a', newline='') as f:
    writer = csv.writer(f)
    for _rowLabel, row in df.iterrows():
        # Iterating the row Series yields its values in column order.
        writer.writerow(row)

print(df)


  subject sessionIndex rep  H.period  DD.period.t  UD.period.t    H.t  DD.t.i  \
0    vuko           19   1     0.079        0.204        0.125  0.093   0.109   
1    vuko           19   2     0.063        0.532        0.469  0.078   0.125   
2    vuko           19   3     0.078        0.250        0.172  0.109   0.156   
3    vuko           19   4     0.125        0.234        0.109  0.110   0.141   
4    vuko           19   5     0.078        0.406        0.328  0.078   0.141   
5    vuko           19   6     0.078        0.172        0.094  0.093   0.234   

   UD.t.i    H.i  ...    H.a  DD.a.n  UD.a.n    H.n  DD.n.l  UD.n.l    H.l  \
0   0.016  0.062  ...  0.125   0.125   0.000  0.062   0.156   0.094  0.078   
1   0.047  0.062  ...  0.125   0.141   0.016  0.062   0.171   0.109  0.094   
2   0.047  0.094  ...  0.110   0.157   0.047  0.093   0.187   0.094  0.094   
3   0.031  0.062  ...  0.093   0.125   0.032  0.093   0.203   0.110  0.093   
4   0.063  0.078  ...  0.094   0.125   0.0