In [1]:
import pandas as pd
import re
import ast
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import levene
import matplotlib.pyplot as plt

In [2]:
trans_pairs = pd.read_csv('data/mt.trans_pairs.csv')
trans_pairs.head()

Unnamed: 0,id,translatedSet,src,tgt
0,8698,Test User 401 | Turtle Story PE L4 | Hindi -> ...,कछुआ,Tortoise.
1,8697,Test User 401 | Turtle Story PE L4 | Hindi -> ...,सुबह जब मेरी आँख खुली तो आसमान में सूरज चम - च...,"When my eyes opened in the morning, the sun wa..."
2,8696,Test User 401 | Turtle Story PE L4 | Hindi -> ...,हैरान होकर मैंने अपने आसपास देखा,"Surprised, I looked around myself."
3,8695,Test User 401 | Turtle Story PE L4 | Hindi -> ...,पहले तो चकरा गई कि मैं हूँ कहाँ फिर एकदम याद आ...,"First I was wondering where I was, then I reme..."
4,8694,Test User 401 | Turtle Story PE L4 | Hindi -> ...,"एक असीम नीले संसार के बीचोंबीच , जिसे लोग सागर...","In the middle of an infinite blue world, which..."


In [4]:
keystrokes = pd.read_csv('data/mt.dockeystroke.csv')
keystrokes.head()

Unnamed: 0,id,translatedSet,keystrokeseries,trump
0,339,Test User | Jungle Brew MT L4 | Hindi -> English,"[['Default Layout', 1], [1, 8872], ['card37', ...",Y
1,338,Test User | At Least I'm Ok MT L3 | Hindi -> E...,"[['Default Layout', 2], [1, 3919], ['card42', ...",Y
2,337,Test User 301 | Monsoon | Hindi -> English,"[['Default Layout', 2], [1, 7694], ['card0', 7...",Y
3,336,Test User 301 | Whats That Smell PE L3 | Hindi...,"[['Default Layout', 4], [1, 16561], ['card0', ...",Y
4,335,Test User 301 | A Helping Hand BL L4 | Hindi -...,"[['Default Layout', 2], [1, 8334], ['card1', 8...",Y


In [3]:
def break_user(x):
    xs1 = x.split('|')
    
    if len(xs1) > 1:
        return xs1[0]
        
def breakdown_level(x):
    xs1 = x.split('|')
    if len(xs1) > 1:
        xs2 = xs1[1].split(' ')
        xs2 = list(filter(None, xs2))
        if len(xs2) >= 3:
            if re.match(r"L\d", xs2[-1]):
                return xs2[-1]
    return ''

def breakdown_type(x):
    xs1 = x.split('|')
    if len(xs1) > 1:
        xs2 = xs1[1].split(' ')
        xs2 = list(filter(None, xs2))
        if len(xs2) >= 3:
            if re.match(r"L\d", xs2[-1]):
                return xs2[-2]
    return ''

def breakdown_story(x):
    xs1 = x.split('|')
    if len(xs1) > 1:
        xs2 = xs1[1].split(' ')
        xs2 = list(filter(None, xs2))
        if len(xs2) >= 3:
            if re.match(r"L\d", xs2[-1]):
                return ' '.join(xs2[:-2])
    return ''

In [5]:
trans_pairs['user'] = trans_pairs['translatedSet'].apply(lambda x: break_user(x))
trans_pairs['type'] = trans_pairs['translatedSet'].apply(lambda x: breakdown_type(x))
trans_pairs['level'] = trans_pairs['translatedSet'].apply(lambda x: breakdown_level(x))
trans_pairs['story'] = trans_pairs['translatedSet'].apply(lambda x: breakdown_story(x))

keystrokes['user'] = keystrokes['translatedSet'].apply(lambda x: break_user(x))
keystrokes['type'] = keystrokes['translatedSet'].apply(lambda x: breakdown_type(x))
keystrokes['level'] = keystrokes['translatedSet'].apply(lambda x: breakdown_level(x))
keystrokes['story'] = keystrokes['translatedSet'].apply(lambda x: breakdown_story(x))
keystrokes['total_keys'] = keystrokes['keystrokeseries'].apply(lambda x: len(ast.literal_eval(x)))
keystrokes['total_time'] = keystrokes['keystrokeseries'].apply(lambda x: ast.literal_eval(x)[-1][1])

In [6]:
list_users = list(trans_pairs['user'].unique())
list_stories = list(trans_pairs['story'].unique())

In [7]:
def actions_to_dict_keystrokes(actions):
    keydict = {}
    for a in range(len(actions)):
        if type(actions[a][0]) == str:
            if actions[a][0] not in keydict:
                keydict[actions[a][0]] = {}
            sub = 1
            while True:
                if a + sub < len(actions):
                    if actions[a][1] not in keydict[actions[a][0]]:
                        keydict[actions[a][0]][actions[a][1]] = []
                    if type(actions[a + sub][0]) == str:
                        break
                    keydict[actions[a][0]][actions[a][1]].append(actions[a + sub])
                    sub += 1
                else:
                    break
    return keydict

In [44]:
actions_to_dict_keystrokes([
  [
    "Default Layout",
    0
  ],
  [
    1,
    1134
  ],
  [
    "card0",
    1143
  ],
  [
    1,
    2530
  ],
  [
    "card1",
    2542
  ],
  [
    1,
    3386
  ],
  [
    "card2",
    3395
  ],
  [
    1,
    4394
  ],
  [
    "card3",
    4400
  ],
  [
    1,
    5050
  ],
  [
    "card4",
    5056
  ],
  [
    1,
    5714
  ],
  [
    "card5",
    5718
  ],
  [
    1,
    7050
  ],
  [
    "card6",
    7056
  ],
  [
    1,
    54019
  ],
  [
    "card7",
    54033
  ],
  [
    1,
    55274
  ],
  [
    "card8",
    55279
  ],
  [
    1,
    57402
  ],
  [
    "card9",
    57409
  ],
  [
    "card9",
    424954
  ],
  [
    1,
    424962
  ],
  [
    "card10",
    424971
  ],
  [
    1,
    426001
  ],
  [
    "card11",
    426008
  ],
  [
    1,
    426888
  ],
  [
    "card12",
    426895
  ],
  [
    1,
    427761
  ],
  [
    "card13",
    427767
  ],
  [
    1,
    428257
  ],
  [
    "card14",
    428263
  ],
  [
    1,
    429416
  ],
  [
    "card16",
    429422
  ],
  [
    1,
    431104
  ],
  [
    "card27",
    431113
  ],
  [
    1,
    432989
  ],
  [
    "card39",
    432996
  ],
  [
    1,
    433672
  ],
  [
    "card40",
    433679
  ]
])

{'Default Layout': {0: [[1, 1134]]},
 'card0': {1143: [[1, 2530]]},
 'card1': {2542: [[1, 3386]]},
 'card2': {3395: [[1, 4394]]},
 'card3': {4400: [[1, 5050]]},
 'card4': {5056: [[1, 5714]]},
 'card5': {5718: [[1, 7050]]},
 'card6': {7056: [[1, 54019]]},
 'card7': {54033: [[1, 55274]]},
 'card8': {55279: [[1, 57402]]},
 'card9': {57409: [], 424954: [[1, 424962]]},
 'card10': {424971: [[1, 426001]]},
 'card11': {426008: [[1, 426888]]},
 'card12': {426895: [[1, 427761]]},
 'card13': {427767: [[1, 428257]]},
 'card14': {428263: [[1, 429416]]},
 'card16': {429422: [[1, 431104]]},
 'card27': {431113: [[1, 432989]]},
 'card39': {432996: [[1, 433672]]},
 'card40': {}}

In [11]:
keystrokes['keydict'] = keystrokes['keystrokeseries'].apply(lambda x: actions_to_dict_keystrokes(ast.literal_eval(x)))

In [37]:
for user in list_users:
    for story in list_stories:
        try:
            count = 0
            df = trans_pairs[(trans_pairs['user'].str.contains(user)) & (trans_pairs['story'].str.contains(story))]
            series = keystrokes[(keystrokes['user'].str.contains(user)) & keystrokes['story'].str.contains(story)]['keydict'].values[0]
            count = 0
            for index, row in df.iloc[::-1].iterrows():
                card = 'card' +  str(count)
                if card in series:
                    trans_pairs.loc[trans_pairs['id'] == row['id'], "repeat"] = len(series[card])
                    trans_pairs.loc[trans_pairs['id'] == row['id'], "actions"] = str(series[card])
                else:
                    trans_pairs.loc[trans_pairs['id'] == row['id'], "repeat"] = -1
                    trans_pairs.loc[trans_pairs['id'] == row['id'], "actions"] = '{}'
                count += 1
        except Exception as e:
            print(e)

index 0 is out of bounds for axis 0 with size 0
index 0 is out of bounds for axis 0 with size 0
index 0 is out of bounds for axis 0 with size 0
index 0 is out of bounds for axis 0 with size 0
index 0 is out of bounds for axis 0 with size 0
index 0 is out of bounds for axis 0 with size 0
index 0 is out of bounds for axis 0 with size 0
index 0 is out of bounds for axis 0 with size 0
index 0 is out of bounds for axis 0 with size 0
first argument must be string or compiled pattern
first argument must be string or compiled pattern
first argument must be string or compiled pattern
first argument must be string or compiled pattern
first argument must be string or compiled pattern
first argument must be string or compiled pattern
first argument must be string or compiled pattern
first argument must be string or compiled pattern
first argument must be string or compiled pattern
first argument must be string or compiled pattern
first argument must be string or compiled pattern
first argument mus

In [None]:
trans_pairs.head()

In [42]:
trans_pairs[(trans_pairs['user'].str.contains('User 401')) & (trans_pairs['story'].str.contains('Turtle Story'))]

Unnamed: 0,id,translatedSet,src,tgt,user,type,level,story,repeat,actions
0,8698,Test User 401 | Turtle Story PE L4 | Hindi -> ...,कछुआ,Tortoise.,Test User 401,PE,L4,Turtle Story,-1.0,{}
1,8697,Test User 401 | Turtle Story PE L4 | Hindi -> ...,सुबह जब मेरी आँख खुली तो आसमान में सूरज चम - च...,"When my eyes opened in the morning, the sun wa...",Test User 401,PE,L4,Turtle Story,-1.0,{}
2,8696,Test User 401 | Turtle Story PE L4 | Hindi -> ...,हैरान होकर मैंने अपने आसपास देखा,"Surprised, I looked around myself.",Test User 401,PE,L4,Turtle Story,-1.0,{}
3,8695,Test User 401 | Turtle Story PE L4 | Hindi -> ...,पहले तो चकरा गई कि मैं हूँ कहाँ फिर एकदम याद आ...,"First I was wondering where I was, then I reme...",Test User 401,PE,L4,Turtle Story,-1.0,{}
4,8694,Test User 401 | Turtle Story PE L4 | Hindi -> ...,"एक असीम नीले संसार के बीचोंबीच , जिसे लोग सागर...","In the middle of an infinite blue world, which...",Test User 401,PE,L4,Turtle Story,-1.0,{}
5,8693,Test User 401 | Turtle Story PE L4 | Hindi -> ...,"चारों ओर नीला , हरा पानी लहरा रहा था और सूरज क...","Blue, green water was sparkling all around and...",Test User 401,PE,L4,Turtle Story,-1.0,{}
6,8692,Test User 401 | Turtle Story PE L4 | Hindi -> ...,मैनें अपने मीनपक्षों ( फ़्लिपर ) १ को पेट त¬ले ...,I collected my meenpaksh (flipper) 1 in my sto...,Test User 401,PE,L4,Turtle Story,-1.0,{}
7,8691,Test User 401 | Turtle Story PE L4 | Hindi -> ...,मुझे नहीं मालूम था कि मैं कहाँ हूँ,I didn't know where I was.,Test User 401,PE,L4,Turtle Story,-1.0,{}
8,8690,Test User 401 | Turtle Story PE L4 | Hindi -> ...,लेकिन जब आसपास नर्म - नर्म धूप बिछी हो और पेटप...,But when there is soft sunshine around and the...,Test User 401,PE,L4,Turtle Story,-1.0,{}
9,8689,Test User 401 | Turtle Story PE L4 | Hindi -> ...,गुनगुनी धूप में मैं भी गुनगुनी हो गई,I also became warm in the warm sun.,Test User 401,PE,L4,Turtle Story,-1.0,{}


In [47]:
(keystrokes[(keystrokes['user'].str.contains('User 401')) & (keystrokes['story'].str.contains('Turtle Story'))]["keydict"].values[0])

{'Default Layout': {1: [[1, 6392]]},
 'card4': {6427: [[1, 10333], [1, 11403]]},
 'card5': {11410: [[1, 16206]]},
 'card8': {16225: [[1, 20678]], 59762: [[1, 64436]]},
 'card7': {20693: [[1, 21932]], 64451: [[1, 65987]]},
 'card9': {21947: [[1, 24285]], 58328: [[1, 59748]]},
 'card10': {24300: [[1, 27501]], 56731: [[1, 58307]]},
 'card12': {27515: [[1, 29492], [1, 30308]], 54667: [[1, 56716]]},
 'card13': {30315: [[1, 33327], [1, 34269]], 51687: [[1, 54651]]},
 'card15': {34275: [[1, 37916]]},
 'card14': {37931: [[1, 41004]]},
 'card16': {41018: [[1, 51670]]},
 'card6': {66004: [[1, 76445],
   [46, 78507],
   [1, 80242],
   [84, 83216],
   [72, 83488],
   [69, 83632],
   [32, 83712],
   [82, 84824],
   [69, 85168],
   [69, 85336],
   [70, 85592],
   [32, 85728],
   [87, 85984],
   [65, 86264],
   [83, 86577],
   [32, 86816],
   [70, 89488],
   [65, 89712],
   [73, 89920],
   [82, 90160],
   [89, 90248],
   [32, 90504],
   [87, 90688],
   [73, 90848],
   [79, 90857],
   [82, 90968],
   