In [188]:
from scipy import stats
import numpy as np
import pandas as pd
from statsmodels.stats.multicomp import pairwise_tukeyhsd, MultiComparison
import statsmodels.api as sm
import statsmodels.formula.api as smf
import statsmodels.stats as ss

from numpy.linalg import norm

import matplotlib.pyplot as plt
import seaborn as sns
from IPython.display import HTML,display

from ast import literal_eval as make_tuple

from os import listdir
from os.path import isfile, join, splitext, basename
import csv
import itertools
import glob

from util import csv_length

%matplotlib inline

In [124]:
data_path = './data/deviation/'
t1_list = glob.glob(data_path + '*_t1.csv')
t2_list = glob.glob(data_path + '*_t2.csv')
com_list = glob.glob(data_path + '*_com.csv')

t1_num_list = [csv_length(data) for data in t1_list]
t2_num_list = [csv_length(data) for data in t2_list]
com_num_list = [csv_length(data) for data in com_list]

In [220]:
def ingest(data, nums):
    tmp = pd.read_csv(data, names = [i for i in range(nums)])
    tmp_sers = [tmp[i].dropna() for i in range(nums)]
    
    tmp_chkpoints = [ser[:ser[ser.str.contains('Score')].index[0]] for ser in tmp_sers]
    tmp_datas = [ser[ser[ser.str.contains('Score')].index[0]+1:] for ser in tmp_sers]
    tmp_scores = [ser[ser[ser.str.contains('Score')].index[0]] for ser in tmp_sers]
    
    return tmp_chkpoints, tmp_datas, tmp_scores

def make_arrays(sers):
    ret = [ser.apply(lambda x: np.array(make_tuple(x))) for ser in sers]
    return ret

def refine(sers):
    ret = [ser[(ser.apply(lambda x: x[2]) >= 0) & (ser.apply(lambda x: x[2]) <= 600)].reset_index(drop=True) for ser in sers]
    return ret
    

def find_dist(chkpoint, data):
    res_dict = {}
    res_list = []

    for i in range(len(chkpoint)):
        idx = min(range(len(data)), key=lambda j: abs(data[j][2]-chkpoint[i][2]))
        minidx = -5
        maxidx = 5
        if idx < 5:
            minidx = -idx
        if idx > len(data) - 5:
            maxidx = len(data)-idx
        new_idxs = np.arange(minidx, maxidx) + idx
        res_dict[i] = data.loc[new_idxs].values

    for key in res_dict.keys():
        for item in res_dict[key]:
            res_list.append(np.linalg.norm(chkpoint[key][:2] - item[:2]))
            
    return np.mean(res_list)

def score_to_list(scores):
    return [int(score.split(':')[-1]) for score in scores]

In [221]:
chkpoints, datas, scores = ingest(com_list[0], com_num_list[0])
score_to_list(scores)

[18, 15, 15]

In [222]:
def make_result(data, nums):
    chkpoints, datas, scores = ingest(data, nums)
    chkpoints, datas = make_arrays(chkpoints), make_arrays(datas)
    chkpoints, datas = refine(chkpoints), refine(datas)
    score_list = score_to_list(scores)
    result = [[find_dist(chkpoints[i], datas[i]), score_list[i]] for i in range(nums)]
    return result

In [201]:
make_result(com_list[1], com_num_list[1])

[[4.720565142896691, 22],
 [4.512724893935976, 25],
 [7.402794125129793, 18],
 [5.748244166216569, 22],
 [5.1855678724747465, 22]]

In [183]:
def ingest2(data, num):
    tmp = pd.read_csv(data, names=[i for i in range(num)])
    return [tmp[column].apply(lambda x: int(x.split(':')[-1])).values for column in tmp]

In [181]:
df = pd.read_csv(t2_list[0], names = [i for i in range(t2_num_list[0])])
for column in df:
    df[column].apply(lambda x: int(x.split(':')[-1])).values

In [184]:
t2_res_dict = {splitext(basename(data))[0]: ingest2(data, nums) for data, nums in zip(t2_list, t2_num_list)}

In [185]:
t2_res_dict

{'p10_t2': [array([40]), array([40]), array([40]), array([40]), array([40])],
 'p1_t2': [array([40]), array([40]), array([39]), array([40]), array([39])],
 'p2_t2': [array([39]), array([40]), array([39]), array([40]), array([40])],
 'p3_t2': [array([39]), array([40]), array([38]), array([39]), array([40])],
 'p5_t2': [array([40]), array([40]), array([38]), array([40]), array([39])],
 'p7_t2': [array([40]), array([40]), array([39]), array([40]), array([40])],
 'p8_t2': [array([39]), array([39]), array([40]), array([40]), array([37])],
 'p9_t2': [array([40]), array([40]), array([39]), array([40]), array([38])]}

In [157]:
t1_res_com_res_dict = {splitext(basename(data))[0]: make_result(data, nums) for data, nums in zip(t1_list, t1_num_list)}dict = {splitext(basename(data))[0]: make_result(data, nums) for data, nums in zip(t1_list, t1_num_list)}
com_res_dict = {splitext(basename(data))[0]: make_result(data, nums) for data, nums in zip(t1_list, t1_num_list)}

In [223]:
com_res_dict = {splitext(basename(data))[0]: make_result(data, nums) for data, nums in zip(com_list, com_num_list)}

In [225]:
com_res_dict

{'p10_com': [[9.39029579603402, 17],
  [5.999877499008621, 17],
  [5.261191895346471, 22],
  [5.828867367902421, 23],
  [7.294952566685448, 17]],
 'p2_com': [[6.317080742608382, 26],
  [6.317080742608382, 26],
  [6.951640596260343, 32],
  [6.658250494282986, 34],
  [7.329520876321976, 34]],
 'p3_com': [[7.721822485360722, 21],
  [7.729581016481224, 21],
  [7.013154255231468, 28],
  [9.093814463278202, 22],
  [6.822291857351103, 23]],
 'p5_com': [[6.124199442386478, 18],
  [9.121894838457965, 15],
  [7.893431062515279, 15]],
 'p7_com': [[8.602892796144536, 15],
  [5.594983713661252, 29],
  [3.9091281531062596, 18],
  [3.3534880548341865, 18],
  [4.536952668649013, 21]],
 'p9_com': [[5.9095928385394245, 29],
  [7.713701231072887, 16],
  [8.130159098872703, 23],
  [9.193557568017653, 20],
  [6.293319635808794, 23]],
 't8_com': [[4.720565142896691, 22],
  [4.512724893935976, 25],
  [7.402794125129793, 18],
  [5.748244166216569, 22],
  [5.1855678724747465, 22]]}

In [218]:
com_list

['./data/deviation/p5_com.csv',
 './data/deviation/t8_com.csv',
 './data/deviation/p3_com.csv',
 './data/deviation/p7_com.csv',
 './data/deviation/p9_com.csv',
 './data/deviation/p10_com.csv',
 './data/deviation/p2_com.csv']

In [204]:
com_res_dict['p2_']

dict_keys(['p9_t1', 'p10_t1', 'p2_t1', 'p7_t1', 'p3_t1', 'p5_t1', 'p8_t1'])

In [168]:
string ='p2_t1'
for i in range(len(res_dict[string])):
    df.loc[-1] = [string.split('_')[0][-1], 'T1', res_dict['p9_t1'][i], 0, i+1]
    df.index = df.index+1
    df = df.sort_index()
df

KeyError: 'p1_t1'

In [186]:
df = pd.DataFrame(columns=['Participants No.', 'Task', 'Distance', 'Score', 'Trial'])
df

Unnamed: 0,Participants No.,Task,Distance,Score,Trial


In [104]:
t1_num_list[0]

5

In [115]:
t1_list[0]

'./data/deviation/p9_t1.csv'

In [114]:
[find_dist(chkpoints[3], datas[3])]

[4.09827200975125]

In [95]:
t1_res_dict = {splitext(basename(data))[0]: make_result(data, nums) for data, nums in zip(t1_list, t1_num_list)}

[0        [-3.924615, 21.44665, 14.99292]
1            [2.809, 29.29745, 43.13107]
2         [4.339758, 22.59884, 72.92099]
3          [8.975751, 24.04423, 102.899]
4         [4.496086, 29.98404, 131.9466]
5        [-2.903792, 17.89462, 158.8745]
6         [7.385743, 15.51435, 187.8897]
7        [-5.648148, 17.99056, 215.8902]
8        [-6.063065, 21.10863, 246.4083]
9       [0.05689099, 21.02533, 276.6189]
10        [5.770912, 18.57292, 306.8619]
11       [-5.802941, 17.74261, 335.4375]
12        [-3.063105, 27.10756, 364.368]
13        [7.777572, 25.58026, 392.7149]
14        [-3.803408, 30.4225, 418.8282]
15       [-3.672556, 26.59524, 445.8516]
16        [-2.846588, 19.6266, 475.1123]
17       [-1.004245, 17.74133, 505.1772]
18    [-0.003203675, 26.69716, 533.5602]
19        [2.837936, 33.26009, 561.7103]
20          [6.534966, 28.78083, 591.41]
Name: 0, dtype: object, 0      [2.744862, 27.03364, 1.726585]
1       [6.36146, 24.03343, 31.96721]
2      [2.367036, 19.67836, 61.92709]


Passing list-likes to .loc or [] with any missing label will raise
KeyError in the future, you can use .reindex() as an alternative.

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/indexing.html#deprecate-loc-reindex-listlike
  return self.loc[key]


TypeError: 'float' object is not subscriptable

In [84]:
t1_res_dict['p10_t1']

([0        [-5.54975, 29.94616, 13.97997]
  1       [0.3554421, 22.61518, 42.13364]
  2       [-1.785244, 25.97851, 70.34202]
  3       [-5.846662, 22.17454, 98.54153]
  4        [-4.98737, 17.01192, 128.1992]
  5        [0.1366266, 25.10795, 156.791]
  6         [4.619321, 26.4267, 184.3691]
  7        [3.253657, 18.33548, 213.3611]
  8        [1.034745, 19.98874, 243.4666]
  9        [4.565527, 27.24193, 273.2781]
  10       [2.760857, 28.42876, 302.0266]
  11      [-4.268908, 20.45674, 330.3269]
  12      [-3.654237, 18.27928, 361.1005]
  13     [-0.7668205, 16.94001, 391.7962]
  14    [-0.08830614, 20.05177, 421.4944]
  15      [-2.251411, 28.98203, 450.2191]
  16        [0.5683993, 28.025, 479.3131]
  17      [-2.744901, 18.68155, 505.9878]
  18       [9.843883, 26.57516, 532.8712]
  19       [5.182849, 31.77628, 562.9664]
  20      [-6.230176, 33.56803, 591.5527]
  Name: 0, dtype: object, 0      [-1.796792, 27.57187, 17.83473]
  1      [-2.926536, 25.27756, 48.34079]
  2       [4