In [1]:
%matplotlib
#%matplotlib inline
import os
import csv
import fnmatch
import numpy as np
import datetime
import re 
import pandas as pd
import matplotlib.pyplot as plt
import math
import xlsxwriter
import scipy.stats
import statsmodels.api as sm
from statsmodels.formula.api import ols
import statsmodels
from statsmodels.graphics.regressionplots import abline_plot
from statsmodels.stats.api import anova_lm
import seaborn as sns

pd.options.mode.use_inf_as_na = True

Using matplotlib backend: Qt5Agg


  from pandas.core import datetools


In [3]:
# csv file with the saved data
fileName = r"C:\DTU\Data\201805_HealthnRehab\data_summary.csv"

df = pd.read_csv(fileName, delimiter=',')

typingMechanismDictKeys = set(df['typing_mechanism'])

# create reference data frame for age bins and gender (0:Less than 30/Male, 1:Greater than 30/Female)
labels = ['age_bins', 'gender', 'typing_mechanism']
dataReference = [['Less than 30', 'Male', 'Dwell-Time'], ['Greater than 30', 'Female', 'Multi-Key Selection']]
df_refAgeGender = pd.DataFrame.from_records(dataReference, columns=labels)

# replace data in df in string or category
df = df.replace({'Less than 30': 0, 'Greater than 30': 1, 'Male': 0, 'Female': 1, 'DT' : 0, 'MS' : 1})

# Create dataframe without Nan values
df_woNaGenderAgeExperience = df.dropna(subset=['gender','age_bins', 'gaze_interaction_experience'], how='any') 

# Create a copy of the dataframe to avoid loc errors.
df_ToAnalyze = df_woNaGenderAgeExperience.copy()

# Convert columns of age_bins, typing_mechanism, gender into categorical columns
df_ToAnalyze['gender'] = pd.Categorical(df_woNaGenderAgeExperience.gender).codes
df_ToAnalyze['typing_mechanism'] = pd.Categorical(df_woNaGenderAgeExperience.typing_mechanism).codes
df_ToAnalyze['age_bins'] = pd.Categorical(df_woNaGenderAgeExperience.age_bins).codes
df_ToAnalyze['gaze_interaction_experience'] = pd.Categorical(df_woNaGenderAgeExperience.gaze_interaction_experience).codes


In [5]:
df

Unnamed: 0,timestamp,subject_name,typing_mechanism,age,age_bins,gender,profession,vision,gaze_interaction_experience,application_of_gaze_interaction_used_before,...,how_comfortable_was_it_during_the_task?,would_you_use_Optikey_or_recommend_it?,any_suggestions/comments?,dataLog_saved?,comments,typing_speed,ms_per_char,error_rate,read_text_events_frequency_ratio,read_text_events_time_ratio
0,5/15/2018 14:03,akt_MS,1,,,,,,,,...,0,,,Yes,,5.780715,2.075868,39.059692,0.134454,0.013809
1,5/15/2018 14:56,be_DT,0,40,1.0,1.0,business,Lenses,Never,,...,9,Maybe,,Yes,English speaker,1.878433,6.388303,66.27566,0.277778,0.0073
2,5/15/2018 10:21,jl_DT,0,55-60,1.0,1.0,,,,,...,0,,,Yes,,4.090573,2.933575,92.592593,0.052632,0.002964
3,5/15/2018 12:35,KEA_MS,1,20-25,0.0,0.0,Student,Normal,Never,,...,6,Yes,,Yes,,10.785736,1.112581,13.147724,0.056,0.003742
4,5/15/2018 11:11,lone_DT,0,50-55,1.0,1.0,nurse,Glasses,Never,,...,9,Yes,kom hurtigst mulig igang\n,Yes,,4.103884,2.924059,53.722222,0.205128,0.02469
5,5/15/2018 12:12,mcc_MS,1,20-25,0.0,0.0,student,Normal,Never,,...,6,Yes,,Yes,,8.778835,1.366924,40.37037,0.076087,0.005991
6,5/15/2018 14:44,MK_DT,0,45-50,1.0,1.0,Product Developer,Not wearing glasses during exp,Never,,...,9,Yes,A brilliant tool for thouse that need it.,Yes,,4.62549,2.59432,18.445341,0.092593,0.005869
7,5/15/2018 10:58,MT_MS,1,25,0.0,0.0,Programmer,Normal,Multiple times,"Virtual reality with eye tracking, Eye tracking",...,10,Yes,Backspace,No,,7.144239,1.679675,8.816964,0.185185,0.013547
8,5/15/2018 12:28,ok_MS,1,20-25,0.0,0.0,student,Normal,Never,,...,4,Maybe,,Yes,,11.135985,1.077588,8.210327,0.045161,0.003269
9,5/15/2018 12:51,pt_DT,0,27,0.0,0.0,Student - Health informatic KU,Lenses,Never,,...,3,,,Yes,,6.230274,1.926079,9.67665,0.058442,0.00491


### Gaze interaction experience

In [10]:
nSubj = len(df_ToAnalyze)

ax = df_ToAnalyze.gaze_interaction_experience.value_counts(sort = False).plot.pie(labels = ['Multiple times', 'Never', 'Once'], autopct='%1.1f%%')
ax.set_title('Gaze interaction experience distribution \n Total count = ' + str(nSubj))

Text(0.5,1,'Gaze interaction experience distribution \n Total count = 29')

### Gender

In [8]:
nSubj = len(df_ToAnalyze)

ax = df_ToAnalyze.gender.value_counts(sort = False).plot.pie(labels = ['Male', 'Female'], autopct='%1.1f%%')
ax.set_title('Gender distribution \n Total count = ' + str(nSubj))

Text(0.5,1,'Gender distribution \n Total count = 29')

In [7]:
# Mean Age
df.age_bins.value_counts

<bound method IndexOpsMixin.value_counts of 0     NaN
1     1.0
2     1.0
3     0.0
4     1.0
5     0.0
6     1.0
7     0.0
8     0.0
9     0.0
10    NaN
11    0.0
12    0.0
13    1.0
14    0.0
15    1.0
16    1.0
17    0.0
18    1.0
19    0.0
20    1.0
21    0.0
22    0.0
23    0.0
24    0.0
25    1.0
26    1.0
27    1.0
28    1.0
29    0.0
30    1.0
31    1.0
Name: age_bins, dtype: float64>