# Dragging gestures: Analysis

In [None]:
# OPTIONAL: load the IPython "autoreload" extension so that edits made to imported
# modules are picked up without restarting the kernel
%load_ext autoreload

# OPTIONAL: mode 2 re-imports modules before each cell is executed, so changes to the
# project's own source files (analysis, participants, ...) take effect immediately
%autoreload 2

In [None]:
import numpy as np
from sklearn.linear_model import LinearRegression
import sklearn.metrics as metrics
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import json
import os, shutil
import sympy as sp

# Before attempting to import these two modules, make sure they have been uploaded
# to the folder where this notebook (.ipynb) is located!
import display_properties
import curve_functions

# for linear regression summary
import statsmodels.api as sm
import statsmodels.formula.api as smf
from statsmodels.stats.diagnostic import normal_ad
from scipy.stats import shapiro as normal_shapiro

from analysis import Analysis
from analysisargs import AnalysisArgs, Logs
import participants
from statutils import isDataNormallyDistributed

In [None]:
# Shared objects used by the rest of the notebook.
# `args` carries the analysis configuration; its centralTendency value is also used
# further down to build the output path for the linear-regression figures.
args = AnalysisArgs()
args.centralTendency = 'mean'
# `logs` exposes the experiment data (later cells read `logs.df`).
logs = Logs()
# AN and PA are the entry points for the regression / drawing-time analysis and the
# participant statistics; both are constructed from the same `logs` object.
AN = Analysis(args=args, logs=logs)
PA = participants.ParticipantAnalysis(logs=logs)

In [None]:
# Figure output configuration.
# Only the folders listed here are (re)created when screenshots are enabled, so
# figures produced by other scripts are left untouched.

figuresFoldername = 'figures'
drawingTimeHistogramsFoldername = 'drawing_time_histograms'
linearRegressionFoldername = 'linear_regressions'
# when False, saveFigure() writes nothing and no folder is reset
takeScreenshots = False
# when True, plot labels are translated to Croatian
useCroatian = True

# every folder path ends with a trailing "/" so file names can simply be appended
drawingTimeHistogramsFolderPath = "/".join([
    figuresFoldername,
    drawingTimeHistogramsFoldername,
]) + "/"
# regression figures are grouped by IoD model name and central tendency measure
linearRegressionsFolderPath = "/".join([
    figuresFoldername,
    linearRegressionFoldername,
    args.iodModelName,
    args.centralTendency,
]) + "/"
participantDataFolderPath = figuresFoldername + "/participants/"

def saveFigure(figurename):
    """Save the current matplotlib figure, but only when screenshots are enabled.

    Spaces in ``figurename`` are replaced with underscores and newlines are
    removed before the name is handed to ``plt.savefig``. When the global
    ``takeScreenshots`` flag is not ``True`` nothing is written.
    """
    sanitizedName = figurename.replace(' ', '_').replace('\n', '')
    if takeScreenshots is not True:
        return
    plt.savefig(sanitizedName)

def resetFigureFolder(foldername):
    """Leave ``foldername`` as an existing, empty directory.

    An already existing folder is deleted together with everything inside it
    and then created again; missing parent directories are created as well.
    """
    folderAlreadyExists = os.path.exists(foldername)
    if folderAlreadyExists:
        # wipe the old contents so stale figures do not survive a re-run
        shutil.rmtree(foldername)
    os.makedirs(foldername)

# Folders are only wiped and recreated when figures are actually going to be saved;
# with takeScreenshots disabled the existing figures on disk are left alone.
if(takeScreenshots is True):
    foldersToBeChanged = [
        drawingTimeHistogramsFolderPath,
        linearRegressionsFolderPath,
        participantDataFolderPath
    ]
    
    for foldername in foldersToBeChanged:
        # print the path so it is visible which folders were reset
        print(foldername)
        resetFigureFolder(foldername)    

def translateWord(word):
    """Return the Croatian label for a single English term.

    Terms without a known translation are returned with the suffix
    " - neprevedeno" appended, so missing entries are easy to spot in a plot.
    """
    knownTranslations = {
        "Cartesian": "Kartezijev",
        "Polar": "Polarni",
        "Mouse": "Miš",
        "Graphic tablet": "Grafički tablet",
        "median": "medijan",
        "mean": "Arit.sred.",
    }
    if word in knownTranslations:
        return knownTranslations[word]
    return word + " - neprevedeno"
        
def translate(words):
    """Translate one term or a list of terms to Croatian.

    With the global ``useCroatian`` flag set to ``False`` the argument is
    returned untouched. A ``list`` is translated element by element into a new
    list; any other value is passed to ``translateWord`` as a single term.
    """
    if useCroatian is False:
        return words

    if type(words) is list:
        return [translateWord(entry) for entry in words]

    return translateWord(words)

# Linear regression

In [None]:
# Run AN.trainRegressionModelThenValidate once per (projection set, device) pair:
# each projection on its own first, then the full args.PROJECTIONS list.
# (Presumably this fits the model and validates it, as the method name says — the
# implementation lives in the analysis module.)
for projections in [["Cartesian"], ["Polar"], args.PROJECTIONS]:
    for device in args.DEVICES:
        AN.trainRegressionModelThenValidate(projections, device)
        
        # Leftover idea, currently disabled:
        #reg2 = getRegressionModel(projections, [0, 1], device)
        # test the residuals as shown here: https://jeffmacaluso.github.io/post/LinearRegressionAssumptions/

# Participant data

In [None]:
# Participant overview: age and handedness histograms, each followed by plt.show(),
# then the device-experience summary as the cell's last expression.
PA.get_age_histogram(); plt.show()
PA.get_handedness_histogram(); plt.show()
PA.get_device_experience()

In [None]:
# Grid of histograms of the per-participant mean drawing time:
# one row per device, one column per projection set.
#
# The projection sets are every single projection on its own followed by all
# projections together; for PROJECTIONS == ['Cartesian', 'Polar'] this gives
# [['Cartesian'], ['Polar'], ['Cartesian', 'Polar']].
__tmp_projections = [*[[p] for p in args.PROJECTIONS], args.PROJECTIONS]
# squeeze=False keeps `axes` two-dimensional even when there is only one device or
# one projection set, so axes[k, j] below is always valid.
figure, axes = plt.subplots(len(args.DEVICES), len(__tmp_projections), squeeze=False)
figure.set_size_inches(15, 7)

for k, device in enumerate(args.DEVICES):
    for j, proj in enumerate(__tmp_projections):
        # one mean drawing time per participant for this device / projection set
        dt_means = []
        for partname in logs.df['Participant name'].unique():
            # the standard deviation is returned as well but is not needed here
            mean, _std = AN.dt.get_mean_drawtime_for_participant(
                participantname=partname,
                device=device,
                projections=proj
            )
            dt_means.append(mean)
        ax = axes[k, j]
        ax.hist(dt_means, color='skyblue', bins=15, edgecolor="navy")
        # the value returned by the normality check is shown under the x axis
        pValueNormDist = isDataNormallyDistributed(np.array(dt_means))
        title = "Average drawing time for \n Device %s, Projection(s) %s" %(device, proj)
        ax.set_ylabel("Count")
        ax.set_xlabel("Average drawing time [seconds]\nNorm dist p_value=%.5f" % (pValueNormDist))
        ax.set_title(title)
        # The axis limits are hard-coded so that all subplots share the same ranges.
        # TODO: derive them from the data instead.
        ax.set_ylim([0, 7])
        ax.set_xlim([10, AN.MAX_AVG_DRAW_TIME])

figure.tight_layout(pad=2)
plt.show()

## Error approximation

In [None]:
# TODO implement

## Throughput calculation

In [None]:
# Throughput (TP) per participant and device.
#
# For every participant / device / experiment mode the raw result files are read,
# the standard deviation of the drawing error is computed per curve, turned into an
# effective width W_e and an effective index of difficulty Id_e, and divided by the
# participant's mean drawing time. The mean of those per-curve throughputs is stored
# per device in allTPsForMouse / allTPsForGraphicTablet (one entry per participant).
#
# NOTE(review): this cell uses DEVICES, TEST_MODES, FUNC_IDS, df, getIodForFunc and
# separator, none of which is defined in the notebook cells shown above — they look
# like leftovers from before the code was moved into the analysis modules. Confirm
# where they are supposed to come from (args.DEVICES / logs.df are likely candidates).
# NOTE(review): `participants` is bound to a module by `import participants` in the
# imports cell, so sorted(participants) cannot work unless the name is rebound to a
# collection of participant names first — verify.
sortedParticipants = sorted(participants)

fp = curve_functions.FunctionProvider()

# symbol used to turn the symbolic curve expressions into numeric functions
x = sp.Symbol("x")

# arrays of ALL throughputs per device, i.e. one mean TP from EACH participant
allTPsForMouse = []
allTPsForGraphicTablet = []

## NOTE::: look in calculate_integrals.y
# this throughput calculation is not exactly correct :)
for i in range(len(sortedParticipants)):
    participant = sortedParticipants[i]
    # print(participant, end='')
    # this will have two values; one for each device
    TPsForThisParticipant = {}
    
    for device in DEVICES:
        # the average throughput for this participant and this device
        # each user produces two throughputs: one for each device
        TPsForThisDevice = []
        for experimentMode in TEST_MODES:
            # folder holding the raw point files for this mode / participant / device
            filename = "../Results_backup%s/%s/%s" %(experimentMode, participant, device)
            files = os.listdir(filename)
            
            # each curve was passed through twice on each device,
            # so for example, curve with ID=1 and proj=2, was passed through twice on Mouse,
            # and then twice on graphic tablet.
            # We must find the average st dev for those two passes
            
            # rows: function IDs; 4 columns = [0,1,2,3], the testmode + projection combo
            errorStDevForFunc = np.zeros((len(FUNC_IDS), 4))
            
            for file in sorted(files):

                # the function ID and the projection code are read from fixed character
                # positions (index 3 and index 10) of the file name
                funcId = int(file[3])
                projtmp = file[10]
                # we are searching for an entry in the logs which can tell us
                # the average MT for user
                # and the st dev of error rate for user.
                # from the stdev of error rate, we will calculate effective width of target (W_e)
                # and from that we'll get effective index of difficulty - ID_e
                # when we divide ID_e by the MT of the user, we get the user's throughput for a single curve
                # and then we find the mean of all throughputs for this user, which we
                # then use for t-test to compare the two pointing devices
                
                f = open(filename + "/" + file)
                    
                # read every drawn point (one per line) with the newline stripped;
                # the error standard deviation is computed from these points below
                pointsDrawn = [pointDrawn.replace('\n', '') for pointDrawn in f.readlines()]
                f.close()

                # split the function ID into (difficulty, task) and rebuild the reference
                # curve as a numeric function of x
                test = int(projtmp)
                difficulty = int(int(funcId) / 2)
                task = int(funcId) % 2
                real_func = fp.provide_function(difficulty, task, test)
                real_func = sp.lambdify(x, real_func)

                # ALL of the errors the user had made on this specific curve
                allErrorVals = []

                for pointDrawn in pointsDrawn:
                    # each line holds whitespace-separated values; the first two are x and y
                    x_coord = float(pointDrawn.split()[0])
                    y_coord = float(pointDrawn.split()[1])
                    real_y = real_func(x_coord)

                    # absolute vertical distance between the drawn point and the curve
                    y_diff = abs(y_coord - real_y)

                    if(projtmp in ["2", "3"]):
                        # polar projection, error value should be multiplied with the polar unit length
                        # so that we get effective width in inches
                        y_diff *= display_properties.POLAR_UNIT_LENGTH_IN_INCH
                    else:
                        y_diff *= display_properties.CARTESIAN_UNIT_LENGTH_IN_INCH

                    allErrorVals.append(y_diff)
                # print(np.mean(allErrorVals), np.std(allErrorVals))
                # standard deviation of the (scaled) absolute errors for this file
                errorVal = np.std(allErrorVals)
                # print(funcId, int(projtmp))
                # accumulated over all files that share the same function ID and projection code
                errorStDevForFunc[funcId][int(projtmp)] += errorVal
            # print(errorStDevForFunc)
                
            for projtmp in [0, 1, 2, 3]:
                for funcId in FUNC_IDS:
                    
                    # projection codes 2 and 3 are polar, 0 and 1 are Cartesian
                    projection = "Cartesian"
                    if(projtmp in [2, 3]):
                        projection="Polar"
                        
                    # filter out by projection, Cartesian or Polar
                    participantMovement = df[df['Function projection'] == projection]
                    # filter out by function ID
                    participantMovement = participantMovement[participantMovement['Function ID'] == funcId]
                    # filter out by test (experiment mode)
                    participantMovement = participantMovement[participantMovement['Test mode'] == experimentMode]
                    # filter out by participant
                    participantMovement = participantMovement[participantMovement['Participant name'] == participant]
                    # filter out by device
                    participantMovement = participantMovement[participantMovement['Device'] == device]
                    
                    # this is from the effective target width (Fitts law), a true-tried-tested formula
                    # we have to divide it by 2 because there are two passes through this curve
                    # and we're looking for the mean of the standard deviation
                    W_e = 4.133 * errorStDevForFunc[funcId][projtmp] / 2
                    # combinations without any accumulated error (nothing was read for them) are skipped
                    if(W_e <= 0):
                        continue

                    # calculate effective ID_e for this W_e
                    kappa = getIodForFunc(projection, experimentMode, funcId, 'kappa')

                    length = getIodForFunc(projection, experimentMode, funcId, 'length')

                    # alternative (logarithmic) formulation, currently disabled:
                    # Id_e = np.log2(length / W_e + kappa + 1)
                    Id_e = length / W_e + kappa

                    # movement time: mean drawing time over the matching log rows
                    # NOTE(review): if no log row matches the filters this is the mean of an
                    # empty array (nan) — confirm that cannot happen
                    MT = np.mean(participantMovement["Drawing time"].values)

                    # throughput for this curve and this specific user
                    TP = Id_e / MT
                    TPsForThisDevice.append(TP)
                # print(participant, projection, "(%s)" %projtmp, experimentMode, funcId, device, errorVal)
        # this is where the loop for each device ends --> we have to calculate
        # the avg throughput for this participant and this device

        TPsForThisParticipant[device] = np.mean(TPsForThisDevice)
    # the dictionary keys below fix the expected device names: "Mouse" and "Graphic tablet"
    print(participant, TPsForThisParticipant["Mouse"], TPsForThisParticipant["Graphic tablet"], sep=separator)
    # 3/0
    allTPsForMouse.append(TPsForThisParticipant["Mouse"])
    allTPsForGraphicTablet.append(TPsForThisParticipant["Graphic tablet"])


In [None]:
# Test whether the per-participant throughputs are normally distributed and draw one
# histogram per device.
#
# Each list is paired explicitly with the device name under which the throughput cell
# stored it ("Mouse" / "Graphic tablet"). This avoids relying on the position of the
# device inside a separate DEVICES list, which is not defined in this notebook.
for device, tps in [("Mouse", allTPsForMouse), ("Graphic tablet", allTPsForGraphicTablet)]:
    pValueNormDist = isDataNormallyDistributed(np.array(tps))

    plt.figure(figsize=(8,5))
    plt.hist(tps, color="lightgreen", edgecolor="green")
    title = "Average throughput distribution, " + device
    xlabel = "Average throughput, [bit/s]\nNorm dist p_value=%.5f" % (pValueNormDist)
    ylabel = "Participant count"

    # Croatian labels replace the English ones (the p-value is not shown in this variant)
    if useCroatian is True:
        title = "Histogram prosječne propusnosti, " + translate(device)
        xlabel = "Propusnost [bit/s]"
        ylabel = "Broj ispitanika"

    plt.title(title)
    plt.xlabel(xlabel)
    plt.ylabel(ylabel)
    # these limits are hard-coded, and they're here to make the histograms have the same x
    # and y ranges on both plots
    plt.xlim([3, 9.5])
    plt.ylim([0, 5.5])

    # saveFigure() replaces the space in "Graphic tablet" when building the file name
    saveFigure(participantDataFolderPath + "Throughput_dist" +  device)
    plt.show()
