# Data analysis, group 3
Our subjects: 3, 5, 11, 17, 19, 25, 31, 33

In [26]:
# imports and stuff
from dataclasses import dataclass
from typing import List, Optional
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

## Reading the data

In [27]:
# Subject ids handled by our group, plus the 's<id>' labels that are matched
# against the first field of each data row.
subjects = [3, 5, 11, 17, 19, 25, 31, 33]
subjects_formatted = [f's{subject}' for subject in subjects]

In [28]:
@dataclass
class Sample:
    """One row of the data file, split into its parts.

    Attributes:
        sid: subject label taken from the first field of the row (e.g. 's3').
        known: second field of the row. NOTE(review): annotated as bool, but
            the parser in this notebook stores the raw text -- confirm the
            intended meaning and type.
        xi: every second field starting at the third one; presumably the
            x coordinates. The parser stores them as strings, not floats.
        yi: every second field starting at the fourth one; presumably the
            y coordinates. The parser stores them as strings, not floats.
    """
    sid: str
    known: bool
    xi: Optional[List[float]] = None
    yi: Optional[List[float]] = None


In [29]:
def parse_line(line):
    """Split one comma-separated row into its fields.

    Rows obtained from ``readlines()`` still carry their line terminator; it
    is removed before splitting so it does not end up inside the last field.

    Args:
        line (str): one raw row of the data file

    Returns:
        list of str: the fields of the row, in order
    """
    # Strip only the line terminator, not other whitespace inside the fields.
    return line.rstrip('\r\n').split(',')


def parse_classes(lines):
    """Turn raw data rows into Sample objects.

    Each row is split with ``parse_line``. The first field becomes ``sid``,
    the second ``known``; the remaining fields are de-interleaved so that
    fields 2, 4, 6, ... go to ``xi`` and fields 3, 5, 7, ... go to ``yi``.
    No type conversion is done: every value is kept as the string that was
    read from the row.

    Args:
        lines (list of str): raw rows of the data file

    Returns:
        list of Sample: one Sample per row, in input order
    """
    samples = []
    for raw_row in lines:
        fields = parse_line(raw_row)
        sid, known = fields[0], fields[1]
        samples.append(Sample(sid, known, fields[2::2], fields[3::2]))
    return samples


In [30]:
# Load every row of the training data.
with open('data/train.csv') as f:
    lines = f.readlines()

# Keep only the rows whose first comma-separated field is one of our
# 's<id>' subject labels, then parse them into Sample objects.
our_lines = list(filter(
    lambda row: row.split(',', 1)[0] in subjects_formatted,
    lines,
))

samples = parse_classes(our_lines)


## Fixation detection algorithm

In [31]:
def get_dispersion(x, y):
    """Compute the dispersion of a set of points.

    The dispersion is the mean of the horizontal and the vertical extent,
    i.e. ``((max(x) - min(x)) + (max(y) - min(y))) / 2``.
    Idea from: https://github.com/ecekt/eyegaze

    Args:
        x (array-like): x coordinates; values are converted to float
        y (array-like): y coordinates; values are converted to float

    Returns:
        float: amount of dispersion
    """
    # Convert once; the inputs may hold numeric strings.
    xs = np.array(x).astype(float)
    ys = np.array(y).astype(float)

    x_extent = xs.max() - xs.min()
    y_extent = ys.max() - ys.min()

    return (x_extent + y_extent) / 2

In [32]:
def our_idt(x, y, t, max_disp=10, min_dur=50000):
    """Detect fixations with a dispersion-threshold (I-DT style) algorithm.

    A window is opened at the first unprocessed sample and extended until it
    spans at least ``min_dur``. If the dispersion of that window (as computed
    by ``get_dispersion``) is at most ``max_disp``, the window is extended
    sample by sample for as long as the dispersion stays within the
    threshold and is then recorded as a fixation. Otherwise the window is
    moved forward by one sample.

    Args:
        x (sequence): x coordinates, one per sample
        y (sequence): y coordinates, one per sample
        t (sequence): time stamp of each sample, same length as ``x``;
            assumed to be in ascending order and in the same unit as
            ``min_dur``. TODO: the data set does not seem to contain a time
            variable, so callers have to construct one.
        max_disp (float): largest dispersion a fixation window may have
        min_dur (float): smallest time span a fixation window must cover

    Returns:
        list: one ``[start_time, end_time]`` pair per detected fixation,
        where both values are the time stamps of the first and the last
        sample that belong to the fixation.
    """
    n_points = len(x)
    fixations = []
    start = 0

    while start < n_points:
        # Initialize window over first points to cover the duration threshold.
        # `end` is the index of the last sample inside the window (inclusive).
        end = start
        while end + 1 < n_points and t[end] - t[start] < min_dur:
            end += 1

        if t[end] - t[start] < min_dur:
            # The samples that are left span less than min_dur; with ascending
            # time stamps no later start can do better, so we are done.
            break

        if get_dispersion(x[start:end + 1], y[start:end + 1]) <= max_disp:
            # Add additional points to the window until dispersion > threshold.
            # The candidate point is checked *before* it is accepted, so the
            # point that breaks the threshold is not part of the fixation.
            while (end + 1 < n_points
                   and get_dispersion(x[start:end + 2], y[start:end + 2]) <= max_disp):
                end += 1

            fixations.append([t[start], t[end]])

            # Remove window points from points.
            start = end + 1
        else:
            # Remove first point from points.
            start += 1

    return fixations


## Visualization and plots