# Data analysis, group 3
Our subjects: 3, 5, 11, 17, 19, 25, 31, 33

In [41]:
# imports and stuff
from dataclasses import dataclass
from typing import List, Optional
import matplotlib.pyplot as plt
import statistics as stat
import pandas as pd
import numpy as np
import csv


## 1. Reading the data

In [42]:
# Numeric ids of the subjects analysed in this notebook, plus the 's<N>' form
# that is compared against the first CSV field when filtering the data.
subjects = [3, 5, 11, 17, 19, 25, 31, 33]
subjects_formatted = [f's{subject}' for subject in subjects]

In [43]:
@dataclass
class Sample:
    """One row of the data set, belonging to a single subject.

    Note that dataclass annotations are not enforced at runtime: whatever
    values the caller passes in are stored as-is.

    Attributes:
        sid: subject identifier (first CSV field), e.g. ``'s3'``.
        known: flag taken from the second CSV field. Presumably whether the
            stimulus was known to the subject -- TODO confirm.
        xi: values from every other CSV field starting at index 2.
            Presumably the x coordinates of the gaze points -- TODO confirm.
        yi: values from every other CSV field starting at index 3.
            Presumably the y coordinates of the gaze points -- TODO confirm.
    """
    sid: str
    known: bool
    xi: Optional[List[float]] = None
    yi: Optional[List[float]] = None


In [44]:
def parse_line(line) -> List:
    """Split one CSV line into its comma-separated fields.

    The trailing line terminator is removed first, so the last field of a
    line read with ``readlines()`` does not carry a ``'\\n'`` / ``'\\r\\n'``.

    Args:
        line (str): one raw line of the CSV file

    Returns:
        List[str]: the fields of the line, in order (empty fields are kept)
    """
    return line.rstrip('\r\n').split(',')


def parse_classes(lines) -> List[Sample]:
    """Turn raw CSV lines into ``Sample`` objects with typed fields.

    Each line is expected to look like ``sid,known,x1,y1,x2,y2,...``.
    The fields are converted to the types declared on ``Sample`` instead of
    being stored as raw strings (a raw non-empty string such as ``'false'``
    would otherwise always be truthy).

    Args:
        lines: iterable of raw CSV lines (str); blank lines are skipped

    Returns:
        List[Sample]: one sample per non-blank line, in input order

    Raises:
        ValueError: if the ``known`` field is not a recognised boolean
            spelling, or a coordinate field cannot be parsed as a float
    """
    # Accepted spellings of the boolean column. Assumed from the ``bool``
    # annotation on ``Sample.known`` -- TODO confirm against data/train.csv.
    known_flags = {'true': True, 'false': False, '1': True, '0': False}

    samples = []
    for line in lines:
        if not line.strip():
            continue  # nothing to parse on a blank line

        # strip() also removes a line terminator left on the last field
        fields = [field.strip() for field in parse_line(line)]
        sid, raw_known = fields[0], fields[1]

        try:
            known = known_flags[raw_known.lower()]
        except KeyError:
            # Fail loudly rather than silently mislabelling the sample
            raise ValueError(
                f"unrecognised 'known' flag {raw_known!r} for sample {sid!r}"
            ) from None

        # Ignore empty padding at the END of a row only; an empty field in the
        # middle would shift the x/y pairing, so float() is left to reject it.
        raw_coords = fields[2:]
        while raw_coords and not raw_coords[-1]:
            raw_coords.pop()
        coords = [float(value) for value in raw_coords]

        # Coordinates are interleaved: x1, y1, x2, y2, ...
        samples.append(Sample(sid, known, coords[0::2], coords[1::2]))

    return samples


In [45]:
# Read the raw lines; the file only needs to stay open for the read itself
with open('data/train.csv') as f:
    lines = f.readlines()

# Keep only the rows whose first CSV field is one of our subject ids
our_lines = [line for line in lines
             if line.partition(',')[0] in subjects_formatted]

samples = parse_classes(our_lines)

# Group the samples by subject id (dict keeps first-seen order)
sample_dict = {}
for sample in samples:
    if sample.sid not in sample_dict:
        sample_dict[sample.sid] = []
    sample_dict[sample.sid].append(sample)

# Quick sanity check: number of samples per subject
for key, val in sample_dict.items():
    print(key, len(val))


s5 34
s17 23
s19 14
s31 33
s3 29
s11 10
s25 17
s33 33


## 2. Fixation detection algorithm

In [46]:
def get_dispersion(x, y):
    """Average of the horizontal and vertical extent of a set of points.

    Idea from: https://github.com/ecekt/eyegaze

    Args:
        x: x coordinates (any sequence numpy can convert to floats)
        y: y coordinates (any sequence numpy can convert to floats)

    Returns:
        float: ((max(x) - min(x)) + (max(y) - min(y))) / 2
    """
    # Convert each axis once; inputs may be lists of numbers or numeric strings
    xs = np.array(x).astype(float)
    ys = np.array(y).astype(float)

    x_extent = xs.max() - xs.min()
    y_extent = ys.max() - ys.min()

    return (x_extent + y_extent) / 2


In [47]:
def idt(x, y, t, max_disp=10, min_dur=50000, dispersion_fn=None):
    """Detect fixations with the dispersion-threshold (I-DT) algorithm.

    A window is opened at the current sample and grown until it spans at
    least ``min_dur``. If the dispersion of that window is within
    ``max_disp`` the window is extended sample by sample for as long as the
    dispersion stays within the threshold, and is then reported as a
    fixation. Otherwise the window start moves forward by exactly one sample.

    Args:
        x: x coordinates of the gaze samples (indexable sequence)
        y: y coordinates of the gaze samples (same length as ``x``)
        t: timestamps of the samples (same length as ``x``); assumed to be
            numeric and non-decreasing
        max_disp: largest dispersion a window may have to count as a fixation
        min_dur: shortest time span, in the units of ``t``, that a window
            must cover to count as a fixation
        dispersion_fn: optional callable ``(xs, ys) -> number`` used to
            measure dispersion; defaults to this notebook's
            ``get_dispersion``

    Returns:
        list: one ``[start_time, end_time]`` pair per detected fixation, in
        chronological order, where both times belong to samples inside the
        fixation

    Raises:
        ValueError: if ``x``, ``y`` and ``t`` do not have the same length
    """
    # TODO: also return the centroid and the duration of each fixation

    if dispersion_fn is None:
        # Looked up at call time so the default follows the notebook's helper
        dispersion_fn = get_dispersion

    n = len(x)
    if len(y) != n or len(t) != n:
        raise ValueError("x, y and t must have the same length")

    fixations = []
    start = 0

    while start < n:
        # Initialize the window over the first points to cover the duration
        # threshold; `end` is the index of the last sample inside the window.
        end = start
        while end + 1 < n and t[end] - t[start] < min_dur:
            end += 1

        if t[end] - t[start] < min_dur:
            # The remaining samples cannot span min_dur, and neither can any
            # later start point, so no further fixation is possible.
            break

        # The window includes its own first sample (index `start`).
        xs = [x[k] for k in range(start, end + 1)]
        ys = [y[k] for k in range(start, end + 1)]

        if dispersion_fn(xs, ys) <= max_disp:
            # Add further points until one would push the dispersion over the
            # threshold; that point is left out of the fixation.
            while end + 1 < n:
                xs.append(x[end + 1])
                ys.append(y[end + 1])
                if dispersion_fn(xs, ys) > max_disp:
                    break
                end += 1

            fixations.append([t[start], t[end]])

            # Remove the window points: continue right after the fixation
            start = end + 1
        else:
            # Remove only the first point and retry from the next sample
            start += 1

    return fixations


## 3. Data analysis

In [48]:
def mfd(fixations):
    """Mean of the ``'duration'`` values of a set of fixations.

    (MFD presumably stands for "mean fixation duration" -- TODO confirm.)

    Args:
        fixations: any mapping-like object whose ``'duration'`` entry is a
            sized iterable of numbers, e.g. a pandas DataFrame with a
            ``duration`` column or a dict of lists

    Returns:
        float: arithmetic mean of the durations, or NaN when there are none
    """
    durations = fixations['duration']

    # Count the durations themselves: len() of the container is the row count
    # only for a DataFrame, not for e.g. a dict of columns.
    count = len(durations)
    if count == 0:
        return float('nan')  # mean of nothing is undefined; avoid 0/0

    return sum(durations) / count


def msa(fixations):
    """Placeholder: not implemented yet, always returns ``None``.

    (MSA presumably stands for "mean saccade amplitude" -- TODO confirm.)

    Args:
        fixations: currently unused
    """
    # TODO: implement
    return None


## 4. CSV file from the analysed data

In [49]:
# Column names of the result file, grouped by metric.
header = [
    'subject_id',
    # MFD split by the true / false condition
    'MFD_true', 'MFD_SD_true', 'MFD_false', 'MFD_SD_false',
    # MSA split by the true / false condition
    'MSA_true', 'MSA_SD_true', 'MSA_false', 'MSA_SD_false',
    # overall values
    'MFD_overall', 'MFD_overall_SD', 'MSA_overall', 'MSA_overall_SD',
]


In [50]:
# Write the results CSV file (only the header row so far)
with open("./results_group_3.csv", "w", encoding='UTF8', newline='') as f:
    writer = csv.writer(f)

    # Column names are written lower-cased and without surrounding whitespace
    normalized_header = [column.lower().strip() for column in header]
    writer.writerow(normalized_header)

    # TODO

## 5. Visualization and plots