![CMCC](http://cmcc.ufabc.edu.br/images/logo_site.jpg)
# **Classificação de Eventos em Logs de Interação Capturados por Eye Tracker**

### Leandro Marega Ferreira Otani - RA 131710240
#### Universidade Federal do ABC - Programa de Pós-graduação em Ciência da Computação - Inteligência na Web e Big-Data

### O relatório e o notebook com a implementação individual podem ser conferidos na URL:

###  https://github.com/leandro-otani/BIGDATA2017/tree/master/atividades/projeto-final 

### **Implementação original - Algoritmo I-VT**

In [24]:
import math
import time
from operator import add

def euclideanDistance(coordinateA, coordinateB):
    """Return the straight-line distance between two samples.

    Only positions 1 and 2 of each sample are read (the x and y values of the
    (timestamp, x, y) tuples used in this file); position 0 is ignored.
    """
    deltaX = coordinateB[1] - coordinateA[1]
    deltaY = coordinateB[2] - coordinateA[2]
    squaredLength = math.pow(deltaX, 2) + math.pow(deltaY, 2)
    return math.sqrt(squaredLength)

def iterativeVelocityThreshold(dataset, threshold):
    """Find fixations in a gaze log with the velocity-threshold (I-VT) method.

    Every sample except the last is given the velocity needed to reach the
    sample that follows it. Samples whose velocity is above ``threshold`` are
    treated as saccades and discarded; each uninterrupted run of the remaining
    samples is collapsed into its centroid.

    Args:
        dataset: sequence of samples indexed as (timestamp, x, y). It must
            support slicing.
        threshold: velocity (distance per timestamp unit) above which a
            sample is labelled a saccade.

    Returns:
        A list with one ``[timestamp, x, y]`` centroid per fixation, in the
        order the fixations occur in ``dataset``. Empty or single-sample
        input yields an empty list.
    """
    def velocity(current, following):
        # The distance is computed here instead of through the module-level
        # euclideanDistance: that name is rebound further down this file to
        # a one-argument version, which would break later calls.
        distance = math.sqrt(math.pow(following[1] - current[1], 2) +
                             math.pow(following[2] - current[2], 2))
        elapsed = abs(current[0] - following[0])
        if elapsed == 0:
            # Duplicate timestamps used to raise ZeroDivisionError. A jump in
            # zero time is infinitely fast (saccade); no movement is speed 0.
            return float("inf") if distance > 0 else 0.0
        return distance / elapsed

    groups = []
    startNewGroup = True
    # The last sample has no successor, so it never receives a velocity and
    # never takes part in a fixation.
    for current, following in zip(dataset, dataset[1:]):
        if velocity(current, following) > threshold:
            # A saccade closes the running group; the next fixation sample
            # opens a new one.
            startNewGroup = True
            continue
        if startNewGroup:
            groups.append([])
            startNewGroup = False
        groups[-1].append(current)

    fixations = []
    for group in groups:
        size = len(group)
        # NOTE: `/` is kept as in the original, so on Python 2 all-integer
        # samples produce floor-divided centroids.
        fixations.append([
            sum(sample[0] for sample in group) / size,
            sum(sample[1] for sample in group) / size,
            sum(sample[2] for sample in group) / size,
        ])
    return fixations

# Demo run of the sequential I-VT implementation on nine hand-written
# (timestamp, x, y) samples with a velocity threshold of 0.9.
sampleGaze = [
    (1512264894, 30, 27),
    (1512264904, 35, 27),
    (1512264914, 39, 30),
    (1512264924, 30, 31),
    (1512264934, 40, 35),
    (1512264944, 300, 27),
    (1512264954, 315, 27),
    (1512264964, 320, 27),
    (1512264974, 317, 27),
]
print (iterativeVelocityThreshold(sampleGaze, 0.9))

[[1512264899L, 32, 27], [1512264959L, 317, 27]]


### **Implementação paralelizada - Algoritmo I-VT**

In [25]:
import math
import time  
from operator import add
# Obtain the shared SparkContext; `sc` is read as a global by
# parallelizedVelocityThreshold below.
# NOTE(review): SparkContext is not imported anywhere in the visible source -
# presumably the notebook kernel (e.g. a PySpark shell) injects it; confirm.
sc = SparkContext.getOrCreate()
def euclideanDistance(baseTuple):
    """Return the straight-line distance between the two samples in a pair.

    ``baseTuple`` holds two samples at positions 0 and 1. Only positions 1
    and 2 of each sample are read (the x and y values of the
    (timestamp, x, y) tuples used in this file).
    """
    start, end = baseTuple[0], baseTuple[1]
    horizontal = end[1] - start[1]
    vertical = end[2] - start[2]
    return math.sqrt(math.pow(horizontal, 2) + math.pow(vertical, 2))

def parallelizedVelocityThreshold(dataset, threshold):
    """Find fixations with the velocity-threshold (I-VT) method using Spark.

    Velocities and Saccade/Fixation labels are computed on RDDs. The labelled
    samples are then collected and grouped into fixations sequentially on the
    driver, and the per-group centroids are computed on an RDD again.

    Relies on the module-level ``sc`` SparkContext and on the one-argument
    ``euclideanDistance`` defined in this file.

    Args:
        dataset: sequence of samples indexed as (timestamp, x, y). It must
            support slicing.
        threshold: velocity (distance per timestamp unit) above which a
            sample is labelled a saccade.

    Returns:
        A list of ``(groupIndex, timestamp, x, y)`` tuples, one centroid per
        fixation, ordered by ``groupIndex``.
    """
    def velocity(pair):
        # pair is (sample, followingSample).
        distance = euclideanDistance(pair)
        elapsed = abs(pair[1][0] - pair[0][0])
        if elapsed == 0:
            # Duplicate timestamps used to raise ZeroDivisionError. A jump in
            # zero time is infinitely fast (saccade); no movement is speed 0.
            return float("inf") if distance > 0 else 0.0
        return distance / elapsed

    # Pair every sample with its successor; the last sample has none and is
    # therefore never classified.
    firstSetRDD = sc.parallelize(dataset[:-1])
    secondSetRDD = sc.parallelize(dataset[1:])
    classifiedVelocities = (
        firstSetRDD.zip(secondSetRDD)
        .map(lambda pair: (pair[0],
                           "Saccade" if velocity(pair) > threshold else "Fixation"))
        .sortBy(lambda labelled: labelled[0][0])
        .collect())

    # Grouping depends on the order of the samples, so it runs on the driver.
    groupIndex = -1
    saccadeHappened = False
    groupedSamples = []
    for sample, label in classifiedVelocities:
        if label == "Saccade":
            saccadeHappened = True
            continue
        if groupIndex < 0 or saccadeHappened:
            groupIndex += 1
            saccadeHappened = False
        # The trailing 1 is a per-sample counter, so a single reduce yields
        # both the coordinate sums and the group size.
        groupedSamples.append(
            (groupIndex, (sample[0], sample[1], sample[2], 1)))

    # Index-based lambdas replace the tuple-parameter lambdas, which are a
    # SyntaxError on Python 3 (PEP 3113).
    summedRDD = sc.parallelize(groupedSamples, 4).reduceByKey(
        lambda a, b: (a[0] + b[0], a[1] + b[1], a[2] + b[2], a[3] + b[3]))

    # NOTE: `/` is kept as in the original, so on Python 2 all-integer
    # samples produce floor-divided centroids.
    centroids = summedRDD.map(
        lambda kv: (kv[0],
                    kv[1][0] / kv[1][3],
                    kv[1][1] / kv[1][3],
                    kv[1][2] / kv[1][3]))

    # Without an explicit sort the rows come back in shuffle-partition order
    # rather than fixation order.
    return centroids.sortBy(lambda row: row[0]).collect()

# Demo run of the Spark I-VT implementation on nine hand-written
# (timestamp, x, y) samples with a velocity threshold of 0.9.
# The call is parenthesised, like the sequential demo earlier in this file,
# so the line parses both as a Python 2 print statement and as a Python 3
# print() call.
print (parallelizedVelocityThreshold([
    (1512264894, 30, 27),
    (1512264904, 35, 27),
    (1512264914, 39, 30),
    (1512264924, 30, 31),
    (1512264934, 40, 35),
    (1512264944, 300, 27),
    (1512264954, 315, 27),
    (1512264964, 320, 27),
    (1512264974, 317, 27)
], 0.9))

[(0, 1512264899L, 32, 27), (1, 1512264959L, 317, 27)]
