### Deep Learning in the Eye Tracking World 
#### the tutorial presented during ETRA 2021 (https://etra.acm.org/2021/acceptedtutorials.html)
#### the code downloaded from: https://github.com/kasprowski/etra2021
@author: pawel@kasprowski.pl


# Classification of "radio" samples using DecisionTree
## The script loads data from the `radio` folder and classifies each sample as one of:
- L - layman
- R - resident
- S - radiology specialist  


In [1]:
from sklearn.metrics import classification_report,confusion_matrix,accuracy_score, cohen_kappa_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelBinarizer
from sklearn.tree import DecisionTreeClassifier

## Download the dataset

In [2]:
import requests
import zipfile
# Fetch the dataset archive. A timeout keeps the cell from hanging forever on a
# dead connection, and raise_for_status() fails fast on an HTTP error instead of
# silently saving an error page as radio.zip.
r = requests.get("http://www.kasprowski.pl/etra2021/radio.zip", allow_redirects=True, timeout=60)
r.raise_for_status()
# Write through a context manager so the handle is flushed and closed before the
# archive is reopened for extraction below.
with open('radio.zip', 'wb') as zip_file:
    zip_file.write(r.content)
print("Downloaded radio.zip")
# Extract everything into the local "radio" directory (created if missing).
with zipfile.ZipFile("radio.zip","r") as zip_ref:
    zip_ref.extractall("radio")
print("Uzipped to /radio directory")

Downloaded radio.zip
Uzipped to /radio directory


## Load samples and labels

In [3]:
from keras_preprocessing.sequence import pad_sequences
import os
import numpy as np

def load_files(indir,sequence_len=1000,dtype='int32'):
    """Load all CSV recordings from a directory into one fixed-length array.

    Each file is parsed with ``np.genfromtxt`` (comma-delimited, first line
    skipped as a header). Recordings with 100 rows or fewer are discarded.
    The class label of a recording is the first character of its file name.

    Parameters
    ----------
    indir : str
        Directory containing the recording files.
    sequence_len : int, default 1000
        Number of rows every recording is padded/truncated to by
        ``pad_sequences`` (which, with its default settings, pads and
        truncates at the beginning of a sequence).
    dtype : str or numpy dtype, default 'int32'
        Element type of the returned samples, forwarded to ``pad_sequences``.
        The default matches ``pad_sequences``' own default and therefore the
        previous behaviour of this function; note that it truncates
        fractional values. Pass e.g. ``'float32'`` to keep them.

    Returns
    -------
    samples : numpy.ndarray
        Array of shape (n_recordings, sequence_len, n_columns - 1); the first
        column of every file (the timestamp) is dropped.
    labels : numpy.ndarray
        One single-character label per recording, in the same order.
    """
    samples = []
    labels = []
    # os.listdir returns entries in arbitrary order; sorting makes the sample
    # order (and thus any seeded train/test split downstream) reproducible.
    for file in sorted(os.listdir(indir)):
        path = os.path.join(indir, file)
        if not os.path.isfile(path):
            continue  # sub-directories cannot be parsed by genfromtxt
        sample = np.genfromtxt(path, delimiter=',', skip_header=1)
        # Require a 2-D table: a 1-D parse result would break the column
        # slicing below. Short recordings (<= 100 rows) are ignored.
        if sample.ndim == 2 and sample.shape[0]>100:
            samples.append(sample)
            labels.append(file[0])
    # Recordings differ in length, hence the object array before padding.
    samples = np.array(samples,dtype=object)
    labels = np.array(labels)
    samples = pad_sequences(samples,sequence_len,dtype=dtype)
    samples = samples[:,:,1:] ##omit timestamp
    return samples,labels

# Read the extracted recordings, fixing every one to 700 rows, and report the
# resulting array dimensions.
samples, labels = load_files("radio", sequence_len=700)
print(f"shape = {samples.shape}")


shape = (611, 700, 5)


## Classify and print results

In [4]:
    
#flatten every (time, feature) recording into a single feature vector;
#reshape(n, -1) leaves already-flat data untouched, so this step no longer
#raises if the cell is executed a second time
samples = samples.reshape(samples.shape[0], -1)

#one-hot encoding
lb = LabelBinarizer()
labels = lb.fit_transform(labels)
classesNum = labels.shape[1]
print ("Classes: {}".format(classesNum))

#split to training and test
(trainSamples, testSamples, trainLabels, testLabels) = train_test_split(samples, labels, test_size=0.25, random_state=42)

#seed the tree as well as the split: the classifier permutes features
#randomly when searching for splits, so without random_state the metrics
#below change from run to run
model = DecisionTreeClassifier(random_state=42)
model.fit(trainSamples, trainLabels)
treeResults = model.predict(testSamples)

#convert one-hot rows back to class indices once and reuse them
#NOTE(review): with one-hot targets the tree is trained as a multi-output
#problem; a prediction row containing only zeros is mapped to class 0 by argmax
testClasses = testLabels.argmax(axis=1)
treeClasses = treeResults.argmax(axis=1)

print(confusion_matrix(testClasses, treeClasses))
print(classification_report(testClasses, treeClasses))
treeAcc = accuracy_score(testClasses, treeClasses)
print("Accuracy Tree: {:.2f}".format(treeAcc))
print("Cohen's Kappa {:.2f}".format(cohen_kappa_score(testClasses, treeClasses)))

Classes: 3
[[13  5 18]
 [ 6 24 21]
 [ 6 30 30]]
              precision    recall  f1-score   support

           0       0.52      0.36      0.43        36
           1       0.41      0.47      0.44        51
           2       0.43      0.45      0.44        66

    accuracy                           0.44       153
   macro avg       0.45      0.43      0.44       153
weighted avg       0.45      0.44      0.44       153

Accuracy Tree: 0.44
Cohen's Kappa 0.12
