# Autocorrelation of Eye Tracking Data
This notebook calculates the autocorrelation of raw eye tracking data. 

Objectives:
* Autocorrelation plot showing time points with especially high correlation within the time series of data

## Configuration

In [None]:
# General configuration
import os

# data_directory: str
#     Directory in which data files are stored.
data_directory = '.'

# install_missing_packages: bool
#     When True, packages that turn out to be missing are installed
#     automatically.
install_missing_packages = True

# use_conda: bool
#     Selects the installer: conda when True, pip when False. By default
#     this is True exactly when the CONDA_EXE environment variable is set,
#     i.e. when jupyter is run in a conda environment.
use_conda = os.environ.get('CONDA_EXE') is not None

### Check for missing packages

In [None]:
import importlib

def check_package(package, pip_pkg: str = None, conda_pkg: str = None):
    """Make sure `package` can be imported, installing it if necessary.

    Parameters
    ----------
    package:
        Import name of the module to look for.
    pip_pkg:
        Name handed to pip when it differs from the import name.
    conda_pkg:
        Name handed to conda when it differs from the import name.

    Raises
    ------
    RuntimeError
        If the package is missing and the global flag
        `install_missing_packages` is False.
    subprocess.CalledProcessError
        If the pip installation exits with a non-zero status.

    The installer is chosen by the global flag `use_conda`.
    """
    # `import importlib` alone does not guarantee that the `util` submodule
    # is loaded, so import it explicitly before calling find_spec.
    import importlib.util

    try:
        installed = importlib.util.find_spec(package) is not None
    except ImportError:
        # find_spec raises ModuleNotFoundError for a dotted name whose
        # parent package is missing; treat that as "not installed".
        installed = False

    if installed:
        return  # ok, package is already installed

    if not install_missing_packages:
        raise RuntimeError(f"{package} is not installed!")

    if use_conda:
        import conda.cli
        conda.cli.main('conda', 'install',  '-y', conda_pkg or package)
    else:
        import subprocess
        import sys
        # Use the interpreter of the running kernel so that the package
        # lands in the environment the notebook actually uses.
        subprocess.check_call([sys.executable, '-m', 'pip', 'install', pip_pkg or package])
        
class StopExecution(Exception):
    """Exception used to leave a cell early without an error traceback.

    Purely cosmetic: the traceback rendering hook below produces nothing.
    """

    def _render_traceback_(self):
        return None

## Requirements

To run all of this notebook, you need the following libraries to be installed:
* ImageIO (`imageio` and `imageio-ffmpeg`): for reading images and accessing the webcam
* Scikit-image (`scikit-image`) for some image manipulation
* MatPlotLib (`matplotlib`): mainly for displaying images in the notebook
* Dlib (`dlib`) providing the HOG face detector
* OpenCV (`opencv`) for real time applications
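* Imutils (`imutils`) for image manipulation with OpenCV
* NumPy (`numpy`) and pandas (`pandas`) for loading and handling the coordinate data
* Statsmodels (`statsmodels`) for computing the autocorrelation function
* NetworkX (`networkx`), which is imported by the notebook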

Running the following cell will create a file `graphs.yml` that can be used to setup a conda environment containing the required packages. If you already downloaded the file from my GitHub, skip the next cell and create the env directly from it.


In [None]:
%%writefile graphs.yml
name: graphs
channels:
  - conda-forge
  - defaults
dependencies:
  - python=3.6
  - jupyter
  - imageio
  - imageio-ffmpeg
  - matplotlib
  - scikit-image
  - opencv
  - networkx
  - pandas
  - statsmodels

### Environment Creation

To create the environment, open the terminal, go to the directory where you stored the graphs.yml file (the directory of the notebook) and type
```sh
conda env create -f graphs.yml
```
After running this command you have to activate the environment (Linux/MacOS: `conda activate graphs`, Windows: `activate graphs`) and then reopen the notebook in that environment.

# Main part
### Imports and directory definition
The data directory is adjusted to the folder arrangement of the GitHub repo. Adjust if necessary.

In [None]:
import numpy as np
import cv2 
import matplotlib.pyplot as plt
import pandas as pd
import networkx as nX
import glob
from pandas.plotting import autocorrelation_plot as AC_plot 
from statsmodels.graphics import tsaplots
from statsmodels.tsa.stattools import acf
from skimage.filters import gaussian



# Presumably the root directory of the original data; not referenced by the
# cells visible in this part of the notebook -- TODO confirm.
OG_DATA_PATH = './'
# Directory containing the eye-position text files (EyeBoxPos_*.txt) that
# the loading cells read.
DATA_PATH = './EyeBoxPos/'
# Presumably the output directory for the autocorrelation plots and tables;
# nothing visible here writes to it -- TODO confirm.
PROCESSED_DATA_PATH = './Results/Autocorrelation/'

## Autocorrelation Part

### Loading
* Loading in the viewing coordinates (EyePosBox) consisting of X,Y, and Z coordinates.

### Calculation
* Using the statsmodels acf autocorrelation function to calculate the autocorrelation for every time shift of the coordinate series (for each coordinate separately).
* I save these into a table.

### Plotting
* Plotting the autocorrelation plot of (1) each coordinate separately and (2) the mean of coordinate correlations on top of each other.

### Saving
* Saving the plot and autocorrelation table. 

In [None]:
# Load every eye-position text file in DATA_PATH into one frame.
# The files are sorted so that the row order is reproducible, and the column
# names are passed explicitly so that the first sample of each file is not
# consumed as a header row.
eye_files = sorted(glob.glob(os.path.join(DATA_PATH, '*.txt')))
if not eye_files:
    raise FileNotFoundError(f"No .txt files found in {DATA_PATH!r}")
df = pd.concat(pd.read_csv(path, names=['X','Y','Z']) for path in eye_files)

In [None]:
# Loading the first data to test the script
data = pd.read_csv(DATA_PATH + 'EyeBoxPos_VP1996.txt', names=['X','Y','Z'])

# X and Z still carry round brackets and therefore need cleaning before they
# can be converted to float (presumably each row is stored as "(x, y, z)" --
# confirm against the raw files). Y is used as parsed.
for column in ('X', 'Z'):
    data[column] = (
        data[column]
        .astype(str)
        .str.replace('(', '', regex=False)
        .str.replace(')', '', regex=False)
        .astype('float')
    )

# Axis in minutes derived from the row index.
# NOTE(review): 0.03 looks like the sampling interval in seconds -- confirm.
data['time'] = data.index*0.03/60.0

labels = ['X','Y','Z','mean']

# Autocorrelation of each coordinate for every possible lag (0 .. n-1)
autoc_X = acf(data['X'],nlags=len(data['X'])-1)
autoc_Y = acf(data['Y'],nlags=len(data['Y'])-1)
autoc_Z = acf(data['Z'],nlags=len(data['Z'])-1)

# Lag 0 (normalised to 1 by acf) is skipped when deriving the y-limits so
# that it does not dominate the scaling.
start = 1

# Plotting
if autoc_X.size == autoc_Y.size == autoc_Z.size:
    upperX = np.max(data['time'])
    lowerX = np.min(data['time'])

    # Pad the y-range on both sides by 10 % of the largest correlation.
    autoc_tail = np.array([autoc_X[start:], autoc_Y[start:], autoc_Z[start:]])
    margin = 0.1*np.max(autoc_tail)
    upperY = np.max(autoc_tail) + margin
    lowerY = np.min(autoc_tail) - margin

    plt.figure(figsize=(15,15))
    # Keyword must be lowercase `linewidth`: capitalised property names are
    # rejected by current Matplotlib releases.
    plt.plot(data['time'],autoc_X,ls=':',c='b',linewidth=0.5)
    plt.plot(data['time'],autoc_Y,ls=':',c='g',linewidth=0.5)
    plt.plot(data['time'],autoc_Z,ls=':',c='r',linewidth=0.5)
    plt.plot(data['time'],(autoc_X+autoc_Y+autoc_Z)/3,'k',linewidth=2)
    plt.xlim([lowerX,upperX])
    plt.ylim([lowerY,upperY])
    plt.xlabel('Time (min)',fontsize=20)
    plt.ylabel('Correlation',fontsize=20)
    plt.legend(labels,fontsize=20,markerscale=10)
else:
    # `disp` does not exist in Python; report the mismatch with print.
    print('The sizes of autocorrelation array do not match')
    
