# Vorverarbeitung der Daten

1. Importieren der benötigten Bibliotheken
2. Dateipfade einlesen (Abgabeverzeichnis 1_Daten/1_raw)
3. Einlesen der Eyetrackingdaten und Ausgabe von Kennzahlen
4. Eyetrackingdaten vorverarbeiten und abspeichern (Abgabeverzeichnis 1_Daten/2_processed)

In [42]:
# Schritt 1: Importieren der benötigten Bibliotheken

import sys
import os
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import matplotlib.patches as patches
import numpy as np
import math
from ipywidgets import interactive, IntSlider, FloatSlider
sys.path.append('../3_Lib/py')
from helper import plot, plot_with_image, plot_vectors, plot_vectors_with_image, plot_vectors_with_image_and_aois, plot_heatmap


In [66]:
# Schritt 2: Dateipfade einlesen (Abgabeverzeichnis 1_Daten/1_raw)

# siehe https://stackoverflow.com/questions/19587118/iterating-through-directories-with-python

root = '../1_Daten/1_raw'
file_names = []

for subdir, dirs, files in os.walk(root):
    for file in files:
        if 'annotation' not in file or '.csv' not in file:
            continue
        file_names.append(os.path.join(subdir, file).replace('\\', '/'))

In [67]:
# Schritt 3: Einlesen der Eyetrackingdaten und Ausgabe von Kennzahlen

info_titles = [
    'Frames',
    'Dauer (s)',
    'FPS',
    'Gesicht',
    'Gesicht Anteil',
    'Blick',
    'Blick Anteil',
    'Anteil Success',
    'Konfidenz > 90%',
    'Dateipfad'
]

info_data = []
for f in file_names:
    df = pd.read_csv(f, sep=',', engine='python')
    count_total = len(df['frame'])
    last_ts = df['server_timestamp'][count_total - 1]
    last_frame_number = df['frame'][count_total - 1]
    count_face = len(df[(df[['eye_lmk_X_0','eye_lmk_Y_0','eye_lmk_Z_0']] != 0.0).all(axis=1)]['frame'])
    count_gaze = len(df[(df[['gaze_direction_0_x','gaze_direction_0_y','gaze_direction_0_z']] != 0.0).all(axis=1)]['frame'])
    count_conf = len(df[(df[['landmark_detection_confidence']] >= 0.90).all(axis=1)]['frame'])
    count_success = len(df[(df[['landmark_detection_success']] == 1).all(axis=1)]['frame'])
    info_data.append([
        last_frame_number,
        last_ts,
        last_frame_number / float(last_ts),
        count_face,
        '%.2f%s' % (float(count_face) / last_frame_number * 100, '%'),
        count_gaze,
        '%.2f%s' % (float(count_gaze) / last_frame_number * 100, '%'),
        '%.2f%s' % (float(count_success) / last_frame_number * 100, '%'),
        '%.2f%s' % (float(count_conf) / last_frame_number * 100, '%'),
        f.replace('../1_data/1_raw/','')
    ])
    
pd.DataFrame(info_data, columns=info_titles)

Unnamed: 0,Frames,Dauer (s),FPS,Gesicht,Gesicht Anteil,Blick,Blick Anteil,Anteil Success,Konfidenz > 90%,Dateipfad
0,4319,1560339000.0,2.767988e-06,12592,291.55%,302,6.99%,6.99%,5.35%,../1_Daten/1_raw/10__annotation_2019_06_12_14:...
1,4268,1560419000.0,2.735162e-06,17297,405.27%,231,5.41%,5.41%,3.42%,../1_Daten/1_raw/30__annotation_2019_06_13_12:...
2,5321,1560417000.0,3.409985e-06,15278,287.13%,311,5.84%,5.84%,3.21%,../1_Daten/1_raw/29__annotation_2019_06_13_11:...
3,4771,1560415000.0,3.05752e-06,17324,363.11%,308,6.46%,6.46%,4.30%,../1_Daten/1_raw/26__annotation_2019_06_13_11:...
4,1837,1560344000.0,1.177305e-06,17325,943.11%,891,48.50%,48.50%,2.67%,../1_Daten/1_raw/12__annotation_2019_06_12_15:...
5,4276,1560354000.0,2.740405e-06,12190,285.08%,350,8.19%,8.19%,6.10%,../1_Daten/1_raw/20__annotation_2019_06_12_18:...
6,5282,1560520000.0,3.38477e-06,17398,329.38%,339,6.42%,6.42%,4.05%,../1_Daten/1_raw/45__annotation_2019_06_14_16:...
7,4537,1560505000.0,2.907392e-06,15831,348.93%,218,4.80%,4.80%,2.62%,../1_Daten/1_raw/43__annotation_2019_06_14_12:...
8,2348,1560413000.0,1.50473e-06,15961,679.77%,336,14.31%,14.31%,9.03%,../1_Daten/1_raw/25__annotation_2019_06_13_10:...
9,4439,1560429000.0,2.84473e-06,16173,364.34%,753,16.96%,16.96%,13.40%,../1_Daten/1_raw/36__annotation_2019_06_13_15:...


In [68]:
# Schritt 4: Eyetrackingdaten vorverarbeiten und abspeichern (Abgabeverzeichnis 1_Daten/2_processed)

def filter_df(df):
    df = df[(df[['eye_lmk_X_0','eye_lmk_Y_0','eye_lmk_Z_0']] != 0).all(axis=1)]
    df = df[(df[['gaze_direction_0_x','gaze_direction_0_y','gaze_direction_0_z']] != 0).all(axis=1)]
    df = df[(df[['landmark_detection_confidence']] >= 0.90).all(axis=1)]
    df = df[(df[['landmark_detection_success']] == 1).all(axis=1)]
        
    return df
    
def get_eye_center(df, landmarks):
    center_x = np.zeros(len(df), dtype=float)
    center_y = np.zeros(len(df), dtype=float)
    center_z = np.zeros(len(df), dtype=float)
    for mark in landmarks:
        center_x += np.array(df['eye_lmk_X_%d' % mark])
        center_y += np.array(df['eye_lmk_Y_%d' % mark])
        center_z += np.array(df['eye_lmk_Z_%d' % mark])
    return center_x / len(landmarks), center_y / len(landmarks), center_z / len(landmarks)
    
def process(f):
    df = filter_df(pd.read_csv(f, sep=',', engine='python'))
    eye_l_x, eye_l_y, eye_l_z = get_eye_center(df, [i for i in range(48, 56)])
    eye_r_x, eye_r_y, eye_r_z = get_eye_center(df, [i for i in range(20, 28)])
            
    df = pd.DataFrame({
        'frame': df['frame'],
        'face_id': df['face_id'],
        'timestamp': df['server_timestamp'],
        'confidence': df['landmark_detection_confidence'],
        'success': df['landmark_detection_success'],
        'position': df['annotation_pos'],
        'aoi': df['annotation_aoi'],
        'tester': df['annotation_test_person_id'][:-1],
        'client_id': df['client_id'],
        'eye_l_X': eye_l_x,
        'eye_l_Y': eye_l_y,
        'eye_l_Z': eye_l_z,
        'eye_r_X': eye_r_x,
        'eye_r_Y': eye_r_y,
        'eye_r_Z': eye_r_z,
        'gaze_l_X': df['gaze_direction_0_x'],
        'gaze_l_Y': df['gaze_direction_0_y'],
        'gaze_l_Z': df['gaze_direction_0_z']       
    })
    if not os.path.exists(os.path.dirname(f.replace('1_raw/', '2_processed/'))):
        os.makedirs(os.path.dirname(f.replace('1_raw/', '2_processed/')))
    
    df.to_csv(f.replace('1_raw/', '2_processed/').replace(':', '_'), index=False)

for f in file_names:
    if not '1_tuesday' in f:
        process(f)