# Description

Takes results of one case-study-based evaluation through Evaluator.exe and Preciser.exe and computes precision and recall.

### Imports

In [None]:
import pandas as pd
import numpy as np
from collections import defaultdict

# Settings

In [None]:
# Website whose case study is evaluated; one of 'webmd', 'walmart', 'guardian', 'cnn'
site = 'guardian'
# Identifiers of the participants taken into account by the evaluation
participants = ['P1', 'P2', 'P3', 'P4']
# Common prefix of all evaluation result files of the chosen site
evaluation = 'gt-{}'.format(site)
# Root directories of the evaluation results and of the stimuli
dataset_evaluation_dir = 'C:/GazeMiningDataset/Dataset_evaluation/case-study'
dataset_stimuli_dir = 'C:/GazeMiningDataset/Dataset_stimuli'

# Build up paths: every result file is '<evaluation dir>/<evaluation>-<kind>.csv'
labels_screencasts_filepath = '{}/{}-screencasts.csv'.format(dataset_evaluation_dir, evaluation)
labels_stimuli_filepath = '{}/{}-stimuli.csv'.format(dataset_evaluation_dir, evaluation)
contrib_filepath = '{}/{}-contrib.csv'.format(dataset_evaluation_dir, evaluation)
events_filepath = '{}/{}-events.csv'.format(dataset_evaluation_dir, evaluation)
# Directory with the stimuli of the chosen site
stimuli_dir = '/'.join([dataset_stimuli_dir, site, 'stimuli'])

# Load dataset

In [None]:
# Load the three tables that come with a header row, in this order:
#   1. evaluator labels of the stimuli
#   2. evaluator labels of the screencasts
#   3. contribution (tells which frames contained in the stimuli contributed
#      to the element in the task)
stimuli_df, screencasts_df, contrib_df = (
    pd.read_csv(csv_path)
    for csv_path in (labels_stimuli_filepath, labels_screencasts_filepath, contrib_filepath)
)

# Load events; the file is read without a header row, so the column names are supplied here
events_df = pd.read_csv(
    events_filepath,
    header=None,
    names=['timestamp', 'type', 'event'],
)

# Process events for general information

In [None]:
# Keep only the events that mark a switch of the evaluation mode
mode_changes_df = events_df[events_df.type == 'mode_change']


def _mode_timestamp(mode_changes, event_name):
    """Return the timestamp of the single `event_name` row in `mode_changes` as int.

    Raises ValueError when the event is missing or occurs more than once,
    because the duration computation below needs exactly one marker per mode.
    """
    matches = mode_changes.loc[mode_changes.event == event_name, 'timestamp']
    if len(matches) != 1:
        raise ValueError(
            "Expected exactly one '" + event_name + "' mode change, found " + str(len(matches)))
    # Extract the scalar explicitly; calling int() on a one-element Series is deprecated in pandas
    return int(matches.iloc[0])


# Get timestamps
start_ts = _mode_timestamp(mode_changes_df, 'mode_start')
videos_ts = _mode_timestamp(mode_changes_df, 'mode_videos')
stimuli_ts = _mode_timestamp(mode_changes_df, 'mode_stimuli')
end_ts = _mode_timestamp(mode_changes_df, 'mode_end')

# Compute durations of modes: the mode entered first lasts until the other one
# starts, the mode entered second lasts until the end marker
if videos_ts > stimuli_ts:
    # Stimuli mode came first, video mode second
    video_duration = end_ts - videos_ts
    stimuli_duration = videos_ts - stimuli_ts
else:
    # Video mode came first, stimuli mode second
    video_duration = stimuli_ts - videos_ts
    stimuli_duration = end_ts - stimuli_ts

# Print results (presumably timestamps are in milliseconds, hence the division by 1000 -- TODO confirm)
print('Video Mode [s]: ' + str(video_duration / 1000) + ', Stimuli Mode [s]: ' + str(stimuli_duration / 1000))

# Get information about visual stimuli

In [None]:
# Tally the stimuli by evaluator label: 1 is reported as "display AOI",
# 0 as "do not display AOI"; rows with any other label are not counted
stimuli_with_AOI = int((stimuli_df.label == 1).sum())
stimuli_without_AOI = int((stimuli_df.label == 0).sum())

print("Stimuli count: {}".format(stimuli_without_AOI + stimuli_with_AOI))
print("Stimuli count display AOI: {}".format(stimuli_with_AOI))
print("Stimuli count do not display AOI: {}".format(stimuli_without_AOI))

## Collect frames represented by marked stimuli

In [None]:
# Make groups by layer id from labeled stimuli.
# Group by the plain column name (not a one-element list) so that iterating
# yields scalar layer ids; with a list, newer pandas versions yield 1-tuples,
# which cannot be concatenated into the file path below.
layers = stimuli_df.groupby('layer_id')

# Set of frames that are represented by the stimuli
marked_stimuli_frames = defaultdict(set) # participant_id -> set of frames

# Go over layer groups
for layer_id, layer_stimuli in layers:

    # Get ids of stimuli that are marked (label == 1)
    marked_stimuli_ids = layer_stimuli.loc[layer_stimuli.label == 1, 'stimulus_id']

    # Go over marked stimuli and collect all represented frames across shots per screencast
    for stimulus_id in marked_stimuli_ids:
        # Read in information about the stimulus (which shots / frames are contained)
        shots_df = pd.read_csv(stimuli_dir + '/' + str(layer_id) + '/' + str(stimulus_id) + '-shots.csv')

        # Each row is one shot; its frame range is inclusive on both ends
        for session_id, frame_idx_start, frame_idx_end in zip(
                shots_df['session_id'], shots_df['frame_idx_start'], shots_df['frame_idx_end']):
            # The participant id is taken from the first two characters of the session id
            participant_id = session_id[:2].upper()
            # Put frames into the set, one set per screencast
            marked_stimuli_frames[participant_id].update(range(frame_idx_start, frame_idx_end + 1))
            
# print(marked_stimuli_frames)

## Retrieve contribution

In [None]:
# There are some duplicated frames in the contrib files, fix that
# Note: This happens, when one frame is separated for stimuli discovery
# into more than one layer and the element is found on both layers in the evaluation
#
# Within each session only the first row per frame index is kept. The
# per-session parts are joined with pd.concat, because DataFrame.append is no
# longer available in current pandas releases.
deduplicated_sessions = [
    session_frames.drop_duplicates(subset='frame_idx')
    for _, session_frames in contrib_df.groupby('session')
]
if deduplicated_sessions:
    contrib_df = pd.concat(deduplicated_sessions, ignore_index=True)
else:
    # No sessions at all: keep an empty frame that still has the original columns
    contrib_df = contrib_df.iloc[0:0]

print('Contrib Frame Count: ' + str(contrib_df.shape[0]))

In [None]:
# Count the contribution rows per label; the counts stay plain Python ints
pos_contrib_count, neg_contrib_count, neutral_count = (
    len(contrib_df[contrib_df.label == contrib_label])
    for contrib_label in ('POS_CONTRIB', 'NEG_CONTRIB', 'NEUTRAL')
)

print('POS_CONTRIB: {}'.format(pos_contrib_count))
print('NEG_CONTRIB: {}'.format(neg_contrib_count))
print('NEUTRAL: {}'.format(neutral_count))

## Collect frames marked in screencasts

In [None]:
# Marked frames of screencasts: participant id -> index of the rows labeled 1.
# Driven by the `participants` setting so that this cell stays consistent with
# the precision/recall computation, which iterates over the same list.
marked_screencast_frames = {
    participant: screencasts_df.index[screencasts_df[participant] == 1]
    for participant in participants
}

print(marked_screencast_frames)

# Count frames over all participants; only rows labeled 0 or 1 are counted
frames_with_AOI = sum(len(frames) for frames in marked_screencast_frames.values())
frames_without_AOI = sum(
    len(screencasts_df.index[screencasts_df[participant] == 0])
    for participant in participants
)

print('Annotated Frames: ' + str(frames_without_AOI + frames_with_AOI))
print('Display AOI: ' + str(frames_with_AOI))
print('Do not display AOI: ' + str(frames_without_AOI))

# Calculate precision and recall

In [None]:
# Compare both dictionaries (frames marked via stimuli vs. frames marked in screencasts)
stimuli_frames_count = 0
screencast_frames_count = 0
recall_count = 0
for participant in participants:

    # Get frames marked (indirectly) in stimuli and screencasts.
    # .get() avoids inserting an empty entry for participants without marked stimuli.
    stimuli_frames = set(marked_stimuli_frames.get(participant, ()))
    screencast_frames = set(marked_screencast_frames[participant])
    stimuli_frames_count += len(stimuli_frames)
    screencast_frames_count += len(screencast_frames)

    # Compute recall: frames that are contained in both sets
    recall_count += len(screencast_frames & stimuli_frames)

# Precision is based on the contribution labels, recall on the frame overlap.
# Report NaN instead of failing with a ZeroDivisionError when a denominator is zero.
contrib_total = pos_contrib_count + neg_contrib_count
precision = pos_contrib_count / contrib_total if contrib_total else float('nan')
recall = recall_count / screencast_frames_count if screencast_frames_count else float('nan')

print('Stimuli Frames Count: ' + str(stimuli_frames_count))
print('Screencast Frames Count: ' + str(screencast_frames_count))
print('Precision: ' + str(precision))
print('Recall: ' + str(recall))