In [31]:
import os

import h5py
import numpy as np
import pandas as pd
import scipy.misc
import xarray as xr
from pathlib import Path
from result_caching import store
from sklearn.model_selection import StratifiedShuffleSplit
from tqdm import tqdm
from scipy.io import loadmat
from brainio_collection.knownfile import KnownFile as kf

from brainio_base.assemblies import BehavioralAssembly
from brainio_base.stimuli import StimulusSet
from brainio_contrib.packaging import package_stimulus_set, package_data_assembly

def collect_stimuli(data_path):
    stimuli = []

    # search images
    for i in range(1, 68):
        target_path = os.path.join(data_path / 'stimuli', 's_' + str(i) + '.jpg')
        filename = 's_' + str(i) + '.jpg'
        im_kf = kf(target_path)
        image_id = 'klab_vs_waldo_stimuli_' + str(i)
        image_label = 'stimuli'
        sample_number = i
        
        print(target_path, filename, image_id)


        stimuli.append({
            'image_current_local_file_path': target_path,
            'image_path_within_store': filename,
            'image_label': image_label,
            'image_id': image_id,
            'sample_number': sample_number,
        })

    # target images
    for i in range(1, 68):
        target_path = os.path.join(data_path / 'target', 't_' + str(i) + '.jpg')
        filename = 't_' + str(i) + '.jpg'
        im_kf = kf(target_path)
        image_id = 'klab_vs_waldo_target_' + str(i)
        image_label = 'target'
        sample_number = i
        
        print(target_path, filename, image_id)

        stimuli.append({
            'image_current_local_file_path': target_path,
            'image_path_within_store': filename,
            'image_label': image_label,
            'image_id': image_id,
            'sample_number': sample_number,
        })

    # target mask
    for i in range(1, 68):
        target_path = os.path.join(data_path / 'gt', 'gt_' + str(i) + '.jpg')
        filename = 'gt_' + str(i) + '.jpg'
        im_kf = kf(target_path)
        image_id = 'klab_vs_waldo_gt_' + str(i)
        image_label = 'gt'
        sample_number = i
        
        print(target_path, filename, image_id)
        

        stimuli.append({
            'image_current_local_file_path': target_path,
            'image_path_within_store': filename,
            'image_label': image_label,
            'image_id': image_id,
            'sample_number': sample_number,
        })

    stimuli = StimulusSet(stimuli)

    stimuli.image_paths = {row.image_id: row.image_current_local_file_path for row in stimuli.itertuples()}
    stimuli['image_file_name']= stimuli['image_path_within_store']

    return stimuli

def collect_data(data_path, sub_id):
    image_id = ['stimuli_' + str(i) for i in range(1, 68)]
    subjects = []
    for i in sub_id:
        subjects += [i]*len(image_id)

    S_data = np.load(os.path.join(data_path / 'human_data', 'human_all.npy'))
    I_data = np.load(os.path.join(data_path / 'human_data', 'I_human_all.npy'))
    data = np.zeros((67*len(sub_id), 81, 2), dtype=int)
    data[:,:80,:] = S_data
    data[:,80,:] = I_data

    assembly = BehavioralAssembly(data,
                               coords={'image_id': ('presentation', image_id*len(sub_id)),
                                       'subjects': ('presentation', subjects),
                                       'fixation': [*range(81)],
                                       'position': ['x', 'y']},
                               dims=['presentation', 'fixation', 'position'])
    return assembly


data_dir = Path().parent / 'search_datasets'
data_path = data_dir / 'waldo'

# create stimuli
stimuli = collect_stimuli(data_path)
stimuli.name = 'klab.Zhang2018.search_waldo'

# create assembly for different subjects
assembly = collect_data(data_path, [*range(1, 16)])
assembly.name = 'klab.Zhang2018search_waldo'

search_datasets/waldo/stimuli/s_1.jpg s_1.jpg klab_vs_waldo_stimuli_1
search_datasets/waldo/stimuli/s_2.jpg s_2.jpg klab_vs_waldo_stimuli_2
search_datasets/waldo/stimuli/s_3.jpg s_3.jpg klab_vs_waldo_stimuli_3
search_datasets/waldo/stimuli/s_4.jpg s_4.jpg klab_vs_waldo_stimuli_4
search_datasets/waldo/stimuli/s_5.jpg s_5.jpg klab_vs_waldo_stimuli_5
search_datasets/waldo/stimuli/s_6.jpg s_6.jpg klab_vs_waldo_stimuli_6
search_datasets/waldo/stimuli/s_7.jpg s_7.jpg klab_vs_waldo_stimuli_7
search_datasets/waldo/stimuli/s_8.jpg s_8.jpg klab_vs_waldo_stimuli_8
search_datasets/waldo/stimuli/s_9.jpg s_9.jpg klab_vs_waldo_stimuli_9
search_datasets/waldo/stimuli/s_10.jpg s_10.jpg klab_vs_waldo_stimuli_10
search_datasets/waldo/stimuli/s_11.jpg s_11.jpg klab_vs_waldo_stimuli_11
search_datasets/waldo/stimuli/s_12.jpg s_12.jpg klab_vs_waldo_stimuli_12
search_datasets/waldo/stimuli/s_13.jpg s_13.jpg klab_vs_waldo_stimuli_13
search_datasets/waldo/stimuli/s_14.jpg s_14.jpg klab_vs_waldo_stimuli_14
searc

search_datasets/waldo/gt/gt_26.jpg gt_26.jpg klab_vs_waldo_gt_26
search_datasets/waldo/gt/gt_27.jpg gt_27.jpg klab_vs_waldo_gt_27
search_datasets/waldo/gt/gt_28.jpg gt_28.jpg klab_vs_waldo_gt_28
search_datasets/waldo/gt/gt_29.jpg gt_29.jpg klab_vs_waldo_gt_29
search_datasets/waldo/gt/gt_30.jpg gt_30.jpg klab_vs_waldo_gt_30
search_datasets/waldo/gt/gt_31.jpg gt_31.jpg klab_vs_waldo_gt_31
search_datasets/waldo/gt/gt_32.jpg gt_32.jpg klab_vs_waldo_gt_32
search_datasets/waldo/gt/gt_33.jpg gt_33.jpg klab_vs_waldo_gt_33
search_datasets/waldo/gt/gt_34.jpg gt_34.jpg klab_vs_waldo_gt_34
search_datasets/waldo/gt/gt_35.jpg gt_35.jpg klab_vs_waldo_gt_35
search_datasets/waldo/gt/gt_36.jpg gt_36.jpg klab_vs_waldo_gt_36
search_datasets/waldo/gt/gt_37.jpg gt_37.jpg klab_vs_waldo_gt_37
search_datasets/waldo/gt/gt_38.jpg gt_38.jpg klab_vs_waldo_gt_38
search_datasets/waldo/gt/gt_39.jpg gt_39.jpg klab_vs_waldo_gt_39
search_datasets/waldo/gt/gt_40.jpg gt_40.jpg klab_vs_waldo_gt_40
search_datasets/waldo/gt/

In [32]:
assembly

<xarray.BehavioralAssembly 'klab.Zhang2018search_waldo' (presentation: 1005, fixation: 81, position: 2)>
array([[[ 542,  653],
        [ 507,  534],
        ...,
        [   0,    0],
        [   3,    3]],

       [[ 520,  655],
        [ 490,  647],
        ...,
        [   0,    0],
        [   9,    9]],

       ...,

       [[ 520,  654],
        [ 380,  709],
        ...,
        [   0,    0],
        [  10,   10]],

       [[ 510,  649],
        [ 481,  500],
        ...,
        [ 142, 1157],
        [  80,   80]]])
Coordinates:
  * fixation      (fixation) int64 0 1 2 3 4 5 6 7 8 ... 73 74 75 76 77 78 79 80
  * position      (position) <U1 'x' 'y'
  * presentation  (presentation) MultiIndex
  - image_id      (presentation) object 'stimuli_1' 'stimuli_2' ... 'stimuli_30'
  - subjects      (presentation) int64 1 1 1 1 1 1 1 1 1 1 ... 1 1 1 1 1 1 1 1 1

In [33]:
np.max(assembly.values == np.nan)

False

In [34]:
stimuli.image_paths

{'klab_vs_waldo_stimuli_1': 'search_datasets/waldo/stimuli/s_1.jpg',
 'klab_vs_waldo_stimuli_2': 'search_datasets/waldo/stimuli/s_2.jpg',
 'klab_vs_waldo_stimuli_3': 'search_datasets/waldo/stimuli/s_3.jpg',
 'klab_vs_waldo_stimuli_4': 'search_datasets/waldo/stimuli/s_4.jpg',
 'klab_vs_waldo_stimuli_5': 'search_datasets/waldo/stimuli/s_5.jpg',
 'klab_vs_waldo_stimuli_6': 'search_datasets/waldo/stimuli/s_6.jpg',
 'klab_vs_waldo_stimuli_7': 'search_datasets/waldo/stimuli/s_7.jpg',
 'klab_vs_waldo_stimuli_8': 'search_datasets/waldo/stimuli/s_8.jpg',
 'klab_vs_waldo_stimuli_9': 'search_datasets/waldo/stimuli/s_9.jpg',
 'klab_vs_waldo_stimuli_10': 'search_datasets/waldo/stimuli/s_10.jpg',
 'klab_vs_waldo_stimuli_11': 'search_datasets/waldo/stimuli/s_11.jpg',
 'klab_vs_waldo_stimuli_12': 'search_datasets/waldo/stimuli/s_12.jpg',
 'klab_vs_waldo_stimuli_13': 'search_datasets/waldo/stimuli/s_13.jpg',
 'klab_vs_waldo_stimuli_14': 'search_datasets/waldo/stimuli/s_14.jpg',
 'klab_vs_waldo_stimuli_