# DataFrame Logistics

In [1]:
import os

import numpy as np
from tesser import util

In [2]:
# Location of the Tesser behavioral data. Set the TESSER_DATA_DIR environment
# variable to point at the data on another machine; the fallback is the
# original author's local Dropbox path.
data_dir = os.environ.get(
    "TESSER_DATA_DIR", "/mnt/c/Users/rodv2/Dropbox/tesser_successor/Data"
)

In [3]:
# Print the module docstring and function summaries of tesser.util for reference.
help(util)

Help on module tesser.util in tesser:

NAME
    tesser.util

DESCRIPTION
    This util function is used to read in the behavioral data associated
    with TesserScan.
    It gets the associated data directories for a given participant:
        get_subj_dir(data_dir, subject_num)
    
    It reads in the files associated with the following tasks
    - structured learning task:
        load_struct_all(data_dir, subject_num)
        load_struct_run(data_dir, subject_num, part_num, run_num)
        drop_struct_nan(struct_data)
        get_struct_objects(struct_data)
    
    - inductive inference task:
        load_induct_df_all(data_dir, subject_num)
        load_induct_array_all(induct_dframe)
    
    - grouping task:
        load_group(data_dir, subject_num)

FUNCTIONS
    drop_struct_df_nan(struct_dframe)
        Remove null trials (NaNs) at the beginning of structure task scans.
    
    get_struct_objects(struct_dframe)
        Series of just objects of from structured learning task

In [6]:
# List of participant IDs (not a count). In the output below the IDs run
# from 100 to 138, with 118 and 134 absent.
util.subj_list()

[100,
 101,
 102,
 103,
 104,
 105,
 106,
 107,
 108,
 109,
 110,
 111,
 112,
 113,
 114,
 115,
 116,
 117,
 119,
 120,
 121,
 122,
 123,
 124,
 125,
 126,
 127,
 128,
 129,
 130,
 131,
 132,
 133,
 135,
 136,
 137,
 138]

In [7]:
# Load the structured-learning task data from data_dir into one DataFrame
# (presumably all subjects; the preview further down shows SubjNum 100 and 138).
structured_data = util.load_struct(data_dir)

In [12]:
# Display the structured-learning DataFrame (pandas shows only the first and last rows).
structured_data

Unnamed: 0,SubjNum,run,trial,objnum,file,orientnam,orientnum,resp,respnum,acc,rt,part,seqtype
0,100,1,1,10,object_2.jpg,cor,1.0,c,1.0,1.0,1.426618,1,
1,100,1,2,5,object_30.jpg,cor,1.0,c,1.0,1.0,0.759233,1,
2,100,1,3,4,object_12.jpg,cor,1.0,c,1.0,1.0,0.866177,1,
3,100,1,4,3,object_7.jpg,cor,1.0,c,1.0,1.0,0.712731,1,
4,100,1,5,1,object_6.jpg,cor,1.0,c,1.0,1.0,0.654236,1,
...,...,...,...,...,...,...,...,...,...,...,...,...,...
90904,138,6,145,21,object_28.jpg,cor,1.0,c,1.0,1.0,0.831956,2,1.0
90905,138,6,146,18,object_12.jpg,rot,0.0,n,2.0,1.0,0.839406,2,1.0
90906,138,6,147,17,object_18.jpg,cor,1.0,c,1.0,1.0,0.909276,2,1.0
90907,138,6,148,12,object_34.jpg,cor,1.0,c,1.0,1.0,0.907248,2,1.0


In [33]:
def is_missing(array, n_objects=None):
    """Describe which zero-based object numbers are absent from ``array``.

    Parameters
    ----------
    array : iterable of int
        Observed zero-based object numbers (e.g. a NumPy array of unique values).
    n_objects : int, optional
        Total number of objects expected, so the check covers
        ``0 .. n_objects - 1``. When omitted, the check covers
        ``0 .. max(array)``; in that mode an object numbered above the largest
        observed value can never be reported as missing.

    Returns
    -------
    str
        ``"is not missing objects"`` when nothing is absent,
        ``"is missing objects [...]"`` with the absent numbers in ascending
        order otherwise, or ``"has no objects"`` when ``array`` is empty and
        ``n_objects`` is not given.
    """
    observed = set(array)
    if n_objects is None:
        if not observed:
            # max() of an empty collection would raise; report it instead.
            return "has no objects"
        n_objects = int(max(observed)) + 1
    # Sort so the report is deterministic rather than in arbitrary set order.
    missing = sorted(set(range(n_objects)) - observed)
    if not missing:
        return "is not missing objects"
    return f"is missing objects {missing}"

def count_unique_objects(df, parts=None, search_by='objnum'):
    """Print, for each subject, whether any object numbers are absent.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a ``SubjNum`` column and the ``search_by`` column; a
        ``part`` column is also required when ``parts`` is given.
    parts : iterable, optional
        Values of the ``part`` column to report on separately. Each part is
        filtered from the subject's full data, and one line is printed per
        part before the subject's overall line.
    search_by : str
        Name of the column holding the object numbers.

    Returns
    -------
    None
        The report is printed, one line per subject (and per part).
    """
    for subj in util.subj_list():
        subj_df = df.query(f"SubjNum == {subj}")
        if parts is not None:
            for p in parts:
                # Filter from the subject's full data each time; re-filtering
                # an already-filtered frame would leave later parts empty.
                part_df = subj_df.query(f"part == {p}")
                # Subtract 1 so the numbers are zero-based, matching the range
                # starting at 0 that is_missing compares against.
                objs = part_df[search_by].unique() - 1
                print(f"Subject {subj} part {p} " + is_missing(objs))
        objs = subj_df[search_by].unique() - 1
        print(f"Subject {subj} " + is_missing(objs))

In [35]:
# Report, per subject, whether any object number (default 'objnum' column)
# is absent from the structured-learning data.
count_unique_objects(structured_data)

Subject 100 is not missing objects
Subject 101 is not missing objects
Subject 102 is not missing objects
Subject 103 is not missing objects
Subject 104 is not missing objects
Subject 105 is not missing objects
Subject 106 is not missing objects
Subject 107 is not missing objects
Subject 108 is not missing objects
Subject 109 is not missing objects
Subject 110 is not missing objects
Subject 111 is not missing objects
Subject 112 is not missing objects
Subject 113 is not missing objects
Subject 114 is not missing objects
Subject 115 is not missing objects
Subject 116 is not missing objects
Subject 117 is not missing objects
Subject 119 is not missing objects
Subject 120 is not missing objects
Subject 121 is not missing objects
Subject 122 is not missing objects
Subject 123 is not missing objects
Subject 124 is not missing objects
Subject 125 is not missing objects
Subject 126 is not missing objects
Subject 127 is not missing objects
Subject 128 is not missing objects
Subject 129 is not m

In [38]:
# Load the induction task data from data_dir and display it.
induction_data = util.load_induct(data_dir)
induction_data

Unnamed: 0,SubjNum,TrialNum,QuestType,Environment,CueNum,CueObject,Opt1Num,Option1,Opt2Num,Option2,Resp,Acc,RT,cue,opt1,opt2,response
0,100,1,Prim,ocean,2,object_23.jpg,16,object_28.jpg,20,object_16.jpg,,0,,1,15,19,
1,100,2,Bound2,desert,4,object_12.jpg,10,object_2.jpg,19,object_4.jpg,2.0,0,2.974157,3,9,18,1.0
2,100,3,Prim,desert,5,object_30.jpg,6,object_18.jpg,2,object_23.jpg,1.0,1,3.190848,4,5,1,0.0
3,100,4,Prim,ocean,2,object_23.jpg,9,object_35.jpg,19,object_4.jpg,2.0,1,3.373906,1,8,18,1.0
4,100,5,Prim,ocean,1,object_6.jpg,19,object_4.jpg,13,object_1.jpg,2.0,0,3.094699,0,18,12,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1549,138,38,Prim,desert,8,object_3.jpg,9,object_16.jpg,21,object_28.jpg,2.0,0,2.025821,7,8,20,1.0
1550,138,39,Prim,forest,14,object_1.jpg,19,object_35.jpg,15,object_6.jpg,2.0,1,1.997691,13,18,14,1.0
1551,138,40,Prim,ocean,21,object_28.jpg,14,object_1.jpg,2,object_36.jpg,2.0,1,1.114204,20,13,1,1.0
1552,138,41,Bound1,forest,11,object_13.jpg,10,object_9.jpg,16,object_30.jpg,1.0,0,0.623270,10,9,15,0.0


In [39]:
# Report, per subject, whether any object number is absent from the CueNum column.
count_unique_objects(induction_data, search_by='CueNum')

Subject 100 is not missing objects
Subject 101 is not missing objects
Subject 102 is not missing objects
Subject 103 is not missing objects
Subject 104 is not missing objects
Subject 105 is not missing objects
Subject 106 is not missing objects
Subject 107 is not missing objects
Subject 108 is not missing objects
Subject 109 is not missing objects
Subject 110 is not missing objects
Subject 111 is not missing objects
Subject 112 is not missing objects
Subject 113 is not missing objects
Subject 114 is not missing objects
Subject 115 is not missing objects
Subject 116 is not missing objects
Subject 117 is not missing objects
Subject 119 is not missing objects
Subject 120 is not missing objects
Subject 121 is not missing objects
Subject 122 is not missing objects
Subject 123 is not missing objects
Subject 124 is not missing objects
Subject 125 is not missing objects
Subject 126 is not missing objects
Subject 127 is not missing objects
Subject 128 is not missing objects
Subject 129 is not m

In [40]:
# Same check on the Opt1Num column. The reported numbers are zero-based and
# only cover values up to the largest one observed for that subject.
count_unique_objects(induction_data, search_by='Opt1Num')

Subject 100 is missing objcets [17, 3, 13]
Subject 101 is missing objcets [0, 2, 4, 16, 17]
Subject 102 is missing objcets [16, 9]
Subject 103 is missing objcets [16, 10]
Subject 104 is missing objcets [9, 10]
Subject 105 is missing objcets [17]
Subject 106 is missing objcets [9]
Subject 107 is missing objcets [17, 10, 14]
Subject 108 is missing objcets [17, 3]
Subject 109 is missing objcets [16, 19]
Subject 110 is missing objcets [16, 1, 9]
Subject 111 is missing objcets [17, 11]
Subject 112 is missing objcets [2, 3]
Subject 113 is missing objcets [16]
Subject 114 is missing objcets [0, 9, 2, 13]
Subject 115 is missing objcets [0, 3]
Subject 116 is missing objcets [2]
Subject 117 is missing objcets [9]
Subject 119 is missing objcets [16, 3]
Subject 120 is missing objcets [9, 10]
Subject 121 is missing objcets [17, 3]
Subject 122 is missing objcets [2, 14]
Subject 123 is missing objcets [16]
Subject 124 is missing objcets [17, 11]
Subject 125 is missing objcets [16, 14]
Subject 126 is 

In [42]:
# Same check on the Opt2Num column. The reported numbers are zero-based and
# only cover values up to the largest one observed for that subject.
count_unique_objects(induction_data, search_by='Opt2Num')

Subject 100 is missing objcets [16, 2, 15]
Subject 101 is missing objcets [9, 10, 3]
Subject 102 is missing objcets [2, 3]
Subject 103 is missing objcets [0, 1, 3, 17, 19]
Subject 104 is missing objcets [16, 2, 12]
Subject 105 is missing objcets [2]
Subject 106 is missing objcets [16, 19]
Subject 107 is missing objcets [16, 2, 7]
Subject 108 is missing objcets [16, 9]
Subject 109 is missing objcets [11, 3, 15]
Subject 110 is missing objcets [17, 2]
Subject 111 is missing objcets [8, 16, 4]
Subject 112 is missing objcets [9, 10]
Subject 113 is missing objcets [9]
Subject 114 is missing objcets [10, 3, 15]
Subject 115 is missing objcets [9]
Subject 116 is missing objcets [10, 18]
Subject 117 is missing objcets [16, 4]
Subject 119 is missing objcets [17, 10]
Subject 120 is missing objcets [16, 3]
Subject 121 is missing objcets [9, 2]
Subject 122 is missing objcets [16]
Subject 123 is missing objcets [17]
Subject 124 is missing objcets [2]
Subject 125 is missing objcets [0, 17, 18]
Subject