In [1]:
from nilearn import image
import tensorflow as tf
from keras.applications.nasnet import preprocess_input
from keras.applications.nasnet import NASNetLarge
from keras.applications.nasnet import decode_predictions
from keras.preprocessing.image import load_img
from keras.preprocessing.image import img_to_array
import numpy as np
import pandas as pd
import nibabel as nib
import json, pickle
import os
import matplotlib.pyplot as plt

In [2]:
# Root directory of the BOLD5000 2.0 data; every other path in this notebook is
# built by string concatenation from it, so it must end with a path separator.
# The BOLD5000_DATA_DIR environment variable overrides the original hard-coded
# location; os.path.join(..., '') guarantees the trailing separator.
data_dir = os.path.join(
    os.environ.get('BOLD5000_DATA_DIR', '/local-scratch/localhome/mkhademi/BOLD5000_2.0/'), '')

# MS-COCO train2014 instance annotations. Later cells read
# coco_anns['annotations'][k]['image_id'] and ['category_id'].
# The `with` block closes the file on exit, so no explicit close() is needed.
with open(data_dir + 'image_data/MSCOCO/annotations/' + 'instances_train2014.json') as json_data:
    coco_anns = json.load(json_data)

In [3]:
# ImageNet localisation-challenge training annotations.
imagenet_anns = pd.read_csv(data_dir + 'image_data/LOC_train_solution.csv', sep = ',')

# First whitespace-separated token of every line of the synset mapping file, in
# file order. A token's position in this list is used later as the ImageNet
# class index.
imagenet_categories = []
with open(data_dir + 'image_data/LOC_synset_mapping.txt', 'r') as synset_file:
    for line in synset_file:
        fields = line.split()
        # A blank line has no token; skipping it avoids an IndexError.
        if fields:
            imagenet_categories.append(fields[0])

In [4]:
# Extra per-image annotations, looked up by image file name in later cells and
# compared against the strings 'face' and 'person_noface'.
# NOTE(review): pickle.load executes arbitrary code from the file; only load a
# pickle that comes from a trusted source.
extra_annotations_path = data_dir + 'image_data/extra_annotations.pickle'
with open(extra_annotations_path, 'rb') as pickle_file:
    extra_annotations = pickle.load(pickle_file)

In [5]:
# cat_convert: zero-based COCO category index (COCO category_id - 1, keys 0..90)
# -> list of ImageNet class indices (positions in imagenet_categories) that are
# treated as the same category. Every key starts out empty; only the categories
# that have an ImageNet counterpart are filled in below.
cat_convert = {coco_idx: [] for coco_idx in range(91)}
cat_convert.update({
    1: [444],
    2: [705, 751, 817, 829],
    4: [403],
    5: [779, 654],
    7: [555, 569, 717, 864],
    8: [449, 554, 625, 833, 814],
    9: [920],
    11: [919],
    13: [704],
    14: [703],
    15: [134] + list(range(7, 25)),
    16: list(range(281, 286)),
    17: list(range(151, 269)),
    19: [349],
    21: [385, 386],
    22: [388],
    23: [340],
    25: [515],
    26: [414],
    27: [879],
    28: [770],
    31: [457],
    34: [795],
    36: [805, 852, 522, 574, 722, 768],
    43: [898, 907, 440],
    44: [923],
    46: [968],
    48: [623],
    49: [910],
    50: [659, 809],
    51: [954],
    54: [950],
    55: [937],
    57: [934],
    58: [963],
    61: [423, 559, 765],
    62: [831],
    66: [532],
    67: [904],
    68: [526],
    69: [861],
    70: [799],
    71: [851],
    72: [620],
    73: [673],
    74: [761],
    75: [878],
    76: [487],
    77: [651],
    79: [859],
    81: [760],
    83: [917],
    84: [409, 530, 892],
    85: [883],
    87: [850],
    88: [589],
})

# Reverse lookup: ImageNet class index -> zero-based COCO category index.
cat_conv_rev = {
    imagenet_idx: coco_idx
    for coco_idx, imagenet_indices in cat_convert.items()
    for imagenet_idx in imagenet_indices
}

# super_cat: super-category id (0..11) -> the contiguous run of zero-based COCO
# category indices it contains. Super-category s covers
# range(super_cat_bounds[s], super_cat_bounds[s + 1]).
super_cat_bounds = [0, 1, 9, 15, 25, 33, 43, 51, 61, 71, 77, 83, 91]
super_cat = {
    super_idx: list(range(start, stop))
    for super_idx, (start, stop) in enumerate(zip(super_cat_bounds, super_cat_bounds[1:]))
}

# Reverse lookup: zero-based COCO category index -> super-category id.
super_cat_rev = {
    coco_idx: super_idx
    for super_idx, members in super_cat.items()
    for coco_idx in members
}

In [6]:
# Subject ids and the fifteen session labels 'ses-01' .. 'ses-15'.
subjs = ['CSI1', 'CSI2', 'CSI3', 'CSI4']
seses = ['ses-%02d' % ses_number for ses_number in range(1, 16)]

# Subject processed by the rest of the notebook.
sub = subjs[0]

# One stimulus file name per line of <sub>_imgnames.txt, kept in file order
# because a later cell consumes the list positionally, one entry per trial.
# Only the line terminator is stripped: slicing off the last character would
# truncate the final name when the file does not end with a newline. Lines are
# never dropped, so positions are preserved.
with open(data_dir + sub + '_imgnames.txt', 'r') as imgnames_file:
    imgnames = [line.rstrip('\r\n') for line in imgnames_file]

In [7]:
# Build per-image label vectors and category / super-category statistics for the
# stimuli listed in imgnames.
#
#   img_dict[name]        multi-hot label vector: length 90 for COCO images
#                         (index = COCO category_id - 1), length 1000 for
#                         ImageNet images (index = position in imagenet_categories).
#   img_dict_super[name]  multi-hot vector of length 12; slot 0 is set when
#                         extra_annotations marks the image as 'face', slots
#                         1..11 are the super-categories from super_cat.
#   *_count               number of distinct images per 'c_<id>' / 's_<id>' key.
#   *_count_trials        the same, counted once per entry of imgnames (repeated
#                         presentations of an image count every time).
#
# Side effects: writes image_data/coco_cat.txt and image_data/imagenet_cat.txt,
# one line per distinct image.

def _bump(counter, key):
    """Increment counter[key], treating a missing key as zero."""
    counter[key] = counter.get(key, 0) + 1


img_dict = {}
img_dict_super = {}
coco_category_count = {}
coco_super_count = {}
coco_category_count_trials = {}
coco_super_count_trials = {}
imagenet_category_count = {}
imagenet_super_count = {}
imagenet_category_count_trials = {}
imagenet_super_count_trials = {}
coco_total = 0
imagenet_total = 0
coco_total_trials = 0
imagenet_total_trials = 0

# Index the annotations by image id once, instead of rescanning the whole
# annotation list for every trial. Annotation order within an image is kept, so
# coco_cat.txt lists categories in the same order as a linear scan would.
coco_cats_by_image = {}
for ann in coco_anns['annotations']:
    coco_cats_by_image.setdefault(ann['image_id'], []).append(ann['category_id'] - 1)

with open(data_dir + 'image_data/coco_cat.txt', 'w') as f_coco, \
        open(data_dir + 'image_data/imagenet_cat.txt', 'w') as f_imagenet:
    for imgname in imgnames:
        # ---- COCO stimuli (file names starting with 'C') ----
        if imgname[0] == 'C':
            # Characters 15..26 of the file name hold the numeric COCO image id.
            img_id = int(imgname[15:27])
            image_cats = coco_cats_by_image.get(img_id, ())

            # Per-trial statistics: each category / super-category is counted
            # at most once per presentation.
            coco_total_trials += 1
            is_face = extra_annotations[imgname] == 'face'
            if is_face:
                _bump(coco_super_count_trials, 's_0')
            seen_cats = set()
            seen_supers = set()
            for category_id in image_cats:
                super_id = super_cat_rev[category_id]
                if category_id not in seen_cats:
                    _bump(coco_category_count_trials, 'c_' + str(category_id))
                    seen_cats.add(category_id)
                # Slot 0 is driven by the 'face' annotation only, never by COCO labels.
                if super_id > 0 and super_id not in seen_supers:
                    _bump(coco_super_count_trials, 's_' + str(super_id))
                    seen_supers.add(super_id)

            # Per-image statistics and label vectors: first presentation only.
            if imgname not in img_dict:
                f_coco.write(imgname + ': ')
                img_dict[imgname] = np.zeros(90, dtype=np.int32)
                img_dict_super[imgname] = np.zeros(12, dtype=np.int32)
                coco_total += 1
                if is_face:
                    img_dict_super[imgname][0] = 1
                    _bump(coco_super_count, 's_0')
                seen_cats = set()
                seen_supers = set()
                for category_id in image_cats:
                    img_dict[imgname][category_id] = 1
                    super_id = super_cat_rev[category_id]
                    if category_id not in seen_cats:
                        _bump(coco_category_count, 'c_' + str(category_id))
                        seen_cats.add(category_id)
                    if super_id > 0 and super_id not in seen_supers:
                        img_dict_super[imgname][super_id] = 1
                        _bump(coco_super_count, 's_' + str(super_id))
                        seen_supers.add(super_id)
                    # One token per annotation (duplicates included).
                    f_coco.write('c_' + str(category_id) + ' ')
                f_coco.write('\n')

        # ---- ImageNet stimuli (file names starting with 'n0' or 'n1') ----
        if imgname[0] == 'n' and (imgname[1] == '0' or imgname[1] == '1'):
            # The first nine characters of the file name are the synset id.
            category_id = imagenet_categories.index(imgname[:9])
            extra_tag = extra_annotations[imgname]
            has_person = extra_tag == 'person_noface' or extra_tag == 'face'
            is_face = extra_tag == 'face'
            has_coco_equivalent = category_id in cat_conv_rev

            # Per-trial statistics. A trial is added to the total at most once,
            # even when it has both a person tag and a COCO-equivalent class.
            if has_person:
                imagenet_total_trials += 1
                _bump(imagenet_category_count_trials, 'c_0')
            if is_face:
                _bump(imagenet_super_count_trials, 's_0')
            if has_coco_equivalent:
                if not has_person:
                    imagenet_total_trials += 1
                coco_cat = cat_conv_rev[category_id]
                super_id = super_cat_rev[coco_cat]
                _bump(imagenet_category_count_trials, 'c_' + str(coco_cat))
                _bump(imagenet_super_count_trials, 's_' + str(super_id))

            # Per-image statistics and label vectors: first presentation only.
            if imgname not in img_dict:
                img_dict_super[imgname] = np.zeros(12, dtype=np.int32)
                if has_person:
                    imagenet_total += 1
                    _bump(imagenet_category_count, 'c_0')
                if is_face:
                    img_dict_super[imgname][0] = 1
                    _bump(imagenet_super_count, 's_0')

                # The text file records the ImageNet class index, not the COCO one.
                f_imagenet.write(imgname + ': ')
                f_imagenet.write('c_' + str(category_id) + ' ')
                f_imagenet.write('\n')

                if has_coco_equivalent:
                    if not has_person:
                        imagenet_total += 1
                    coco_cat = cat_conv_rev[category_id]
                    _bump(imagenet_category_count, 'c_' + str(coco_cat))
                    super_id = super_cat_rev[coco_cat]
                    img_dict_super[imgname][super_id] = 1
                    _bump(imagenet_super_count, 's_' + str(super_id))

                img_dict[imgname] = np.zeros(1000, dtype=np.int32)
                img_dict[imgname][category_id] = 1

In [8]:
# Distinct ImageNet images per COCO-style category key 'c_0'..'c_90', one count
# per line; categories that never occurred print 0.
for i in range(91):
    print(imagenet_category_count.get('c_' + str(i), 0))

297
2
8
0
2
4
0
8
10
2
0
2
0
2
2
38
10
236
0
2
0
4
2
2
0
2
2
2
2
0
0
2
0
0
2
0
12
0
0
0
0
0
0
6
2
0
2
0
2
2
4
2
0
0
2
2
0
2
2
0
0
6
2
0
0
0
2
2
2
2
2
2
2
2
2
2
2
2
0
2
0
2
0
2
6
2
0
2
2
0
0


In [9]:
# ImageNet trials (every presentation counted) per COCO-style category key
# 'c_0'..'c_90', one count per line; categories that never occurred print 0.
for i in range(91):
    print(imagenet_category_count_trials.get('c_' + str(i), 0))

324
5
11
0
2
4
0
11
10
2
0
2
0
2
2
38
10
242
0
2
0
4
2
2
0
2
2
2
2
0
0
2
0
0
2
0
12
0
0
0
0
0
0
9
2
0
2
0
2
2
4
2
0
0
2
2
0
2
2
0
0
6
2
0
0
0
5
2
2
2
2
2
2
2
2
2
2
2
0
2
0
2
0
2
6
5
0
2
2
0
0


In [10]:
# Distinct ImageNet images per super-category key 's_0'..'s_11', one count per
# line; super-categories that never occurred print 0.
for i in range(12):
    print(imagenet_super_count.get('s_' + str(i), 0))

215
34
8
294
10
14
18
10
18
12
6
14


In [11]:
# ImageNet trials (every presentation counted) per super-category key
# 's_0'..'s_11', one count per line; super-categories that never occurred print 0.
for i in range(12):
    print(imagenet_super_count_trials.get('s_' + str(i), 0))

233
43
8
300
10
14
21
10
21
12
6
17


In [12]:
# Distinct COCO images per category key 'c_0'..'c_90', one count per line;
# categories that never occurred print 0.
for i in range(91):
    print(coco_category_count.get('c_' + str(i), 0))

916
39
156
41
63
46
62
65
48
64
36
0
19
6
91
78
57
63
50
36
42
57
24
65
84
0
68
51
0
0
97
53
21
25
79
43
57
30
43
55
38
82
46
104
0
36
129
40
55
52
118
42
46
28
48
41
34
17
37
27
26
177
67
71
49
0
175
0
0
37
0
63
55
40
42
38
70
22
46
4
87
38
0
87
94
64
13
22
4
10
0


In [13]:
# Distinct COCO images per category key 'c_0'..'c_90', one count per line;
# categories that never occurred print 0.
# NOTE(review): this cell prints the same dictionary as the cell directly above
# it, so its output is an exact repeat - confirm whether it can be removed.
for i in range(91):
    print(coco_category_count.get('c_' + str(i), 0))

916
39
156
41
63
46
62
65
48
64
36
0
19
6
91
78
57
63
50
36
42
57
24
65
84
0
68
51
0
0
97
53
21
25
79
43
57
30
43
55
38
82
46
104
0
36
129
40
55
52
118
42
46
28
48
41
34
17
37
27
26
177
67
71
49
0
175
0
0
37
0
63
55
40
42
38
70
22
46
4
87
38
0
87
94
64
13
22
4
10
0


In [14]:
# COCO trials (every presentation counted) per category key 'c_0'..'c_90', one
# count per line; categories that never occurred print 0.
for i in range(91):
    print(coco_category_count_trials.get('c_' + str(i), 0))

973
42
168
47
69
52
71
74
51
70
39
0
22
9
97
81
63
66
53
39
45
66
27
65
87
0
71
51
0
0
100
53
21
28
82
43
57
36
46
58
38
88
46
119
0
45
138
46
64
61
127
42
46
37
57
41
34
20
40
33
26
183
67
74
49
0
193
0
0
40
0
63
58
43
42
38
70
22
46
4
93
38
0
90
100
67
13
22
4
10
0


In [15]:
# Distinct COCO images per super-category key 's_0'..'s_11', one count per line;
# super-categories that never occurred print 0.
for i in range(12):
    print(coco_super_count.get('s_' + str(i), 0))

536
400
195
527
228
396
291
250
390
178
120
265


In [16]:
# COCO trials (every presentation counted) per super-category key 's_0'..'s_11',
# one count per line; super-categories that never occurred print 0.
for i in range(12):
    print(coco_super_count_trials.get('s_' + str(i), 0))

560
439
210
563
234
417
321
274
414
181
126
277


In [17]:
# Export one TFRecord example per usable trial of subject `sub`.
#
# For every session the 4-D beta volume is loaded; its last axis is iterated
# trial by trial and paired, in order, with the next entry of imgnames. A trial
# is exported when its stimulus is a COCO image, or an ImageNet image that maps
# to a COCO category or is tagged 'face' / 'person_noface'. Each example holds:
#   x           flattened beta volume of the trial (NaNs replaced by 0.0)
#   yhat        NASNetLarge prediction vector for the stimulus image
#   y_coco      length-90 COCO multi-hot label
#   y_imagenet  length-1000 ImageNet one-hot label (all zeros for COCO images)
#   y_super     length-12 super-category label
#   y_common    y_coco followed by y_super (length 102)
# Every example goes to bold5000_common; COCO stimuli are also written to
# bold5000_coco, and examples whose super-category slot 1 / 3 / 7 / 8 is set go
# to the vehicle / animal / food / furniture files respectively.

nasnet = NASNetLarge()

_tfrecord_writers = []


def _open_tfrecord(stem):
    """Open image_data/<stem>.tfrecords for writing and register it for cleanup."""
    writer = tf.io.TFRecordWriter(data_dir + 'image_data/' + stem + '.tfrecords')
    _tfrecord_writers.append(writer)
    return writer


try:
    writer_coco = _open_tfrecord('bold5000_coco')
    writer_common = _open_tfrecord('bold5000_common')
    writer_common_vehicle = _open_tfrecord('bold5000_common_vehicle')
    writer_common_animal = _open_tfrecord('bold5000_common_animal')
    writer_common_food = _open_tfrecord('bold5000_common_food')
    writer_common_furniture = _open_tfrecord('bold5000_common_furniture')

    # (index into the 12-slot super-category label, writer for that subset).
    # super_label[k] is the same element as common_label[90 + k].
    super_writers = (
        (1, writer_common_vehicle),
        (3, writer_common_animal),
        (7, writer_common_food),
        (8, writer_common_furniture),
    )

    trial_idx = 0  # position in imgnames; runs on across sessions
    for ses in seses:
        betas = nib.load(data_dir + sub + '_GLMbetas-TYPED-FITHRF-GLMDENOISE-RR_' + ses + '.nii.gz')
        img4d = np.array(betas.dataobj)
        print(img4d.shape) # (71, 89, 72, 370)
        img4d = np.nan_to_num(img4d, nan = 0.0)
        for j in range(img4d.shape[3]):
            x = np.reshape(img4d[:, :, :, j], (-1))
            imgname = imgnames[trial_idx]
            trial_idx += 1

            common = False
            coco_label = np.zeros(90, dtype=np.int32)
            imagenet_label = np.zeros(1000, dtype=np.int32)

            if imgname[0] == 'C':
                img_path = data_dir + 'image_data/MSCOCO/images/train2014/' + imgname
                coco_label = img_dict[imgname]
                super_label = img_dict_super[imgname]
                common = True

            if imgname[0] == 'n' and (imgname[1] == '0' or imgname[1] == '1') and imgname in img_dict:
                img_path = data_dir + 'image_data/ILSVRC/Data/CLS-LOC/train/' + imgname[:9] + '/' + imgname
                imagenet_label = img_dict[imgname]
                category_id = imagenet_categories.index(imgname[:9])
                # Translate the ImageNet class to its COCO category, if it has one.
                if category_id in cat_conv_rev:
                    coco_label[cat_conv_rev[category_id]] = 1
                    common = True
                # Both person tags set COCO slot 0.
                if extra_annotations[imgname] in ('face', 'person_noface'):
                    coco_label[0] = 1
                    common = True
                super_label = img_dict_super[imgname]

            if not common:
                continue

            common_label = np.concatenate((coco_label, super_label), axis=0)

            # Named `pixels`, not `image`, so the `image` module imported from
            # nilearn in the first cell is not overwritten.
            pixels = img_to_array(load_img(img_path, target_size=(331, 331)))
            pixels = pixels.reshape((1, pixels.shape[0], pixels.shape[1], pixels.shape[2]))
            yhat = nasnet.predict(preprocess_input(pixels))

            example = tf.train.Example(features=tf.train.Features(feature={
                'x': tf.train.Feature(float_list=tf.train.FloatList(value=x)),
                'yhat': tf.train.Feature(float_list=tf.train.FloatList(value=yhat[0])),
                'y_coco': tf.train.Feature(int64_list=tf.train.Int64List(value=coco_label)),
                'y_imagenet': tf.train.Feature(int64_list=tf.train.Int64List(value=imagenet_label)),
                'y_super': tf.train.Feature(int64_list=tf.train.Int64List(value=super_label)),
                'y_common': tf.train.Feature(int64_list=tf.train.Int64List(value=common_label))
                }))
            # Serialise once and reuse the bytes for every file the trial goes to.
            serialized = example.SerializeToString()

            writer_common.write(serialized)
            if imgname[0] == 'C':
                writer_coco.write(serialized)
            for super_id, subset_writer in super_writers:
                if super_label[super_id] == 1:
                    subset_writer.write(serialized)
finally:
    # Close every writer that was opened, even if the export stopped early.
    for open_writer in _tfrecord_writers:
        open_writer.close()

(71, 89, 72, 370)
(71, 89, 72, 370)
(71, 89, 72, 370)
(71, 89, 72, 333)
(71, 89, 72, 370)
(71, 89, 72, 333)
(71, 89, 72, 370)
(71, 89, 72, 333)
(71, 89, 72, 333)
(71, 89, 72, 370)
(71, 89, 72, 333)
(71, 89, 72, 333)
(71, 89, 72, 333)
(71, 89, 72, 333)
(71, 89, 72, 370)
