In [None]:
############################      features needed            #####################################
# data = {}

# data['global_id'] = global_id                                       # image name
# data['img_name']     = global_id + '.jpg'                           # image name
# data['node_num']    = single_app_data['node_num'].value             # total node number

# data['roi_labels']      = single_app_data['classes'][:]             # node labels
# data['edge_labels'] = single_app_data['edge_labels'][:]             # edge  labels

# data['det_boxes'] = single_app_data['boxes'][:]                     # box
# data['roi_scores'] = single_app_data['scores'][:]                   # detection score

# data['edge_num']    = data['edge_labels'].shape[0]                  # edge number
# data['features']        = single_app_data['feature'][:]             # features
# data['spatial_feat'] = single_spatial_data[:]                       # spatial features

# data['word2vec']     = self._get_word2vec(data['roi_labels'])       # word2vec, from roi_labels

In [1]:
'''
    resnet 50 feature extractor
    shape of resnet-50 avg_pool output is 2048
    additional avgpool to reduce the output to 1024
    # network of resnet50
    # torch.Size([1, 1024, 14, 14])
    # torch.Size([1, 2048, 7, 7])
    # torch.Size([1, 2048, 1, 1])
    # torch.Size([1, 1000])
'''
import torch
import torchvision

class Resnet50(torch.nn.Module):
    def __init__(self):
        super(Resnet50, self).__init__()
        pretrained_model = torchvision.models.resnet50(pretrained=True)
        self.feature_extractor = torch.nn.Sequential(*list(pretrained_model.children())[:-1])
        self.avgpool = torch.nn.AvgPool1d(2)

    def forward(self, x):
        x = self.feature_extractor(x)      #torch.Size([1, 2048, 1, 1])
        x = x.view(x.size(0), -1)          #torch.Size([1, 2048])
        x = self.avgpool(x[None,:,:])      #torch.Size([1, 1, 1024])
        x = x.view(x.size(0), -1)          #torch.Size([1, 1024])
        return x

In [2]:
###### spatial features extractor
'''
    stand-alone extract spatial features
    size = node x (node-1), 16 (5 + 5 + 4 + 2)
'''
import numpy as np
def center_offset(box1, box2, im_wh):
    '''
    '''
    c1 = [(box1[2]+box1[0])/2, (box1[3]+box1[1])/2]
    c2 = [(box2[2]+box2[0])/2, (box2[3]+box2[1])/2]
    offset = np.array(c1)-np.array(c2)/np.array(im_wh)
    return offset

def box_with_respect_to_img(box, im_wh):
    '''
        To get [x1/W, y1/H, x2/W, y2/H, A_box/A_img]
    '''
    # ipdb.set_trace()
    feats = [box[0]/(im_wh[0]+ 1e-6), box[1]/(im_wh[1]+ 1e-6), box[2]/(im_wh[0]+ 1e-6), box[3]/(im_wh[1]+ 1e-6)]
    box_area = (box[2]-box[0])*(box[3]-box[1])
    img_area = im_wh[0]*im_wh[1]
    feats +=[ box_area/(img_area+ 1e-6) ]
    return feats

def box1_with_respect_to_box2(box1, box2):
    '''
    '''
    feats = [ (box1[0]-box2[0])/(box2[2]-box2[0]+1e-6),
              (box1[1]-box2[1])/(box2[3]-box2[1]+ 1e-6),
              np.log((box1[2]-box1[0])/(box2[2]-box2[0]+ 1e-6)),
              np.log((box1[3]-box1[1])/(box2[3]-box2[1]+ 1e-6))   
            ]
    return feats

def calculate_spatial_feats(det_boxes, im_wh):
    '''
    '''
    spatial_feats = []
    for i in range(det_boxes.shape[0]):
        for j in range(det_boxes.shape[0]):
            if j == i: continue
            single_feat = []
            # features 5, 5, 4, 2
            box1_wrt_img = box_with_respect_to_img(det_boxes[i], im_wh)
            box2_wrt_img = box_with_respect_to_img(det_boxes[j], im_wh)
            box1_wrt_box2 = box1_with_respect_to_box2(det_boxes[i], det_boxes[j])
            offset = center_offset(det_boxes[i], det_boxes[j], im_wh)
            
            single_feat = single_feat + box1_wrt_img + box2_wrt_img + box1_wrt_box2 + offset.tolist()
            spatial_feats.append(single_feat)
    
    spatial_feats = np.array(spatial_feats)
    return spatial_feats


In [3]:
#System
import os
import sys
import cv2
import h5py

import torch
import torchvision.models

import numpy as np
from PIL import Image
from glob import glob
if sys.version_info[0] == 2: import xml.etree.cElementTree as ET
else: import xml.etree.ElementTree as ET

# input data and IO folder location
mlist = [1,2,3,4,5,6,7,9,10,11,12,14,15,16]

dir_root_gt = 'datasets/instruments18/seq_'
xml_dir_list = []

for i in mlist:
    xml_dir_temp = dir_root_gt + str(i) + '/xml/'
    seq_list_each = glob(xml_dir_temp + '/*.xml')
    xml_dir_list = xml_dir_list + seq_list_each
    
# global variables
INSTRUMENT_CLASSES = (  '', 'kidney', 'bipolar_forceps', 'fenestrated_bipolar', 
                        'prograsp_forceps', 'large_needle_driver', 'vessel_sealer',
                        'grasping_retractor', 'monopolar_curved_scissors', 
                        'ultrasound_probe', 'suction', 'clip_applier', 'stapler')

ACTION_CLASSES = (  'Idle', 'Grasping', 'Retraction', 'Tissue_Manipulation', 
                    'Tool_Manipulation', 'Cutting', 'Cauterization',
                    'Suction', 'Looping', 'Suturing', 'Clipping', 'Staple', 'Ultrasound_Sensing')

transform = torchvision.transforms.Compose([
                    torchvision.transforms.ToTensor(),
                    torchvision.transforms.Normalize(mean=[0.485, 0.456, 0.406],
                    std=[0.229, 0.224, 0.225])])

feature_network = Resnet50().cuda()
        
for index, _xml_dir in  enumerate(xml_dir_list):
    img_name = os.path.basename(xml_dir_list[index][:-4])
    _img_dir = os.path.dirname(os.path.dirname(xml_dir_list[index])) + '/left_frames/' + img_name + '.png'
    save_data_path = os.path.join(os.path.dirname(os.path.dirname(xml_dir_list[index])),'vsgat/resnet50_features1024')
    if not os.path.exists(save_data_path):
        os.makedirs(save_data_path)
    print(_img_dir)
    #if index == 2: break 
    
    
    _xml = ET.parse(_xml_dir).getroot()
    
    det_classes = []
    act_classes = []
    #node_bbox = []
    det_boxes_all = []
    c_flag = False
    
    for obj in _xml.iter('objects'):
        # object name and interaction type
        name = obj.find('name').text.strip()
        interact = obj.find('interaction').text.strip()
        det_classes.append(INSTRUMENT_CLASSES.index(str(name)))
        act_classes.append(ACTION_CLASSES.index(str(interact)))
        
        # bounding box
        bndbox = []
        bbox = obj.find('bndbox') 
        for i, pt in enumerate(['xmin', 'ymin', 'xmax', 'ymax']):         
            bndbox.append(int(bbox.find(pt).text))
        det_boxes_all.append(np.array(bndbox))
        
    if c_flag: continue
    
    tissue_num = len(np.where(np.array(det_classes)))
    node_num = len(det_classes)
    edges = np.cumsum(node_num - np.arange(tissue_num) -1)[-1]
    

    # parse the original data to get node labels
    edge_labels = np.zeros((edges, len(ACTION_CLASSES)))
    edge_index = 0
    for tissue in range (tissue_num):
        for obj_index in range(tissue+1, node_num):
            #print(edge_index, ";", tissue, obj_index)
            edge_labels[edge_index, act_classes[tissue_num+edge_index]] = 1 
            edge_index += 1

    ###To generate adjacent matrix and added additional bbox for edge feat extraction
    #instrument_num = node_num - 1
    #adj_mat = np.zeros((node_num, node_num))
    #adj_mat[0, :] = act_classes
    #adj_mat[:, 0] = act_classes
    #adj_mat = adj_mat.astype(int)
    #adj_mat[adj_mat > 0] = 1
    
    # roi features extraction
    # node features
    node_features = np.zeros((node_num, 1024))
    _img = Image.open(_img_dir).convert('RGB')
    _img = np.array(_img)
    for idx, bndbox in enumerate(det_boxes_all):
        roi = np.array(bndbox).astype(int)
        roi_image = _img[roi[1]:roi[3] + 1, roi[0]:roi[2] + 1, :]
        # plt.imshow(roi_image)
        # plt.show()
        roi_image = transform(cv2.resize(roi_image, (224, 224), interpolation=cv2.INTER_LINEAR))
        roi_image = torch.autograd.Variable(roi_image.unsqueeze(0)).cuda()
        feature = feature_network(roi_image)
        node_features[idx] = feature.data.cpu().numpy()
    # spatial_features
    spatial_features = np.array(calculate_spatial_feats(np.array(det_boxes_all), [1024, 1280]))

    # save to file
    hdf5_file = h5py.File(os.path.join(save_data_path, '{}_features.hdf5'.format(img_name)),'w')
    hdf5_file.create_dataset('img_name', data=img_name)
    hdf5_file.create_dataset('node_num', data=node_num)
    hdf5_file.create_dataset('classes', data=det_classes)
    hdf5_file.create_dataset('boxes', data=det_boxes_all)
    hdf5_file.create_dataset('edge_labels', data=edge_labels)
    hdf5_file.create_dataset('node_features', data=node_features)
    hdf5_file.create_dataset('spatial_features', data=spatial_features)
    hdf5_file.close()
    print('edges', edge_labels.shape, 'node_feat', node_features.shape, 'spatial_feat', spatial_features.shape)

#     np.save(os.path.join(save_data_path, '{}_node_features'.format(img_name)), node_features)
#     np.save(os.path.join(save_data_path, '{}_spatial_features'.format(img_name)), spatial_features)
        
#     print('node_num :',node_num)
#     print('det_class :',det_classes)
#     print('action_class :',act_classes)
#     print('tissue :',tissue_num, 'edges :', edges)
#     print('edge_labels :',edge_labels)
#     print('bbx :',det_boxes_all)
#     print('node_features :',node_features.shape)
#     print('spatial_features :',spatial_features.shape)

datasets/instruments18/seq_1/left_frames/frame135.png
edges (4, 13) node_feat (5, 1024) spatial_feat (20, 16)
datasets/instruments18/seq_1/left_frames/frame061.png
edges (3, 13) node_feat (4, 1024) spatial_feat (12, 16)
datasets/instruments18/seq_1/left_frames/frame055.png
edges (3, 13) node_feat (4, 1024) spatial_feat (12, 16)
datasets/instruments18/seq_1/left_frames/frame144.png
edges (4, 13) node_feat (5, 1024) spatial_feat (20, 16)
datasets/instruments18/seq_1/left_frames/frame129.png
edges (3, 13) node_feat (4, 1024) spatial_feat (12, 16)
datasets/instruments18/seq_1/left_frames/frame070.png
edges (2, 13) node_feat (3, 1024) spatial_feat (6, 16)
datasets/instruments18/seq_1/left_frames/frame142.png
edges (4, 13) node_feat (5, 1024) spatial_feat (20, 16)
datasets/instruments18/seq_1/left_frames/frame134.png
edges (3, 13) node_feat (4, 1024) spatial_feat (12, 16)
datasets/instruments18/seq_1/left_frames/frame069.png
edges (2, 13) node_feat (3, 1024) spatial_feat (6, 16)
datasets/ins

edges (3, 13) node_feat (4, 1024) spatial_feat (12, 16)
datasets/instruments18/seq_1/left_frames/frame039.png
edges (3, 13) node_feat (4, 1024) spatial_feat (12, 16)
datasets/instruments18/seq_1/left_frames/frame067.png
edges (3, 13) node_feat (4, 1024) spatial_feat (12, 16)
datasets/instruments18/seq_1/left_frames/frame125.png
edges (3, 13) node_feat (4, 1024) spatial_feat (12, 16)
datasets/instruments18/seq_1/left_frames/frame004.png
edges (3, 13) node_feat (4, 1024) spatial_feat (12, 16)
datasets/instruments18/seq_1/left_frames/frame054.png
edges (2, 13) node_feat (3, 1024) spatial_feat (6, 16)
datasets/instruments18/seq_1/left_frames/frame118.png
edges (2, 13) node_feat (3, 1024) spatial_feat (6, 16)
datasets/instruments18/seq_1/left_frames/frame130.png
edges (3, 13) node_feat (4, 1024) spatial_feat (12, 16)
datasets/instruments18/seq_1/left_frames/frame082.png
edges (3, 13) node_feat (4, 1024) spatial_feat (12, 16)
datasets/instruments18/seq_1/left_frames/frame026.png
edges (3, 13

edges (3, 13) node_feat (4, 1024) spatial_feat (12, 16)
datasets/instruments18/seq_2/left_frames/frame144.png
edges (3, 13) node_feat (4, 1024) spatial_feat (12, 16)
datasets/instruments18/seq_2/left_frames/frame129.png
edges (2, 13) node_feat (3, 1024) spatial_feat (6, 16)
datasets/instruments18/seq_2/left_frames/frame070.png
edges (3, 13) node_feat (4, 1024) spatial_feat (12, 16)
datasets/instruments18/seq_2/left_frames/frame142.png
edges (2, 13) node_feat (3, 1024) spatial_feat (6, 16)
datasets/instruments18/seq_2/left_frames/frame134.png
edges (2, 13) node_feat (3, 1024) spatial_feat (6, 16)
datasets/instruments18/seq_2/left_frames/frame069.png
edges (3, 13) node_feat (4, 1024) spatial_feat (12, 16)
datasets/instruments18/seq_2/left_frames/frame096.png
edges (3, 13) node_feat (4, 1024) spatial_feat (12, 16)
datasets/instruments18/seq_2/left_frames/frame040.png
edges (3, 13) node_feat (4, 1024) spatial_feat (12, 16)
datasets/instruments18/seq_2/left_frames/frame132.png
edges (2, 13)

edges (4, 13) node_feat (5, 1024) spatial_feat (20, 16)
datasets/instruments18/seq_2/left_frames/frame013.png
edges (4, 13) node_feat (5, 1024) spatial_feat (20, 16)
datasets/instruments18/seq_2/left_frames/frame021.png
edges (3, 13) node_feat (4, 1024) spatial_feat (12, 16)
datasets/instruments18/seq_2/left_frames/frame148.png
edges (2, 13) node_feat (3, 1024) spatial_feat (6, 16)
datasets/instruments18/seq_2/left_frames/frame145.png
edges (2, 13) node_feat (3, 1024) spatial_feat (6, 16)
datasets/instruments18/seq_2/left_frames/frame126.png
edges (1, 13) node_feat (2, 1024) spatial_feat (2, 16)
datasets/instruments18/seq_2/left_frames/frame053.png
edges (3, 13) node_feat (4, 1024) spatial_feat (12, 16)
datasets/instruments18/seq_2/left_frames/frame133.png
edges (2, 13) node_feat (3, 1024) spatial_feat (6, 16)
datasets/instruments18/seq_2/left_frames/frame088.png
edges (4, 13) node_feat (5, 1024) spatial_feat (20, 16)
datasets/instruments18/seq_2/left_frames/frame035.png
edges (2, 13) 

edges (2, 13) node_feat (3, 1024) spatial_feat (6, 16)
datasets/instruments18/seq_3/left_frames/frame052.png
edges (2, 13) node_feat (3, 1024) spatial_feat (6, 16)
datasets/instruments18/seq_3/left_frames/frame042.png
edges (2, 13) node_feat (3, 1024) spatial_feat (6, 16)
datasets/instruments18/seq_3/left_frames/frame086.png
edges (2, 13) node_feat (3, 1024) spatial_feat (6, 16)
datasets/instruments18/seq_3/left_frames/frame077.png
edges (2, 13) node_feat (3, 1024) spatial_feat (6, 16)
datasets/instruments18/seq_3/left_frames/frame043.png
edges (2, 13) node_feat (3, 1024) spatial_feat (6, 16)
datasets/instruments18/seq_3/left_frames/frame127.png
edges (2, 13) node_feat (3, 1024) spatial_feat (6, 16)
datasets/instruments18/seq_3/left_frames/frame025.png
edges (1, 13) node_feat (2, 1024) spatial_feat (2, 16)
datasets/instruments18/seq_3/left_frames/frame099.png
edges (3, 13) node_feat (4, 1024) spatial_feat (12, 16)
datasets/instruments18/seq_3/left_frames/frame139.png
edges (2, 13) node

edges (3, 13) node_feat (4, 1024) spatial_feat (12, 16)
datasets/instruments18/seq_3/left_frames/frame044.png
edges (2, 13) node_feat (3, 1024) spatial_feat (6, 16)
datasets/instruments18/seq_3/left_frames/frame128.png
edges (2, 13) node_feat (3, 1024) spatial_feat (6, 16)
datasets/instruments18/seq_3/left_frames/frame046.png
edges (2, 13) node_feat (3, 1024) spatial_feat (6, 16)
datasets/instruments18/seq_3/left_frames/frame076.png
edges (2, 13) node_feat (3, 1024) spatial_feat (6, 16)
datasets/instruments18/seq_3/left_frames/frame032.png
edges (2, 13) node_feat (3, 1024) spatial_feat (6, 16)
datasets/instruments18/seq_3/left_frames/frame102.png
edges (3, 13) node_feat (4, 1024) spatial_feat (12, 16)
datasets/instruments18/seq_3/left_frames/frame056.png
edges (2, 13) node_feat (3, 1024) spatial_feat (6, 16)
datasets/instruments18/seq_3/left_frames/frame020.png
edges (1, 13) node_feat (2, 1024) spatial_feat (2, 16)
datasets/instruments18/seq_3/left_frames/frame119.png
edges (2, 13) nod

edges (3, 13) node_feat (4, 1024) spatial_feat (12, 16)
datasets/instruments18/seq_4/left_frames/frame050.png
edges (4, 13) node_feat (5, 1024) spatial_feat (20, 16)
datasets/instruments18/seq_4/left_frames/frame101.png
edges (4, 13) node_feat (5, 1024) spatial_feat (20, 16)
datasets/instruments18/seq_4/left_frames/frame131.png
edges (3, 13) node_feat (4, 1024) spatial_feat (12, 16)
datasets/instruments18/seq_4/left_frames/frame028.png
edges (3, 13) node_feat (4, 1024) spatial_feat (12, 16)
datasets/instruments18/seq_4/left_frames/frame092.png
edges (4, 13) node_feat (5, 1024) spatial_feat (20, 16)
datasets/instruments18/seq_4/left_frames/frame105.png
edges (4, 13) node_feat (5, 1024) spatial_feat (20, 16)
datasets/instruments18/seq_4/left_frames/frame074.png
edges (4, 13) node_feat (5, 1024) spatial_feat (20, 16)
datasets/instruments18/seq_4/left_frames/frame009.png
edges (3, 13) node_feat (4, 1024) spatial_feat (12, 16)
datasets/instruments18/seq_4/left_frames/frame014.png
edges (3, 

edges (4, 13) node_feat (5, 1024) spatial_feat (20, 16)
datasets/instruments18/seq_4/left_frames/frame056.png
edges (4, 13) node_feat (5, 1024) spatial_feat (20, 16)
datasets/instruments18/seq_4/left_frames/frame020.png
edges (3, 13) node_feat (4, 1024) spatial_feat (12, 16)
datasets/instruments18/seq_4/left_frames/frame119.png
edges (3, 13) node_feat (4, 1024) spatial_feat (12, 16)
datasets/instruments18/seq_4/left_frames/frame049.png
edges (4, 13) node_feat (5, 1024) spatial_feat (20, 16)
datasets/instruments18/seq_4/left_frames/frame037.png
edges (4, 13) node_feat (5, 1024) spatial_feat (20, 16)
datasets/instruments18/seq_4/left_frames/frame029.png
edges (4, 13) node_feat (5, 1024) spatial_feat (20, 16)
datasets/instruments18/seq_4/left_frames/frame058.png
edges (4, 13) node_feat (5, 1024) spatial_feat (20, 16)
datasets/instruments18/seq_4/left_frames/frame060.png
edges (4, 13) node_feat (5, 1024) spatial_feat (20, 16)
datasets/instruments18/seq_4/left_frames/frame097.png
edges (4, 

edges (3, 13) node_feat (4, 1024) spatial_feat (12, 16)
datasets/instruments18/seq_5/left_frames/frame131.png
edges (4, 13) node_feat (5, 1024) spatial_feat (20, 16)
datasets/instruments18/seq_5/left_frames/frame028.png
edges (3, 13) node_feat (4, 1024) spatial_feat (12, 16)
datasets/instruments18/seq_5/left_frames/frame092.png
edges (3, 13) node_feat (4, 1024) spatial_feat (12, 16)
datasets/instruments18/seq_5/left_frames/frame105.png
edges (3, 13) node_feat (4, 1024) spatial_feat (12, 16)
datasets/instruments18/seq_5/left_frames/frame074.png
edges (3, 13) node_feat (4, 1024) spatial_feat (12, 16)
datasets/instruments18/seq_5/left_frames/frame009.png
edges (4, 13) node_feat (5, 1024) spatial_feat (20, 16)
datasets/instruments18/seq_5/left_frames/frame014.png
edges (3, 13) node_feat (4, 1024) spatial_feat (12, 16)
datasets/instruments18/seq_5/left_frames/frame114.png
edges (3, 13) node_feat (4, 1024) spatial_feat (12, 16)
datasets/instruments18/seq_5/left_frames/frame059.png
edges (2, 

edges (2, 13) node_feat (3, 1024) spatial_feat (6, 16)
datasets/instruments18/seq_5/left_frames/frame049.png
edges (3, 13) node_feat (4, 1024) spatial_feat (12, 16)
datasets/instruments18/seq_5/left_frames/frame037.png
edges (2, 13) node_feat (3, 1024) spatial_feat (6, 16)
datasets/instruments18/seq_5/left_frames/frame029.png
edges (3, 13) node_feat (4, 1024) spatial_feat (12, 16)
datasets/instruments18/seq_5/left_frames/frame058.png
edges (2, 13) node_feat (3, 1024) spatial_feat (6, 16)
datasets/instruments18/seq_5/left_frames/frame060.png
edges (2, 13) node_feat (3, 1024) spatial_feat (6, 16)
datasets/instruments18/seq_5/left_frames/frame097.png
edges (3, 13) node_feat (4, 1024) spatial_feat (12, 16)
datasets/instruments18/seq_5/left_frames/frame075.png
edges (3, 13) node_feat (4, 1024) spatial_feat (12, 16)
datasets/instruments18/seq_5/left_frames/frame047.png
edges (2, 13) node_feat (3, 1024) spatial_feat (6, 16)
datasets/instruments18/seq_5/left_frames/frame019.png
edges (3, 13) n

edges (2, 13) node_feat (3, 1024) spatial_feat (6, 16)
datasets/instruments18/seq_6/left_frames/frame098.png
edges (2, 13) node_feat (3, 1024) spatial_feat (6, 16)
datasets/instruments18/seq_6/left_frames/frame036.png
edges (1, 13) node_feat (2, 1024) spatial_feat (2, 16)
datasets/instruments18/seq_6/left_frames/frame048.png
edges (2, 13) node_feat (3, 1024) spatial_feat (6, 16)
datasets/instruments18/seq_6/left_frames/frame064.png
edges (2, 13) node_feat (3, 1024) spatial_feat (6, 16)
datasets/instruments18/seq_6/left_frames/frame031.png
edges (2, 13) node_feat (3, 1024) spatial_feat (6, 16)
datasets/instruments18/seq_6/left_frames/frame006.png
edges (3, 13) node_feat (4, 1024) spatial_feat (12, 16)
datasets/instruments18/seq_6/left_frames/frame108.png
edges (3, 13) node_feat (4, 1024) spatial_feat (12, 16)
datasets/instruments18/seq_6/left_frames/frame103.png
edges (2, 13) node_feat (3, 1024) spatial_feat (6, 16)
datasets/instruments18/seq_6/left_frames/frame018.png
edges (3, 13) nod

edges (2, 13) node_feat (3, 1024) spatial_feat (6, 16)
datasets/instruments18/seq_6/left_frames/frame017.png
edges (3, 13) node_feat (4, 1024) spatial_feat (12, 16)
datasets/instruments18/seq_6/left_frames/frame091.png
edges (2, 13) node_feat (3, 1024) spatial_feat (6, 16)
datasets/instruments18/seq_6/left_frames/frame093.png
edges (2, 13) node_feat (3, 1024) spatial_feat (6, 16)
datasets/instruments18/seq_6/left_frames/frame147.png
edges (3, 13) node_feat (4, 1024) spatial_feat (12, 16)
datasets/instruments18/seq_6/left_frames/frame122.png
edges (2, 13) node_feat (3, 1024) spatial_feat (6, 16)
datasets/instruments18/seq_6/left_frames/frame027.png
edges (2, 13) node_feat (3, 1024) spatial_feat (6, 16)
datasets/instruments18/seq_6/left_frames/frame065.png
edges (3, 13) node_feat (4, 1024) spatial_feat (12, 16)
datasets/instruments18/seq_6/left_frames/frame063.png
edges (2, 13) node_feat (3, 1024) spatial_feat (6, 16)
datasets/instruments18/seq_6/left_frames/frame089.png
edges (2, 13) no

edges (3, 13) node_feat (4, 1024) spatial_feat (12, 16)
datasets/instruments18/seq_7/left_frames/frame079.png
edges (4, 13) node_feat (5, 1024) spatial_feat (20, 16)
datasets/instruments18/seq_7/left_frames/frame080.png
edges (4, 13) node_feat (5, 1024) spatial_feat (20, 16)
datasets/instruments18/seq_7/left_frames/frame113.png
edges (2, 13) node_feat (3, 1024) spatial_feat (6, 16)
datasets/instruments18/seq_7/left_frames/frame112.png
edges (2, 13) node_feat (3, 1024) spatial_feat (6, 16)
datasets/instruments18/seq_7/left_frames/frame033.png
edges (3, 13) node_feat (4, 1024) spatial_feat (12, 16)
datasets/instruments18/seq_7/left_frames/frame123.png
edges (2, 13) node_feat (3, 1024) spatial_feat (6, 16)
datasets/instruments18/seq_7/left_frames/frame000.png
edges (1, 13) node_feat (2, 1024) spatial_feat (2, 16)
datasets/instruments18/seq_7/left_frames/frame062.png
edges (3, 13) node_feat (4, 1024) spatial_feat (12, 16)
datasets/instruments18/seq_7/left_frames/frame110.png
edges (2, 13) 

edges (4, 13) node_feat (5, 1024) spatial_feat (20, 16)
datasets/instruments18/seq_7/left_frames/frame084.png
edges (4, 13) node_feat (5, 1024) spatial_feat (20, 16)
datasets/instruments18/seq_7/left_frames/frame073.png
edges (3, 13) node_feat (4, 1024) spatial_feat (12, 16)
datasets/instruments18/seq_7/left_frames/frame147.png
edges (2, 13) node_feat (3, 1024) spatial_feat (6, 16)
datasets/instruments18/seq_7/left_frames/frame122.png
edges (2, 13) node_feat (3, 1024) spatial_feat (6, 16)
datasets/instruments18/seq_7/left_frames/frame027.png
edges (3, 13) node_feat (4, 1024) spatial_feat (12, 16)
datasets/instruments18/seq_7/left_frames/frame065.png
edges (3, 13) node_feat (4, 1024) spatial_feat (12, 16)
datasets/instruments18/seq_7/left_frames/frame063.png
edges (3, 13) node_feat (4, 1024) spatial_feat (12, 16)
datasets/instruments18/seq_7/left_frames/frame089.png
edges (4, 13) node_feat (5, 1024) spatial_feat (20, 16)
datasets/instruments18/seq_9/left_frames/frame135.png
edges (3, 13

edges (1, 13) node_feat (2, 1024) spatial_feat (2, 16)
datasets/instruments18/seq_9/left_frames/frame125.png
edges (2, 13) node_feat (3, 1024) spatial_feat (6, 16)
datasets/instruments18/seq_9/left_frames/frame004.png
edges (2, 13) node_feat (3, 1024) spatial_feat (6, 16)
datasets/instruments18/seq_9/left_frames/frame054.png
edges (2, 13) node_feat (3, 1024) spatial_feat (6, 16)
datasets/instruments18/seq_9/left_frames/frame118.png
edges (2, 13) node_feat (3, 1024) spatial_feat (6, 16)
datasets/instruments18/seq_9/left_frames/frame130.png
edges (2, 13) node_feat (3, 1024) spatial_feat (6, 16)
datasets/instruments18/seq_9/left_frames/frame082.png
edges (2, 13) node_feat (3, 1024) spatial_feat (6, 16)
datasets/instruments18/seq_9/left_frames/frame026.png
edges (2, 13) node_feat (3, 1024) spatial_feat (6, 16)
datasets/instruments18/seq_9/left_frames/frame038.png
edges (1, 13) node_feat (2, 1024) spatial_feat (2, 16)
datasets/instruments18/seq_9/left_frames/frame085.png
edges (2, 13) node_

edges (2, 13) node_feat (3, 1024) spatial_feat (6, 16)
datasets/instruments18/seq_10/left_frames/frame146.png
edges (2, 13) node_feat (3, 1024) spatial_feat (6, 16)
datasets/instruments18/seq_10/left_frames/frame008.png
edges (2, 13) node_feat (3, 1024) spatial_feat (6, 16)
datasets/instruments18/seq_10/left_frames/frame068.png
edges (2, 13) node_feat (3, 1024) spatial_feat (6, 16)
datasets/instruments18/seq_10/left_frames/frame109.png
edges (2, 13) node_feat (3, 1024) spatial_feat (6, 16)
datasets/instruments18/seq_10/left_frames/frame116.png
edges (2, 13) node_feat (3, 1024) spatial_feat (6, 16)
datasets/instruments18/seq_10/left_frames/frame136.png
edges (2, 13) node_feat (3, 1024) spatial_feat (6, 16)
datasets/instruments18/seq_10/left_frames/frame094.png
edges (2, 13) node_feat (3, 1024) spatial_feat (6, 16)
datasets/instruments18/seq_10/left_frames/frame106.png
edges (2, 13) node_feat (3, 1024) spatial_feat (6, 16)
datasets/instruments18/seq_10/left_frames/frame095.png
edges (2, 

edges (2, 13) node_feat (3, 1024) spatial_feat (6, 16)
datasets/instruments18/seq_10/left_frames/frame021.png
edges (2, 13) node_feat (3, 1024) spatial_feat (6, 16)
datasets/instruments18/seq_10/left_frames/frame148.png
edges (1, 13) node_feat (2, 1024) spatial_feat (2, 16)
datasets/instruments18/seq_10/left_frames/frame145.png
edges (2, 13) node_feat (3, 1024) spatial_feat (6, 16)
datasets/instruments18/seq_10/left_frames/frame126.png
edges (2, 13) node_feat (3, 1024) spatial_feat (6, 16)
datasets/instruments18/seq_10/left_frames/frame053.png
edges (2, 13) node_feat (3, 1024) spatial_feat (6, 16)
datasets/instruments18/seq_10/left_frames/frame133.png
edges (2, 13) node_feat (3, 1024) spatial_feat (6, 16)
datasets/instruments18/seq_10/left_frames/frame088.png
edges (2, 13) node_feat (3, 1024) spatial_feat (6, 16)
datasets/instruments18/seq_10/left_frames/frame035.png
edges (2, 13) node_feat (3, 1024) spatial_feat (6, 16)
datasets/instruments18/seq_10/left_frames/frame012.png
edges (2, 

edges (2, 13) node_feat (3, 1024) spatial_feat (6, 16)
datasets/instruments18/seq_11/left_frames/frame116.png
edges (2, 13) node_feat (3, 1024) spatial_feat (6, 16)
datasets/instruments18/seq_11/left_frames/frame136.png
edges (2, 13) node_feat (3, 1024) spatial_feat (6, 16)
datasets/instruments18/seq_11/left_frames/frame094.png
edges (2, 13) node_feat (3, 1024) spatial_feat (6, 16)
datasets/instruments18/seq_11/left_frames/frame106.png
edges (2, 13) node_feat (3, 1024) spatial_feat (6, 16)
datasets/instruments18/seq_11/left_frames/frame095.png
edges (2, 13) node_feat (3, 1024) spatial_feat (6, 16)
datasets/instruments18/seq_11/left_frames/frame041.png
edges (2, 13) node_feat (3, 1024) spatial_feat (6, 16)
datasets/instruments18/seq_11/left_frames/frame052.png
edges (2, 13) node_feat (3, 1024) spatial_feat (6, 16)
datasets/instruments18/seq_11/left_frames/frame042.png
edges (2, 13) node_feat (3, 1024) spatial_feat (6, 16)
datasets/instruments18/seq_11/left_frames/frame086.png
edges (2, 

edges (2, 13) node_feat (3, 1024) spatial_feat (6, 16)
datasets/instruments18/seq_11/left_frames/frame117.png
edges (2, 13) node_feat (3, 1024) spatial_feat (6, 16)
datasets/instruments18/seq_11/left_frames/frame087.png
edges (2, 13) node_feat (3, 1024) spatial_feat (6, 16)
datasets/instruments18/seq_11/left_frames/frame057.png
edges (2, 13) node_feat (3, 1024) spatial_feat (6, 16)
datasets/instruments18/seq_11/left_frames/frame083.png
edges (2, 13) node_feat (3, 1024) spatial_feat (6, 16)
datasets/instruments18/seq_11/left_frames/frame003.png
edges (2, 13) node_feat (3, 1024) spatial_feat (6, 16)
datasets/instruments18/seq_11/left_frames/frame034.png
edges (2, 13) node_feat (3, 1024) spatial_feat (6, 16)
datasets/instruments18/seq_11/left_frames/frame022.png
edges (2, 13) node_feat (3, 1024) spatial_feat (6, 16)
datasets/instruments18/seq_11/left_frames/frame072.png
edges (2, 13) node_feat (3, 1024) spatial_feat (6, 16)
datasets/instruments18/seq_11/left_frames/frame107.png
edges (2, 

edges (2, 13) node_feat (3, 1024) spatial_feat (6, 16)
datasets/instruments18/seq_12/left_frames/frame099.png
edges (2, 13) node_feat (3, 1024) spatial_feat (6, 16)
datasets/instruments18/seq_12/left_frames/frame139.png
edges (2, 13) node_feat (3, 1024) spatial_feat (6, 16)
datasets/instruments18/seq_12/left_frames/frame100.png
edges (2, 13) node_feat (3, 1024) spatial_feat (6, 16)
datasets/instruments18/seq_12/left_frames/frame011.png
edges (2, 13) node_feat (3, 1024) spatial_feat (6, 16)
datasets/instruments18/seq_12/left_frames/frame120.png
edges (2, 13) node_feat (3, 1024) spatial_feat (6, 16)
datasets/instruments18/seq_12/left_frames/frame141.png
edges (2, 13) node_feat (3, 1024) spatial_feat (6, 16)
datasets/instruments18/seq_12/left_frames/frame010.png
edges (2, 13) node_feat (3, 1024) spatial_feat (6, 16)
datasets/instruments18/seq_12/left_frames/frame115.png
edges (2, 13) node_feat (3, 1024) spatial_feat (6, 16)
datasets/instruments18/seq_12/left_frames/frame137.png
edges (2, 

edges (2, 13) node_feat (3, 1024) spatial_feat (6, 16)
datasets/instruments18/seq_12/left_frames/frame076.png
edges (2, 13) node_feat (3, 1024) spatial_feat (6, 16)
datasets/instruments18/seq_12/left_frames/frame032.png
edges (2, 13) node_feat (3, 1024) spatial_feat (6, 16)
datasets/instruments18/seq_12/left_frames/frame007.png
edges (2, 13) node_feat (3, 1024) spatial_feat (6, 16)
datasets/instruments18/seq_12/left_frames/frame102.png
edges (2, 13) node_feat (3, 1024) spatial_feat (6, 16)
datasets/instruments18/seq_12/left_frames/frame056.png
edges (2, 13) node_feat (3, 1024) spatial_feat (6, 16)
datasets/instruments18/seq_12/left_frames/frame020.png
edges (2, 13) node_feat (3, 1024) spatial_feat (6, 16)
datasets/instruments18/seq_12/left_frames/frame119.png
edges (2, 13) node_feat (3, 1024) spatial_feat (6, 16)
datasets/instruments18/seq_12/left_frames/frame049.png
edges (2, 13) node_feat (3, 1024) spatial_feat (6, 16)
datasets/instruments18/seq_12/left_frames/frame037.png
edges (2, 

edges (3, 13) node_feat (4, 1024) spatial_feat (12, 16)
datasets/instruments18/seq_14/left_frames/frame137.png
edges (3, 13) node_feat (4, 1024) spatial_feat (12, 16)
datasets/instruments18/seq_14/left_frames/frame050.png
edges (3, 13) node_feat (4, 1024) spatial_feat (12, 16)
datasets/instruments18/seq_14/left_frames/frame101.png
edges (3, 13) node_feat (4, 1024) spatial_feat (12, 16)
datasets/instruments18/seq_14/left_frames/frame131.png
edges (2, 13) node_feat (3, 1024) spatial_feat (6, 16)
datasets/instruments18/seq_14/left_frames/frame028.png
edges (3, 13) node_feat (4, 1024) spatial_feat (12, 16)
datasets/instruments18/seq_14/left_frames/frame092.png
edges (3, 13) node_feat (4, 1024) spatial_feat (12, 16)
datasets/instruments18/seq_14/left_frames/frame105.png
edges (3, 13) node_feat (4, 1024) spatial_feat (12, 16)
datasets/instruments18/seq_14/left_frames/frame074.png
edges (3, 13) node_feat (4, 1024) spatial_feat (12, 16)
datasets/instruments18/seq_14/left_frames/frame009.png
ed

edges (3, 13) node_feat (4, 1024) spatial_feat (12, 16)
datasets/instruments18/seq_14/left_frames/frame020.png
edges (3, 13) node_feat (4, 1024) spatial_feat (12, 16)
datasets/instruments18/seq_14/left_frames/frame119.png
edges (3, 13) node_feat (4, 1024) spatial_feat (12, 16)
datasets/instruments18/seq_14/left_frames/frame049.png
edges (3, 13) node_feat (4, 1024) spatial_feat (12, 16)
datasets/instruments18/seq_14/left_frames/frame037.png
edges (3, 13) node_feat (4, 1024) spatial_feat (12, 16)
datasets/instruments18/seq_14/left_frames/frame029.png
edges (3, 13) node_feat (4, 1024) spatial_feat (12, 16)
datasets/instruments18/seq_14/left_frames/frame058.png
edges (3, 13) node_feat (4, 1024) spatial_feat (12, 16)
datasets/instruments18/seq_14/left_frames/frame060.png
edges (3, 13) node_feat (4, 1024) spatial_feat (12, 16)
datasets/instruments18/seq_14/left_frames/frame097.png
edges (3, 13) node_feat (4, 1024) spatial_feat (12, 16)
datasets/instruments18/seq_14/left_frames/frame075.png
e

edges (2, 13) node_feat (3, 1024) spatial_feat (6, 16)
datasets/instruments18/seq_15/left_frames/frame131.png
edges (2, 13) node_feat (3, 1024) spatial_feat (6, 16)
datasets/instruments18/seq_15/left_frames/frame028.png
edges (2, 13) node_feat (3, 1024) spatial_feat (6, 16)
datasets/instruments18/seq_15/left_frames/frame092.png
edges (2, 13) node_feat (3, 1024) spatial_feat (6, 16)
datasets/instruments18/seq_15/left_frames/frame105.png
edges (2, 13) node_feat (3, 1024) spatial_feat (6, 16)
datasets/instruments18/seq_15/left_frames/frame009.png
edges (2, 13) node_feat (3, 1024) spatial_feat (6, 16)
datasets/instruments18/seq_15/left_frames/frame014.png
edges (2, 13) node_feat (3, 1024) spatial_feat (6, 16)
datasets/instruments18/seq_15/left_frames/frame114.png
edges (2, 13) node_feat (3, 1024) spatial_feat (6, 16)
datasets/instruments18/seq_15/left_frames/frame059.png
edges (2, 13) node_feat (3, 1024) spatial_feat (6, 16)
datasets/instruments18/seq_15/left_frames/frame098.png
edges (2, 

edges (2, 13) node_feat (3, 1024) spatial_feat (6, 16)
datasets/instruments18/seq_15/left_frames/frame029.png
edges (2, 13) node_feat (3, 1024) spatial_feat (6, 16)
datasets/instruments18/seq_15/left_frames/frame058.png
edges (2, 13) node_feat (3, 1024) spatial_feat (6, 16)
datasets/instruments18/seq_15/left_frames/frame060.png
edges (2, 13) node_feat (3, 1024) spatial_feat (6, 16)
datasets/instruments18/seq_15/left_frames/frame097.png
edges (1, 13) node_feat (2, 1024) spatial_feat (2, 16)
datasets/instruments18/seq_15/left_frames/frame075.png
edges (2, 13) node_feat (3, 1024) spatial_feat (6, 16)
datasets/instruments18/seq_15/left_frames/frame047.png
edges (2, 13) node_feat (3, 1024) spatial_feat (6, 16)
datasets/instruments18/seq_15/left_frames/frame019.png
edges (2, 13) node_feat (3, 1024) spatial_feat (6, 16)
datasets/instruments18/seq_15/left_frames/frame045.png
edges (2, 13) node_feat (3, 1024) spatial_feat (6, 16)
datasets/instruments18/seq_15/left_frames/frame111.png
edges (2, 

edges (2, 13) node_feat (3, 1024) spatial_feat (6, 16)
datasets/instruments18/seq_16/left_frames/frame074.png
edges (2, 13) node_feat (3, 1024) spatial_feat (6, 16)
datasets/instruments18/seq_16/left_frames/frame009.png
edges (2, 13) node_feat (3, 1024) spatial_feat (6, 16)
datasets/instruments18/seq_16/left_frames/frame014.png
edges (2, 13) node_feat (3, 1024) spatial_feat (6, 16)
datasets/instruments18/seq_16/left_frames/frame114.png
edges (2, 13) node_feat (3, 1024) spatial_feat (6, 16)
datasets/instruments18/seq_16/left_frames/frame059.png
edges (2, 13) node_feat (3, 1024) spatial_feat (6, 16)
datasets/instruments18/seq_16/left_frames/frame098.png
edges (2, 13) node_feat (3, 1024) spatial_feat (6, 16)
datasets/instruments18/seq_16/left_frames/frame036.png
edges (2, 13) node_feat (3, 1024) spatial_feat (6, 16)
datasets/instruments18/seq_16/left_frames/frame048.png
edges (2, 13) node_feat (3, 1024) spatial_feat (6, 16)
datasets/instruments18/seq_16/left_frames/frame064.png
edges (2, 

edges (2, 13) node_feat (3, 1024) spatial_feat (6, 16)
datasets/instruments18/seq_16/left_frames/frame097.png
edges (2, 13) node_feat (3, 1024) spatial_feat (6, 16)
datasets/instruments18/seq_16/left_frames/frame075.png
edges (2, 13) node_feat (3, 1024) spatial_feat (6, 16)
datasets/instruments18/seq_16/left_frames/frame047.png
edges (2, 13) node_feat (3, 1024) spatial_feat (6, 16)
datasets/instruments18/seq_16/left_frames/frame019.png
edges (2, 13) node_feat (3, 1024) spatial_feat (6, 16)
datasets/instruments18/seq_16/left_frames/frame045.png
edges (2, 13) node_feat (3, 1024) spatial_feat (6, 16)
datasets/instruments18/seq_16/left_frames/frame111.png
edges (1, 13) node_feat (2, 1024) spatial_feat (2, 16)
datasets/instruments18/seq_16/left_frames/frame002.png
edges (2, 13) node_feat (3, 1024) spatial_feat (6, 16)
datasets/instruments18/seq_16/left_frames/frame016.png
edges (2, 13) node_feat (3, 1024) spatial_feat (6, 16)
datasets/instruments18/seq_16/left_frames/frame104.png
edges (2, 

In [4]:
'''
###############                 surgery Word2Vec           ########################
        output size is vector of size 300
'''
import os
import h5py
import gensim

#Load Google's pre-trained Word2Vec model.
model = gensim.models.KeyedVectors.load_word2vec_format('datasets/word2vec/GoogleNews-vectors-negative300.bin', binary=True)  
original_keys = list(model.vocab.keys())
upper_keys = [str.upper(x) for x in original_keys]

# class names
INSTRUMENT_CLASSES = ['', 'kidney', 'bipolar_forceps', 'fenestrated_bipolar', 
                      'prograsp_forceps', 'large_needle_driver', 'vessel_sealer',
                      'grasping_retractor', 'monopolar_curved_scissors', 
                      'ultrasound_probe','suction', 'clip_applier', 'stapler']

instrument_class_to_w2v = ['', 'kidney', 'bipolar', 'fenestrated', 'grasp',
                      'needle', 'sealer', 'retractor', 'scissors', 'ultrasound',
                      'suction', 'clipper', 'stapler']

hico_word2vec = os.path.join('datasets/','surgicalscene_word2vec.hdf5')
file = h5py.File(hico_word2vec, 'w')

for i, name in enumerate(INSTRUMENT_CLASSES):
    print(name, ':', instrument_class_to_w2v[i])
    if name == '': continue
    else: 
        index = upper_keys.index(str.upper(instrument_class_to_w2v[i]))
        data = data=model[original_keys[index]]
        print(data.shape)
        file.create_dataset(name, data=model[original_keys[index]])
file.close()

 : 
kidney : kidney
(300,)
bipolar_forceps : bipolar
(300,)
fenestrated_bipolar : fenestrated
(300,)
prograsp_forceps : grasp
(300,)
large_needle_driver : needle
(300,)
vessel_sealer : sealer
(300,)
grasping_retractor : retractor
(300,)
monopolar_curved_scissors : scissors
(300,)
ultrasound_probe : ultrasound
(300,)
suction : suction
(300,)
clip_applier : clipper
(300,)
stapler : stapler
(300,)
