# ================= Create Train and Test Data ================= 

In [None]:
import os
import sys
import random
import numpy as np
from PIL import Image
from glob import glob
from tqdm import tqdm
if sys.version_info[0] == 2:
    import xml.etree.cElementTree as ET
else:
    import xml.etree.ElementTree as ET

# Name of the dataset to convert. Must be one of the keys of DATASET_CONFIGS.
dataset = 'Youtube'

# Every dataset writes its crops to the same pair of destination folders.
train_folder = 'Classification_dataset/train_new/'
test_folder  = 'Classification_dataset/test_new/'

# Per-dataset settings. The keys are unpacked into module-level names of the
# same spelling further down, because the crop loop in this cell reads them
# as globals:
#   mlist              : sequence ids appended to dir_root_gt to locate a sequence
#   seq_list           : id written into each crop's file name (parallel to mlist)
#   train_list         : ids from mlist whose crops go to train_folder;
#                        every other sequence goes to test_folder
#   dir_root_gt        : path prefix the sequence id is appended to
#   img_folder         : sub-folder of a sequence that holds the frames
#   file_format        : frame file extension
#   INSTRUMENT_CLASSES : class names; the position in the tuple is the label
#   targets_file       : text file that receives one "<crop path> <label>" line
DATASET_CONFIGS = {
    'miccai18': {
        'mlist':       [1,2,3,4,5,6,7,9,10,11,12,14,15,16],
        'seq_list':    [1,2,3,4,5,6,7,9,10,11,12,14,15,16],
        'train_list':  [2,3,4,6,7,9,10,11,12,14,15],
        'dir_root_gt': 'instruments18/seq_',
        'img_folder':  'left_frames/',
        'file_format': '.png',
        'INSTRUMENT_CLASSES': (
            'kidney', 'bipolar_forceps', 'prograsp_forceps', 'large_needle_driver',
            'monopolar_curved_scissors', 'ultrasound_probe', 'suction', 'clip_applier',
            'stapler', 'maryland_dissector', 'spatulated_monopolar_cautery'),
        'targets_file': 'miccai_targets.txt',
    },
    'miccai17': {
        'mlist':       [ 1, 2, 3, 4, 5, 6, 7, 8, 9,10],
        'seq_list':    [17,18,19,20,21,22,23,24,25,26],
        'train_list':  [6,10],
        'dir_root_gt': 'instruments17/instrument_dataset_',
        'img_folder':  'images/',
        'file_format': '.jpg',
        'INSTRUMENT_CLASSES': (
            'tissue', 'Bipolar Forceps', 'Prograsp Forceps', 'Large Needle Driver',
            'Monopolar Curved Scissors', 'Others', 'Suction', 'Clip Applier',
            'Stapler', 'Maryland Dissector', 'Spatulated Monopolar Cautery',
            'Vessel Sealer', 'Grasping Retractor'),
        'targets_file': 'miccai17_targets.txt',
    },
    'SGH_2020': {
        'mlist':       [ 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16,17,18,19,20,21,22],
        'seq_list':    [27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48],
        'train_list':  [ 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15],
        'dir_root_gt': 'SGH_dataset_2020/',
        'img_folder':  'resized_frames/',
        'file_format': '.png',
        'INSTRUMENT_CLASSES': (
            'tissue', 'bipolar_forceps', 'prograsp_forceps', 'large_needle_driver',
            'monopolar_curved_scissors', 'ultrasound_probe', 'suction', 'clip_applier',
            'stapler', 'maryland_dissector', 'spatulated_monopolar_cautery'),
        'targets_file': 'sgh_2020_targets.txt',
    },
    'Youtube': {
        'mlist':       [ 1, 2, 3, 4, 5],
        'seq_list':    [49,50,51,52,53],
        'train_list':  [5],
        'dir_root_gt': 'YouTubeDataset/',
        'img_folder':  'resized_frames/',
        'file_format': '.png',
        'INSTRUMENT_CLASSES': (
            'tissue', 'bipolar_forceps', 'prograsp_forceps', 'large_needle_driver',
            'monopolar_curved_scissors', 'ultrasound_probe', 'suction', 'clip_applier',
            'stapler', 'maryland_dissector', 'spatulated_monopolar_cautery'),
        'targets_file': 'youtube_targets.txt',
    },
}

# Fail here with a readable message instead of a NameError on `mlist` later.
if dataset not in DATASET_CONFIGS:
    raise ValueError("Unknown dataset %r; expected one of %s"
                     % (dataset, sorted(DATASET_CONFIGS)))

_cfg = DATASET_CONFIGS[dataset]

# seq_list
mlist      = _cfg['mlist']
seq_list   = _cfg['seq_list']
train_list = _cfg['train_list']

# img location
dir_root_gt = _cfg['dir_root_gt']
img_folder  = _cfg['img_folder']
file_format = _cfg['file_format']

INSTRUMENT_CLASSES = _cfg['INSTRUMENT_CLASSES']

# Opened in write mode, so an existing targets file is truncated. The handle
# stays open on purpose: the crop loop below writes to it and closes it.
myfile = open(_cfg['targets_file'], 'w')

# Crop every annotated bounding box out of its frame, resize the crop to
# 224x224, save it into the train or test folder, and log one
# "<crop path> <label>" line per crop to `myfile`.

# Class name -> numeric label (position in INSTRUMENT_CLASSES). Built once
# because it does not change from frame to frame.
class_to_ind = dict(zip(INSTRUMENT_CLASSES, range(len(INSTRUMENT_CLASSES))))
bbox_corner_tags = ('xmin', 'ymin', 'xmax', 'ymax')

# Image.save() does not create missing directories, so make sure both
# destinations exist before the first crop is written.
for _out_dir in (train_folder, test_folder):
    if not os.path.isdir(_out_dir):
        os.makedirs(_out_dir)

xml_dir_list = []

try:
    # enumerate() has no length, so tqdm needs `total` to draw a real bar.
    for index, ind_seq in tqdm(enumerate(mlist), total=len(mlist)):
        xml_dir_temp = dir_root_gt + str(ind_seq) + '/xml/'
        xml_dir_list = glob(xml_dir_temp + '/*.xml')

        # The split and the sequence tag only depend on the sequence.
        out_folder = train_folder if ind_seq in train_list else test_folder
        seq_tag = format(seq_list[index], '02d')

        for xml_path in xml_dir_list:
            # The frame shares the annotation's base name and lives in
            # <sequence folder>/<img_folder>/.
            file_name = os.path.splitext(os.path.basename(xml_path))[0]
            file_root = os.path.dirname(os.path.dirname(xml_path))
            _xml = ET.parse(xml_path).getroot()
            _img_dir = os.path.join(file_root, img_folder, file_name + file_format)
            _img = np.array(Image.open(_img_dir).convert('RGB'))

            # Collect [xmin, ymin, xmax, ymax, label] for every annotated object.
            node_bbox = []
            for obj in _xml.iter('objects'):
                name = obj.find('name').text.strip()
                if name not in class_to_ind:
                    raise ValueError("Unknown class %r in %s" % (name, xml_path))
                bbox = obj.find('bndbox')
                bndbox = [int(bbox.find(pt).text) for pt in bbox_corner_tags]
                bndbox.append(class_to_ind[name])
                node_bbox.append(bndbox)

            for idx, (xmin, ymin, xmax, ymax, label) in enumerate(node_bbox):
                # xmax / ymax are treated as inclusive pixel coordinates.
                roi_crop = _img[ymin:ymax + 1, xmin:xmax + 1, :]
                roi_crop = Image.fromarray(roi_crop).resize((224, 224), Image.NEAREST)
                # <frame>_<box index>_<sequence id>_<label>.png
                roi_dir = (out_folder + file_name + '_' + str(idx) + '_'
                           + seq_tag + '_' + str(label) + '.png')
                roi_crop.save(roi_dir)
                myfile.write("%s %s\n" % (roi_dir, str(label)))
finally:
    # Close (and flush) the targets file even if a frame fails to process.
    myfile.close()

# ================= Count data ================= 

In [None]:
#System
import os
import sys

import torch
import numpy as np
from glob import glob

if sys.version_info[0] == 2: import xml.etree.cElementTree as ET
else: import xml.etree.ElementTree as ET

# input data and IO folder location
mlist = [4]

dir_root_gt = 'YouTubeDataset/'
xml_dir_list = []

for i in mlist:
    xml_dir_temp = dir_root_gt + str(i) + '/xml/'
    seq_list_each = glob(xml_dir_temp + '/*.xml')
    xml_dir_list = xml_dir_list + seq_list_each
    
# global variables
INSTRUMENT_CLASSES = ('tissue', 'bipolar_forceps', 'prograsp_forceps', 'large_needle_driver',
                      'monopolar_curved_scissors', 'ultrasound_probe', 'suction', 'clip_applier',
                      'stapler', 'maryland_dissector', 'spatulated_monopolar_cautery')

ACTION_CLASSES = ('Idle', 'Grasping', 'Retraction', 'Tissue_Manipulation', 'Tool_Manipulation',
                  'Cutting', 'Cauterization', 'Suction', 'Looping', 'Suturing', 'Clipping', 
                  'Staple', 'Ultrasound_Sensing')

instrument_cls_freq = np.zeros((13,1))
action_cls_freq = np.zeros((13,1))

for index, _xml_dir in  enumerate(xml_dir_list):
    _xml = ET.parse(_xml_dir).getroot()
    c_flag = False
    
    for obj in _xml.iter('objects'):
        # object name and interaction type
        name = obj.find('name').text.strip()
        #print(name)
        #interact = obj.find('interaction').text.strip()
        instrument_cls_freq[int(INSTRUMENT_CLASSES.index(str(name)))] += 1
        #action_cls_freq[int(ACTION_CLASSES.index(str(interact)))] += 1
    if c_flag: continue

print('instrument', instrument_cls_freq)
print('action', action_cls_freq)

# ================= Create Train and Test Files ================= 

In [18]:
'''
Project         : Incremental learning for feature extraction
Lab             : MMLAB, National University of Singapore
contributors    : Lalith, Mengya, Mobarak
'''

import os
from glob import glob

# Split the image list into one text file per incremental-learning period.
# `classes[k]` lists the class ids that belong to period k, and
# `file_names[k]` is the file that receives the matching image paths.

#period = 1
#classes = [[0,1,2,3,4,5,6,7,8,9,10]]

period = 2
classes = [[0,1,2,3,4,5,6,7,8], [9,10]]

#train
#dir_root_gt = "../datasets/Classification_dataset/train/*.png"
#file_names = ['data_files/class0_10_train.txt']
#file_names = ['data_files/class0_8_train.txt', 'data_files/class9_10_train.txt']


# test data:
dir_root_gt = '../datasets/Classification_dataset/test/*'
#file_names = ['data_files/class0_10_test.txt']
file_names = ['data_files/class0_8_test.txt', 'data_files/class9_10_test.txt']

img_list = glob(dir_root_gt)

# One counter per class id, sized from the largest id listed in `classes`.
# The counters are NOT reset between periods, so each printed line is the
# running total so far.
class_list = [0] * (max(max(group) for group in classes) + 1)


# for every incremental learning, selects class specific images from the total image list
for period_id in range(period):
    # open() does not create missing directories.
    out_dir = os.path.dirname(file_names[period_id])
    if out_dir and not os.path.isdir(out_dir):
        os.makedirs(out_dir)

    # Append mode is kept from the original: re-running the cell adds the
    # same paths again, so delete the old files first for a clean list.
    with open(file_names[period_id], 'a') as curr_file:
        for img in img_list:
            # The class id is the last '_'-separated field of the file name,
            # just before the extension (e.g. "..._49_3.png" -> 3).
            stem = os.path.splitext(img)[0]
            target = int(stem.rsplit('_', 1)[-1])
            if target in classes[period_id]:
                class_list[target] += 1
                curr_file.write(img+'\n')
    print(class_list)


[150, 150, 150, 150, 150, 150, 150, 60, 150, 0, 0]
[150, 150, 150, 150, 150, 150, 150, 60, 150, 100, 100]
