In [1]:
import cv2
from matplotlib import pyplot as plt
import os
import numpy as np
import pickle

import xml.etree.ElementTree as ET
from collections import defaultdict

IMG_SIZE_ORJ = 2048
IMG_CHANNEL_ORJ = 3

In [2]:
def zero_pad(idOf, length=4):
    """
    Adding zero padding to numbers to match them with file names, 0001 vs 1
    
    """
    i = 0
    l = length - len(idOf)
    while i < l:
        idOf = '0' + idOf
        i +=1
    return idOf

# Checking col,row pair of a whole image if it is in unsure bud category or not
def check_unsure(tp_list_unsure, col, row):
    """
    Checking col,row pair of a whole image if it is in unsure bud category or not
    """
    flag_unsure = True
    
    if ((col,row) in tp_list_unsure) or ((col + 1,row) in tp_list_unsure) or \
        ((col,row + 1) in tp_list_unsure) or ((col + 1,row + 1) in tp_list_unsure):
        
            flag_unsure = False
            
    return flag_unsure

def renamer(fname):
    """
    Renaming given file name
    """
    name = fname.split('/')[-1]
    num_of_def = len(name.split('-'))
    img_loc_new = fname
    
    if num_of_def > 2:
        
        tbud_id = (name.split(".")[0]).split('-')[1]
        col_num = (name.split(".")[0]).split('-')[2]
        row_num = (name.split(".")[0]).split('-')[3]
        
        img_loc_new = os.path.dirname(fname) + '/' + tbud_id + '-' + zeroPad(col_num)+ '-' + zeroPad(row_num) + "." + name.split(".")[-1]
        #img_loc_new = os.path.dirname(fname) + '/' + name.split('-')[-2] + '-' + name.split('-')[-1]
        os.rename(fname, img_loc_new)
        
    return img_loc_new

In [3]:
def write_bud_metadata(tb_id, bud_info_list, dir_write):
    """

    :keyword
    tb_id
    bud_info_list
    dir_write

    :rtype: None
    """
    fname_new = dir_write + str(tb_id) + '-b'+ str(len(bud_info_list)) +'-budInfo.txt'
    
    if not os.path.exists(dir_write):
        os.makedirs(dir_write)
        
    f = open(fname_new, 'w')
    
    for t in bud_info_list:
        
        #if t[5] == '101':
        line = ' '.join(str(x) for x in t)
        f.write(line + '\n')
        
    f.close()
#     print(f'[INFO][write_bud_metadata] Finished {tb_id}!')
    

In [4]:
def annotation_parser(filename):
    """
    Read and parse XML annotation files
    """
    tree = ET.parse(filename)
    root = tree.getroot()
    tp_list = list()
    tp_list_img = list()
    
    for co in root.iter('CellObject'):
        
        co_bbox = co.find('BoundingBox')
        co_class = (co.find('Classification')).find('Class')
        
        i = co.attrib['id']
        x = co_bbox.attrib['x']
        y = co_bbox.attrib['y']
        w = co_bbox.attrib['w']
        h = co_bbox.attrib['h']
        c = co_class.attrib['class']
        
        # Neglecting unsure and etc. cases
        if c == '101'or c == '107':
            tp = (i, x, y, w, h, c)
            tp_list.append(tp)
        
    for im in root.iter('TileStack'):
        
        x = float(im.get('xpos'))
        y = float(im.get('ypos'))
        
        fname_full = im.find('Tile').get('filename')
        #idx_val = int((im.find('Properties')).find('Index').get('value'))
        foldname = fname_full.split('/')[0]
        fname = (fname_full.split('/')[-1]).split('.')[0]
        col_num = ''.join(c for c in (fname.split('-')[0]) if c.isdigit()) #.lstrip('0')).strip(' ')
        row_num = ''.join(c for c in (fname.split('-')[-1]) if c.isdigit()) #((fname.split('-')[-1]).lstrip('0'))
        
        if foldname == 'images':
            
            tp = (x, y, int(col_num), int(row_num))
            tp_list_img.append(tp)
            
        #print co.attrib, co_bbox.attrib
    return tp_list, tp_list_img

In [7]:
file_test_xml = "../data/raw/1296_conv/1296_ar_mitPreview_final.xml"
tp_list, tp_list_img = annotation_parsing(file_test_xml)

In [18]:
tp_list.sort(key= lambda a: a[1])
tp_list[:10]

[('130', '10019.5', '35425.8', '40.5048', '26.3337', '101'),
 ('127', '10035', '35570.6', '22.8988', '23.2623', '101'),
 ('147', '10037.2', '37370.3', '51.4454', '24.9924', '107'),
 ('146', '10070.8', '37318.2', '36.6772', '31.8085', '101'),
 ('126', '10078.6', '35553.1', '26.1701', '22.8988', '101'),
 ('323', '10081', '35453.4', '33.763', '39.0248', '101'),
 ('148', '10087', '37253.3', '20.5811', '24.8857', '101'),
 ('128', '10098.7', '35584.8', '20.1728', '20.5362', '107'),
 ('131', '10107.9', '35496.6', '46.5303', '38.3847', '101'),
 ('129', '10136.9', '35552.8', '27.2605', '30.8952', '107')]

In [10]:
tp_list_img[:10]

[(0.0, 0.0, 0, 0),
 (0.0, 398.285, 0, 1),
 (0.0, 796.57, 0, 2),
 (0.0, 1194.86, 0, 3),
 (0.0, 1593.14, 0, 4),
 (0.0, 1991.43, 0, 5),
 (0.0, 2389.71, 0, 6),
 (0.0, 2788.0, 0, 7),
 (0.0, 3186.28, 0, 8),
 (0.0, 3584.57, 0, 9)]

In [5]:
def bud_execute(img_loc, tps, tp_img, tid, dir_anns, dir_anns_f):  
    """

    :keyword
    tb_id
    bud_info_list
    dir_write

    :rtype: None
    """
    #img_test_anns = cv2.imread(img_loc)
    #img_test_anns_filled = img_test_anns.copy()
    
    filename = img_loc.split('/')[-1]
    
    #ann_coords = list()
    tups_bud_coord = list()
    area_info_bud = list() # area, w, h
    bud_ann_counts = list() # filename, budCount, unsureCount, elseCount
    test_img_coord = [tp_img[0], tp_img[1]]

    tp_tbud_final = (0, 0, 0, 0)
    tp_unsure_final = (0, 0)
    image_y, image_x = 2048, 2048#img_test_anns.shape[:2]
    
    base_pixel = 2048
    base_scale = 398.285
    base_scale_num = image_x / 2048
    scale = base_scale * base_scale_num
    
    pixK = (float(image_x) / scale)
    
    write_flag = False # If there is unsure ann in this instance, it's not written
    others = False
    
    count_bud = 0
    count_unsure = 0
    count_else = 0
    flag_just_bud = 0
    
    for tp in tps:
        
        x_cord = int((float(tp[1]) - test_img_coord[0]) * pixK)
        y_cord = int((float(tp[2]) - test_img_coord[1]) * pixK)
        w = int(float(tp[3]) * pixK)
        h = int(float(tp[4]) * pixK)
        c_id = int(tp[5])
        
        if ((float(tp[1]) > test_img_coord[0]) and (float(tp[2]) > test_img_coord[1]) and (x_cord + w) < image_x) and ((y_cord + h) < image_y):
            
            if c_id == 101 or c_id == 107:
                
                write_flag = True
                tups_bud_coord.append((x_cord, y_cord, w, h))
                area_info_bud.append((w*h, w, h))
                
                count_bud += 1

    if (count_unsure == 0) and (count_else == 0):
        flag_just_bud = 1
        
    tp_bud_counts = (filename, flag_just_bud, count_bud, count_unsure, count_else)
    bud_ann_counts.append(tp_bud_counts)
    
    dir_name = "image_bud_info"
    
    if not os.path.exists(dir_name):
        os.makedirs(dir_name)
        
    if (count_bud > 0) or (count_unsure > 0) or (count_else > 0):
        
        pickle_fname = dir_name + '/tbud_info_' + str(tid) +'.dump'
        
        if os.path.isfile(pickle_fname):
            
                read_list = pickle.load(open(pickle_fname, 'rb'))
                read_list.append(tp_bud_counts)
                pickle.dump(read_list, open(pickle_fname, 'wb'))
                
        else:
            
            pickle.dump(bud_ann_counts, open(pickle_fname, 'wb'))
                
    if (write_flag is True):
        tp_tbud = tp_img
        tp_tbud_final = tp_tbud
        
    if (others is True):
        
        tp_unsure = (tp_img[2], tp_img[3])#, tp_img[0], tp_img[1])
        tp_unsure_final = tp_unsure
    
    if len(tups_bud_coord) > 0:
        write_bud_metadata(tid+'-'+str(tp_img[2])+'-'+str(tp_img[3]), sorted(tups_bud_coord), 'Bud_Info/'+tbud_id+'_Bud_Info_txt/')
    
    return tp_tbud_final, tp_unsure_final

In [11]:
# For preparing bud txt files
part = '3'
data_location = "/Volumes/LaCie/MS/DATASETS/Tumorbuds_part" + part + "/"

if part == '1':
    list_tbud = ['1298', '1300', '1308', '1310', '1312', '1322']#'1317','1296', # Tubmorbuds_part1
elif part =='2':
    list_tbud = ['1324', '1329', '1331', '1333', '1335', '1341', '1349', '1359'] # Tumorbuds_part2
elif part == '3':
    list_tbud = ['1365', '1367', '1370', '1372', '1374', '1376', '1384', '1386'] # Tumorbuds_part3

for tbud_id in list_tbud:
    
    filename_xml = data_location + tbud_id + '_conv/' + tbud_id + "_edited_ar_gen.xml"#"_ar_mitPreview_final.xml"
    dir_imgs = data_location + tbud_id + '_conv/images/'
    
    tps, tps_img = annotation_parser(filename_xml)
    _, _, list_files = next(os.walk(dir_imgs))
    
    for file_inst in list_files:

        fname_img = file_inst.split('.')[0]
        col,row = fname_img.split('-')[-2],fname_img.split('-')[-1]
        target_img = None
        
        for tp_img in tps_img:
            
            if tp_img[2] == int(col) and tp_img[3] == int(row):
                target_img = tp_img
                break
                
        if target_img:
            bud_execute(dir_imgs + file_inst, tps, target_img, tbud_id, '', '')
        else:
            print('Target img tuple returns None!')
            
    print('Finished Tbud {}!'.format(tbud_id))

Finished Tbud 1365!
Finished Tbud 1367!
Finished Tbud 1370!
Finished Tbud 1372!
Finished Tbud 1374!
Finished Tbud 1376!
Finished Tbud 1384!
Finished Tbud 1386!
