In [1]:
# add +/- randomly on AE train/val/test images
# from mindiam7_pad25_jit20 generate mindiam7_pad25_jit20_trainonlybias

# note that the indexing conventions of numpy and PIL.Image differ
# npy[row,col]
# PIL.Image[col,row]

In [8]:
import numpy as np
from PIL import Image, ImageDraw, ImageFont
import os
from matplotlib import pyplot as plt
import random
import pandas as pd
from tqdm.notebook import tqdm

import sys
sys.path.append('../../causal_align/utils/')
from myutil import str2box

#### helper functions

In [9]:
def draw(img,box):
    '''
    Mark the four corners of `box` on `img` in place by setting them to 1.

    `box` is unpacked as (ymin, xmin, ymax, xmax); `img` is indexed as img[x, y].
    Returns None.
    '''
    y_lo, x_lo, y_hi, x_hi = box
    # Same four corners, visited in the same order as four explicit assignments.
    corners = ((x_lo, y_lo), (x_hi, y_hi), (x_hi, y_lo), (x_lo, y_hi))
    for corner_x, corner_y in corners:
        img[corner_x, corner_y] = 1
    
def get_iou(boxA, boxB):
    '''
    Intersection-over-union of two boxes.

    Each box supplies four coordinates: entries 0 and 1 are the lower bounds,
    entries 2 and 3 the upper bounds of the two axes. Coordinates are cast to
    int and treated as inclusive bounds (hence the +1 in every extent).
    Returns a float; 0 when the boxes do not overlap.
    '''
    a = list(map(int, boxA))
    b = list(map(int, boxB))

    # Overlap extent along each axis, floored at zero when the boxes are disjoint.
    overlap_axis0 = max(0, min(a[2], b[2]) - max(a[0], b[0]) + 1)
    overlap_axis1 = max(0, min(a[3], b[3]) - max(a[1], b[1]) + 1)
    inter_area = overlap_axis0 * overlap_axis1

    area_a = (a[2] - a[0] + 1) * (a[3] - a[1] + 1)
    area_b = (b[2] - b[0] + 1) * (b[3] - b[1] + 1)
    union_area = area_a + area_b - inter_area

    return inter_area / float(union_area)

def cross_mark(npy,coord,symbol,size=4,thickness=1,fill_value=1):
    '''
    Draw a '+' or '-' mark on `npy` in place, centred at `coord`.

    Parameters
    ----------
    npy : 2-D array indexed as npy[x, y]; modified in place.
    coord : (x, y) centre of the mark.
    symbol : '+' draws both bars; any other value draws only the bar
        running along the second axis (the '-' stroke).
    size : each bar covers indices centre-size+1 .. centre+size-1 along its length.
    thickness : each bar covers indices centre-thickness+1 .. centre+thickness-1
        across its width.
    fill_value : value written into the marked pixels.

    Slice bounds are clamped at 0. Without this, a mark that reaches past the
    top/left edge yields a negative slice start, which numpy interprets as an
    index from the end, so the bar is silently dropped or drawn in the wrong
    place. Marks that lie fully inside the array are drawn as before; bars
    reaching past the bottom/right edge are clipped by normal slicing rules.
    '''
    x,y = coord

    def _span(center, half):
        # Clamp both ends so a negative bound can never wrap around.
        return slice(max(0, center-half+1), max(0, center+half))

    # Bar running along the second axis: present in both '-' and '+'.
    npy[_span(x, thickness), _span(y, size)] = fill_value
    if symbol == '+':
        # Bar running along the first axis: only for '+'.
        npy[_span(x, size), _span(y, thickness)] = fill_value

#### At random corners (avoid overlap with the nodule)

In [10]:
# Input dataset directory; the biased copy is written next to it with a suffix.
source = '/data/liumingzhou/CounterAlign_output/preprocess/multiple_slices/mindiam7_pad25_jit20'
target = source + '_trainonlybias'

In [11]:
# Sub-folders of `source` that are mirrored into `target`.
folders = ['train', 'val', 'test']

# Load the annotation table and key it by the 'filename' column.
anno = pd.read_csv(os.path.join(source, 'annos.csv'))
anno = anno.set_index('filename')

# Copy the annotations to the target dataset.
os.makedirs(target, exist_ok=True)
anno.to_csv(os.path.join(target, 'annos.csv'))

In [12]:
pad = 5        # margin added on every side of the nodule box before the overlap check
size = 5       # `size` argument forwarded to cross_mark
thickness = 2  # `thickness` argument forwarded to cross_mark

for folder in folders:
    filenames = os.listdir(os.path.join(source,folder))
    os.makedirs(os.path.join(target,folder),exist_ok=True)
    for filename in tqdm(filenames):
        npy = np.load(os.path.join(source,folder,filename))
        n_rows, n_cols = npy.shape[0], npy.shape[1]

        # Look the annotation row up once; the key is the filename up to its first dot.
        record = anno.loc[filename.split('.')[0]]
        malign = record['malignancy']
        ymin,xmin,ymax,xmax = str2box(record['box'])
        # Padded nodule box in (ymin, xmin, ymax, xmax) order, clipped to the image:
        # y is bounded by shape[1] (columns), x by shape[0] (rows).
        nod_box = [max(0,ymin-pad),max(0,xmin-pad),min(ymax+pad,n_cols-1),min(xmax+pad,n_rows-1)]

        # set the symbol according to label: in 'train', malignancy==1 gets '+';
        # in every other folder the association is flipped
        # (presumably so the mark is a bias that only holds in train -- cf. the target folder name)
        if folder=='train':
            symbol = '+' if malign==1 else '-'
        else:
            symbol = '-' if malign==1 else '+'

        # take the first corner whose mark box does not overlap the padded nodule box;
        # candidates are (col, row) pairs, 10 pixels in from each image corner
        corners = [(10,10), (10,n_rows-10), (n_cols-10,10), (n_cols-10,n_rows-10)]

        for corner_col, corner_row in corners:
            sym_x, sym_y = corner_row, corner_col
            # same (ymin, xmin, ymax, xmax) ordering as nod_box
            sym_box = [sym_y-size+1,sym_x-size+1,sym_y+size,sym_x+size]
            iou = get_iou(sym_box,nod_box)
            if iou == 0:
                break
        # after the loop, iou is nonzero only if every corner overlapped the nodule
        assert iou==0, 'Fail to find property way to add the +/- symbol.'

        # cross_mark takes numpy-order (row, col) coordinates and edits npy in place
        cross_mark(npy,(sym_x,sym_y),thickness=thickness,symbol=symbol,size=size)
        np.save(os.path.join(target,folder,filename),npy)
        
        #plt.figure(figsize=(3,3))
        #plt.imshow(npy,cmap='gray')
        #plt.title(str(malign))
        
        #plt.axvline(ymin,color='red')
        #plt.axvline(ymax,color='red')
        #plt.axhline(xmin,color='red')
        #plt.axhline(xmax,color='red')

  0%|          | 0/17442 [00:00<?, ?it/s]

  0%|          | 0/5646 [00:00<?, ?it/s]

  0%|          | 0/6380 [00:00<?, ?it/s]

#### At fixed corners

#### Random positions
We use the numpy coordinate system, indexing arrays as array[x, y] (x = row, y = column).

#### Random position for single slice