In [1]:
import os
import sys
import numpy as np
import matplotlib.pyplot as plt
import random
import json

from skimage import io, img_as_ubyte, img_as_float
from skimage.draw import random_shapes
from skimage.filters import gaussian
from skimage.color import rgb2gray
from skimage.transform import rotate

%load_ext autotime

In [2]:
def normalize_img_values(img):
    """Min-max rescale an image and convert it to uint8.

    The minimum value is shifted to 0 and the result is divided by its
    maximum, so the values span [0, 1] before being handed to
    ``img_as_ubyte`` for the conversion to uint8.

    Args:
        img: numeric numpy array holding the image values.

    Returns:
        A uint8 array with the same shape as ``img``. A constant image
        (all values equal) has no range to stretch and is returned as an
        all-zero array.
    """
    shifted = img - img.min()
    peak = shifted.max()
    if peak == 0:
        # Constant image: dividing by the zero range would yield NaNs (0/0)
        # and an undefined uint8 result, so return a flat zero image instead.
        return np.zeros(shifted.shape, dtype=np.uint8)
    return img_as_ubyte(shifted / peak)

def check_normalized(img):
    """Return ``img`` unchanged if it is already normalized, else rescale it.

    An image counts as normalized when it is a 256x256 RGB array of dtype
    uint8 whose values span the full range, i.e. min 0 and max 255.
    Any other image is converted to float and min-max rescaled through
    ``normalize_img_values``. Note that only the values are rescaled: an
    image with an unexpected shape keeps that shape.

    Args:
        img: numpy array holding the image as read from disk.

    Returns:
        ``img`` itself when it already satisfies every condition, otherwise
        a new uint8 array produced by ``normalize_img_values``.
    """
    # Explicit condition instead of assert/except: assert statements are
    # removed when Python runs with -O, which would silently disable the check.
    already_normalized = (
        img.shape == (256, 256, 3)
        and img.dtype == np.uint8
        and img.max() == 255
        and img.min() == 0
    )
    if already_normalized:
        return img
    return normalize_img_values(img_as_float(img))

def check_normalized_patch(patch, patch_size=64):
    """Return ``patch`` unchanged if it is already normalized, else rescale it.

    A patch counts as normalized when it is a ``patch_size`` x ``patch_size``
    RGB array of dtype uint8 whose values span the full range, i.e. min 0
    and max 255. Any other patch is converted to float and min-max rescaled
    through ``normalize_img_values``. Only the values are rescaled: a patch
    with an unexpected shape keeps that shape.

    Args:
        patch: numpy array holding the patch.
        patch_size: expected side length of the square patch, in pixels.

    Returns:
        ``patch`` itself when it already satisfies every condition, otherwise
        a new uint8 array produced by ``normalize_img_values``.
    """
    # Explicit condition instead of assert/except: assert statements are
    # removed when Python runs with -O, which would silently disable the check.
    already_normalized = (
        patch.shape == (patch_size, patch_size, 3)
        and patch.dtype == np.uint8
        and patch.max() == 255
        and patch.min() == 0
    )
    if already_normalized:
        return patch
    return normalize_img_values(img_as_float(patch))

time: 4.04 ms


In [22]:
# CREATE A PATCHED DATASET FROM THE BARK DATASET
#
# This cell defines the geometry shared by the anomaly helpers below and
# loads a random selection of bark images into `selected_images`.

# Source images and how many of them to use.
dataset_path = '../Datasets/Bark-dataset-Test/'
num_images = 500

# Geometry: anomalies are placed inside the central square of each image,
# leaving a border of `offset_area` pixels on every side.
image_size = 256
offset_area = 32
anomaly_diameter = 42
anomaly_size = image_size - 2*offset_area

# Shuffle the directory listing so the selection is random.
files = os.listdir(dataset_path)
random.shuffle(files)

selected_images = []
img_count = 0
for filename in files:
    if '.jpg' in filename:
        img = check_normalized(io.imread(dataset_path + filename))
        selected_images.append(img)
        img_count += 1
        # Stop as soon as the requested number of images has been loaded.
        if img_count == num_images:
            break

time: 1.24 s


In [23]:
def get_external_crops(external_anomaly_num, rotate=False):
    """Yield square crops taken from randomly chosen *other* images.

    For each index ``i`` in ``range(external_anomaly_num)`` a source image is
    drawn from ``selected_images`` excluding index ``i``, so that crop ``i``
    never originates from image ``i``. A window of side ``anomaly_diameter``
    is then cut at a random position and copied.

    Reads the module-level ``selected_images``, ``num_images``,
    ``image_size`` and ``anomaly_diameter``.

    Args:
        external_anomaly_num: number of crops to generate.
        rotate: when true, each crop is rotated by a random integer angle in
            [0, 90] degrees (same output size, symmetric border padding).

    Yields:
        One crop per index: a copy of the selected window or, when ``rotate``
        is true, the array returned by ``skimage.transform.rotate``.
        NOTE(review): with ``preserve_range=True`` the rotated crop is
        presumably float-valued rather than uint8 - confirm before relying
        on its dtype.

    Raises:
        IndexError: if there is no other image to draw from
            (``num_images`` <= 1).
    """
    if rotate:
        # The `rotate` parameter shadows the module-level
        # skimage.transform.rotate function, so calling `rotate(...)` here
        # would call a bool. Import the function under a distinct local name.
        from skimage.transform import rotate as rotate_image

    for i in range(external_anomaly_num):
        # Every image index except i.
        candidates = list(range(i)) + list(range(i+1, num_images))
        anomaly_origin = selected_images[random.choice(candidates)]

        row = random.randint(0, image_size-anomaly_diameter)
        col = random.randint(0, image_size-anomaly_diameter)
        # Copy so the crop does not alias the source image's memory.
        crop = np.copy(anomaly_origin[row:row+anomaly_diameter, col:col+anomaly_diameter, :])

        if rotate:
            angle = random.randint(0, 90)
            crop = rotate_image(crop, angle, resize=False, preserve_range=True, mode='symmetric')
        yield crop
        

def insert_anomaly(anomaly_area, anomaly):
    """Paste ``anomaly`` at a random position inside ``anomaly_area``.

    The target region is overwritten in place. The position is drawn so the
    pasted patch fits within ``anomaly_size`` pixels along both axes.

    Args:
        anomaly_area: 3-D array that receives the patch (modified in place).
        anomaly: 3-D array (rows, cols, channels) to paste.

    Returns:
        (row, col) of the pasted patch's centre, each shifted by the
        module-level ``offset_area``.
    """
    patch_h, patch_w, _channels = anomaly.shape

    # Draw the top-left corner: row first, then column.
    top = random.randint(0, anomaly_size - patch_h)
    left = random.randint(0, anomaly_size - patch_w)

    row_span = slice(top, top + patch_h)
    col_span = slice(left, left + patch_w)
    anomaly_area[row_span, col_span, :] = anomaly

    center_row = top + patch_h // 2 + offset_area
    center_col = left + patch_w // 2 + offset_area
    return center_row, center_col


def random_color_anomaly(anomaly_area, colour_range_noise=(0,0), sigma=4, alpha=0.2):
    """Blend a blurred random shape, coloured near the area's mean colour, into ``anomaly_area``.

    ``anomaly_area`` is modified in place. Reads the module-level
    ``anomaly_diameter`` (size of the generated shape) and ``offset_area``
    (added to the returned coordinates).

    Args:
        anomaly_area: image region indexed as [row, col, channel] with at
            least three channels; receives the blended shape.
        colour_range_noise: pair (below, above); the shape's per-channel
            intensity range is [mean - below, mean + above], clamped to
            [0, 255].
        sigma: sigma passed to the gaussian blur of the generated shape.
        alpha: initial blending weight of the generated image; it grows by
            0.075 after each blending pass and is capped at 0.99.

    Returns:
        (row, col): midpoint of the two ranges found in ``labels[0][1]``,
        each shifted by ``offset_area``.
        NOTE(review): ``labels[0][1]`` is presumably the bounding box
        ((r0, r1), (c0, c1)) of the generated shape - confirm against the
        installed scikit-image version.
    """
    # Rounded mean of each of the first three channels of the target area.
    mean_colours = int(round(np.mean(anomaly_area[:,:,0]))), int(round(np.mean(anomaly_area[:,:,1]))), int(round(np.mean(anomaly_area[:,:,2])))
    # Per-channel (low, high) intensity range around the mean, clamped to [0, 255].
    mean_ranges_colours = ( (max(mean_colours[0]-colour_range_noise[0], 0), min(mean_colours[0]+colour_range_noise[1], 255) ),
                           (max(mean_colours[1]-colour_range_noise[0], 0), min(mean_colours[1]+colour_range_noise[1], 255) ),
                           (max(mean_colours[2]-colour_range_noise[0], 0), min(mean_colours[2]+colour_range_noise[1], 255) ) )
    #print(mean_ranges_colours)
    
    # Generate an image containing at most one random shape of fixed size
    # (min_size == max_size == anomaly_diameter) coloured within the range above.
    # NOTE(review): labels[0] is accessed below, so this assumes a shape is
    # always produced - confirm random_shapes cannot return an empty list here.
    anomaly_generation, labels = random_shapes(anomaly_area.shape, max_shapes=1, min_size=anomaly_diameter,
                                 max_size=anomaly_diameter, intensity_range=mean_ranges_colours)
    
    #print(anomaly_area.shape, anomaly_area.dtype, anomaly_area.max(), anomaly_area.min())
    #print(labels)
    
    # Blur the shape, then min-max rescale the result to 0..255 and cast to uint8.
    # NOTE(review): newer scikit-image releases replaced `multichannel` with
    # `channel_axis` - confirm against the installed version.
    anomaly_generation = gaussian(anomaly_generation, sigma=sigma, multichannel=True)
    anomaly_generation = anomaly_generation - anomaly_generation.min()
    anomaly_generation = anomaly_generation / anomaly_generation.max()
    anomaly_generation = anomaly_generation * 255
    anomaly_generation = anomaly_generation.astype(np.uint8)
    
    #plt.imshow(anomaly_generation)
    #plt.show()
    
    # Progressive blending: pass i selects the elements of the generated image
    # below the threshold 175 - 15*i (160 on the first pass, decreasing by 15)
    # and mixes them into anomaly_area with weight alpha. Because the threshold
    # shrinks while alpha grows, darker elements are blended repeatedly and
    # end up more opaque. Thresholds at or below zero select nothing, since
    # the generated image is uint8.
    color_step = 15
    alpha_increment = 0.075
    for i in range(1,15):
        indices = np.where(anomaly_generation < 175-i*color_step)
        # In-place write: the blended float values are stored back into anomaly_area.
        anomaly_area[indices] = alpha*anomaly_generation[indices] + (1-alpha)*anomaly_area[indices]
        alpha = min(alpha+alpha_increment, 0.99)
        
    # Midpoint of the label's two ranges, shifted by offset_area.
    rows, cols = labels[0][1]
    return int( (rows[1]+rows[0])/2 )+offset_area, int( (cols[1]+cols[0])/2 )+offset_area

time: 6.36 ms


In [24]:
# GENERATE THE ANOMALOUS DATASET
#
# The first `external_anomaly_num` images receive a crop taken from another
# image; the remaining ones receive a blended random colour shape. Every
# result is saved as a JPEG and the anomaly centres are written to a JSON file.
assert len(selected_images) == num_images,'Selected images not equal to number of the desired ones'

external_anomaly_ratio = 0.75
external_anomaly_num = int(external_anomaly_ratio*num_images)
rotate_external_anomalies = False
colour_range_noise = (15, 0)
output_path = '../Datasets/Bark-1C3E-Quantitative/'

# makedirs also creates missing parents and tolerates an existing directory.
os.makedirs(output_path, exist_ok=True)

centers = {}
external_crops = list(get_external_crops(external_anomaly_num, rotate=rotate_external_anomalies))
num_colour, num_external, count = 0, 0, 0
for i in range(num_images):
    # Work on a copy: `anomaly_area` is a view, so writing into it would
    # otherwise alter `selected_images[i]` and re-running this cell would
    # stack new anomalies on top of already modified images.
    img = selected_images[i].copy()
    anomaly_area = img[offset_area:-offset_area,offset_area:-offset_area,:]

    if num_external < external_anomaly_num:
        # INSERT EXTERNAL CROP TO THE IMAGE
        anomaly = external_crops[i]
        row, col = insert_anomaly(anomaly_area, anomaly)
        num_external += 1
    else:
        # MODIFY A PATCH FROM THE IMAGE
        row, col = random_color_anomaly(anomaly_area, colour_range_noise, sigma=3)
        num_colour += 1

    io.imsave(output_path+'an_'+str(count)+'.jpg', img)
    centers[count] = (row, col)
    count += 1

# The centres file sits next to the output directory and shares its name.
with open('../Datasets/'+output_path.split('/')[-2]+'.json', 'w') as outfile:
    json.dump(centers, outfile, sort_keys=True, indent=4)

print('External anomalies:', num_external)
print('Colour anomalies:', num_colour)

External anomalies: 375
Colour anomalies: 125
time: 3.43 s
