# Setup

In [3]:
import os
import cv2
import numpy as np
import shutil
import random
import pandas as pd
import time

from matplotlib import pyplot as plt
from IPython import display

In [4]:
# Record the wall-clock start time; a later cell subtracts it from time.time()
# to report how long the notebook took to run.
start_time = time.time()

In [2]:
# Specify directories and files
# DIR_SRC: input images, read by the binary preprocessing step.
DIR_SRC = "cil-cosmology-2020/labeled"
# DIR_BIN: thresholded images are written here (wiped and recreated by create_dir).
DIR_BIN = "cil-cosmology-2020/binary"
# DIR_TAR: generated images are written here (wiped and recreated by create_dir).
DIR_TAR = "cil-cosmology-2020/results"
# Label table; the notebook reads its "Actual" and "Id" columns.
label_csv = pd.read_csv("cil-cosmology-2020/labeled.csv")

In [1]:
# Set constants
# File extension appended to an image id to form a file name.
IMAGE_FORMAT = ".png"
# Threshold and max value passed to cv2.threshold; MAX_INTENSITY is also the
# fill colour used when drawing generated circles.
BINARY_THRESHOLD = 127
MAX_INTENSITY = 255

# Arguments forwarded to cv2.HoughCircles in hough_transform:
#   HOUGH_PARAM_1 -> 3rd positional argument (dp in the OpenCV signature)
#   HOUGH_PARAM_2 -> 4th positional argument (minDist in the OpenCV signature)
#   HOUGH_PARAM_3 -> param1 keyword
#   HOUGH_PARAM_4 -> param2 keyword
HOUGH_PARAM_1 = 1
HOUGH_PARAM_2 = 10
HOUGH_PARAM_3 = 30
HOUGH_PARAM_4 = 5
# Passed as minRadius / maxRadius; MAX_RADIUS is also the size of the radius
# axis of the probability array P.
MIN_RADIUS = 1
MAX_RADIUS = 20

# Side length (pixels) of the square generated images.
IMAGE_SIZE = 1000
# Side length (pixels) of one grid tile used to bin circle centres.
GRIDSIZE = 10
# Number of tiles along each image axis.
N_TILES = int(IMAGE_SIZE/GRIDSIZE)

# How many synthetic images the generation cell produces.
N_IMAGES_TO_GENERATE = 500

In [4]:
def create_dir(dir_path):
    """Create an empty directory at ``dir_path``.

    If the directory already exists it is removed together with all of its
    contents first, so the result is always an empty directory.  Missing
    parent directories are created as well.

    Parameters
    ----------
    dir_path : str
        Path of the directory to (re)create.
    """
    if os.path.isdir(dir_path):
        # Destructive by design: stale results from earlier runs are discarded.
        shutil.rmtree(dir_path)
    # makedirs (rather than mkdir) so a missing parent does not raise.
    os.makedirs(dir_path)

In [5]:
# Start from empty output directories for the binary and generated images.
for _out_dir in (DIR_BIN, DIR_TAR):
    create_dir(_out_dir)

# Preprocessing

In [6]:
def to_binary(img_path):
    """Load an image as grayscale and threshold it to a binary image.

    Pixels above ``BINARY_THRESHOLD`` become ``MAX_INTENSITY``; all others
    become 0 (``cv2.THRESH_BINARY``).

    Parameters
    ----------
    img_path : str
        Path of the image file to read.

    Returns
    -------
    numpy.ndarray
        The thresholded image.

    Raises
    ------
    IOError
        If the image cannot be read.
    """
    img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
    if img is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise IOError("Could not read image: {}".format(img_path))
    _, binary = cv2.threshold(img, BINARY_THRESHOLD, MAX_INTENSITY, cv2.THRESH_BINARY)
    return binary

In [7]:
def store_img(img, path_tar):
    """Write ``img`` to ``path_tar`` with ``cv2.imwrite``.

    Parameters
    ----------
    img : numpy.ndarray
        Image data to write.
    path_tar : str
        Destination file path.

    Raises
    ------
    IOError
        If ``cv2.imwrite`` reports that the image was not written.
    """
    # cv2.imwrite returns a success flag; ignoring it hides failed writes.
    if not cv2.imwrite(path_tar, img):
        raise IOError("Could not write image: {}".format(path_tar))

In [8]:
def hough_transform(img_path):
    """Detect circles in an image with the Hough circle transform.

    The image is read as grayscale and passed to ``cv2.HoughCircles`` using
    the ``HOUGH_PARAM_*`` and ``MIN_RADIUS`` / ``MAX_RADIUS`` constants.

    Parameters
    ----------
    img_path : str
        Path of the image file to analyse.

    Returns
    -------
    numpy.ndarray or None
        The ``cv2.HoughCircles`` result rounded and cast to ``uint16``
        (callers index ``circles[0]`` to get rows of x, y, radius), or
        ``None`` when no circle was found.

    Raises
    ------
    IOError
        If the image cannot be read.
    """
    img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
    if img is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise IOError("Could not read image: {}".format(img_path))

    circles = cv2.HoughCircles(img, cv2.HOUGH_GRADIENT, HOUGH_PARAM_1, HOUGH_PARAM_2,
                               param1=HOUGH_PARAM_3, param2=HOUGH_PARAM_4,
                               minRadius=MIN_RADIUS, maxRadius=MAX_RADIUS)

    if circles is not None:
        # Round to whole pixels so the values can be used as grid / array indices.
        circles = np.uint16(np.around(circles))

    return circles

In [9]:
def store_transform(dir_src, dir_tar, img_ids, transform):
    """Apply ``transform`` to each image in ``dir_src`` and save it to ``dir_tar``.

    Each id in ``img_ids`` is turned into a file name by appending
    ``IMAGE_FORMAT``; ``transform`` receives the source path and its return
    value is written under the same file name in ``dir_tar``.
    """
    for img_id in img_ids:
        filename = str(img_id) + IMAGE_FORMAT
        transformed = transform(os.path.join(dir_src, filename))
        store_img(transformed, os.path.join(dir_tar, filename))

In [10]:
def get_features(dir_src, img_ids):
    """Run ``hough_transform`` on every listed image in ``dir_src``.

    Returns a list with one entry per id in ``img_ids``, in the same order;
    each entry is whatever ``hough_transform`` returned for that image.
    """
    return [
        hough_transform(os.path.join(dir_src, str(img_id) + IMAGE_FORMAT))
        for img_id in img_ids
    ]

In [11]:
# Select the ids of all rows labelled as galaxies (Actual == 1.0).
# NOTE(review): the trailing [1:] drops the first matching id - confirm this
# is intentional (pd.read_csv has already consumed the header row).
is_galaxy = label_csv["Actual"].eq(1.0)
galaxy_ids = label_csv.loc[is_galaxy, "Id"].values[1:]

In [12]:
# Threshold every galaxy image and write the binary version to DIR_BIN.
store_transform(dir_src=DIR_SRC, dir_tar=DIR_BIN, img_ids=galaxy_ids, transform=to_binary)

In [13]:
# Detect circles (centre coordinates and radius) in each binary image.
circle_list = get_features(dir_src=DIR_BIN, img_ids=galaxy_ids)

# Training

In [14]:
def round_to_grid(array, max_vals=None):
    """Map pixel coordinates to grid-tile indices.

    Coordinates are divided by ``GRIDSIZE``, floored, and clamped from above
    so that a coordinate on the far image edge still lands in a valid tile.

    Parameters
    ----------
    array : array-like
        Pixel coordinates.
    max_vals : scalar or array-like, optional
        Upper bound(s) for the tile indices, broadcast against ``array``.
        Defaults to ``N_TILES - 1``, the last valid tile index.

    Returns
    -------
    numpy.ndarray
        Float array of tile indices (callers cast to int before indexing).
    """
    if max_vals is None:
        # Derive the bound from the grid config instead of hard-coding 99.
        max_vals = N_TILES - 1
    tiles = np.floor(np.asarray(array) / GRIDSIZE)
    return np.minimum(tiles, max_vals)

In [15]:
# Store the detected circles in a CSV with one row per circle:
# image (position of the image in circle_list), x, y, r.
n_images = len(circle_list)
feature_columns = ["image", "x", "y", "r"]

# Collect one (n_circles, 4) block per image and stack once at the end;
# stacking inside the loop is quadratic, and stacking onto a string header
# row silently turns every number into a string.
feature_blocks = []
for i, circles in enumerate(circle_list):
    if circles is None:
        continue
    detected = circles[0].astype(float)
    image_index = np.full((detected.shape[0], 1), float(i))
    feature_blocks.append(np.hstack((image_index, detected)))

if feature_blocks:
    feature_data = np.vstack(feature_blocks)
else:
    # No circles anywhere: still emit a well-formed, header-only CSV.
    feature_data = np.empty((0, len(feature_columns)))

# Values are kept as floats so the CSV text matches the previous output format.
features = pd.DataFrame(data=feature_data, columns=feature_columns)
features.to_csv("cil-cosmology-2020/circle_features.csv", index=False)

In [16]:
# Compute the empirical distribution P[tile_x, tile_y, radius]: the fraction
# of galaxy images that contain a circle in that tile with that radius.
P = np.zeros((N_TILES, N_TILES, MAX_RADIUS))
n_imgs = len(galaxy_ids)
for img in circle_list:
    if img is None:
        # No circles were detected in this image.
        continue
    for circle in img[0]:
        tile_x, tile_y = round_to_grid(circle[:2]).astype(int)
        # The radius axis has MAX_RADIUS bins (0 .. MAX_RADIUS-1), but the
        # detector is allowed to return radii up to MAX_RADIUS; clamp so such
        # a circle falls in the last bin instead of raising IndexError.
        radius_bin = min(int(circle[2]), MAX_RADIUS - 1)
        P[tile_x, tile_y, radius_bin] += 1.0 / n_imgs

# Image generation

In [17]:
def sample_features():
    """Draw a random set of circles from the distribution ``P``.

    Every cell of ``P`` is sampled independently: a cell produces a circle
    when a uniform draw falls below its probability.  The selected tile
    indices are converted back to pixel coordinates, and a single random
    offset inside a tile (one for x, one for y) is added to all circles.

    Returns
    -------
    numpy.ndarray
        Integer array of shape ``(n_circles, 3)`` with columns x, y, radius.
    """
    # Size the mask from P itself so the two can never get out of sync.
    mask = np.random.rand(*P.shape)
    # Strict '<' so a cell with probability 0 can never fire, even if the
    # uniform draw is exactly 0.0.
    tile_x, tile_y, radius = np.nonzero(mask < P)

    offset_x = random.randint(0, GRIDSIZE-1)
    offset_y = random.randint(0, GRIDSIZE-1)

    circles = np.column_stack((
        tile_x * GRIDSIZE + offset_x,
        tile_y * GRIDSIZE + offset_y,
        radius,
    ))
    return circles.astype(int)

In [18]:
def generate_galaxies(n_galaxies):
    """Generate synthetic images by drawing sampled circles on a black canvas.

    Parameters
    ----------
    n_galaxies : int
        Number of images to generate.

    Returns
    -------
    list of numpy.ndarray
        ``n_galaxies`` images of shape ``(IMAGE_SIZE, IMAGE_SIZE)`` and dtype
        ``uint8``; each sampled circle is drawn filled with ``MAX_INTENSITY``.
    """
    galaxies = []
    for _ in range(n_galaxies):
        canvas = np.zeros((IMAGE_SIZE, IMAGE_SIZE), np.uint8)
        for x, y, r in sample_features():
            # Cast to built-in int: some OpenCV builds reject NumPy integer
            # scalars for the centre / radius arguments.
            cv2.circle(canvas, (int(x), int(y)), int(r), MAX_INTENSITY, -1)
        galaxies.append(canvas)
    return galaxies

In [19]:
# Generate new cosmology images and write them to DIR_TAR as 1.png, 2.png, ...
images = generate_galaxies(N_IMAGES_TO_GENERATE)
n_images = len(images)
# generate_galaxies already returns (IMAGE_SIZE, IMAGE_SIZE) arrays, so the
# images are written as-is; file names are 1-based.
for ctr, image in enumerate(images, start=1):
    image_path = os.path.join(DIR_TAR, str(ctr) + IMAGE_FORMAT)
    store_img(image, image_path)

<Figure size 1080x1440 with 0 Axes>

In [2]:
elapsed_time = time.time() - start_time
print('Elapsed: %s' % (elapsed_time))

NameError: name 'time' is not defined