In [1]:
import tensorflow as tf

import os
import cv2
import numpy as np
import shutil
import random
import pandas as pd

from matplotlib import pyplot as plt
from IPython import display

In [2]:
# Specifiy directories and files
DIR_SRC = "cil-cosmology-2020/labeled"
DIR_BIN = "cil-cosmology-2020/binary"
DIR_TAR = "cil-cosmology-2020/new"
label_csv = pd.read_csv("cil-cosmology-2020/labeled.csv")

In [None]:
# Set constants
IMAGE_FORMAT = ".png"
BINARY_THRESHOLD = 127
BINARY_MAX = 255

In [3]:
def create_dir(dir_path):
    if os.path.isdir(dir_path):
        shutil.rmtree(dir_path)
    os.mkdir(dir_path)

In [4]:
def to_binary(img_path, threshold=127):
    img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
    ret,thresh1 = cv2.threshold(img,BINARY_THRESHOLD,BINARY_MAX,cv2.THRESH_BINARY)
    return thresh1

In [5]:
def store_img(img, path_tar):
    cv2.imwrite(path_tar,img)

In [6]:
def store_transform(dir_src, dir_tar, img_ids, transform):
    for img_id in img_ids:
        path_src = os.path.join(dir_src, str(img_id)+IMAGE_FORMAT)
        path_tar = os.path.join(dir_tar, str(img_id)+IMAGE_FORMAT)
        
        img = transform(path_src)
        
        store_img(img, path_tar)

In [7]:
def hough_transform(img_path):
    img_name = os.path.basename(img_path)
    img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
    height, width = img.shape
    cimg = np.zeros((height, width), np.uint8)
    
    circles = cv2.HoughCircles(img,cv2.HOUGH_GRADIENT,1,10,
                            param1=30,param2=5,minRadius=1,maxRadius=20)
    
    if not (circles is None):
        circles = np.uint16(np.around(circles))
    
    return circles

In [8]:
def get_features(dir_src, img_ids):
    circle_list = []
    for img_id in img_ids:
        path_src = os.path.join(dir_src, str(img_id)+".png")
        
        circles = hough_transform(path_src)
        circle_list.append(circles)
    return circle_list

In [9]:
def round_to_grid(array, gridsize=10, max_vals = [99, 99]):
    array = np.floor(array/gridsize)
    array = np.minimum(array, max_vals)
    return array

In [10]:
def sample_features():
    mask = np.random.rand(100, 100, 20)
    coordinates_t = np.where(mask<=P)
    coordinates = np.hstack(coordinates_t)
    n_circles = int(coordinates.shape[0]/3)

    circles = np.zeros((n_circles, 3))
    offset_x = random.randint(0, 10)
    offset_y = random.randint(0, 10)
    for i in range(n_circles):
        circles[i] = [coordinates[0*n_circles+i]*10+offset_x, coordinates[1*n_circles+i]*10+offset_y, coordinates[2*n_circles+i]]
    circles = circles.astype(int)
    return circles

In [11]:
def generate_galaxies(n_galaxies, height=1000, width=1000):
    galaxies = []
    for i in range(n_galaxies):
        circles = sample_features()
        cimg = np.zeros((height, width), np.uint8)        
        
        for c in circles:
            cv2.circle(cimg, (c[0], c[1]), c[2], 255, -1)
        galaxies.append(cimg)
    return galaxies

In [12]:
create_dir(DIR_BIN)
create_dir(DIR_TAR)

In [13]:
# find galaxy images
is_galaxy = label_csv["Actual"]==1.0
galaxy_ids = label_csv[is_galaxy].Id.values[1:]

In [15]:
# preprocess images to binary
store_transform(DIR_SRC, DIR_BIN, galaxy_ids, to_binary)

error: OpenCV(4.2.0) /Users/travis/build/skvark/opencv-python/opencv/modules/imgcodecs/src/loadsave.cpp:715: error: (-215:Assertion failed) !_img.empty() in function 'imwrite'


In [None]:
# extract features (coordinates and radius) from binary images
circle_list = get_features(DIR_BIN, galaxy_ids)

In [None]:
# store features in csv
n_images = len(circle_list)
features = np.asarray(["image", "x", "y", "r"])

for i, circles in enumerate(circle_list):
    if not circles is None:
        f = circles[0]
        n_circles = f.shape[0]
        indices = np.ones((n_circles, 1))
        indices *= i
        f = np.hstack((indices, f))
        features = np.vstack((features, f))

features = pd.DataFrame(data = features[1:], columns = features[0])
features.to_csv("cil-cosmology-2020/circle_features.csv", index=False)

In [None]:
# limit radius
r_min = 1
r_max = 20

# setup grid
cluster_width = 10
cluster_height = 10
test = circle_list[0]
floored = np.floor(test/10)

In [None]:
# compute probability distribution
P = np.zeros((100, 100, r_max))
n_imgs = len(galaxy_ids)
for i, img in enumerate(circle_list):
    if img is None:
        continue
    for circle in img[0]:
        index = round_to_grid(circle[:2]).reshape(2,).astype(int)
        index = np.hstack((index, circle[2]))
        index = tuple(index)
        P[index] += 1/n_imgs

In [None]:
# generate new cosmology images
images = generate_galaxies(500)
plt.figure(figsize=(15, 20))
n_images = len(images)
ctr = 0
for i, img in enumerate(images):
    ctr += 1
    image = np.asarray(img)
    image = image.reshape((1000, 1000))
    #plt.subplot(4, 3, i+1)
    #plt.imshow(image)
    #plt.axis("Off")
    image_path = os.path.join(DIR_TAR, str(ctr)+".png")
    store_img(image, image_path)