## ML Dataset Generation Pipeline ##


Run this entire notebook, top to bottom, after Stellarium Image Generation has finished.

In [7]:
# Imports
import sys
import os
# sys.path.append(os.path.join(os.getcwd(), '..'))
import algorithms.centroid as ct
import scripts.synthetic as syn
# import algorithms.centroid_test as ctt
import scripts.dataset_feature_extraction as ft
from PIL import Image
import csv
import random
import shutil


### Hyperparameters and Settings ###

In [8]:
# ---- File paths ----
# The two values below describe the same image set: configure them together.
# `synthetic_image_set_dir` must keep its trailing "/" because later cells
# build sub-paths by plain string concatenation.
synthetic_image_set_dir = "images_data/mag5_1608_47deg_gray/"
# Encodes: magnitude of stars, num classes, camera fov
file_descript = "mag5_1608_47deg"

# ---- Camera model ----
# Sensor dimensions handed to the synthetic error functions
# (presumably pixels -- TODO confirm against scripts.synthetic).
sensor_x, sensor_y = 502, 752

# ---- Synthetic error model parameters ----
# NOTE(review): `false_stars` is not referenced by any cell visible in this
# notebook -- confirm whether it should be passed to the error model.
false_stars = 10
positional_sigma_all_stars = 0.01
positional_sigma_single_star = 0.0001
# How much noisy training data to generate per true star
synthetic_cases = 40

# ---- Feature extraction method ----
# Accepted values: "binning" or "angle_proximity"
feature_extract_method = "binning"

### Centroid Extraction ###

In [9]:
# Keep unexpanded in Jupyter. Function definition
def centroid_sources_to_dataset(file_path):
    """Extract star centroids from every PNG in a directory and save them as CSVs.

    For each ``*.png`` file directly inside ``file_path``, the image is opened,
    passed to ``ct.centroids_from_img`` and the returned sources are written to
    ``<file_path>/centroids/<image stem>_centroids_no_adverserial.csv`` with the
    header ``x_centroid, y_centroid`` followed by one row per source.

    Args:
        file_path: Directory containing the images. A trailing path separator
            is accepted but not required.

    Returns:
        None. The results are the CSV files written to disk.
    """
    centroid_dir = os.path.join(file_path, "centroids")
    os.makedirs(centroid_dir, exist_ok=True)

    # Open folder and read all images
    for file in os.listdir(file_path):
        if not file.endswith(".png"):
            continue

        # Calculate centroids for each image; the context manager releases the
        # image's file handle as soon as extraction is done.
        with Image.open(os.path.join(file_path, file)) as img:
            sources = ct.centroids_from_img(img)

        # splitext removes only the extension. str.strip('.png') would instead
        # strip any leading/trailing '.', 'p', 'n' or 'g' characters and mangle
        # names such as "map.png" (-> "ma").
        stem = os.path.splitext(file)[0]
        csv_path = os.path.join(centroid_dir, stem + '_centroids_no_adverserial.csv')

        # `with` guarantees the CSV is flushed and closed even if a row fails.
        with open(csv_path, 'w', newline='') as csv_file:
            csv_writer = csv.writer(csv_file)
            csv_writer.writerow(['x_centroid', 'y_centroid'])
            # Append centroids to the dataset
            for source in sources:
                csv_writer.writerow([source['xcentroid'], source['ycentroid']])
    return

In [10]:
# Creates a subdirectory of the synthetic image set directory called
# "centroids" and fills it with one centroid CSV per image.
centroid_sources_to_dataset(file_path=synthetic_image_set_dir)


### Synthetic Error Model ###

In [11]:
# Start from an empty "centroids_synthetic" folder: any copy left over from a
# previous run is removed before the folder is (re)created.
synthetic_centroid_dir = synthetic_image_set_dir + 'centroids_synthetic/'
if os.path.exists(synthetic_centroid_dir):
    shutil.rmtree(synthetic_centroid_dir)
os.makedirs(synthetic_centroid_dir)

# Copy every extracted centroid file into the synthetic folder.
for centroid_file in os.listdir(synthetic_image_set_dir + '/centroids/'):
    shutil.copy(synthetic_image_set_dir + 'centroids/' + centroid_file, synthetic_centroid_dir)

# Apply the synthetic error model to the copies in the synthetic folder.
# NOTE(review): `synthetic_cases` (not `false_stars`) is what gets passed to
# the false-star step -- confirm this matches the signature in scripts.synthetic.
syn.apply_adverserial_false_stars(
    synthetic_centroid_dir, sensor_x, sensor_y, synthetic_cases
)
syn.apply_adverserial_positional_error(
    synthetic_centroid_dir,
    sensor_x,
    sensor_y,
    positional_sigma_all_stars,
    positional_sigma_single_star,
)

### Rotationally Invariant Feature Extraction ###

In [12]:
# Build the feature CSV from the synthetic centroids using the configured method.
if feature_extract_method == "binning":
    # TODO: fix binning_feature_extraction to not hardcode bin sizes and also expose window sizes and bin sizes
    ft.binning_feature_extraction(synthetic_image_set_dir + '/centroids_synthetic/', synthetic_image_set_dir + '/bins/', file_descript + '_bin_features.csv') # Create single csv with features and labels
elif feature_extract_method == "angle_proximity":
    # Fail loudly: this branch used to be a silent no-op, after which the cell
    # still reported success and printed a path to a file it never produced.
    raise NotImplementedError('feature_extract_method "angle_proximity" is not implemented yet; use "binning"')
else:
    # Catch typos in the settings cell instead of silently generating nothing.
    raise ValueError('Unknown feature_extract_method: ' + repr(feature_extract_method) + ' (expected "binning" or "angle_proximity")')

# Only reached when a dataset was actually generated above.
print("Dataset Generation Complete, Filepath for model generation: \n", "../" + synthetic_image_set_dir + 'bins/' + file_descript + '_bin_features.csv')

Completed!
Dataset Generation Complete, Filepath for model generation: 
 ../images_data/mag5_1608_47deg_gray/bins/mag5_1608_47deg_bin_features.csv
