# Procedure for creating Datasets
The following must be run before run_training.ipynb. Data from the DeepMoon project are used: masks for MRCNN are created from the crater tables in the *.hdf5 files (available at: https://zenodo.org/record/1133969#.X4GfBpqxU08 ).

Datasets are stored in *.h5 files and then imported into Google Drive, ready for training.

### Initialization:
Importing libraries

In [None]:
import pandas as pd
import h5py
import numpy as np
import cv2
import tables
import matplotlib.pyplot as plt

Path to files:

In [None]:
# Root folder holding the DeepMoon *.hdf5 files; every path below is built from it.
zenodo_path = r'/media/sirbastiano/Volume E/LINUX/Zenodo'

# Test split: images file and crater-table file.
test_images_path = f'{zenodo_path}/test_images.hdf5'
test_craters_path = f'{zenodo_path}/test_craters.hdf5'

# Dev split: images file and crater-table file.
dev_images_path = f'{zenodo_path}/dev_images.hdf5'
dev_craters_path = f'{zenodo_path}/dev_craters.hdf5'

# Train split: images file and crater-table file.
train_images_path = f'{zenodo_path}/train_images.hdf5'
train_craters_path = f'{zenodo_path}/train_craters.hdf5'

Reading craters tables and database images

# Defining function for creating Masks

In [None]:
def create_mask(tab):
    """Draw one 256x256 circle-outline mask per row of a crater table.

    Args:
        tab: Table-like object indexable by column name that provides the
            columns ``'x'``, ``'y'`` and ``'Diameter (pix)'``, one entry per
            crater. ``x``/``y`` are used as the circle centre and half of
            ``'Diameter (pix)'`` as its radius (all truncated to int).

    Returns:
        Integer array of shape ``(256, 256, n)``, where ``n`` is the number
        of rows in ``tab``. Layer ``k`` is zero everywhere except for a
        2-pixel-thick circle of value 255 drawn for row ``k``. An empty
        table yields an array of shape ``(256, 256, 0)``.
    """
    # Plain arrays give positional access, so a table whose index is not
    # 0..n-1 (e.g. after filtering) is handled correctly.
    xs = np.asarray(tab['x'])
    ys = np.asarray(tab['y'])
    radii = np.asarray(tab['Diameter (pix)']) / 2

    color = 255
    thickness = 2

    # Collect the layers and stack once at the end, instead of re-copying the
    # whole stack on every iteration.
    layers = []
    for x, y, radius in zip(xs, ys, radii):
        layer = np.zeros((256, 256, 1), dtype=np.uint8)
        layer = cv2.circle(layer, (int(x), int(y)), int(radius), color, thickness)
        layers.append(layer)

    if not layers:
        return np.empty((256, 256, 0), dtype=int)

    # Cast to the platform integer type so the output dtype matches what
    # earlier versions of this function produced.
    return np.dstack(layers).astype(int)

In [None]:
# Move to the Zenodo data folder (same path as zenodo_path) and list its contents.
%cd /media/sirbastiano/Volume E/LINUX/Zenodo
!ls

In [None]:
# Open the training crater tables (pandas HDFStore) and the training images
# (h5py file), both read-only.
dataset = pd.HDFStore(train_craters_path, 'r')
train_images = h5py.File(train_images_path,'r')
# Keys of the crater store; each key is used below to fetch one crater table.
imgs = list(dataset.keys())

In [None]:
# Position of the largest 'Diameter (km)' value in the table stored under the
# second key (nothing is printed in this cell).
d = np.array(dataset[imgs[1]]['Diameter (km)'])
arg = d.argmax()

In [None]:
# Show the position found by np.argmax in the previous cell.
print(arg)

In [None]:
# Pixel diameter stored under label 117 of the same table.
# NOTE(review): 117 is presumably the value of `arg` printed above — confirm,
# or index with `arg` instead of the hard-coded number.
dataset[imgs[1]]['Diameter (pix)'][117]

In [None]:
# Show the image at index 1 in grayscale, with the displayed intensity range
# limited to [60, 250].
a = train_images['input_images'][1]
plt.imshow(a, cmap='gray', vmin=60, vmax=250)

In [None]:
# Build the mask stack for the table under the second key and display one layer.
mask = create_mask(dataset[imgs[1]])
# Bare expression: it is not the last statement of the cell, so its value is
# not displayed.
mask.shape
# Iterator over layer indices 0..99; the first next() call yields 0, so the
# first mask layer is the one shown.
iteratore = iter(list(range(100)))
x = next(iteratore)
plt.imshow(mask[:,:,x])

# Training dataset creation:

In [None]:
# Move to the DATASETS folder and list its contents; the output files below
# are opened with relative paths, so they are created in this folder.
%cd /media/sirbastiano/Volume E/LINUX/DATASETS
!ls

In [None]:
# Open the training output file for writing (mode 'w') in the current working
# directory.
train_dts = h5py.File('train_dts_full_thin2px.h5', 'w')
# GROUP1: mask stacks (targets), one dataset per sample
tgt =  train_dts.create_group('input_targets')
# GROUP2: input images, one dataset per sample
images = train_dts.create_group('input_imgs')
print("DATASET TRAIN CREATED")

In [None]:
# Fill the training file: for every index i, store the mask stack under
# input_targets/<i> and the image under input_imgs/<i>.
# NOTE(review): assumes imgs[i] and train_images['input_images'][i] refer to
# the same sample, and that at least `count` samples exist — TODO confirm.
count = 30000

# try/finally makes sure the output file is closed even when an iteration
# fails, so a failure mid-loop does not leave it open for writing.
try:
    for i in range(count):
        Mask = create_mask(dataset[imgs[i]])
        tgt.create_dataset(str(i), data=Mask, chunks=True, compression='gzip')
        print(f"Mask n.{i+1} of {count} completed with shape: {Mask.shape}")

        images.create_dataset(str(i), data=train_images['input_images'][i], chunks=True, compression='gzip')
        print(f"Image n.{i+1} of {count} imported\n")
finally:
    train_dts.close()

# Now we have to create the dataset for validation

In [None]:
# Switch the inputs to the dev split, which is used for the validation dataset.
# NOTE(review): `dataset` and `train_images` are reused, so `train_images` now
# holds the dev images; the handles previously bound to these names are
# rebound here without a visible close().
dataset = pd.HDFStore(dev_craters_path, 'r')
train_images = h5py.File(dev_images_path,'r')
imgs = list(dataset.keys())
# Display the table stored under the first key:
dataset[imgs[0]]

In [None]:
# Open the validation output file for writing (mode 'w') in the current
# working directory.
create_dts = h5py.File('validation_dts_full_thin2px.h5', 'w')
# GROUP1: mask stacks (targets), one dataset per sample
tgt =  create_dts.create_group('input_targets')
# GROUP2: input images, one dataset per sample
images = create_dts.create_group('input_imgs')

In [None]:
# Fill the validation file: for every index i, store the mask stack under
# input_targets/<i> and the image under input_imgs/<i>.
# NOTE(review): assumes imgs[i] and train_images['input_images'][i] refer to
# the same sample, and that at least `count` samples exist — TODO confirm.
count = 3000

# try/finally makes sure the output file is closed even when an iteration
# fails, so a failure mid-loop does not leave it open for writing.
try:
    for i in range(count):
        Mask = create_mask(dataset[imgs[i]])
        tgt.create_dataset(str(i), data=Mask, chunks=True, compression='gzip')
        print(f"Mask n.{i+1} of {count} completed with shape: {Mask.shape}")

        images.create_dataset(str(i), data=train_images['input_images'][i], chunks=True, compression='gzip')
        print(f"Image n.{i+1} of {count} imported\n")
finally:
    create_dts.close()

# Test Dataset creation


In [None]:
# Switch the inputs to the test split.
# NOTE(review): `dataset` and `train_images` are reused, so `train_images` now
# holds the test images; the handles previously bound to these names are
# rebound here without a visible close().
dataset = pd.HDFStore(test_craters_path, 'r')
train_images = h5py.File(test_images_path,'r')
imgs = list(dataset.keys())
# Display the table stored under the first key:
dataset[imgs[0]]

In [None]:
# Open the test output file for writing (mode 'w') in the current working
# directory.
create_dts = h5py.File('test_dts_full_thin2px.h5', 'w')
# GROUP1: mask stacks (targets), one dataset per sample
tgt =  create_dts.create_group('input_targets')
# GROUP2: input images, one dataset per sample
images = create_dts.create_group('input_imgs')

In [None]:
# Fill the test file: for every index i, store the mask stack under
# input_targets/<i> and the image under input_imgs/<i>.
# NOTE(review): assumes imgs[i] and train_images['input_images'][i] refer to
# the same sample, and that at least `count` samples exist — TODO confirm.
count = 3000

# try/finally makes sure the output file is closed even when an iteration
# fails, so a failure mid-loop does not leave it open for writing.
try:
    for i in range(count):
        Mask = create_mask(dataset[imgs[i]])
        tgt.create_dataset(str(i), data=Mask, chunks=True, compression='gzip')
        print(f"Mask n.{i+1} of {count} completed with shape: {Mask.shape}")

        images.create_dataset(str(i), data=train_images['input_images'][i], chunks=True, compression='gzip')
        print(f"Image n.{i+1} of {count} imported\n")
finally:
    create_dts.close()

In [None]:
# Re-open the generated test file read-only to check its contents.
# NOTE(review): the name `dir` shadows the builtin dir() for the rest of the
# session.
dir = r'/media/sirbastiano/Volume E/LINUX/DATASETS'
data = h5py.File(dir+'/test_dts_full_thin2px.h5', 'r')

In [None]:
# Display layer index 8 of the mask stack stored for sample '0'.
# NOTE(review): presumably sample '0' has at least 9 layers — confirm.
mask = data['input_targets']['0']
plt.imshow(mask[:,:,8])

In [None]:
# Display the image stored for sample '0' (default colormap).
img = data['input_imgs']['0']
plt.imshow(img)