# Extract a subset of CFIS data

For now, the idea is to collect the star catalogs of a single CCD from the full set of catalogs, and then split each of them into a training and a test subset.

In [1]:
%matplotlib notebook
import sys
import os
sys.path.append('../../')
import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl
from matplotlib.colors import ListedColormap, LinearSegmentedColormap
from mpl_toolkits.mplot3d import Axes3D
from astropy.io import fits
from ShapePipe.shapepipe.pipeline import file_io
import glob
import shutil

## Extract all the stars of one CCD

In [2]:
# Parameters
# CCD number; it is used below to build the name of the output folder
# ('CFIS_CCDn_<CCDn>').
CCDn = 38


In [3]:
# Location of the star catalogs (FITS files)
# NOTE(review): absolute, machine-specific path; adapt it before running the
# notebook on another machine.
path = '/Users/tliaudat/Documents/PhD/codes/venv_p3/JB-data/other-JB-data/all_w3_star_cat/'
file_name = 'star_selection-2079613-0'
ext = '.fits'

# Collect all the catalogs of the chosen CCD. The CCD number is taken from
# CCDn instead of being typed into the pattern, so the selected files always
# agree with the 'CFIS_CCDn_<CCDn>' folder name. glob returns matches in
# arbitrary order, hence the sort to get the same processing order on every run.
paths = sorted(glob.glob(path + 'star_selection-*-' + str(CCDn) + ext))


In [4]:
# Create the new folder
save_path = os.getcwd() + '/CFIS_CCDn_' + str(CCDn) + '/'

try:
    # os.mkdir raises FileExistsError when the folder is already there; in
    # that case the folder is left untouched and nothing is copied.
    os.mkdir('CFIS_CCDn_' + str(CCDn))
except FileExistsError:
    pass
else:
    # Copy all the selected catalogs into the freshly created folder. This is
    # done outside the try block so that a failing copy (missing source,
    # permissions, full disk, ...) is reported instead of silently ignored.
    for src_path in paths:
        shutil.copy(src_path, save_path)
    


## Useful functions

In [51]:
# Define fits saving function

def save_fits(vignet, xwin, ywin, CCDn, save_path, example_fits_path):
    """Write a star subset to a FITS catalog through ShapePipe's file_io.

    Parameters
    ----------
    vignet :
        Values stored under the 'VIGNET' key.
    xwin :
        Values stored under the 'XWIN_IMAGE' key.
    ywin :
        Values stored under the 'YWIN_IMAGE' key.
    CCDn :
        Not used by this function; kept so existing calls stay valid.
    save_path :
        Path handed to ``file_io.FITSCatalog`` for the output catalog.
    example_fits_path :
        Path forwarded to ``save_as_fits`` as ``sex_cat_path``
        (presumably the catalog used as a template -- confirm against the
        ShapePipe documentation).
    """
    # Columns of the output catalog, keyed by their column names
    columns = {'VIGNET': vignet, 'XWIN_IMAGE': xwin, 'YWIN_IMAGE': ywin}

    read_write = file_io.BaseCatalog.OpenMode.ReadWrite
    catalog = file_io.FITSCatalog(
        save_path,
        open_mode=read_write,
        SEx_catalog=True,
    )
    catalog.save_as_fits(columns, sex_cat_path=example_fits_path)

## Generate a train and a test set

In [52]:
# Parameters
# Fraction of the stars of each catalog that goes to the training subset;
# the remaining stars form the test subset.
train_per = 0.8
# CCD number used in the names of the train/test folders; it repeats the
# value already set earlier in the notebook.
CCDn = 38

In [53]:
# Create the new folders
train_folder = 'CFIS_CCDn_' + str(CCDn) + '-train'
test_folder = 'CFIS_CCDn_' + str(CCDn) + '-test'
save_path_train = os.getcwd() + '/' +  train_folder + '/'
save_path_test = os.getcwd() + '/' +  test_folder + '/'

# Each folder is created on its own and an already existing folder is
# accepted. This way the test folder is still created when only the train
# folder is present, and any other failure (e.g. permissions) is reported
# rather than swallowed.
for folder in (train_folder, test_folder):
    os.makedirs(folder, exist_ok=True)

In [54]:
# Split every selected catalog into a training and a testing catalog.
for cat_path in paths:

    # Open the file in a context manager so that its handle is released at the
    # end of each iteration instead of staying open for every catalog.
    with fits.open(cat_path) as fits_file:
        starcat = fits_file[2].data

        # Calculate subsets ids
        star_nb = starcat['VIGNET'].shape[0]
        train_star_nb = int(np.floor(train_per*star_nb))
        test_star_nb = star_nb - train_star_nb

        # Random shuffle of the star indices: the first train_star_nb ids go
        # to the training subset, the rest to the testing subset.
        # NOTE(review): no random seed is set, so the split differs between runs.
        rand_seq = np.random.permutation(star_nb)
        train_ids = rand_seq[:train_star_nb]
        test_ids = rand_seq[train_star_nb:]

        # The input catalog is also the example catalog given to save_fits,
        # and the output files keep the name of the input file.
        example_fits_path = cat_path
        cat_name = os.path.basename(cat_path)

        # Training subset first, then testing subset
        for subset_ids, subset_folder in ((train_ids, save_path_train),
                                          (test_ids, save_path_test)):
            my_vignet = starcat['VIGNET'][subset_ids,:,:]
            my_xwin = starcat['XWIN_IMAGE'][subset_ids]
            my_ywin = starcat['YWIN_IMAGE'][subset_ids]

            # Prepare paths and save
            save_path = subset_folder + cat_name
            save_fits(my_vignet, my_xwin, my_ywin, CCDn, save_path, example_fits_path)