# Database creation
This notebook is used to explore the data; once the objective is reached, the functions will be moved into .py files so that they can be used by the modelling part of the project.

In [64]:
import os

import pandas as pd
from astropy.io import fits
from tabulate import tabulate
# Retrieve the contrast curves from the database
path_db = 'C:/Users/ludin/Documents/Master Thesis/DB_creation/SPHERE_DC_DATA/'
foldername = '2MASS J01543773+0043005_DB_H23_2014-10-07_ird_specal_dc_cADI_softsorting_200967/'
filename = 'ird_specal_dc-IRD_SPECAL_CONTRAST_CURVE_TABLE-contrast_curve_tab.fits'

# Join the path components once so the checks and the read use the same paths.
contrast_folder_path = os.path.join(path_db, foldername)
contrast_file_path = os.path.join(contrast_folder_path, filename)

# Report only the first missing component and do not attempt to open the file
# when something is missing (previously the cell printed the error and then
# failed anyway inside fits.open).
# print is used instead of exit: exit doesn't print anything in jupyter notebook
if not os.path.exists(path_db):
    print('ERROR! Folder {} does not exist.'.format(path_db))
elif not os.path.exists(contrast_folder_path):
    print('ERROR! Folder {} does not exist.'.format(foldername))
elif not os.path.exists(contrast_file_path):
    print('ERROR! File {} does not exist.'.format(filename))
else:
    with fits.open(contrast_file_path) as hdul:
        data = hdul[1].data  # numpy record
        # Quick look at the table layout, the observation date and the targets.
        print(data.dtype)
        print(hdul[1].header['DATE-OBS'])
        print(data['TARGET_NAME'])


(numpy.record, [('TARGET_NAME', 'S3'), ('LAM', 'S10'), ('PIXSCALE', 'S10'), ('NSIGMA', 'S3'), ('REPERTORY', 'S55'), ('SEPARATION', '>f4', (1148,)), ('NSIGMA_CONTRAST', '>f4', (1148,))])
2014-10-08T05:51:18.0343
['FS4' 'FS4' 'FS4']


In [72]:
def write_headers_in_file(fits_headers, file_path, filename='headers.txt', table_format='psql'):
    """
    Write fits headers in a file.

    The keyword, value and comment of every card in ``fits_headers.cards``
    are formatted as a table with ``tabulate`` and written to
    ``file_path/filename``. An existing file with that name is overwritten.

    Parameters
    ----------
    fits_headers : object
        Header object exposing a ``cards`` iterable whose items have
        ``keyword``, ``value`` and ``comment`` attributes (presumably an
        ``astropy.io.fits.Header`` -- confirm against the callers).
    file_path : str
        Existing folder in which the file is created.
    filename : str, optional
        Name of the file to write inside ``file_path``.
    table_format : str, optional
        Passed to ``tabulate`` as ``tablefmt``.

    Raises
    ------
    FileNotFoundError
        If ``file_path`` is not an existing folder.
    """
    # Raise instead of calling exit(): the error stays visible in a notebook
    # and the function remains usable from a regular module.
    if not os.path.isdir(file_path):
        raise FileNotFoundError('ERROR! Folder {} does not exist.'.format(file_path))

    # Build the whole table first so that a failure while reading the cards
    # does not leave an empty file behind.
    table = [[card.keyword, card.value, card.comment] for card in fits_headers.cards]
    rendered = tabulate(table, headers=['Keyword', 'Value', 'Comment'], tablefmt=table_format)

    # os.path.join inserts the separator itself, so file_path may or may not
    # end with '/'.
    with open(os.path.join(file_path, filename), 'w') as f:
        f.write(rendered)

In [90]:
def get_folder_names(path):
    """
    Return the names of the sub-folders found directly inside ``path``.

    Every entry reported by ``os.listdir(path)`` is kept when
    ``os.path.isdir`` is true for it; files are left out. The names are
    returned in the order ``os.listdir`` yields them.
    """
    return [
        entry
        for entry in os.listdir(path)
        if os.path.isdir(os.path.join(path, entry))
    ]

In [96]:
def get_df_with_headers(path, header_list=None, contrast_filename=None):
    """
    Get a dataframe with the separation, contrast and headers specified in the list.

    Each sub-folder of ``path`` is expected to contain the contrast curve
    table; one dataframe row is produced per sub-folder.

    Parameters
    ----------
    path : str
        Folder whose direct sub-folders are scanned.
    header_list : list of str, optional
        Header keywords read from HDU 1 and stored as columns of the same
        name. ``None`` (the default) means no header column.
    contrast_filename : str, optional
        Name of the FITS file opened in every sub-folder. ``None`` (the
        default) falls back to the module-level ``filename``.

    Returns
    -------
    pandas.DataFrame
        One row per sub-folder, holding the requested headers plus the
        ``SEPARATION`` and ``NSIGMA_CONTRAST`` columns of HDU 1.

    Raises
    ------
    FileNotFoundError
        If ``path`` is not an existing folder. ``fits.open`` will also fail
        for any sub-folder that does not contain the file.
    """
    # Raise instead of calling exit(): the error stays visible in a notebook
    # and the function remains usable from a regular module.
    if not os.path.isdir(path):
        raise FileNotFoundError('ERROR! Folder {} does not exist.'.format(path))

    # None replaces the former mutable default ([]).
    headers = [] if header_list is None else list(header_list)

    # Keep the historical behaviour (global `filename`) unless told otherwise.
    if contrast_filename is None:
        contrast_filename = filename

    data_dict_list = []

    for folder in get_folder_names(path):
        with fits.open(os.path.join(path, folder, contrast_filename)) as hdul:
            fits_data = hdul[1].data
            fits_headers = hdul[1].header

            data_dict = {header: fits_headers[header] for header in headers}

            # Written after the headers, so these two columns always hold the
            # table data even if the same names appear in header_list.
            data_dict['SEPARATION'] = fits_data['SEPARATION']
            data_dict['NSIGMA_CONTRAST'] = fits_data['NSIGMA_CONTRAST']

        data_dict_list.append(data_dict)

    return pd.DataFrame(data_dict_list)
            

In [95]:
# Collect the contrast curves of every folder under path_db, keeping the
# observation id, observation date and object name headers as columns.
df = get_df_with_headers(
    path_db,
    header_list=['ESO OBS ID', 'DATE-OBS', 'OBJECT'],
)