# Database creation
This notebook is used to explore the data. Once the objective is reached, the functions will be moved into `.py` files so that they can be used by the modelling part of the project.

In [1]:
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from astropy.io import fits
from tabulate import tabulate

from dataset_creation_utils import *
# Retrieve the contrast curves from the database
# NOTE(review): absolute, machine-specific location; adjust it when running elsewhere.
path_db = 'C:/Users/ludin/Documents/Master Thesis/Dataset_creation/SPHERE_DC_DATA/'
foldername = '2MASS J01543773+0043005_DB_H23_2014-10-07_ird_specal_dc_cADI_softsorting_200967/'
filename = 'ird_specal_dc-IRD_SPECAL_CONTRAST_CURVE_TABLE-contrast_curve_tab.fits'

# Build the nested paths once so the checks and the read use the same values.
folder_path = os.path.join(path_db, foldername)
file_path = os.path.join(folder_path, filename)

# Fail fast, naming the first missing level of the path. Only printing the
# message would let execution continue into fits.open, which then fails on the
# same missing path. An exception is raised instead of calling exit() because
# exit doesn't print anything in jupyter notebook.
if not os.path.exists(path_db):
    raise FileNotFoundError('ERROR! Folder {} does not exist.'.format(path_db))

if not os.path.exists(folder_path):
    raise FileNotFoundError('ERROR! Folder {} does not exist.'.format(foldername))

if not os.path.exists(file_path):
    raise FileNotFoundError('ERROR! File {} does not exist.'.format(filename))

with fits.open(file_path) as hdul:
    table_hdu = hdul[1]  # HDU at index 1: the table inspected below
    data = table_hdu.data  # numpy record
    print(data.dtype)
    print(table_hdu.header['DATE-OBS'])
    print(data['TARGET_NAME'])
    # Separation values stored in the first row of the table
    print(data['SEPARATION'][0])


(numpy.record, [('TARGET_NAME', 'S3'), ('LAM', 'S10'), ('PIXSCALE', 'S10'), ('NSIGMA', 'S3'), ('REPERTORY', 'S55'), ('SEPARATION', '>f4', (1148,)), ('NSIGMA_CONTRAST', '>f4', (1148,))])
2014-10-08T05:51:18.0343
['FS4' 'FS4' 'FS4']
[6.12500e-03 2.79787e-02 4.98323e-02 ... 2.50286e+01 2.50504e+01
 2.50723e+01]


In [2]:
# FITS header keywords to carry into the table next to the curve columns
header_keywords = ['ESO OBS ID', 'DATE-OBS', 'OBJECT']

# One table built from everything found under path_db
# (presumably one row per contrast-curve file; confirm in dataset_creation_utils)
df = get_df_with_headers(path_db, header_keywords)
df

Unnamed: 0,ESO OBS ID,DATE-OBS,OBJECT,SEPARATION,NSIGMA_CONTRAST
0,2177825,2018-09-15T07:33:22.60,CD-52 381,"[0.006105000153183937, 0.030416774099634947, 0...","[0.07546933740377426, 0.07713940528902535, 0.0..."
1,1182440,2016-04-04T07:59:57.8491,ScoPMS_048,"[0.006105000153183937, 0.030416774099634947, 0...","[0.024168867617845535, 0.013730244495663348, 0..."
2,1182371,2015-04-12T07:15:49.8435,HIP_76629,"[0.006105000153183937, 0.030416774099634947, 0...","[0.01883835159242153, 0.008468871857297717, 0...."
3,1962017,2018-07-05T01:24:56.95,RXJ1846,"[0.006105000153183937, 0.030416774099634947, 0...","[0.13829022645950317, 0.12529467963061497, 0.0..."
4,1962017,2018-07-05T01:24:56.95,RXJ1846,"[0.006105000153183937, 0.030416774099634947, 0...","[0.13829022645950317, 0.12529467963061497, 0.0..."
...,...,...,...,...,...
509,2296758,2019-03-20T02:23:21.23,TYC 7692-2943-2,"[0.006105000153183937, 0.030416774099634947, 0...","[0.021788790822029114, 0.01076616752617711, 0...."
510,2296758,2019-03-25T02:18:36.93,TYC 7692-2943-2,"[0.006105000153183937, 0.030416774099634947, 0...","[0.011126062832772732, 0.00704742663727633, 0...."
511,1424674,2016-10-02T08:50:23.1564,HIP 28153,"[0.006105000153183937, 0.030416774099634947, 0...","[0.030811656266450882, 0.0077246025149240675, ..."
512,2028801,2018-11-01T07:33:29.10,TYC 8097-337-1,"[0.006105000153183937, 0.030416774099634947, 0...","[0.011241959407925606, 0.006864568145209425, 0..."


In [3]:
# Show the dtype pandas assigned to every column of df
column_dtypes = df.dtypes
print(column_dtypes)

ESO OBS ID          int64
DATE-OBS           object
OBJECT             object
SEPARATION         object
NSIGMA_CONTRAST    object
dtype: object


In [4]:
# Presumably writes summary statistics of df to a file located via path_db;
# the helper comes from dataset_creation_utils. TODO confirm what is written and where.
write_stats_in_file(df, path_db)

In [5]:
# Folder handed to the plotting helper (presumably where the figures are saved; TODO confirm)
dataset_creation_dir = 'C:/Users/ludin/Documents/Master Thesis/Dataset_creation'
plot_contrast_curves(df, dataset_creation_dir)

In [5]:
# The curves do not all have the same length from one observation to another,
# so record the number of samples and the separation range of each row.
# LENGTH SEPARATION is measured on the SEPARATION column itself (it was
# previously taken from NSIGMA_CONTRAST, which did not match the column name).
df['LENGTH SEPARATION'] = df['SEPARATION'].map(len)
df['MIN SEPARATION'] = df['SEPARATION'].map(np.min)
df['MAX SEPARATION'] = df['SEPARATION'].map(np.max)
df

Unnamed: 0,ESO OBS ID,DATE-OBS,OBJECT,SEPARATION,NSIGMA_CONTRAST,LENGTH SEPARATION,MIN SEPARATION,MAX SEPARATION
0,2177825,2018-09-15T07:33:22.60,CD-52 381,"[0.006105000153183937, 0.030416774099634947, 0...","[0.07546933740377426, 0.07713940528902535, 0.0...",1033,0.006105,25.095856
1,1182440,2016-04-04T07:59:57.8491,ScoPMS_048,"[0.006105000153183937, 0.030416774099634947, 0...","[0.024168867617845535, 0.013730244495663348, 0...",1033,0.006105,25.095856
2,1182371,2015-04-12T07:15:49.8435,HIP_76629,"[0.006105000153183937, 0.030416774099634947, 0...","[0.01883835159242153, 0.008468871857297717, 0....",1033,0.006105,25.095856
3,1962017,2018-07-05T01:24:56.95,RXJ1846,"[0.006105000153183937, 0.030416774099634947, 0...","[0.13829022645950317, 0.12529467963061497, 0.0...",1033,0.006105,25.095856
4,1962017,2018-07-05T01:24:56.95,RXJ1846,"[0.006105000153183937, 0.030416774099634947, 0...","[0.13829022645950317, 0.12529467963061497, 0.0...",1033,0.006105,25.095856
...,...,...,...,...,...,...,...,...
509,2296758,2019-03-20T02:23:21.23,TYC 7692-2943-2,"[0.006105000153183937, 0.030416774099634947, 0...","[0.021788790822029114, 0.01076616752617711, 0....",1033,0.006105,25.095856
510,2296758,2019-03-25T02:18:36.93,TYC 7692-2943-2,"[0.006105000153183937, 0.030416774099634947, 0...","[0.011126062832772732, 0.00704742663727633, 0....",1033,0.006105,25.095856
511,1424674,2016-10-02T08:50:23.1564,HIP 28153,"[0.006105000153183937, 0.030416774099634947, 0...","[0.030811656266450882, 0.0077246025149240675, ...",1033,0.006105,25.095856
512,2028801,2018-11-01T07:33:29.10,TYC 8097-337-1,"[0.006105000153183937, 0.030416774099634947, 0...","[0.011241959407925606, 0.006864568145209425, 0...",1033,0.006105,25.095856
