# Database creation
This notebook is used to explore the data. Once the objective is reached, the functions will be moved into .py files so that they can be used by the modelling part of the project.

In [1]:
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from astropy.io import fits
from tabulate import tabulate

from dataset_creation_utils import *
# Open one contrast-curve table from the local copy of the database and print
# a few of its fields as a sanity check.
path_db = 'C:/Users/ludin/Documents/Master Thesis/Dataset_creation/SPHERE_DC_DATA/'
foldername = '2MASS J01543773+0043005_DB_H23_2014-10-07_ird_specal_dc_cADI_softsorting_200967/'
filename = 'ird_specal_dc-IRD_SPECAL_CONTRAST_CURVE_TABLE-contrast_curve_tab.fits'

# Build the nested paths once instead of re-joining them for every check.
folder_path = os.path.join(path_db, foldername)
file_path = os.path.join(folder_path, filename)

# Stop at the first missing level. Printing a message and carrying on only
# postponed the failure to fits.open(); an exception is displayed by Jupyter
# and halts the cell at the real cause.
for kind, label, location in (
    ('Folder', path_db, path_db),
    ('Folder', foldername, folder_path),
    ('File', filename, file_path),
):
    if not os.path.exists(location):
        raise FileNotFoundError('ERROR! {} {} does not exist.'.format(kind, label))

with fits.open(file_path) as hdul:
    data = hdul[1].data  # table of the first extension, a numpy record array
    print(data.dtype)
    print(hdul[1].header['DATE-OBS'])
    print(data['TARGET_NAME'])
    print(data['SEPARATION'][0])


(numpy.record, [('TARGET_NAME', 'S3'), ('LAM', 'S10'), ('PIXSCALE', 'S10'), ('NSIGMA', 'S3'), ('REPERTORY', 'S55'), ('SEPARATION', '>f4', (1148,)), ('NSIGMA_CONTRAST', '>f4', (1148,))])
2014-10-08T05:51:18.0343
['FS4' 'FS4' 'FS4']
[6.12500e-03 2.79787e-02 4.98323e-02 ... 2.50286e+01 2.50504e+01
 2.50723e+01]


In [8]:
# FITS header keywords requested alongside the contrast curves.
# NOTE(review): get_df_with_headers is not defined in this notebook; it
# presumably comes from the dataset_creation_utils star import - confirm.
header_keywords = ['ESO OBS ID', 'DATE-OBS', 'OBJECT']
df = get_df_with_headers(path_db, header_keywords)
df

Unnamed: 0,ESO OBS ID,DATE-OBS,OBJECT,SEPARATION,NSIGMA_CONTRAST
0,2177825,2018-09-15T07:33:22.60,CD-52 381,"[0.0061185, 0.029195273, 0.052272048, 0.075348...","[0.07546934, 0.081046954, 0.0072249672, 0.0053..."
1,1182440,2016-04-04T07:59:57.8491,ScoPMS_048,"[0.01225, 0.037514854, 0.06277971, 0.08804456,...","[0.024168868, 0.00965169, 0.002824313, 0.00203..."
2,1182371,2015-04-12T07:15:49.8435,HIP_76629,"[0.01225, 0.038996268, 0.06574254, 0.0924888, ...","[0.018838352, 0.003571751, 0.0013111836, 0.000..."
3,1962017,2018-07-05T01:24:56.95,RXJ1846,"[0.024482, 0.080440864, 0.13639972, 0.19235858...","[0.13829023, 0.015755469, 0.005646824, 0.00268..."
4,1962017,2018-07-05T01:24:56.95,RXJ1846,"[0.024482, 0.080440864, 0.13639972, 0.19235858...","[0.13829023, 0.015755469, 0.005646824, 0.00268..."
...,...,...,...,...,...
509,2296758,2019-03-20T02:23:21.23,TYC 7692-2943-2,"[0.012247, 0.04235727, 0.07246754, 0.1025778, ...","[0.02178879, 0.003522512, 0.00074191904, 0.000..."
510,2296758,2019-03-25T02:18:36.93,TYC 7692-2943-2,"[0.012247, 0.041378073, 0.07050915, 0.09964023...","[0.011126063, 0.0045869034, 0.00084505096, 0.0..."
511,1424674,2016-10-02T08:50:23.1564,HIP 28153,"[0.006125, 0.030178692, 0.054232385, 0.0782860...","[0.030811656, 0.0077921706, 0.00096568564, 0.0..."
512,2028801,2018-11-01T07:33:29.10,TYC 8097-337-1,"[0.01223, 0.03781431, 0.06339863, 0.08898293, ...","[0.011241959, 0.005084048, 0.0022800567, 0.001..."


In [3]:
# Show which dtype pandas assigned to every column of the frame
column_dtypes = df.dtypes
print(column_dtypes)

ESO OBS ID          int64
DATE-OBS           object
OBJECT             object
SEPARATION         object
NSIGMA_CONTRAST    object
dtype: object


In [4]:
# Pass the frame and the database path to write_stats_in_file.
# NOTE(review): the helper is not defined in this notebook (presumably it comes
# from the dataset_creation_utils star import); what it writes and where is
# not visible from here - confirm against its definition.
write_stats_in_file(df, path_db)

In [5]:
# Second argument handed to plot_contrast_curves.
# NOTE(review): presumably the directory where the plots are saved - confirm
# against the helper's definition.
plots_dir = 'C:/Users/ludin/Documents/Master Thesis/Dataset_creation'
plot_contrast_curves(df, plots_dir)

In [9]:
# The curves do not all have the same number of samples from one observation
# to another, so record each observation's array lengths and separation range.
# Each length column is computed from the array it is named after; the two
# source columns used to be swapped.
df['LENGTH NSIGMA_CONTRAST'] = df['NSIGMA_CONTRAST'].apply(len)
df['LENGTH SEPARATION'] = df['SEPARATION'].apply(len)
df['MIN SEPARATION'] = df['SEPARATION'].apply(lambda sep: np.min(sep))
df['MAX SEPARATION'] = df['SEPARATION'].apply(lambda sep: np.max(sep))
df

Unnamed: 0,ESO OBS ID,DATE-OBS,OBJECT,SEPARATION,NSIGMA_CONTRAST,LENGTH NSIGMA_CONTRAST,LENGTH SEPARATION,MIN SEPARATION,MAX SEPARATION
0,2177825,2018-09-15T07:33:22.60,CD-52 381,"[0.0061185, 0.029195273, 0.052272048, 0.075348...","[0.07546934, 0.081046954, 0.0072249672, 0.0053...",1086,1086,0.006119,25.044418
1,1182440,2016-04-04T07:59:57.8491,ScoPMS_048,"[0.01225, 0.037514854, 0.06277971, 0.08804456,...","[0.024168868, 0.00965169, 0.002824313, 0.00203...",993,993,0.012250,25.074986
2,1182371,2015-04-12T07:15:49.8435,HIP_76629,"[0.01225, 0.038996268, 0.06574254, 0.0924888, ...","[0.018838352, 0.003571751, 0.0013111836, 0.000...",938,938,0.012250,25.073503
3,1962017,2018-07-05T01:24:56.95,RXJ1846,"[0.024482, 0.080440864, 0.13639972, 0.19235858...","[0.13829023, 0.015755469, 0.005646824, 0.00268...",448,448,0.024482,25.038092
4,1962017,2018-07-05T01:24:56.95,RXJ1846,"[0.024482, 0.080440864, 0.13639972, 0.19235858...","[0.13829023, 0.015755469, 0.005646824, 0.00268...",448,448,0.024482,25.038092
...,...,...,...,...,...,...,...,...,...
509,2296758,2019-03-20T02:23:21.23,TYC 7692-2943-2,"[0.012247, 0.04235727, 0.07246754, 0.1025778, ...","[0.02178879, 0.003522512, 0.00074191904, 0.000...",833,833,0.012247,25.063992
510,2296758,2019-03-25T02:18:36.93,TYC 7692-2943-2,"[0.012247, 0.041378073, 0.07050915, 0.09964023...","[0.011126063, 0.0045869034, 0.00084505096, 0.0...",861,861,0.012247,25.064972
511,1424674,2016-10-02T08:50:23.1564,HIP 28153,"[0.006125, 0.030178692, 0.054232385, 0.0782860...","[0.030811656, 0.0077921706, 0.00096568564, 0.0...",1043,1043,0.006125,25.070070
512,2028801,2018-11-01T07:33:29.10,TYC 8097-337-1,"[0.01223, 0.03781431, 0.06339863, 0.08898293, ...","[0.011241959, 0.005084048, 0.0022800567, 0.001...",979,979,0.012230,25.033688


In [11]:
# Resample every contrast curve onto one shared separation grid so that all
# observations end up with the same number of points.
# The grid runs from the smallest to the largest separation found in any
# observation and has as many points as the longest curve.

# np.interp does not verify that its x-coordinates are increasing and returns
# meaningless values when they are not, so check that precondition up front.
is_increasing = df['SEPARATION'].apply(lambda sep: bool(np.all(np.diff(sep) > 0)))
if not is_increasing.all():
    raise ValueError(
        'SEPARATION is not strictly increasing for rows {}; np.interp would return nonsense.'.format(
            list(df.index[~is_increasing])
        )
    )

# Number of points of the observation with the most points.
max_points = np.max(df['LENGTH NSIGMA_CONTRAST'])

# Smallest and largest separation over all the observations.
min_sep = np.min(df['MIN SEPARATION'])
max_sep = np.max(df['MAX SEPARATION'])

# Shared separation grid.
new_sep = np.linspace(min_sep, max_sep, max_points)

# Linear interpolation of each curve on the shared grid.
# Caveat: grid points that fall outside a curve's own separation range receive
# that curve's first/last contrast value (np.interp's default behaviour), so
# curves starting at a larger separation begin with a run of repeated values.
# TO BE TESTED ON SMALL EXAMPLES !
df['NSIGMA_CONTRAST INTERPOLATED'] = df.apply(
    lambda row: np.interp(new_sep, row['SEPARATION'], row['NSIGMA_CONTRAST']),
    axis=1,
)

# Display the frame so that re-running the cell reproduces the table stored below it.
df

Unnamed: 0,ESO OBS ID,DATE-OBS,OBJECT,SEPARATION,NSIGMA_CONTRAST,LENGTH NSIGMA_CONTRAST,LENGTH SEPARATION,MIN SEPARATION,MAX SEPARATION,NSIGMA_CONTRAST INTERPOLATED
0,2177825,2018-09-15T07:33:22.60,CD-52 381,"[0.0061185, 0.029195273, 0.052272048, 0.075348...","[0.07546934, 0.081046954, 0.0072249672, 0.0053...",1086,1086,0.006119,25.044418,"[0.07546933740377426, 0.07892734708761207, 0.0..."
1,1182440,2016-04-04T07:59:57.8491,ScoPMS_048,"[0.01225, 0.037514854, 0.06277971, 0.08804456,...","[0.024168868, 0.00965169, 0.002824313, 0.00203...",993,993,0.012250,25.074986,"[0.024168867617845535, 0.019471150524439013, 0..."
2,1182371,2015-04-12T07:15:49.8435,HIP_76629,"[0.01225, 0.038996268, 0.06574254, 0.0924888, ...","[0.018838352, 0.003571751, 0.0013111836, 0.000...",938,938,0.012250,25.073503,"[0.01883835159242153, 0.014171751253349199, 0...."
3,1962017,2018-07-05T01:24:56.95,RXJ1846,"[0.024482, 0.080440864, 0.13639972, 0.19235858...","[0.13829023, 0.015755469, 0.005646824, 0.00268...",448,448,0.024482,25.038092,"[0.13829022645950317, 0.13829022645950317, 0.1..."
4,1962017,2018-07-05T01:24:56.95,RXJ1846,"[0.024482, 0.080440864, 0.13639972, 0.19235858...","[0.13829023, 0.015755469, 0.005646824, 0.00268...",448,448,0.024482,25.038092,"[0.13829022645950317, 0.13829022645950317, 0.1..."
...,...,...,...,...,...,...,...,...,...,...
509,2296758,2019-03-20T02:23:21.23,TYC 7692-2943-2,"[0.012247, 0.04235727, 0.07246754, 0.1025778, ...","[0.02178879, 0.003522512, 0.00074191904, 0.000...",833,833,0.012247,25.063992,"[0.021788790822029114, 0.01682725397670666, 0...."
510,2296758,2019-03-25T02:18:36.93,TYC 7692-2943-2,"[0.012247, 0.041378073, 0.07050915, 0.09964023...","[0.011126063, 0.0045869034, 0.00084505096, 0.0...",861,861,0.012247,25.064972,"[0.011126062832772732, 0.009290174658109586, 0..."
511,1424674,2016-10-02T08:50:23.1564,HIP 28153,"[0.006125, 0.030178692, 0.054232385, 0.0782860...","[0.030811656, 0.0077921706, 0.00096568564, 0.0...",1043,1043,0.006125,25.070070,"[0.030811656266450882, 0.017125888580813684, 0..."
512,2028801,2018-11-01T07:33:29.10,TYC 8097-337-1,"[0.01223, 0.03781431, 0.06339863, 0.08898293, ...","[0.011241959, 0.005084048, 0.0022800567, 0.001...",979,979,0.012230,25.033688,"[0.011241959407925606, 0.00926934480915285, 0...."
