In [1]:
import random

In [2]:
from openTSNE import TSNE
from openTSNE.callbacks import ErrorLogger

In [3]:
import os, sys
sys.path.insert(0, os.path.join(os.getenv('HOME'), 'StarNet'))
import numpy as np
import h5py
import matplotlib.pyplot as plt
import seaborn as sns
import json
from astropy.io import fits as pyfits

from starnet.utils.data_utils.augment import convolve_spectrum, mask_tellurics
from starnet.utils.data_utils.restructure_spectrum import rebin, continuum_normalize_parallel, ensure_constant_sampling
from starnet.utils.data_utils.loading import load_data_from_h5, get_synth_spec_data, get_synth_wavegrid
from starnet.utils.plotting import plot_compare_estimates_gaiaeso_resid, make_boxplots_splitSNR
from starnet.models.cnn_models import StarNet2017, StarNet2017DeepEnsemble

from starnet.utils.data_utils.augment import convolve_spectrum
from starnet.utils.data_utils.restructure_spectrum import rebin, continuum_normalize, ensure_constant_sampling

from keras.models import load_model, Model
from astropy.io import fits as pyfits

  'functionality will be SEVERELY crippled. ' + str(e))
  'no thermal calculations can be performed. ' + str(e))
Using TensorFlow backend.


In [4]:
from starnet.utils.data_utils.augment import convolve_spectrum, add_radial_velocity, add_noise, fastRotBroad

In [5]:
# Cluster locations are taken from the environment.
home, scratch = os.getenv('HOME'), os.getenv('SCRATCH')

# Folder holding the preprocessed HDF5 spectra files.
preprocessed_path = home + '/projects/rrg-kyi/group_writable/spectra/preprocessed/'

# One preprocessed file per source: Phoenix, AMBRE, UVES and INTRIGOSS.
p_file, a_file, u_file, i_file = [
    preprocessed_path + h5_name
    for h5_name in (
        'phoenix_trainset_oldvrad_tsne.h5',
        'ambre_trainset_oldvrad_tsne.h5',
        'UVES_GE_MW_4835-5395_updated.h5',
        'intrigoss_trainset_oldvrad_tsne.h5',
    )
]





In [6]:
# Locations of the synthetic grids and of the observed wavelength grid.
# home/scratch are re-read here, so this cell does not depend on the previous one.
home = os.getenv('HOME')
scratch = os.getenv('SCRATCH')
starnet_data_folder = os.path.join(home, 'StarNet/starnet/data/')
# Root folders that find_closest() walks when looking for a spectrum file.
intrigoss_grid_path = os.path.join(home, 'projects/rrg-kyi/group_writable/spectra/grids/intrigoss') 
# phoenix_one_file() concatenates a file name directly onto this path, so the
# trailing '/' is required.
phoenix_grid_path = os.path.join(home, 'projects/rrg-kyi/group_writable/spectra/grids/phoenix.astro.physik.uni-goettingen.de/v2.0/HiResFITS/PHOENIX-ACES-AGSS-COND-2011/train/') 
# Folder that holds the Phoenix wavelength-grid file (see phoenix_one_file).
# NOTE(review): the concatenation yields a double slash after home — harmless
# on POSIX, but confirm it is intended.
phoenix_wave_path = home+'/'+'/projects/rrg-kyi/group_writable/spectra/grids/phoenix.astro.physik.uni-goettingen.de/v2.0/HiResFITS/PHOENIX-ACES-AGSS-COND-2011/'
ambre_grid_path = os.path.join(home, 'projects/rrg-kyi/group_writable/spectra/grids/AMBRE/train')
# Observed wavelength grid; find_closest() trims and rebins every synthetic
# spectrum onto it.
obs_wave_filepath = os.path.join(home, 'projects/rrg-kyi/group_writable/spectra/UVES_4835-5395.npy')
wave_grid_obs = np.load(obs_wave_filepath)
bench_path = os.path.join(home, 'StarNet/starnet/gaiaESO/data/benchmark-stars/')

In [7]:
# Define parameters needed for continuum fitting
# LINE_REGIONS is handed to continuum_normalize() as its line-region argument
# (presumably wavelength windows in Angstroms, like SEGMENTS_STEP — TODO confirm).
# NOTE(review): [4250, 4410] fully contains [4333, 4388]; confirm this is intended.
LINE_REGIONS = [[4210, 4240], [4250, 4410], [4333, 4388], [4845, 4886], [5160, 5200], [5874, 5916], [6530, 6590]]
SEGMENTS_STEP = 10.  # divide the spectrum into segments of 10 Angstroms

In [8]:
def get_phoenix_filename(teff, logg, feh, afe):
    """
    This function returns the name of the Phoenix spectrum file with the requested stellar parameters.
    
    INPUT: teff: Effective temperature (K); must be an integer type (formatted with '{:05d}')
           logg: Surface gravity
           feh: Metallicity (dex)
           afe: Alpha elements abundance [alpha/Fe]

    RETURNS: filename: Name of the .fits file containing a Phoenix spectrum
    RAISES: ValueError if afe is not comparable to zero (e.g. NaN)
    EXAMPLE: With input of teff=4000, logg=1.0, feh=-1.0, afe=0.5, it returns the string
             'lte04000-1.00-1.0.Alpha=+0.50.PHOENIX-ACES-AGSS-COND-2011-HiRes.fits'
             With input of teff=5800, logg=4.5, feh=0.0, afe=0, it returns the string
             'lte05800-4.50-0.0.PHOENIX-ACES-AGSS-COND-2011-HiRes.fits'
    """
    str_teff = 'lte{:05d}'.format(teff)
    str_logg = '-{:03.2f}'.format(logg)

    # The metallicity field always carries an explicit sign in Phoenix file
    # names; solar metallicity is written as '-0.0', not '0.0'.
    if feh > 0:
        str_feh = '+{:02.1f}'.format(feh)
    elif feh == 0:
        str_feh = '-0.0'
    else:
        str_feh = '{:02.1f}'.format(feh)

    # The alpha field is omitted entirely for solar-scaled alpha abundance.
    if afe == 0:
        str_afe = ''
    elif afe < 0:
        str_afe = '.Alpha={:03.2f}'.format(afe)
    elif afe > 0:
        str_afe = '.Alpha=+{:03.2f}'.format(afe)
    else:
        raise ValueError('afe must be a real number, got {!r}'.format(afe))

    return '{}{}{}{}.PHOENIX-ACES-AGSS-COND-2011-HiRes.fits'.format(str_teff, str_logg, str_feh, str_afe)

def find_closest_phoenix_match(teff, logg, feh, afe):
    """
    Snap a set of stellar parameters onto the nearest node of the Phoenix grid.

    INPUT: teff: Effective temperature (K)
           logg: Surface gravity
           feh: Metallicity (dex)
           afe: Alpha elements abundance [alpha/Fe]

    RETURNS: The grid values of teff, logg, feh and afe closest to the inputs
             (afe is rounded to two decimals).
    EXAMPLE: With input of teff=3900, logg=1.1, feh=-1.1, afe=0.6, it returns 4 values:
             3900, 1.0, -1.0, 0.6
    """

    def _nearest(axis, wanted):
        # Grid node with the smallest absolute distance to the requested value.
        return axis[np.argmin(np.abs(wanted - axis))]

    # Phoenix grid: 100 K steps below 7000 K, 200 K steps from there to 12000 K;
    # [Fe/H] in 1 dex steps below -2 and 0.5 dex steps above.
    teff_axis = np.concatenate((np.arange(2300, 7000, 100), np.arange(7000, 12001, 200)))
    logg_axis = np.arange(0, 6.1, 0.5)
    feh_axis = np.concatenate((np.arange(-4., -2.0, 1), np.arange(-2.0, 1.01, 0.5)))
    afe_axis = np.arange(-0.2, 1.21, 0.2)

    return (
        _nearest(teff_axis, teff),
        _nearest(logg_axis, logg),
        _nearest(feh_axis, feh),
        # rounding removes the float noise np.arange leaves on the 0.2 steps
        round(_nearest(afe_axis, afe), 2),
    )


def phoenix_one_file(name):
    """
    Load one Phoenix spectrum from the training grid folder.

    INPUT: name: File name of the spectrum; it is appended directly to
                 phoenix_grid_path.

    RETURNS: wav: wavelength data
             flux: flux data
             (exactly what get_phoenix_spectrum returns)
    """
    spectrum_file = phoenix_grid_path + name
    wave_file = os.path.join(phoenix_wave_path, 'WAVE_PHOENIX-ACES-AGSS-COND-2011.fits')
    return get_phoenix_spectrum(spectrum_file, wave_file)


def get_phoenix_spectrum(spectrum_path, wave_grid_path):
    
    """
    Given the path of a Phoenix spectrum .fits file, this function retrieves the flux and wavelength data
    
    INPUT: spectrum_path: The path to the Phoenix spectrum file, e.g. '/path/to/lte04000-1.00-1.0.Alpha=+0.50.PHOENIX-ACES-AGSS-COND-2011-HiRes.fits'
           wave_grid_path: The path to the .fits file holding the Phoenix wavelength grid
    
    RETURNS: wav: wavelength data, converted from vacuum to air
             flux: flux data
    """
    
    # Copy the arrays out while the files are open so both handles are released
    # on exit; this function is called once per star, and handles that are
    # never closed accumulate.
    with pyfits.open(spectrum_path) as hdulist_spectrum:
        flux = np.array(hdulist_spectrum[0].data)
    with pyfits.open(wave_grid_path) as hdulist_wavegrid:
        wav = np.array(hdulist_wavegrid[0].data)
    
    # For Phoenix, need to convert from vacuum to air wavelengths.
    # The IAU standard for conversion from vacuum to air wavelengths is given
    # in Morton (1991, ApJS, 77, 119). For vacuum wavelengths (VAC) in
    # Angstroms, convert to air wavelength (AIR) via:
    #  AIR = VAC / (1.0 + 2.735182E-4 + 131.4182 / VAC^2 + 2.76249E8 / VAC^4)
    wav = wav / (
            1.0 + 2.735182E-4 + 131.4182 / wav ** 2 + 2.76249E8 / wav ** 4)
    
    return wav, flux

In [9]:
def get_intrigoss_filename(teff, logg, feh, afe):
    """
    Build the name of the INTRIGOSS spectrum file for the given stellar parameters.

    INPUT: teff: Effective temperature (K)
           logg: Surface gravity
           feh: Metallicity (dex)
           afe: Alpha elements abundance [alpha/Fe]

    RETURNS: filename: Name of the .fits file containing an INTRIGOSS spectrum
    EXAMPLE: With input of teff=4000, logg=1.0, feh=-1.0, afe=0.5, it returns the string
             'alpp050_am100_t4000g10_v1_f.fits'
    """

    def _abundance_tag(value, plus_prefix, minus_prefix):
        # Encode an abundance as <prefix><integer part><two decimals>,
        # e.g. -1.0 -> 'am100'; exactly zero uses the plus prefix with '000'.
        if value > 0.:
            prefix, magnitude = plus_prefix, value
        elif value < 0.:
            prefix, magnitude = minus_prefix, -value
        else:
            return plus_prefix + '000'
        decimals = ('%.2f' % (magnitude % 1))[2:4]
        return '{}{}{}'.format(prefix, int(magnitude), decimals)

    metallicity_tag = _abundance_tag(feh, 'ap', 'am')
    alpha_tag = _abundance_tag(afe, 'alpp', 'alpm')
    # logg is written as two digits: integer part followed by the first decimal
    gravity_tag = 'g%i%i' % (int(logg), int(round((logg % 1) * 10.)))

    return '{}_{}_t{}{}_v1_f.fits'.format(alpha_tag, metallicity_tag, teff, gravity_tag)

def find_closest_intrigoss_match(teff, logg, feh, afe):
    """
    Snap a set of stellar parameters onto the nearest node of the INTRIGOSS grid.

    INPUT: teff: Effective temperature (K)
           logg: Surface gravity
           feh: Metallicity (dex)
           afe: Alpha elements abundance [alpha/Fe]

    RETURNS: The grid values of teff, logg, feh and afe closest to the inputs
    EXAMPLE: With input of teff=3900, logg=1.1, feh=-1.1, afe=0.6, it returns 4 values:
             4000, 1.0, -1.0, 0.5
    """
    # (requested value, grid axis) pairs in the order the results are returned
    requested_and_axis = (
        (teff, np.arange(3750, 7001, 250)),
        (logg, np.arange(0.5, 5.1, 0.5)),
        (feh, np.arange(-1.0, 0.51, 0.25)),
        (afe, np.arange(-0.25, 0.51, 0.25)),
    )

    # For each axis keep the node with the smallest absolute distance
    return tuple(
        axis[np.argmin(np.abs(wanted - axis))]
        for wanted, axis in requested_and_axis
    )

def get_intrigoss_spectrum(path):
    
    """
    Given the path of an INTRIGOSS spectrum .fits file, this function retrieves the flux and wavelength data
    
    INPUT: path: The path to the INTRIGOSS spectrum file, e.g. '/path/to/alpp050_am100_t4000g10_v1_f.fits'
    
    RETURNS: wav: wavelength data (column 'wavelength' of HDU 1)
             flux: flux data (column 'surface_flux' of HDU 1)
    """
    
    # Copy both columns while the file is open so the handle is released on
    # exit; this function is called once per star.
    with pyfits.open(path) as hdulist:
        flux_data = hdulist[1].data
        wav = np.array(flux_data['wavelength'])
        flux = np.array(flux_data['surface_flux'])
    
    return wav, flux

In [46]:
def get_ambre_filename(teff, logg, feh, afe):
    
    """
    This function returns the name of the AMBRE spectrum file with the requested stellar parameters.

    Every file below ambre_grid_path is inspected. The parameters are read from
    fixed character positions of the file name: teff from [1:5], logg from
    sign [7] + digits [8:11], feh from sign [22] + digits [23:27] and afe from
    sign [29] + digits [30:34]. A value is negative only when its sign
    character is '-'.
    
    INPUT: teff: Effective temperature (K)
           logg: Surface gravity
           feh: Metallicity (dex)
           afe: Alpha elements abundance [alpha/Fe]

    RETURNS: filename: Name of the matching AMBRE file, or '' when no file matches.
             If several files match, the last one visited is returned.
    """

    def _signed(sign_char, digits):
        # Apply the file name's sign character to the magnitude that follows it.
        magnitude = float(digits)
        return -magnitude if sign_char == '-' else magnitude

    filename_fin = ''
    for root, dirs, files in os.walk(ambre_grid_path):
        for filename in files:
            try:
                file_teff = float(filename[1:5])
                file_logg = _signed(filename[7], filename[8:11])
                file_feh = _signed(filename[22], filename[23:27])
                file_afe = _signed(filename[29], filename[30:34])
            except (ValueError, IndexError):
                # Not an AMBRE spectrum name (too short or non-numeric fields);
                # skip it instead of aborting the whole search.
                continue

            # All four parameters must match, including the sign of each one.
            if (teff == file_teff and logg == file_logg
                    and feh == file_feh and afe == file_afe):
                filename_fin = filename

    return filename_fin


def find_closest_ambre_match(teff, logg, feh, afe):
    
    """
    Given a set of stellar parameters, this function will return the stellar parameters from the AMBRE
    grid which are closest to the supplied parameters.
    
    INPUT: teff: Effective temperature (K)
           logg: Surface gravity
           feh: Metallicity (dex)
           afe: Alpha elements abundance [alpha/Fe]

    RETURNS: Closest matching stellar parameters from the AMBRE grid
             (logg, feh and afe are rounded to two decimals)
    RAISES: ValueError if teff or feh is NaN
    EXAMPLE: With input of teff=3937, logg=1.24, feh=-0.27, afe=0.0, it returns 4 values:
             3900, 1.0, -0.25, 0.0
    """
    if np.isnan(teff) or np.isnan(feh):
        raise ValueError('teff and feh must be real numbers, got teff={!r}, feh={!r}'.format(teff, feh))

    # AMBRE grid spacing: 200 K steps up to 3900 K, 250 K steps from 4000 K.
    # Searching the joined axis gives the same answer as picking one half by
    # teff, and also covers temperatures strictly between 3900 K and 4000 K.
    teff_grid = np.concatenate((np.arange(2500, 4000, 200), np.arange(4000, 8500, 250)))
    logg_grid = np.arange(-0.5, 6, 0.5)

    # The metallicity axis is chosen from the requested value itself
    if feh >= -1.0:
        feh_grid = np.arange(-1.5, 1.5, 0.25)
    elif feh >= -3.0:
        feh_grid = np.arange(-3.5, -1.0, 0.5)
    else:
        feh_grid = np.arange(-5.0, -3.0, 1.0)
    afe_grid = np.arange(-0.40, 1.00, 0.20)

    # Find closest parameter values
    match_teff = teff_grid[np.argmin(np.abs(teff - teff_grid))]
    match_logg = logg_grid[np.argmin(np.abs(logg - logg_grid))]
    match_feh = feh_grid[np.argmin(np.abs(feh - feh_grid))]
    match_afe = afe_grid[np.argmin(np.abs(afe - afe_grid))]

    # Rounding strips the float noise left by np.arange so the values compare
    # equal to the numbers parsed from file names.
    match_afe = round(match_afe, 2)
    match_logg = round(match_logg, 2)
    match_feh = round(match_feh, 2)

    return match_teff, match_logg, match_feh, match_afe

def get_ambre_spectrum(path):
    
    """
    Given the path of an AMBRE spectrum text file, this function retrieves the flux and wavelength data
    
    INPUT: path: The path to the AMBRE spectrum file
    
    RETURNS: wav: wavelength data (first column of the file)
             flux: flux data (last column of the file)
             Both are 1-D arrays, also for a file with a single row.
    """
    # Parse the file once and pull both columns from the same pass; reshape
    # keeps a one-row (or empty) file two-dimensional so the slicing works.
    columns = np.genfromtxt(path, usecols=(0, -1)).reshape(-1, 2)
    wav = columns[:, 0]
    flux = columns[:, 1]
    
    return wav, flux

In [64]:
# Per-grid accumulators that find_closest() appends to:
# normalized spectra (a = AMBRE, p = Phoenix, i = INTRIGOSS) ...
flux_a, flux_p, flux_i = [], [], []

# ... and their noise-augmented counterparts.
aug_flux_a, aug_flux_p, aug_flux_i = [], [], []

In [64]:
def _locate_grid_file(grid_root, wanted_name):
    """Return the path of the last file named `wanted_name` found below `grid_root`, or None."""
    located = None
    for root, dirs, files in os.walk(grid_root):
        if wanted_name in files:
            located = os.path.join(root, wanted_name)
    return located


def _match_to_observed(wav, flux):
    """
    Bring one synthetic spectrum onto the observed wavelength grid.

    The spectrum is trimmed to the observed range (plus a 10 Angstrom margin),
    convolved to a resolution of 47000, rebinned onto wave_grid_obs and
    continuum normalized. A noisy copy is made after rebinning and normalized
    the same way.

    RETURNS: flux_norm: normalized spectrum
             flux_aug_norm: normalized noise-augmented spectrum
    """
    extension = 10  # Angstroms
    wave_min_request = wave_grid_obs[0] - extension
    wave_max_request = wave_grid_obs[-1] + extension

    in_window = (wav > wave_min_request) & (wav < wave_max_request)
    wav = wav[in_window]
    flux = flux[in_window]

    # Synthetic spectra carry no uncertainties, so a zero error array is passed
    _, flux, _ = convolve_spectrum(wav, flux, np.zeros(len(flux)), to_resolution=47000)
    flux = rebin(wave_grid_obs, wav, flux)

    # The noisy copy is drawn before normalizing, as the original pipeline did
    flux_aug = add_noise(flux, 0.04)
    flux_norm, _ = continuum_normalize(flux, LINE_REGIONS, wave_grid_obs, SEGMENTS_STEP)
    flux_aug_norm, _ = continuum_normalize(flux_aug, LINE_REGIONS, wave_grid_obs, SEGMENTS_STEP)
    return flux_norm, flux_aug_norm


def find_closest(teff,logg,feh,a_m):
    """
    Collect, for one star, the closest synthetic spectrum from each grid.

    For INTRIGOSS, Phoenix and AMBRE the grid node nearest to the requested
    parameters is looked up, its file is searched for below the grid folder
    and, when found, the spectrum is processed with _match_to_observed. The
    results are appended to the module-level lists flux_i/aug_flux_i,
    flux_p/aug_flux_p and flux_a/aug_flux_a. A grid whose file is missing is
    skipped silently.

    INPUT: teff: Effective temperature (K)
           logg: Surface gravity
           feh: Metallicity (dex)
           a_m: Alpha elements abundance [alpha/M]

    RETURNS: None
    """
    # Find the closest match in each grid for the requested parameters
    match_intrigoss = find_closest_intrigoss_match(teff, logg, feh, a_m)
    match_phoenix = find_closest_phoenix_match(teff, logg, feh, a_m)

    # AMBRE is left out for temperatures strictly between 3900 K and 4000 K,
    # as in the original code; an empty name never matches a file.
    if (teff < 4000 and teff > 3900):
        spec_filename_ambre = ''
    else:
        match_ambre = find_closest_ambre_match(teff, logg, feh, a_m)
        spec_filename_ambre = get_ambre_filename(*match_ambre)

    # Construct the file names for the INTRIGOSS and Phoenix matches
    spec_filename_intrigoss = get_intrigoss_filename(*match_intrigoss)
    spec_filename_phoenix = get_phoenix_filename(*match_phoenix)

    # Load whichever spectra exist on disk (None marks a missing one)
    spec_intrigoss = spec_phoenix = spec_ambre = None

    path_intrigoss = _locate_grid_file(intrigoss_grid_path, spec_filename_intrigoss)
    if path_intrigoss is not None:
        spec_intrigoss = get_intrigoss_spectrum(path_intrigoss)

    # phoenix_one_file builds its own path from the bare file name
    if _locate_grid_file(phoenix_grid_path, spec_filename_phoenix) is not None:
        spec_phoenix = phoenix_one_file(spec_filename_phoenix)

    path_ambre = _locate_grid_file(ambre_grid_path, spec_filename_ambre)
    if path_ambre is not None:
        spec_ambre = get_ambre_spectrum(path_ambre)

    # Process and store each spectrum in its own grid's lists. Phoenix results
    # go to flux_p (the original appended the INTRIGOSS spectrum there).
    for spectrum, clean_store, aug_store in (
            (spec_intrigoss, flux_i, aug_flux_i),
            (spec_phoenix, flux_p, aug_flux_p),
            (spec_ambre, flux_a, aug_flux_a)):
        if spectrum is None or len(spectrum[0]) == 0:
            continue
        flux_norm, flux_aug_norm = _match_to_observed(*spectrum)
        clean_store.append(flux_norm)
        aug_store.append(flux_aug_norm)

In [65]:
# Read the observed UVES spectra and their labels fully into memory, then close
# the file; later cells only need the arrays.
with h5py.File(u_file, 'r') as f3:
    flux_u = np.asarray(f3['spectra_starnet_norm'])
    teff_u = np.asarray(f3['teff'])
    logg_u = np.asarray(f3['logg'])
    feh_u = np.asarray(f3['fe_h'])

In [66]:
#print(len(flux_u))
#print(flux_u.shape)
#print(flux_u[0])

3100
(3100, 39436)
[0.69717854 0.63032836 0.57475674 ... 0.9613026  0.96221431 0.96775702]


In [67]:
import math

# Keep only the stars whose three labels are all defined.
teff_u_fin, logg_u_fin, feh_u_fin, flux_u_fin = [], [], [], []

for star in range(len(teff_u)):
    teff, logg, feh = teff_u[star], logg_u[star], feh_u[star]

    has_all_labels = not (math.isnan(teff) or math.isnan(logg) or math.isnan(feh))
    if has_all_labels:
        teff_u_fin.append(teff)
        logg_u_fin.append(logg)
        feh_u_fin.append(feh)
        flux_u_fin.append(flux_u[star])

2480
2480
2480


In [68]:
# Replace the UVES labels and spectra with their NaN-free versions, as arrays.
teff_u, logg_u, feh_u, flux_u = (
    np.asarray(kept)
    for kept in (teff_u_fin, logg_u_fin, feh_u_fin, flux_u_fin)
)

(2480, 39436)


In [None]:
# First batch: stars 0-499. The alpha abundance is fixed to 0 for every star.
for star in range(0, 500):
    find_closest(teff_u[star], logg_u[star], feh_u[star], 0)
    
    

1
6188.0 3.802999973297119 -2.2060000896453857 0
2
6188.0 3.802999973297119 -2.2060000896453857 0
3
5562.0 4.327000141143799 0.1379999965429306 0
4
4834.0 2.746999979019165 -0.6970000267028809 0
5
6326.0 4.052999973297119 -0.37700000405311584 0
6
6018.0 3.569999933242798 -0.4009999930858612 0
7
6188.0 3.802999973297119 -2.2060000896453857 0
8
5708.0 4.015500068664551 -0.909500002861023 0
9
6448.0 4.284999847412109 -0.1940000057220459 0
10
5636.0 4.341000080108643 -0.33799999952316284 0
11
5586.0 3.617000102996826 -0.28299999237060547 0
12
4261.0 1.6629999876022339 0.04600000008940697 0
13
3937.0 1.2448999881744385 -0.27000001072883606 0
14
6134.0 3.6735000610351562 -0.48750001192092896 0
15
6134.0 3.6735000610351562 -0.48750001192092896 0
16
6134.0 3.6735000610351562 -0.48750001192092896 0
17
5812.0 4.449999809265137 0.0010000000474974513 0
18
5812.0 4.449999809265137 0.0010000000474974513 0
19
5812.0 4.449999809265137 0.0010000000474974513 0
20
5172.0 4.519000053405762 0.0659999996423

In [None]:
'''

i = 0
flux_p_fin  = []
flux_a_fin  = []
flux_u_fin  = []
flux_i_fin  = []

while i < 3000:
    ind_p = random.randrange(0,len(flux_p),1)
    flux_p_fin.append(flux_p[ind_p])
    
    i = i + 1
    
print("Done phoenix")

In [None]:

'''
i = 0

while i < 3000:
    ind_a = random.randrange(0,len(flux_a),1)
    flux_a_fin.append(flux_a[ind_a])
    
    i = i + 1
    
print("Done ambre")

In [None]:
'''
i = 0

while i < 3000:
    ind_u = random.randrange(0,len(flux_u),1)
    flux_u_fin.append(flux_u[ind_u])
    
    i = i + 1
print("Done UVES")

In [None]:
'''

i = 0

while i < 3000:
    ind_i = random.randrange(0,len(flux_i),1)
    flux_i_fin.append(flux_i[ind_i])
    
    i = i + 1
    
print("Done INTRIGOSS")

In [None]:
# Turn the collected spectra of every source into arrays.
flux_a, flux_p, flux_u, flux_i = (
    np.asarray(spectra) for spectra in (flux_a, flux_p, flux_u, flux_i)
)



In [None]:
import os
import h5py
import numpy as np

scratch = os.getenv('SCRATCH')

filename = 'tsne_data_500'
# os.path.join supplies the separator that plain concatenation left out, so the
# file lands inside $SCRATCH rather than next to it.
save_path = os.path.join(scratch, filename)

datasets = {
    'ambre': flux_a,
    'ambre_aug': aug_flux_a,
    'intrigoss': flux_i,
    'intrigoss_aug': aug_flux_i,
    'uves': flux_u,
    'phoenix': flux_p,
    'phoenix_aug': aug_flux_p,
}
# NOTE(review): no visible cell defines aug_flux_u; write it only when it exists
# so the save no longer aborts with a NameError halfway through the file.
if 'aug_flux_u' in globals():
    datasets['uves_aug'] = aug_flux_u
else:
    print("aug_flux_u is not defined; 'uves_aug' not written")

with h5py.File(save_path, 'w') as f:
    for key, spectra in datasets.items():
        f.create_dataset(key, data=np.asarray(spectra))

print("500 saved")

In [None]:
# Start a fresh batch: find_closest() appends to these module-level lists.
flux_a, flux_p, flux_i = [], [], []
aug_flux_a, aug_flux_p, aug_flux_i = [], [], []

# Stars 500-999, alpha abundance fixed to 0.
for star in range(500, 1000):
    find_closest(teff_u[star], logg_u[star], feh_u[star], 0)

In [None]:
import os
import h5py
import numpy as np

scratch = os.getenv('SCRATCH')

filename = 'tsne_data_500_1000'
# os.path.join supplies the separator that plain concatenation left out, so the
# file lands inside $SCRATCH rather than next to it.
save_path = os.path.join(scratch, filename)

datasets = {
    'ambre': flux_a,
    'ambre_aug': aug_flux_a,
    'intrigoss': flux_i,
    'intrigoss_aug': aug_flux_i,
    'uves': flux_u,
    'phoenix': flux_p,
    'phoenix_aug': aug_flux_p,
}
# NOTE(review): no visible cell defines aug_flux_u; write it only when it exists
# so the save no longer aborts with a NameError halfway through the file.
if 'aug_flux_u' in globals():
    datasets['uves_aug'] = aug_flux_u
else:
    print("aug_flux_u is not defined; 'uves_aug' not written")

with h5py.File(save_path, 'w') as f:
    for key, spectra in datasets.items():
        f.create_dataset(key, data=np.asarray(spectra))
print("1000 saved")

In [None]:
# Start a fresh batch: find_closest() appends to these module-level lists.
flux_a, flux_p, flux_i = [], [], []
aug_flux_a, aug_flux_p, aug_flux_i = [], [], []

# Stars 1000-1499, alpha abundance fixed to 0.
for star in range(1000, 1500):
    find_closest(teff_u[star], logg_u[star], feh_u[star], 0)

In [None]:
import os
import h5py
import numpy as np

scratch = os.getenv('SCRATCH')

filename = 'tsne_data_1000_1500'
# os.path.join supplies the separator that plain concatenation left out, so the
# file lands inside $SCRATCH rather than next to it.
save_path = os.path.join(scratch, filename)

datasets = {
    'ambre': flux_a,
    'ambre_aug': aug_flux_a,
    'intrigoss': flux_i,
    'intrigoss_aug': aug_flux_i,
    'uves': flux_u,
    'phoenix': flux_p,
    'phoenix_aug': aug_flux_p,
}
# NOTE(review): no visible cell defines aug_flux_u; write it only when it exists
# so the save no longer aborts with a NameError halfway through the file.
if 'aug_flux_u' in globals():
    datasets['uves_aug'] = aug_flux_u
else:
    print("aug_flux_u is not defined; 'uves_aug' not written")

with h5py.File(save_path, 'w') as f:
    for key, spectra in datasets.items():
        f.create_dataset(key, data=np.asarray(spectra))

print("1500 saved")

In [None]:
# Start a fresh batch: find_closest() appends to these module-level lists.
flux_a, flux_p, flux_i = [], [], []
aug_flux_a, aug_flux_p, aug_flux_i = [], [], []

# Stars 1500-1999, alpha abundance fixed to 0.
for star in range(1500, 2000):
    find_closest(teff_u[star], logg_u[star], feh_u[star], 0)

In [None]:
import os
import h5py
import numpy as np

scratch = os.getenv('SCRATCH')

filename = 'tsne_data_1500_2000'
# os.path.join supplies the separator that plain concatenation left out, so the
# file lands inside $SCRATCH rather than next to it.
save_path = os.path.join(scratch, filename)

datasets = {
    'ambre': flux_a,
    'ambre_aug': aug_flux_a,
    'intrigoss': flux_i,
    'intrigoss_aug': aug_flux_i,
    'uves': flux_u,
    'phoenix': flux_p,
    'phoenix_aug': aug_flux_p,
}
# NOTE(review): no visible cell defines aug_flux_u; write it only when it exists
# so the save no longer aborts with a NameError halfway through the file.
if 'aug_flux_u' in globals():
    datasets['uves_aug'] = aug_flux_u
else:
    print("aug_flux_u is not defined; 'uves_aug' not written")

with h5py.File(save_path, 'w') as f:
    for key, spectra in datasets.items():
        f.create_dataset(key, data=np.asarray(spectra))

print("2000 saved")

In [None]:
# Start a fresh batch: find_closest() appends to these module-level lists.
flux_a, flux_p, flux_i = [], [], []
aug_flux_a, aug_flux_p, aug_flux_i = [], [], []

# Stars 2000-2479, alpha abundance fixed to 0.
for star in range(2000, 2480):
    find_closest(teff_u[star], logg_u[star], feh_u[star], 0)

In [None]:
import os
import h5py
import numpy as np

scratch = os.getenv('SCRATCH')

filename = 'tsne_data_2000_2480'
# os.path.join supplies the separator that plain concatenation left out, so the
# file lands inside $SCRATCH rather than next to it.
save_path = os.path.join(scratch, filename)

datasets = {
    'ambre': flux_a,
    'ambre_aug': aug_flux_a,
    'intrigoss': flux_i,
    'intrigoss_aug': aug_flux_i,
    'uves': flux_u,
    'phoenix': flux_p,
    'phoenix_aug': aug_flux_p,
}
# NOTE(review): no visible cell defines aug_flux_u; write it only when it exists
# so the save no longer aborts with a NameError halfway through the file.
if 'aug_flux_u' in globals():
    datasets['uves_aug'] = aug_flux_u
else:
    print("aug_flux_u is not defined; 'uves_aug' not written")

with h5py.File(save_path, 'w') as f:
    for key, spectra in datasets.items():
        f.create_dataset(key, data=np.asarray(spectra))
print("2480 saved")

In [None]:
# Stack every spectrum into one matrix for t-SNE, in the fixed order
# INTRIGOSS, Phoenix, AMBRE, UVES (one row per spectrum).
grid_data = np.asarray(list(flux_i) + list(flux_p) + list(flux_a) + list(flux_u))
print(grid_data.shape)

In [None]:
# One integer label per row of grid_data: 0 = INTRIGOSS, 1 = Phoenix,
# 2 = AMBRE, 3 = UVES. The counts come from the same lists, in the same order,
# that were stacked into grid_data, so labels and rows stay aligned.
grid_target = (
    [0] * len(flux_i)
    + [1] * len(flux_p)
    + [2] * len(flux_a)
    + [3] * len(flux_u)
)
    

In [None]:
# Configure the openTSNE embedding that is later fitted to grid_data.
tsne = TSNE(
    perplexity=30,
    metric="euclidean",  # distance between spectra
    callbacks=ErrorLogger(),
    n_jobs=8,
    random_state=42,  # fixed seed
)

In [None]:
%time embedding_train = tsne.fit(grid_data)

In [None]:

fig, ax = plt.subplots(figsize = (10,10))

colors = ['r', 'g','b','c']

# Number of embedding rows per source, in the order they were stacked into
# grid_data (INTRIGOSS, Phoenix, AMBRE, UVES).
counts = [len(flux_i), len(flux_p), len(flux_a), len(flux_u)]

# Draw each source from its own half-open row range so no points are plotted
# twice (an open-ended AMBRE slice would also cover the UVES rows).
handles = []
start = 0
for count, color in zip(counts, colors):
    stop = start + count
    handles.append(ax.scatter(embedding_train[start:stop, 0],
                              embedding_train[start:stop, 1],
                              color = color, s = 10, alpha = 0.5))
    start = stop
intrigoss, phoenix, ambre, uves = handles

plt.gca().set_aspect('equal', 'datalim')
plt.legend((intrigoss,phoenix,ambre,uves),
           ('INTRIGOSS', 'PHOENIX', 'AMBRE','UVES'),
           scatterpoints=1,
           loc='upper left',
           ncol=1,
           fontsize=10)

plt.savefig('uves_all_grids_2.png')