In [None]:
from conversion_helpers import *

import pandas as pd
import os
import numpy as np
import h5py

In [None]:
# Label table read from train_labels.csv; its planet_id column is used
# further down to look up the row of a single planet.
labels = pd.read_csv(filepath_or_buffer="train_labels.csv")

In [None]:
# Folder that holds the input files ("" resolves to the current working directory).
path_folder = ""

# ADC info table, indexed by planet_id.
train_adc_info = pd.read_csv(
    os.path.join(path_folder, 'train_adc_info.csv')
).set_index('planet_id')
axis_info = pd.read_parquet(os.path.join(path_folder, 'axis_info.parquet'))

# Calibration switches handed to calibrateData.
# Filter out non-responsive pixels.
DO_MASK = True
# Non-linearity correction for artefacts introduced when reading out pixels.
# This is the most time-consuming step; it can be skipped for rapid prototyping.
DO_THE_NL_CORR = True
# Dark current accumulates in the pixels over time and has to be compensated
# (seems to be an integration artefact).
DO_DARK = True
# Pixel-to-pixel variation correction (how pixels respond differently under
# uniform illumination).
DO_FLAT = True
# Bin the frames in time at a chosen frequency.
TIME_BINNING = False

In [None]:
# Planet inspected in the exploratory cells below.
planet_id = 612015401

# Calibrate that planet; calibrateData returns the binned and the original
# AIRS / FGS1 arrays, in this order.
(
    AIRS_cds_binned,
    FGS1_cds_binned,
    AIRS_cds_original,
    FGS1_cds_original,
) = calibrateData(
    planet_id,
    train_adc_info,
    axis_info,
    DO_MASK,
    DO_THE_NL_CORR,
    DO_DARK,
    DO_FLAT,
    TIME_BINNING,
)

In [None]:
# Display the label row(s) that belong to the selected planet.
labels.loc[labels["planet_id"].eq(planet_id)]

In [None]:
import matplotlib.pyplot as plt

In [None]:
# Sum the binned AIRS data over its last two axes and plot the
# resulting series of the first (only) entry along axis 0.
b = AIRS_cds_binned.sum(axis=(2, 3))
print(b.shape)
plt.plot(b[0])

In [None]:
# Same view as the previous cell, but for the "original" AIRS array
# returned by calibrateData.
b = AIRS_cds_original.sum(axis=(2, 3))
print(b.shape)
plt.plot(b[0])

In [None]:
# Dimensions of the binned AIRS array.
np.shape(AIRS_cds_binned)

In [None]:
# Normalisation per frame (statistics reduced over axes 2 and 3).
# Verdict: bad idea -- the wavelengths inside one frame become comparable
# (not super critical, no strong impact there), but the frames are no longer
# comparable with each other, so the overall trend is lost.
mean_p_frame = np.mean(AIRS_cds_binned, axis=(2,3), keepdims=True)
std_p_frame = np.std(AIRS_cds_binned, axis=(2,3), keepdims=True)
min_p_frame = np.min(AIRS_cds_binned, axis=(2,3), keepdims=True)
max_p_frame = np.max(AIRS_cds_binned, axis=(2,3), keepdims=True)

# Only one variant can feed the plot. Previously the z-score was computed and
# then immediately overwritten by the min-max result, which wasted a full-array
# computation. The z-score variant is kept as a commented alternative; swap the
# two lines to plot it instead.
#zScoreAIRS = (AIRS_cds_binned - mean_p_frame) / std_p_frame # gets rid of overall trend
# NOTE: despite its name, zScoreAIRS holds the min-max scaled data in this cell.
zScoreAIRS = (AIRS_cds_binned - min_p_frame) / (max_p_frame - min_p_frame)
b = zScoreAIRS.sum(axis=(2,3))
print(b.shape)
plt.plot(b[0,:])

In [None]:
# Normalisation per wavelength: the overall trend seems to be gone as well,
# and an individual wavelength does not show a trend either.
# A wavelength should stay comparable over time.
# NOTE(review): the statistics are reduced over axes (1, 2). The plot below
# indexes axis 2 as the wavelength (b[0,:,10]); if that is right, this
# normalises per index of axis 3 rather than per wavelength -- confirm the
# intended axes.
mean = np.mean(AIRS_cds_binned, axis=(1,2), keepdims=True)
std = np.std(AIRS_cds_binned, axis=(1,2), keepdims=True)
# Named min_p_wave / max_p_wave so the builtins min() and max() are not
# shadowed for the rest of the notebook.
min_p_wave = np.min(AIRS_cds_binned, axis=(1,2), keepdims=True)
max_p_wave = np.max(AIRS_cds_binned, axis=(1,2), keepdims=True)

zScoreAIRS = (AIRS_cds_binned - mean) / std
# Alternative: min-max scaling.
#zScoreAIRS = (AIRS_cds_binned - min_p_wave) / (max_p_wave - min_p_wave)

# Sum over the last axis only, so axis 2 can still be indexed.
# (An earlier sum over axes (2, 3) was overwritten unused and is dropped.)
b = zScoreAIRS.sum(axis=3)
print(b.shape)
plt.plot(b[0,:,10])

In [None]:
# Normalisation over the entire intensity signal (statistics reduced over
# axes 1, 2 and 3): makes stars comparable, which is probably not wanted
# because their spectra differ.
mean = np.mean(AIRS_cds_binned, axis=(1,2,3), keepdims=True)
std = np.std(AIRS_cds_binned, axis=(1,2,3), keepdims=True)
# Named min_total / max_total so the builtins min() and max() are not
# shadowed for the rest of the notebook.
min_total = np.min(AIRS_cds_binned, axis=(1,2,3), keepdims=True)
max_total = np.max(AIRS_cds_binned, axis=(1,2,3), keepdims=True)

zScoreAIRS = (AIRS_cds_binned - mean) / std
# Alternative: min-max scaling.
#zScoreAIRS = (AIRS_cds_binned - min_total) / (max_total - min_total)
b = zScoreAIRS.sum(axis=(2,3))
# Alternative view that keeps axis 2:
#b = zScoreAIRS.sum(axis=(3))
print(b.shape)
plt.plot(b[0,:])

In [None]:
# Shapes of all four arrays returned by calibrateData, in return order.
tuple(
    arr.shape
    for arr in (AIRS_cds_binned, FGS1_cds_binned, AIRS_cds_original, FGS1_cds_original)
)

In [None]:
def _normalize(data, axes, use_zscore):
    """Scale `data` with statistics reduced over `axes` (z-score or min-max)."""
    if use_zscore:
        offset = np.mean(data, axis=axes, keepdims=True)
        scale = np.std(data, axis=axes, keepdims=True)
    else:
        offset = np.min(data, axis=axes, keepdims=True)
        scale = np.max(data, axis=axes, keepdims=True) - offset
    # No guard against scale == 0: constant slices yield inf/nan, as before.
    return (data - offset) / scale


def _compress_pair(airs, fgs1):
    """Sum out the trailing axes of both arrays and append FGS1 sum/mean/std per AIRS frame."""
    airs_c = airs.sum(axis=3)          # original note: 1x5625x282
    fgs1_c = fgs1.sum(axis=(2, 3))     # original note: 1x67500
    # Group the FGS1 samples per AIRS frame. The group count is taken from the
    # AIRS array instead of a hard-coded (1, 5625, -1), so inputs with another
    # frame count work too; for a 1x5625x... input the result is identical
    # (original note: 1x5625x12).
    fgs1_c = np.reshape(fgs1_c, (airs_c.shape[0], airs_c.shape[1], -1))
    fgs1_stats = [
        reduce(fgs1_c, axis=2, keepdims=True) for reduce in (np.sum, np.mean, np.std)
    ]
    return np.concatenate([airs_c, *fgs1_stats], axis=2)


def encodeData(planet_id, train_adc_info, axis_info, DO_MASK, DO_THE_NL_CORR, DO_DARK, DO_FLAT, TIME_BINNING, zScoreNorm=True):
    """Calibrate one planet, build four compressed views and save them.

    All positional arguments are passed to ``calibrateData`` unchanged.

    Parameters
    ----------
    planet_id
        Planet to process; also names the output folder ``train/<planet_id>``.
    train_adc_info, axis_info, DO_MASK, DO_THE_NL_CORR, DO_DARK, DO_FLAT, TIME_BINNING
        Forwarded to ``calibrateData``.
    zScoreNorm : bool, default True
        True scales with ``(x - mean) / std``, False with
        ``(x - min) / (max - min)``.

    Returns
    -------
    numpy.ndarray
        The four views stacked on a new last axis, in the order: cleaned
        (binned), original, normalised over axes (1, 2, 3) ("per planet"),
        normalised over axes (1, 2) ("per wavelength"). Each view holds the
        AIRS data summed over axis 3, followed by three columns with the
        sum, mean and std of the FGS1 samples of each frame.

    Side effects
    ------------
    Writes the array under key ``a`` to ``train/<planet_id>/combined.npz``.
    The folder is created if it does not exist.
    """
    AIRS_cds_binned, FGS1_cds_binned, AIRS_cds_original, FGS1_cds_original = calibrateData(
        planet_id, train_adc_info, axis_info,
        DO_MASK, DO_THE_NL_CORR, DO_DARK, DO_FLAT, TIME_BINNING,
    )

    planet_axes = (1, 2, 3)
    # NOTE(review): called "per wavelength", but if axis 2 is the wavelength
    # axis (the 1x5625x282 shape notes suggest so), reducing over (1, 2)
    # leaves one statistic per index of axis 3 instead -- confirm.
    wavelength_axes = (1, 2)

    # Every normalised array is computed in both modes. Previously the min-max
    # branch never produced the FGS1 per-wavelength array, so
    # zScoreNorm=False failed with an UnboundLocalError further down.
    airs_planet_norm = _normalize(AIRS_cds_binned, planet_axes, zScoreNorm)
    fgs1_planet_norm = _normalize(FGS1_cds_binned, planet_axes, zScoreNorm)
    airs_wavel_norm = _normalize(AIRS_cds_binned, wavelength_axes, zScoreNorm)
    fgs1_wavel_norm = _normalize(FGS1_cds_binned, wavelength_axes, zScoreNorm)

    compressed_clean = _compress_pair(AIRS_cds_binned, FGS1_cds_binned)
    compressed_origi = _compress_pair(AIRS_cds_original, FGS1_cds_original)
    compressed_plNor = _compress_pair(airs_planet_norm, fgs1_planet_norm)
    compressed_waNor = _compress_pair(airs_wavel_norm, fgs1_wavel_norm)

    combined_array = np.stack(
        [compressed_clean, compressed_origi, compressed_plNor, compressed_waNor],
        axis=-1,
    )

    out_dir = 'train/'+str(planet_id)
    # np.savez does not create missing parent folders.
    os.makedirs(out_dir, exist_ok=True)
    np.savez(out_dir+'/combined.npz', a=combined_array)
    return combined_array
