In [12]:
import os
import glob
import numpy as np
import tensorflow as tf
import pandas as pd
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go
import scipy

# data

In [14]:
labelDf = pd.read_csv("train_labels.csv")
labelDf = labelDf.set_index('planet_id')
lablels = labelDf.copy()

meanTarget = np.mean(labelDf.mean())
stdTarget = np.std(labelDf.std())
maxTarget = np.max(labelDf.max())
minTarget = np.min(labelDf.min())

for col in labelDf.columns:
    labelDf.loc[:,col] = (labelDf[col]) / (maxTarget)

In [15]:
tf.random.set_seed(42)
files = glob.glob(os.path.join('train/', '*/*'))
stars = []
for file in files:
    file_name = file.split('\\')[1]
    stars.append(file_name)
stars = np.unique(stars)

import random
random.seed(42)

def split_star_list(file_list, test_ratio=0.2):
    random.shuffle(file_list)
    split_index = int(len(file_list) * (1 - test_ratio))
    train_files = file_list[:split_index]
    test_files = file_list[split_index:]
    return train_files, test_files

train_stars, test_stars = split_star_list(stars)

# normalize over time and all samples, so we have a mean and a std dev per wavelength for all samples
def calcMeanAndStdOfTrain(train_stars):
    i = 0
    for star in train_stars:
        file_path = 'train/'+str(star)+'/combined.npz'
        with np.load(file_path) as data:
            x = data['a'][0,:,0:283,:]
            if i ==0:
                mean = np.mean(x,axis=(0))
                sumS = np.sum(x**2,axis=0)
            else:
                mean = mean + np.mean(x, axis=(0))
                sumS += np.sum(x**2,axis=0)
            i=i+1
    meanTrain = mean / i
    stdTrain = np.sqrt(sumS / (i*x.shape[0]) - meanTrain**2)    
    return meanTrain, stdTrain
#meanTrain, stdTrain = calcMeanAndStdOfTrain(train_stars)

def normalize_over_train(features, labels):
    features = (features - meanTrain) / (stdTrain + 1e-6)
    return features, labels

# normalize over time per samples, so we have a mean and a std dev per wavelength for all samples
def calcMeanAndStdOfTrainPerStar(x):
    mean = np.mean(x,axis=(0))
    sumS = np.sum(x**2,axis=0)
    stdTrain = np.sqrt(sumS / (x.shape[0]) - mean**2)    
    return mean, stdTrain
def normalize_per_sample(features, labels):
    m,s = calcMeanAndStdOfTrainPerStar(features)
    features = (features) / (s + 1e-6)
    return features, labels




def load_npz(star):
    integer_value = tf.strings.to_number(star, out_type=tf.int64)
    python_int = integer_value.numpy()

    file_path = 'train/'+str(python_int)+'/combined.npz'
    try:
        with np.load(file_path) as data:
            features = data['a'][0,:,0:283,:]
            labels = labelDf.loc[python_int].to_numpy()
            features = np.reshape(features,(-1,25,283,4))
            features = np.mean(features,axis=1)
            #features, labels = normalize_per_sample(features,labels)
            features, labels = normalize_over_train(features,labels)
            return features, labels
    except Exception as e:
        print("Error loading file:", e, python_int)
    

def create_dataset(star_list, batch_size, shuffle=True):
    dataset = tf.data.Dataset.from_tensor_slices(star_list)
    if shuffle:
        dataset = dataset.shuffle(buffer_size=len(star_list))
    def load_and_process(x):
        features, labels = tf.py_function(
            func=load_npz,
            inp=[x],
            Tout=[tf.float64, tf.float32]
        )
        return features, labels

    dataset = dataset.map(load_and_process, num_parallel_calls=tf.data.AUTOTUNE)
    dataset = dataset.map(lambda x, y: (tf.ensure_shape(x,tf.TensorShape([225, 283, 4])), tf.ensure_shape(y, tf.TensorShape([283])))) #5625
    dataset = dataset.batch(batch_size)
    dataset = dataset.prefetch(tf.data.AUTOTUNE)
    return dataset


In [16]:
meanTarget

0.0025517145902829814

In [24]:
#np.savez('helpers.npz', meanTrain=meanTrain, stdTrain=stdTrain,max=max,min=min,std=std,mean=mean)

In [18]:
loaded = np.load('helpers.npz')
meanTrain=loaded['meanTrain']
stdTrain=loaded['stdTrain']
maxTarget=loaded['max']
minTarget=loaded['min']
stdTarget=loaded['std']
meanTarget=loaded['mean']
meanTarget

array(0.00255171)

In [19]:
tf.random.set_seed(42)
batch_size = 12

train_dataset = create_dataset(train_stars, batch_size, shuffle=True)
test_dataset = create_dataset(test_stars, batch_size, shuffle=False)

# CNN

In [20]:
timepoints = 225
representations = 4
wavelengths = 283
targetWavelengths = 283

class Reshape1(tf.keras.layers.Layer):
    def __init__(self, **kwargs):
        super().__init__(**kwargs)
    def call(self, x):
        x = tf.transpose(x, perm=[0,2,1,3])
        #x = tf.reshape(x, [-1, self.timepoints, tf.cast(self.wavelengths * self.representations, tf.int32)])
        return x
    
class Reshape11(tf.keras.layers.Layer):
    def __init__(self, **kwargs):
        super().__init__(**kwargs)
    def call(self, x):
        x = tf.transpose(x, perm=[0,2,1])
        #x = tf.reshape(x, [-1, self.timepoints, tf.cast(self.wavelengths * self.representations, tf.int32)])
        return x

class Reshape2(tf.keras.layers.Layer):
    def __init__(self, **kwargs):
        super().__init__(**kwargs)
    def call(self, x_pred, x_confidence):
        x = tf.concat([x_pred, x_confidence], axis = -1)
        
        return x
    
class Reshape22(tf.keras.layers.Layer):
    def __init__(self, **kwargs):
        super().__init__(**kwargs)
    def call(self, x_pred, x_confidence):
        x_pred = tf.expand_dims(x_pred, axis=-1)
        x_confidence = tf.expand_dims(x_confidence, axis=-1)
        x = tf.concat([x_pred, x_confidence], axis = -1)
        return x
    
class reduce(tf.keras.layers.Layer):
    def __init__(self, **kwargs):
        super().__init__(**kwargs)
    def call(self, x):
        mean = tf.reduce_sum(x,axis=-1)
        mean = tf.expand_dims(mean, axis=-1)
        return mean
class reduce1(tf.keras.layers.Layer):
    def __init__(self, **kwargs):
        super().__init__(**kwargs)
    def call(self, x):
        mean = tf.reduce_sum(x,axis=-1)
        return mean

def fcnM(outputDim = 283):
    inp = tf.keras.Input(shape=(timepoints, wavelengths, representations))
    x = inp[:,:,:,0]
    #x = tf.keras.layers.BatchNormalization(epsilon=1e-6)(x)
    x = tf.keras.layers.Flatten()(x)
    x = tf.keras.layers.Dense(283, activation='relu')(x)#, kernel_regularizer=tf.keras.regularizers.l2(0.001))(x)
    
    #x = tf.keras.layers.BatchNormalization(epsilon=1e-6)(x)
    x = tf.keras.layers.Dense(283, activation='relu')(x)#, kernel_regularizer=tf.keras.regularizers.l2(0.001))(x)
    #x = tf.keras.layers.BatchNormalization(epsilon=1e-6)(x)
    x_pred = tf.keras.layers.Dense(283, activation='linear')(x)
    mean = tf.keras.layers.Dense(1,activation='linear')(x)
    x_pred = x_pred+mean
    x_confidence = tf.keras.layers.Dense(283, activation='linear')(x)
    x = Reshape22()(x_pred, x_confidence)

    model = tf.keras.Model(inp, x)
    return model

def cnnM(outputDim = 283):
    inp = tf.keras.Input(shape=(timepoints, wavelengths, representations))
    x = inp[:,:,:,0]
    #x = tf.keras.layers.BatchNormalization(epsilon=1e-6)(x)

    #mean = reduce1()(x)
    #mean = tf.keras.layers.BatchNormalization(epsilon=1e-6)(mean)
    #mean = tf.keras.layers.Dense(1000)(mean)
    #mean = tf.keras.layers.BatchNormalization(epsilon=1e-6)(mean)
    #mean = tf.keras.layers.Dense(100)(mean)
    #mean = tf.keras.layers.Dense(1,activation='linear')(mean)

    #x = Reshape11()(x)
    dim = timepoints
    #x = tf.keras.layers.Conv1D(filters=wavelengths, kernel_size=(5), padding='valid')(x)
    #x = tf.keras.layers.Conv1D(filters=wavelengths, kernel_size=(50), padding='valid')(x)
    for i in range(3):
        x = tf.keras.layers.Conv1D(filters=wavelengths, kernel_size=(5), padding='valid')(x)
        x = tf.keras.layers.AveragePooling1D(2)(x)
        #x = tf.keras.layers.BatchNormalization(epsilon=1e-6)(x)
    #x = tf.keras.layers.Conv1D(filters=wavelengths, kernel_size=(600), padding='valid')(x)
    #x = tf.keras.layers.Conv1D(filters=wavelengths, kernel_size=(20), padding='valid')(x)
    #x = tf.keras.layers.MaxPooling1D(50)(x) 
    
    #x = ReduceDim()(x)
    #x = tf.keras.layers.Flatten()(x)
    #x = tf.keras.layers.LayerNormalization(epsilon=1e-6)(x)
    x = Reshape11()(x)
    x = tf.keras.layers.Dense(100)(x)
    x = tf.keras.layers.Flatten()(x)
    #x = tf.keras.layers.BatchNormalization(epsilon=1e-6)(x)
    x = tf.keras.layers.Dense(1000)(x)
    #x = tf.keras.layers.BatchNormalization(epsilon=1e-6)(x)
    mean = tf.keras.layers.Dense(1,activation='relu')(x)
    x_pred = tf.keras.layers.Dense(283, activation='linear')(x)
    x_pred = x_pred+mean
    x_confidence = tf.keras.layers.Dense(283, activation='linear')(x)
    x = Reshape22()(x_pred, x_confidence)

    model = tf.keras.Model(inp, x)
    return model

def cnnMean(outputDim = 283):
    inp = tf.keras.Input(shape=(timepoints, wavelengths, representations))
    x = inp[:,:,:,0]
    #x = tf.keras.layers.LayerNormalization(epsilon=1e-6)(x)

    mean = reduce()(x)
    #mean = tf.keras.layers.BatchNormalization(epsilon=1e-6)(mean)
    filters=32
    for i in range(3):
        mean = tf.keras.layers.Conv1D(filters=filters, kernel_size=(10), padding='valid')(mean)
        mean = tf.keras.layers.AveragePooling1D(3)(mean) 
        filters = filters*2
    mean = tf.keras.layers.Flatten()(mean)
    #mean = tf.keras.layers.LayerNormalization(epsilon=1e-6)(mean)
    mean = tf.keras.layers.Dense(1000)(mean)
    mean = tf.keras.layers.Dense(1,activation='linear')(mean)
    model = tf.keras.Model(inp, mean)
    return model

def fcnMean(outputDim = 283):
    inp = tf.keras.Input(shape=(timepoints, wavelengths, representations))
    x = inp[:,:,:,0]
    #x = tf.keras.layers.LayerNormalization(epsilon=1e-6)(x)

    mean = reduce1()(x)
    #mean = tf.keras.layers.LayerNormalization(epsilon=1e-6)(mean)
    mean = tf.keras.layers.Dense(1000)(mean)
    mean = tf.keras.layers.Dense(1,activation='linear')(mean)
    #mean = mean[:,0]
    model = tf.keras.Model(inp, mean)
    return model


model = cnnM() 
#model = buildTransfModel()
model.summary()

In [21]:
model.load_weights('320_epochs_logLoss_8_12.weights.h5')

In [22]:
batch = next(iter(train_dataset))
out = model(batch[0])
test_batch = next(iter(test_dataset))
batch[0].dtype ,batch[1].dtype, out.dtype,batch[0].shape ,batch[1].shape, out.shape

(tf.float64,
 tf.float32,
 tf.float32,
 TensorShape([12, 225, 283, 4]),
 TensorShape([12, 283]),
 TensorShape([12, 283, 2]))

In [35]:
def log_likelihood_zScoreTarget(y_trueZScore, y_pred):
    # stdDev_zScorePred = 1/n * sqrt((y_zScore - y_zScoreMean)^2) = 1/n *sqrt(sum( (y-mean)/std - (y_mean-mean)/std )^2) = 1/n * sqrt(sum( (y-y_mean)/std )^2 )) = 1/std * 1/n * sqrt(sum(y-y_mean)^2) = stdDev / std
    # stdDev_zScorePred = stdDev_pred / std
    # y_pred contains 1. y_zScore 2. log(stdDev_zScore)

    y_true = y_trueZScore * stdTarget + meanTarget   # y_zScore = (y - mean) / std -> y = y_zScore *std + mean

    y_predZScore = y_pred[:, :,0]
    log_sigma = y_pred[:, :,1]  # Log of the standard deviation / we predict log(stdDev_zScore) = log(stdDev / std) = log(stdDev) - log(std) -> log(stdDev) = log(stdDev_zScore) + log(std)

    y_pred0 = y_predZScore * stdTarget + meanTarget
    stdDev = tf.exp(log_sigma)*stdTarget  # Exponentiate to get variance + scale back from zscore 
    logStdDev = log_sigma + tf.math.log(stdTarget)

    L_pred = -0.5*(tf.math.log(2*np.pi) + logStdDev + tf.square(y_true - y_pred0) / stdDev)
    L_ref = -0.5*(tf.math.log(2*np.pi) +  tf.math.log(stdTarget*stdTarget) + tf.square(y_trueZScore))   # ( (y_true - mean)/std )^2 = y_trueZScore^2  (y_true = y_trueZScore * std + mean)
    L_ideal = -0.5*(tf.math.log(2*np.pi) + tf.math.log(1e-10))

    L = (tf.reduce_sum(L_pred) -tf.reduce_sum(L_ref)) / (tf.reduce_sum(L_ideal)*283*5625 - tf.reduce_sum(L_ref))
    
    return L

def log_likelihood_maxScaling(y_trueMax, y_pred):
    # stdDev_zScorePred = 1/n * sqrt((y_zScore - y_zScoreMean)^2) = 1/n *sqrt(sum( (y-mean)/std - (y_mean-mean)/std )^2) = 1/n * sqrt(sum( (y-y_mean)/std )^2 )) = 1/std * 1/n * sqrt(sum(y-y_mean)^2) = stdDev / std
    # stdDev_zScorePred = stdDev_pred / std
    # y_pred contains 1. y_zScore 2. log(stdDev_zScore)

    y_true = y_trueMax * maxTarget #std + mean   # y_zScore = (y - mean) / std -> y = y_zScore *std + mean

    y_predMax = y_pred[:, :,0]
    log_sigma = y_pred[:, :,1]  # Log of the standard deviation / we predict log(stdDev_zScore) = log(stdDev / std) = log(stdDev) - log(std) -> log(stdDev) = log(stdDev_zScore) + log(std)

    y_pred0 = y_predMax *maxTarget #* std + mean
    sigma = tf.exp(log_sigma)*maxTarget  # Exponentiate to get variance + scale back from zscore 
    logStdDev = tf.math.log(sigma)# + tf.math.log(max)

    L_pred = -0.5*(tf.math.log(2*np.pi) + logStdDev + tf.square(y_true - y_pred0) / sigma)
    L_ref = -0.5*(tf.math.log(2*np.pi) +  tf.math.log(stdTarget*stdTarget) + tf.square((y_true - meanTarget)/stdTarget))   # ( (y_true - mean)/std )^2 = y_trueZScore^2  (y_true = y_trueZScore * std + mean)
    L_ideal = -0.5*(tf.math.log(2*np.pi) + tf.math.log(1e-10))

    L = (tf.reduce_sum(L_pred) -tf.reduce_sum(L_ref)) / (tf.reduce_sum(L_ideal)*283*5625 - tf.reduce_sum(L_ref))
    
    return L

def log_likelihood_maxScalingComp(y_trueMax,y_predMax):
    y_true = y_trueMax * maxTarget #std + mean   # y_zScore = (y - mean) / std -> y = y_zScore *std + mean

    y_pred = y_predMax[:, :,0]*maxTarget
    log_sigma = y_predMax[:, :,1]
    sigma_pred = np.sqrt(tf.exp(log_sigma)*maxTarget)
    sigma_true = 1e-5 # or 1e-5?

    GLL_pred = np.sum(scipy.stats.norm.logpdf(y_true, loc=y_pred, scale=sigma_pred))
    GLL_true = np.sum(scipy.stats.norm.logpdf(y_true, loc=y_true, scale=sigma_true * np.ones_like(y_true)))
    GLL_mean = np.sum(scipy.stats.norm.logpdf(y_true, loc=meanTarget * np.ones_like(y_true), scale=stdTarget * np.ones_like(y_true)))

    submit_score = (GLL_pred - GLL_mean)/(GLL_true - GLL_mean)
    return submit_score
#log_likelihood_zScoreTarget(batch[1], out)
#log_likelihood_maxScaling(batch[1], out)
log_likelihood_maxScalingComp(batch[1],out)

0.9988300237125592

In [None]:
epochs=300
LR_SCHEDULE = [0.01*((np.cos(step/epochs *np.pi) if np.cos(step/epochs*np.pi)>0.001 else np.sin(step/epochs*np.pi))) for step in range(epochs)]
plt.figure()
plt.plot(LR_SCHEDULE)
plt.show()

In [8]:
def loss_logSigma(y_true_zScore, y_pred):
    y_predZScore = y_pred[:, :,0]  # y_zScore = (y - mean)/std
    loss = tf.math.abs(y_true_zScore - y_predZScore)#tf.math.abs(y_true_zScore-y_predZScore)

    is_large = tf.reduce_any(tf.greater(y_pred[:, :,1], 5.0))
    #tf.print(y_pred[:,:,1])
    def true_fn():
        print("Tensor contains large values")
        return y_pred[:,:,1] + tf.math.exp(5.0)
    
    def false_fn():
        return tf.math.exp(y_pred[:,:,1])

    logConfidence = tf.cond(is_large, true_fn,false_fn)
    loss_log = tf.math.abs(loss-(logConfidence))
    l = tf.reduce_sum(loss, axis=-1) + tf.reduce_sum(loss_log,axis=-1)
    return l

def loss_abs(y_true_zScore, y_pred):
    y_predZScore = y_pred[:, :,0]
    logConfidence = y_pred[:, :,1]
    loss = tf.math.abs(y_true_zScore - y_predZScore)
    loss_2 = tf.math.abs(loss-(logConfidence))
    l = tf.reduce_sum(loss, axis=-1) + tf.reduce_sum(loss_2,axis=-1)
    return l 


def loss_mae(y_true_zScore,y_pred):
    y_true_meanVal = tf.math.reduce_mean(y_true_zScore,axis=1)
    y_pred = y_pred[:,0]
    absVal = tf.math.abs(y_true_meanVal - y_pred)
    m = tf.reduce_mean(absVal,axis=0)
    return m



class DynamicLoss(tf.keras.losses.Loss):
    def __init__(self):
        super().__init__()
        self.epoch = tf.Variable(0, trainable=False, dtype=tf.int32)

    def call(self, y_true, y_pred):
        if tf.less(self.epoch, 5):
            return loss_abs(y_true, y_pred)
        else:
            return loss_logSigma(y_true, y_pred)

    def on_epoch_end(self):
        self.epoch.assign_add(1)

In [9]:
tf.random.set_seed(42)


lr_callback = tf.keras.callbacks.LearningRateScheduler(lambda step: LR_SCHEDULE[step], verbose=0)
optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)
#model.compile(loss=loss_logSigma,metrics=[log_likelihood_maxScaling], optimizer=optimizer)
model.compile(loss=loss_logSigma,metrics=[log_likelihood_maxScaling], optimizer=optimizer)

history = model.fit(train_dataset, 
                    #batch[0],batch[1], #verbose=2,
                    validation_data=test_dataset,
                    #validation_data=(test_batch[0],test_batch[1]),
                    epochs=120, batch_size=batch_size,
                    #callbacks=[lr_callback]
                    )


Epoch 1/120
Tensor contains large values
Tensor contains large values
[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 276ms/step - log_likelihood_max_scaling: nan - loss: 84840.0000Tensor contains large values
[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 338ms/step - log_likelihood_max_scaling: nan - loss: 84488.7891 - val_log_likelihood_max_scaling: -1742024343552.0000 - val_loss: 40339.0000
Epoch 2/120
[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 340ms/step - log_likelihood_max_scaling: nan - loss: 34832.3125 - val_log_likelihood_max_scaling: nan - val_loss: 3395.9048
Epoch 3/120
[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 348ms/step - log_likelihood_max_scaling: nan - loss: 2464.3582 - val_log_likelihood_max_scaling: nan - val_loss: 319.0421
Epoch 4/120
[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 435ms/step - log_likelihood_max_scaling: nan - loss: 258.0143 - val_log_likelihood_max_scal

In [18]:
model.save('320_epochs_logLoss_8_12.keras')
# Save weights
model.save_weights('320_epochs_logLoss_8_12.weights.h5')

# Load weights
#loaded_weights = model.load_weights('170_epochs_accLoss_reluActivation_23_23.weights.h5')

In [None]:
loaded_model = tf.keras.models.load_model('120_epochs_accLoss31_30.keras')

In [20]:
history = model.fit(train_dataset, 
                    #batch[0],batch[1], #verbose=2,
                    validation_data=test_dataset,
                    #validation_data=next(iter(test_dataset)),
                    epochs=50, batch_size=batch_size)

Epoch 1/50
[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 320ms/step - log_likelihood_max_scaling: 0.6713 - loss: 7.6528 - val_log_likelihood_max_scaling: 0.6833 - val_loss: 12.6153
Epoch 2/50
[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 411ms/step - log_likelihood_max_scaling: 0.6823 - loss: 11.0545 - val_log_likelihood_max_scaling: 0.6833 - val_loss: 12.1805
Epoch 3/50
[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 409ms/step - log_likelihood_max_scaling: 0.6940 - loss: 7.4634 - val_log_likelihood_max_scaling: 0.6833 - val_loss: 13.9168
Epoch 4/50
[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 404ms/step - log_likelihood_max_scaling: 0.6931 - loss: 8.9968 - val_log_likelihood_max_scaling: 0.6833 - val_loss: 12.9837
Epoch 5/50
[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 413ms/step - log_likelihood_max_scaling: 0.6776 - loss: 8.2306 - val_log_likelihood_max_scaling: 0.6833 - val_loss: 15.9271

In [None]:
# first try couldn't fit the values, just predicted mean if I kept the shape (output layer of shape 1 - tensor 283x100 -> 283x1)
# having a flatten layer between converges

# flatten layer and 12 samples -> predict the same for all 12 samples, maybe not enough filters

# PROBLEM why we can't fit multiple targets: layer normalization!! use batch norm instead

#---- with batch norm
# cnn model + mean estimation, loss ~80, but predicting differnt mean
# fcn model, loss ~81
# fcn model / min scaling -> loss 0.8 / 27 (lots of negative predictions)
# fcn model / max scaling / relu activation -> 3.1/8 (lots of 0 predictions) / with scale of 100, loss =14.9/43691
# cnn model / max scaling / mean pred -> 6.0/inf
# cnn ... no layer norm in beginning -> 15

# loss function for every output (batch,283) / 100 epochs
# cnn 1.5 loss
# cnn with smaller LR 0.22(also after 200 epochs)
# cnn with separate mean prediction loss 20.5 (lr0.0001) vs 3.5(lr0.0005) / can't even fit 2 samples (0.5 for lr 0.0005)

# cnn without mean prediction (2 samples, lr0.0005) 22.4   / lr0.001 0.4 loss, but targets still fit badly / only fitting target noVar 0.08 still bad

# difference between train / test = batch norm has significant effect here
#fcn + mean, 2 samples LR0.0005 -> 
#fcn + mean, 2 samples only loss on target -> 
#fcn + mean, 1 sample, only loss on target -> 0.03 targets are far off
#fcn, 1 sample, only loss on target -> 0.4 targets are far off
# -> train data was not normalized!!

# with regularization / without regularization doesn't matter that much as long as sample is normalized
# normalization per sample -> predict the same for all targets ~0.0978
# norm per sample + bis estimation -> predict same for all targets (besides 1) ~0.0978

# with learning rate schedule -> 0.06 lots more possible to not get stuck in local minima

#cnn / norm over train / bias estimation / lr0.01 / only target -> ~45 sum loss
#cnn / norm over train / bias estimation / lf0.01 / target + loss2 -> ~47 after 95 epochs (15 after~150epochs)

#cnn / norm over train / bais est / lr0.01 / target + loss / activation function relu instead of linear (conf + bias / still nan bc stddev =0, log(0) = nan)
# 39/40 but training seems to be a lot more stable
# after 170 epochs 23.7/23.5
# after 220 epochs 11/19 (but already went down to 14/16)
# after 250 epochs 12/16 (but already 16/15)
# after 300 epochs 12/17

# log loss
#after 120 epochs -> 80/59 
#after 170 epochs -> 30/37  (30/27 before)  / 0.67/0.68 log likelihood
# 17.7/17.5 after 177 epochs (best so far)
# 220 -> 83/79
# 270 -> 26/29
# 320 -> 8.7/13
# 350 -> 8.9/12.5 (was already 11)


# Assuming 'history' is your model's training history
train_loss = history.history['loss']
test_loss = history.history['val_loss']

epochs = range(1, len(train_loss) + 1)

plt.figure(figsize=(12, 6))
plt.plot(epochs, train_loss, 'b', label='Training loss')
#plt.plot(epochs, test_loss, 'r', label='Test loss')
plt.title('Training and Test Loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()
plt.show()

# eval

In [22]:
#pred = model.predict(normData)
pred = model.predict(test_batch[0])
pred[:,0:2,0], test_batch[1][:,0:2] ,np.sqrt(np.exp(pred[:,0:2,1]))

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 37ms/step


(array([[0.06610027, 0.08522013],
        [0.19229797, 0.20303626],
        [0.273583  , 0.31230897],
        [0.15744403, 0.12355568],
        [0.21963206, 0.221281  ],
        [0.59530675, 0.58689016],
        [0.43527368, 0.46160156],
        [0.27138215, 0.2528267 ],
        [0.20721841, 0.22699125],
        [0.5539123 , 0.60307556],
        [0.18370745, 0.1905589 ],
        [0.4288279 , 0.45899945]], dtype=float32),
 <tf.Tensor: shape=(12, 2), dtype=float32, numpy=
 array([[0.09698724, 0.09683569],
        [0.20825225, 0.21028523],
        [0.2878623 , 0.2889837 ],
        [0.14158063, 0.14114746],
        [0.18724738, 0.19135208],
        [0.5811523 , 0.58253735],
        [0.43278694, 0.4439398 ],
        [0.24758054, 0.248844  ],
        [0.1983679 , 0.19857608],
        [0.5931966 , 0.5926916 ],
        [0.18650211, 0.18677506],
        [0.4276639 , 0.44121537]], dtype=float32)>,
 array([[0.11075503, 0.11957327],
        [0.11075962, 0.12040351],
        [0.11317819, 0.12182653

In [36]:
aggScore = []
for x,y in test_dataset:
    pred = model.predict(x)
    aggScore.append(log_likelihood_maxScalingComp(y,pred))
print(aggScore)
print(np.mean(aggScore))

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 37ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 37ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 34ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 35ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 36ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 34ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 35ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step
[0.9989527189602132, 0.9995865162658214, 0.9994470712971446, 0.9995566807314736, 0.9996655509799878, 0.9994467575229277, 0.9992905453906

In [None]:
print('overall',(loss_fn0(batch[1],pred)))
for i in range(batch_size):
    print(f'batch {i}',np.mean(loss_fn0(batch[1][i,:],pred[i:i+1,:,:])))

In [None]:
fig = go.Figure()
for i in range(4):#[2,6,10,20,100]:
    fig.add_trace(go.Scatter(y=batch[0][i,:,0,0],mode='markers',name=f'f_{i}',marker=dict(size=3)))

fig.show()

In [None]:
fig = go.Figure()
for i in range(12):#[2,6,10,20,100]:
    fig.add_trace(go.Scatter(y=batch[1][i,:],mode='markers',name=f'gt_{i}',marker=dict(size=3)))
    fig.add_trace(go.Scatter(y=pred[i,:,0],mode='markers',name=f'pred_{i}',marker=dict(size=3)))

fig.show()

# sample pred

In [79]:
predArray = []
for x,y in test_dataset:
    pred = model.predict(x)
    #transform labels back to real domain
    scaledPred = pred[:,:,0]*maxTarget
    stdDev = (np.exp(pred[:,:,1])*maxTarget)

    #concatenate to df
    arr = np.concatenate((scaledPred,stdDev),axis=1)
    df = pd.DataFrame(arr, columns=sampleSub.columns[1:])
    predArray.append(df)
predArray = pd.concat(predArray)
predArray

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 34ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 34ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 43ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 35ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 37ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step


Unnamed: 0,wl_1,wl_2,wl_3,wl_4,wl_5,wl_6,wl_7,wl_8,wl_9,wl_10,...,sigma_274,sigma_275,sigma_276,sigma_277,sigma_278,sigma_279,sigma_280,sigma_281,sigma_282,sigma_283
0,0.000315,0.000864,0.000797,0.000839,0.000543,0.000735,0.000598,0.000845,0.000754,0.000764,...,0.000199,0.000329,0.000255,0.000212,0.000227,0.000246,0.000360,0.000213,0.000244,0.000215
1,0.001416,0.001582,0.001860,0.001760,0.001857,0.001891,0.002271,0.001700,0.001577,0.001469,...,0.000191,0.000290,0.000253,0.000222,0.000223,0.000252,0.000348,0.000208,0.000256,0.000207
2,0.002138,0.002097,0.002545,0.002326,0.002379,0.002744,0.002619,0.002064,0.002173,0.002228,...,0.000175,0.000246,0.000236,0.000180,0.000196,0.000209,0.000289,0.000144,0.000204,0.000182
3,0.001158,0.000841,0.001297,0.001200,0.000946,0.001285,0.000659,0.000958,0.000904,0.000804,...,0.000187,0.000331,0.000261,0.000218,0.000219,0.000245,0.000339,0.000222,0.000229,0.000200
4,0.000951,0.001538,0.001928,0.001886,0.001514,0.002018,0.001528,0.001377,0.001309,0.001330,...,0.000186,0.000332,0.000241,0.000208,0.000218,0.000231,0.000319,0.000200,0.000228,0.000219
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10,-0.000136,0.000484,0.000342,0.000329,0.000231,0.000361,0.000522,0.000250,0.000114,0.000062,...,0.000187,0.000339,0.000241,0.000198,0.000222,0.000230,0.000347,0.000207,0.000233,0.000227
11,0.001228,0.001475,0.001218,0.001347,0.001539,0.001560,0.001749,0.001230,0.001322,0.001155,...,0.000192,0.000310,0.000255,0.000227,0.000223,0.000248,0.000351,0.000208,0.000251,0.000201
0,0.001852,0.001961,0.002129,0.002086,0.002074,0.002334,0.001974,0.001915,0.001881,0.001839,...,0.000163,0.000236,0.000214,0.000166,0.000174,0.000193,0.000270,0.000136,0.000180,0.000167
1,0.001126,0.001983,0.001753,0.001639,0.001841,0.001872,0.002128,0.001472,0.001460,0.001487,...,0.000201,0.000332,0.000259,0.000228,0.000234,0.000254,0.000353,0.000215,0.000257,0.000230


In [74]:
testLabels = lablels.loc[[int(star) for star in test_stars]]
testLabels

Unnamed: 0_level_0,wl_1,wl_2,wl_3,wl_4,wl_5,wl_6,wl_7,wl_8,wl_9,wl_10,...,wl_274,wl_275,wl_276,wl_277,wl_278,wl_279,wl_280,wl_281,wl_282,wl_283
planet_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1949187031,0.000775,0.000774,0.000774,0.000774,0.000774,0.000774,0.000774,0.000774,0.000774,0.000774,...,0.000790,0.000790,0.000790,0.000790,0.000789,0.000789,0.000789,0.000789,0.000789,0.000788
3875101730,0.001664,0.001681,0.001679,0.001676,0.001679,0.001674,0.001674,0.001680,0.001678,0.001675,...,0.001689,0.001689,0.001688,0.001688,0.001687,0.001687,0.001686,0.001686,0.001685,0.001684
4247918843,0.002301,0.002310,0.002307,0.002301,0.002302,0.002300,0.002298,0.002298,0.002298,0.002297,...,0.002319,0.002318,0.002317,0.002317,0.002319,0.002320,0.002320,0.002319,0.002318,0.002316
1012051641,0.001132,0.001128,0.001128,0.001127,0.001127,0.001127,0.001127,0.001128,0.001128,0.001128,...,0.001145,0.001145,0.001144,0.001144,0.001144,0.001144,0.001144,0.001144,0.001143,0.001143
612015401,0.001496,0.001529,0.001530,0.001530,0.001529,0.001525,0.001527,0.001533,0.001533,0.001533,...,0.001487,0.001487,0.001487,0.001487,0.001487,0.001487,0.001487,0.001487,0.001487,0.001487
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2320457425,0.000440,0.000445,0.000445,0.000443,0.000445,0.000443,0.000442,0.000443,0.000442,0.000442,...,0.000452,0.000451,0.000451,0.000451,0.000451,0.000451,0.000451,0.000450,0.000450,0.000450
2556934812,0.001366,0.001529,0.001513,0.001490,0.001533,0.001506,0.001485,0.001503,0.001480,0.001468,...,0.001349,0.001350,0.001352,0.001352,0.001349,0.001346,0.001346,0.001348,0.001348,0.001345
1121250116,0.001866,0.001857,0.001852,0.001846,0.001846,0.001843,0.001842,0.001844,0.001843,0.001841,...,0.001900,0.001899,0.001897,0.001897,0.001899,0.001900,0.001900,0.001899,0.001897,0.001896
1615603560,0.001392,0.001501,0.001490,0.001473,0.001502,0.001485,0.001471,0.001483,0.001470,0.001464,...,0.001577,0.001575,0.001574,0.001572,0.001570,0.001569,0.001567,0.001565,0.001561,0.001557


In [80]:
import numpy as np
import pandas as pd
import pandas.api.types
import scipy.stats


class ParticipantVisibleError(Exception):
    pass
def score(
        solution: pd.DataFrame,
        submission: pd.DataFrame,
        naive_mean: float,
        naive_sigma: float,
        sigma_true: float
    ) -> float:

    if submission.min().min() < 0:
        raise ParticipantVisibleError('Negative values in the submission')
    for col in submission.columns:
        if not pandas.api.types.is_numeric_dtype(submission[col]):
            raise ParticipantVisibleError(f'Submission column {col} must be a number')

    n_wavelengths = len(solution.columns)
    if len(submission.columns) != n_wavelengths*2:
        raise ParticipantVisibleError('Wrong number of columns in the submission')

    y_pred = submission.iloc[:, :n_wavelengths].values
    # Set a non-zero minimum sigma pred to prevent division by zero errors.
    sigma_pred = np.clip(submission.iloc[:, n_wavelengths:].values, a_min=10**-15, a_max=None)
    y_true = solution.values

    GLL_pred = np.sum(scipy.stats.norm.logpdf(y_true, loc=y_pred, scale=sigma_pred))
    GLL_true = np.sum(scipy.stats.norm.logpdf(y_true, loc=y_true, scale=sigma_true * np.ones_like(y_true)))
    GLL_mean = np.sum(scipy.stats.norm.logpdf(y_true, loc=naive_mean * np.ones_like(y_true), scale=naive_sigma * np.ones_like(y_true)))

    submit_score = (GLL_pred - GLL_mean)/(GLL_true - GLL_mean)
    return float(np.clip(submit_score, 0.0, 1.0))

In [81]:
predArray = predArray.clip(lower=0)
score(testLabels, predArray, naive_mean=meanTarget, naive_sigma=stdTarget*stdTarget, sigma_true=1e-10)

0.9999999999998621

: 