In [1]:
import os
import glob
import numpy as np
import tensorflow as tf
import pandas as pd
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go
import scipy

In [None]:
# Raw (unscaled) label table indexed by planet id; the bare name displays it.
labelDf0 = pd.read_csv("train_labels.csv").set_index('planet_id')
labelDf0

# data

In [2]:
import random

# Seed the RNGs used by this cell so the train/test split is reproducible.
tf.random.set_seed(42)
random.seed(42)

# Files are laid out as train/<star id>/<file>.  The star id is the name of the
# directory that contains each file; it is taken with os.path so the lookup
# works with both "/" and "\\" separators (splitting on a backslash only works
# for Windows-style paths and raises IndexError otherwise).
files = glob.glob(os.path.join('train/', '*/*'))
stars = np.unique([os.path.basename(os.path.dirname(file)) for file in files])

def split_star_list(file_list, test_ratio=0.6):
    """Randomly split ``file_list`` into a train part and a test part.

    The items are shuffled with the global ``random`` module state, so seed it
    beforehand for a reproducible split.  The shuffle is performed on a copy:
    the caller's sequence keeps its original order.

    Args:
        file_list: sequence (list or 1-D numpy array) of items to split.
        test_ratio: fraction of the items assigned to the test part, in [0, 1].

    Returns:
        ``(train_files, test_files)``: the first
        ``int(len(file_list) * (1 - test_ratio))`` shuffled items and the rest.
        Numpy input yields numpy arrays, any other input yields lists.

    Raises:
        ValueError: if ``test_ratio`` is outside [0, 1].
    """
    if not 0 <= test_ratio <= 1:
        raise ValueError(f"test_ratio must be within [0, 1], got {test_ratio}")

    # Work on a copy so the argument is never reordered as a side effect.
    if isinstance(file_list, np.ndarray):
        shuffled = file_list.copy()
    else:
        shuffled = list(file_list)
    random.shuffle(shuffled)

    split_index = int(len(shuffled) * (1 - test_ratio))
    return shuffled[:split_index], shuffled[split_index:]

# Split the discovered star ids with the default test_ratio of split_star_list.
train_stars, test_stars = split_star_list(stars)

# Label table indexed by planet id, plus scalar statistics over ALL labels.
labelDf = pd.read_csv("train_labels.csv")
labelDf = labelDf.set_index('planet_id')
meanLabels = np.mean(labelDf.mean())  # mean of the per-column means
# NOTE(review): this is np.std of the per-column standard deviations, i.e. the
# spread of the column stds, not the std of all label values - confirm intended.
stdLabels = np.std(labelDf.std())
maxLabels = np.max(labelDf.max())
minLabels = np.min(labelDf.min())

# The same statistics restricted to the training stars (ids cast to int to
# match the integer index).
trainLabels = labelDf.loc[[int(star) for star in train_stars]]
meanTrainLabels = np.mean(trainLabels.mean())
stdTrainLabels = np.std(trainLabels.std())  # NOTE(review): std of column stds, see above
maxTrainLabels = np.max(trainLabels.max())
minTrainLabels = np.min(trainLabels.min())

# Scale every label column in place by the training maximum.  This must run
# after the statistics above; trainLabels was extracted before the scaling.
for col in labelDf.columns:
    labelDf.loc[:,col] = (labelDf[col]) / (maxTrainLabels)

# normalize over time and all samples, so we have a mean and a std dev per wavelength for all samples
def calcMeanAndStdOfTrain(train_stars):
    """Compute the mean and std over time and over all given stars.

    For every star, ``train/<star>/combined.npz`` is loaded and the slice
    ``data['a'][0, :, 0:283, :]`` is reduced along its first axis.  Sums,
    squared sums and row counts are pooled across stars, so the result is the
    statistic of all rows together even if stars differ in their row count.

    Args:
        train_stars: iterable of star ids (anything ``str()`` turns into the
            directory name).

    Returns:
        ``(meanTrain, stdTrain)``: float64 arrays with the shape of one row of
        the loaded slice.

    Raises:
        ValueError: if no rows were read (e.g. ``train_stars`` is empty).
    """
    total_sum = None
    total_sq_sum = None
    n_rows = 0
    for star in train_stars:
        file_path = 'train/'+str(star)+'/combined.npz'
        with np.load(file_path) as data:
            # Accumulate in float64 to limit cancellation in E[x^2] - E[x]^2.
            x = np.asarray(data['a'][0,:,0:283,:], dtype=np.float64)
        if total_sum is None:
            total_sum = np.zeros(x.shape[1:], dtype=np.float64)
            total_sq_sum = np.zeros(x.shape[1:], dtype=np.float64)
        total_sum += x.sum(axis=0)
        total_sq_sum += np.square(x).sum(axis=0)
        n_rows += x.shape[0]

    if n_rows == 0:
        raise ValueError("calcMeanAndStdOfTrain needs at least one star with data")

    meanTrain = total_sum / n_rows
    # Rounding can push the variance slightly below zero; clamp it so that
    # sqrt never returns NaN.
    variance = np.maximum(total_sq_sum / n_rows - meanTrain**2, 0.0)
    stdTrain = np.sqrt(variance)
    return meanTrain, stdTrain
# Feature statistics of the training stars, computed once and reused below.
meanTrain, stdTrain = calcMeanAndStdOfTrain(train_stars)


def normalize_over_train(features, labels):
    """Standardise ``features`` with the training mean/std; labels pass through.

    A small epsilon is added to the std to avoid division by zero.
    """
    centered = features - meanTrain
    scaled = centered / (stdTrain + 1e-6)
    return scaled, labels

# normalize over time per sample, so we have a mean and a std dev per wavelength for each sample
def calcMeanAndStdOfTrainPerStar(x):
    """Return the mean and (population) std of ``x`` along its first axis.

    The std is computed as ``sqrt(E[x^2] - E[x]^2)``.

    Args:
        x: array whose first axis is reduced.

    Returns:
        ``(mean, std)`` with the shape of ``x`` minus its first axis.
    """
    mean = np.mean(x, axis=0)
    mean_of_squares = np.sum(x**2, axis=0) / x.shape[0]
    # Rounding can make E[x^2] - E[x]^2 marginally negative for (nearly)
    # constant data; clamp so sqrt never yields NaN.
    variance = np.maximum(mean_of_squares - mean**2, 0.0)
    stdTrain = np.sqrt(variance)
    return mean, stdTrain
def normalize_per_sample(features, labels):
    """Scale ``features`` by their own std along the first axis.

    Only the std is used: the per-sample mean is computed by the helper but is
    not subtracted.  Labels are returned unchanged.
    """
    _, sample_std = calcMeanAndStdOfTrainPerStar(features)
    return features / (sample_std + 1e-6), labels




def load_npz(star):
    """Load and preprocess the features and labels of one star.

    Intended to be called through ``tf.py_function`` (see ``create_dataset``).

    Args:
        star: string tensor holding the star id.

    Returns:
        ``(features, labels)``: the slice ``data['a'][0, :, 0:283, :]`` averaged
        over consecutive groups of 25 rows and normalised with
        ``normalize_over_train``, and the label row of ``labelDf`` for this star.

    Raises:
        Exception: any error raised while loading or processing is logged with
            the star id and re-raised.  Previously the error was only printed
            and ``None`` was returned, which made ``tf.py_function`` fail later
            with an unrelated message.
    """
    integer_value = tf.strings.to_number(star, out_type=tf.int64)
    python_int = integer_value.numpy()

    file_path = 'train/'+str(python_int)+'/combined.npz'
    try:
        with np.load(file_path) as data:
            features = data['a'][0,:,0:283,:]
            labels = labelDf.loc[python_int].to_numpy()
            # Bin the first axis: groups of 25 consecutive rows are averaged.
            features = np.reshape(features,(-1,25,283,4))
            features = np.mean(features,axis=1)
            #features, labels = normalize_per_sample(features,labels)
            features, labels = normalize_over_train(features,labels)
            return features, labels
    except Exception as e:
        print("Error loading file:", e, python_int)
        raise
    

def create_dataset(star_list, batch_size, shuffle=True):
    """Build a batched ``tf.data`` pipeline that loads one star per element.

    Args:
        star_list: star ids to iterate over.
        batch_size: number of stars per batch.
        shuffle: if true, shuffle with a buffer covering the whole list.

    Returns:
        A prefetched dataset of ``(features, labels)`` batches with per-element
        shapes ``[225, 283, 4]`` and ``[283]``.
    """
    def load_and_process(star):
        # load_npz runs eagerly in Python, so it has to be wrapped.
        features, labels = tf.py_function(
            func=load_npz,
            inp=[star],
            Tout=[tf.float64, tf.float32],
        )
        return features, labels

    def pin_shapes(features, labels):
        # py_function loses static shapes; restore them for Keras.
        return (
            tf.ensure_shape(features, tf.TensorShape([225, 283, 4])),
            tf.ensure_shape(labels, tf.TensorShape([283])),
        )

    dataset = tf.data.Dataset.from_tensor_slices(star_list)
    if shuffle:
        dataset = dataset.shuffle(buffer_size=len(star_list))
    dataset = dataset.map(load_and_process, num_parallel_calls=tf.data.AUTOTUNE)
    dataset = dataset.map(pin_shapes)
    return dataset.batch(batch_size).prefetch(tf.data.AUTOTUNE)


In [24]:
# Persist the normalisation constants computed above in a single archive.
np.savez(
    'helpers_origiData.npz',
    meanTrain=meanTrain,
    stdTrain=stdTrain,
    meanLabels=meanLabels,
    stdLabels=stdLabels,
    maxTrainLabels=maxTrainLabels,
)

In [3]:
tf.random.set_seed(42)
batch_size = 64

# Only the training stream is shuffled; the test stream keeps its order.
train_dataset = create_dataset(train_stars, batch_size=batch_size, shuffle=True)
test_dataset = create_dataset(test_stars, batch_size=batch_size, shuffle=False)

In [None]:
trainStars=[821591986,3213637048,2816803663,3875726419,2020870068,553530207,3584299946,3082702832,497611894,396266265,3421574015,1594641765,2250572533,17002355,2066433619,24135240,598943473,3943521076,1907037428,3464855505,1528112893,2737173563,3957394378,2346223738,1949835946,3342950340,1946982092,265537422,235073610,2890766898,2791509259,2675006243,1362719118,1161253140,2061358467,2731744229,2026728872,3247773632,3603727260,2999629755,782689910,3838183950,1240300219,2325932273,3257778880,3163168710,1851017591,1093580069,1318438045,2561946317,3283934223,1806781124,636190362,312842524,3501979807,1438611471,453007076,1539890953,4055463712,2911173444,3074485005,2016150489,1653686448,2720835155,2887450026,2762956949,4094260407,3625059830,1583438553,4046238097,3143365022,2452762851,2074815590,1508325713,3247461698,640765770,3267648621,229701106,1388423740,1071978212,1859732941,4093882471,3861091963,2027199735,112782545,772915060,2895446126,2790829338,966155807,4013505860,3290009033,3866291478,2074140845,1656580533,1839665471,3724419524,2780013210,1340895569,1055599128,1579203925,1981364273,1495061258,3760333362,3032660829,773356318,3959779372,3103511751,93311500,1901457679,2132729888,4156643187,2253206868,2206519853,3685062779,3391288154,3137638902,1965638805,377187848,3952673580,1101707909,2465743719,887347899,1114710037,1803632865,2758726928,3482461315,1604027523,3937238283,2364740784,1971619027,2503200730,2894958665,1540575349,1990548959,3689192911,1867440938,3034529013,3796624928,1723375224,2982016260,4176562407,3475796311,3101987149,3642128364,3214557574,3109601437,1963502034,3892132996,2811219064,2633183716,1609759068,2570413614,1909239671,1068095157,1693505406,3885613633,3671097679,4011881058,998289603,3960808611,1993590741,239407592,1720388855,463235157,3274607066,3983854782,2233970670,1012571429,2894861895,193269369,3187674344,2941815911,1080605842,1439196998,210689998,2545226729,749245819,1542505990,3117487606,1429910325,1944230894,4154084890,1060351656,377939
2391,168297458,3932146472,3035295868,2276622978,1842626173,1592156923,1885744867,2713826507,2009927905,777145011,2968910705,2081786992,1011759019,2237942433,496472369,141323216,1390687693,901361366,3705719250,1035787464,1236398737,1254987835,3483698151,1954167669,1226886844,735692747,3912318842,540395971,1040089248,1725624016,2558442317,3270929768,2137970284,2870915117,2561838459,2550833762,3936719360,2575579664,2702825697,835158662,2914588022,1924898052,2899982705,3842343027,3699474491,1788675439,846551798,62282335,3953028309,1754973667,1636090864,1770216760,4246618328,2047136560,2609107638,1218579540,4199801857,4249337798,4257395405,3734125987,3763353910,233675174,1285749933,2913196659,33548644,3962521565,302857654,1649111978,2194084083,2455073775,61961538,2906452125,155746318,2601096321,286376329,1798931901,603237272,1431415873,1954665634,3271952720,137536026,3503831607,1802809018,3597960801,57518461]
testStars=[4167675411,1437547087,2692159256,3235958758,3227501674,208669662,1482309188,521954473,342497505,1438554421,1891663121,4165611714,273602018,1711683835,1082014264,1394990039,1101079594,421386193,619448599,3888934497,1122354479,557327341,2660295140,2384398114,3182552250,1771027672,1732981508,3961748982,159506197,2869539781,565785039,2684704287,2758941622,1313620170,724899303,1406894216,1297419156,3308013333,3691267724,2817108197,1819058753,3747847047,386885929,2536093666,2958627654,178016934,2853876513,1980994560,343035647,1662041072,1934262433,2930443698,2385019512,3340984301,25070640,3124355137,1284352058,2571663188,2758428682,3756521544,1012409820,1205725641,1817510468,521001195,3545994138,3531723440,534940522,785834,1656095778,3082264358,2034314944,3339424565,4046148192,1705473070,92029723,827488781,2465063333,2858591687,2636709977,2573514144,2751412614,1087365206,2927554342,301818265,2590423296,3713067054,3329685364,3718288752,3636815200,2885306803,165516591,2660562218,1184823185,902166631,1227146905,2386941703,3963820198,469234852,3311807250,2287863054,4141729806,26372015,511262612,3023519715,1854569585,847875671,3000955010,2530918707,637988352,1806321292,1129361124,1604366860,3277793728,14485303,2314241972,77523557,2995259050,2863140027,2910555879,3269061542,615304059,4286133832,2224327373,1718377842,2025689074,1502650989,1005054328,2040852024,1994184718,544877920,3737988195,3913138351,3539478092,3454679764,1504537906,4017761584,1021609935,3338393823,1264467043,3039307861,989333577,830053386,2845050954,985957621,3459443535,2193939147,1215971796,3284427611,4197469272,2237015181,746498582,3405376171,696618104,2197931868,50637799,3452964090,1569258985,737266850,1315791861,3207402559,1137318614,3704372959,2042816882,2993461427,1277115141,2821963592,2849517986,79009616,864681126,2248456905,3796601559,2062797353,385995586,1947998963,793844575,2862728730,1273256942,4196709764,1569213600,2059081577,3215997593,3478404079,3135808055,2254353101,1661748415,262048
960,3472047977,2597490105,4228013544,3804631547,506349321,1599653349,2303635924,2877156894,855962840,1655967875,2445749116,3678150269,3399621278,1076727237,4012276852,3549820907,1173280312,3690278318,90833891,514590299,1196124605,29348276,4150857627,4185365244,3693798020,4133293270,2560123809,2644487066,1044371083,1907771348,514678999,1629044074,1669788725,2217132592,1370642288,2450147796,1989641334,421791107,3538208212,708534821,2877883556,2039381353,795613079,2954316800,1583591551,2899498670,611290582,3041422882,53735941,3983871342,3673677284,3750875292,506148838,2708602782,3441268113,75219649,4273166473,1887580996,3710453865,1697530312,1413437034,3771476325,1676288765,1510814158,2295018082,730244369,1347888499,2291997915,2734053664,2986259922,557779238,1085856726,3795252140,2838808228,100468857,1872623256,303438165,4180636816,848292925,401017528,2670442737,1598287691,391267577,1949187031,3875101730,4247918843,1012051641,612015401,1859679770,3432603686,723247337,998050736,807484101,743163637,57669658,1645275092,1053056651,2372647732,3689245843,4017291223,3059098383,3094871953,1392362496,811352473,1841403352,818295678,1608928178,1273237025,1544052178,3913207660,3838792377,1762835180,528183114,3193829447,720318091,3385600500,2466007563,4138071307,2333658941,957610605,2486025247,1785765384,3625877959,3175562842,3874271955,2086295868,2764364262,473550121,208206471,1396564311,2506720535,3210028778,277905930,2227472043,912497889,283919092,1338777429,2178609948,4247255382,2515050523,3056887059,3648438690,1885277587,4080013326,968882953,1910935418,1429936612,2506277319,2078072980,3023703329,2939180626,1877571378,3575998926,2994980578,803524404,890677371,780066751,1583874489,1468955155,2630811963,1261712158,222583025,1420180584,2015045591,525433641,2982060809,2641456641,4205863992,145136829,3046407287,1671135782,4096233594,3633474854,1242413576,2470022534,704002160,2898410358,2962442845,1574318756,3058413680,944701519,1548764506,2128229789,2883014146,1852150662,824687737,2
891311453,919342592,1864259668,1017422390,2560873375,585737420,3533872876,2189033555,3308226296,941186385,2035492415,4266129805,1127135463,688695194,3904845904,2247624106,2161819831,1554937367,113675418,1151088909,333122533,595119503,1537427309,4175492103,1767944001,1585014334,2196974738,2320457425,2556934812,871770446,1615603560,1121250116]

# Ids that appear in both hard-coded lists; the bare name displays them.
common = list(set(trainStars).intersection(set(testStars)))
common

In [None]:
# Smoke test: iterate once over the whole test pipeline so that every star is
# loaded; the loop body only assigns a placeholder.
for x,y in test_dataset:
    #print(x.shape, y.shape)
    a=0
    #break

# CNN

In [None]:
# Input/output dimensions used by the model below; the first three form the
# shape passed to tf.keras.Input in cnnM.
timepoints = 225
representations = 4
wavelengths = 283
targetWavelengths = 283

class Reshape1(tf.keras.layers.Layer):
    """Swap axes 1 and 2 of a 4-D input, leaving axes 0 and 3 in place."""

    def __init__(self, **kwargs):
        super().__init__(**kwargs)

    def call(self, x):
        return tf.transpose(x, perm=[0, 2, 1, 3])
    
class Reshape11(tf.keras.layers.Layer):
    """Swap axes 1 and 2 of a 3-D input, leaving axis 0 in place."""

    def __init__(self, **kwargs):
        super().__init__(**kwargs)

    def call(self, x):
        return tf.transpose(x, perm=[0, 2, 1])

class Reshape2(tf.keras.layers.Layer):
    """Concatenate prediction and confidence tensors along the last axis."""

    def __init__(self, **kwargs):
        super().__init__(**kwargs)

    def call(self, x_pred, x_confidence):
        return tf.concat([x_pred, x_confidence], axis=-1)
    
class Reshape22(tf.keras.layers.Layer):
    """Pair prediction and confidence in a new trailing axis of size 2.

    Index 0 of the new axis holds ``x_pred``, index 1 holds ``x_confidence``.
    """

    def __init__(self, **kwargs):
        super().__init__(**kwargs)

    def call(self, x_pred, x_confidence):
        # Stacking on a new last axis equals expand_dims on both + concat.
        return tf.stack([x_pred, x_confidence], axis=-1)
    
class reduce(tf.keras.layers.Layer):
    """Sum over the last axis and keep it as a trailing axis of size 1."""

    def __init__(self, **kwargs):
        super().__init__(**kwargs)

    def call(self, x):
        # Note: this is a sum, not a mean.
        return tf.reduce_sum(x, axis=-1, keepdims=True)
class reduce1(tf.keras.layers.Layer):
    """Sum over the last axis, dropping that axis from the result."""

    def __init__(self, **kwargs):
        super().__init__(**kwargs)

    def call(self, x):
        # Note: this is a sum, not a mean.
        total = tf.reduce_sum(x, axis=-1)
        return total


def cnnM(outputDim = 283):
    """Build the 1-D CNN that predicts a value and a confidence per target.

    The model takes inputs of shape ``(timepoints, wavelengths,
    representations)`` and uses only representation index 1.  Three
    Conv1D (valid padding, kernel 5) + average-pooling stages run along the
    time axis with the wavelengths as channels, followed by dense layers and
    two linear heads.

    Args:
        outputDim: number of targets produced by each head.  This argument was
            previously ignored (the heads were hard-coded to 283).

    Returns:
        A ``tf.keras.Model`` whose output has shape ``(batch, outputDim, 2)``:
        index 0 of the last axis is the prediction head, index 1 the
        confidence head.
    """
    inp = tf.keras.Input(shape=(timepoints, wavelengths, representations))
    x = inp[:,:,:,1]

    #x = Reshape11()(x)
    for _ in range(3):
        x = tf.keras.layers.Conv1D(filters=wavelengths, kernel_size=5, padding='valid')(x)
        x = tf.keras.layers.AveragePooling1D(2)(x)

    # Put the channel axis before the (shortened) time axis for the dense layer.
    x = Reshape11()(x)
    x = tf.keras.layers.Dense(1000)(x)
    x = tf.keras.layers.Flatten()(x)
    x = tf.keras.layers.Dense(1000)(x)
    #mean = tf.keras.layers.Dense(1,activation='relu')(x)
    x_pred = tf.keras.layers.Dense(outputDim, activation='linear')(x)
    #x_pred = x_pred+mean
    x_confidence = tf.keras.layers.Dense(outputDim, activation='linear')(x)
    x = Reshape22()(x_pred, x_confidence)

    model = tf.keras.Model(inp, x)
    return model


# Instantiate the network with the default output size and print its layers.
model = cnnM() 
model.summary()

In [None]:
# Take one batch from each split and run a forward pass, then display the
# dtypes and shapes of inputs, labels and model output.
batch = next(iter(train_dataset))
out = model(batch[0])
test_batch = next(iter(test_dataset))
(
    batch[0].dtype, batch[1].dtype, out.dtype,
    batch[0].shape, batch[1].shape, out.shape,
)

In [None]:
def log_likelihood_zScoreTarget(y_trueZScore, y_pred):
    """Normalised Gaussian log-likelihood score for z-scored targets.

    ``y_pred[..., 0]`` is the predicted z-score and ``y_pred[..., 1]`` the log
    of the predicted standard deviation in z-score units.  Both are mapped
    back with ``stdTrainLabels`` / ``meanTrainLabels``:
    ``sigma = exp(log_sigma) * std`` and hence
    ``log(sigma) = log_sigma + log(std)``.

    The score is ``(L_pred - L_ref) / (L_ideal - L_ref)`` where every term is
    a summed Gaussian log-density of the true values: ``L_pred`` under the
    prediction, ``L_ref`` under ``N(meanLabels, stdLabels^2)`` and ``L_ideal``
    under a perfect mean with variance 1e-10.

    Fixes relative to the earlier version: the prediction term now uses
    ``log(sigma^2)`` and ``/ sigma^2`` (it used ``log(sigma)`` and ``/ sigma``),
    the ideal term is summed over the actual number of elements instead of a
    hard-coded ``283*5625``, and all constants are cast to the dtype of
    ``y_pred`` to avoid float32/float64 mismatches.

    Args:
        y_trueZScore: true targets in z-score units, shape ``(batch, n)``.
        y_pred: model output, shape ``(batch, n, 2)``.

    Returns:
        Scalar tensor with the normalised score.
    """
    y_pred = tf.convert_to_tensor(y_pred)
    dtype = y_pred.dtype
    y_trueZScore = tf.cast(y_trueZScore, dtype)
    std_train = tf.cast(stdTrainLabels, dtype)
    mean_train = tf.cast(meanTrainLabels, dtype)
    std_ref = tf.cast(stdLabels, dtype)
    mean_ref = tf.cast(meanLabels, dtype)
    log_2pi = tf.cast(np.log(2*np.pi), dtype)

    # y_zScore = (y - mean) / std -> y = y_zScore * std + mean
    y_true = y_trueZScore * std_train + mean_train

    y_predZScore = y_pred[:, :, 0]
    log_sigma = y_pred[:, :, 1]

    y_pred0 = y_predZScore * std_train + mean_train
    stdDev = tf.exp(log_sigma) * std_train
    logVar = 2.0 * (log_sigma + tf.math.log(std_train))  # log(sigma^2)

    L_pred = -0.5*(log_2pi + logVar + tf.square((y_true - y_pred0) / stdDev))
    L_ref = -0.5*(log_2pi + tf.math.log(std_ref*std_ref) + tf.square((y_true - mean_ref) / std_ref))
    # One ideal term per element, so the sum matches the other two sums.
    L_ideal = -0.5*(log_2pi + tf.math.log(tf.cast(1e-10, dtype))) * tf.ones_like(y_true)

    ref_total = tf.reduce_sum(L_ref)
    L = (tf.reduce_sum(L_pred) - ref_total) / (tf.reduce_sum(L_ideal) - ref_total)

    return L

def log_likelihood_maxScaling(y_trueMax, y_pred):
    """Normalised Gaussian log-likelihood score for max-scaled targets.

    ``y_pred[..., 0]`` is the max-scaled prediction and ``y_pred[..., 1]`` the
    log of the max-scaled sigma; both are multiplied by ``maxTrainLabels`` to
    return to label units.  The reference term uses ``stdLabels*stdLabels`` as
    its scale and the ideal term uses ``(1e-5)**2`` as its scale.

    Returns:
        Scalar ``(sum L_pred - sum L_ref) / (sum L_ideal - sum L_ref)``.
    """
    target = y_trueMax * maxTrainLabels
    scaled_mean = y_pred[:, :, 0]
    mu = scaled_mean * maxTrainLabels
    sigma = tf.exp(y_pred[:, :, 1]) * maxTrainLabels
    log_var = tf.math.log(sigma*sigma)

    L_pred = -0.5*(tf.math.log(2*np.pi) + log_var + tf.square((target - mu) / sigma))
    L_ref = -0.5*(
        tf.math.log(2*np.pi)
        + tf.math.log(stdLabels**4)
        + tf.square((target - meanLabels)/(stdLabels*stdLabels))
    )
    L_ideal = -0.5*(tf.math.log(2*np.pi) + tf.math.log((1e-5)**4)) * tf.ones_like(scaled_mean)

    gain = tf.reduce_sum(L_pred) - tf.reduce_sum(L_ref)
    headroom = tf.reduce_sum(L_ideal) - tf.reduce_sum(L_ref)
    return gain / headroom

def log_likelihood_maxScaling_scipy(y_trueMax, y_pred):
    """SciPy version of the normalised log-likelihood score for max-scaled data.

    Uses ``scipy.stats.norm.logpdf`` for the three terms: the prediction
    (``y_pred[..., 0]`` and ``exp(y_pred[..., 1])``, both multiplied by
    ``maxTrainLabels``), the ideal case (true values with scale 1e-10) and the
    reference (``meanLabels`` with scale ``stdLabels*stdLabels``).

    Returns:
        ``(GLL_pred - GLL_mean) / (GLL_true - GLL_mean)``.
    """
    target = y_trueMax * maxTrainLabels
    mu = y_pred[:, :, 0] * maxTrainLabels
    sigma = tf.exp(y_pred[:, :, 1]) * maxTrainLabels

    def summed_logpdf(loc, scale):
        # Total log-density of the true values under N(loc, scale^2).
        return np.sum(scipy.stats.norm.logpdf(target, loc=loc, scale=scale))

    GLL_pred = summed_logpdf(mu, sigma)
    GLL_true = summed_logpdf(target, (1e-10) * np.ones_like(target))
    GLL_mean = summed_logpdf(
        meanLabels * np.ones_like(target),
        (stdLabels*stdLabels) * np.ones_like(target),
    )

    return (GLL_pred - GLL_mean)/(GLL_true - GLL_mean)

# Evaluate the metrics on the probe batch.  Only the last line is displayed by
# the notebook; the result of the first call is discarded.
log_likelihood_zScoreTarget(batch[1], out)
# TF and SciPy implementations side by side for comparison.
log_likelihood_maxScaling(batch[1], out),log_likelihood_maxScaling_scipy(batch[1],out)

In [None]:
# investigate metric: take away only good in a certain range depending on mse
# Sweep the confidence channel over a grid of factors and record the score.
# The grid skips factor 0: log(0) = -inf gives sigma = 0 and a NaN score.
factors = np.linspace(0,3,100)[1:]
scores=[]
expValues=[]
copyOut=out.numpy()
for i,f in enumerate(factors, start=1):
    factor =f#2**(f-10)
    # NOTE(review): the log is divided by maxTrainLabels; if `factor` is meant
    # to be sigma in label units this would be log(factor / maxTrainLabels) - confirm.
    copyOut[:,:,1] = np.log(tf.ones_like(out[:,:,1]) * factor)/maxTrainLabels
    s = log_likelihood_maxScaling_scipy(batch[1],copyOut)
    print(i, s)
    scores.append(s)
print(scores)
plt.figure(figsize=(12, 6))
# Plot against the swept factor (not the loop index) and label it as a score.
plt.plot(factors, scores, 'b', label='Score')
plt.xlabel('factor')
plt.ylabel('score')
plt.legend()
plt.show()

In [7]:
def loss_mae(y_true_zScore, y_pred):
    """Absolute-error loss on the prediction plus a confidence-matching term.

    The confidence is ``exp(y_pred[..., 1])``; where it exceeds ``exp(20)`` it
    is replaced by ``y_pred[..., 1] + exp(20)``.  The second term penalises the
    absolute difference between the absolute error and that confidence.

    Returns:
        Per-sample loss (both terms summed over the last axis).
    """
    raw_confidence = y_pred[:, :, 1]
    confidence = tf.math.exp(raw_confidence)
    too_large = tf.greater(confidence, tf.exp(20.0))
    confidence = tf.where(too_large, raw_confidence + tf.exp(20.0), confidence)

    abs_error = tf.math.abs(y_true_zScore - y_pred[:, :, 0])
    confidence_gap = tf.math.abs(abs_error - confidence)
    return tf.reduce_sum(abs_error, axis=-1) + tf.reduce_sum(confidence_gap, axis=-1)

def mae(y_true_zScore, y_pred):
    """Per-sample sum of absolute errors of the prediction channel (index 0)."""
    residual = y_true_zScore - y_pred[:, :, 0]
    return tf.reduce_sum(tf.math.abs(residual), axis=-1)
def mse(y_true_zScore, y_pred):
    """Per-sample sum of squared errors of the prediction channel (index 0)."""
    residual = y_true_zScore - y_pred[:, :, 0]
    return tf.reduce_sum(tf.square(residual), axis=-1)

def loss_mse(y_true_zScore, y_pred):
    """Squared-error loss on the prediction channel (index 0).

    The confidence term is still computed but is currently not part of the
    returned loss (its weighted contribution is commented out below).

    Returns:
        Per-sample sum of squared errors over the last axis.
    """
    raw_confidence = y_pred[:, :, 1]
    confidence = tf.math.exp(raw_confidence)
    too_large = tf.greater(confidence, tf.exp(20.0))
    confidence = tf.where(too_large, raw_confidence + tf.exp(20.0), confidence)

    sq_error = tf.square(y_true_zScore - y_pred[:, :, 0])
    confidence_gap = tf.square(sq_error - confidence)
    total = tf.reduce_sum(sq_error, axis=-1)
    return total  # + 0.001*tf.reduce_sum(confidence_gap, axis=-1)

def log_loss_maxScaling(y_trueMax, y_pred):
    """Loss form of the max-scaled log-likelihood score: ``-(score - 1)``.

    ``y_pred[..., 0]`` is the max-scaled prediction and ``y_pred[..., 1]`` the
    log of the max-scaled sigma; both are multiplied by ``maxTrainLabels``.
    The reference term uses ``stdLabels*stdLabels`` as its scale and the ideal
    term uses ``(1e-5)**2`` as its scale.
    """
    target = y_trueMax * maxTrainLabels
    scaled_mean = y_pred[:, :, 0]
    mu = scaled_mean * maxTrainLabels
    sigma = tf.exp(y_pred[:, :, 1]) * maxTrainLabels
    log_var = tf.math.log(sigma*sigma)

    L_pred = -0.5*(tf.math.log(2*np.pi) + log_var + tf.square((target - mu) / sigma))
    L_ref = -0.5*(
        tf.math.log(2*np.pi)
        + tf.math.log(stdLabels**4)
        + tf.square((target - meanLabels)/(stdLabels*stdLabels))
    )
    L_ideal = -0.5*(tf.math.log(2*np.pi) + tf.math.log((1e-5)**4)) * tf.ones_like(scaled_mean)

    score = (tf.reduce_sum(L_pred) - tf.reduce_sum(L_ref)) / (tf.reduce_sum(L_ideal) - tf.reduce_sum(L_ref))
    return -(score - 1)

In [None]:
# Compile the model and overfit it on the single probe batch.
tf.random.set_seed(42)

# NOTE(review): LR_SCHEDULE is not defined in the visible part of the notebook;
# this callback would fail if it were passed to fit() (it is commented out below).
lr_callback = tf.keras.callbacks.LearningRateScheduler(lambda step: LR_SCHEDULE[step], verbose=0)

#optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)
optimizer = tf.keras.optimizers.Adam(learning_rate=1e-5)
model.compile(loss=loss_mse,metrics=[log_likelihood_maxScaling], optimizer=optimizer)
#model.compile(loss='mae', ,metrics=[log_likelihood_zScoreTarget]optimizer=optimizer)
#model.compile(loss=loss_mae, optimizer=optimizer)

# Trains on one batch only and passes no validation data, so the returned
# history contains no 'val_loss' entry.
history = model.fit(#train_dataset, 
                    batch[0],batch[1], #verbose=2,
                    #validation_data=test_dataset,
                    #validation_data=(test_batch[0],test_batch[1]),
                    epochs=2000, batch_size=batch_size,
                    #callbacks=[lr_callback]
                    )


In [None]:
# experiments with original data (faster processing)
# equal weighted loss of mean & stddev -> mse ~ 7/7 -> but can't fit targets at all!!!!!! this is the issue

# training one batch for 1200 epochs -> loss (mse) ~0.02 seems to fit okish to data
# 1600 epochs ~ 0.003 gets better and better
# 2000 epochs ~0.001

# 2000 epochs with smaller net -> 1.4, can't fit

# lr0.000001 gets stuck at like 30, lr much smaller + retrain, doesn't beat 5

In [23]:
# Save the full model (architecture + weights) in the .keras format.
model.save('originalData_222_epochs_accLoss_reluActivation_8_10.keras')
# Save weights
model.save_weights('originalData_222_epochs_accLoss_reluActivation_8_10.weights.h5')

# Load weights
#loaded_weights = model.load_weights('170_epochs_accLoss_reluActivation_23_23.weights.h5')

In [None]:
# Load a previously saved model from disk.
# NOTE(review): if that model contains the custom layers/losses defined in this
# notebook, load_model presumably needs custom_objects (or compile=False) - confirm.
loaded_model = tf.keras.models.load_model('120_epochs_accLoss31_30.keras')

In [None]:
# Continue training the current model on the same single batch; this
# overwrites `history` and again passes no validation data.
history = model.fit(#train_dataset, 
                    batch[0],batch[1], #verbose=2,
                    #validation_data=test_dataset,
                    #validation_data=next(iter(test_dataset)),
                    epochs=400, batch_size=batch_size)

#one epoch

In [None]:
# first try couldn't fit the values, just predicted mean if I kept the shape (output layer of shape 1 - tensor 283x100 -> 283x1)
# having a flatten layer between converges

# flatten layer and 12 samples -> predict the same for all 12 samples, maybe not enough filters

# PROBLEM why we can't fit multiple targets: layer normalization!! use batch norm instead

#---- with batch norm
# cnn model + mean estimation, loss ~80, but predicting different mean
# fcn model, loss ~81
# fcn model / min scaling -> loss 0.8 / 27 (lots of negative predictions)
# fcn model / max scaling / relu activation -> 3.1/8 (lots of 0 predictions) / with scale of 100, loss =14.9/43691
# cnn model / max scaling / mean pred -> 6.0/inf
# cnn ... no layer norm in beginning -> 15

# loss function for every output (batch,283) / 100 epochs
# cnn 1.5 loss
# cnn with smaller LR 0.22(also after 200 epochs)
# cnn with separate mean prediction loss 20.5 (lr0.0001) vs 3.5(lr0.0005) / can't even fit 2 samples (0.5 for lr 0.0005)

# cnn without mean prediction (2 samples, lr0.0005) 22.4   / lr0.001 0.4 loss, but targets still fit badly / only fitting target noVar 0.08 still bad

# difference between train / test = batch norm has significant effect here
#fcn + mean, 2 samples LR0.0005 -> 
#fcn + mean, 2 samples only loss on target -> 
#fcn + mean, 1 sample, only loss on target -> 0.03 targets are far off
#fcn, 1 sample, only loss on target -> 0.4 targets are far off
# -> train data was not normalized!!

# with regularization / without regularization doesn't matter that much as long as sample is normalized
# normalization per sample -> predict the same for all targets ~0.0978
# norm per sample + bias estimation -> predict same for all targets (besides 1) ~0.0978

# with learning rate schedule -> 0.06 lots more possible to not get stuck in local minima

#cnn / norm over train / bias estimation / lr0.01 / only target -> ~45 sum loss
#cnn / norm over train / bias estimation / lr0.01 / target + loss2 -> ~47 after 95 epochs (15 after~150epochs)

#cnn / norm over train / bias est / lr0.01 / target + loss / activation function relu instead of linear (conf + bias / still nan bc stddev =0, log(0) = nan)
# 39/40 but training seems to be a lot more stable
# after 170 epochs 23.7/23.5
# after 220 epochs 11/19 (but already went down to 14/16)
# after 250 epochs 12/16 (but already 16/15)
# after 300 epochs 12/17


# Assuming 'history' is your model's training history
train_loss = history.history['loss']
# 'val_loss' is only recorded when fit() receives validation data; the fit
# calls in this notebook pass none, so look it up without raising KeyError.
test_loss = history.history.get('val_loss')

epochs = range(1, len(train_loss) + 1)

plt.figure(figsize=(12, 6))
plt.plot(epochs, train_loss, 'b', label='Training loss')
if test_loss is not None:
    plt.plot(epochs, test_loss, 'r', label='Test loss')
plt.title('Training and Test Loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()
plt.show()

# eval

In [None]:
#pred = model.predict(normData)
pred = model.predict(batch[0])
# Undo the label scaling with maxTrainLabels, the constant the labels were
# divided by (maxLabels, the maximum over all labels, is a different value
# whenever the overall maximum is not in the training split).
# Shown: prediction, ground truth and predicted sigma for the first two targets.
pred[:,0:2,0]*maxTrainLabels, batch[1][:,0:2]*maxTrainLabels, np.exp(pred[:,0:2,1])*maxTrainLabels

In [None]:
# Absolute-error sums for every sample of the batch at once.
print('overall',(mae(batch[1],pred)))
# Squared-error sum per sample.  Iterate over the samples actually present in
# `pred`: a batch can hold fewer than batch_size elements.
for i in range(pred.shape[0]):
    print(f'batch {i}',(mse(batch[1][i,:],pred[i:i+1,:,:])))

In [None]:
# Scatter the input series at index [:, 0, 0] for the first four batch samples.
fig = go.Figure()
for i in range(4):
    trace = go.Scatter(
        y=batch[0][i,:,0,0],
        mode='markers',
        name=f'f_{i}',
        marker={'size': 3},
    )
    fig.add_trace(trace)

fig.show()

In [None]:
# One figure overlaying ground truth and prediction for the first 10 samples.
fig = go.Figure()
for i in range(10):
    truth_trace = go.Scatter(y=batch[1][i,:], mode='markers', name=f'gt_{i}', marker={'size': 3})
    pred_trace = go.Scatter(y=pred[i,:,0], mode='markers', name=f'pred_{i}', marker={'size': 3})
    fig.add_trace(truth_trace)
    fig.add_trace(pred_trace)
fig.show()

In [None]:
# One figure per sample: ground truth against prediction for the first 10 samples.
for i in range(10):
    fig = go.Figure()
    truth_trace = go.Scatter(y=batch[1][i,:], mode='markers', name=f'gt_{i}', marker={'size': 3})
    pred_trace = go.Scatter(y=pred[i,:,0], mode='markers', name=f'pred_{i}', marker={'size': 3})
    fig.add_trace(truth_trace)
    fig.add_trace(pred_trace)
    fig.show()