In [2]:
import os
import glob
import numpy as np
import tensorflow as tf
import pandas as pd

In [None]:
import tensorflow as tf

# List the GPUs TensorFlow can see and report each one, or say that none exist.
physical_devices = tf.config.list_physical_devices('GPU')
if physical_devices:
    for device in physical_devices:
        print("GPU:", device)
else:
    print("No GPU devices found.")

# dataset

In [61]:
def star_id_from_path(path):
    """Return the name of the directory that directly contains ``path``.

    For a file matched by the ``train/*/*`` pattern this is the per-star
    directory name. ``os.path`` is used instead of splitting on a literal
    backslash so the lookup works with whatever separator the platform's
    ``glob`` returns (the backslash split raised IndexError on POSIX paths).

    Args:
        path: A file path such as ``train/<star>/<file>``.

    Returns:
        The parent directory's name as a string.
    """
    return os.path.basename(os.path.dirname(path))


# Collect the distinct star directory names found two levels below train/.
files = glob.glob(os.path.join('train/', '*/*'))
stars = np.unique([star_id_from_path(file) for file in files])

# Label table read from train_labels.csv, indexed by its planet_id column so
# that load_npz can fetch one row with labelDf.loc[<id>].
labelDf = pd.read_csv("train_labels.csv")
labelDf = labelDf.set_index('planet_id')

import random
# Seed Python's global RNG; split_star_list shuffles with random.shuffle, so
# this makes the train/test split repeatable when the cell is run from here.
random.seed(42)

def split_star_list(file_list, test_ratio=0.2):
    """Randomly split a sequence into a training part and a test part.

    The input is copied before shuffling, so the caller's sequence keeps its
    original order (shuffling in place silently reordered the caller's data
    and returned views into it). The shuffle uses Python's global ``random``
    state; seed it beforehand for a reproducible split.

    Args:
        file_list: Sequence of items to split (list, tuple, 1-D array, ...).
        test_ratio: Fraction of the items assigned to the test part; must lie
            in the closed interval [0, 1].

    Returns:
        A ``(train_files, test_files)`` tuple of lists. The training list holds
        the first ``int(len(file_list) * (1 - test_ratio))`` shuffled items and
        the test list holds the rest.

    Raises:
        ValueError: If ``test_ratio`` is outside [0, 1].
    """
    if not 0.0 <= test_ratio <= 1.0:
        raise ValueError(f"test_ratio must be between 0 and 1, got {test_ratio!r}")

    # Work on a private copy so the argument is never mutated.
    shuffled = list(file_list)
    random.shuffle(shuffled)

    split_index = int(len(shuffled) * (1 - test_ratio))
    return shuffled[:split_index], shuffled[split_index:]

# Split the star ids with the default test_ratio of 0.2.
train_stars, test_stars = split_star_list(stars)

def preprocess_data(features, labels):
    """Preprocessing hook for one sample; currently returns its inputs unchanged.

    Args:
        features: Feature array of one sample.
        labels: Label array of the same sample.

    Returns:
        The ``(features, labels)`` pair, untouched.
    """
    sample = (features, labels)
    return sample

def load_npz(star):
    """Load the feature array and the label row for one star.

    Written to be called through ``tf.py_function`` (see create_dataset), so
    ``star`` arrives as an eager string tensor.

    Args:
        star: Scalar string tensor holding the star's numeric id, which is
            also the name of its directory under ``train/``.

    Returns:
        A ``(features, labels)`` pair after ``preprocess_data``. ``features``
        is ``data['a'][0, :, 0:283, :]`` from ``train/<id>/combined.npz`` and
        ``labels`` is the ``labelDf`` row for that id as a numpy array.

    Raises:
        Exception: Any error raised while loading is logged and re-raised.
            Previously the error was only printed and the function returned
            None, which made ``tf.py_function`` fail later with an unrelated
            complaint about its outputs instead of the real cause.
    """
    integer_value = tf.strings.to_number(star, out_type=tf.int64)
    python_int = integer_value.numpy()

    file_path = 'train/'+str(python_int)+'/combined.npz'
    try:
        with np.load(file_path) as data:
            # First entry along axis 0, and the first 283 entries along axis 2.
            features = data['a'][0,:,0:283,:]
            labels = labelDf.loc[python_int].to_numpy()

            features, labels = preprocess_data(features,labels)
            return features, labels
    except Exception as e:
        print("Error loading file:", e, python_int)
        # Propagate the original error so the pipeline reports the real cause.
        raise
    

def create_dataset(star_list, batch_size, shuffle=True):
    """Build a batched ``tf.data`` pipeline that loads one sample per star id.

    Args:
        star_list: Sequence of star ids; each id is passed to ``load_npz``.
        batch_size: Number of samples per batch.
        shuffle: When true, shuffle the ids with a buffer as large as the
            whole list before loading.

    Returns:
        A ``tf.data.Dataset`` of ``(features, labels)`` batches whose
        per-sample shapes are pinned to ``[5625, 283, 4]`` and ``[283]``.
    """
    feature_shape = tf.TensorShape([5625, 283, 4])
    label_shape = tf.TensorShape([283])

    def load_and_process(x):
        # load_npz runs as Python code, so it is wrapped in tf.py_function.
        features, labels = tf.py_function(
            func=load_npz,
            inp=[x],
            Tout=[tf.float64, tf.float32],
        )
        return features, labels

    def pin_shapes(x, y):
        # py_function outputs carry no static shape; restore it here.
        return tf.ensure_shape(x, feature_shape), tf.ensure_shape(y, label_shape)

    pipeline = tf.data.Dataset.from_tensor_slices(star_list)
    if shuffle:
        pipeline = pipeline.shuffle(buffer_size=len(star_list))

    pipeline = (
        pipeline
        .map(load_and_process, num_parallel_calls=tf.data.AUTOTUNE)
        .map(pin_shapes)
        .batch(batch_size)
        .prefetch(tf.data.AUTOTUNE)
    )
    return pipeline


In [None]:
# Number of star ids assigned to the training split.
len(train_stars)

In [73]:
# Samples per batch for both pipelines.
batch_size = 4

# Only the training pipeline shuffles its star ids; the test pipeline keeps
# the order of test_stars.
train_dataset = create_dataset(train_stars, batch_size, shuffle=True)
test_dataset = create_dataset(test_stars, batch_size, shuffle=False)

In [None]:
# Walk the whole training pipeline and print the shapes of every batch.
# NOTE(review): this loads every training file just to print shapes; consider
# train_dataset.take(1) if only a quick check is wanted.
for x,y in train_dataset:
    print(x.shape, y.shape)


# model

In [None]:
# Fix TensorFlow's global random seed before the layers and model are defined.
tf.random.set_seed(42)

class ReduceDim(tf.keras.layers.Layer):
    """Layer that removes the last axis of its input via ``tf.squeeze``."""

    def __init__(self, **kwargs):
        super().__init__(**kwargs)

    def call(self, x):
        """Return ``x`` with its trailing axis squeezed away."""
        return tf.squeeze(x, axis=-1)
    
class Reshape1(tf.keras.layers.Layer):
    """Layer that swaps axes 1 and 2 of a rank-4 input.

    The three sizes given to the constructor are stored on the instance but
    are not used by ``call``.
    """

    def __init__(self, timepoints, representations, wavelengths, **kwargs):
        super().__init__(**kwargs)
        self.timepoints, self.wavelengths, self.representations = (
            timepoints,
            wavelengths,
            representations,
        )

    def call(self, x):
        """Return ``x`` transposed with ``perm=[0, 2, 1, 3]``."""
        return tf.transpose(x, perm=[0, 2, 1, 3])

class Reshape2(tf.keras.layers.Layer):
    """Layer that joins two tensors along their last axis."""

    def __init__(self, **kwargs):
        super().__init__(**kwargs)

    def call(self, x_pred, x_confidence):
        """Concatenate ``x_pred`` and ``x_confidence`` on the final axis."""
        heads = [x_pred, x_confidence]
        return tf.concat(heads, axis=-1)

class TransformerEncoder(tf.keras.layers.Layer):
    """Self-attention block with a residual connection and a Dense projection.

    ``call`` applies multi-head self-attention to the input, adds the input
    back, and projects the last axis to ``feed_forward_dim`` with a Dense
    layer that has no activation.

    Args:
        embed_dim: Used to derive the per-head key dimension
            (``embed_dim // num_heads``) and the width of ``ffn1``.
        num_heads: Number of attention heads.
        feed_forward_dim: Units of ``ffn2``, i.e. the width of the output.
        **kwargs: Standard ``tf.keras.layers.Layer`` arguments such as
            ``name``; accepted so the constructor matches the other custom
            layers in this notebook.
    """
    def __init__(self, embed_dim, num_heads, feed_forward_dim, **kwargs):
        super().__init__(**kwargs)
        self.att = tf.keras.layers.MultiHeadAttention(num_heads=num_heads, key_dim=embed_dim//num_heads)
        # ffn1 is created but not applied in call (its use is commented out).
        self.ffn1 = tf.keras.layers.Dense(embed_dim)
        self.ffn2 = tf.keras.layers.Dense(feed_forward_dim)

    def call(self, x):
        """Return ``ffn2(attention(x, x) + x)``."""
        residual = x
        x = self.att(x, x)
        x = x + residual
        #x = self.ffn1(x)
        x = self.ffn2(x)
        return x
    

# Per-sample input dimensions read by buildModel; they match the shape
# [5625, 283, 4] that create_dataset pins on the features.
timepoints = 5625
representations = 4
wavelengths = 283
# Not referenced by any other visible code; 283 matches the label shape [283].
targetWavelengths = 283
def buildModel(outputDim = 283):
    """Assemble the Keras model mapping one sample to two values per wavelength.

    Pipeline, in order:
      1. swap the time and wavelength axes (Reshape1),
      2. collapse the representation axis with a 1x1 Conv2D with one filter,
         then squeeze it away (ReduceDim),
      3. project the time axis to ``int(timepoints / 4)`` units with a Dense layer,
      4. apply five TransformerEncoder blocks, each halving the width,
      5. apply two linear Dense(1) heads and concatenate them on the last axis.

    Args:
        outputDim: Currently unused by the function body.

    Returns:
        An uncompiled ``tf.keras.Model`` whose output has 2 channels on its
        last axis (prediction first, confidence second).
    """
    keras_layers = tf.keras.layers
    inputs = tf.keras.Input(shape=(timepoints, wavelengths, representations))

    # [batch, time, wavelengths, repr] -> [batch, wavelengths, time, repr]
    hidden = Reshape1(timepoints, representations, wavelengths)(inputs)
    # A 1x1 convolution with a single filter mixes the representation channels.
    hidden = keras_layers.Conv2D(filters=1, kernel_size=(1, 1), padding='valid')(hidden)
    hidden = ReduceDim()(hidden)

    width = int(timepoints / 4)
    hidden = keras_layers.Dense(width)(hidden)

    for _ in range(5):
        half_width = int(width / 2)
        hidden = TransformerEncoder(
            embed_dim=width, num_heads=4, feed_forward_dim=half_width
        )(hidden)
        width = half_width

    prediction_head = keras_layers.Dense(1, activation='linear')(hidden)
    confidence_head = keras_layers.Dense(1, activation='linear')(hidden)
    outputs = Reshape2()(prediction_head, confidence_head)

    return tf.keras.Model(inputs, outputs)


# Build the model with the default arguments and print its layer summary.
model = buildModel()
model.summary()

In [None]:
# Pull one batch from the training pipeline and run a forward pass on its
# features, then display the dtypes and shapes involved.
batch = next(iter(train_dataset))
out = model(batch[0])
batch[0].dtype ,batch[1].dtype, out.dtype,batch[0].shape ,batch[1].shape

In [None]:
def gaussian_log_likelihood(y_true, y_pred):
    """Mean Gaussian negative log-likelihood of ``y_true`` under ``y_pred``.

    Channel 1 of ``y_pred`` is treated as the log-variance. The expression
    ``0.5 * (log(2*pi) + s + (y - mu)**2 / exp(s))`` is the Gaussian NLL only
    when ``s = log(sigma**2)``, so the variable is named accordingly (it was
    previously labelled as the log standard deviation).

    Args:
        y_true: Targets, indexable like ``y_pred[:, :, 0]``.
        y_pred: Predictions with the mean in ``[:, :, 0]`` and the
            log-variance in ``[:, :, 1]``.

    Returns:
        Scalar tensor: the NLL averaged over all elements.
    """
    mu = y_pred[:, :, 0]       # predicted mean
    log_var = y_pred[:, :, 1]  # predicted log-variance

    # Align the target dtype with the predictions so the subtraction below
    # cannot fail on mixed float32/float64 inputs.
    y_true = tf.cast(y_true, y_pred.dtype)

    variance = tf.exp(log_var)
    # np.log returns a Python float, which adopts the dtype of the tensors it
    # is combined with instead of forcing float32.
    log_two_pi = np.log(2.0 * np.pi)
    nll = 0.5 * (log_two_pi + log_var + tf.square(y_true - mu) / variance)

    return tf.reduce_mean(nll)



def loss_fn(y_true, y_pred):
    """Summed absolute error plus a penalty on a mis-estimated error.

    Channel 0 of ``y_pred`` is the prediction and channel 1 the confidence.
    The loss adds the absolute prediction error to the absolute difference
    between that error and the confidence channel, then sums over all elements.

    Args:
        y_true: Targets, indexable like ``y_pred[:, :, 0]``.
        y_pred: Predictions with two channels on the last axis.

    Returns:
        Scalar tensor holding the summed loss.
    """
    abs_error = tf.math.abs(y_true - y_pred[:, :, 0])
    confidence_gap = tf.math.abs(abs_error - y_pred[:, :, 1])
    return tf.reduce_sum(abs_error + confidence_gap)

# Evaluate both losses on the sample batch; only the value of the last
# expression is shown as the cell output.
loss_fn(batch[1], out)
gaussian_log_likelihood(batch[1], out)

In [None]:
# Re-seed TensorFlow, compile the model with loss_fn and AdamW, then fit.
# NOTE(review): fit receives only the single sample batch (batch[0], batch[1])
# and train_dataset is commented out, so each epoch trains on that one batch.
# Presumably an overfit-one-batch sanity check; confirm before treating the
# result as a real training run.
tf.random.set_seed(42)
loss = loss_fn
optimizer = tf.keras.optimizers.AdamW(learning_rate=0.0001)
model.compile(loss=loss, optimizer=optimizer)

history = model.fit(#train_dataset, 
                    batch[0],batch[1], #verbose=2,
                    validation_data=test_dataset,
                    epochs=100, batch_size=batch_size,
                    #callbacks=[validation_callback(val_metric_list), lr_callback, WeightDecayCallback()]
                    )

In [None]:
# Run the model over every batch of the test pipeline.
pred = model.predict(test_dataset)

In [None]:
# Sum of all predicted values, as a quick look at their overall magnitude.
pred.sum()

In [None]:
pred
# TODO: normalize the inputs and/or rescale the targets so that the loss
# gradients are not vanishingly small.