In [None]:
import numpy as np  
import pandas as pd 
import math
from random import shuffle

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.utils import Sequence

In [None]:
# Locations of the competition inputs (relative to the notebook's working directory).
path_df = '../input/seti-breakthrough-listen/train_labels.csv'
path_tr = '../input/seti-breakthrough-listen/train'
path_te = '../input/seti-breakthrough-listen/test'
path_sb = '../input/seti-breakthrough-listen/sample_submission.csv'

# Label table for training and the submission template.
train = pd.read_csv(path_df)
sub = pd.read_csv(path_sb)

# The test ids are exactly the rows of the submission template, so derive the
# test frame from it instead of parsing the same CSV a second time.
test = sub.copy()

# Each sample is stored as <root>/<first character of id>/<id>.npy; build the
# paths from the directory constants above rather than repeating the literals.
train['file_path'] = train['id'].apply(lambda x: f'{path_tr}/{x[0]}/{x}.npy')
test['file_path'] = test['id'].apply(lambda x: f'{path_te}/{x[0]}/{x}.npy')

In [None]:
class SETISequence(Sequence):
    """Keras ``Sequence`` that loads ``.npy`` samples from disk one batch at a time.

    The path of every sample is looked up in ``df['file_path']`` under the
    label given in ``ids``. The loaded arrays are stacked into one batch and
    the last two axes of the stacked batch are swapped. When ``y`` is supplied
    the sequence yields ``(batch_X, batch_y)`` pairs (training mode); otherwise
    it yields ``batch_X`` only (inference mode).

    Args:
        df: DataFrame with a ``file_path`` column, indexable by the values in ``ids``.
        ids: Index labels of the rows to serve, in serving order.
        y: Optional targets aligned element-wise with ``ids``; ``None`` for inference.
        batch_size: Maximum number of samples per batch (the last batch may be smaller).
        shuffle: If true and ``y`` is given, the sample order is reshuffled after each epoch.

    Raises:
        ValueError: If ``y`` is given and its length differs from that of ``ids``.
    """

    def __init__(self,df, ids, y=None, batch_size=1024, shuffle=True):
        super().__init__()
        self.df = df
        # Keep ids/targets as ndarrays so that slicing and re-ordering always
        # yield ndarrays (never tuples), before and after shuffling.
        self.ids = np.asarray(ids)
        self.y = None if y is None else np.asarray(y)
        self.is_train = self.y is not None
        if self.is_train and len(self.y) != len(self.ids):
            raise ValueError(
                f"ids and y must have the same length, got {len(self.ids)} and {len(self.y)}"
            )
        self.batch_size = batch_size
        self.shuffle = shuffle

    def __len__(self):
        """Number of batches per epoch, counting a trailing partial batch."""
        return math.ceil(len(self.ids) / self.batch_size)

    def __getitem__(self, idx):
        """Load batch ``idx`` from disk.

        Returns:
            ``(batch_X, batch_y)`` in training mode, ``batch_X`` otherwise.
        """
        batch = slice(idx * self.batch_size, (idx + 1) * self.batch_size)
        paths = self.df['file_path']  # look the column up once per batch

        list_x = [np.load(paths[sample_id]) for sample_id in self.ids[batch]]
        # Stacking adds the batch axis in front; the transpose then swaps the
        # last two axes, so every file has to hold a 3-D array.
        batch_X = np.transpose(np.stack(list_x), (0, 1, 3, 2))

        if self.is_train:
            return batch_X, self.y[batch]
        return batch_X

    def on_epoch_end(self):
        """Reshuffle ids and targets together (training mode with ``shuffle`` only)."""
        if not (self.shuffle and self.is_train):
            return
        # Shuffle a list of positions and apply it to both arrays: the pairs
        # stay aligned and both attributes remain ndarrays.
        order = list(range(len(self.ids)))
        shuffle(order)
        self.ids = self.ids[order]
        self.y = self.y[order]

In [None]:
def build_model(input_shape=(6, 256, 273, 1)):
    """Build the (uncompiled) binary classifier.

    Every slice along the first input axis is processed independently through
    ``TimeDistributed`` wrappers: Conv2D -> MaxPooling2D -> Conv2D, then the
    single-filter channel axis is dropped, a bidirectional GRU is run over the
    resulting 2-D map and its outputs are max-pooled. The per-slice features
    are flattened and fed to a small dense head ending in one sigmoid unit.

    Args:
        input_shape: Shape of one sample, without the batch axis, as
            ``(slices, height, width, 1)``. Height and width must be fixed
            (not ``None``) because the reshape target is derived from them.
            The default reproduces the originally hard-coded architecture.

    Returns:
        A ``tf.keras.Model`` mapping a batch of samples to one probability each.
    """
    # NOTE(review): this single Conv2D instance is wrapped twice below
    # ("Conv_1" and "Conv_2"), so both stages use the same layer object —
    # confirm that sharing it is intended.
    conv = tf.keras.layers.Conv2D(1, (3, 3), padding='valid')
    pool1 = tf.keras.layers.MaxPooling2D(pool_size=(1, 4), padding='valid')
    gru = tf.keras.layers.Bidirectional(tf.keras.layers.GRU(32, return_sequences=True))
    pool2 = tf.keras.layers.GlobalMaxPooling1D()

    inputs = tf.keras.layers.Input(shape=input_shape)

    x = tf.keras.layers.TimeDistributed(conv, name="Conv_1")(inputs)
    x = tf.keras.layers.TimeDistributed(pool1, name="Pool_1")(x)
    x = tf.keras.layers.TimeDistributed(conv, name="Conv_2")(x)

    # Drop the trailing single-filter channel axis. The target size is read
    # from the tensor's static shape (batch, slices, height, width, 1), which
    # gives (252, 65) for the default input shape.
    height, width = x.shape[2], x.shape[3]
    x = tf.keras.layers.TimeDistributed(tf.keras.layers.Reshape((height, width)))(x)

    x = tf.keras.layers.TimeDistributed(gru, name="GRU_1")(x)
    x = tf.keras.layers.TimeDistributed(pool2, name="Pool_2")(x)
    x = tf.keras.layers.Flatten(name="Flat")(x)

    x = tf.keras.layers.Dense(64, activation="relu", name="Dense_1")(x)
    x = tf.keras.layers.BatchNormalization()(x)
    x = tf.keras.layers.Dropout(0.1)(x)

    x = tf.keras.layers.Dense(1, activation="sigmoid", name="Output_sigmoid")(x)

    return tf.keras.Model(inputs=inputs, outputs=x)

In [None]:
# Learning rate starts at 0.01 and is multiplied by 0.99 every 10 optimizer
# steps; staircase=True applies the decay in discrete jumps.
lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
    initial_learning_rate=0.01,
    decay_steps=10,
    decay_rate=0.99,
    staircase=True,
)

model = build_model()
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=lr_schedule),
    loss=tf.keras.losses.BinaryCrossentropy(),
    # `metrics` is documented as a list; a bare metric object is not accepted
    # by every Keras release.
    metrics=[tf.keras.metrics.AUC(name='auc')],
)

model.summary()

In [None]:
train_model = True  # NOTE(review): never read in the visible cells — wire it up or drop it
batch_size = 32

ids = train.index.values
y_train = train.target
# Take the test ids from the frame the test sequence actually reads paths from.
ids_tst = test.index.values

# Training sequence: yields (X, y) batches and reshuffles after every epoch.
trn = SETISequence(train, ids, y_train.values, batch_size=batch_size)
model.fit(trn, epochs=3)

# Inference sequence: no targets, so batches keep the order of `ids_tst`.
tst = SETISequence(test, ids_tst, batch_size=batch_size)
p_tst = model.predict(tst, verbose=1)

# The model ends in a single-unit layer, so flatten the predictions before
# storing them as one column, and make sure no row went missing.
assert len(p_tst) == len(sub), "prediction count does not match the submission template"
sub['target'] = p_tst.reshape(-1)
sub.to_csv('submission_file.csv', index=False)
sub.head()