**Baseline Notebook for starters**

Hi all, 

I have been working on the SETI ET competition. Unfortunately, due to time constraints and other commitments, I have not been able to continue to improve the solution. 

However, I am releasing my version of the baseline solution with EfficientNetB3 so that anyone entering the competition at the last minute can use it as a starting point and build on it.

Consider upvoting if you found this notebook helpful. Thank you!

(Note: This is a generic baseline solution, not a high-scoring notebook. Its sole purpose is to give beginners a simple solution to build on; I have stopped developing it myself.)

In [None]:
!pip install -q efficientnet

In [None]:
import numpy as np
import pandas as pd
from pathlib import Path
import math
import tensorflow as tf
from tensorflow import keras
from sklearn import model_selection
import efficientnet.tfkeras as efn

In [None]:
# Root directory of the competition data (relative path).
data_dir = Path('../input/seti-breakthrough-listen/')

# Sub-directories searched by id_to_path for the per-sample .npy files.
train_data_dir = data_dir.joinpath('train')
test_data_dir = data_dir.joinpath('test')

# CSV files: training labels and the sample submission.
train_label_file = data_dir.joinpath('train_labels.csv')
sample_file = data_dir.joinpath('sample_submission.csv')

In [None]:
# Load both CSV files with their 'id' column as the index:
# `label` carries the training targets, `sub` is the frame later written
# out as the submission.
label, sub = (
    pd.read_csv(csv_path, index_col='id')
    for csv_path in (train_label_file, sample_file)
)

In [None]:
def id_to_path(s, train=True):
    """Return the path of the ``.npy`` file belonging to sample id ``s``.

    The file is located at ``<root>/<first element of s>/<s>.npy`` where
    ``<root>`` is ``train_data_dir`` when ``train`` is truthy and
    ``test_data_dir`` otherwise.
    """
    if train:
        root = train_data_dir
    else:
        root = test_data_dir
    # Files are grouped in sub-directories named after the id's first character.
    shard = s[0]
    return root / shard / f'{s}.npy'

In [None]:
# Seed handed to the train/validation split for reproducibility.
seed = 42
# Number of passes over the training data.
n_epoch = 2
# Number of samples per batch for every data generator.
batch_size = 32
# Per-sample input shape given to the network backbone.
input_size = (273, 256, 3)

In [None]:
class DataGenerator(keras.utils.Sequence):
    """Keras ``Sequence`` that loads batches of ``.npy`` samples from disk.

    Args:
        x_set: Sliceable collection of sample ids; each id is resolved to a
            file with ``id_to_path``.
        y_set: Optional labels aligned with ``x_set``. When given, batches
            are ``(inputs, labels)`` pairs and files are read from the
            training directory; when ``None``, batches contain inputs only
            and files are read from the test directory.
        batch_size: Maximum number of samples per batch (the final batch
            may be smaller).
    """

    def __init__(self, x_set, y_set=None, batch_size=32):
        # Newer Keras releases expect Sequence/PyDataset subclasses to run
        # the base initialiser; on older releases this is a harmless no-op.
        super().__init__()
        self.x, self.y = x_set, y_set
        self.batch_size = batch_size
        # Presence of labels doubles as the "read from the train dir" flag.
        self.is_train = y_set is not None

    def __len__(self):
        """Return the number of batches, counting a trailing partial one."""
        return math.ceil(len(self.x) / self.batch_size)

    def __getitem__(self, idx):
        """Load batch ``idx``.

        Returns:
            ``(batch_x, batch_y)`` when labels were supplied, otherwise just
            ``batch_x``. ``batch_x`` is a float32 array with the samples
            stacked along axis 0.
        """
        window = slice(idx * self.batch_size, (idx + 1) * self.batch_size)
        batch_ids = self.x[window]

        # Keep every second entry along the first axis of each stored array.
        list_x = [np.load(id_to_path(x, self.is_train))[::2] for x in batch_ids]
        # Stack to (batch, ...) and move the per-sample leading axis to the
        # end, so the entries kept above end up on the last axis.
        batch_x = np.moveaxis(np.stack(list_x), 1, -1)
        # float32 halves the memory of the former float64 batches.
        # NOTE(review): dividing by 255 presumes 8-bit-range inputs —
        # confirm against the actual value range of the .npy files.
        batch_x = batch_x.astype(np.float32) / 255

        if self.is_train:
            return batch_x, self.y[window]
        return batch_x

In [None]:
# ImageNet-initialised EfficientNetB3 without its classification top.
backbone = efn.EfficientNetB3(
    include_top=False,
    weights='imagenet',
    input_shape=input_size,
)

# Backbone features -> global average pooling -> single sigmoid output.
model = tf.keras.Sequential([
    backbone,
    keras.layers.GlobalAveragePooling2D(),
    keras.layers.Dense(1, activation='sigmoid'),
])

model.summary()

# Binary cross-entropy objective optimised with Adam; AUC is tracked as
# the reported metric.
model.compile(
    loss='binary_crossentropy',
    optimizer=keras.optimizers.Adam(learning_rate=1e-4),
    metrics=[keras.metrics.AUC()],
)

In [None]:
# Ids and targets of the labelled samples, and ids of the samples to predict.
x0, y0 = label.index.values, label['target'].values
x1 = sub.index.values

# Hold out 20% of the labelled samples for validation (seeded split).
x_train, x_val, y_train, y_val = model_selection.train_test_split(
    x0,
    y0,
    test_size=0.2,
    random_state=seed,
)

# Labelled generators for fitting/validation; an unlabelled one for inference.
train, val = (
    DataGenerator(ids, targets, batch_size=batch_size)
    for ids, targets in ((x_train, y_train), (x_val, y_val))
)
test = DataGenerator(x1, batch_size=batch_size)

model.fit(train, validation_data=val, epochs=n_epoch)

In [None]:
# Run the model over the test generator and collapse the output to 1-D.
raw_scores = model.predict(test)
prediction = raw_scores.flatten()

# Store the scores in the 'target' column and write the submission CSV
# (the 'id' index is written as the first column).
sub['target'] = prediction
sub.to_csv('submission.csv')