In [None]:
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
import tensorflow as tf
import keras
import keras.layers as L
import math
from keras.utils import Sequence
from keras.preprocessing import image
from random import shuffle
from sklearn.model_selection import train_test_split
import plotly.express as px
import seaborn as sns

In [None]:
# Competition metadata: the labelled training ids and the submission template.
train_labels, sample_submission = (
    pd.read_csv(csv_path)
    for csv_path in (
        '../input/seti-breakthrough-listen/train_labels.csv',
        '../input/seti-breakthrough-listen/sample_submission.csv',
    )
)

In [None]:
# Distribution of the `target` label; passing the frame plus `x=` gives the axis
# the column name, and the title lets the figure stand on its own.
px.histogram(train_labels, x='target', title='Distribution of target in train_labels')

In [None]:
# One example cadence from the training set, copied into a float32 array for plotting.
arr = np.array(
    np.load('../input/seti-breakthrough-listen/train/f/fffbb1c9c3d6c31.npy'),
    dtype=np.float32,
)

In [None]:
# Draw the first six frames of the example cadence as one column of heat-maps.
fig = plt.figure(figsize=(16, 10))
for panel in range(1, 7):
    plt.subplot(6, 1, panel)          # subplot positions are 1-based
    plt.imshow(arr[panel - 1], aspect='auto')

## Helper functions

In [None]:
def on_target(arr):
    """Return every second entry of ``arr`` along its first axis (indices 0, 2, 4, ...).

    In this notebook those even-indexed frames are treated as the on-target
    observations of a cadence.
    """
    every_other = slice(0, None, 2)
    return arr[every_other]

In [None]:
# Show only the on-target frames of the example cadence, one panel per frame.
# The subplot grid is sized from the data so the panels fill the whole figure
# (a fixed 6-row grid would leave the lower half empty).
fig = plt.figure(figsize=(16, 10))
arr_new = on_target(arr)
n_frames = len(arr_new)
for i in range(n_frames):
    plt.subplot(n_frames,1,i+1)
    plt.imshow(arr_new[i],aspect='auto')

In [None]:
def id_to_path(idx,train=True):
    """Build the path of the ``.npy`` file for sample id ``idx``.

    Files live under ``<root>/<train|test>/<first character of id>/<id>.npy``;
    ``train`` selects the ``train`` folder when truthy and ``test`` otherwise.
    """
    folder = 'train/' if train else 'test/'
    return f"../input/seti-breakthrough-listen/{folder}{idx[0]}/{idx}.npy"

In [None]:
class Dataset(Sequence):
    """Keras ``Sequence`` that serves batches of on-target cadence images.

    Each sample id is resolved to a ``.npy`` file with ``id_to_path``. Every
    second frame of the loaded array (``[::2]``) is kept, the kept frames are
    stacked vertically into one 2-D image, and that image is copied into three
    identical channels, giving batches of shape ``(batch, 3*273, 256, 3)`` for
    the files this notebook uses.

    Args:
        idx: Sequence of sample ids.
        y: Optional labels aligned with ``idx``. When given, files are read from
            the train folder and ``__getitem__`` returns ``(batch_x, batch_y)``;
            when ``None``, files are read from the test folder and only
            ``batch_x`` is returned.
        batch_size: Samples per batch (the final batch may be smaller).
        shuffle: Reshuffle ids and labels together after every epoch. Only has
            an effect when labels were given.

    Raises:
        ValueError: If ``y`` is given but its length differs from ``idx``.
    """

    def __init__(self,idx,y=None,batch_size=16,shuffle=True):
        # Harmless on older Keras; newer Keras versions expect the base
        # initialiser to be called.
        super().__init__()
        # Kept as numpy arrays so slicing and permutation indexing always
        # produce arrays (never tuples), including after reshuffling.
        self.idx = np.asarray(idx)
        self.y = None if y is None else np.asarray(y)
        if self.y is not None and len(self.y) != len(self.idx):
            raise ValueError('idx and y must have the same length')
        self.batch_size = batch_size
        self.shuffle = shuffle
        self.is_train = y is not None

    def __len__(self):
        """Number of batches per epoch, counting a final partial batch."""
        return math.ceil(len(self.idx)/self.batch_size)

    def _load_image(self, sample_id):
        """Load one sample and return its on-target frames stacked into a 2-D float32 image."""
        frames = np.load(id_to_path(sample_id, self.is_train))[::2]
        # Same layout as reshape(3*273, 256) for the (3, 273, 256) frames used here.
        return frames.reshape(-1, frames.shape[-1]).astype(np.float32)

    def __getitem__(self,ids):
        """Return batch number ``ids``: ``batch_x`` or ``(batch_x, batch_y)``."""
        start = ids * self.batch_size
        stop = start + self.batch_size
        images = np.stack([self._load_image(sample_id) for sample_id in self.idx[start:stop]])
        # Replicate the single-channel image into 3 identical trailing channels.
        batch_x = np.repeat(images[..., np.newaxis], 3, axis=-1)
        if self.is_train:
            return batch_x, self.y[start:stop]
        return batch_x

    def on_epoch_end(self):
        """Shuffle ids and labels with one shared permutation so pairs stay aligned."""
        if self.shuffle and self.is_train:
            order = np.random.permutation(len(self.idx))
            self.idx = self.idx[order]
            self.y = self.y[order]

In [None]:
# Training ids with their labels, and the ids that need predictions for the submission.
train_idx, y = (train_labels[column].values for column in ('id', 'target'))
test_idx = sample_submission['id'].values

In [None]:
# Hold out 5% of the labelled ids for validation, stratified on the label,
# with a fixed seed so the split is repeatable.
x_train, x_valid, y_train, y_valid = train_test_split(
    train_idx,
    y,
    test_size=0.05,
    random_state=42,
    stratify=y,
)

In [None]:
# Batch generators: the labelled ones yield (x, y); the test one yields x only.
train_dataset = Dataset(idx=x_train, y=y_train)
valid_dataset = Dataset(idx=x_valid, y=y_valid)
test_dataset = Dataset(idx=test_idx)

In [None]:
!pip install -U efficientnet

In [None]:
import efficientnet.keras as efn

In [None]:
# Classifier: ImageNet-pretrained EfficientNetB0 backbone -> global average pooling
# -> small dense head with a single sigmoid output.
# The container is `keras.Sequential` so it comes from the same Keras package as the
# `keras.layers` head and the `efficientnet.keras` backbone, instead of mixing in `tf.keras`.
model = keras.Sequential([
    efn.EfficientNetB0(input_shape=(3*273,256,3),weights='imagenet',include_top=False),
    L.GlobalAveragePooling2D(),
    L.Dense(32,activation='relu'),
    L.Dense(1, activation='sigmoid'),
])

# The AUC metric is named explicitly so the history keys are always 'auc' / 'val_auc',
# even when this cell is re-run in the same kernel.
# (The model summary is printed by the following cell.)
model.compile(optimizer=keras.optimizers.Adam(learning_rate=0.001),
              loss='binary_crossentropy', metrics=[keras.metrics.AUC(name='auc')])

In [None]:
# Print the layer-by-layer summary of the compiled model.
model.summary()

In [None]:
def _plot_history(train_values, val_values, metric):
    """Plot a per-epoch training curve and its validation counterpart on one figure.

    ``metric`` is the label used for the y axis, the title and the legend.
    """
    plt.plot(train_values)
    plt.plot(val_values)
    plt.xlabel('epochs')
    plt.ylabel(metric)
    plt.title(metric + ' vs epochs')
    plt.legend([metric, 'val_' + metric])
    plt.show()


def auc_plot(auc,val_auc):
    """Plot training and validation AUC (one value per epoch each)."""
    _plot_history(auc, val_auc, 'auc')


def loss_plot(loss,val_loss):
    """Plot training and validation loss (one value per epoch each)."""
    _plot_history(loss, val_loss, 'loss')

In [None]:
# Train for 3 epochs, evaluating on the held-out split after each one.
history = model.fit(
    x=train_dataset,
    epochs=3,
    validation_data=valid_dataset,
)

In [None]:
# Plot training vs validation AUC. The history key is looked up by prefix because
# Keras may suffix an unnamed AUC metric (e.g. 'auc_1') when the model cell is re-run,
# which presumably made a fixed 'auc' key unreliable here.
auc_key = next(key for key in history.history if key.startswith('auc'))
auc_plot(history.history[auc_key], history.history['val_' + auc_key])

In [None]:
# Training vs validation loss per epoch.
loss_plot(
    loss=history.history['loss'],
    val_loss=history.history['val_loss'],
)

In [None]:
# Predicted probabilities for the test ids, in the order the test dataset yields them.
preds = model.predict(x=test_dataset)

In [None]:
# Flatten the predictions to one dimension so they can be used as a DataFrame column.
preds = np.ravel(preds)

In [None]:
# Pair each id from the submission template with its predicted probability.
submission = pd.DataFrame(
    dict(
        id=sample_submission['id'],
        target=preds,
    )
)

In [None]:
# Display the submission frame as a visual check before it is written to disk.
submission

In [None]:
# Write the submission file without the DataFrame index column.
submission.to_csv(path_or_buf='submission.csv', index=False)

# References

https://www.kaggle.com/awsaf49/seti-bl-spatial-info-tf-tpu<br>
https://www.kaggle.com/c/seti-breakthrough-listen/discussion/239552