In [None]:
# Some of the code lines are copied from https://www.kaggle.com/wspinkaggle/seti-basic-tensorflow-efficientnet

# Import the required libraries

In [None]:
import numpy as np
import pandas as pd
from pathlib import Path
import math
import os
import matplotlib.pyplot as plt
import tensorflow as tf
import pathlib
from tensorflow.keras.applications import EfficientNetB2
from tensorflow.keras.utils import Sequence
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.model_selection import train_test_split

# Set the path for input data

In [None]:
# Root directory of the input dataset, relative to the notebook's working directory.
data_dir = Path('../input/seti-breakthrough-listen/')
# Sub-directories holding the train and test .npy files.
train_data_dir, test_data_dir = data_dir / 'train', data_dir / 'test'

# CSV files that sit directly under the dataset root.
train_label_file = data_dir.joinpath('train_labels.csv')
sample_file = data_dir.joinpath('sample_submission.csv')

In [None]:
# Load the training labels; the 'id' column becomes the DataFrame index.
df_labels = pd.read_csv(train_label_file, index_col='id')
# Preview the first rows as the cell output.
df_labels.head()

# Visualise the input data

### Datapoint where target is 0

In [None]:
# Show 3 random rows with target == 0 (no random_state, so the rows differ between runs).
df_labels.query("target == 0").sample(3)

In [None]:
# Read one sample from disk and promote it to float for plotting.
d_point = np.load('../input/seti-breakthrough-listen/train/6/6759b44dd672.npy').astype('float')

# Stack the first six panels vertically, labelling them alternately ON / OFF.
plt.figure(figsize=(16, 10))
for row, tag in enumerate(["ON", "OFF"] * 3, start=1):
    plt.subplot(6, 1, row)
    if row == 1:
        # Only the top panel carries the figure title.
        plt.title('File name: 6759b44dd672  | Target: 0', fontsize=18)
    plt.imshow(d_point[row - 1], interpolation='nearest', aspect='auto')
    plt.text(5, 100, tag, bbox={'facecolor': 'white'})
    plt.xticks([])
plt.show()

In [None]:
# Read one sample from disk and promote it to float for plotting.
d_point = np.load('../input/seti-breakthrough-listen/train/e/ee3e7543040a.npy').astype('float')

# Stack the first six panels vertically, labelling them alternately ON / OFF.
plt.figure(figsize=(16, 10))
for row, tag in enumerate(["ON", "OFF"] * 3, start=1):
    plt.subplot(6, 1, row)
    if row == 1:
        # Only the top panel carries the figure title.
        plt.title('File name: ee3e7543040a  | Target: 0', fontsize=18)
    plt.imshow(d_point[row - 1], interpolation='nearest', aspect='auto')
    plt.text(5, 100, tag, bbox={'facecolor': 'white'})
    plt.xticks([])
plt.show()

### Datapoint where target is 1

In [None]:
# Show 3 random rows with target == 1 (no random_state, so the rows differ between runs).
df_labels.query("target == 1").sample(3)

In [None]:
# Sample to display. Both the path and the title are built from this id, so the
# title can no longer name a different file than the one that is plotted.
file_id = 'a5db9a15fb61'
d_point = np.load(f'../input/seti-breakthrough-listen/train/{file_id[0]}/{file_id}.npy')
d_point = d_point.astype('float')

# Stack the first six panels vertically, labelling them alternately ON / OFF.
plt.figure(figsize=(16,10))
for i in range(6):
    plt.subplot(6, 1, i + 1)
    if i == 0:
        # Only the top panel carries the figure title.
        plt.title(f'File name: {file_id}  | Target: 1', fontsize=18)
    plt.imshow(d_point[i], interpolation='nearest', aspect='auto')
    plt.text(5, 100, ["ON", "OFF"][i % 2], bbox={'facecolor': 'white'})
    plt.xticks([])
plt.show()

In [None]:
# Sample to display. Both the path and the title are built from this id, so the
# title can no longer name a different file than the one that is plotted.
file_id = '84cd8577baec'
d_point = np.load(f'../input/seti-breakthrough-listen/train/{file_id[0]}/{file_id}.npy')
d_point = d_point.astype('float')

# Stack the first six panels vertically, labelling them alternately ON / OFF.
plt.figure(figsize=(16,10))
for i in range(6):
    plt.subplot(6, 1, i + 1)
    if i == 0:
        # Only the top panel carries the figure title.
        plt.title(f'File name: {file_id}  | Target: 1', fontsize=18)
    plt.imshow(d_point[i], interpolation='nearest', aspect='auto')
    plt.text(5, 100, ["ON", "OFF"][i % 2], bbox={'facecolor': 'white'})
    plt.xticks([])
plt.show()

It can be seen that in the data points where the target is 1, there is a vertical or inclined line. The horizontal lines are mostly noise and can be eliminated.
We will use a Fourier transform to filter out the horizontal lines before the data is fed to the CNN model.

Let's look at an example of the Fourier transform.

# Filtering the signal using Fourier transform.

In [None]:
#Fourier function

def fourier_masker_ver(image, col_start=124, col_stop=136):
    """Mask a vertical band of the centred 2-D spectrum of ``image`` and plot the result.

    The image is Fourier-transformed and the zero frequency is moved to the
    centre. Columns ``col_start:col_stop`` of that spectrum are overwritten
    with 1, and the magnitude of the inverse transform is shown next to the
    spectrum and the original image.

    Args:
        image: 2-D array to filter.
        col_start: First spectrum column (inclusive) of the masked band.
        col_stop: Last spectrum column (exclusive) of the masked band.

    Returns:
        None. The function only draws a three-panel matplotlib figure.
    """
    spectrum = np.fft.fftshift(np.fft.fft2(image))
    # The band is set to 1 rather than 0, which also keeps log(|spectrum|) finite there.
    spectrum[:, col_start:col_stop] = 1
    # Undo the centring before inverting so ifft2 sees the layout fft2 produced.
    filtered = np.abs(np.fft.ifft2(np.fft.ifftshift(spectrum)))

    _, ax = plt.subplots(1, 3, figsize=(15, 15))
    ax[0].imshow(np.log(np.abs(spectrum)), cmap='gray')
    ax[0].set_title('Fourier Image', fontsize = 15)
    ax[1].imshow(image)
    ax[1].set_title('Original Image', fontsize = 15)
    ax[2].imshow(filtered)
    ax[2].set_title('Transformed  Image', fontsize = 15)

In [None]:
# Show 3 random rows with target == 1 (no random_state, so the rows differ between runs).
df_labels.query("target == 1").sample(3)

In [None]:
# Load the sample, convert it to float and divide by 255 in a single expression.
d_point = np.load('../input/seti-breakthrough-listen/train/2/2c407e6d4cce.npy').astype('float') / 255

In [None]:
# Filter and plot panel 0 of the loaded sample.
fourier_masker_ver(d_point[0])

In [None]:
# Filter and plot panel 1 of the loaded sample.
fourier_masker_ver(d_point[1])

In [None]:
# Filter and plot panel 2 of the loaded sample.
fourier_masker_ver(d_point[2])

As seen above, the horizontal lines are removed by the Fourier transform. Now let's build a model based on EfficientNetB2.

In [None]:
def id_to_path(file_id, train=True):
    """Return the location of the ``.npy`` file that belongs to a sample id.

    Files are grouped into sub-folders named after the first character of the id.

    Args:
        file_id: Sample identifier (non-empty string).
        train: Look under ``train_data_dir`` when true, ``test_data_dir`` otherwise.

    Returns:
        ``<root>/<first character of id>/<id>.npy`` as a path object.
    """
    if train:
        root = train_data_dir
    else:
        root = test_data_dir
    return root.joinpath(file_id[0], f'{file_id}.npy')

In [None]:
class SETISequence(Sequence):
    """
    Keras ``Sequence`` that loads samples from disk and Fourier-filters them one batch at a time.

    Taken from this nice starter notebook https://www.kaggle.com/kenjirokiyono/seti-simple-code-for-beginners-tensorflow and added the fourier transform step

    Args:
        x_set: Indexable collection of sample ids; it is sliced per batch.
        y_set: Labels aligned with ``x_set``. ``None`` selects inference mode:
            files are read from the test directory and only inputs are returned.
        batch_size: Maximum number of samples per batch.
    """
    def __init__(self, x_set, y_set=None, batch_size=64):
        self.x, self.y = x_set, y_set
        self.batch_size = batch_size
        self.is_train = y_set is not None

    def __len__(self):
        """Return the number of batches, counting a final partial batch."""
        return math.ceil(len(self.x) / self.batch_size)

    @staticmethod
    def _fourier_filter(cadence):
        """Mask a column band of each panel's centred spectrum and return the magnitude."""
        # Shift only the two image axes. The default (axes=None) would also roll
        # the leading panel axis and silently reorder the panels of the sample.
        spectrum = np.fft.fftshift(np.fft.fft2(cadence), axes=(-2, -1))
        spectrum[..., 120:136] = 1
        # Undo the centring before inverting so ifft2 sees the layout fft2 produced.
        return np.abs(np.fft.ifft2(np.fft.ifftshift(spectrum, axes=(-2, -1))))

    def __getitem__(self, idx):
        """Return batch ``idx``: ``(inputs, labels)`` when labels were given, else ``inputs``."""
        batch = slice(idx * self.batch_size, (idx + 1) * self.batch_size)

        filtered = []
        # below is the fourier transform step
        for file_id in self.x[batch]:
            cadence = np.load(id_to_path(file_id, train=self.is_train))
            cadence = cadence.astype('float') / 255
            filtered.append(self._fourier_filter(cadence))

        # (batch, panels, rows, cols) -> (batch, rows, cols, panels): channels last.
        batch_x = np.moveaxis(np.stack(filtered), 1, -1).astype("float")

        if self.is_train:
            return batch_x, self.y[batch]
        return batch_x
        
# Smoke test: build a one-sample sequence and show the shape of the first batch's inputs.
SETISequence(["00047dfc96a9"], [1], batch_size=2)[0][0].shape

# Model

In [None]:
# Random shift along both image axes (factor 0.2 each). With fill_mode='wrap' the
# content leaving one edge re-enters on the opposite edge.
data_augmentation_1 = tf.keras.layers.experimental.preprocessing.RandomTranslation(
    height_factor=0.2,
    width_factor=0.2,
    fill_mode='wrap',
    interpolation='bilinear',
    seed=None,
    fill_value=0.0,
)
# Random flip in "vertical" mode.
data_augmentation_2 = tf.keras.layers.experimental.preprocessing.RandomFlip(mode="vertical")

In [None]:
# Exponentially decaying learning rate: starts at 0.001 and is scaled by a
# factor of 0.9 for every 1000 optimizer steps.
lr_scheduler = tf.keras.optimizers.schedules.ExponentialDecay(
    initial_learning_rate=0.001, 
    decay_steps=1000, 
    decay_rate=0.9)

In [None]:
model = tf.keras.Sequential([
    # Map the 6 input channels down to the 3 channels the backbone is built for.
    tf.keras.layers.Conv2D(
        filters=3,
        kernel_size=(3, 3),
        strides=(1, 1),
        padding="same",
        activation='relu',
        input_shape=(273, 256, 6),
    ),
    # Augmentation layers.
    data_augmentation_1,
    data_augmentation_2,
    # ImageNet-initialised backbone without its classification head.
    EfficientNetB2(
        input_shape=(273, 256, 3),
        weights='imagenet',
        include_top=False,
        drop_connect_rate=0.4,
    ),
    # Classification head: pooled features -> 128 -> 32 -> 1 sigmoid unit.
    tf.keras.layers.GlobalAveragePooling2D(),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dense(32, activation='relu'),
    tf.keras.layers.Dense(1, activation='sigmoid'),
])

# Binary classification set-up: Adam driven by the decaying learning-rate schedule.
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=lr_scheduler),
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# Print the layer-by-layer overview of the assembled model.
model.summary()

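## To make it quick, I'll run the model on 2000 data points only.

Note: the cells below pass every row of `df_labels` to the generator; subsample `df_labels` first to actually limit the run to 2000 data points.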

In [None]:
# Ids and targets of every labelled row, taken in the same row order so they stay aligned.
# NOTE(review): the heading above mentions 2000 data points, but nothing is subsampled here; confirm the intent.
train_ids, train_y = df_labels.index.values, df_labels['target'].values

In [None]:
# Batch generator over the training ids and labels, 64 samples per batch.
train = SETISequence(train_ids, train_y, batch_size=64)

In [None]:
# Train for 8 epochs; `train` yields (inputs, labels) batches, so no separate labels are passed.
history = model.fit(train, epochs=8)

In [None]:
# Load the sample submission; the 'id' column becomes the DataFrame index.
submission = pd.read_csv(sample_file, index_col='id')
# Preview the first rows as the cell output.
submission.head()

In [None]:
# The test ids are the index of the sample submission, in file order.
test_ids = submission.index.values

In [None]:
# No labels are passed, so the sequence reads from the test directory and yields inputs only.
test = SETISequence(test_ids, batch_size=64)

In [None]:
# Run the trained model over every test batch (outputs of the final sigmoid unit).
test_prediction = model.predict(test)

In [None]:
# Hard 0/1 labels: 1 where the predicted score is strictly above 0.5, else 0.
final_pred = (test_prediction > 0.5).astype(int)

In [None]:
# Peek at the first 10 thresholded predictions.
final_pred[:10]

In [None]:
# final_pred may be 2-D, one column per model output; flatten it so a plain 1-D column is assigned.
submission['target'] = np.asarray(final_pred).ravel()
# 'id' is the DataFrame index (read_csv(..., index_col='id')), so it must be written out.
# With index=False the file would contain only the 'target' column and no ids.
submission.to_csv('sub.csv', index=True)
submission.head()

### The model might not perform well, as I have only trained it on limited data and for just a few epochs (8 in the training cell above).
### But do let me know what you think about the idea of using a Fourier transform as a preprocessing step.