# Imports

In [None]:
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf
import tensorflow_addons as tfa
from sklearn.metrics import mean_absolute_error
from sklearn.model_selection import train_test_split
from tensorflow import keras
from tensorflow.keras import callbacks
from tensorflow.keras import layers
from tensorflow.keras.applications import (
        vgg16,
        resnet50,
        inception_v3)
from tensorflow.keras.layers.experimental import preprocessing
from tqdm import tqdm

# Load the Data

In [None]:
# Root of the competition dataset; every other path is derived from it.
input_path = '../input/seti-breakthrough-listen'
train_path = f'{input_path}/train'
test_path = f'{input_path}/test'
sample_path = f'{input_path}/sample_submission.csv'
labels_path = f'{input_path}/train_labels.csv'

In [None]:
# Label table indexed by sample id, so a target can be looked up by id.
train_labels = pd.read_csv(labels_path).set_index('id')
# Spot-check the lookup on one id.
train_labels.loc['0000799a2b2c42d', 'target']

In [None]:
# Sub-directories of the training set, in sorted order.
dirs = sorted(
    d for d in os.listdir(train_path)
    if os.path.isdir(os.path.join(train_path, d))
)
print(len(dirs))
print(dirs)

# Collected per-file arrays and their labels (same order in both lists).
train_tensors = []
y_train = []

# Only the first sub-directory is loaded (presumably to bound memory and load
# time -- widen the slice to load more).
for i, dir_name in enumerate(dirs[:1]):
    print(f'loading... {i+1} / {len(dirs)}')
    for file_name in tqdm(os.listdir(os.path.join(train_path, dir_name))):
        current_file = np.load(os.path.join(train_path, dir_name, file_name))
        # Crop the last two axes to at most 256 x 256.
        train_tensors.append(current_file[:, :256, :256])
        # The label key is the file name without its 4-character '.npy' suffix.
        sample_id = file_name[:-len('.npy')]
        y_train.append(train_labels.loc[sample_id].target)

In [None]:
# Turn the collected Python lists into NumPy arrays.
X_train, y_train = np.array(train_tensors), np.array(y_train)

In [None]:
# Move axis 1 to the end (channels-last layout for the Keras model).
# NOTE: not idempotent -- re-running this cell moves the axes again.
X_train = np.moveaxis(X_train, source=1, destination=-1)

In [None]:
# Indices of the samples labelled 1 (y_train is one-dimensional, so np.where
# returns a single index array).
(x,) = np.where(y_train == 1)
x[:10]

In [None]:
# Leave the positive samples untouched: assigning X_train[0] to every index in
# `x` would replace all label-1 examples with copies of sample 0 and destroy
# the signal the model is meant to learn. Only report the class balance here.
print(f'{len(x)} positive samples out of {len(y_train)}')

# Visualizing the Data

In [None]:
# Plot the six channels of one training sample as a column of images.
img_num = 35
figure = plt.figure(num=0, figsize=(18, 7))
for i in range(6):
    plt.subplot(6, 1, i + 1)
    # X_train is channels-last at this point (axis 1 was moved to the end), so
    # channel i lives on the LAST axis. Indexing the first axis of the sample
    # would return a single row of shape (width, 6) instead of a full image.
    image = X_train[img_num, :, :, i].astype(float)
    plt.imshow(image, interpolation='nearest', aspect='auto')
    plt.title(img_num)
    # Even channels are tagged "ON", odd channels "OFF".
    plt.text(5, 100, ["ON", "OFF"][i % 2], bbox={'facecolor': 'green'})
    plt.xticks([])
    plt.colorbar()

plt.show()

In [None]:
# Feature and label array shapes, one per line.
print(X_train.shape, y_train.shape, sep='\n')

# Define the Model

In [None]:
# ImageNet-pretrained ResNet50 used as a feature extractor. include_top=False
# drops the 1000-class ImageNet softmax head (its class probabilities are not
# useful features for this task) and pooling='avg' global-average-pools the
# last feature map so the backbone returns one feature vector per image.
resnet_model = resnet50.ResNet50(weights='imagenet', include_top=False, pooling='avg')

# Freeze the first 161 layers; the remaining layers stay trainable.
for layer in resnet_model.layers[:161]:
    layer.trainable = False

In [None]:
# Model input geometry: height, width and number of channels per sample.
img_height, img_width, channels = 256, 256, 6

In [None]:
def my_block(inputs, block_name='MyBlock'):
    """Apply a strided Conv2D -> BatchNormalization -> ReLU block to `inputs`.

    The convolution uses 3 filters, a 2x2 kernel, stride 2 and 'same' padding.
    The layers are created under a `tf.name_scope` named after `block_name`.

    Args:
        inputs: Keras tensor to transform.
        block_name: Either 'MyBlock' or 'MyEnd'; both build the same layers and
            differ only in the name scope.

    Returns:
        The output tensor of the ReLU layer.

    Raises:
        ValueError: If `block_name` is not one of the supported names (instead
            of silently returning None).
    """
    supported = ('MyBlock', 'MyEnd')
    if block_name not in supported:
        raise ValueError(
            f'Unknown block_name {block_name!r}; expected one of {supported}'
        )

    with tf.name_scope(block_name):
        conv = layers.Conv2D(filters=3, strides=2, kernel_size=2, padding='same')(inputs)
        bn = layers.BatchNormalization()(conv)
        return layers.ReLU()(bn)

In [None]:
def preprocessing_layer(inputs, block_name='MyPre'):
    """Resize `inputs` to 224x224 (bilinear) under a name scope.

    Only the resize is applied; no augmentation layers are used.

    Args:
        inputs: Keras tensor to resize.
        block_name: Name passed to `tf.name_scope`.

    Returns:
        The resized tensor.
    """
    target_size = 224
    with tf.name_scope(block_name):
        resize = preprocessing.Resizing(
            target_size, target_size, interpolation='bilinear'
        )
        return resize(inputs)

In [None]:
# Functional graph: input -> resize -> 1x1 conv down to 3 channels (what the
# ResNet backbone accepts) -> ResNet -> 2-way softmax head.
inputs = keras.Input(shape=(img_height, img_width, channels))
x = preprocessing_layer(inputs, block_name='MyPre')
x = layers.Conv2D(filters=3, kernel_size=1, strides=1)(x)
x = resnet_model(x)
outputs = layers.Dense(units=2, activation='softmax')(x)

In [None]:
# Wrap the functional graph (input tensor -> softmax output) in a Model.
model = keras.Model(inputs, outputs)

In [None]:
# Print the layer-by-layer summary of the assembled model.
model.summary()

In [None]:
# The model's final layer is Dense(2, activation='softmax'), so it already
# outputs class probabilities. The loss must therefore be told NOT to expect
# logits; from_logits=True would apply a second softmax inside the loss.
model.compile(
    optimizer=keras.optimizers.Adam(learning_rate=0.001),
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False),
    metrics=['accuracy'],
)

In [None]:
# Keep only the first 1000 samples (and their labels).
X_train, y_train = X_train[:1000], y_train[:1000]

In [None]:
# Ordered (unshuffled) 80/20 train/validation split.
X_train, X_valid, y_train, y_valid = train_test_split(
    X_train,
    y_train,
    test_size=0.2,
    random_state=0,
    shuffle=False,
)

In [None]:
# Cast both label vectors to float32.
y_train, y_valid = y_train.astype('float32'), y_valid.astype('float32')

# Training the Model

In [None]:
# NOTE(review): clear_session() resets Keras' global state and is normally
# called before a model is built -- confirm it is intended here, after the
# model has already been created and compiled.
keras.backend.clear_session()

early_stopping = callbacks.EarlyStopping(
    min_delta=0.001,  # minimum amount of change to count as an improvement
    # Epochs without improvement before stopping. This must be smaller than
    # `epochs` below, otherwise the callback can never trigger.
    patience=3,
    restore_best_weights=True,
)

history = model.fit(
    X_train, y_train,
    validation_data=(X_valid, y_valid),
    batch_size=32,
    epochs=10,
    callbacks=[early_stopping],  # callbacks are passed as a list
    verbose=1,  # show the per-epoch progress bar
)
history_df = pd.DataFrame(history.history)

# Learning curves: loss and accuracy for the training and validation sets.
history_df.loc[0:, ['loss', 'val_loss']].plot()
history_df.loc[0:, ['accuracy', 'val_accuracy']].plot()
print(("Minimum Validation Loss: {:0.4f}").format(history_df['val_loss'].min()))

# Inspecting the Results

In [None]:
# Predicted class per validation sample: index of the largest model output.
predictions = np.argmax(model.predict(X_valid), axis=1)

In [None]:
# Indices of the validation samples labelled 1 (y_valid is one-dimensional).
(x,) = np.where(y_valid == 1)
x[:20]

In [None]:
# Display the validation labels.
y_valid

In [None]:
# Display the predicted classes for the validation set.
predictions

# Accuracy of the Model

In [None]:
# Labels and predictions are both hard 0/1 values, so the mean absolute error
# is the fraction of misclassified samples (the error rate), not the accuracy.
# Report accuracy explicitly as its complement.
error_rate = mean_absolute_error(y_valid, predictions)
accuracy = 1.0 - error_rate
print(f'Validation accuracy: {accuracy:.4f} (error rate: {error_rate:.4f})')

# To Be Continued

In [None]:
# model.save("my_model.h5")

In [None]:
# model = tf.keras.models.load_model('../input/my_model')

In [None]:
# submission = pd.read_csv(sample_path)

In [None]:
# submission.iloc[0].target

In [None]:
# dirs = [d for d in os.listdir(test_path) if os.path.isdir(os.path.join(test_path, d))]
# dirs.sort()
# print(len(dirs))
# print(dirs)

# flag = 0
# for i, dir in enumerate(dirs):
#         print(f'loading... {i+1} / {len(dirs)}')
#         files_list = [f for f in os.listdir(os.path.join(test_path, dir))]

#         for j, file_name in enumerate(tqdm(files_list)):
#             current_file = np.load(os.path.join(test_path, dir, file_name))
#             current_file = current_file[:,:256,:256]
#             current_file = current_file.reshape(1, 256, 256, 6)
#             predictions = model.predict(current_file)
#             predictions = np.argmax(predictions, axis=1)  
#             submission.iloc[flag].target = predictions
#             flag += 1

In [None]:
# submission.target = 0

In [None]:
# submission

In [None]:
# submission.to_csv('submission.csv', index=False)

In [None]:
# X_test = np.array(test_tensors[:100])
# X_test = X_test.reshape(-1, 256, 256, 6)

In [None]:
# y_predict = model.predict(X_test)

In [None]:
# y_predict

In [None]:
# for filename in os.listdir(dir):
#     if filename.endswith('.npy'):
#         arrays[filename] = load_array(filename)