# ALIEN
TensorFlow simple baseline (EfficientNet inference).  
Version 1: EfficientNet-B0, fold 0  
Version 4: EfficientNet-B7, fold 0  
Version 5: EfficientNet-B0, folds 0~5  

Datasets:  
https://www.kaggle.com/h053473666/seti-npy-543512-09  
https://www.kaggle.com/h053473666/seti-npy-543512-af  
https://www.kaggle.com/h053473666/seti-npy-543512-test  

Training notebook:  
https://www.kaggle.com/h053473666/tensorflow-tpu-seti-efficientnet-train  


In [None]:
!pip install efficientnet -q

In [None]:
import os

import efficientnet.tfkeras as efn
import numpy as np
import pandas as pd
from kaggle_datasets import KaggleDatasets
from sklearn.model_selection import train_test_split
import tensorflow as tf
from sklearn.model_selection import GroupKFold

In [None]:
def auto_select_accelerator():
    """Return a ``tf.distribute`` strategy, using a TPU when one is reachable.

    Tries to resolve, connect to and initialise a TPU cluster and wrap it in
    a TPU strategy. If any of those steps raises ``ValueError`` the current
    default strategy from ``tf.distribute.get_strategy()`` is used instead.
    The replica count of the chosen strategy is printed either way.

    Returns:
        The selected ``tf.distribute`` strategy object.
    """
    try:
        tpu = tf.distribute.cluster_resolver.TPUClusterResolver()
        tf.config.experimental_connect_to_cluster(tpu)
        tf.tpu.experimental.initialize_tpu_system(tpu)
        # ``tf.distribute.experimental.TPUStrategy`` is deprecated in newer
        # TF 2.x releases; prefer the stable class when the installed
        # TensorFlow provides it and fall back to the old alias otherwise.
        strategy_cls = getattr(tf.distribute, "TPUStrategy", None)
        if strategy_cls is None:
            strategy_cls = tf.distribute.experimental.TPUStrategy
        strategy = strategy_cls(tpu)
        print("Running on TPU:", tpu.master())
    except ValueError:
        # No TPU could be set up: keep whatever strategy is currently active.
        strategy = tf.distribute.get_strategy()
    print(f"Running on {strategy.num_replicas_in_sync} replicas")

    return strategy


def build_decoder(with_labels=True, target_size=(256, 256), ext='npy'):
    """Build a function that loads one image file into a float32 tensor.

    The returned function is meant to be passed to ``tf.data.Dataset.map``.

    Args:
        with_labels: If true, the returned function takes ``(path, label)``
            and returns ``(image, label)``; otherwise it takes ``path`` only
            and returns the image.
        target_size: Size passed to ``tf.image.resize``.
        ext: File format to decode: ``'npy'``, ``'png'``, ``'jpg'`` or
            ``'jpeg'``.

    Returns:
        The decoding function described above.

    Raises:
        ValueError: If ``ext`` is not one of the supported formats. This is
            checked immediately, so a bad extension is reported when the
            decoder is built rather than later, after a file read, when the
            dataset pipeline is traced.
    """
    if ext not in ('npy', 'png', 'jpg', 'jpeg'):
        raise ValueError("Image extension not supported")

    def decode(path):
        file_bytes = tf.io.read_file(path)
        if ext == 'npy':
            # Interpret the whole file as float16 values and drop the first
            # 64 of them (128 bytes) -- presumably the .npy header; confirm
            # against how the files were written.
            img = tf.io.decode_raw(file_bytes, tf.float16)
            img = img[64:]
            img = tf.reshape(img, [546, 512])
            # Replicate the single plane into three identical channels.
            img = tf.stack([img, img, img], axis=-1)
        elif ext == 'png':
            img = tf.image.decode_png(file_bytes, channels=3)
        else:
            # Only 'jpg' / 'jpeg' can reach this branch after validation.
            img = tf.image.decode_jpeg(file_bytes, channels=3)

        # Shared tail for every format: cast, scale by 1/255, resize.
        img = tf.cast(img, tf.float32) / 255.0
        img = tf.image.resize(img, target_size)

        return img

    def decode_with_labels(path, label):
        return decode(path), label

    return decode_with_labels if with_labels else decode


def build_augmenter(with_labels=True):
    """Build a random-flip augmentation function for a ``tf.data`` pipeline.

    Args:
        with_labels: If true, the returned function takes ``(img, label)``
            and returns ``(augmented_img, label)`` with the label untouched;
            otherwise it takes and returns the image only.

    Returns:
        A function applying a random left/right flip followed by a random
        up/down flip to the image.
    """
    def _random_flips(image):
        flipped = tf.image.random_flip_left_right(image)
        return tf.image.random_flip_up_down(flipped)

    if not with_labels:
        return _random_flips

    def _random_flips_keep_label(image, target):
        # The label passes through unchanged; only the image is flipped.
        return _random_flips(image), target

    return _random_flips_keep_label


def build_dataset(paths, labels=None, bsize=128, cache=True,
                  decode_fn=None, augment_fn=None,
                  augment=True, repeat=True, shuffle=1024,
                  cache_dir=""):
    """Assemble a batched, prefetched ``tf.data.Dataset`` from file paths.

    Pipeline order: decode -> cache -> augment -> repeat -> shuffle ->
    batch -> prefetch. Each optional stage is skipped when its flag is falsy.

    Args:
        paths: File paths to load, one element per sample.
        labels: Optional labels aligned with ``paths``. When given, the
            dataset yields ``(image, label)`` pairs; otherwise images only.
        bsize: Batch size.
        cache: Whether to cache the decoded elements.
        decode_fn: Function mapping a dataset element to a decoded one.
            Defaults to ``build_decoder(labels is not None)``.
        augment_fn: Augmentation function. Defaults to
            ``build_augmenter(labels is not None)``.
        augment: Whether to apply ``augment_fn``.
        repeat: Whether to repeat the dataset indefinitely.
        shuffle: Shuffle buffer size; a falsy value disables shuffling.
        cache_dir: Argument forwarded to ``Dataset.cache``. When non-empty
            and ``cache`` is exactly ``True`` the directory is created.

    Returns:
        The resulting ``tf.data.Dataset``.
    """
    has_labels = labels is not None

    if cache_dir != "" and cache is True:
        os.makedirs(cache_dir, exist_ok=True)

    decode_fn = build_decoder(has_labels) if decode_fn is None else decode_fn
    augment_fn = (
        build_augmenter(has_labels) if augment_fn is None else augment_fn
    )

    autotune = tf.data.experimental.AUTOTUNE
    source = (paths, labels) if has_labels else paths

    pipeline = tf.data.Dataset.from_tensor_slices(source)
    pipeline = pipeline.map(decode_fn, num_parallel_calls=autotune)
    if cache:
        # Cache after decoding but before augmentation, so the random
        # augmentation is re-applied on every pass over the cached data.
        pipeline = pipeline.cache(cache_dir)
    if augment:
        pipeline = pipeline.map(augment_fn, num_parallel_calls=autotune)
    if repeat:
        pipeline = pipeline.repeat()
    if shuffle:
        pipeline = pipeline.shuffle(shuffle)

    return pipeline.batch(bsize).prefetch(autotune)

In [None]:
# Choose the accelerator first: the global batch size is 32 per replica.
strategy = auto_select_accelerator()
BATCH_SIZE = 32 * strategy.num_replicas_in_sync
IMSIZE = (224, 240, 260, 300, 380, 456, 528, 600, 512)

# The sample submission supplies the test ids and the name of the
# prediction column (its second column).
sub_df = pd.read_csv('../input/seti-breakthrough-listen/sample_submission.csv')
label_cols = sub_df.columns[1]
test_paths = '../input/seti-npy-543512-test/' + sub_df['id'] + '.npy'

# IMSIZE[8] is 512, so test images are resized to 512x512.
test_decoder = build_decoder(
    with_labels=False,
    target_size=(IMSIZE[8], IMSIZE[8]),
    ext='npy',
)

# Deterministic single pass over the test set: no cache, augmentation,
# repeat or shuffle, so predictions stay aligned with the rows of sub_df.
dtest = build_dataset(
    test_paths,
    bsize=BATCH_SIZE,
    decode_fn=test_decoder,
    cache=False,
    augment=False,
    repeat=False,
    shuffle=False,
)

with strategy.scope():
    # Directory holding the saved checkpoints model0.h5 ... model4.h5
    # (presumably one per cross-validation fold -- confirm against the
    # training notebook).
    model_dir = '../input/k/h053473666/seti-efnb0-train-fold0-546-512-img'

    # Load every checkpoint inside the strategy scope, in index order, so
    # the ensemble list matches the original model0..model4 ordering.
    models = [
        tf.keras.models.load_model(f'{model_dir}/model{fold}.h5')
        for fold in range(5)
    ]

    
    
    
# Run every loaded model over the test set, then average the predictions
# element-wise to form the ensemble output.
fold_predictions = [model.predict(dtest, verbose=1) for model in models]
sub_df[label_cols] = sum(fold_predictions) / len(models)

# Write the submission file without the DataFrame index column.
sub_df.to_csv('submission.csv', index=False)