In [None]:
! pip install -q efficientnet

In [None]:
import numpy as np 
import pandas as pd 
import os
from sklearn.model_selection import train_test_split
from matplotlib import pyplot as plt
import seaborn as sns
plt.style.use('fivethirtyeight')
import tensorflow as tf
import tensorflow.keras.layers as l
import efficientnet.tfkeras as efn
from kaggle_datasets import KaggleDatasets

In [None]:
# TPU setting
# Create a cluster resolver (no arguments: the TPU address is left to
# auto-detection), connect this runtime to the cluster and initialise the
# TPU system. The three calls must run in this order.
tpu = tf.distribute.cluster_resolver.TPUClusterResolver()
tf.config.experimental_connect_to_cluster(tpu)
tf.tpu.experimental.initialize_tpu_system(tpu)

# Distribution strategy used later for the batch size and the model scope.
tpu_strategy = tf.distribute.experimental.TPUStrategy(tpu)

In [None]:
# Sanity check: TPU master address, then the number of replicas in sync
# (one value per line).
print(tpu.master(), tpu_strategy.num_replicas_in_sync, sep="\n")

In [None]:
# Let tf.data choose parallelism / buffer sizes at run time.
AUTO = tf.data.experimental.AUTOTUNE
# Dataset options with deterministic element order switched off.
ignore_order = tf.data.Options()
ignore_order.experimental_deterministic = False
# GCS location returned by KaggleDatasets; used below as the prefix of every
# image path. (presumably the bucket of the attached competition dataset —
# TODO confirm, no dataset name is passed)
gcs_path = KaggleDatasets().get_gcs_path()

In [None]:
# Submission template; its "Label" column is filled with predictions at the
# end of the notebook.
sample = pd.read_csv("/kaggle/input/alaska2-image-steganalysis/sample_submission.csv")

In [None]:
# Global batch size: 32 examples for each replica of the TPU strategy.
BATCH_SIZE = 32 * tpu_strategy.num_replicas_in_sync

In [None]:

# Index every image file of the competition data: one row per file, with
# `cate` = the folder it lives in and `name` = the file name in that folder.
DATA_ROOT = "/kaggle/input/alaska2-image-steganalysis"
dir_name = ['Test', 'JUNIWARD', 'JMiPOD', 'Cover', 'UERD']

lists = []  # file names, in directory-listing order
cate = []   # folder name for the entry at the same position in `lists`

for dir_ in dir_name:
    list_ = os.listdir(os.path.join(DATA_ROOT, dir_))
    lists.extend(list_)
    # Plain list growth instead of np.concatenate in a loop: linear instead
    # of quadratic, and no reliance on promoting an empty float list to
    # strings.
    cate.extend([dir_] * len(list_))

# Build the frame once, from complete columns.
df = pd.DataFrame({"cate": cate, "name": lists})

In [None]:
# Full path of every image: <gcs_path>/<folder>/<file name>.
df["path"] = [
    str(os.path.join(gcs_path, folder, fname))
    for folder, fname in df[["cate", "name"]].itertuples(index=False, name=None)
]

In [None]:
def cate_label(x):
    """Return the binary target of one row.

    `x` is anything indexable by "cate" (e.g. a DataFrame row).
    The result is 0 when that value is "Cover" and 1 for any other value.
    """
    return 0 if x["cate"] == "Cover" else 1

# Held-out images, ordered by file name. Predictions are later written to the
# submission file by position (presumably the template is in the same
# file-name order — TODO confirm).
Test_df = df.query("cate=='Test'").sort_values(by="name")

# .copy() gives Train_df its own data, so adding a column below neither raises
# SettingWithCopyWarning nor writes into a temporary slice of df.
Train_df = df.query("cate!='Test'").copy()

# Same rule as cate_label (0 for "Cover", 1 for every other folder), computed
# as one vectorised comparison on the training rows only instead of a
# row-by-row apply over all of df.
Train_df["flg"] = (Train_df["cate"] != "Cover").astype("int64")

In [None]:
# Number of training rows per source folder.
Train_df["cate"].value_counts()

In [None]:
# Work on a random subset of the training rows (presumably to bound the
# training time).
N_TRAIN_SAMPLES = 80000
SAMPLE_SEED = 10  # same seed value as the train/validation split below

# random_state makes the subset, and everything trained on it, reproducible
# across kernel restarts. min() keeps the cell from raising when fewer rows
# than requested are available.
Train_df = Train_df.sample(
    n=min(N_TRAIN_SAMPLES, len(Train_df)),
    random_state=SAMPLE_SEED,
)
Train_df["cate"].value_counts()

In [None]:
# Inputs are the image paths, targets are the binary flags.
X, y = Train_df["path"], Train_df["flg"]

# Hold out 20% of the rows for validation; the fixed seed keeps the split
# repeatable.
split = train_test_split(X, y, test_size=0.2, random_state=10)
X_train, X_val, y_train, y_val = split

In [None]:
# Turn the four pandas Series into plain NumPy arrays.
X_train, X_val, y_train, y_val = (
    np.array(part) for part in (X_train, X_val, y_train, y_val)
)

In [None]:
# Paths of the test images as a NumPy array, in Test_df's row order.
X_test = np.array(Test_df["path"])

In [None]:
def decode_image(filename, label=None, image_size=(512,512)):
    """Read a JPEG file and return it as a float32 tensor scaled by 1/255.

    Args:
        filename: path of the JPEG file, passed to `tf.io.read_file`.
        label: optional target, passed through unchanged.
        image_size: size handed to `tf.image.resize`.

    Returns:
        The image tensor alone when `label` is None, otherwise the pair
        `(image, label)`.
    """
    raw = tf.io.read_file(filename)
    pixels = tf.image.decode_jpeg(raw, channels=3)  # always 3 channels
    scaled = tf.cast(pixels, tf.float32) / 255.0
    resized = tf.image.resize(scaled, image_size)

    if label is not None:
        return resized, label
    return resized

In [None]:
# tf.data settings, declared in this cell so it can be re-run on its own.
AUTO = tf.data.experimental.AUTOTUNE
ignore_order = tf.data.Options()
ignore_order.experimental_deterministic = False

# Training pipeline.
#  * shuffle() runs on the (path, label) pairs before decoding, so the buffer
#    holds short strings instead of decoded images and can span the whole
#    training set;
#  * repeat() comes before batch(), so every batch has exactly BATCH_SIZE
#    elements;
#  * prefetch() is the last step, where it overlaps input preparation with
#    the training step (directly after from_tensor_slices it would only
#    buffer file names).
train_dataset = (
    tf.data.Dataset.from_tensor_slices((X_train, y_train))
    .with_options(ignore_order)
    .shuffle(len(X_train))
    .repeat()
    .map(decode_image, num_parallel_calls=AUTO)
    .batch(BATCH_SIZE)
    .prefetch(AUTO)
)

# Validation pipeline. The decoded images are deliberately not cache()d:
# one 512x512x3 float32 image takes 3 MiB, so a few thousand cached images
# already add up to tens of GiB of host memory.
valid_dataset = (
    tf.data.Dataset.from_tensor_slices((X_val, y_val))
    .map(decode_image, num_parallel_calls=AUTO)
    .batch(BATCH_SIZE)
    .prefetch(AUTO)
)

# Test pipeline: no shuffling and no relaxed ordering, because predictions
# are matched to the test images by position.
test_dataset = (
    tf.data.Dataset.from_tensor_slices(X_test)
    .map(decode_image, num_parallel_calls=AUTO)
    .batch(BATCH_SIZE)
    .prefetch(AUTO)
)

In [None]:
# The model is built and compiled inside the distribution strategy's scope.
with tpu_strategy.scope():
    # ImageNet-pretrained EfficientNetB7 without its classification head.
    backbone = efn.EfficientNetB7(input_shape=(512,512,3), weights='imagenet', include_top=False)

    model = tf.keras.Sequential()
    model.add(backbone)
    model.add(l.GlobalAveragePooling2D())
    # Single sigmoid unit: one score per image.
    model.add(l.Dense(1, activation="sigmoid"))

    model.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"])
    model.summary()

In [None]:
# The training dataset repeats indefinitely, so Keras needs an explicit
# number of steps; this is one pass over the training rows per epoch.
STEPS_PER_EPOCH = X_train.shape[0] // BATCH_SIZE
# NOTE(review): with patience=3 early stopping cannot trigger unless
# EPOCHS is raised above 3.
callbacks = [tf.keras.callbacks.EarlyStopping(patience=3, restore_best_weights=True)]

EPOCHS = 2
# `workers` / `use_multiprocessing` are intentionally not passed: Keras
# documents them as applying to generator / keras.utils.Sequence inputs only,
# and the input here is a tf.data.Dataset whose parallelism is configured in
# the pipeline itself.
hist = model.fit(
    train_dataset,
    epochs=EPOCHS,
    steps_per_epoch=STEPS_PER_EPOCH,
    validation_data=valid_dataset,
    callbacks=callbacks,
)

In [None]:
# Score every batch of the test dataset; verbose=1 prints progress.
pred = model.predict(test_dataset, verbose=1)

In [None]:
# Per-epoch curves recorded by model.fit.
train_loss = hist.history["loss"]
val_loss = hist.history["val_loss"]
train_acc = hist.history["accuracy"]
val_acc = hist.history["val_accuracy"]

fig, ax = plt.subplots(1,2,figsize=(10,6))

# One panel per metric; both panels share the same layout, so they are drawn
# by the same loop instead of two copy-pasted blocks.
panels = [
    (ax[0], "loss", train_loss, val_loss),
    (ax[1], "accuracy", train_acc, val_acc),
]
for axis, metric, train_curve, val_curve in panels:
    axis.plot(range(len(train_curve)), train_curve, label="train_" + metric)
    axis.plot(range(len(val_curve)), val_curve, label="val_" + metric)
    axis.set_xlabel("epochs")
    axis.set_ylabel(metric)
    # Title spelling fixed: the accuracy panel used to read "accurary".
    axis.set_title("EfficientNetB7 " + metric)
    axis.legend()

In [None]:
# Fill the "Label" column of a copy of the template with the predictions
# (matched to rows by position) and write the submission file.
sample = sample.assign(Label=pred)
sample.to_csv("submission.csv", index=False)

In [None]:
# Summary statistics of the values written to the "Label" column.
sample["Label"].describe()