In [None]:
import tensorflow as tf

In [None]:
# List the contents of ../input to see what is available to the notebook.
# NOTE(review): presumably this is where the attached Kaggle datasets are mounted — confirm.
!ls ../input

In [None]:
# Detect the available hardware and select the matching tf.distribute strategy.
#
# Globals defined by this cell:
#   TPU        -- the TPUClusterResolver, or None when no TPU could be resolved
#   strategy   -- the tf.distribute strategy used for the rest of the notebook
#   N_REPLICAS -- strategy.num_replicas_in_sync
try:
    # TPU detection. No parameters are necessary if the TPU_NAME environment
    # variable is set. On Kaggle this is always the case.
    TPU = tf.distribute.cluster_resolver.TPUClusterResolver()
except ValueError:
    # A ValueError from the resolver is treated as "no TPU attached".
    TPU = None

if TPU:
    print(f"\n... RUNNING ON TPU - {TPU.master()}...")
    tf.config.experimental_connect_to_cluster(TPU)
    tf.tpu.experimental.initialize_tpu_system(TPU)
    # tf.distribute.experimental.TPUStrategy is a deprecated alias; use the
    # stable tf.distribute.TPUStrategy when this TensorFlow build provides it
    # and fall back to the experimental name on older releases.
    _tpu_strategy_cls = (
        getattr(tf.distribute, "TPUStrategy", None)
        or tf.distribute.experimental.TPUStrategy
    )
    strategy = _tpu_strategy_cls(TPU)
else:
    print("\n... RUNNING ON CPU/GPU ...")
    # Use the default distribution strategy in TensorFlow
    #   --> Works on CPU and single GPU.
    strategy = tf.distribute.get_strategy()

# What is a replica?
#    --> A single Cloud TPU device consists of FOUR chips, each of which has TWO TPU cores.
#    --> Therefore, for efficient utilization of Cloud TPU, a program should make use of each of the EIGHT (4x2) cores.
#    --> Each replica is essentially a copy of the training graph that is run on each core and
#        trains a mini-batch containing 1/8th of the overall batch size.
N_REPLICAS = strategy.num_replicas_in_sync

print(f"... # OF REPLICAS: {N_REPLICAS} ...\n")

print("\n... ACCELERATOR SETUP COMPLETED ...\n")

In [None]:
# Turn on XLA just-in-time compilation, logging progress before and after.
print("\n... XLA OPTIMIZATIONS STARTING ...\n")

print("\n... CONFIGURE JIT (JUST IN TIME) COMPILATION ...\n")
# Enable XLA optimizations through the TensorFlow optimizer config.
# (The original author reports roughly a 10% speedup with @tf.function calls.)
tf.config.optimizer.set_jit(True)

print("\n... XLA OPTIMIZATIONS COMPLETED ...\n")

In [None]:
# Step 1: Fetch the Google Cloud credential attached to this Kaggle account.
from kaggle_secrets import UserSecretsClient
user_secrets = UserSecretsClient()
user_credential = user_secrets.get_gcloud_credential()

# Step 2: Set the credentials
# NOTE(review): presumably this lets TensorFlow (and the training subprocess
# launched later) access private gs:// buckets — confirm.
user_secrets.set_tensorflow_credential(user_credential)

In [None]:
# Resolve the GCS path of the "siim-cocolike-tfrecords" Kaggle dataset and
# display it as the cell output.
# NOTE(review): presumably this is the bucket holding the fold_*/ TFRecords
# referenced by the inspect/train commands further down — confirm.
from kaggle_datasets import KaggleDatasets
DATA_DIR = KaggleDatasets().get_gcs_path("siim-cocolike-tfrecords")
DATA_DIR

In [None]:
# Resolve the GCS path of the "effdet-pretrained-weights" Kaggle dataset and
# display it as the cell output. Relies on KaggleDatasets imported in the
# DATA_DIR cell.
# NOTE(review): presumably this is the bucket holding the pretrained
# EfficientDet checkpoint passed to --ckpt in the training command — confirm.
MODEL_DIR = KaggleDatasets().get_gcs_path("effdet-pretrained-weights")
MODEL_DIR

In [None]:
# Copy the brain_automl source tree from the effnetv2-rep input dataset into
# the working directory; later cells cd into ./brain_automl/efficientdet.
!cp -r ../input/effnetv2-rep/brain_automl brain_automl

In [None]:
# Install the EfficientDet requirements. The command runs from inside the
# efficientdet directory so that requirements.txt is resolved there.
# NOTE(review): versions are whatever requirements.txt specifies; the file is
# not visible from this notebook, so pinning could not be verified.
!cd ./brain_automl/efficientdet && pip install -r requirements.txt

In [None]:
%%writefile voc_config.yaml
num_classes: 2
label_map: {1: opacity}
learning_rate: 0.005
lr_warmup_init: 0.0005
moving_average_decay: 0.0
image_size: 1024

In [None]:
# Copy inspect_tfrecords.py from dataset/ up into the efficientdet root.
# NOTE(review): presumably so it can be run from that directory and import the
# top-level efficientdet modules — confirm.
!cp ./brain_automl/efficientdet/dataset/inspect_tfrecords.py ./brain_automl/efficientdet/inspect_tfrecords.py

In [None]:
# Run inspect_tfrecords.py over the fold-4 TFRecords using the hparams in
# voc_config.yaml.
# The bucket comes from DATA_DIR (resolved via KaggleDatasets earlier in the
# notebook) rather than a pasted gs://kds-... name, so the command keeps
# working when Kaggle hands out a different bucket path. IPython expands
# {DATA_DIR} before the shell sees the command.
# The pattern is quoted so the shell passes the glob to the script unchanged.
# NOTE(review): assumes DATA_DIR is the dataset that contains fold_4/ — confirm.
!cd ./brain_automl/efficientdet && python inspect_tfrecords.py --file_pattern="{DATA_DIR}/fold_4/*.tfrecord" --hparams=../../voc_config.yaml

In [None]:
# List the files in tfrecord_samples.
# NOTE(review): presumably these are the sample images written by the
# inspect_tfrecords.py run — confirm.
!ls ./brain_automl/efficientdet/tfrecord_samples

In [None]:
# Show sample0.jpg from the tfrecord_samples directory inline in the notebook.
# `os` imported here is also used by the following image-size cell.
from IPython import display
import os
display.display(display.Image(os.path.join("./brain_automl/efficientdet/tfrecord_samples", 'sample0.jpg')))

In [None]:
from PIL import Image

# Report the (width, height) of sample2.jpg from the tfrecord_samples directory.
# Image.open() keeps the underlying file handle open until the image is closed,
# so it is used as a context manager and only the size tuple is kept.
# Relies on `os` imported in the preceding display cell.
with Image.open(os.path.join("./brain_automl/efficientdet/tfrecord_samples", 'sample2.jpg')) as sample_image:
    sample_size = sample_image.size
sample_size

In [None]:
# Split 4294 into fifths and show (four fifths, one fifth).
# The first value (~3435) matches --num_examples_per_epoch in the training command.
# NOTE(review): presumably 4294 is the total number of examples and 5 the
# number of folds, giving train / validation sizes per fold — confirm.
(4294/5)*4, (4294/5)

In [None]:
# Launch EfficientDet-D4 training on the TPU for fold 4 via main.py.
# The training TFRecords and the starting checkpoint are addressed through
# DATA_DIR / MODEL_DIR (resolved via KaggleDatasets earlier in the notebook)
# instead of pasted gs://kds-... bucket names, so the command keeps working
# when Kaggle hands out different bucket paths. IPython expands {DATA_DIR} and
# {MODEL_DIR} before the shell sees the command.
# The file pattern is quoted so the shell passes the glob to main.py unchanged.
# num_examples_per_epoch=3435 is four fifths of 4294 (see the fold-size cell).
# NOTE(review): assumes DATA_DIR holds fold_4/ and MODEL_DIR holds
# efficientdet-d4 — confirm against the paths printed by those cells.
# NOTE(review): the TPU address is hard-coded; confirm it matches the
# TPU.master() value printed by the accelerator-setup cell.
!cd ./brain_automl/efficientdet && python main.py --mode=train \
    --tpu=grpc://10.0.0.2:8470 \
    --train_file_pattern="{DATA_DIR}/fold_4/train*.tfrecord" \
    --model_name=efficientdet-d4 \
    --model_dir=gs://effdet_siim_output/efficientdet-d4-finetune/1024/fold_4  \
    --ckpt={MODEL_DIR}/efficientdet-d4  \
    --num_examples_per_epoch=3435 --num_epochs=35  \
    --train_batch_size=16 \
    --save_checkpoints_steps=250 \
    --iterations_per_loop=250 \
    --hparams=../../voc_config.yaml \
    --strategy=tpu