# TPU Modeling

This notebook demonstrates how to work around the RAM limit while training on a TPU. It uses a custom training loop which, unlike loading the data through `tf.keras.utils.Sequence`, is compatible with the TPU. If you have additional ideas on how to speed up the training process, feedback is appreciated!

**imports**

In [None]:
!pip install -U efficientnet

import os
import sys

import math
from random import shuffle
import time

import numpy as np
import pandas as pd

import seaborn as sns
import matplotlib.pyplot as plt

import tensorflow as tf
from tensorflow.keras import models, layers, metrics
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import BinaryCrossentropy, CategoricalCrossentropy
from tensorflow.keras.applications import EfficientNetB0, InceptionV3

from sklearn.model_selection import train_test_split, KFold

import efficientnet.keras as efn

# TPU Setup

**TPU selection Function**

In [None]:
def auto_select_accelerator():
    """Pick a ``tf.distribute`` strategy, preferring a TPU when one resolves.

    The function tries to resolve a TPU cluster, connect to it, initialise
    the TPU system and build a TPU strategy on top of it. If any of these
    steps raises ``ValueError`` the strategy returned by
    ``tf.distribute.get_strategy()`` is used instead. In both cases the
    number of replicas of the chosen strategy is printed.

    Reference:
        * https://www.kaggle.com/mgornergoogle/getting-started-with-100-flowers-on-tpu
        * https://www.kaggle.com/xhlulu/ranzcr-efficientnet-tpu-training

    Returns:
        The selected distribution strategy.
    """
    try:
        resolver = tf.distribute.cluster_resolver.TPUClusterResolver()
        tf.config.experimental_connect_to_cluster(resolver)
        tf.tpu.experimental.initialize_tpu_system(resolver)
        selected = tf.distribute.experimental.TPUStrategy(resolver)
        print("Running on TPU:", resolver.master())
    except ValueError:
        # TPU setup raised ValueError: fall back to get_strategy().
        selected = tf.distribute.get_strategy()

    replica_count = selected.num_replicas_in_sync
    print(f"Running on {replica_count} replicas")

    return selected

**Select TPU**

In [None]:
# Build the distribution strategy once; later cells create the model and
# run the train/validation steps through it.
strategy = auto_select_accelerator()

# Global batch size: 16 samples for every replica of the strategy.
batch_size = strategy.num_replicas_in_sync * 16

# Dataset Creation

In [None]:
# Only the sample ids and their labels are kept in memory here; the actual
# arrays are loaded from disk batch by batch during training.
train_labels = pd.read_csv('../input/seti-breakthrough-listen/train_labels.csv')

train_idx = train_labels['id'].values
y = train_labels['target'].values

# Hold out 5% of the labelled ids for validation (fixed seed for a stable split).
x_train, x_valid, y_train, y_valid = train_test_split(
    train_idx, y, test_size=0.05, random_state=42
)

# Oversample the positive class ("needles") in the training split only:
# two extra copies of every positive sample are appended.
where_needle = np.where(y_train == 1)[0]

oversample_x = np.concatenate([x_train[where_needle]] * 2)
oversample_y = np.concatenate([y_train[where_needle]] * 2)

x_train = np.concatenate([x_train, oversample_x])
y_train = np.concatenate([y_train, oversample_y])

# Shuffle ids and labels with one shared permutation.
# NOTE: random.shuffle() works in place and returns None, so using its return
# value as an index leaves the data unshuffled (x[None] only adds an axis).
# The appended positives would then stay clustered at the end of the arrays,
# which the 1024-element tf.data shuffle buffer below cannot spread out.
idx = np.random.permutation(x_train.shape[0])

# Reformat into column vectors of shape (n_samples, 1).
x_train = x_train[idx].reshape(-1, 1)
y_train = y_train[idx].reshape(-1, 1)

x_valid = x_valid.reshape(-1, 1)
y_valid = y_valid.reshape(-1, 1)

# Create the TensorFlow datasets of (id, label) pairs.
dtrain = tf.data.Dataset.from_tensor_slices((x_train, y_train))
dtrain = dtrain.shuffle(1024).batch(batch_size)

dvalid = tf.data.Dataset.from_tensor_slices((x_valid, y_valid))
dvalid = dvalid.batch(batch_size)

# Modeling

**Model Function**

In [None]:
def create_model():
    """Build the binary classifier: EfficientNetB0 backbone plus a small head.

    The network takes inputs of shape ``(3*273, 256, 3)``, runs them through
    an EfficientNetB0 without its top (``'noisy-student'`` weights), applies
    global average pooling, a 32-unit dense layer followed by ReLU, and a
    single sigmoid output unit.

    NOTE(review): ``efn`` is imported from ``efficientnet.keras`` while the
    remaining layers come from ``tensorflow.keras``; the efficientnet package
    also provides ``efficientnet.tfkeras`` -- confirm which one is intended.

    Returns:
        The assembled, uncompiled ``models.Model``.
    """
    image_shape = (3*273, 256, 3)
    inputs = layers.Input(image_shape)

    # Feature extractor.
    backbone = efn.EfficientNetB0(
        input_shape=image_shape,
        weights='noisy-student',
        include_top=False,
    )
    features = backbone(inputs)
    features = layers.GlobalAveragePooling2D()(features)

    # Classification head.
    hidden = layers.Dense(32)(features)
    hidden = layers.Activation("relu")(hidden)
    probability = layers.Dense(1, activation="sigmoid")(hidden)

    return models.Model(inputs=inputs, outputs=probability)

**Initialize Model & Metrics in TPU Strategy**

In [None]:
# Model, optimizer and metric objects are all created inside the strategy
# scope.
with strategy.scope():
    model = create_model()
    optimizer = Adam(learning_rate=1e-3)

    # One AUC / summed-loss tracker per phase.
    train_auc, valid_auc = metrics.AUC(), metrics.AUC()
    train_loss, valid_loss = metrics.Sum(), metrics.Sum()

    def loss(a, b):
        """Binary cross-entropy of predictions ``b`` against labels ``a``.

        The unreduced loss is computed first and then scaled by the global
        ``batch_size`` through ``tf.nn.compute_average_loss``.
        """
        unreduced = BinaryCrossentropy(reduction=tf.keras.losses.Reduction.NONE)(a, b)
        return tf.nn.compute_average_loss(unreduced, global_batch_size=batch_size)

**Data loading Functions**

In [None]:
def id_to_path(idx, train=True):
    """Return the ``.npy`` file path for the sample id ``idx``.

    Files are laid out as ``<root>/<train|test>/<first char of id>/<id>.npy``.

    Args:
        idx: Sample id as a string.
        train: Use the ``train/`` folder when true, otherwise ``test/``.

    Returns:
        The relative path to the sample file as a string.
    """
    folder = 'train/' if train else 'test/'
    # str.join keeps the original strictness: a non-string id raises TypeError.
    return ''.join(('../input/seti-breakthrough-listen/', folder, idx[0], '/', idx, '.npy'))

def decode_batch(batch_ids, train=True):
    """Load the arrays for a batch of sample ids and stack them into images.

    For every id the ``.npy`` file is loaded from disk. Entries 0, 2, 4 of
    the loaded array are reshaped into one ``(3*273, 256, 1)`` plane and
    entries 1, 3, 5 into a second one; the planes are then combined into a
    3-channel image ordered (even, odd, even).

    Args:
        batch_ids: Tensor of ids; each row holds one byte-string id in its
            first column (the ``(batch, 1)`` layout produced by the datasets).
        train: Forwarded to ``id_to_path``; pass ``False`` to read from the
            test folder. Defaults to ``True``, the previous hard-coded value.

    Returns:
        ``np.ndarray`` of shape ``(batch, 3*273, 256, 3)`` for a non-empty
        batch.
    """
    batch_x = []

    for row in batch_ids.numpy():
        # Ids arrive as bytes from the tf.data pipeline; decode to str first.
        arr = np.load(id_to_path(row[0].decode(), train=train))

        x1 = arr[[0, 2, 4]].reshape(3*273, 256, 1)
        x2 = arr[[1, 3, 5]].reshape(3*273, 256, 1)

        # Concatenating on the last axis already yields (3*273, 256, 3),
        # so no further reshape is needed.
        batch_x.append(np.concatenate([x1, x2, x1], axis=2))

    return np.array(batch_x)

**Training Functions**

In [None]:
@tf.function
def train_step(x, y):
    """Run one optimisation step on a batch and update the training metrics.

    Args:
        x: Batch of model inputs.
        y: Matching batch of labels.

    NOTE(review): the training loop hands the same, undistributed batch to
    ``strategy.run``; presumably every replica then processes the full global
    batch -- confirm this is intended.
    """
    weights = model.trainable_weights

    # Forward pass under the tape so gradients can be taken afterwards.
    with tf.GradientTape() as tape:
        preds = model(x, training=True)
        batch_loss = loss(y, preds)

    # Backward pass and weight update.
    gradients = tape.gradient(batch_loss, weights)
    optimizer.apply_gradients(zip(gradients, weights))

    # Track AUC and accumulate the loss for the running epoch.
    train_auc.update_state(y, preds)
    train_loss.update_state(batch_loss)

@tf.function
def valid_step(x, y):
    """Evaluate the model on one batch and update the validation metrics.

    No weights are changed; the model is called with ``training=False``.

    Args:
        x: Batch of model inputs.
        y: Matching batch of labels.
    """
    preds = model(x, training=False)
    batch_loss = loss(y, preds)

    # Track AUC and accumulate the loss for the running epoch.
    valid_auc.update_state(y, preds)
    valid_loss.update_state(batch_loss)

**Training**

In [None]:
# Number of batches per epoch, read from the dataset metadata instead of
# iterating over each dataset once just to count its batches.
train_batches = int(dtrain.cardinality().numpy())
valid_batches = int(dvalid.cardinality().numpy())

# Best validation AUC seen so far; plays the role of a ModelCheckpoint callback.
best_metric = 0

for epoch in range(5):
    epoch_start_time = time.time()

    print("Starting Epoch {}:".format(epoch + 1))
    # Train on every batch of the dataset.
    for step, (batch_x, batch_y) in enumerate(dtrain):
        # The dataset only yields ids; load the actual arrays from disk here.
        batch_x = decode_batch(batch_x)

        strategy.run(train_step, args=(batch_x, batch_y))

        # train_loss is a running sum, so divide by the steps done so far.
        sys.stdout.write("\rFinished Step {} / {} --> loss: {} | AUC: {}".format(step + 1, train_batches,
                                                                                 train_loss.result().numpy()/(step + 1),
                                                                                 train_auc.result().numpy()))
        sys.stdout.flush()

    # No per-epoch re-shuffle is needed: dtrain already contains a shuffle
    # stage, which reshuffles on every iteration by default. Re-assigning
    # dtrain = dtrain.shuffle(...) here would stack one more shuffle buffer
    # onto the pipeline each epoch.

    # Validate on the held-out dataset.
    for batch_x, batch_y in dvalid:
        batch_x = decode_batch(batch_x)

        strategy.run(valid_step, args=(batch_x, batch_y))

    epoch_valid_auc = valid_auc.result().numpy()

    # Average the summed validation loss over the number of *validation*
    # batches (not over the number of training steps).
    print("val_loss: {} | val_AUC: {}".format(valid_loss.result().numpy()/valid_batches, epoch_valid_auc))
    print("Finishing after {} minutes...".format((time.time() - epoch_start_time)/(60)))
    print("")

    # Save the model whenever a new best validation AUC has been reached.
    if best_metric < epoch_valid_auc:
        model.save("./model.h5")
        best_metric = epoch_valid_auc

    # Reset the metrics so the next epoch starts from zero.
    train_loss.reset_states()
    train_auc.reset_states()
    valid_loss.reset_states()
    valid_auc.reset_states()