# Simple Silver Medal SETI Model with Grad Cam - LB 0.780+
This notebook demonstrates a simple silver medal model for Kaggle's SETI competition. We train 1 fold of EfficientNet-B4 with image size 768x768. The only augmentations are horizontal flip, vertical flip, and mixup. Training uses a cosine learning-rate schedule with warmup over 40 epochs. Using full precision, this takes 24 hours to train on 4x Nvidia V100 GPUs. (Mixed precision trains twice as fast but unfortunately hurts accuracy for this model.)

This notebook also demonstrates Grad Cam to show us what image features the model is using to predict targets.

In [None]:
import sys
sys.path.append('/kaggle/input/efficientnet-keras-dataset/efficientnet_kaggle')
! pip install -e /kaggle/input/efficientnet-keras-dataset/efficientnet_kaggle

In [None]:
import cv2, os, gc, sys
from sklearn.metrics import roc_auc_score
import albumentations as albu
import matplotlib.pyplot as plt
import pandas as pd, numpy as np
import efficientnet.tfkeras as efn
from sklearn.model_selection import KFold
import tensorflow as tf, math
import tensorflow.keras.backend as K
print('TF version',tf.__version__)

# RUN-MODE SWITCHES
# WHEN TRUE, THE FOLD LOOP BELOW CALLS model.fit
TRAIN_MODEL = False
# WHEN TRUE, THE FOLD LOOP STOPS AFTER THE FIRST FOLD
FOLD_0_ONLY = True
# DIRECTORY THAT SAVED MODEL WEIGHTS ARE LOADED FROM
MODEL_PATH = '/kaggle/input/setieb4768model/'
# IF ONLY INTERESTED IN GRAD CAM, SET BELOW TO FALSE
PREDICT_OOF = True
PREDICT_TEST = True

In [None]:
# LIST GPUS TO BE USED
# COMMA SEPARATED IDS; A COMMA IN THIS STRING SELECTS MirroredStrategy IN THE NEXT CELL
os.environ["CUDA_VISIBLE_DEVICES"]="0"

# EXPERIMENT VERSION NUMBER
# EMBEDDED IN THE NAMES OF WEIGHT, OOF, LOG AND SUBMISSION FILES
VER = 1003

In [None]:
# USE MULTIPLE GPUS
# MORE THAN ONE LISTED DEVICE (I.E. A COMMA IN THE LIST) MEANS MIRRORED TRAINING
if ',' in os.environ["CUDA_VISIBLE_DEVICES"]:
    strategy = tf.distribute.MirroredStrategy()
    print('multiple strategy')
else:
    strategy = tf.distribute.get_strategy()
    print('single strategy')

In [None]:
# USE MIXED PRECISION
# UNFORTUNATELY FOR THIS MODEL, MIXED PRECISION HURTS MODEL PERFORMANCE
#tf.config.optimizer.set_experimental_options({"auto_mixed_precision": True})
#print('Mixed precision enabled')

# Load Train and Test

In [None]:
# TRAIN LABELS; THE `id` AND `target` COLUMNS ARE READ BY THE DATA LOADER BELOW
train = pd.read_csv('../input/seti-breakthrough-listen/train_labels.csv')
print('Train shape is', train.shape )
train.head()

In [None]:
# THE SAMPLE SUBMISSION DOUBLES AS THE LIST OF TEST IDS; ITS `target` COLUMN IS OVERWRITTEN WITH PREDICTIONS LATER
test = pd.read_csv('../input/seti-breakthrough-listen/sample_submission.csv')
print('Test shape is', test.shape )
test.head()

# Data Loader

In [None]:
SIZE = 768
BASE = '../input/seti-breakthrough-listen/train/'

class DataGenerator(tf.keras.utils.Sequence):
    """Keras Sequence yielding batches of single-channel images and targets.

    Every sample is read from ``<path>/<first char of id>/<id>.npy``. Panels
    0, 2 and 4 of the loaded array are stacked vertically and the stack is
    resized to ``size`` x ``size``.

    Args:
        df: DataFrame with an ``id`` column and a ``target`` column.
        batch_size: Samples per batch (the final batch may be smaller).
        shuffle: Shuffle the sample order at construction and after each epoch.
        augment: Apply random horizontal/vertical flips to every image.
        visualize: Rescale each panel to [0, 1] around its median
            (+/- 2 IQR). Intended for plotting, not for training.
        size: Output height and width in pixels.
        path: Directory prefix of the ``.npy`` files.
        flipH: Flip every image left-right (used for test-time augmentation).
        flipV: Flip every image top-bottom (used for test-time augmentation).
        mixup_prob: Per-sample probability of replacing the sample with a
            blend of two randomly drawn members of the same batch.
        mixup_alpha: Both shape parameters of the Beta distribution the
            blend weight is drawn from.
        mixup_max: If True the blended target is the max of the two targets,
            otherwise their weighted average.
    """
    def __init__(self, df, batch_size=32, shuffle=False, augment=False, visualize=False, size=SIZE, path=BASE,
                 flipH=False, flipV=False, mixup_prob=0, mixup_alpha=3, mixup_max=True): 
        super().__init__()

        self.df = df.reset_index(drop=True)
        self.batch_size = batch_size
        self.shuffle = shuffle
        self.augment = augment
        self.mixup_prob = mixup_prob
        self.mixup_alpha = mixup_alpha
        self.mixup_max = mixup_max
        self.visualize = visualize
        self.size = size
        self.path = path
        self.flipH = flipH
        self.flipV = flipV
        self.on_epoch_end()
        
    def __len__(self):
        """Return the number of batches per epoch."""
        return int( np.ceil( len(self.df) / self.batch_size ) )

    def __getitem__(self, index):
        """Return batch ``index`` as ``(X, y)``; X is (batch, size, size, 1)."""
        indexes = self.indexes[index*self.batch_size:(index+1)*self.batch_size]
        X, y = self.__data_generation(indexes)
        
        if self.augment: X = self.__augment_batch(X)
        # X is laid out (batch, rows, cols, channel): a horizontal flip
        # reverses the column axis and a vertical flip reverses the row axis.
        # (The original code had these two axes swapped.)
        if self.flipH: X = X[:,:,::-1,:]
        if self.flipV: X = X[:,::-1,:,:]
            
        return X,y

    def on_epoch_end(self):
        """Reset the sample order, reshuffling it when ``shuffle`` is set."""
        self.indexes = np.arange( len(self.df ) )
        if self.shuffle: np.random.shuffle(self.indexes)
            
    def _get_image(self,row):
        """Load one sample and return ``(image, target)``.

        ``row`` must expose ``id`` (str) and ``target`` attributes. The image
        is a float32 array of shape (size, size).
        """
        data = np.load(self.path+row.id[0]+'/'+row.id+'.npy').astype('float32') 
        # Panel size is taken from the file instead of the former hard-coded
        # 273 x 256, so other panel sizes work as well.
        rows, cols = data.shape[1], data.shape[2]
        X = np.zeros((rows*3,cols),dtype='float32')
        
        for k in range(3):
            panel = data[2*k,]
            if self.visualize:
                md = np.median(panel)
                q75, q25 = np.percentile(panel, [75 ,25])
                iqr = q75 - q25
                tmp = np.clip(panel,md-2*iqr,md+2*iqr)
                tmp -= md-2*iqr
                # a constant panel has iqr == 0; leave it at zero instead of
                # dividing by zero and filling the image with NaN
                if iqr > 0:
                    tmp /= 4*iqr
            else: 
                tmp = panel
            X[rows*k:rows*(k+1),] = tmp
            
        X = cv2.resize(X,(self.size,self.size))
                               
        return X,float(row.target)
        
            
    def __data_generation(self, indexes):
        """Build the arrays for the samples at positions ``indexes``.

        Returns ``(X, y)`` with X of shape (n, size, size, 1) and y of shape
        (n,), both float32, with mixup applied when ``mixup_prob`` > 0.
        """
        n = len(indexes)
        X = np.zeros((n,self.size,self.size,1),dtype='float32')
        y = np.zeros((n),dtype='float32')
        
        df = self.df.loc[indexes]
        for i,(index,row) in enumerate(df.iterrows()):
            X[i,:,:,0],y[i] = self._get_image(row)
                                
        # MIXUP WITHIN BATCH
        # skip the full-batch copy when mixup is disabled (e.g. at inference)
        if self.mixup_prob > 0:
            # blend from untouched copies so earlier mixes never feed later ones
            y2 = y.copy(); X2 = X.copy()
            for i in range(n):
                if np.random.uniform(0,1) < self.mixup_prob:
                    # sample i is replaced by a blend of two random batch members
                    rw = np.random.randint(0,n,2)
                    img,tar = X2[rw[0],], y2[rw[0]]  
                    img2,tar2 = X2[rw[1],], y2[rw[1]]
                    w = np.random.beta(self.mixup_alpha,self.mixup_alpha)
                    X[i,] = w * img2 + (1-w) * img
                    if self.mixup_max:
                        y[i] = max(tar,tar2)
                    else:
                        y[i] = w * tar2 + (1-w) * tar
                    
        return X,y
 
    def __build_transform(self):
        """Create the random-flip augmentation pipeline."""
        return albu.Compose([
            albu.HorizontalFlip(p=0.5),
            albu.VerticalFlip(p=0.5),
            #albu.ShiftScaleRotate(rotate_limit=0,scale_limit=0.125,shift_limit=0.0625,p=0.25), 
            #albu.ColorJitter(brightness=0.3, contrast=0.3, saturation=0, hue=0, p=0.25),
        ])

    def __random_transform(self, img, composition=None):
        """Apply the augmentation pipeline to one image; returns albumentations' result dict."""
        if composition is None:
            composition = self.__build_transform()
        return composition(image=img)
            
    def __augment_batch(self, img_batch):
        """Augment every image of ``img_batch`` in place and return the batch."""
        # build the pipeline once per batch rather than once per image
        composition = self.__build_transform()
        for i in range(img_batch.shape[0]):
            tmp = self.__random_transform(img_batch[i, ], composition)
            img_batch[i, ] = tmp['image']
        return img_batch

# Display Examples
When displaying examples, we use the flag `visualize=True`. We do not use this flag during training, but when displaying images it makes the colors easier to see with the human eye.

In [None]:
# DISPLAY EXAMPLES OF DATALOADER
cols = 4
train_gen = DataGenerator(train, augment=True, shuffle=True, batch_size=4, visualize=True, mixup_prob=1.0)

# ONE FIGURE PER BATCH, FOR THE FIRST FOUR BATCHES ONLY
for i,b in zip(range(4), train_gen):
    plt.figure(figsize=(20,10))
    # ONE SUBPLOT PER IMAGE OF THE BATCH, TITLED WITH ITS (MIXED) TARGET
    for k in range(cols):
        plt.subplot(1,cols,k+1)
        plt.imshow( b[0][k] ) 
        t = b[1][k] 
        plt.title('target = %i'%t,size=16)
    plt.show()

# Models
In the model below we can use `tf.keras.layers.Concatenate` instead of `tf.keras.layers.Conv2D`, and we can remove `tf.keras.layers.Dropout(0.15)`, and still achieve the same model performance, so neither choice is important. The most important things are mixup augmentation, large image size, and a large backbone.

In [None]:
def build_model(weights='imagenet'):
    """Build and compile the EfficientNet-B4 binary classifier.

    A 3x3 convolution maps the single-channel input to the 3 channels the
    backbone expects; the backbone output is globally average-pooled, passed
    through dropout and a 1-unit sigmoid layer.

    Args:
        weights: Passed to ``efn.EfficientNetB4`` as its ``weights`` argument.
            Defaults to ``'imagenet'``. Pass ``None`` to skip loading
            pretrained weights, e.g. when saved weights are loaded right
            after building.

    Returns:
        A ``tf.keras.Model`` compiled with Adam, binary cross-entropy and AUC.
    """
    inp = tf.keras.layers.Input(shape=(None,None,1))
    #x = tf.keras.layers.Concatenate(axis=-1)([inp,inp,inp])
    x = tf.keras.layers.Conv2D(3,3,strides=1,padding='same')(inp)
    base = efn.EfficientNetB4(weights=weights,include_top=False, input_shape=None)
    x = base(x)
    x = tf.keras.layers.GlobalAveragePooling2D()(x)
    x = tf.keras.layers.Dropout(0.15)(x)
    # the output layer is pinned to float32 regardless of any precision policy
    x = tf.keras.layers.Dense(1, activation='sigmoid', dtype='float32')(x)
        
    model = tf.keras.Model(inputs=inp, outputs=x)
    
    opt = tf.keras.optimizers.Adam(learning_rate=1e-3)
    met = tf.keras.metrics.AUC()
    loss = tf.keras.losses.BinaryCrossentropy()
    
    # `metrics` is documented as a list of metrics
    model.compile(loss=loss, optimizer=opt, metrics=[met]) 
        
    return model

In [None]:
def build_cam_model(weights='imagenet'):
    """Build the Grad-CAM variant of the classifier (uncompiled).

    The layer stack is the same as in ``build_model`` so that weights saved
    from it can be loaded here, but the model additionally outputs the
    backbone's feature maps.

    Args:
        weights: Passed to ``efn.EfficientNetB4`` as its ``weights`` argument.
            Defaults to ``'imagenet'``. Pass ``None`` to skip loading
            pretrained weights when saved weights are loaded right after.

    Returns:
        A ``tf.keras.Model`` with two outputs: the sigmoid prediction and the
        backbone feature maps taken before global pooling.
    """
    inp = tf.keras.layers.Input(shape=(None,None,1))
    x = tf.keras.layers.Conv2D(3,3,strides=1,padding='same')(inp)
    base = efn.EfficientNetB4(weights=weights,include_top=False, input_shape=None)
    # keep a handle on the un-pooled feature maps; they are the second output
    x0 = base(x)
    x = tf.keras.layers.GlobalAveragePooling2D()(x0)
    x = tf.keras.layers.Dropout(0.15)(x)
    x = tf.keras.layers.Dense(1, activation='sigmoid', dtype='float32')(x)
        
    model = tf.keras.Model(inputs=inp, outputs=[x,x0])
        
    return model

# Learning Schedule

In [None]:
# LEARNING RATE AT EPOCH 0, AT THE END OF WARMUP, AND AT THE LAST EPOCH
LR_START = 5e-5
LR_MAX = 5e-4
LR_MIN = 5e-7
# NUMBER OF LINEAR WARMUP EPOCHS, THEN EPOCHS HELD AT LR_MAX
LR_RAMPUP_EPOCHS = 3
LR_SUSTAIN_EPOCHS = 0
EPOCHS = 40

def lrfn(epoch):
    """Return the learning rate for the 0-based ``epoch``.

    Linear warmup from LR_START to LR_MAX over LR_RAMPUP_EPOCHS, an optional
    plateau at LR_MAX for LR_SUSTAIN_EPOCHS, then a half-cosine decay that
    reaches LR_MIN at epoch ``EPOCHS - 1``. Later epochs stay at LR_MIN.

    The signature must stay ``lrfn(epoch)``: Keras' LearningRateScheduler
    first tries ``schedule(epoch, lr)`` and only falls back to
    ``schedule(epoch)`` on a TypeError.
    """
    if epoch < LR_RAMPUP_EPOCHS:
        return (LR_MAX - LR_START) / LR_RAMPUP_EPOCHS * epoch + LR_START
    if epoch < LR_RAMPUP_EPOCHS + LR_SUSTAIN_EPOCHS:
        return LR_MAX

    decay_total_epochs = EPOCHS - LR_RAMPUP_EPOCHS - LR_SUSTAIN_EPOCHS - 1
    if decay_total_epochs <= 0:
        # at most one epoch is left after warmup/plateau: there is nothing to
        # interpolate, and dividing by decay_total_epochs would fail
        return LR_MAX

    decay_epoch_index = epoch - LR_RAMPUP_EPOCHS - LR_SUSTAIN_EPOCHS
    # clamp so that epochs past the schedule do not climb back up the cosine
    decay_epoch_index = min(decay_epoch_index, decay_total_epochs)
    phase = math.pi * decay_epoch_index / decay_total_epochs
    cosine_decay = 0.5 * (1 + math.cos(phase))
    return (LR_MAX - LR_MIN) * cosine_decay + LR_MIN

# EVALUATE THE SCHEDULE AT EVERY EPOCH AND PLOT IT
rng = list(range(EPOCHS))
lr_y = list(map(lrfn, rng))
plt.figure(figsize=(10, 4))
plt.plot(rng, lr_y, '-o')
# SUMMARY LINE: FIRST, PEAK AND FINAL LEARNING RATE
print("Learning rate schedule: {:.3g} to {:.3g} to {:.3g}".format(lr_y[0], max(lr_y), lr_y[-1]))
# KERAS CALLBACK THAT APPLIES lrfn AT THE START OF EACH EPOCH
lr_callback = tf.keras.callbacks.LearningRateScheduler(lrfn, verbose = True)

# Train Model

In [None]:
FOLDS = 5
BATCH = 32
VAL_BATCH = 32 #make this larger offline

# DIRECTORY OF THE TEST .npy FILES
TEST_PATH = '../input/seti-breakthrough-listen/test/'
# THE FOUR (flipH, flipV) VIEWS AVERAGED AT TEST TIME, IN ORDER: NONE, H, V, H+V
TTA_FLIPS = [(False,False),(True,False),(False,True),(True,True)]

skf = KFold(n_splits=FOLDS, shuffle=True, random_state=42)
train['fold'] = -1
# OUT-OF-FOLD PREDICTIONS FOR TRAIN AND FOLD-AVERAGED PREDICTIONS FOR TEST
oof = np.zeros(len(train))
preds = np.zeros(len(test))

# NOTE: `fold` AND `idx_v` ARE READ BY THE GRAD CAM CELLS AFTER THIS LOOP
for fold,(idx_t, idx_v) in enumerate(skf.split(train)):
    # NOTHING TO DO IN THE LOOP WHEN EVERY SWITCH IS OFF
    if not (TRAIN_MODEL or PREDICT_OOF or PREDICT_TEST): break
        
    K.clear_session()
    print('#'*25)
    print('### FOLD',fold+1)
    print('### train size',len(idx_t),'valid size',len(idx_v))
    print('#'*25)
    
    weights_name = 'model_fold%i_v%i.h5'%(fold,VER)
    
    train_gen = DataGenerator(train.iloc[idx_t], shuffle=True, augment=True, batch_size=BATCH, mixup_prob=1.0)
    valid_gen = DataGenerator(train.iloc[idx_v], batch_size=VAL_BATCH) 
    test_gen = DataGenerator(test, batch_size=VAL_BATCH, path=TEST_PATH)
    
    # KEEP ONLY THE WEIGHTS OF THE EPOCH WITH THE LOWEST VALIDATION LOSS
    sv = tf.keras.callbacks.ModelCheckpoint(
        weights_name, monitor='val_loss', verbose=1, save_best_only=True,
        save_weights_only=True, mode='auto', save_freq='epoch'
    )
    
    with strategy.scope():
        model = build_model()  
    if TRAIN_MODEL:
        model.fit(train_gen, epochs=EPOCHS, validation_data=valid_gen, verbose=1, callbacks=[sv,lr_callback]
             ,use_multiprocessing=True, workers=4)
               
    if PREDICT_OOF or PREDICT_TEST:
        print('Loading model to predict oof and preds...')
        # AFTER TRAINING, THE BEST CHECKPOINT IS IN THE WORKING DIRECTORY (WHERE
        # ModelCheckpoint WROTE IT); OTHERWISE USE THE PRETRAINED WEIGHTS IN MODEL_PATH
        weights_dir = '' if TRAIN_MODEL else MODEL_PATH
        model.load_weights(weights_dir+weights_name)
    
    if PREDICT_OOF:
        print('Predicting oof with TTAx4...')
        for flip_h,flip_v in TTA_FLIPS:
            valid_gen = DataGenerator(train.iloc[idx_v], batch_size=VAL_BATCH, flipH=flip_h, flipV=flip_v) 
            oof[idx_v] += model.predict(valid_gen,verbose=1).flatten()/len(TTA_FLIPS)
    
        auc = roc_auc_score(train.target.values[idx_v],oof[idx_v])
        print(f'Fold {fold+1} AUC =',auc)
    
        # SAVE EACH OOF IN CASE WE STOP TRAINING EARLY
        train.loc[idx_v,'fold'] = fold
        train['oof'] = oof
        train.to_csv(f'oof_v{VER}_f{fold}.csv',index=False)  
    
        # LOG FOLD OOF AUC SCORE
        with open(f'log_v{VER}.txt','a') as f:
            f.write(f'Fold {fold+1} AUC = {auc}\n')
            
        print('wrote OOF to disk')
        print('#'*25)
        
    if PREDICT_TEST:    
        print('Predicting test with TTAx4...')
        for flip_h,flip_v in TTA_FLIPS:
            test_gen = DataGenerator(test, batch_size=VAL_BATCH, path=TEST_PATH, flipH=flip_h, flipV=flip_v)
            preds += model.predict(test_gen,verbose=1).flatten()/FOLDS/len(TTA_FLIPS)
    
        # SAVE EACH TEST IN CASE WE STOP TRAINING EARLY
        # preds HOLDS (SUM OVER FINISHED FOLDS)/FOLDS, SO RESCALE TO THE MEAN OF THE FINISHED FOLDS
        test['target'] = preds*FOLDS/(fold+1)
        test.to_csv(f'submission_v{VER}_f{fold}.csv',index=False)
        print('wrote submission to disk')
        
    del model, train_gen, valid_gen, test_gen, sv
    _ = gc.collect()
    
    if FOLD_0_ONLY: break

# Grad Cam OOF Preds

In [None]:
# LOAD WEIGHTS INTO GRAD CAM MODEL
with strategy.scope():
    model = build_cam_model()    
model.load_weights(MODEL_PATH+'model_fold%i_v%i.h5'%(fold,VER))
layer_weights = model.layers[-1].get_weights()[0][:,0]

In [None]:
# HELPER FUNCTION
def mask2contour(mask, width=5):
    w = mask.shape[1]
    h = mask.shape[0]
    mask2 = np.concatenate([mask[:,width:],np.zeros((h,width))],axis=1)
    mask2 = np.logical_xor(mask,mask2)
    mask3 = np.concatenate([mask[width:,:],np.zeros((width,w))],axis=0)
    mask3 = np.logical_xor(mask,mask3)
    return np.logical_or(mask2,mask3) 

# CLAHE (OPENCV ADAPTIVE HISTOGRAM EQUALIZATION) OBJECT, APPLIED TO THE uint8 IMAGES IN THE PLOTS BELOW
clahe = cv2.createCLAHE(clipLimit=16.0, tileGridSize=(8,8))

In [None]:
# GET OOF WITH TARGET EQUAL 1
PORTION = 512
tmp = train.iloc[idx_v[:PORTION]]
tmp = tmp.reset_index(drop=True)
IDX = tmp.loc[tmp.target==1].index.values
len(IDX)

In [None]:
# PREDICT OOF SAMPLES FOR GRAD CAM
valid_gen = DataGenerator(train.iloc[idx_v[IDX]], batch_size=VAL_BATCH)
p,x = model.predict(valid_gen,verbose=1)
print(x.shape)

In [None]:
# NUMBER OF SELECTED SAMPLES TO SKIP, AND NUMBER TO SHOW AFTER THAT
SKIP = 0
SHOW = 32

def _enhance_visibility(img):
    """Emboss a 2-D float image, rescale it to uint8, blur it and apply CLAHE."""
    img = img[1:,1:] - img[:-1,:-1] #emboss
    img -= np.min(img)
    peak = np.max(img)
    # a constant image has peak 0: leave it all zeros instead of dividing by zero
    if peak > 0:
        img /= peak
    img = (img*255).astype('uint8')
    img = cv2.GaussianBlur(img,(5,5),0)
    return clahe.apply(img)

# i COUNTS THE SHOWN SAMPLES (x AND p ARE ORDERED LIKE IDX), k IS THE POSITION INTO idx_v
for i,k in enumerate(IDX[SKIP:SKIP+SHOW]):
    
    plt.figure(figsize=(20,5))
    
    # PLOT GRAD CAM
    # CLASS ACTIVATION MAP = FEATURE MAPS WEIGHTED BY THE DENSE LAYER WEIGHTS
    cam = np.sum(x[i+SKIP,] * layer_weights,axis=-1)
    cam = cv2.resize(cam,(320,320))
    plt.subplot(1,4,4)
    plt.imshow(cam)
    
    # GET GRAD CAM CONTOUR
    # OUTLINE THE TOP 10% OF THE MAP. A BOOLEAN MASK IS USED BECAUSE THE FORMER
    # TWO-STEP THRESHOLDING (>=cut -> 100, THEN <cut -> 0) ERASED EVERYTHING WHEN cut > 100
    cut = np.percentile(cam, 90)
    cntr = mask2contour(cam >= cut)

    # PLOT ORIGINAL ON CADENCE
    name = train.iloc[idx_v[k],0]
    tar = train.iloc[idx_v[k],1]
    img0 = np.load(BASE+name[0]+'/'+name+'.npy').astype('float32')
    img = np.vstack(img0[::2])
    img = cv2.resize(img,(320,320))
    plt.subplot(1,4,1)
    plt.imshow(img)
    plt.title(f'Train ID = {name}',size=14)
        
    # PLOT ON CADENCE WITH IMPROVED VISIBILITY FILTER
    plt.subplot(1,4,2)
    img = _enhance_visibility(img)
    if p[i+SKIP,0]>0.5: 
        # THE EMBOSS STEP DROPS ONE ROW AND ONE COLUMN, SO CROP THE CONTOUR TO MATCH
        img[cntr[1:,1:]] = np.max(img)
    plt.imshow(img)
    plt.title(f'True = {tar}',size=14)
    
    # PLOT OFF CADENCE WITH IMPROVED VISIBILITY
    img = np.vstack(img0[1::2])
    img = cv2.resize(img,(320,320))
    plt.subplot(1,4,3)  
    img = _enhance_visibility(img)
    plt.imshow(img)
    plt.title(f'Pred = {p[i+SKIP,0]:.3}',size=14)
    
    plt.show()

# Grad Cam Test Preds

In [None]:
# PREDICT OOF SAMPLES FOR GRAD CAM
# PREDICT THE FIRST `PORTION` TEST SAMPLES FOR GRAD CAM
# THE CAM MODEL HAS TWO OUTPUTS: p = SIGMOID PREDICTIONS, x = BACKBONE FEATURE MAPS
PORTION = 256
test_gen = DataGenerator(test.iloc[:PORTION], batch_size=VAL_BATCH, path='../input/seti-breakthrough-listen/test/')
p,x = model.predict(test_gen,verbose=1)
print(x.shape)

In [None]:
# FIND PREDICTIONS WITH TARGET EQUAL 1
# FIND TEST SAMPLES WHOSE PREDICTED PROBABILITY EXCEEDS 0.75 (ROW POSITIONS INTO p AND x)
IDX = np.where(p>0.75)[0]
len(IDX)

In [None]:
# NUMBER OF SELECTED SAMPLES TO SKIP, AND NUMBER TO SHOW AFTER THAT
SKIP = 0
SHOW = 32
BASE2 = '../input/seti-breakthrough-listen/test/'

def _enhance_visibility(img):
    """Emboss a 2-D float image, rescale it to uint8, blur it and apply CLAHE."""
    img = img[1:,1:] - img[:-1,:-1] #emboss
    img -= np.min(img)
    peak = np.max(img)
    # a constant image has peak 0: leave it all zeros instead of dividing by zero
    if peak > 0:
        img /= peak
    img = (img*255).astype('uint8')
    img = cv2.GaussianBlur(img,(5,5),0)
    return clahe.apply(img)

# k IS THE ROW POSITION OF THE SAMPLE IN p, x AND test
for i,k in enumerate(IDX[SKIP:SKIP+SHOW]):
    
    plt.figure(figsize=(20,5))
    
    # PLOT GRAD CAM
    # CLASS ACTIVATION MAP = FEATURE MAPS WEIGHTED BY THE DENSE LAYER WEIGHTS
    cam = np.sum(x[k,] * layer_weights,axis=-1)
    cam = cv2.resize(cam,(320,320))
    plt.subplot(1,4,4)
    plt.imshow(cam)
    
    # GET GRAD CAM CONTOUR
    # OUTLINE THE TOP 10% OF THE MAP. A BOOLEAN MASK IS USED BECAUSE THE FORMER
    # TWO-STEP THRESHOLDING (>=cut -> 100, THEN <cut -> 0) ERASED EVERYTHING WHEN cut > 100
    cut = np.percentile(cam, 90)
    cntr = mask2contour(cam >= cut)

    # PLOT ORIGINAL ON CADENCE
    name = test.iloc[k,0]
    img0 = np.load(BASE2+name[0]+'/'+name+'.npy').astype('float32')
    img = np.vstack(img0[::2])
    img = cv2.resize(img,(320,320))
    plt.subplot(1,4,1)
    plt.imshow(img)
    plt.title(f'Test ID = {name}',size=14)
        
    # PLOT ON CADENCE WITH IMPROVED VISIBILITY FILTER
    plt.subplot(1,4,2)
    img = _enhance_visibility(img)
    if p[k,0]>0.5: 
        # THE EMBOSS STEP DROPS ONE ROW AND ONE COLUMN, SO CROP THE CONTOUR TO MATCH
        img[cntr[1:,1:]] = np.max(img)
    plt.imshow(img)
    #plt.title(f'True = {tar}',size=14)
    
    # PLOT OFF CADENCE WITH IMPROVED VISIBILITY
    img = np.vstack(img0[1::2])
    img = cv2.resize(img,(320,320))
    plt.subplot(1,4,3)  
    img = _enhance_visibility(img)
    plt.imshow(img)
    plt.title(f'Pred = {p[k,0]:.3}',size=14)
    
    plt.show()